import later fedora-branch tweaks

author: Roland McGrath <roland@gnu.org> 2005-02-16 12:31:10 +0000
committer: Roland McGrath <roland@gnu.org> 2005-02-16 12:31:10 +0000
commit: 833861be818bb5d45ab0c47370b84068dfb2fedf (patch)
tree: 2f1754a415c378f6b067f9158cc42df24d4641d2
parent: c397a0064061e28a00eea873669e59f3983db791 (diff)
441 files changed, 54630 insertions, 49498 deletions
diff --git a/ChangeLog b/ChangeLog
index b762bd97c1..8bd99f21f1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,1290 +1,12 @@
-2005-02-12  Jakub Jelinek  <jakub@redhat.com>
-
-	* elf/rtld.c (dlmain): If LD_TRACE_PRELINKING, clear l_relocated flag
-	before relocating ld.so again.
-
-2005-02-11  Jakub Jelinek  <jakub@redhat.com>
-
-	* debug/chk_fail.c (__chk_fail): Add a while (1) loop around
-	__libc_message to kill GCC warning about noreturn function returning.
-
-	* elf/rtld.c (_dl_start): Set bootstrap_map.l_relocated even
-	for already prelinked ld.so.
-
-	* scripts/soversions.awk: Only record first WORDSIZE{32,64}
-	matching line.
-
-2005-02-11  Roland McGrath  <roland@redhat.com>
-
-	[BZ #715]
-	* Makefile (installed-stubs): New variable.
-	Set to $(inst_includedir)/gnu/stubs.h if $(biarch) is no,
-	or to $(inst_includedir)/gnu/stubs-$(biarch).h if otherwise.
-	[$(biarch) != no] (install-others-nosubdir): Add $(installed-stubs).
-	($(inst_includedir)/gnu/stubs.h): Use $(installed-stubs) for this
-	rule's target.
-	($(inst_includedir)/gnu/stubs.h): New target, install from ...
-	* include/stubs-biarch.h: New file.
-
-	* shlib-versions (i.86-.*-.*): Add WORDSIZE64 line mapping to x86_64.
-
-	* Makeconfig ($(common-objpfx)soversions.mk): Depend on Makeconfig.
-	Emit defn for variable `biarch'.
-
-	* include/stub-tag.h: Update comment.
-
-	* scripts/soversions.awk: Fix default version set handling.
-
-2005-02-09  Jakub Jelinek  <jakub@redhat.com>
-
-	* sysdeps/unix/sysv/linux/bits/termios.h (CMSPAR): Define.
-	* sysdeps/unix/sysv/linux/alpha/bits/termios.h: Add __USE_MISC
-	and __USE_XOPEN guards to match linux/bits/termios.h.
-	(CMSPAR): Define.
-	* sysdeps/unix/sysv/linux/powerpc/bits/termios.h: Add __USE_MISC
-	and __USE_XOPEN guards to match linux/bits/termios.h.
-	(CMSPAR): Define.
-	* sysdeps/unix/sysv/linux/sparc/bits/termios.h: Add __USE_MISC
-	and __USE_XOPEN guards to match linux/bits/termios.h.
-
-2005-02-10  Roland McGrath  <roland@redhat.com>
-
-	[BZ #157]
-	* include/libc-symbols.h (stub_warning): Emit a marker section called
-	.gnu.glibc-stub.NAME.
-	* Makerules ($(objpfx)stubs): Depend on object files, not dep files.
-	Use objdump to collect those marker section names.
-	($(common-objpfx)shlib.lds): Discard .gnu.glibc-stub.* sections.
-
-	* Rules (subdir_objs, subdir_stubs): New phony targets.
-	* Makefile (+subdir_targets): Add them.
-
-	* Makerules ($(common-objpfx)Versions.all): Grok new leading column in
-	soversions.i.
-
-	* scripts/lib-names.awk: Consider [0-9].* a "number", not just [0-9]+.
-	Reported by H.J. Lu <hongjiu.lu@intel.com>.
-
-	* scripts/lib-names.awk: Always print WORDSIZE32 section first when
-	doing two.
-
-2005-02-10  Jakub Jelinek  <jakub@redhat.com>
-
-	* sysdeps/powerpc/powerpc32/bits/link.h: Removed.
-	* sysdeps/powerpc/powerpc64/bits/link.h: Moved to...
-	* sysdeps/powerpc/bits/link.h: ... here.  New file.
-
-2005-02-09  Jakub Jelinek  <jakub@redhat.com>
-
-	[BZ #710]
-	* stdlib/random_r.c (__initstate_r): Save old state.
-	* stdlib/Makefile (tests): Add tst-random2.
-	* stdlib/tst-random2.c: New test.
-	Reported by Peter Bergner <bergner@vnet.ibm.com>.
-
-2005-01-25  H.J. Lu  <hongjiu.lu@intel.com>
-
-	* dlfcn/dlfcn.c (init): Put it in .init_array section.
-
-2005-02-10  Roland McGrath  <roland@redhat.com>
-
-	[BZ #632]
-	* scripts/soversions.awk: Expect cpu, vendor, os as separate variables
-	from command line.
-	Grok shlib-versions lines with WORDSIZE* in second column.
-	Add new leading column to output, DEFAULT for existing output lines.
-	Also emit lines with WORDSIZE* for alternate configurations.
-	* Makeconfig ($(common-objpfx)soversions.i): Pass those variables.
-	($(common-objpfx)soversions.mk): Grok new column, use only DEFAULT.
-	($(common-objpfx)gnu/lib-names.stmp): Depend on soversions.i instead
-	of soversions.mk; replace inline shell script with use of ...
-	* scripts/lib-names.awk: New file.  If input has non-DEFAULT lines,
-	emit multiple sets of macros under #if.
-	* shlib-versions (x86_64-.*-.*): Add WORDSIZE32 line mapping to i686.
-	(s390x-.*-.*): Likewise for s390.
-	(powerpc64-.*-.*): Likewise for powerpc.
-	(sparc64-.*-.*): Likewise for sparc.
-	(s390-.*-.*, powerpc.*-.*-.*, sparc.*-.*-.*): Add WORDSIZE64 entries
-	for these mapping back to the above.
-
-2005-02-08  Jakub Jelinek  <jakub@redhat.com>
-
-	* elf/dl-load.c (_dl_map_object_from_fd): Fix a typo.
-
-2005-02-08  Andreas Schwab  <schwab@suse.de>
-
-	* sysdeps/m68k/dl-machine.h (elf_machine_rela): Remove use of
-	RESOLVE.
-
-2005-02-07  Richard Henderson  <rth@redhat.com>
-
-	* iconvdata/jis0208.h (struct jisx0208_ucs_idx): Move before use.
-
-2005-02-08  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
-
-	* sysdeps/sh/dl-machine.h (elf_machine_rela): Remove code using
-	RESOLVE.
-
-2005-02-07  Ulrich Drepper  <drepper@redhat.com>
-
-	* elf/dl-load.c (_dl_map_object_from_fd): Make sure registers are
-	set correctly.
-
-2005-01-07  Richard Henderson  <rth@redhat.com>
-
-	* math/math_private.h (__copysign): Define as builtin for gcc 4.
-	(__copysignf, __copysignl): Likewise.
-	* sysdeps/alpha/fpu/bits/mathinline.h (copysign): Don't define
-	for gcc 4.0.
-	(copysignf, copysignl, fabsf, fabs): Likewise.
-	(__copysign, __copysignf, __copysignl): Remove.
-	(__fabs, __fabsf): Remove.
-
-2005-01-07  Jakub Jelinek  <jakub@redhat.com>
-
-	* elf/dl-load.c (open_path): If rtld_search_dirs is in RELRO segment,
-	avoid writing to it if none of the standard search directories exist.
-
-2005-02-07  Steven Munroe  <sjmunroe@us.ibm.com>
-
-	[BZ #700]
-	* sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S
-	(__novec_setcontext, __setcontext): Fix typo so CCR is restored.
-	Load MSR as a doubleword.
-	* sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S
-	(__novec_swapcontext, __swapcontext): Likewise.
-
-2005-02-07  Ulrich Drepper  <drepper@redhat.com>
-
-	* iconv/iconv_prog.c (main): Provide more help in case of an error.
-
-	* sysdeps/unix/sysv/linux/i386/sysdep.h (check_consistency): Define.
-
-2005-02-07  Jakub Jelinek  <jakub@redhat.com>
-
-	* nscd/nscd.c (termination_handler): Avoid segfault if some database
-	is not enabled.
-
-	* nscd/nscd_getai.c (__nscd_getai): If ai_resp->found == -1, set
-	__nss_not_use_nscd_hosts and return -1.
-	* nscd/nscd_initgroups.c (__nscd_getgrouplist): If
-	initgr_resp->found == -1, set __nss_not_use_nscd_group and return -1.
-	Avoid leaking sockets.
-
-2005-01-28  Andreas Schwab  <schwab@suse.de>
-	    H.J. Lu  <hongjiu.lu@intel.com>
-
-	[BZ #677]
-	* elf/dl-runtime.c (fixup): Change return type to
-	DL_FIXUP_VALUE_TYPE. Use DL_FIXUP_VALUE_TYPE,
-	DL_FIXUP_MAKE_VALUE and DL_FIXUP_VALUE_CODE_ADDR for relocation
-	values. Use DL_FIXUP_VALUE_ADDR and DL_FIXUP_ADDR_VALUE to
-	store and retrieve relocation values.
-	(profile_fixup): Likewise.
-	* include/link.h (link_map): Use DL_FIXUP_VALUE_TYPE for
-	l_reloc_result.
-	* sysdeps/generic/dl-fptr.h (link_map): Forward declaration.
-	* sysdeps/generic/dl-lookupcfg.h (DL_FIXUP_VALUE_TYPE): New.
-	(DL_FIXUP_MAKE_VALUE): Likewise.
-	(DL_FIXUP_VALUE_CODE_ADDR): Likewise.
-	(DL_FIXUP_VALUE_ADDR): Likewise.
-	(DL_FIXUP_ADDR_VALUE): Likewise.
-	* sysdeps/ia64/dl-lookupcfg.h: Include <dl-fptr.h> for "struct fdesc".
-	(DL_FIXUP_VALUE_TYPE): New.
-	(DL_FIXUP_MAKE_VALUE): Likewise.
-	(DL_FIXUP_VALUE_CODE_ADDR): Likewise.
-	(DL_FIXUP_VALUE_ADDR): Likewise.
-	(DL_FIXUP_ADDR_VALUE): Likewise.
-	* sysdeps/ia64/dl-machine.h (elf_machine_profile_fixup_plt): Removed.
-	(elf_machine_profile_plt): Removed.
-	(elf_machine_fixup_plt): Change return type and type of value
-	parameter to struct fdesc.
-	(elf_machine_plt_value): Likewise.
-	(elf_machine_rela): Use DL_FIXUP_MAKE_VALUE to construct
-	argument for elf_machine_fixup_plt.
-
-2005-02-07  Jakub Jelinek  <jakub@redhat.com>
-
-	* nscd/nscd.init (reload): Print Reloading nscd: before and a newline
-	after the status string printed by killproc.
-
-2004-11-18  Alexandre Oliva  <aoliva@redhat.com>
-
-	* manual/.cvsignore: Add dir-add.texi.
-
-2005-02-06  Richard Henderson  <rth@redhat.com>
-
-	* sysdeps/alpha/dl-machine.h (elf_machine_rela): Use RESOLVE_MAP
-	all the time.
-
-2004-11-03  Marcus Brinkmann  <marcus@gnu.org>
-
-	* configure.in (sysnames): Append sysdeps/generic for each add-on.
-	* configure: Regenerated.
-
-2005-02-02  Alfred M. Szmidt  <ams@gnu.org>
-
-	* sysdeps/mach/hurd/tls.h: Include <stdbool.h>
-	(dtv_t): Change pointer type to be a struct which also contains
-	information whether the memory pointed to is static TLS or not.
-
-	* sysdeps/generic/syslog.c (send_flags) [!send_flags]: Define it.
-
-	* shadow/sgetspent_r.c (FALSE): Macro renamed to ...
-	(FALSEP): ... this.  Updated all references.
-
-	* libio/fmemopen.c: Include <stdint.h>.
-
-2005-01-30  Ulrich Drepper  <drepper@redhat.com>
-
-	* nscd/nscd_helper.c (get_mapping): Use MSG_NOSIGNAL if available.
-	* nscd/connections.c (send_ro_fd): Likewise.
-
-2005-01-28  H.J. Lu  <hongjiu.lu@intel.com>
-
-	* elf/tst-auditmod1.c: Add ia64 entries.
-	* sysdeps/generic/ldsodefs.h (La_ia64_regs): New.
-	(La_ia64_retval): New.
-	(audit_ifaces): Add ia64 entries.
-	* sysdeps/ia64/bits/link.h: New file.
-	* sysdeps/ia64/dl-machine.h (elf_machine_runtime_setup): Test
-	for dl_profile non-null.
-	(ARCH_LA_PLTENTER): New.
-	(ARCH_LA_PLTEXIT): New.
-	* sysdeps/ia64/dl-trampoline.S (_dl_runtime_resolve): Allocate
-	only 2 output registers. Allocate stack to save/restore
-	8 incoming fp registers. Call _dl_fixup instead of fixup.
-	(_dl_runtime_profile): Rewrite.
-
-2005-01-28  Martin Schwidefsky  <schwidefsky@de.ibm.com>
-
-	* sysdeps/s390/bits/link.h: New file.
-	* sysdeps/s390/s390-32/dl-trampoline.S: New file.
-	* sysdeps/s390/s390-64/dl-trampoline.S: New file.
-	* sysdeps/s390/s390-32/dl-machine.h: Move PLT trampolines to
-	dl-trampoline.S. Use RESOLVE_MAP instead of RESOLVE to protect
-	relocation code.
-	(elf_machine_runtime_setup): Test for dl_profile non-null.
-	(elf_machine_rela): Remove code using RESOLVE.
-	(ARCH_LA_PLTENTER, ARCH_LA_PLTEXIT): New.
-	* sysdeps/s390/s390-64/dl-machine.h: Likewise.
-	* sysdeps/generic/ldsodefs.h (La_s390_32_regs, La_s390_32_retval,
-	La_s390_64_regs, La_s390_64_retval): New.
-	* elf/tst-auditmod1.c: Add s390 entries.
-
-	* sysdeps/s390/bits/string.h (strlen, strncpy, strcat, strncat,
-	strncat, memchr, strcmp): Add missing memory clobber.
-
-2005-01-27  Ulrich Drepper  <drepper@redhat.com>
-
-	* sysdeps/unix/sysv/linux/alpha/oldglob.c (__old_globfree): Also
-	copy gl_offs.  Patch by Sergey Tikhonov <tsv@solvo.ru>.
-
-2005-01-27  Paolo Bonzini  <bonzini@gnu.org>
-
-	[BZ #558]
-	* posix/regcomp.c (calc_inveclosure): Return reg_errcode_t.
-	Initialize the node sets in dfa->inveclosures.
-	(analyze): Initialize inveclosures only if it is needed.
-	Check errors from calc_inveclosure.
-	* posix/regex_internal.c (re_dfa_add_node): Do not initialize
-	the inveclosure node set.
-	* posix/regexec.c (re_search_internal): If nmatch includes unused
-	subexpressions, reset them to { rm_so: -1, rm_eo: -1 } here.
-
-	* posix/regcomp.c (parse_bracket_exp) [!RE_ENABLE_I18N]:
-	Do build a SIMPLE_BRACKET token.
-
-	* posix/regexec.c (transit_state_mb): Do not examine nodes
-	where ACCEPT_MB is not set.
-
-2005-01-27  Jakub Jelinek  <jakub@redhat.com>
-
-	* stdlib/tst-fmtmsg.c: Include stdlib.h.
-	* stdio-common/tst-fmemopen2.c: Include string.h.
-	* posix/execvp.c: Include stdbool.h.
-
-2004-12-13  Paolo Bonzini  <bonzini@gnu.org>
-
-	Separate parsing and creation of the NFA.  Avoided recursion on
-	the (very unbalanced) parse tree.
-	[BZ #611]
-	* posix/regcomp.c (struct subexp_optimize, analyze_tree, calc_epsdest,
-	re_dfa_add_tree_node, mark_opt_subexp_iter): Removed.
-	(optimize_subexps, duplicate_tree, calc_first, calc_next,
-	mark_opt_subexp): Rewritten.
-	(preorder, postorder, lower_subexps, lower_subexp, link_nfa_nodes,
-	create_token_tree, free_tree, free_token): New.
-	(analyze): Accept a regex_t *.  Invoke the passes via the preorder and
-	postorder generic visitors.  Do not initialize the fields in the
-	re_dfa_t that represent the transitions.
-	(free_dfa_content): Use free_token.
-	(re_compile_internal): Analyze before UTF-8 optimizations.  Do not
-	include optimization of subexpressions.
-	(create_initial_state): Fetch the DFA node index from the first node's
-	bin_tree_t *.
-	(optimize_utf8): Abort on unexpected nodes, including OP_DUP_QUESTION.
-	Return on COMPLEX_BRACKET.
-	(duplicate_node_closure): Fix comment.
-	(duplicate_node): Do not initialize the fields in the
-	re_dfa_t that represent the transitions.
-	(calc_eclosure, calc_inveclosure): Do not handle OP_DELETED_SUBEXP.
-	(create_tree): Remove final argument.  All callers adjusted.  Rewritten
-	to use create_token_tree.
-	(parse_reg_exp, parse_branch, parse_expression, parse_bracket_exp,
-	build_charclass_op): Use create_tree or create_token_tree instead
-	of re_dfa_add_tree_node.
-	(parse_dup_op): Likewise.  Also free the tree using free_tree for
-	"<re>{0}", and lower OP_DUP_QUESTION to OP_ALT: "a?" is equivalent
-	to "a|".  Adjust invocation of mark_opt_subexp.
-	(parse_sub_exp): Create a single SUBEXP node.
-	* posix/regex_internal.c (re_dfa_add_node): Remove last parameter,
-	always perform as if it was 1.  Do not initialize OPT_SUBEXP and
-	DUPLICATED, and initialize the DFA fields representing the transitions.
-	* posix/regex_internal.h (re_dfa_add_node): Adjust prototype.
-	(re_token_type_t): Move OP_DUP_PLUS and OP_DUP_QUESTION to the tokens
-	section.  Add a tree-only code SUBEXP.  Remove OP_DELETED_SUBEXP.
-	(bin_tree_t): Include a full re_token_t for TOKEN.  Turn FIRST and
-	NEXT into pointers to trees.  Remove ECLOSURE.
-
-2004-12-28  Paolo Bonzini  <bonzini@gnu.org>
-
-	[BZ #605]
-	* posix/regcomp.c (parse_bracket_exp): Do not modify DFA nodes
-	that were already created.
-	* posix/regex_internal.c (re_dfa_add_node): Set accept_mb field
-	in the token if needed.
-	(create_ci_newstate, create_cd_newstate): Set accept_mb field
-	from the tokens' field.
-	* posix/regex_internal.h (re_token_t): Add accept_mb field.
-	(ACCEPT_MB_NODE): Removed.
-	* posix/regexec.c (proceed_next_node, transit_states_mb,
-	build_sifted_states, check_arrival_add_next_nodes): Use
-	accept_mb instead of ACCEPT_MB_NODE.
-
-2005-01-26  Ulrich Drepper  <drepper@redhat.com>
-
-	* debug/chk_fail.c (__chk_fail): Print program name in final message.
-
-	* sysdeps/unix/sysv/linux/kernel-features.h: Found reference to
-	MSG_NOSIGNAL being in 2.2 kernels.
-
-2005-01-26  Jakub Jelinek  <jakub@redhat.com>
-
-	* sysdeps/unix/sysv/linux/i386/sysdep.h
-	(SYSCALL_ERROR_HANDLER_TLS_STORE): Remove unnecessary 0 imm.
-
-	[BZ #693]
-	* posix/regex_internal.h (DUMMY_CONSTRAINT): Rename to...
-	(WORD_DELIM_CONSTRAINT): ...this.
-	(NOT_WORD_DELIM_CONSTRAINT): Define.
-	(re_context_type): Add INSIDE_NOTWORD and NOT_WORD_DELIM,
-	change WORD_DELIM to use WORD_DELIM_CONSTRAINT.
-	* posix/regcomp.c (peek_token): For \B create NOT_WORD_DELIM
-	anchor instead of INSIDE_WORD.
-	(parse_expression): Handle NOT_WORD_DELIM constraint.
-	* posix/bug-regex19.c (tests): Adjust tests that relied on \B
-	being inside word instead of not word delim.
-	* posix/tst-rxspencer.c (mb_frob_pattern): Don't frob escaped
-	characters.
-	* posix/rxspencer/tests: Add some new tests.
-
-2005-01-14  GOTO Masanori  <gotom@debian.or.jp>
-
-	* sunrpc/rpc_main.c (s_output): Generate #include <rpc/pmap_clnt.h>
-	irrespective of Cflag.
-
-	* manual/memory.texi (sbrk): Fix definition.
-	* manual/string.texi (strcasestr): Fix example typo.
-
-2005-01-25  Roland McGrath  <roland@redhat.com>
-
-	* sysdeps/generic/syslog.c [NO_SIGPIPE]: Protect sigpipe_handler decl.
-
-2005-01-23  Roland McGrath  <roland@redhat.com>
-
-	* sysdeps/i386/Makefile (defines): If -mno-tls-direct-seg-refs appears
-	in $(CFLAGS), add -DNO_TLS_DIRECT_SEG_REFS.
-	* sysdeps/unix/sysv/linux/i386/sysdep.h [USE___THREAD]
-	(SYSCALL_ERROR_HANDLER) [NO_TLS_DIRECT_SEG_REFS]: Load thread pointer
-	from %gs:0 and add to that value, rather than direct %gs:OFFSET access.
-	* sysdeps/unix/i386/sysdep.S [NO_TLS_DIRECT_SEG_REFS]: Likewise.
-
-2005-01-25  Jakub Jelinek  <jakub@redhat.com>
-
-	* stdlib/fmtmsg.c (addseverity): Remove new_string variable.
-	(free_mem): Don't free string.
-	* stdlib/tst-fmtmsg.c: Include string.h.
-	(main): Add some more tests.
-
-2005-01-25  Andreas Schwab  <schwab@suse.de>
-
-	* timezone/asia: Update from tzdata2005c.
-	* timezone/backward: Likewise.
-	* timezone/leapseconds: Likewise.
-	* timezone/northamerica: Likewise.
-	* timezone/southamerica: Likewise.
-
-	* timezone/private.h: Update from tzcode2005c.
-	* timezone/tzfile.h: Likewise.
-	* timezone/zdump.c: Likewise.
-	* timezone/zic.c: Likewise.
-
-2005-01-25  Ulrich Drepper  <drepper@redhat.com>
-
-	* sysdeps/i386/dl-machine.h (elf_machine_rel): Remove code using
-	RESOLVE.
-	* sysdeps/x86_64/dl-machine.h (elf_machine_rela): Remove code
-	using RESOLVE.
-	* elf/rtld.c (_dl_start): Remove RESOLVE definition.
-
-2005-01-25  Alan Modra  <amodra@bigpond.net.au>
-
-	* sysdeps/powerpc/powerpc32/dl-machine.h (elf_machine_rela): Test
-	sym_map, not sym, against zero.
-
-2005-01-24  Ulrich Drepper  <drepper@redhat.com>
-
-	* misc/syslog.c: Moved to...
-	* sysdeps/generic/syslog.c: ...here.
-	[NO_SIGPIPE]: Don't install SIGPIPE handler.
-	* sysdeps/unix/sysv/linux/syslog.c: New file.
-	* sysdeps/unix/sysv/linux/kernel-features.h: Define
-	__ASSUME_MSG_NOSIGNAL.
-
-2005-01-22  Richard Henderson  <rth@redhat.com>
-
-	* sysdeps/alpha/dl-trampoline.S: New file.
-	* sysdeps/alpha/dl-machine.h: Move PLT trampolines there.
-	Use RESOLVE_MAP instead of RESOLVE to protect relocation code.
-	(elf_machine_runtime_setup): Test for dl_profile non-null.
-	(ARCH_LA_PLTENTER, ARCH_LA_PLTEXIT): New.
-	* sysdeps/alpha/bits/link.h: New file.
-	* sysdeps/generic/ldsodefs.h (La_alpha_regs, La_alpha_retval): New.
-	(struct audit_ifaces): Add alpha entries.
-	* elf/tst-auditmod1.c: Add alpha entries.
-
-2005-01-22  Ulrich Drepper  <drepper@redhat.com>
-
-	* sysdeps/ia64/dl-machine.h: Remove PLT trampolines here.
-	* sysdeps/ia64/dl-trampoline.S: New file.
-
-	* sysdeps/x86_64/bits/link.h: Use namespace-safe identifiers in
-	La_x86_64_xmm definition.
-
-	* posix/Makefile: Use CFLAGS-*.os instead of CFLAGS-*.c for frame
-	pointer option.
-	* stdlib/Makefile (CFLAGS-system.os): Use this instead of
-	CFLAGS-system.c for frame pointer option.
-
-2005-01-21  Roland McGrath  <roland@redhat.com>
-
-	* elf/dl-runtime.c (_dl_profile_fixup): Remove const from REGS.
-	* sysdeps/i386/dl-machine.h: Update decl.
-
-2005-01-21  Jakub Jelinek  <jakub@redhat.com>
-
-	* elf/Makefile: Add rules to build and run tst-align2.
-	* elf/tst-align2.c: New test.
-	* elf/tst-alignmod2.c: New file.
-	* sysdeps/powerpc/tst-stack-align.h: New file.
-	* sysdeps/i386/dl-machine.h (RTLD_START): Align stack and clear frame
-	pointer before calling _dl_init.
-	* sysdeps/x86_64/dl-machine.h (RTLD_START): Likewise.
-
-2005-01-20  Ulrich Drepper  <drepper@redhat.com>
-
-	* posix/execl.c: Do not allocate potentially large buffers on the
-	stack.
-	* posix/execle.c: Likewise.
-	* posix/execlp.c: Likewise.
-	* posix/execvp.c: Likewise.
-	(script_execute): Removed.
-	(allocate_scripts_argv): New function.  Called at most once to
-	allocate memory, not every time a script is run.  Adjust caller.
-
-	* sysdeps/generic/wordexp.c (exec_comm): Add a few
-	TEMP_FAILURE_RETRY.  Reorganize code to avoid multiple calls to
-	exec_comm_child.
-	(exec_comm_child): Can now be inlined.
-
-	* posix/Makefile: Add -fomit-frame-pointer for a few more files.
-	* stdlib/Makefile: Likewise.
-
-2005-01-19  Roland McGrath  <roland@redhat.com>
-
-	[BZ #681]
-	* sunrpc/openchild.c (_openchild): Use NULL instead of 0 for trailing
-	argument to execlp.
-	Reported by Marcus Meissner <meissner@suse.de>.
-
-2005-01-19  Jakub Jelinek  <jakub@redhat.com>
-
-	* hurd/sigunwind.c (_hurdsig_longjmp_from_handler): Fix a typo
-	in assert.
-	* iconv/strtab.c (strtabfinalize): Likewise.
-
-	* libio/iofopncook.c (_IO_cookie_seekoff): Add prototype.
-
-2005-01-17  Roland McGrath  <roland@redhat.com>
-
-	* nscd/Makefile (LDLIBS-nscd): New variable.
-	($(objpfx)nscd): Use that instead of selinux-LIBS.
-
-	* Makeconfig (link-extra-libs): Define just as $(LDLIBS-$(@F)).
-	(link-extra-libs-static): Define to $(link-extra-libs).
-	(link-extra-libs-bounded): Likewise.
-
-2005-01-17  Ulrich Drepper  <drepper@redhat.com>
-
-	* include/link.h: Remove stray definition of pltenter.
-
-2005-01-16  GOTO Masanori  <gotom@debian.or.jp>
-
-	* sysdeps/unix/rewinddir.c: Reset filepos.
-	* dirent/tst-seekdir.c: Check telldir value after calling rewinddir.
-
-2005-01-15  Ulrich Drepper  <drepper@redhat.com>
-
-	* elf/tst-auditmod1.c: Reduce duplication.
-
-2005-01-16  Andreas Schwab  <schwab@suse.de>
-
-	* sysdeps/m68k/dl-machine.h: Remove trampoline code.  Define
-	ARCH_LA_PLTENTER and ARCH_LA_PLTEXIT.
-	(elf_machine_runtime_setup): If profile != 0 does not anymore mean
-	GLRO(dl_profile) != NULL.
-	* sysdeps/m68k/dl-trampoline.S: New file.
-	* sysdeps/m68k/bits/link.h: New file.
-	* sysdeps/generic/ldsodefs.h (struct audit_ifaces): Add m68k
-	variants.
-	* elf/tst-auditmod1.c: Add m68k support.
-
-2005-01-14  Ulrich Drepper  <drepper@redhat.com>
-
-	* posix/regcomp.c [!_LIBC] (init_dfa): Fix determining of relevant
-	LC_* variable.  Patch by Aharon Robbins <arnold@skeeve.com>.
-
-	* stdlib/fmtmsg.c (internal_addseverity): Remove incorrect free call.
-	* stdlib/tst-fmtmsg.c (main): Add another addseverity test.
-
-2005-01-12  Ulrich Drepper  <drepper@redhat.com>
-
-	* elf/dl-load.c (_dl_map_object_from_fd): We don't have to allow
-	callers from libc anymore.
-
-	* elf/dl-open.c (dl_open_worker): Pass __RTLD_AUDIT flag from caller
-	to _dl_map_object_deps.
-	* elf/dl-load.c (_dl_map_object_from_fd): Don't change memory
-	protections when loading auditing modules.
-
-	* dlfcn/dlopen.c (dlopen_doit): Catch invalid mode arguments and fail.
-
-	* posix/getconf.c: Update copyright year.
-	* nss/getent.c: Likewise.
-	* nscd/nscd_nischeck.c: Likewise.
-	* iconv/iconvconfig.c: Likewise.
-	* iconv/iconv_prog.c: Likewise.
-	* elf/ldconfig.c: Likewise.
-	* catgets/gencat.c: Likewise.
-	* csu/version.c: Likewise.
-	* elf/ldd.bash.in: Likewise.
-	* elf/sprof.c (print_version): Likewise.
-	* locale/programs/locale.c: Likewise.
-	* locale/programs/localedef.c: Likewise.
-	* nscd/nscd.c (print_version): Likewise.
-	* debug/xtrace.sh: Likewise.
-	* malloc/memusage.sh: Likewise.
-	* malloc/mtrace.pl: Likewise.
-	* debug/catchsegv.sh: Likewise.
-
-2005-01-11  Thorsten Kukuk  <kukuk@suse.de>
-
-	* sunrpc/svc_tcp.c (svctcp_create): Call listen with SOMAXCONN
-	as backlog.
-	* sunrpc/svc_unix.c (svcunix_create): Likewise.
-
-	* grp/putgrent.c (putgrent): Don't write 0 as group
-	ID if groupname starts with + or -.
-	* pwd/putpwent.c (putpwent): Don't write 0 as user or
-	group ID if user name starts with + or -.
-
-2005-01-09  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
-
-	* sysdeps/generic/ldsodefs.h (struct audit_ifaces): Add sh variants.
-	* sysdeps/elf/tst-auditmod1.c: Add sh support.
-	* sysdeps/sh/bits/link.h: New.
-	* sysdeps/sh/dl-machine.h: Remove trampoline code here.  Define
-	ARCH_LA_PLTENTER and ARCH_LA_PLTEXIT.  Remove obsolete comments.
-	(RTLD_START): Define __fpscr_values.
-	* sysdeps/sh/dl-trampoline.S: New file.
-	* sysdeps/sh/sh4/dl-trampoline.S: New file.
-	* sysdeps/sh/sh4/Versions [ld]: Add __fpscr_values.
-	* sysdeps/sh/sh4/dl-machine.h: Remove.
-
-2005-01-10  Jakub Jelinek  <jakub@redhat.com>
-
-	* time/tst-strptime.c (day_tests): Add 2 new tests.
-	(test_tm, main): Issue an error instead of segfaulting if
-	strptime returns NULL.
-
-2005-01-10  H.J. Lu  <hongjiu.lu@intel.com>
-
-	* sysdeps/alpha/libc-tls.c (__tls_get_addr): Updated for dtv_t union.
-	* sysdeps/ia64/libc-tls.c (__tls_get_addr): Likewise.
-
-2005-01-11  Ulrich Drepper  <drepper@redhat.com>
-
-	* malloc/malloc.h: Remove no-glibc support.
-
-2005-01-12  GOTO Masanori  <gotom@debian.or.jp>
-
-	[BZ #650]
-	* malloc/malloc.h: Always include features.h.
-
-2005-01-10  Roland McGrath  <roland@redhat.com>
-
-	* sysdeps/ia64/fpu/e_logl.c: File removed.
-
-	* dlfcn/dlfcn.c (init): Fix typo in attribute decl.
-
-2005-01-10  Ulrich Drepper  <drepper@redhat.com>
-
-	* sysdeps/generic/dl-tls.c (_dl_next_tls_modid): Fix assertion and
-	recognition of last entry.
-
-2005-01-09  Ulrich Drepper  <drepper@redhat.com>
-
-	* elf/dl-runtime.c: Include <sys/param.h>.
-
-	* elf/Makefile (headers): Add bits/link.h.
-
-2005-01-09  Andreas Schwab  <schwab@suse.de>
-
-	* elf/rtld.c (dl_main): Create main_map with __RTLD_OPENEXEC.
-
-2005-01-09  Andreas Jaeger  <aj@suse.de>
-
-	* time/strptime_l.c (__strptime_internal): Add braces to avoid
-	warning.
-
-	* sysdeps/x86_64/bits/link.h: Use vector_size for GCC 4.0.
-
-	* elf/rtld.c (dl_main): Call _dl_add_to_slotinfo only if USE_TLS.
-
-2005-01-08  Jakub Jelinek  <jakub@redhat.com>
-
-	* elf/Makefile (generated): Add tst-pie1{,.out,.o}.
-
-2005-01-09  Ulrich Drepper  <drepper@redhat.com>
-
-	* elf/dl-fini.c (_dl_fini): Call destructors of audit DSOs after
-	those of all the regular objects.
-
-	* elf/dl-debug.c (_dl_debug_initialize): Take extra parameter and
-	use it to select the r_debug structure for that namespace.
-	* elf/dl-close.c (_dl_close): Adjust call to _dl_debug_initialize.
-	* elf/dl-load.c (_dl_map_object_from_fd): Likewise.
-	* elf/dl-open.c (_dl_open): Likewise.
-	* elf/rtld.c (dl_main): Likewise.
-	* sysdeps/generic/ldsodefs.h (struct link_namespaces): Add _ns_debug
-	member.
-	(_dl_debug_initialize): Add new parameter in declaration.
-
-	* elf/dl-close.c (_dl_close): Make sure auditing callbacks are not
-	called for the auditing objects themselves.
-	* elf/dl-load.c (_dl_map_object_from_fd): Likewise.
-
-2005-01-07  Ulrich Drepper  <drepper@redhat.com>
-
-	* sysdeps/powerpc/powerpc64/dl-machine.h
-	(elf_machine_runtime_setup): If profile != 0 does not anymore mean
-	GLRO(dl_profile) != NULL.
-	* sysdeps/powerpc/powerpc64/bits/link.h (struct la_ppc64_regs): Add
-	padding.
-	* sysdeps/powerpc/powerpc64/dl-trampoline.S: (_dl_profile_resolve):
-	Extend _dl_prof_resolve to pass extra parameters to
-	_dl_profile_fixup and set up structure with register content.
-
-	* sysdeps/powerpc/powerpc32/dl-machine.c (__elf_machine_runtime_setup):
-	If profile != 0 does not anymore mean GLRO(dl_profile) != NULL.
-	* sysdeps/powerpc/powerpc32/dl-trampoline.S (_dl_prof_resolve):
-	Extend _dl_prof_resolve to pass extra parameters to
-	_dl_profile_fixup and set up structure with register content.
-	* sysdeps/powerpc/powerpc32/bits/link.h: Fix types of some fields in
-	the register and result structures.
-	* sysdeps/powerpc/powerpc64/bits/link.h: Fix types of some fields
-	in the 32-bit register and result structures.
-
-	* sysdeps/powerpc/powerpc64/dl-trampoline.S: Use register names.
-
-	* sysdeps/powerpc/powerpc32/dl-trampoline.S: New file.
-	* sysdeps/powerpc/powerpc32/dl-machine.h: Remove trampoline code here.
-	Define ARCH_LA_PLTENTER and ARCH_LA_PLTEXIT.
-	* sysdeps/generic/ldsodefs.h (struct audit_ifaces): Add ppc32 variants.
-	* elf/tst-auditmod1.c: Add ppc32 support.
-	* sysdeps/powerpc/powerpc32/bits/link.h: New file.
-	* sysdeps/powerpc/powerpc64/bits/link.h: Add ppc32 definitions.
-
-	* malloc/malloc.c (malloc_printerr): Print program name as part of
-	error message.
-
-	* misc/sys/cdefs.h (__attribute_warn_unused_result__): Define.
-	* stdlib/stdlib.h: Make realloc with
-	__attribute_warn_unused_result__ instead of __wur.
-	* malloc/malloc.h: Add __wur and __attribute_warn_unused_result__
-	markers as in <stdlib.h>.
-
-	* libio/stdio.h: Remove __wur from rename and remove.
-	* posix/unistd.h: Remove __wur from dup2.
-
-2005-01-03  Andreas Jaeger  <aj@suse.de>
-
-	* elf/Makefile (tests): Revert patch from 2005-01-03.
-
-2005-01-07  Ulrich Drepper  <drepper@redhat.com>
-
-	* sysdeps/unix/sysv/linux/init-first.c (__libc_init_first): Don't
-	make __libc_init_first hidden.
-
-	* elf/rtld.c [!DONT_USE_BOOTSTRAP_MAP] (_dl_start_final): Initialize
-	l_relocated of rtld map.
-	* sysdeps/powerpc/powerpc64/dl-trampoline.S: New file.
-	* sysdeps/powerpc/powerpc64/dl-machine.h: Remove trampoline code here.
-	Define ARCH_LA_PLTENTER and ARCH_LA_PLTEXIT.
-	* sysdeps/generic/ldsodefs.h (struct audit_ifaces): Add ppc64 variants.
-	* elf/tst-auditmod1.c: Add ppc64 support.
-	* sysdeps/powerpc/powerpc64/bits/link.h: New file.
-
-2005-01-06  Roland McGrath  <roland@redhat.com>
-
-	[BZ #633]
-	* sysdeps/unix/sysv/linux/futimes.c (__futimes): Catch errno values
-	indicating file-name lookup errors, and return ENOSYS or EBADF instead.
-
-2005-01-06  Ulrich Drepper  <drepper@redhat.com>
-
-	* csu/elf-init.c (__libc_csu_fini): Don't do anything here.
-	* sysdeps/generic/libc-start.c: Don't register program destructor here.
-
-	* dlfcn/Makefile: Add rules to build dlfcn.c.
-	(LDFLAGS-dl.so): Removed.
-	* dlfcn/dlclose.c: _dl_close is now in ld.so, use function pointer
-	table.
-	* dlfcn/dlmopen.c: Likewise for _dl_open.
-	* dlfcn/dlopen.c: Likewise.
-	* dlfcn/dlopenold.c: Likewise.
-	* elf/dl-libc.c: Likewise for _dl_open and _dl_close.
-	* elf/Makefile (routines): Remove dl-open and dl-close.
-	(dl-routines): Add dl-open, dl-close, and dl-trampoline.
-	Add rules to build and run tst-audit1.
-	* elf/tst-audit1.c: New file.
-	* elf/tst-auditmod1.c: New file.
-	* elf/Versions [libc]: Remove _dl_open and _dl_close.
-	* elf/dl-close.c: Change for use inside ld.so instead of libc.so.
-	* elf/dl-open.c: Likewise.
-	* elf/dl-debug.c (_dl_debug_initialize): Allow reinitialization,
-	signaled by nonzero parameter.
-	* elf/dl-init.c: Fix use of r_state.
-	* elf/dl-load.c: Likewise.
-
-	* elf/dl-close.c: Add auditing checkpoints.
-	* elf/dl-open.c: Likewise.
-	* elf/dl-fini.c: Likewise.
-	* elf/dl-load.c: Likewise.
-	* elf/dl-sym.c: Likewise.
-	* sysdeps/generic/libc-start.c: Likewise.
-	* elf/dl-object.c: Allocate memory for auditing information.
-	* elf/dl-reloc.c: Remove RESOLV.  We now always need the map.
-	Correctly initialize slotinfo.
-	* elf/dynamic-link.h: Adjust after removal of RESOLV.
-	* sysdeps/hppa/dl-lookupcfg.h: Likewise.
-	* sysdeps/ia64/dl-lookupcfg.h: Likewise.
-	* sysdeps/powerpc/powerpc64/dl-lookupcfg.h: Removed.
-	* elf/dl-runtime.c (_dl_fixup): Little cleanup.
-	(_dl_profile_fixup): New parameters to point to register struct and
-	variable for frame size.
-	Add auditing checkpoints.
-	(_dl_call_pltexit): New function.
-	Don't define trampoline code here.
-	* elf/rtld.c: Recognize LD_AUDIT.  Load modules on startup.
-	Remove all the functions from _rtld_global_ro which only _dl_open
-	and _dl_close needed.
-	Add auditing checkpoints.
-	* elf/link.h: Define symbols for auditing interfaces.
-	* include/link.h: Likewise.
-	* include/dlfcn.h: Define __RTLD_AUDIT.
-	Remove prototypes for _dl_open and _dl_close.
-	Adjust access to argc and argv in libdl.
-	* dlfcn/dlfcn.c: New file.
-	* sysdeps/generic/dl-lookupcfg.h: Remove all content now that RESOLVE
-	is gone.
-	* sysdeps/generic/ldsodefs.h: Add definitions for auditing interfaces.
-	* sysdeps/generic/unsecvars.h: Add LD_AUDIT.
-	* sysdeps/i386/dl-machine.h: Remove trampoline code here.
-	Adjust for removal of RESOLVE.
-	* sysdeps/x86_64/dl-machine.h: Likewise.
-	* sysdeps/generic/dl-trampoline.c: New file.
-	* sysdeps/i386/dl-trampoline.c: New file.
-	* sysdeps/x86_64/dl-trampoline.c: New file.
-
-	* sysdeps/generic/dl-tls.c: Cleanups.  Fixup for dtv_t change.
-	Fix updating of DTV.
-	* sysdeps/generic/libc-tls.c: Likewise.
-
-	* sysdeps/arm/bits/link.h: Renamed to ...
-	* sysdeps/arm/bits/linkmap.h: ...this.
-	* sysdeps/generic/bits/link.h: Renamed to...
-	* sysdeps/generic/bits/linkmap.h: ...this.
-	* sysdeps/hppa/bits/link.h: Renamed to...
-	* sysdeps/hppa/bits/linkmap.h: ...this.
-	* sysdeps/i386/bits/link.h: Renamed to...
-	* sysdeps/i386/bits/linkmap.h: ...this.
-	* sysdeps/ia64/bits/link.h: Renamed to...
-	* sysdeps/ia64/bits/linkmap.h: ...this.
-	* sysdeps/s390/bits/link.h: Renamed to...
-	* sysdeps/s390/bits/linkmap.h: ...this.
-	* sysdeps/sh/bits/link.h: Renamed to...
-	* sysdeps/sh/bits/linkmap.h: ...this.
-	* sysdeps/x86_64/bits/link.h: Renamed to...
-	* sysdeps/x86_64/bits/linkmap.h: ...this.
-
-	* posix/unistd.h: Declare ftruncate for POSIX 2003.  [BZ #640]
-
-2004-12-22  Steven Munroe  <sjmunroe@us.ibm.com>
-
-	* math/libm-test.inc (rint_test_tonearest): New test.
-	(rint_test_towardzero): New test.
-	(rint_test_downward): New test.
-	(rint_test_upward): New test.
-	* sysdeps/powerpc/powerpc32/fpu/s_ceil.S: Fix -0.0 case.
-	Remove redundant const values.
-	* sysdeps/powerpc/powerpc32/fpu/s_ceilf.S: Fix -0.0 case.
-	Remove redundant const values.  Use float const.
-	* sysdeps/powerpc/powerpc32/fpu/s_floor.S: Fix -0.0 case.
-	* sysdeps/powerpc/powerpc32/fpu/s_floorf.S: Fix -0.0 case.
-	Use float const.
-	* sysdeps/powerpc/powerpc32/fpu/s_rint.S: Fix -0.0 case.
-	* sysdeps/powerpc/powerpc32/fpu/s_rintf.S: Fix -0.0 case.
-	Use float const.
-	* sysdeps/powerpc/powerpc32/fpu/s_round.S: Fix -0.0 case.
-	Remove redundant const values.
-	* sysdeps/powerpc/powerpc32/fpu/s_roundf.S: Fix -0.0 case.
-	Remove redundant const values.  Use float const.
-	* sysdeps/powerpc/powerpc32/fpu/s_trunc.S: Fix -0.0 case.
-	Remove redundant const values.
-	* sysdeps/powerpc/powerpc32/fpu/s_truncf.S: Fix -0.0 case.
-	Remove redundant const values.  Use float const.
-	* sysdeps/powerpc/powerpc64/fpu/s_ceil.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.  Remove redundant const values.
-	* sysdeps/powerpc/powerpc64/fpu/s_ceilf.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.  Remove redundant const values.
-	Use float const.
-	* sysdeps/powerpc/powerpc64/fpu/s_floor.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.
-	* sysdeps/powerpc/powerpc64/fpu/s_floorf.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.  Use float const.
-	* sysdeps/powerpc/powerpc64/fpu/s_rint.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.
-	* sysdeps/powerpc/powerpc64/fpu/s_rintf.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.  Use float const.
-	* sysdeps/powerpc/powerpc64/fpu/s_round.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.  Remove redundant const values.
-	* sysdeps/powerpc/powerpc64/fpu/s_roundf.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.  Remove redundant const values.
-	Use float const.
-	* sysdeps/powerpc/powerpc64/fpu/s_trunc.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.
-	* sysdeps/powerpc/powerpc64/fpu/s_truncf.S: Use EALIGN for Quadword
-	alignment.  Fix -0.0 case.  Remove redundant const values.
-	Use float const.
-
-2005-01-06  Ulrich Drepper  <drepper@redhat.com>
-
-	* misc/sys/cdefs.h: Define __wur.
-	* libio/stdio.h: Use __wur for a number of interfaces.
-	* posix/unistd.h: Likewise.
-	* stdlib/stdlib.h: Likewise.
-
-2004-12-29  Jakub Jelinek  <jakub@redhat.com>
-
-	* sysdeps/ia64/fpu/libm_support.h (__libm_error_support): Use
-	libc_hidden_proto instead of HIDDEN_PROTO.
-	* sysdeps/ia64/fpu/libm-symbols.h (HIDDEN_PROTO): Remove.
-	(__libm_error_support): If ASSEMBLER and in libc, define to
-	HIDDEN_JUMPTARGET(__libm_error_support).
-
-2004-12-28  David Mosberger  <davidm@hpl.hp.com>
-
-	* sysdeps/ia64/fpu/Makefile (duplicated-routines): New macro.
-	(sysdep_routines): Replace libm_ldexp{,f,l} and libm_scalbn{,f,l}
-	with $(duplicated-routines).
-	(libm-sysdep_routines): Likewise, but substitute "s_" prefix for
-	"m_" prefix.
-
-2004-12-27  David Mosberger  <davidm@hpl.hp.com>
-
-	* sysdeps/ia64/fpu/libm-symbols.h: Add include of <sysdep.h> and
-	undefine "ret" macro.  Add __libm_error_support hidden definitions.
-
-	* sysdeps/ia64/fpu/e_lgamma_r.c: Remove CVS-id comment.  Add
-	missing portion of copyright statement.
-	* sysdeps/ia64/fpu/e_lgammaf_r.c: Likewise.
-	* sysdeps/ia64/fpu/e_lgammal_r.c: Likewise.
-
-	* sysdeps/ia64/fpu/w_lgamma.c: Remove CVS-id comment.  Add
-	missing portion of copyright statement.
-	(__ieee754_lgamma): Rename from lgamma().  Make lgamma() a weak alias.
-	(__ieee754_gamma): Likewise.
-	* sysdeps/ia64/fpu/w_lgammaf.c: Likewise.
-	* sysdeps/ia64/fpu/w_lgammal.c: Likewise.
-
-2004-12-09  H. J. Lu <hjl@lucon.org>
-
-	* sysdeps/ia64/fpu/s_nextafterl.c: Remove.
-	* sysdeps/ia64/fpu/s_nexttoward.c: Likewise.
-	* sysdeps/ia64/fpu/s_nexttowardf.c: Likewise.
-	* sysdeps/ia64/fpu/e_atan2l.S: Remove (duplicate of e_atan2l.c).
-	* sysdeps/ia64/fpu/e_expl.S: Likewise.
-	* sysdeps/ia64/fpu/e_logl.c: Remove (conflicts with e_logl.S).
-
-2004-11-18  David Mosberger  <davidm@hpl.hp.com>
-
-	* sysdeps/ia64/fpu/README: New file.
-	* sysdeps/ia64/fpu/gen_import_file_list: New file.
-	* sysdeps/ia64/fpu/import_check: Likewise.
-	* sysdeps/ia64/fpu/import_diffs: Likewise.
-	* sysdeps/ia64/fpu/import_file.awk: Likewise.
-	* sysdeps/ia64/fpu/import_intel_libm: Likewise.
-	* sysdeps/ia64/fpu/libm-symbols.h: Likewise.
-
-	* sysdeps/ia64/fpu/e_acos.S: Update from Intel libm v2.1+.
-	* sysdeps/ia64/fpu/e_acosf.S: Likewise.
-	* sysdeps/ia64/fpu/e_acosl.S: Likewise.
-	* sysdeps/ia64/fpu/e_asin.S: Likewise.
-	* sysdeps/ia64/fpu/e_asinf.S: Likewise.
-	* sysdeps/ia64/fpu/e_asinl.S: Likewise.
-	* sysdeps/ia64/fpu/e_atan2.S: Likewise.
-	* sysdeps/ia64/fpu/e_atan2f.S: Likewise.
-	* sysdeps/ia64/fpu/e_cosh.S: Likewise.
-	* sysdeps/ia64/fpu/e_coshf.S: Likewise.
-	* sysdeps/ia64/fpu/e_coshl.S: Likewise.
-	* sysdeps/ia64/fpu/e_exp.S: Likewise.
-	* sysdeps/ia64/fpu/e_expf.S: Likewise.
-	* sysdeps/ia64/fpu/e_fmod.S: Likewise.
-	* sysdeps/ia64/fpu/e_fmodf.S: Likewise.
-	* sysdeps/ia64/fpu/e_fmodl.S: Likewise.
-	* sysdeps/ia64/fpu/e_hypot.S: Likewise.
-	* sysdeps/ia64/fpu/e_hypotf.S: Likewise.
-	* sysdeps/ia64/fpu/e_hypotl.S: Likewise.
-	* sysdeps/ia64/fpu/e_log.S: Likewise.
-	* sysdeps/ia64/fpu/e_log2.S: Likewise.
-	* sysdeps/ia64/fpu/e_log2f.S: Likewise.
-	* sysdeps/ia64/fpu/e_log2l.S: Likewise.
-	* sysdeps/ia64/fpu/e_logf.S: Likewise.
-	* sysdeps/ia64/fpu/e_pow.S: Likewise.
-	* sysdeps/ia64/fpu/e_powf.S: Likewise.
-	* sysdeps/ia64/fpu/e_powl.S: Likewise.
-	* sysdeps/ia64/fpu/e_remainder.S: Likewise.
-	* sysdeps/ia64/fpu/e_remainderf.S: Likewise.
-	* sysdeps/ia64/fpu/e_remainderl.S: Likewise.
-	* sysdeps/ia64/fpu/e_scalb.S: Likewise.
-	* sysdeps/ia64/fpu/e_scalbf.S: Likewise.
-	* sysdeps/ia64/fpu/e_scalbl.S: Likewise.
-	* sysdeps/ia64/fpu/e_sinh.S: Likewise.
-	* sysdeps/ia64/fpu/e_sinhf.S: Likewise.
-	* sysdeps/ia64/fpu/e_sinhl.S: Likewise.
-	* sysdeps/ia64/fpu/e_sqrt.S: Likewise.
-	* sysdeps/ia64/fpu/e_sqrtf.S: Likewise.
-	* sysdeps/ia64/fpu/e_sqrtl.S: Likewise.
-	* sysdeps/ia64/fpu/libm_error.c: Likewise.
-	* sysdeps/ia64/fpu/libm_reduce.c: Likewise.
-	* sysdeps/ia64/fpu/libm_support.h: Likewise.
-	* sysdeps/ia64/fpu/s_atan.S: Likewise.
-	* sysdeps/ia64/fpu/s_atanf.S: Likewise.
-	* sysdeps/ia64/fpu/s_atanl.S: Likewise.
-	* sysdeps/ia64/fpu/s_cbrt.S: Likewise.
-	* sysdeps/ia64/fpu/s_cbrtf.S: Likewise.
-	* sysdeps/ia64/fpu/s_cbrtl.S: Likewise.
-	* sysdeps/ia64/fpu/s_ceil.S: Likewise.
-	* sysdeps/ia64/fpu/s_ceilf.S: Likewise.
-	* sysdeps/ia64/fpu/s_ceill.S: Likewise.
-	* sysdeps/ia64/fpu/s_cos.S: Likewise.
-	* sysdeps/ia64/fpu/s_cosf.S: Likewise.
-	* sysdeps/ia64/fpu/s_cosl.S: Likewise.
-	* sysdeps/ia64/fpu/s_expm1.S: Likewise.
-	* sysdeps/ia64/fpu/s_expm1f.S: Likewise.
-	* sysdeps/ia64/fpu/s_expm1l.S: Likewise.
-	* sysdeps/ia64/fpu/s_fabs.S: Likewise.
-	* sysdeps/ia64/fpu/s_fabsf.S: Likewise.
-	* sysdeps/ia64/fpu/s_fabsl.S: Likewise.
-	* sysdeps/ia64/fpu/s_floor.S: Likewise.
-	* sysdeps/ia64/fpu/s_floorf.S: Likewise.
-	* sysdeps/ia64/fpu/s_floorl.S: Likewise.
-	* sysdeps/ia64/fpu/s_frexp.c: Likewise.
-	* sysdeps/ia64/fpu/s_frexpf.c: Likewise.
-	* sysdeps/ia64/fpu/s_frexpl.c: Likewise.
-	* sysdeps/ia64/fpu/s_ilogb.S: Likewise.
-	* sysdeps/ia64/fpu/s_ilogbf.S: Likewise.
-	* sysdeps/ia64/fpu/s_ilogbl.S: Likewise.
-	* sysdeps/ia64/fpu/s_log1p.S: Likewise.
-	* sysdeps/ia64/fpu/s_log1pf.S: Likewise.
-	* sysdeps/ia64/fpu/s_log1pl.S: Likewise.
-	* sysdeps/ia64/fpu/s_logb.S: Likewise.
-	* sysdeps/ia64/fpu/s_logbf.S: Likewise.
-	* sysdeps/ia64/fpu/s_logbl.S: Likewise.
-	* sysdeps/ia64/fpu/s_modf.S: Likewise.
-	* sysdeps/ia64/fpu/s_modff.S: Likewise.
-	* sysdeps/ia64/fpu/s_modfl.S: Likewise.
-	* sysdeps/ia64/fpu/s_nearbyint.S: Likewise.
-	* sysdeps/ia64/fpu/s_nearbyintf.S: Likewise.
-	* sysdeps/ia64/fpu/s_nearbyintl.S: Likewise.
-	* sysdeps/ia64/fpu/s_rint.S: Likewise.
-	* sysdeps/ia64/fpu/s_rintf.S: Likewise.
-	* sysdeps/ia64/fpu/s_rintl.S: Likewise.
-	* sysdeps/ia64/fpu/s_round.S: Likewise.
-	* sysdeps/ia64/fpu/s_roundf.S: Likewise.
-	* sysdeps/ia64/fpu/s_roundl.S: Likewise.
-	* sysdeps/ia64/fpu/s_significand.S: Likewise.
-	* sysdeps/ia64/fpu/s_significandf.S: Likewise.
-	* sysdeps/ia64/fpu/s_significandl.S: Likewise.
-	* sysdeps/ia64/fpu/s_tan.S: Likewise.
-	* sysdeps/ia64/fpu/s_tanf.S: Likewise.
-	* sysdeps/ia64/fpu/s_tanl.S: Likewise.
-	* sysdeps/ia64/fpu/s_trunc.S: Likewise.
-	* sysdeps/ia64/fpu/s_truncf.S: Likewise.
-	* sysdeps/ia64/fpu/s_truncl.S: Likewise.
-
-	* sysdeps/ia64/fpu/e_acosh.S: New file from Intel libm v2.1+.
-	* sysdeps/ia64/fpu/e_acoshf.S: Likewise.
-	* sysdeps/ia64/fpu/e_acoshl.S: Likewise.
-	* sysdeps/ia64/fpu/e_atanh.S: Likewise.
-	* sysdeps/ia64/fpu/e_atanhf.S: Likewise.
-	* sysdeps/ia64/fpu/e_atanhl.S: Likewise.
-	* sysdeps/ia64/fpu/e_exp10.S: Likewise.
-	* sysdeps/ia64/fpu/e_exp10f.S: Likewise.
-	* sysdeps/ia64/fpu/e_exp10l.S: Likewise.
-	* sysdeps/ia64/fpu/e_exp2.S: Likewise.
-	* sysdeps/ia64/fpu/e_exp2f.S: Likewise.
-	* sysdeps/ia64/fpu/e_exp2l.S: Likewise.
-	* sysdeps/ia64/fpu/e_lgamma_r.S: Likewise.
-	* sysdeps/ia64/fpu/e_lgammaf_r.S: Likewise.
-	* sysdeps/ia64/fpu/e_lgammal_r.S: Likewise.
-	* sysdeps/ia64/fpu/e_logl.S: Likewise.
-	* sysdeps/ia64/fpu/libm_frexp.S: Likewise.
-	* sysdeps/ia64/fpu/libm_frexpf.S: Likewise.
-	* sysdeps/ia64/fpu/libm_frexpl.S: Likewise.
-	* sysdeps/ia64/fpu/s_libm_ldexp.S: Likewise.
-	* sysdeps/ia64/fpu/s_libm_ldexpf.S: Likewise.
-	* sysdeps/ia64/fpu/s_libm_ldexpl.S: Likewise.
-	* sysdeps/ia64/fpu/s_libm_scalbn.S: Likewise.
-	* sysdeps/ia64/fpu/s_libm_scalbnf.S: Likewise.
-	* sysdeps/ia64/fpu/s_libm_scalbnl.S: Likewise.
-	* sysdeps/ia64/fpu/libm_lgamma.S: Likewise.
-	* sysdeps/ia64/fpu/libm_lgammaf.S: Likewise.
-	* sysdeps/ia64/fpu/libm_lgammal.S: Likewise.
-	* sysdeps/ia64/fpu/libm_sincos.S: Likewise.
-	* sysdeps/ia64/fpu/libm_sincos_large.S: Likewise.
-	* sysdeps/ia64/fpu/libm_sincosf.S: Likewise.
-	* sysdeps/ia64/fpu/libm_sincosl.S: Likewise.
-	* sysdeps/ia64/fpu/libm_scalblnf.S: Likewise.
-	* sysdeps/ia64/fpu/s_asinh.S: Likewise.
-	* sysdeps/ia64/fpu/s_asinhf.S: Likewise.
-	* sysdeps/ia64/fpu/s_asinhl.S: Likewise.
-	* sysdeps/ia64/fpu/s_erf.S: Likewise.
-	* sysdeps/ia64/fpu/s_erfc.S: Likewise.
-	* sysdeps/ia64/fpu/s_erfcf.S: Likewise.
-	* sysdeps/ia64/fpu/s_erfcl.S: Likewise.
-	* sysdeps/ia64/fpu/s_erff.S: Likewise.
-	* sysdeps/ia64/fpu/s_erfl.S: Likewise.
-	* sysdeps/ia64/fpu/s_fdim.S: Likewise.
-	* sysdeps/ia64/fpu/s_fdimf.S: Likewise.
-	* sysdeps/ia64/fpu/s_fdiml.S: Likewise.
-	* sysdeps/ia64/fpu/s_fma.S: Likewise.
-	* sysdeps/ia64/fpu/s_fmaf.S: Likewise.
-	* sysdeps/ia64/fpu/s_fmal.S: Likewise.
-	* sysdeps/ia64/fpu/s_fmax.S: Likewise.
-	* sysdeps/ia64/fpu/s_fmaxf.S: Likewise.
-	* sysdeps/ia64/fpu/s_fmaxl.S: Likewise.
-	* sysdeps/ia64/fpu/s_ldexp.c: Likewise.
-	* sysdeps/ia64/fpu/s_ldexpf.c: Likewise.
-	* sysdeps/ia64/fpu/s_ldexpl.c: Likewise.
-	* sysdeps/ia64/fpu/s_nextafter.S: Likewise.
-	* sysdeps/ia64/fpu/s_nextafterf.S: Likewise.
-	* sysdeps/ia64/fpu/s_nextafterl.S: Likewise.
-	* sysdeps/ia64/fpu/s_nexttoward.S: Likewise.
-	* sysdeps/ia64/fpu/s_nexttowardf.S: Likewise.
-	* sysdeps/ia64/fpu/s_nexttowardl.S: Likewise.
-	* sysdeps/ia64/fpu/s_tanh.S: Likewise.
-	* sysdeps/ia64/fpu/s_tanhf.S: Likewise.
-	* sysdeps/ia64/fpu/s_tanhl.S: Likewise.
-	* sysdeps/ia64/fpu/s_scalblnf.c: Likewise.
-	* sysdeps/ia64/fpu/w_lgamma.c: Likewise.
-	* sysdeps/ia64/fpu/w_lgammaf.c: Likewise.
-	* sysdeps/ia64/fpu/w_lgammal.c: Likewise.
-	* sysdeps/ia64/fpu/w_tgamma.S: Likewise.
-	* sysdeps/ia64/fpu/w_tgammaf.S: Likewise.
-	* sysdeps/ia64/fpu/w_tgammal.S: Likewise.
-
-	* sysdeps/ia64/fpu/e_gamma_r.c: New empty dummy-file.
-	* sysdeps/ia64/fpu/e_gammaf_r.c: Likewise.
-	* sysdeps/ia64/fpu/e_gammal_r.c: Likewise.
-	* sysdeps/ia64/fpu/w_acosh.c: Likewise.
-	* sysdeps/ia64/fpu/w_acoshf.c: Likewise.
-	* sysdeps/ia64/fpu/w_acoshl.c: Likewise.
-	* sysdeps/ia64/fpu/w_atanh.c: Likewise.
-	* sysdeps/ia64/fpu/w_atanhf.c: Likewise.
-	* sysdeps/ia64/fpu/w_atanhl.c: Likewise.
-	* sysdeps/ia64/fpu/w_exp10.c: Likewise.
-	* sysdeps/ia64/fpu/w_exp10f.c: Likewise.
-	* sysdeps/ia64/fpu/w_exp10l.c: Likewise.
-	* sysdeps/ia64/fpu/w_exp2.c: Likewise.
-	* sysdeps/ia64/fpu/w_exp2f.c: Likewise.
-	* sysdeps/ia64/fpu/w_exp2l.c: Likewise.
-	* sysdeps/ia64/fpu/w_expl.c: Likewise.
-	* sysdeps/ia64/fpu/e_expl.S: Likewise.
-	* sysdeps/ia64/fpu/w_lgamma_r.c: Likewise.
-	* sysdeps/ia64/fpu/w_lgammaf_r.c: Likewise.
-	* sysdeps/ia64/fpu/w_lgammal_r.c: Likewise.
-	* sysdeps/ia64/fpu/w_log2.c: Likewise.
-	* sysdeps/ia64/fpu/w_log2f.c: Likewise.
-	* sysdeps/ia64/fpu/w_log2l.c: Likewise.
-	* sysdeps/ia64/fpu/w_sinh.c: Likewise.
-	* sysdeps/ia64/fpu/w_sinhf.c: Likewise.
-	* sysdeps/ia64/fpu/w_sinhl.c: Likewise.
-
-	* sysdeps/ia64/fpu/libm_atan2_reg.S: Remove.
-	* sysdeps/ia64/fpu/s_ldexp.S: Likewise.
-	* sysdeps/ia64/fpu/s_ldexpf.S: Likewise.
-	* sysdeps/ia64/fpu/s_ldexpl.S: Likewise.
-	* sysdeps/ia64/fpu/s_scalbn.S: Likewise.
-	* sysdeps/ia64/fpu/s_scalbnf.S: Likewise.
-	* sysdeps/ia64/fpu/s_scalbnl.S: Likewise.
-
-	* sysdeps/ia64/fpu/s_sincos.c: Make it an empty dummy-file.
-	* sysdeps/ia64/fpu/s_sincosf.c: Likewise.
-	* sysdeps/ia64/fpu/s_sincosl.c: Likewise.
-
-	* sysdeps/ia64/fpu/e_atan2l.S: Add "Not needed" comment.
-
-	* sysdeps/ia64/fpu/s_copysign.S: Add __libm_copysign{,f,l}
-	alias for use by libm_error.c
-
-	* sysdeps/ia64/fpu/Makefile (libm-sysdep_routines): Remove
-	libm_atan2_reg, libm_tan, libm_frexp4{f,l}.
-	Mention s_erfc{,f,l}, libm_frexp{,f,l}, libm_ldexp{,f,l},
-	libm_sincos{,f,l}, libm_sincos_large, libm_lgamma{,f,l},
-	libm_scalbn{,f,l}, libm_scalblnf.
-	(sysdep_routines): Remove libm_frexp4{,f,l}.
-	Mention libm_frexp{,f,l}, libm_ldexp{,f,l}, and libm_scalbn{,f,l}.
-	(sysdep-CPPFLAGS): Add -include libm-symbols.h, -D__POSIX__,
-	_D_LIB_VERSIONIMF=_LIB_VERSION, -DSIZE_LONG_INT_64, and
-	-DSIZE_LONG_LONG_INT_64.
-
-2005-01-05  Steven Munroe  <sjmunroe@us.ibm.com>
-
-	* elf/rtld.c (dl_main) [NEED_DL_SYSINFO_DSO]: Insure l_map_end and
-	l_text_end are set for a VDSO with a single PT_LOAD entry.
-
-2005-01-05  Ulrich Drepper  <drepper@redhat.com>
-
-	* libio/iofopncook.c (_IO_cookie_seekoff): Define.  Mark offset as
-	invalid to disable optimizations in fileops which won't work here.
-	(_IO_cookie_jumps): Use it.
-	(_IO_old_cookie_jumps): Likewise.
-	* libio/fmemopen.c (fmemopen_seek): Result must be returned in *P,
-	not the return value.
-	* stdio-common/Makefile (tests): Add tst-fmemopen2.
-	* stdio-common/tst-fmemopen2.c: New file.
-
-	* sysdeps/unix/sysv/linux/bits/waitflags.h: Define __WNOTHREAD.
-
-2005-01-05  Roland McGrath  <roland@redhat.com>
-
-	* configure.in (libc_cv_cpp_asm_debuginfo): Checked moved ...
-	* sysdeps/i386/configure.in: ... here.  New file.
-
-2005-01-03  Ulrich Drepper  <drepper@redhat.com>
-
-	* sysdeps/generic/libc-start.c [SHARED] (__libc_start_main): Don't
-	initialize __environ again.
-	* stdlib/Makefile: Add rules to build and run tst-putenv.
-	* stdlib/tst-putenv.c: New file.
-	* stdlib/tst-putenvmod.c: New file.
-
-	* sysdeps/unix/sysv/linux/init-first.c: Clean file up.  Don't
-	define unnecessary wrappers or aliases of static functions.
-
-2005-01-03  Andreas Jaeger  <aj@suse.de>
-
-	* csu/Makefile (generated): Add start.os and start.ob.
-
-	* elf/Makefile (tests): Change rule for tst-pie1 so that make
-	clean works.
-
-2004-12-29  Roland McGrath  <roland@redhat.com>
-
-	[BZ #626]
-	* sysdeps/unix/alarm.c (alarm): Round return value to nearest rather
-	than always up; when nearest is zero, round up to one.
-
-2004-12-28  Ulrich Drepper  <drepper@redhat.com>
-
-	* po/es.po: Update from translation team.
-
-	* sysdeps/generic/dl-tls.c (__tls_get_addr): Fix typo.
-
-2004-12-27  Ulrich Drepper  <drepper@redhat.com>
-
-	* include/signal.h: Define __sigemptyset.
-
-2004-04-27  Paolo Bonzini  <bonzini@gnu.org>
-
-	* posix/regex_internal.h (struct re_dfastate_t): Make
-	word_trtable a pointer to the 512-item transition table.
-	* posix/regexec.c (build_trtable): Fill in either state->trtable
-	or state->word_trtable.  Return a boolean indicating success.
-	(transit_state): Expect state->trtable to be a 256-item
-	transition table.  Reorganize code to have less tests in
-	the common case, and to save an indentation level.
-	* posix/regex_internal.c (free_state): Free word_trtable.
-
-2004-12-21  Jakub Jelinek  <jakub@redhat.com>
-
-	* sysdeps/unix/sysv/linux/i386/clone.S (__clone): Make sure %esp when
-	calling fn is 16 byte aligned.
-	* sysdeps/i386/tst-stack-align.h: New file.
-
-	* misc/efgcvt_r.c (FLOAT_MIN_10_EXP, FLOAT_MIN_10_NORM): Define.
-	(ecvt_r): Special case denormals.
-	* misc/qefgcvt_r.c (FLOAT_MIN_10_EXP, FLOAT_MIN_10_NORM): Define.
-	* misc/tst-efgcvt.c: Include float.h.
-	(ecvt_tests): Add 2 new tests.
-
-2004-12-20  Roland McGrath  <roland@frob.com>
-
-	* version.h (RELEASE, VERSION): development, 2.3.90
-	* include/features.h (__GLIBC_MINOR__): Now 4.
-
-2004-12-20  Jakub Jelinek  <jakub@redhat.com>,
-	    Jim Gifford  <giffordj@linkline.com>
-
-	[BZ #562]
-	* sysdeps/mips/Makefile (librt-sysdep_routines): Add.
-	* sysdeps/unix/mips/rt-sysdep.S: New file.
-
 2004-12-19  Roland McGrath  <roland@redhat.com>
 
-	* iconv/Makefile (test-iconvconfig): New target.
-	[$(cross-compiling) != yes] (xtests): Depend on it.
-
 	* iconv/iconvconfig.c (nostdlib, output_file, output_file_len):
 	New variables.
 	(options, parse_opt, main): Take new options --nostdlib and
 	--output/-o to set them.  Under --nostdlib, skip GCONV_PATH dirs.
 	(write_output): If output_file is set, write the output there.
 
-2004-12-19  Andreas Jaeger  <aj@suse.de>
+2004-12-19  Andreas Jaeger  <aj@suse.de>
 
 	[BZ #560]
 	* inet/netinet/in.h: Use __interface_addr instead of __interface.
diff --git a/ChangeLog.15 b/ChangeLog.15
new file mode 100644
index 0000000000..5d9b00d42a
--- /dev/null
+++ b/ChangeLog.15
@@ -0,0 +1,5480 @@
+2004-12-20  Jakub Jelinek  <jakub@redhat.com>,
+	    Jim Gifford  <giffordj@linkline.com>
+
+	[BZ #562]
+	* sysdeps/mips/Makefile (librt-sysdep_routines): Add.
+	* sysdeps/unix/mips/rt-sysdep.S: New file.
+
+2004-12-19  Roland McGrath  <roland@redhat.com>
+
+	* iconv/Makefile (test-iconvconfig): New target.
+	[$(cross-compiling) != yes] (xtests): Depend on it.
+
+	* iconv/iconvconfig.c (nostdlib, output_file, output_file_len):
+	New variables.
+	(options, parse_opt, main): Take new options --nostdlib and
+	--output/-o to set them.  Under --nostdlib, skip GCONV_PATH dirs.
+	(write_output): If output_file is set, write the output there.
+
+2004-12-19  Andreas Jaeger  <aj@suse.de>
+
+	[BZ #560]
+	* inet/netinet/in.h: Use __interface_addr instead of __interface.
+
+	[BZ #573]
+	* sunrpc/xcrypt.c (passwd2des_internal): Make it hidden instead of
+	internal linkage.
+
+2004-12-19  Roland McGrath  <roland@frob.com>
+
+	* version.h (VERSION): 2.3.4.
+	* README.template: Various updates.
+	* README: Regenerated.
+	* NEWS: Mention ports.
+	* README-alpha: File removed.
+
+	[BZ #416]
+	* locale/langinfo.h: Comment fixes.
+
+2004-12-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* po/ja.po: Update from translation team.
+
+2004-12-17  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/unix/sysv/linux/alpha/clone.S (__clone): Add support
+	for NPTL where the PID is stored at userlevel and needs to be
+	reset when CLONE_THREAD is not used.
+
+2004-12-17  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/sparc/sparc64/fpu/libm-test-ulps: Update.
+
+2004-12-17  Andreas Jaeger  <aj@suse.de>
+
+	* math/libm-test.inc (atan2_test): Compute value with 36 digits.
+	* sysdeps/alpha/fpu/libm-test-ulps: Adjust for changed result.
+	* sysdeps/powerpc/fpu/libm-test-ulps: Likewise.
+	* sysdeps/s390/fpu/libm-test-ulps: Likewise.
+	* sysdeps/sparc/sparc32/fpu/libm-test-ulps: Likewise.
+	* sysdeps/sparc/sparc64/fpu/libm-test-ulps: Likewise.
+	* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
+
+2004-12-16  Ulrich Drepper  <drepper@redhat.com>
+
+	* stdlib/tst-setcontext.c: Enlarge st1 and st2 arrays.
+
+2004-09-02  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	[BZ #610]
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/getcontext.S
+	(__novec_getcontext): Fix typo in store of fp29.
+	(__getcontext): Fix typo in store of fp29.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S
+	(__novec_swapcontext): Fix typo in store of fp29.
+	(__swapcontext): Fix typo in store of fp29.
+
+2004-12-17  GOTO Masanori  <gotom@debian.or.jp>
+
+	* sysdeps/unix/sysv/linux/dl-osinfo.h (DL_SYSDEP_OSCHECK): Fix
+	vague message.
+
+2004-12-16  Roland McGrath  <roland@redhat.com>
+
+	* nscd/Makefile ($(objpfx)nscd): Don't depend on $(selinux-LIBS),
+	which is usually a -lselinux that make will resolve wrongly.
+
+2004-12-16  Andreas Jaeger  <aj@suse.de>
+
+	* math/math.h: Use #if defined to not receive warnings about
+	undefined symbols.
+
+2004-12-16  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/sparc/sparc32/clone.S (__clone): Add support
+	for NPTL where the PID is stored at userlevel and needs to be reset
+	when CLONE_THREAD is not used.
+
+	* sysdeps/unix/sysv/linux/sparc/sparc64/sysdep.h
+	(SYSCALL_ERROR_HANDLER): If RTLD_PRIVATE_ERRNO, use rtld_errno
+	instead of errno.
+	* sysdeps/unix/sysv/linux/sparc/sparc64/socket.S: Include
+	sysdep-cancel.h instead of sysdep.h.  Handle cancellation.
+	* sysdeps/sparc/sparc64/fpu/libm-test-ulps: Regenerate.
+
+	* sysdeps/ieee754/ldbl-128/e_expl.c: Include stdlib.h.
+
+2004-12-15  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/gethostid.c: Make bi-arch safe.
+
+2004-12-15  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/clone.S: Correct stack
+	alignment.  Clean up flag bit tests.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S: Correct stack
+	alignment.  Clean up flag bit tests.  Remove redundant SP assignment.
+	Add TOC register save/restore around function call.
+
+	* sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Make no_vmx symbol
+	local.
+	* sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Make no_vmx symbol
+	local.
+	* sysdeps/powerpc/powerpc64/__longjmp-common.S: Make no_vmx symbol
+	local.
+	* sysdeps/powerpc/powerpc64/setjmp-common.S: Make no_vmx and
+	aligned_save_vmx symbol local.
+
+2004-12-15  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/i386/i486/bits/string.h (__strncat_g): Fix i686
+	implementation.
+
+2004-09-08  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* Makeconfig (libunwind): New.
+	(libgcc_eh): Add $(libunwind).
+	(gnulib): Always set to -lgcc $(libgcc_eh).
+	(static-gnulib): Always set to -lgcc -lgcc_eh $(libunwind).
+	(libc.so-gnulib): New.
+	* Makerules (LDLIBS-c.so): Use $(libc.so-gnulib) instead of
+	$(static-gnulib).
+	* configure.in (libc_cv_cc_with_libunwind): Set to yes if gcc
+	uses -lunwind for static binaries.
+
+2004-06-05  Joseph S. Myers  <jsm@polyomino.org.uk>
+
+	* malloc/Makefile (install-bin): Remove memusage.
+	(install-bin-script): Add memusage.
+
+2004-12-15  Jakub Jelinek  <jakub@redhat.com>
+
+	* nis/nis_domain_of_r.c (nis_domain_of_r): Use libnsl_hidden_def,
+	not libnsl_hidden_proto.
+
+	* sysdeps/unix/sysv/linux/s390/s390-32/clone.S (__clone): Add support
+	for NPTL where the PID is stored at userlevel and needs to be reset
+	when CLONE_THREAD is not used.
+	* sysdeps/unix/sysv/linux/s390/s390-64/clone.S (__clone): Likewise.
+
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S (__clone): Save
+	and restore r2 around call to fn.
+
+2004-12-15  Andreas Jaeger  <aj@suse.de>
+
+	* sysdeps/ia64/dl-machine.h (elf_machine_rela): Mark auto instead
+	of static, add always_inline attribute.
+	(elf_machine_rela_relative): Likewise.
+	(elf_machine_lazy_rel): Likewise.
+
+2004-12-15  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/clone.S: Add support
+	for pid caching in nptl.
+
+2004-12-14  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S: Add support
+	for pid caching in nptl.
+
+2004-10-18  Maciej W. Rozycki  <macro@mips.com>
+
+	* sysdeps/unix/sysv/linux/mips/bits/socket.h (__cmsg_nxthdr): Use
+	__NTH instead of __THROW in the inline definition.
+	* sysdeps/unix/sysv/linux/mips/sys/tas.h (_test_and_set): Likewise.
+
+	* sysdeps/mips/bits/dlfcn.h (RTLD_DEEPBIND): New macro.
+
+	* sysdeps/unix/sysv/linux/mips/bits/mman.h
+	(PROT_GROWSDOWN, PROT_GROWSUP): New macros.
+
+2004-10-06  Alan Modra  <amodra@bigpond.net.au>
+
+	* sysdeps/powerpc/powerpc64/ppc-mcount.S (PROF): Don't undef.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/brk.S: Invoke CALL_MCOUNT.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/clone.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/getcontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/makecontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/socket.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/vfork.S: Likewise.
+
+2004-10-19  Wolfram Gloger  <wg@malloc.de>
+
+	* malloc/hooks.c (mem2chunk_check, top_check): Handle
+	non-contiguous arena.  Reported by Michael Dalton
+	<mwdalton@stanford.edu> [BZ #457].  Add further checks for top chunk.
+
+2004-12-14  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/posix/sysconf.c (__sysconf_check_spec): Remove leading
+	underscore from GETCONF_DIR filenames.
+
+2004-12-13  Ulrich Drepper  <drepper@redhat.com>
+
+	* po/de.po: Update from translation team.
+
+	* nss/getnssent.c (__nss_getent): Double buffer size each round to
+	avoid problems with delays for some people's huge entries.
+	* nss/getXXbyYY.c (FUNCTION_NAME): Likewise.
+
+2004-12-13  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/getconf.c (main): Prepend just $GETCONF_DIR/ instead of
+	$GETCONF_DIR/_ to spec.
+	* posix/confstr.c (confstr): Remove leading underscores for
+	_CS_POSIX_V6_WIDTH_RESTRICTED_ENVS.
+
+2004-12-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-load.c (_dl_map_object_from_fd): Fix computation of
+	mapping start.  It must take the actual pagesize into account, not
+	the alignment in the file.
+
+2004-12-11  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (_int_realloc): Add checks for corrupted memory.
+	(_int_free): Make clear that messages are the result of free() calls.
+
+	* malloc/malloc.c (_int_realloc): Remove unnecessary tests for
+	oldmem and size == 0.
+
+2004-12-10  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/arena.c (arena_get2): Prevent endless loop if arenas and
+	list lock are taken.
+
+2004-12-08  Thorsten Kukuk  <kukuk@suse.de>
+
+	* nis/nss_nisplus/nisplus-netgrp.c (_nss_nisplus_getnetgrent_r):
+	Add check if the value is not an empty string.  [BZ #597]
+
+2004-11-29  Jakub Jelinek  <jakub@redhat.com>
+
+	* stdlib/strtod_l.c (INTERNAL (__STRTOF)): If densize > 2
+	and numsize < densize, always shift num up by empty + 1 limbs.
+
+2004-12-07  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regexec.c (proceed_next_node): Simplify treatment of epsilon
+	nodes.  Pass the pushed node to push_fail_stack.
+	(push_fail_stack): Accept a single node rather than an array
+	of two epsilon destinations.
+	(build_sifted_states): Only walk non-epsilon nodes.
+	(check_arrival): Don't pass epsilon nodes to
+	check_arrival_add_next_nodes.
+	(check_arrival_add_next_nodes) [DEBUG]: Abort if an epsilon node is
+	found.
+	(check_node_accept): Do expensive checks later.
+	(add_epsilon_src_nodes): Cache result of merging the inveclosures.
+	* posix/regex_internal.h (re_dfastate_t): Add non_eps_nodes and
+	inveclosure.
+	(re_string_elem_size_at, re_string_char_size_at, re_string_wchar_at,
+	re_string_context_at, re_string_peek_byte_case,
+	re_string_fetch_byte_case, re_node_set_compare, re_node_set_contains):
+	Declare as pure.
+	* posix/regex_internal.c (create_newstate_common): Remove.
+	(register_state): Move part of it here.  Initialize non_eps_nodes.
+	(free_state): Free inveclosure and non_eps_nodes.
+	(create_cd_newstate, create_ci_newstate): Allocate the new
+	re_dfastate_t here.
+
+2004-12-09  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (public_rEALLOc): Add parameter checks.
+	(_int_free): Provide better error message for invalid pointers.
+
+2004-12-01  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/tst-regex.c: Use defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
+	conditionals instead of defined _POSIX_CPUTIME.
+	(main): If _POSIX_CPUTIME == 0, call sysconf to see if CPUTIME
+	option is available.
+	* posix/tst-regex2.c: Use defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
+	conditionals instead of defined _POSIX_CPUTIME.
+	(do_test): If _POSIX_CPUTIME == 0, call sysconf to see if CPUTIME
+	option is available.
+	* sysdeps/posix/sysconf.c (__sysconf): If _POSIX_CPUTIME resp.
+	_POSIX_THREAD_CPUTIME is defined to 0, return -1 for the corresponding
+	_SC_ argument.
+
+2004-12-08  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/ldd.bash.in: When set -o pipefail is available, use that for
+	piping to cat; when not, don't use the pipe at all.
+	Pipe to cat in all cases of running the executable.
+	When direct running exits with code 5, retry running via ${RTLD}.
+	* elf/rtld.c (process_envvars): If __libc_enable_secure and
+	mode != normal, exit with exitcode 5.
+
+2004-12-07  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/posix/sysconf.c (__sysconf_check_spec): Only define
+	if it will be actually used.
+
+2004-12-07  Roland McGrath  <roland@redhat.com>
+
+	* rt/tst-timer5.c (setup_test): New function.
+	(TEST_CLOCK_MISSING): Use it to punt test if timer_create does not
+	support CLOCK_MONOTONIC.
+
+2004-12-07  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/ia64/clone2.S (__clone2): Add support for
+	NPTL where the PID is stored at userlevel and needs to be reset when
+	CLONE_THREAD is not used.  Restore gp before calling _exit.
+
+2004-12-07  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/unix/sysv/linux/sh/clone.S: Clear the frame pointer when
+	starting a new thread.  Add support for NPTL where the PID is stored
+	at userlevel and needs to be reset when CLONE_THREAD is not used.
+
+2004-12-01  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/rtld.c (process_envvars): Don't consider LD_SHOW_AUXV
+	and LD_DYNAMIC_WEAK if __libc_enable_secure.
+	If __libc_enable_secure, /etc/suid-debug doesn't exist and
+	program will be actually run, turn off all debugging.
+	* sysdeps/generic/unsecvars.h (UNSECURE_ENVVARS): Add LD_DEBUG,
+	LD_DYNAMIC_WEAK and LD_SHOW_AUXV.
+
+2004-12-06  Jakub Jelinek  <jakub@redhat.com>
+
+	* time/tzset.c (tzset_internal): If + or - is seen,
+	but no offset after it, reset offset to 0.  [BZ #601]
+
+2004-12-06  Ulrich Drepper  <drepper@redhat.com>
+
+	* libio/ioseekpos.c (_IO_seekpos_unlocked): Call _IO_SEEKOFF not
+	_IO_SEEKPOS, saving one indirect jump.
+
+	* libio/fileops.c (_IO_new_file_seekoff): Fix optimization of in-buffer
+	seek.  Remove dead code.
+
+2004-12-02  Jakub Jelinek  <jakub@redhat.com>
+
+	* libio/Makefile (tests): Add bug-ungetc4.
+	* libio/bug-ungetc4.c: New test.
+
+2004-12-06  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/unix/clock_nanosleep.c (clock_nanosleep): Diagnose EINVAL
+	for CLOCK_THREAD_CPUTIME_ID, not ENOTSUP.
+	Use SYSDEP_NANOSLEEP handler before validating CLOCK_ID value.
+
+	* rt/tst-timer4.c (TEST_CLOCK, TEST_CLOCK_NANOSLEEP): New macros.
+	Use them throughout in place of CLOCK_REALTIME and nanosleep.
+	(do_test) [TEST_CLOCK_MISSING]: Call this macro and if it returns
+	non-null, punt the test with a message using the string returned.
+	* rt/tst-timer5.c: New file.
+	* rt/Makefile (tests): Add it.
+
+2004-12-01  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regcomp.c (free_dfa_content, init_dfa): Remove
+	references to re_dfa_t's subexps field.
+	(parse_sub_exp, parse_expression): Do not use it.  Use
+	completed_bkref_map instead.
+	(create_initial_state, peek_token): Store a backreference \N
+	with opr.idx = N-1.
+	* posix/regexec.c (proceed_next_node, check_dst_limits, get_subexp):
+	Likewise.
+	(check_subexp_limits): Remove useless condition.
+	* posix/regex_internal.h (re_subexp_t): Remove.
+	(re_dfa_t): Remove subexps and subexps_alloc field, add
+	completed_bkref_map.
+
+2004-12-05  Roland McGrath  <roland@frob.com>
+
+	* Makeconfig: Comment typo fix.
+
+2004-11-30  Andreas Schwab  <schwab@suse.de>
+
+	* nis/ypclnt.c (ypprot_err): Remove unused entries.
+
+2004-11-30  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regexec.c (check_dst_limits_calc_pos_1): Check for
+	bkref_idx == -1, and don't recurse in that case.
+
+2004-11-30  GOTO Masanori  <gotom@debian.or.jp>
+
+	* posix/confstr.c: Enclose #error message with double quote
+	for gcc-3.3 and lower.
+
+2004-12-04  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/i386/clone.S: Add support for NPTL where
+	the PID is stored at userlevel and needs to be reset when CLONE_THREAD
+	is not used.
+	* sysdeps/unix/sysv/linux/x86_64/clone.S: Likewise.
+
+2004-11-18  Daniel Jacobowitz  <dan@codesourcery.com>
+
+	* sysdeps/arm/sysdep.h: Define __USE_BX__ if bx is available.
+	Use it instead of __THUMB_INTERWORK__.  Make RETINSTR take
+	only a condition and a register.
+	* sysdeps/arm/dl-machine.h: Use __USE_BX__ instead of
+	__THUMB_INTERWORK__.
+	(_dl_start_user): Use BX.
+	* sysdeps/arm/strlen.S: Use DO_RET.
+	* sysdeps/unix/arm/brk.S: Likewise.
+	* sysdeps/unix/arm/fork.S: Likewise.
+	* sysdeps/unix/arm/sysdep.S: Likewise.
+	* sysdeps/unix/arm/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/arm/clone.S: Update uses of RETINSTR.
+	* sysdeps/unix/sysv/linux/arm/mmap.S: Likewise.
+	* sysdeps/unix/sysv/linux/arm/mmap64.S: Likewise.
+	* sysdeps/unix/sysv/linux/arm/socket.S: Likewise.
+	* sysdeps/unix/sysv/linux/arm/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/arm/vfork.S: Likewise.
+
+2004-12-02  Roland McGrath  <roland@redhat.com>
+
+	* extra-lib.mk (object-suffixes-$(lib)): Add .oS when
+	$(lib)-static-only-routines is nonempty.
+	(extra-objs, o-iterator.mk rule): Filter out .oS from generators.
+	Add a special rule for .oS objects -> _nonshared.a library.
+
+2004-12-01  Jakub Jelinek  <jakub@redhat.com>
+
+	* time/mktime.c (__mktime_internal): If SEC_REQUESTED != SEC,
+	convert T2, not T.
+	* time/Makefile (tests): Add tst-mktime3.
+	* time/tst-mktime3.c: New test.
+
+2004-12-01  Jakub Jelinek  <jakub@redhat.com>
+
+	* stdlib/stdlib.h (realpath): Remove nonnull attribute.
+
+	* posix/fnmatch_loop.c (internal_fnmatch): Clear is_seqval after
+	normal_bracket label.
+
+	* time/tst-mktime2.c (bigtime_test): Initialize tm.tm_isdst to -1.
+
+2004-12-01  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/gnu/Makefile ($(objpfx)errlist-compat.c):
+	Do $(make-target-directory).
+
+2004-11-29  Roland McGrath  <roland@redhat.com>
+
+	* posix/confstr.c: Avoid punctuation in #error text.
+
+2004-11-27  Ulrich Drepper  <drepper@redhat.com>
+
+	* posix/Makefile (tests): Add tst-sysconf.
+	* posix/tst-sysconf.c: New file.
+
+	* posix/getconf.c (vars): Add support for _SC_IPV6 and
+	_SC_RAW_SOCKETS.
+	* sysdeps/posix/sysconf.c (__sysconf): Add support for _SC_IPV6
+	and _SC_RAW_SOCKETS.
+	* sysdeps/generic/bits/confname.h: Define _SC_IPV6 and
+	_SC_RAW_SOCKETS.
+
+2004-11-26  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/generic/unsecvars.h (UNSECURE_ENVVARS): Add GETCONF_DIR.
+
+2004-11-26  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/unix/sysv/linux/mips/pread.c: Include sgidefs.h only if
+	NO_SGIDEFS_H isn't defined.  Don't include sgidefs.h twice.
+	* sysdeps/unix/sysv/linux/mips/pwrite.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pread64.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pwrite64.c: Likewise.
+
+2004-11-27  Ulrich Drepper  <drepper@redhat.com>
+
+	* include/dlfcn.h (__libc_dlopen): Add __RTLD_DLOPEN to the mode
+	parameter.  Reported by VY Newsum <newsum@fel.tno.nl>.
+
+2004-11-10  Daniel Jacobowitz  <dan@debian.org>
+
+	* libio/iolibio.h (_IO_fclose, _IO_new_fclose, _IO_old_fclose,
+	_IO_fflush, _IO_fgetpos, _IO_fgetpos64, _IO_fgets, _IO_fopen,
+	_IO_old_fopen, _IO_new_fopen, _IO_fopen64, __fopen_internal,
+	_IO_fprintf, _IO_fputs, _IO_fsetpos, _IO_fsetpos64, _IO_ftell,
+	_IO_fread, _IO_fwrite, _IO_gets, _IO_printf, _IO_puts, _IO_scanf,
+	_IO_fflush_internal, _IO_ftell_internal, _IO_fputs_internal,
+	_IO_fwrite_internal): Remove incorrect __THROW.
+	* libio/libioP.h (_IO_default_xsputn, _IO_wdefault_xsputn,
+	_IO_default_xsgetn, _IO_wdefault_xsgetn, _IO_default_write,
+	_IO_default_read, _IO_getline, _IO_getline_info, _IO_getwline,
+	_IO_getwline_info, _IO_file_read_internal, _IO_sgetn_internal,
+	_IO_wdo_write_internal, _IO_do_write_internal,
+	_IO_getline_info_internal, _IO_getline_internal,
+	_IO_vfprintf_internal, _IO_putc_internal, _IO_read, _IO_write,
+	_IO_close): Likewise.
+
+2004-11-26  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/Makefile (install-others): Add $(inst_libexecdir)/getconf.
+	(CFLAGS-sysconf.c): Add -D_GETCONF_DIR.
+	(CFLAGS-getconf.c): New.
+	($(inst_libexecdir)/getconf): New.
+	* posix/confstr.c (confstr): Use __sysconf to query specifications
+	that don't have _POSIX_V6_* macros defined.  Use __*_{C,LD}FLAGS
+	macros defined in bits/environments.h.
+	* sysdeps/posix/sysconf.c: Include stdlib.h, string.h and
+	sys/stat.h.
+	(__sysconf_check_spec): New routine.
+	(__sysconf): Use it.
+	* posix/getconf.c (specs): Change into structure array.
+	(main): If -v is not given, try to get default from
+	$(libexecdir)/getconf/default.  If specification is not
+	supported by this getconf, try to execute
+	$(libexecdir)/getconf/$(specification).
+	* sysdeps/unix/bsd/bsd4.4/freebsd/bits/environments.h: New file.
+	* sysdeps/unix/sysv/linux/s390/bits/environments.h: New file.
+	* sysdeps/unix/sysv/linux/powerpc/bits/environments.h: New file.
+	* sysdeps/unix/sysv/linux/sparc/bits/environments.h: New file.
+	* sysdeps/unix/sysv/linux/i386/bits/environments.h: New file.
+	* sysdeps/unix/sysv/linux/x86_64/bits/environments.h: New file.
+	* sysdeps/generic/bits/environments.h (__ILP32_OFFBIG_CFLAGS):
+	Define.
+
+	* stdlib/stdlib.h (setenv): Use nonnull only for second argument.
+	(unsetenv): Remove.
+
+2004-11-23  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regcomp.c (analyze_tree): Always call calc_epsdest.
+	(calc_inveclosure): Use re_node_set_insert_last.
+	(parse_dup_op): Lower X{1,5} to (X(X(X(XX?)?)?)?)?
+	rather than X?X?X?X?X?.
+	* posix/regex_internal.h (re_node_set_insert_last): New declaration.
+	* posix/regex_internal.c (re_node_set_insert_last): New function.
+	* posix/PCRE.tests: Add testcases.
+
+2004-11-25  Ulrich Drepper  <drepper@redhat.com>
+
+	* dlfcn/dlfcn.h: Remove nonnull attribute from dlopen.
+
+	* posix/confstr.c (confstr): Return appropriate strings for
+	_CS_LFS_LINTFLAGS, _CS_LFS64_CFLAGS, and _CS_LFS64_LINTFLAGS.
+
+2004-11-05  Maciej W. Rozycki  <macro@mips.com>
+
+	* sysdeps/mips/dl-machine.h: Include <sgidefs.h>.  Use _ABIO32,
+	_ABIN32 and _ABI64 for ABI selection throughout.
+	* sysdeps/mips/elf/start.S: Likewise.
+	* sysdeps/mips/mips64/__longjmp.c: Likewise.
+	* sysdeps/mips/mips64/bsd-_setjmp.S: Likewise.
+	* sysdeps/mips/mips64/bsd-setjmp.S: Likewise.
+	* sysdeps/mips/mips64/setjmp.S: Likewise.
+	* sysdeps/mips/mips64/setjmp_aux.c: Likewise.
+	* sysdeps/mips/sys/regdef.h: Likewise.
+	* sysdeps/mips/sys/ucontext.h: Likewise.
+	* sysdeps/unix/mips/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/kernel-features.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pread.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pread64.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/ptrace.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pwrite.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pwrite64.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sigaction.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/procfs.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/ucontext.h: Likewise.
+
+	* sysdeps/mips/atomicity.h: Use _ABIO32, _ABIN32 and _ABI64 for
+	ABI selection throughout.
+	* sysdeps/mips/bits/setjmp.h: Likewise.
+	* sysdeps/mips/fpu/bits/mathdef.h: Likewise.
+	* sysdeps/mips/machine-gmon.h: Likewise.
+	* sysdeps/mips/sys/asm.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/fcntl.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/sigcontext.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/stat.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/kernel_stat.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sigcontextinfo.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/ptrace.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/tas.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/user.h: Likewise.
+
+	* sysdeps/mips/sgidefs.h: Prevent <asm/sgidefs.h> from being
+	included by kernel headers and undo its settings if already
+	included.  Define _ABIO32, _ABIN32 and _ABI64 if missing and use
+	them to define _MIPS_SIM_ABI32, _MIPS_SIM_NABI32 and
+	_MIPS_SIM_ABI64 for compatibility.
+	* sysdeps/unix/sysv/linux/mips/Makefile: Use _ABIO32, _ABIN32 and
+	_ABI64 for ABI selection in generated syscall-list.h.
+	* sysdeps/unix/sysv/linux/mips/configure.in: Use _ABIO32, _ABIN32
+	and _ABI64 for ABI selection in generated asm-unistd.h.
+	* sysdeps/unix/sysv/linux/mips/configure: Regenerate.
+
+	* sysdeps/unix/sysv/linux/mips/pwrite.c (__libc_pwrite): Correct
+	an inverted _MIPS_SIM conditional.
+
+2004-11-23  Alexandre Oliva  <aoliva@redhat.com>
+
+	* sysdeps/generic/dl-tls.c (_dl_determine_tlsoffset): Use free
+	range even if it doesn't match exactly.
+
+2004-11-23  Jakub Jelinek  <jakub@redhat.com>
+
+	* nss/nss_files/files-XXX.c (internal_getent): If parse_line returned
+	-1, also do H_ERRNO_SET (NETDB_INTERNAL).
+
+2004-11-22  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/i386/fpu_control.h: Add volatile to the asms.
+	Patch by Alexander Stohr.
+
+2004-11-22  Jakub Jelinek  <jakub@redhat.com>
+
+	* nscd/nscd_getai.c (__nscd_getai): Avoid memory and file descriptor
+	leaks.
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Free air.
+
+2004-11-15  Maciej W. Rozycki  <macro@mips.com>
+
+	* sysdeps/unix/sysv/linux/mips/bits/siginfo.h (__SI_MAX_SIZE):
+	Define appropriately based on __WORDSIZE.
+	[struct siginfo] (__pad0): Add for explicit padding.
+
+	* sysdeps/unix/sysv/linux/mips/bits/siginfo.h: Formatting fixes
+	throughout.
+
+2004-11-22  Ulrich Drepper  <drepper@redhat.com>
+
+	* dirent/dirent.h: Add nonnull attributes.
+	* dlfcn/dlfcn.h: Likewise.
+
+2004-11-20  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/ieee754/k_standard.c: Document code 50.
+	(__kernel_standard) <case 50>: Avoid raising div-by-zero
+	exception again.
+
+2004-11-19  H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #552]
+	* math/libm-test.inc (tgamma_test): Update tgamma (0) and
+	tgamma (-0).
+	* sysdeps/generic/w_tgamma.c (__tgamma): Properly handle |x| == 0.
+	* sysdeps/generic/w_tgammaf.c (__tgammaf): Likewise.
+	* sysdeps/generic/w_tgammal.c (__tgammal): Likewise.
+	* sysdeps/ieee754/dbl-64/e_gamma_r.c (__ieee754_gamma_r): Likewise.
+	* sysdeps/ieee754/flt-32/e_gammaf_r.c: Likewise.
+	* sysdeps/ieee754/ldbl-128/e_gammal_r.c: Likewise.
+	* sysdeps/ieee754/ldbl-96/e_gammal_r.c: Likewise.
+	* sysdeps/ieee754/k_standard.c (__kernel_standard): Handle
+	tgamma (0) and tgamma (-0).
+
+2004-11-20  Ulrich Drepper  <drepper@redhat.com>
+
+	* time/tzfile.c (__tzfile_read): Avoid open for checking whether
+	the file we already use changed.
+
+	* misc/syslog.c: Remove !USE_IN_LIBIO code.
+
+2004-11-20  Jakub Jelinek  <jakub@redhat.com>
+
+	* signal/signal.h (__sysv_signal, sysv_signal, signal, bsd_signal,
+	ssignal): Remove __nonnull attribute.
+
+2004-11-20  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/unix/sysv/linux/sh/sys/procfs.h: New file.
+
+2004-11-20  Ulrich Drepper  <drepper@redhat.com>
+
+	* signal/signal.h: Add nonnull attributes.
+
+	* signal/signal.h: Add deprecated attributes to sigstack,
+	sigpause, sigblock, sigsetmask, siggetmask.
+
+2004-11-20  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/bits/socket.h (SCM_RIGHTS): Avoid
+	comma at the end of enum if __USE_BSD is not defined.
+
+2004-11-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (_int_malloc): Check for corruption of chunk
+	which is about to be returned.
+
+	* malloc/malloc.c (_int_free): Add a few more cheap tests for
+	corruption.
+
+2004-11-17  Randolph Chung  <tausq@debian.org>
+
+	* sysdeps/hppa/dl-machine.h (TRAMPOLINE_TEMPLATE): Add unwind
+	annotations.
+
+2004-11-18  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #544]
+	* posix/regex.h (RE_NO_SUB): New define.
+	* posix/regex_internal.h (OP_DELETED_SUBEXP): New.
+	(re_dfa_t): Add subexp_map.
+	* posix/regcomp.c (struct subexp_optimize): New type.
+	(optimize_subexps): New routine.
+	(re_compile_internal): Call it.
+	(re_compile_pattern): Set preg->no_sub to 1 if RE_NO_SUB.
+	(free_dfa_content): Free subexp_map.
+	(calc_inveclosure, calc_eclosure): Skip OP_DELETED_SUBEXP nodes.
+	* posix/regexec.c (re_search_internal): If subexp_map
+	is not NULL, duplicate registers as needed.
+	* posix/Makefile: Add rules to build and run tst-regex2.
+	* posix/tst-regex2.c: New test.
+	* posix/rxspencer/tests: Fix last two tests (\0 -> \1).
+	Add some new tests for nested subexpressions.
+
+2004-11-18  Ulrich Drepper  <drepper@redhat.com>
+
+	* libio/libio.h (_IO_FLAGS2_FORTIFY): Renamed from
+	_IO_FLAGS2_CHECK_PERCENT_N.
+	* debug/fprintf_chk.c: Adjust all users.
+	* debug/printf_chk.c: Likewise.
+	* debug/vfprintf_chk.c: Likewise.
+	* debug/vprintf_chk.c: Likewise.
+	* debug/vsnprintf_chk.c: Likewise.
+	* debug/vsprintf_chk.c: Likewise.
+	* stdio-common/vfprintf.c: Likewise.  Detect missing %N$ formats.
+	* debug/tst-chk1.c: Test detection of missing %N$ formats.
+
+2004-11-15  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/bug-regex24.c: Include string.h.
+
+	* nis/nis_clone_obj.c (nis_clone_object): Rename out3 label to out2
+	and out2 to out.  Remove out label.  Formatting.
+
+2004-11-15  Ulrich Drepper  <drepper@redhat.com>
+
+	* include/stdio.h: Do not mark __libc_message as noreturn.
+	* sysdeps/unix/sysv/linux/libc_fatal.c (__libc_fatal): Add loop to
+	fool gcc.  Include <stdbool.h>.
+	* sysdeps/posix/libc_fatal.c (__libc_fatal): Add loop to fool gcc.
+	(__libc_message): Fix typo.
+
+2004-11-13  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (malloc_state): stat_lock_* elements need only
+	be defined if THREAD_STATS is defined.  Remove pad0_ since it does
+	not align with cache line sizes in general anyway.
+
+2004-11-13  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/rtld.c (print_statistics): Avoid segfaults if not all namespaces
+	are used.  Fix computation of num_relative_relocations on RELA
+	architectures other than IA-64 and Alpha.
+
+2004-11-13  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (_int_free): Use munmap_chunk for handling
+	mmaped memory.
+
+2004-11-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (_int_free): Remove test for NULL parameter.
+	(_int_realloc): Call _int_free only if memory parameter is not NULL.
+
+	* sysdeps/unix/sysv/linux/libc_fatal.c: Add new function __libc_message
+	which performs the printing and simple format string handling.  The
+	string is written to tty, stderr, syslog in this order, stopping after
+	the first successful output.
+	(__libc_fatal): Call __libc_message.
+	* include/stdio.h: Declare __libc_message.
+	* malloc/malloc.c (malloc_printerr): Use __libc_message.
+	* debug/chk_fail.c: Also print message with __libc_message.
+	* debug/test-strcpy_chk.c: Ensure that debug messages are not printed
+	to the terminal or stderr.
+	* debug/tst-chk1.c: Likewise.
+
+	* posix/Makefile: Remove gpl2lgpl variable.
+
+2004-11-12  Martin Schwidefsky  <schwidefsky@de.ibm.com>
+
+	* elf/elf.h: Add 20 bit relocations R_390_*20.
+
+2004-11-12  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/i386/setuid.c: Include linux/posix_types.h.
+	* sysdeps/unix/sysv/linux/i386/setgid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setreuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setregid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setresuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setresgid.c: Likewise.
+
+2004-11-12  Andreas Schwab  <schwab@suse.de>
+
+	* nis/ypclnt.c (ypprot_err): Fix "minor optimizations".
+
+2004-11-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* posix/Makefile (tests): Add bug-regex24.
+	* posix/bug-regex24.c: New file.
+
+2004-11-12  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regexec.c (check_dst_limits_calc_pos_1): Use the map to
+	cut recursive paths.  Make exit condition more precise.
+	(match_ctx_add_entry): Initialize the map.
+	* posix/regex_internal.h (struct re_backref_cache_entry): Add a map of
+	reachable subexpression nodes from each backreference cache entry.
+
+2004-11-10  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/setreuid.c: Remove sys/syscall.h,
+	sys/types.h, linux/posix_types.h, sysdep.h and pthread-functions.h
+	includes.  Include setxid.h.  Use INLINE_SETXID_SYSCALL macro
+	instead of INLINE_SYSCALL, kill the HAVE_PTR__NPTL_SETXID guarded
+	snippets.
+	* sysdeps/unix/sysv/linux/setegid.c: Likewise.
+	* sysdeps/unix/sysv/linux/setuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/seteuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/setgid.c: Likewise.
+	* sysdeps/unix/sysv/linux/setresuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/setresgid.c: Likewise.
+	* sysdeps/unix/sysv/linux/setregid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setegid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setreuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/seteuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setgid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setresuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setresgid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setregid.c: Likewise.
+	* sysdeps/unix/sysv/linux/alpha/setreuid.c: Likewise.
+	Formatting.  Change signed int into int.
+	* sysdeps/unix/sysv/linux/alpha/setresuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/alpha/setresgid.c: Likewise.
+	* sysdeps/unix/sysv/linux/alpha/setregid.c: Likewise.
+	* sysdeps/unix/sysv/linux/syscalls.list (setresuid, setresgid):
+	Remove.
+	* sysdeps/unix/setxid.h: New file.
+
+	* Rules (binaries-static): Add xtests-static.
+	* Makeconfig (run-program-prefix): Filter also xtests-static.
+
+2004-11-09  Paul Eggert  <eggert@cs.ucla.edu>
+
+	[BZ #535]
+	* time/difftime.c: Fix a double-rounding bug on hosts with
+	64-bit time_t and long double being IEEE double.  Also, port
+	to more valid C99 hosts, even those that have padding bits.
+	Don't include <values.h> since it is marked as an obsolescent
+	interface.  Include <limits.h>, <float.h>, and <stdint.h> instead.
+	(TYPE_BITS, TYPE_FLOATING, TYPE_SIGNED): New macros.
+	(subtract): New static function, that works correctly without
+	double-rounding, even on hosts with 64-bit time_t.  Also cater
+	to hosts with padding bits.
+	(__difftime): Use it.  Use DBL_MANT_DIG and LDBL_MANT_DIG to
+	determine whether floating types are wide enough: the old
+	test (which used sizeof) could in theory report the wrong results
+	on hosts with padding bits in floating-point values.
+
+2004-11-11  Simon Josefsson  <jas@extundo.com>
+
+	[BZ #542]
+	* sysdeps/generic/strtok_r.c [HAVE_CONFIG_H]: Include config.h.
+	[!_LIBC]: Include strtok_r.h (in gnulib), map __strtok_r to
+	strtok_r and __rawmemchr to strchr.
+	(__strtok_r): Use C89 prototype.
+	[weak_alias]: Move calls to libc_hidden_def and weak_alias into
+	this #ifdef.
+
+2004-11-10  Paul Eggert  <eggert@cs.ucla.edu>
+
+	[BZ #541]
+	* time/mktime.c (SHR): New macro, which is a portable
+	substitute for >> that should work even on Crays.
+	(TIME_T_MIDPOINT, ydhms_diff, __mktime_internal): Use it.
+	Problem reported by Mark D. Baushke in
+	<http://lists.gnu.org/archive/html/bug-gnulib/2004-11/msg00071.html>.
+
+2004-11-09  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regexec.c (match_ctx_free_subtops): Remove, merge into...
+	(match_ctx_clean): ... this function.
+	(match_ctx_free): Call match_ctx_clean.
+
+	* posix/regexec.c (transit_state): Remove the check for
+	out-of-bounds buffers.
+	(check_matching): Check here for out-of-bounds buffers.
+	(re_search_internal): Store into match_kind a set of bits
+	indicating which incantation of fastmap scanning must be
+	used.  Use a switch statement instead of multiple ifs.
+	Exit the final "for (;;)" with goto free_return unless
+	the match succeeded, thus simplifying some conditionals.
+
+	* posix/regex_internal.c (re_string_reconstruct,
+	re_string_context_at): Add several branch predictions for
+	case-sensitive matching and no transition table being used.
+
+2004-11-10  Ulrich Drepper  <drepper@redhat.com>
+
+	* posix/tst-waitid.c: Don't use error to print error message, they
+	won't end up in the .out file.
+
+2004-11-09  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd-client.h (libc_locked_map_ptr): Add new first
+	parameter, used as class for definition.
+	* nscd/nscd_getpw_r.c: Adjust for libc_locked_map_ptr change.
+	(pw_map_free): Ensure no crash after memory is freed.
+	* nscd/nscd_getgr_r.c: Likewise.  Make map externally visible.
+	* nscd/nscd_gethst_r.c: Likewise.
+	* nscd/nscd_getai.c: Use map from nscd_gethst_r.c.
+	* nscd/nscd_initgroups.c: Use map from nscd_getgr_r.c.
+
+	* nscd/nscd_getai.c: Add some checks to detect corrupt databases.
+	* nscd/nscd_getgr_r.c: Likewise.
+	* nscd/nscd_gethst_r.c: Likewise.
+	* nscd/nscd_getpw_r.c: Likewise.
+
+2004-11-09  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/regcomp.c (calc_eclosure_iter): Don't access
+	dfa->edests[node].elems[0] if dfa->edests[node].nelem == 0.
+	* posix/rxspencer/tests: Add 5 new tests.
+
+2004-11-09  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/ifaddrs.c: Determine sin6_scope_id field
+	value correctly.  Patch by Mitsuru Kanda <mk@karaba.org>.
+
+2004-11-04  Jakub Jelinek  <jakub@redhat.com>
+
+	* libio/fileops.c (_IO_new_file_seekoff): If mode is 0 and
+	fp->_offset == _IO_pos_BAD, just call _IO_SYSSEEK (fp, 0, dir)
+	and if successful set fp->_offset.
+	* libio/Makefile (tests): Add bug-ungetc3.
+	* libio/bug-ungetc3.c: New test.
+
+2004-11-03  Marcus Brinkmann  <marcus@gnu.org>
+
+	* sysdeps/gnu/_G_config.h (_G_HAVE_MREMAP): Define symbol.
+	* sysdeps/mach/hurd/_G_config.h: New file.
+	* libio/fileops.c (mmap_remap_check) [__linux__]: Replaced with
+	[_G_HAVE_MREMAP].
+
+2004-11-08  Ulrich Drepper  <drepper@redhat.com>
+
+	* posix/regcomp.c (utf8_sb_map): Define.
+	(free_dfa_content): Don't free dfa->sb_char if it's a pointer to
+	utf8_sb_map.
+	(init_dfa): Use utf8_sb_map instead of initializing memory when the
+	encoding is UTF-8.
+
+2004-11-03  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regcomp.c (init_dfa): Get the codeset name outside glibc as
+	well.  Check if it is spelled UTF8 as well as UTF-8, and check
+	case-insensitively.  Set dfa->map_notascii manually when outside
+	glibc.
+	* posix/regex_internal.c (build_wcs_upper_buffer) [!_LIBC]: Enable
+	optimizations based on map_notascii.
+	* posix/regex_internal.h [HAVE_LANGINFO_H || HAVE_LANGINFO_CODESET
+	|| _LIBC]: Include langinfo.h.
+
+	* posix/regex_internal.h (struct re_backref_cache_entry): Add "more"
+	field.
+	* posix/regexec.c (check_dst_limits): Hoist computation of the source
+	and destination bkref_idx out of the loop.  Pass it to
+	check_dst_limits_calc_pos.
+	(check_dst_limits_calc_pos_1): New function, containing the recursive
+	loop of check_dst_limits_calc_pos; uses the "more" field of
+	struct re_backref_cache to control the loop.
+	(check_dst_limits_calc_pos): Store into "boundaries" the position
+	relative to lim's start and end positions.  Do not accept eclosures,
+	accept bkref_idx instead.  Call check_dst_limits_calc_pos_1 to do the
+	work.
+	(sift_states_bkref): Use the "more" field of struct re_backref_cache
+	to control the loop.  A big "if" was turned into a continue and the
+	function was reindented.
+	(get_subexp): Use the "more" field of struct re_backref_cache
+	to control the loop.
+	(match_ctx_add_entry): Initialize the bkref_ents' "more" field.
+	(search_cur_bkref_entry): Return -1 if out of bounds.
+
+	* posix/regexec.c (empty_set): Remove.
+	(sift_states_backward): Remove cur_src variable.  Move inner loop
+	to build_sifted_states.
+	(build_sifted_states): Extract from sift_states_backward.  Do not
+	use empty_set.
+	(update_cur_sifted_state): Do not use empty_set.  Special case
+	dest_nodes->nelem == 0.
+
+	* posix/regex_internal.h (struct re_backref_cache_entry): Remove flag
+	field.
+	(struct re_sift_context_t): Remove cur_bkref, cls_subexp_idx,
+	check_subexp fields.  Move limits last.
+	* posix/regexec.c (match_ctx_clear_flag): Remove.
+	(sift_ctx_init): Remove check_subexp parameter.  Do not set removed
+	fields.  Callers adjusted.
+	(expand_bkref_cache): Remove last_str parameter.  Callers adjusted.
+	(re_search_internal): Remove fast_translate variable.
+	(update_cur_sifted_state): Pass candidates as the final parameter
+	to sift_states_bkref.
+	(sift_states_bkref): Change last unused parameter to be "candidates",
+	do not fetch candidates into a local variable.
+	Remove dead test for "node == sctx->bkref", and the cur_bkref_idx
+	variable.
+	Remove loops that set/reset the flag field of backref cache entries.
+	(check_arrival_add_next_nodes): Use a signed int to hold the return
+	value of re_node_set_insert.
+	(group_nodes_into_DFAstates): Likewise.
+	(match_ctx_add_entry): Do not set the flag field of the new entry.
+
+2004-11-05  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/generic/ldsodefs.h (struct rtld_global_ro): Define
+	_dl_sysinfo_dso under [NEED_DL_SYSINFO_DSO] as well.
+	* elf/rtld.c (dl_main): Set up GLRO(dl_sysinfo_dso) under
+	[NEED_DL_SYSINFO_DSO] as well.
+	* sysdeps/generic/dl-sysdep.c (_dl_show_auxv): Always include
+	AT_SYSINFO and AT_SYSINFO_EHDR in name table.
+	(_dl_sysdep_start) [NEED_DL_SYSINFO_DSO]: Match AT_SYSINFO_EHDR.
+	* elf/dl-support.c (_dl_sysinfo_dso): Define also under
+	[NEED_DL_SYSINFO_DSO].
+	(_dl_aux_init) [NEED_DL_SYSINFO || NEED_DL_SYSINFO_DSO]:
+	Match AT_SYSINFO_EHDR and set GL(dl_sysinfo_dso).
+
+2004-11-05  Roland McGrath  <roland@redhat.com>
+
+	* manual/errno.texi (Error Codes): Revert last change for now.
+	* sysdeps/gnu/errlist.c: Regenerated.
+
+2004-11-04  Roland McGrath  <roland@frob.com>
+
+	* Makeconfig (link-libc, rpath-dirs): Remove AIX cruft definitions.
+	(LDFLAGS-rpath-ORIGIN, LDFLAGS-soname-fname): Likewise.
+	(LDFLAGS-rdynamic, LDFLAGS-Bsymbolic): Likewise.
+	($(common-objpfx)gnu/lib-names.stmp): Likewise.
+
+2004-11-01  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/x86_64/sys/procfs.h [__WORDSIZE == 32]
+	(elf_fpxregset_t): New type.
+	(struct elf_prpsinfo): If __WORDSIZE == 32, change pr_[ug]id type
+	to unsigned short int.
+	* sysdeps/unix/sysv/linux/x86_64/sys/user.h
+	(struct user_fpregs_struct): Fix comment.
+	* sysdeps/i386/fpu/bits/mathdef.h (float_t, double_t): If
+	__FLOAT_EVAL_METHOD__ is defined and 0, typedef to float resp. double.
+	* sysdeps/x86_64/fpu/bits/mathdef.h: Include bits/wordsize.h.
+	(float_t, double_t): If -m32 and not -mfpmath=sse, typedef to
+	long double.
+	* sysdeps/x86_64/fpu/bits/fenv.h: Include bits/wordsize.h.
+	(fenv_t): Remove __mxcsr field for -m32.
+
+2004-11-04  Jakub Jelinek  <jakub@redhat.com>
+
+	* libio/ftello.c (ftello): Don't subtract save_end - save_base
+	if pos is _IO_pos_BAD.
+	* libio/ftello64.c (ftello64): Likewise.
+	* libio/iofgetpos.c (_IO_new_fgetpos): Likewise.
+	* libio/iofgetpos64.c (_IO_new_fgetpos64): Likewise.
+	* libio/oldiofgetpos.c (_IO_old_fgetpos): Likewise.
+	* libio/oldiofgetpos64.c (_IO_old_fgetpos64): Likewise.
+	* libio/ioftell.c (_IO_ftell): Likewise.
+	Cast to long int instead of off_t when checking for overflow.
+
+2004-11-04  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/unix/sysv/linux/alpha/register-dump.h (regnames): Align.
+	(linefeed): Remove.
+	(register_dump): Rewrite to generate into a flat buffer instead
+	of into iovecs.
+
+2004-11-02  Jakub Jelinek  <jakub@redhat.com>
+
+	* debug/tst-chk1.c (ret): New volatile variable.
+	(CHK_FAIL_END): Remove redundant ret setting.
+	(do_test): Remove ret variable.
+
+2004-11-03  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regex_internal.h (__regfree) [!_LIBC]: Define to regfree.
+
+2004-11-03  Marcus Brinkmann  <marcus@gnu.org>
+
+	* sysdeps/generic/utime.c: Include <stddef.h>.
+
+	* sysdeps/generic/sysconf.c: Include <grp.h> and <pwd.h>.
+
+	* sysdeps/generic/tempname.c (__path_search): Add missing argument
+	TRY_TMPDIR.
+
+2004-10-31  Mariusz Mazur  <mmazur@kernel.pl>
+
+	* sysdeps/unix/sysv/linux/alpha/setregid.c: New file.
+	* sysdeps/unix/sysv/linux/alpha/setresgid.c: New file.
+	* sysdeps/unix/sysv/linux/alpha/setresuid.c: New file.
+	* sysdeps/unix/sysv/linux/alpha/setreuid.c: New file.
+
+2004-10-27  Derek R. Price  <derek@ximbiot.com>
+
+	[BZ #487] This change is imported from gnulib.
+	* time/mktime.c (not_equal_tm) [DEBUG]: Remove redundant check.
+
+2004-10-24  Paul Eggert  <eggert@cs.ucla.edu>
+
+	[BZ #473]
+	* time/tst-mktime.c (main): Don't assume that mktime fails
+	when given time stamps before 1970.  It returns negative
+	time_t values instead, for compatibility with BSD.
+
+	* time/tst-mktime2.c: New file.
+	* time/Makefile (tests): Add it.
+
+	[BZ #473] Import from gnulib.  Revamp to avoid several problems near
+	time_t extrema, and on hosts with 64-bit time_t and 32-bit int.
+	This fixes Debian bug 177940.
+	* time/mktime.c (TIME_T_MIDPOINT): New macro.
+	(ydhms_diff): Renamed from ydhms_tm_diff, with a new signature,
+	which avoids overflow problems on hosts with 64-bit time_t and
+	32-bit int.  All callers changed.  Now an inline function.
+	Verify at compile-time that long int is wide enough to avoid
+	these overflow problems.
+	(guess_time_tm): New function.
+	(__mktime_internal): Use it.  Avoid overflow when computing yday on
+	hosts with 64-bit long and 32-bit int.  Remove tests for 69;
+	no longer needed.  Use if rather than #ifdef for LEAP_SECONDS_POSSIBLE
+	so that the code is checked by more compilers.
+	Do not rely on floating point to probe: stick to integer arithmetic,
+	to avoid potential porting problems.
+	Repair potential overflow correctly in the Southern Hemisphere.
+	(localtime_offset): Add a FIXME for the case where time_t is unsigned.
+
+2004-10-30  Andreas Schwab  <schwab@suse.de>
+
+	* sysdeps/m68k/dl-machine.h (elf_machine_rela)
+	(elf_machine_rela_relative, elf_machine_lazy_rel): Mark auto
+	instead of static.
+
+2004-10-30  Andreas Schwab  <schwab@suse.de>
+
+	* sysdeps/unix/sysv/linux/waitid.c: Include <stddef.h> for NULL.
+
+2004-10-30  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (_int_free): Use unique comments for the error
+	cases.
+
+2004-10-28  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/hurd/i386/tls.h (_hurd_tls_fork): Use i386_thread_state
+	instead of machine_thread_state.
+
+2004-10-28  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/unix/sysv/linux/syscalls.list: Remove setaltroot.
+
+2004-10-28  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-open.c (dl_open_worker): Remove reference to glibcbug script.
+
+2004-10-27  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-load.c (_dl_map_object): Use cache_rpath to check for
+	existing rpath in main executable, not explicit test.
+
+2004-10-27  Jakub Jelinek  <jakub@redhat.com>
+
+	* include/resolv.h (_res_opcodes): New extern.
+	Add libresolv_hidden_proto.
+	* resolv/res_debug.c (_res_opcodes): Remove.
+	(__p_class_syms, __p_type_syms): Add libresolv_hidden_proto
+	and libresolv_hidden_data_def.  Remove attribute_hidden.
+	* resolv/res_mkquery.c (_res_opcodes): Remove.
+	* resolv/res_data.c (_res_opcodes): Remove attribute_hidden.
+	Add libresolv_hidden_data_def.
+
+2004-10-27  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-open.c (_dl_open): Don't allow explicitly opening a DSO
+	into an empty namespace.
+
+	* elf/dl-fini.c (_dl_fini): Fix search for map in maps array.
+	Reverse order of namespaces.
+	* elf/Makefile: Add rules to build and run tst-dlmopen3.
+	* elf/tst-dlmopen3.c: New file.
+	* elf/tst-dlmopen1mod.c: Add check whether constructor runs.
+
+2004-10-27  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/generic/glob.c (globfree): Clear gl_pathv after freeing it.
+	* posix/Makefile: Add rules to build and run bug-glob2 test.
+	* posix/bug-glob2.c: New test.
+
+2004-10-27  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/hurd/i386/tls.h (HURD_TLS_DESC_DECL): New macro.
+	(_hurd_tls_init): Use it.
+	(_hurd_tls_fork): New function.
+	* sysdeps/mach/hurd/fork.c (__fork) [USE_TLS]: Call it.
+
+2004-10-26  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/hurd/i386/tls.h (_hurd_tls_init): Initialize TCB->tcb.
+
+2004-10-26  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/gnu/netinet/udp.h (struct udphdr): Use u_int16_t
+	type instead of uint16_t.  Formatting.
+
+2004-10-25  Roland McGrath  <roland@redhat.com>
+
+	* login/openpty.c (openpty): Add libutil_hidden_def.
+
+	* nss/nss_files/files-parse.c (nss_files_parse_hidden_def): Define to
+	either libc_hidden_def or libnss_files_hidden_def, not hidden_def.
+	This file is also compiled into libnss_hesiod by #include.
+
+2004-10-25  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/unix/bsd/hp: Directory and all files removed.
+	These are now in the ports repository.
+	* sysdeps/unix/bsd/osf: Likewise.
+	* sysdeps/unix/bsd/sequent: Likewise.
+	* sysdeps/unix/bsd/sony: Likewise.
+	* sysdeps/unix/bsd/ultrix4: Likewise.
+	* sysdeps/unix/sysv/aix: Likewise.
+	* sysdeps/unix/sysv/hpux: Likewise.
+	* sysdeps/unix/sysv/irix4: Likewise.
+	* sysdeps/unix/sysv/isc2.2: Likewise.
+	* sysdeps/unix/sysv/minix: Likewise.
+	* sysdeps/unix/sysv/sco3.2.4: Likewise.
+	* sysdeps/unix/sysv/sco3.2: Likewise.
+	* sysdeps/unix/sysv/sysv4: Likewise.
+
+	* configure.in (ASM_LINE_SEP): Move this setting to ...
+	* sysdeps/hppa/configure.in: ... here, new file.
+	* sysdeps/hppa/configure: New generated file.
+	* configure: Regenerated.
+
+2004-10-25  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/sh/dl-machine.h: Include sysdep.h.
+	(ELF_MACHINE_RUNTIME_TRAMPOLINE): Add CFI directives.
+	(elf_machine_runtime_setup): Add always_inline attribute.
+	(_dl_start_user): Pass the correct environ.
+	(elf_machine_rela): Replace static inline by auto inline, add
+	always_inline attribute.
+	(elf_machine_rela_relative): Likewise.
+	(elf_machine_lazy_rel): Likewise.
+
+2004-10-24  Ulrich Drepper  <drepper@redhat.com>
+
+	* nis/nis_call.c: Pretty printing.  Minor cleanups.
+	* nis/nis_addmember.c (nis_addmember): Add assert to check buffer
+	bounds.
+
+	* resolv/nss_dns/dns-host.c: Avoid using PLTs.
+	* include/libc-symbols.h: Define hidden attribute macros for
+	libnss_nisplus and libutil.
+	* include/utmp.h: Add libutil_hidden_proto for login_tty.
+	* login/login_tty.c: Add libutil_hidden_def.
+	* nis/nisplus-parser.h: Add libnss_nisplus_hidden_proto for parsers.
+	* nis/nss_nisplus/nisplus-parser.c: Add libnss_nisplus_hidden_def.
+	* include/pty.h: New file.
+	* include/rpcsvc/yp.h: New file.
+	* include/rpcsvc/ypclnt.h: New file.
+	* include/rpcsvc/ypupd.h: New file.
+	* include/libc-symbols.h: Define hidden attribute macros for libnsl.
+	* include/rpcsvc/nislib.h: Use libnsl_hidden_proto for various
+	functions.
+	* nis/nis_add.c: Add libnsl_hidden_def.  Minor optimizations.
+	* nis/nis_call.c: Likewise.
+	* nis/nis_clone_obj.c: Likewise.
+	* nis/nis_defaults.c: Likewise.
+	* nis/nis_domain_of_r.c: Likewise.
+	* nis/nis_error.c: Likewise.
+	* nis/nis_file.c: Likewise.
+	* nis/nis_free.c: Likewise.
+	* nis/nis_local_names.c: Likewise.
+	* nis/nis_lookup.c: Likewise.
+	* nis/nis_modify.c: Likewise.
+	* nis/nis_print.c: Likewise.
+	* nis/nis_remove.c: Likewise.
+	* nis/nis_subr.c: Likewise.
+	* nis/nis_table.c: Likewise.
+	* nis/nis_util.c: Likewise.
+	* nis/yp_xdr.c: Likewise.
+	* nis/ypclnt.c: Likewise.
+	* nis/ypupdate_xdr.c: Likewise.
+
+	* resolv/res_send.c (send_dg): Cope with failures.
+
+	* include/libc-symbols.h: Define hidden attribute macros for
+	libnss_files.
+	* include/netdb.h: Use libnss_files_hidden_proto for the parsers
+	defined in libnss_files, not libc_hidden_proto.
+	* include/netinet/ether.h: Likewise.
+	* include/rpc/netdb.h: Likewise.
+	* nss/nss_files/files-parse.c: Use hidden_def in parser definitions
+	instead of libc_hidden_def.
+	* nss/nss_files/files-netgrp.c: Add libnss_files_hidden_def to
+	_nss_netgroup_parseline definition.
+
+2004-10-23  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/hurd/i386/tls.h (_hurd_tls_init): Don't return early
+	after an RPC succeeds.
+
+	* sysdeps/vax, sysdeps/unix/bsd/vax: Directories and all files removed.
+	These are now in the ports repository.
+	* sysdeps/tahoe, sysdeps/unix/bsd/tahoe: Likewise.
+	* sysdeps/cris, sysdeps/unix/sysv/linux/cris: Likewise.
+	* sysdeps/am29k, sysdeps/i860, sysdeps/i960, sysdeps/m88k: Likewise.
+	* sysdeps/standalone, sysdeps/z8000: Likewise.
+
+2004-10-23  Ulrich Drepper  <drepper@redhat.com>
+
+	* resolv/res_send.c (send_dg): Combine write and read to socket
+	into one loop.
+
+2004-10-22  Roland McGrath  <roland@frob.com>
+
+	* Makefile (%.bz2, %.gz): Move these pattern rules ...
+	* Makerules: ... to here.
+
+2001-10-31  Alexandre Oliva  <aoliva@redhat.com>
+
+	* elf/elf.h: Add R_MN10300_* relocation numbers.
+
+2004-10-22  Paul Eggert  <eggert@cs.ucla.edu>
+
+	[BZ #471] Fix imported from gnulib.
+	* time/mktime.c (leapyear, ydhms_tm_diff): Year is of type
+	long int, not int, to avoid problems when tm_year == INT_MAX
+	and tm_mon > 12.
+	(__mktime_internal): Compute year using long int arithmetic,
+	not int arithmetic, to avoid problems on hosts where time_t
+	and long are 64 bits but int is 32.
+
+	[BZ #468] Import a fix from gnulib.
+	* time/mktime.c [! DEBUG]: Do not include <string.h>.
+	It's needed only if DEBUG is nonzero.
+
+	[BZ #470] Import fix from gnulib.
+	* time/mktime.c [!_LIBC] (__mktime_internal): Define to
+	mktime_internal, to avoid clashes with any __mktime_internal
+	function defined in the standard library.
+
+	[BZ #469] Imported from gnulib.
+	* time/mktime.c (__isleap): Remove; all uses replaced by:
+	(leapyear): New function, which avoids overflow by not adding
+	1900 to year before testing whether it is a leap year.
+
+	[BZ #472] Imported from gnulib.
+	* time/mktime.c (Local Variables): Remove -DHAVE_TIME_R_POSIX;
+	no longer used.
+
+2004-10-22  Ulrich Drepper  <drepper@redhat.com>
+
+	* resolv/res_send.c: Remove compatibility code which is unused in
+	glibc and probably bitrotten.
+
+	* debug/execinfo.h: Remove __THROW from backtrace prototype.
+
+2004-10-22  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/i386/Makefile (CFLAGS-backtrace.c): Add -fexceptions.
+	* sysdeps/i386/backtrace.c: Include <bits/libc-lock.h>, <dlfcn.h>,
+	<stdlib.h> and <unwind.h>.  Remove <bp-checks.h> include.
+	(struct trace_arg): New type.
+	(unwind_backtrace, unwind_getip, unwind_getcfa, unwind_getgr): New
+	fn pointers resp. macros.
+	(init, backtrace_helper): New functions.
+	(__backtrace): Rewritten to use _Unwind_Backtrace first and fall
+	back to frame pointer walking.
+
+2004-10-22  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/Versions: Things are still in flux, it
+	seems.  Undo last additions.
+
+2004-10-21  Ulrich Drepper  <drepper@redhat.com>
+
+	* posix/execvp.c (execvp): Also ignore ENODEV and ETIMEDOUT errno
+	values.
+
+2004-10-20  Roland McGrath  <roland@redhat.com>
+
+	* Makeconfig ($(common-objpfx)shlib-versions.v.i): Check also
+	$(config-sysdirs) for shlib-versions files.
+
+	* Makeconfig ($(common-objpfx)soversions.i): Replace shell loop with
+	use of ...
+	* scripts/soversions.awk: ... this new file.  Collect lib info and
+	match any DEFAULT line before emitting anything, so DEFAULT can come
+	later in the concatenation of shlib-versions files.
+
+	* manual/errno.texi (Error Codes): Add ENOKEY, EKEYEXPIRED,
+	EKEYREVOKED, EKEYREJECTED.
+	* sysdeps/unix/sysv/linux/Versions (libc: GLIBC_2.3.4): New errlist.
+	* sysdeps/gnu/errlist.c: Regenerated.
+
+	* sysdeps/gnu/errlist-compat.awk: Don't bail if Versions gives a count
+	higher than ERR_MAX reports.  Instead, emit a #define ERR_MAX.
+	* sysdeps/gnu/Makefile ($(objpfx)errlist-compat.h): New target.
+	(generated): Add errlist-compat.h.
+	* sysdeps/gnu/errlist.awk: Make output #include <errlist-compat.h> to
+	define ERR_MAX and use that for table size.
+
+2004-10-20  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/syscalls.list: Add entries for setaltroot,
+	key_add, key_request, and keyctl syscalls.
+	* sysdeps/unix/sysv/linux/Versions: Export them.
+
+2004-10-19  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/readonly-area.c: New file.
+
+2004-10-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/Versions [ld, GLIBC_PRIVATE]: Add _dl_debug_state.
+	* elf/dl-debug.c (_dl_debug_state): Add rtld_hidden_def.
+	* sysdeps/generic/ldsodefs.h (_dl_debug_state): Don't mark as
+	hidden but use rtld_hidden_proto.
+
+2004-10-19  Alfred M. Szmidt  <ams@gnu.org>
+
+	* sysdeps/generic/readonly-area.c (__readonly_str): Renamed to ...
+	(__readonly_area): ... this.
+
+2004-10-18  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/generic/strcpy_chk.c (__strcpy_chk): Speed up by checking
+	destlen only every 4 bytes.
+
+2004-10-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* nss/getent.c (hosts_keys): Let inet_pton decide whether the
+	string is an address or not.
+
+2004-10-19  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/dl-addr.c (_dl_addr): Don't look at STT_TLS symbols.
+	Use DL_SYMBOL_ADDRESS to set dli_saddr.
+
+	* debug/Makefile (catchsegv): Prefix $LIB with a backslash.
+
+2004-10-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* debug/Makefile ($(objpfx)catchsegv): To support multilib
+	platforms, use $LIB in path to slibdir.
+
+2004-10-19  Jakub Jelinek  <jakub@redhat.com>
+
+	* debug/catchsegv.sh: Update copyright year.
+	Use mktemp to create segv_output file.
+
+2004-10-18  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/dl-libc.c (__libc_dlsym_private, __libc_register_dl_open_hook):
+	New functions.
+	(__libc_dlopen_mode): Call __libc_register_dl_open_hook and
+	__libc_register_dlfcn_hook.
+	* dlfcn/Makefile (routines, elide-routines.os): Set.
+	Add rules to build and test tststatic2.
+	* dlfcn/tststatic2.c: New test.
+	* dlfcn/modstatic2.c: New test module.
+	* dlfcn/dladdr.c: Call _dlfcn_hook from libdl.so if not NULL.
+	Define __ prefixed routine in libc.a and in libdl.a just call it.
+	* dlfcn/dladdr1.c: Likewise.
+	* dlfcn/dlclose.c: Likewise.
+	* dlfcn/dlerror.c: Likewise.
+	* dlfcn/dlinfo.c: Likewise.
+	* dlfcn/dlmopen.c: Likewise.
+	* dlfcn/dlopen.c: Likewise.
+	* dlfcn/dlopenold.c: Likewise.
+	* dlfcn/dlsym.c: Likewise.
+	* dlfcn/dlvsym.c: Likewise.
+	* dlfcn/sdladdr.c: New file.
+	* dlfcn/sdladdr1.c: New file.
+	* dlfcn/sdlclose.c: New file.
+	* dlfcn/sdlerror.c: New file.
+	* dlfcn/sdlinfo.c: New file.
+	* dlfcn/sdlopen.c: New file.
+	* dlfcn/sdlsym.c: New file.
+	* dlfcn/sdlvsym.c: New file.
+	* dlfcn/Versions (libdl): Export _dlfcn_hook@GLIBC_PRIVATE.
+	* include/dlfcn.h (DL_CALLER_DECL, DL_CALLER RETURN_ADDRESS): Define.
+	(struct dlfcn_hook): New type.
+	(_dlfcn_hook): New extern decl.
+	(__dlopen, __dlclose, __dlsym, __dlerror, __dladdr, __dladdr1,
+	__dlinfo, __dlmopen, __libc_dlsym_private,
+	__libc_register_dl_open_hook, __libc_register_dlfcn_hook): New
+	prototypes.
+	(__dlvsym): Use DL_CALLER_DECL.
+	* include/libc-symbols.h: Define libdl_hidden_proto and friends.
+
+	* malloc/arena.c (_dl_open_hook): Extern decl.
+	(ptmalloc_init): Don't call _dl_addr when dlopened from statically
+	linked programs but don't use brk for them either.
+
+2004-10-18  Roland McGrath  <roland@redhat.com>
+
+	* dlfcn/bug-dlsym1.c (main): Remove bogus setenv call.
+
+2004-10-18  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-open.c (dl_open_worker): Avoid dereferencing map in
+	statically linked code if there might be none found.
+
+2004-10-06  Maciej W. Rozycki  <macro@mips.com>
+
+	* sysdeps/unix/sysv/linux/mips/mips32/sysdep.h
+	(__SYSCALL_CLOBBERS): Add "memory".
+	* sysdeps/unix/sysv/linux/mips/mips64/n32/sysdep.h
+	(__SYSCALL_CLOBBERS): Likewise.
+	* sysdeps/unix/sysv/linux/mips/mips64/n64/sysdep.h
+	(__SYSCALL_CLOBBERS): Likewise.
+
+2004-10-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* include/libc-symbols.h: Define libresolv_hidden_proto and friends.
+	* include/resolv.h: Add libresolv_hidden_proto for symbols defined,
+	used, and exported in libresolv.
+	* resolv/base64.c: Add libresolv_hidden_def.
+	* resolv/gethnamaddr.c: Likewise.
+	* resolv/ns_name.c: Likewise.
+	* resolv/ns_netint.c: Likewise.
+	* resolv/res_comp.c: Likewise.
+	* resolv/res_data.c: Likewise.
+	* resolv/res_debug.c: Likewise.
+	* resolv/res_mkquery.c: Likewise.
+	* resolv/res_query.c: Likewise.
+	* resolv/res_send.c: Likewise.
+
+2004-10-15  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/dl-minimal.c (__chk_fail): New.  Add rtld_hidden_def.
+	* sysdeps/unix/sysv/linux/readonly-area.c: New file.
+	* sysdeps/i386/i686/memmove.S (__memmove_chk): Add checking
+	routine.
+	* sysdeps/i386/i686/memcpy.S (__memcpy_chk): Likewise.
+	* sysdeps/i386/i686/mempcpy.S (__mempcpy_chk): Likewise.
+	* sysdeps/i386/i686/memset.S (__memset_chk): Likewise.
+	* sysdeps/i386/i686/memmove-chk.S: New file.
+	* sysdeps/i386/i686/memcpy-chk.S: Likewise.
+	* sysdeps/i386/i686/mempcpy-chk.S: Likewise.
+	* sysdeps/i386/i686/memset-chk.S: Likewise.
+	* sysdeps/generic/strcat_chk.c (__strcat_chk): Don't __chk_fail
+	if exactly fitting into buffer.
+	* sysdeps/generic/strncat_chk.c (__strncat_chk): Likewise.
+	* sysdeps/generic/readonly-area.c: New file.
+	* sysdeps/generic/strncpy_chk.c (__strncpy_chk): Only test
+	destlen once.
+	* sysdeps/x86_64/memset.S (__memset_chk): Add checking routine.
+	* sysdeps/x86_64/memcpy.S (__memcpy_chk): Likewise.
+	* sysdeps/x86_64/mempcpy.S (__memcpy_chk): Define to __mempcpy_chk.
+	* sysdeps/x86_64/memcpy-chk.S: New file.
+	* sysdeps/x86_64/mempcpy-chk.S: Likewise.
+	* sysdeps/x86_64/memset-chk.S: Likewise.
+	* sysdeps/x86_64/strcpy-chk.S: Likewise.
+	* sysdeps/x86_64/stpcpy-chk.S: Likewise.
+	* argp/argp-xinl.c (__OPTIMIZE__): Define to 1 instead of nothing.
+	* argp/argp-fs-xinl.c (__OPTIMIZE__): Likewise.
+	* debug/tst-chk1.c: New test.
+	* debug/tst-chk2.c: Likewise.
+	* debug/tst-chk3.c: Likewise.
+	* debug/test-strcpy_chk.c: Likewise.
+	* debug/test-stpcpy_chk.c: Likewise.
+	* debug/vsprintf_chk.c (__vsprintf_chk): If flags > 0, request
+	_IO_FLAGS2_CHECK_PERCENT_N.  Add libc_hidden_def.
+	* debug/Makefile (routines): Add printf_chk, fprintf_chk, vprintf_chk,
+	vfprintf_chk, gets_chk and readonly-area.
+	(CFLAGS-*_chk.c): Set.
+	(tests): Add tst-chk1, tst-chk2, tst-chk3, test-strcpy_chk and
+	test-stpcpy_chk.
+	* debug/vprintf_chk.c: New file.
+	* debug/printf_chk.c: Likewise.
+	* debug/vfprintf_chk.c: Likewise.
+	* debug/fprintf_chk.c: Likewise.
+	* debug/gets_chk.c: Likewise.
+	* debug/chk_fail.c (__chk_fail): Add libc_hidden_def.
+	* debug/snprintf_chk.c (__snprintf_chk): Fix order of arguments
+	passed to __vsnprintf_chk.
+	* debug/Versions (libc): Export __printf_chk, __fprintf_chk,
+	__vprintf_chk, __vfprintf_chk and __gets_chk @GLIBC_2.3.4.
+	* debug/vsnprintf_chk.c (__vsnprintf_chk): Don't call
+	__vsnprintf, instead create a temporary file with
+	_IO_strn_jumps jumptable.  If flags > 0, request
+	_IO_FLAGS2_CHECK_PERCENT_N.  Add libc_hidden_def.
+	* libio/Makefile (headers): Add bits/stdio2.h.
+	* libio/stdio.h: Include <bits/stdio2.h> if __USE_FORTIFY_LEVEL.
+	(sprintf, snprintf, vsprintf, vsnprintf): Remove defines.
+	* libio/strfile.h (_IO_strnfile): New type.
+	(_IO_strn_jumps): New extern.
+	* libio/vsnprintf.c (_IO_strnfile): Remove.
+	(_IO_strn_jumps): Remove static.
+	* libio/bits/stdio2.h: New file.
+	* libio/vswprintf.c (_IO_strnfile): Rename type to...
+	(_IO_wstrnfile): ...this.  Adjust all uses.
+	* libio/libio.h (_IO_FLAGS2_CHECK_PERCENT_N): Define.
+	* stdio-common/vfprintf.c (STR_LEN): Define.
+	(vfprintf): Add readonly_format variable.
+	Handle _IO_FLAGS2_CHECK_PERCENT_N.
+	(buffered_vfprintf): Copy _flags2.
+	* include/stdio.h (__sprintf_chk, __snprintf_chk, __vsprintf_chk,
+	__vsnprintf_chk, __printf_chk, __fprintf_chk, __vprintf_chk,
+	__vfprintf_chk): New prototypes.
+	(__vsprintf_chk, __vsnprintf_chk): Add libc_hidden_proto.
+	* include/string.h (__memcpy_chk, __memmove_chk, __mempcpy_chk,
+	__memset_chk, __strcpy_chk, __stpcpy_chk, __strncpy_chk, __strcat_chk,
+	__strncat_chk): New prototypes.
+	* include/bits/string3.h: New file.
+	* include/sys/cdefs.h (__chk_fail): Add libc_hidden_proto
+	and rtld_hidden_proto.
+	* string/Makefile (headers): Add bits/string3.h.
+	* string/bits/string3.h (bcopy, bzero): New defines.
+	(memset, memcpy, memmove, strcpy, strncpy, strcat, strncat): Change
+	macros so that inlines are used only if unknown destination size
+	or side-effects in destination argument.
+	(mempcpy, stpcpy): Likewise.  Protect with #ifdef __USE_GNU.
+
+2004-09-16  Ulrich Drepper  <drepper@redhat.com>
+
+	* debug/Makefile (routines): Add *_chk.
+	* debug/Versions (libc): Export __chk_fail, __memcpy_chk,
+	__memmove_chk, __mempcpy_chk, __memset_chk, __stpcpy_chk,
+	__strcat_chk, __strcpy_chk, __strncat_chk, __strncpy_chk,
+	__sprintf_chk, __vsprintf_chk, __snprintf_chk, __vsnprintf_chk
+	@GLIBC_2.3.4.
+	* debug/chk_fail.c: New file.
+	* debug/snprintf_chk.c: Likewise.
+	* debug/sprintf_chk.c: Likewise.
+	* debug/vsnprintf_chk.c: Likewise.
+	* debug/vsprintf_chk.c: Likewise.
+	* include/features.h (_FORTIFY_SOURCE): Document, handle.
+	(__USE_FORTIFY_LEVEL): Define.
+	(__GNUC_PREREQ): Move to earlier location.
+	* include/sys/cdefs.h (__chk_fail): New prototype.
+	* libio/bits/stdio.h (sprintf, vsprintf, snprintf, vsnprintf):
+	Define if __USE_FORTIFY_LEVEL.
+	* misc/sys/cdefs.h (__bos, __bos0): Define.
+	* string/string.h: Include <bits/string3.h> if __USE_FORTIFY_LEVEL.
+	* string/bits/string3.h: New header.
+	* sysdeps/generic/memcpy_chk.c: New file.
+	* sysdeps/generic/memmove_chk.c: Likewise.
+	* sysdeps/generic/mempcpy_chk.c: Likewise.
+	* sysdeps/generic/memset_chk.c: Likewise.
+	* sysdeps/generic/stpcpy_chk.c: Likewise.
+	* sysdeps/generic/strcat_chk.c: Likewise.
+	* sysdeps/generic/strcpy_chk.c: Likewise.
+	* sysdeps/generic/strncat_chk.c: Likewise.
+	* sysdeps/generic/strncpy_chk.c: Likewise.
+
+2004-10-17  Roland McGrath  <roland@frob.com>
+
+	* manual/memory.texi (Page Lock Functions): Typo fix.
+	Reported by Carlos Maziero <maziero@ppgia.pucpr.br>
+
+2004-10-16  Alfred M. Szmidt  <ams@kemisten.nu>
+
+	* sysdeps/mach/hurd/Makefile (link-libc-static): Use
+	`$(static-gnulib)' instead of `$(gnulib)'.
+
+2004-10-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* sunrpc/rpc_clntout.c: Avoid including rcsid into binary.
+	* sunrpc/rpc_cout.c: Likewise.
+	* sunrpc/rpc_hout.c: Likewise.
+	* sunrpc/rpc_main.c: Likewise.
+	* sunrpc/rpc_parse.c: Likewise.
+	* sunrpc/rpc_sample.c: Likewise.
+	* sunrpc/rpc_scan.c: Likewise.
+	* sunrpc/rpc_svcout.c: Likewise.
+	* sunrpc/rpc_tblout.c: Likewise.
+	* sunrpc/rpc_util.c: Likewise.
+
+2004-10-15  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/i386/sysdep.h (PUSHARGS_6, DOARGS_6,
+	POPARGS_6, _PUSHARGS_6, _DOARGS_6, _POPARGS_6): Define.
+	* sysdeps/unix/sysv/linux/i386/syscall.S (syscall): Handle 6 argument
+	syscalls.
+
+2004-10-15  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd.h (_PATH_NSCD_PASSWD_DB): Move to /var/db.
+	(_PATH_NSCD_GROUP_DB): Likewise.
+	(_PATH_NSCD_HOSTS_DB): Likewise.
+	(_PATH_NSCD_XYZ_DB_TMP): New #define, point to /var/run.
+	* nscd/connections.c (nscd_init): Non-persistent database files
+	are created with the _PATH_NSCD_XYZ_DB_TMP path.
+	* nscd/nscd.init: Create /var/db/nscd if necessary.
+
+2004-10-15  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/unix/sysv/linux/alpha/register-dump.h: New file.
+	* sysdeps/unix/sysv/linux/alpha/sigcontextinfo.h (SIGCONTEXT): Add
+	_code argument, pass sigcontext by pointer.
+	(SIGCONTEXT_EXTRA_ARGS): Likewise.
+	(GET_PC, GET_FRAME, GET_STACK): Expect ctx as pointer.
+
+2004-10-14  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/dl-machine.h (elf_machine_rela,
+	elf_machine_rela_relative, elf_machine_lazy_rel): Mark auto
+	instead of static.
+
+	* sysdeps/unix/sysv/linux/adjtime.c (ADJTIME): Use prototype
+	style definition.
+	* sysdeps/unix/sysv/linux/alpha/adjtime.c (ADJTIME): If
+	__ASSUME_TIMEVAL64, define __adjtime directly rather than
+	via strong_alias.
+
+2004-10-14  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/Makefile: When using compilers without -fpie support, also
+	link with -lselinux if necessary.
+	Patch by Arkadiusz Miskiewicz <arekm@pld-linux.org>.
+
+	* nscd/connections.c (nscd_init): Remove file if not persistent
+	and not shared.  Patch by Jerome Borsboom <j.borsboom@erasmusmc.nl>.
+
+2004-10-14  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/i386/setresuid.c: Handle
+	defined __NR_setresuid32 && !defined __NR_setresuid.
+	* sysdeps/unix/sysv/linux/i386/setresgid.c: Handle
+	defined __NR_setresgid32 && !defined __NR_setresgid.
+
+	* sysdeps/sparc/fpu/bits/mathinline.h (__signbitf, __signbit,
+	__signbitl, sqrtf, sqrt, sqrtl, fdim, fdimf): Use __NTH macro.
+
+	* sysdeps/generic/errno-loc.c: Don't undef #errno
+	if RTLD_PRIVATE_ERRNO.
+	* include/errno.h (__errno_location): If RTLD_PRIVATE_ERRNO,
+	add attribute_hidden.
+
+	* dlfcn/dlinfo.c (dlinfo_doit): Replace iteration over GL(dl_loaded)
+	chain with iteration over all namespaces' _ns_loaded chains.
+	* sysdeps/powerpc/powerpc32/dl-machine.c (__elf_preferred_address):
+	Likewise.
+	* sysdeps/mips/dl-machine.h (elf_machine_runtime_link_map): Likewise.
+
+	* elf/rtld.c (_dl_start): Fix one last dl_loaded.
+	* elf/dl-load.c (_dl_map_object_from_fd): Avoid definition of
+	label when it is not needed.
+	* elf/dl-close.c (_dl_close): Typo: & -> &&.
+
+2004-10-12  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/generic/segfault.c: Include alloca.h and stdint.h.
+	Don't include frame.h.
+	(CURRENT_STACK_FRAME, INNER_THAN, ADVANCE_STACK_FRAME): Remove.
+	(catch_segfault): Use backtrace function.
+
+	* sysdeps/unix/sysv/linux/ia64/bits/sigcontext.h: Fix comment.
+	* sysdeps/unix/sysv/linux/ia64/register-dump.h: New file.
+	* sysdeps/unix/sysv/linux/ia64/sigcontextinfo.h (GET_PC): Return sc_ip
+	field.
+
+2004-10-13  Ulrich Drepper  <drepper@redhat.com>
+
+	Add support for namespaces in the dynamic linker.
+	* dlfcn/Makefile (libdl-routines): Add dlmopen.
+	* dlfcn/Versions [libdl, GLIBC_2.3.4]: Add dlmopen.
+	* dlfcn/dlfcn.h: Define Lmid_t, LM_ID_BASE, and LM_ID_NEWLM.
+	Declare dlmopen.  Document RTLD_DI_LMID.
+	* dlfcn/dlinfo.c: Handle RTLD_DI_LMID.
+	* dlfcn/dlmopen.c: New file.
+	* dlfcn/dlopen.c: Pass new parameter to _dl_open.
+	* dlfcn/dlopenold.c: Likewise.
+	* elf/dl-addr.c: Adjust for removal of GL(dl_loaded).
+	* elf/dl-caller.c: Likewise.
+	* elf/dl-close.c: Likewise.
+	* elf/dl-conflict.c: Likewise.
+	* elf/dl-debug.c: Likewise.
+	* elf/dl-lookup.c: Likewise.
+	* elf/dl-sym.c: Likewise.
+	* elf/dl-version.c: Likewise.
+	* elf/do-lookup.h: Likewise.
+	* elf/rtld.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/dl-librecon.h: Likewise.
+	* elf/dl-deps.c: Likewise.  Add new parameter to _dl_map_object.
+	* elf/dl-fini.c: Call destructors in all namespaces.
+	* elf/dl-iteratephdr.c: Compute total nloaded.  Adjust for removal of
+	GL(dl_loaded).
+	* elf/dl-libc.c: Pass new parameter to _dl_open.  Adjust for removal
+	of GL(dl_loaded).
+	* elf/dl-load.c (_dl_map_object_from_fd): Don't load ld.so a second
+	time.  Reuse the one from the main namespace in all others.
+	Pass new parameter to _dl_new_object.
+	Adjust for removal of GL(dl_loaded).
+	* elf/dl-object.c: Take new parameter.  Use it to initialize l_ns.
+	Adjust for removal of GL(dl_loaded).
+	* elf/dl-open.c (_dl_open): Take new parameter.
+	Adjust for removal of GL(dl_loaded).
+	* elf/dl-support.c: Replace global _dl_loaded etc variables with
+	_dl_ns variable.
+	* include/dlfcn.h: Adjust prototype of _dl_open.
+	Define __LM_ID_CALLER.
+	* include/link.h: Add l_real, l_ns, and l_direct_opencount elements.
+	* sysdeps/generic/dl-tls.c: Bump TLS_STATIC_SURPLUS.  Since libc is
+	using TLS we need memory appropriate to the number of namespaces.
+	* sysdeps/generic/ldsodefs.h (struct rtld_global): Replace _dl_loaded,
+	_dl_nloaded, _dl_global_scope, _dl_main_searchlist, and
+	_dl_global_scope_alloc with _dl_ns element.  Define DL_NNS.
+	Adjust prototypes of _dl_map_object and member in rtld_global_ro.
+	* malloc/malloc.c: Include <dlfcn.h>.
+	* malloc/arena.c (ptmalloc_init): If libc is not in primary namespace,
+	never use brk.
+	* elf/Makefile: Add rules to build and run tst-dlmopen1 and
+	tst-dlmopen2.
+	* elf/tst-dlmopen1.c: New file.
+	* elf/tst-dlmopen1mod.c: New file.
+	* elf/tst-dlmopen2.c: New file.
+
+	* elf/dl-close.c: Improve reference counting by tracking direct loads.
+	* elf/dl-lookup.c (add_dependency): Likewise.
+	* elf/dl-open.c (dl_open_worker): Likewise.
+	* elf/rtld.c (dl_main): Likewise.
+
+2004-09-09  GOTO Masanori  <gotom@debian.or.jp>
+
+	[BZ #77]
+	* elf/dl-close.c: Count down l_opencount to check not only for
+	l_reldeps, but also l_initfini.
+
+2004-10-13  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-close.c (_dl_close): Update bug reporting instructions.
+
+2004-10-11  Ulrich Drepper  <drepper@redhat.com>
+
+	* timezone/asia: Update from tzdata2004e.
+	* timezone/southamerica: Likewise.
+	* timezone/private.h: Update from tzcode2004e.
+	* timezone/zdump.c: Likewise.
+
+	* stdio-common/vfscanf.c: Add support for reading localized
+	digits.  Patch mainly by Hamed Malek <hamed@bamdad.org>.
+
+	* resolv/res_init.c (res_thread_freeres): Reset _res.options.
+	[BZ #434]
+
+	* resolv/res_send.c (send_dg): Use nonblocking sockets.  Add
+	appropriate poll/select calls and restart operation if necessary.
+	Also handle EINTR.
+
+	* elf/tst-dlopenrpath.c (do_test): Enable code which was disabled
+	for debugging.
+
+	* elf/dl-sym.c (do_sym): Avoid using global variable.
+
+	* elf/dl-addr.c (_dl_addr): Really use match everywhere.
+
+2004-10-09  Andreas Schwab  <schwab@suse.de>
+
+	* sysdeps/m68k/memcopy.h (WORD_COPY_BWD): Remove use of cast as
+	lvalue.
+
+	* sysdeps/m68k/fpu/bits/mathinline.h: Remove __THROW from inline
+	definitions.
+
+2004-10-07  Andreas Schwab  <schwab@suse.de>
+
+	* misc/sys/uio.h: Change __vector to __iovec to avoid clash with
+	altivec.
+
+2004-10-06  Alan Modra  <amodra@bigpond.net.au>
+
+	* gmon/Makefile (CFLAGS-mcount.c): Move before inclusion of "Rules".
+	* sysdeps/powerpc/powerpc64/Makefile (CFLAGS-mcount.c): Add
+	-msoft-float.
+	* sysdeps/powerpc/powerpc64/sysdep.h (SAVE_ARG, REST_ARG): New macros.
+	(CALL_MCOUNT): Replace with a gas macro implementation.
+	(EALIGN): Delete PROF version.
+	* sysdeps/powerpc/powerpc64/__longjmp-common.S: Invoke CALL_MCOUNT.
+	* sysdeps/powerpc/powerpc64/bsd-_setjmp.S: Likewise.
+	* sysdeps/powerpc/powerpc64/bsd-setjmp.S: Likewise.
+	* sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise.
+	* sysdeps/powerpc/powerpc64/memcpy.S: Likewise.
+	* sysdeps/powerpc/powerpc64/memset.S: Likewise.
+	* sysdeps/powerpc/powerpc64/stpcpy.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strchr.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strcmp.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strcpy.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strlen.S: Likewise.
+	* sysdeps/powerpc/powerpc64/strncmp.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_ceil.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_ceilf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_copysign.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_floor.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_floorf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_llrint.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_llrintf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_llround.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_llroundf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_rint.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_rintf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_round.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_roundf.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_trunc.S: Likewise.
+	* sysdeps/powerpc/powerpc64/fpu/s_truncf.S: Likewise.
+
+	* sysdeps/powerpc/powerpc64/setjmp-common.S: Add extra entry point
+	past _mcount call.
+	* sysdeps/powerpc/powerpc64/bsd-_setjmp.S: Use it.
+	* sysdeps/powerpc/powerpc64/bsd-setjmp.S: Likewise.
+
+2004-10-06  Ulrich Drepper  <drepper@redhat.com>
+
+	* resolv/res_mkquery.c (res_nmkquery): Reject randombits value if
+	low 16 bits are zero.
+
+2004-10-06  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/tst-getaddrinfo2.c: Include stdlib.h and string.h.
+	(do_test): Use %p instead of 0x%08X to print a pointer.
+
+	* malloc/malloc.c: Include stdio-common/_itoa.h.
+
+2004-10-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/rtld.c (dl_main): Use _dl_debug_printf instead of _dl_printf
+	for prelink message.
+
+2004-10-05  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h: Include dl-sysdep.h.
+	* sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h: Likewise.
+
+2004-10-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* grp/initgroups.c: Remove duplicate group IDs.
+	* grp/compat-initgroups.c: Likewise.
+	* nscd/initgrcache.c: Likewise.
+
+2004-10-05  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/x86_64/sysconf.c (__sysconf): Return 200112L
+	for _SC_CPUTIME or _SC_THREAD_CPUTIME.
+
+	* nscd/Makefile (nscd-cflags): Set to -DIS_IN_nscd=1, plus
+	-fpie if building PIE.
+	(CFLAGS-*.c): Use it.
+
+	* nscd/Makefile (relro-LDFLAGS): Add -Wl,-z,now if have-z-relro.
+	($(objpfx)nscd): Add $(relro-LDFLAGS).
+
+	* sysdeps/unix/sysv/linux/i386/sysconf.c: Include hp-timing.h.
+	(__sysconf): Return -1 for _SC_CPUTIME or _SC_THREAD_CPUTIME if
+	!HP_TIMING_AVAIL.
+
+2004-10-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/Makefile (distribute): Remove TODO.
+	* nscd/TODO: Removed.
+
+2004-10-04  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/gai.c: Define __no_netlink_support if NEED_NETLINK is
+	defined and __ASSUME_NETLINK_SUPPORT is zero.
+	* sysdeps/unix/sysv/linux/Makefile (CFLAGS-gai.c): Add
+	-DNEED_NETLINK.
+
+	* malloc/mtrace.pl: Avoid calling location unless it is needed for
+	output.  Patch by Edward Bishop <binutils@gmail.com>.
+
+	* nscd/Makefile (CFLAGS-gai.c): Add -fpie.
+
+2004-10-04  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/unix/sysv/linux/ia64/clock_getcpuclockid.c
+	(clock_getcpuclockid): Add missing retval.
+
+	* sysdeps/unix/sysv/linux/ia64/sysconf.c (linux_sysconf): Fix a typo.
+
+2004-10-04  Roland McGrath  <roland@redhat.com>
+
+	* include/errno.h [RTLD_PRIVATE_ERRNO] (errno): Rename the real symbol
+	to rtld_errno.
+	* sysdeps/generic/errno.c [RTLD_PRIVATE_ERRNO] (rtld_errno): Define it,
+	and don't define any other errno names.
+	* sysdeps/unix/alpha/sysdep.h [RTLD_PRIVATE_ERRNO]: Use rtld_errno in
+	place of errno.
+	* sysdeps/unix/i386/sysdep.S: Likewise.
+	* sysdeps/unix/sysv/linux/i386/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/ia64/sysdep.S: Likewise.
+	* sysdeps/unix/sysv/linux/m68k/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/s390/s390-32/sysdep.S: Likewise.
+	* sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/s390/s390-64/sysdep.S: Likewise.
+	* sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/sh/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
+	* sysdeps/unix/x86_64/sysdep.S: Likewise.
+
+	* sysdeps/generic/errno.c [! USE___THREAD] (errno): Use `nocommon'
+	attribute instead of `section'.
+
+2004-10-04  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/ia64/sysconf.c: New file.
+	* sysdeps/unix/sysv/linux/ia64/Dist: Add has_cpuclock.c.
+	* sysdeps/unix/sysv/linux/ia64/clock_getcpuclockid.c: Move actual
+	testing code to...
+	* sysdeps/unix/sysv/linux/ia64/has_cpuclock.c: ...here.  New file.
+	* sysdeps/unix/sysv/linux/i386/sysconf.c: Add dynamic check for
+	_SC_CPUTIME and _SC_THREAD_CPUTIME.
+
+	* nscd/connections.c (start_threads): Use sysconf in case
+	_POSIX_CLOCK_SELECTION or _POSIX_MONOTONIC_CLOCK is not greater than zero.
+
+	* nscd/Makefile (nscd-modules): Add gai.
+	* nscd/gai.c: New file.
+	* nscd/nscd.c: Remove getaddrinfo stub definition.
+
+	* assert/assert.h: Give up on using __builtin_expect.
+
+	* elf/rtld.c (dl_main): Only skip => output in ldd mode if both
+	strings are identical.
+
+2004-03-18  Jakub Jelinek  <jakub@redhat.com>
+
+	* malloc/arena.c (aligned_heap_area): New variable.
+	(new_heap): If aligned_heap_area != NULL, attempt to use that
+	first.  If HEAP_MAX_SIZE << 1 area is already HEAP_MAX_SIZE bytes
+	aligned, remember the second half in aligned_heap_area.
+	(delete_heap): Clear aligned_heap_area if deleting the area right
+	before aligned_heap_area.
+
+2004-10-03  Juerg Billeter  <j@bitron.ch>
+
+	* nscd/nscd_initgroups.c (__nscd_getgrouplist): Return -1 if nscd
+	can't be used.  [BZ #424]
+
+2004-10-03  Ulrich Drepper  <drepper@redhat.com>
+
+	Dynamically create new threads if necessary.
+	* nscd/connections.c (fd_ready): If no thread available for processing
+	the request, create a new one unless the limit is reached.
+	(start_threads): Check errors from pthread_create.
+	* nscd/nscd.h: Declare max_nthreads.
+	* nscd/nscd_conf.c: Parse max-nthreads entry.
+	* nscd/nscd.conf: Add max-threads entry.
+	* nscd/nscd_stat.c: Print current and maximum number of threads.
+
+	Implement paranoia mode.
+	* nscd/connections.c (nscd_init): Mark database and socket descriptors
+	as close on exec.
+	(restart): New function.
+	(restart_p): New function.
+	(nscd_run): Add missing decrement of nready in case readylist is
+	empty.
+	(main_loop_poll): Call restart_p and restart.
+	(main_loop_epoll): Likewise.
+	(begin_drop_privileges): Save original UID and GID.
+	* nscd/nscd.c: Define new variables paranoia, restart_time,
+	restart_interval, oldcwd, old_gid, old_uid.
+	(main): Disable paranoia mode if we are not forking.
+	(check_pid): When re-execing, the PID file contains the same PID as
+	the current process.  Do not fail in this case.
+	* nscd/nscd.conf: Add paranoia and restart-interval entries.
+	* nscd/nscd.h: Define RESTART_INTERVAL.  Declare new variables.
+	* nscd/nscd_conf.c: Parse paranoia and restart-interval configurations.
+	* nscd/nscd_stat.c: Print paranoia and restart-interval values.
+
+	* nscd/connections.c: Implement alternative loop for main thread
+	which uses epoll.
+	* sysdeps/unix/sysv/linux/Makefile [subdir=nscd]
+	(CFLAGS-connections.c): Add -DHAVE_EPOLL.
+
+2004-10-02  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/Makefile (CFLAGS-initgrcache.c): Add to CFLAGS-* variables,
+	don't replace old content.
+
+	* nscd/connections.c: Rewrite handling of incoming connections.  All
+	are handled by one thread which then hands off the descriptors for the
+	real work to the worker threads.
+	* nscd/Makefile: Link nscd with librt.
+
+	* nscd/selinux.c: Pretty printing.
+
+	* nscd/dbg_log.c (dbg_log): Don't add unnecessary newline to
+	output.  Let syslog do the formatting if debug_level == 0.
+
+	* nscd/nscd_helper.c (get_mapping): No need to check timestamp if
+	nscd_certainly_running is nonzero.
+
+2004-10-02  Simon Josefsson  <jas@extundo.com>
+
+	[BZ #420]
+	* sysdeps/generic/memmem.c [!_LIBC]: Define __builtin_expect, to
+	make the file usable inside gnulib.
+
+2004-10-01  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (public_vALLOc): Add missing use of hooks.
+	(public_pVALLOc): Likewise.
+
+	* nscd/nscd_initgroups.c (__nscd_getgrouplist): Always add the
+	group the caller provided unless there is a real problem.
+
+	* posix/bug-glob1.c (prepare): Fix creation of symlink.
+
+2004-09-30  Ulrich Drepper  <drepper@redhat.com>
+
+	* posix/Makefile: Add rules to build and run bug-glob1.
+	* posix/bug-glob1.c: New file.
+
+	* iconv/iconv_prog.c (main): Print progress information to stderr.
+
+	* nscd/nscd.c (termination_handler): Reset timestamp so that
+	clients immediately stop using the database.
+
+	* nscd/nscd-client.h (__nscd_get_map_ref): Drop volatile from last
+	parameter.
+	(__nscd_drop_map_ref): Change second parameter to be a reference to
+	a variable.  Update variable when cycle count changed.
+	* nscd/nscd_helper.c (__nscd_get_map_ref): Remove volatile here, too.
+	* nscd/nscd_getai.c: Correctly use __nscd_drop_map_ref.  Reinitialize
+	all variables and avoid memory leak in case of retries.
+	* nscd/nscd_getgr_r.c: Likewise.
+	* nscd/nscd_gethst_r.c: Likewise.
+	* nscd/nscd_getpw_r.c: Likewise.
+	* nscd/nscd_initgroups.c: Likewise.
+
+	* nscd/nscd.h: Add declaration of addinitgroups and
+	readdinitgroups.
+
+2004-09-30  Andreas Jaeger  <aj@suse.de>
+
+	* nscd/Makefile (CFLAGS-nscd_initgroups.c): Set to -fpie.
+	(CFLAGS-initgrcache.c): Set to -fexceptions.
+
+2004-09-29  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/generic/glob.c (glob_in_dir): Don't blindly trust
+	readdir results; for symlinks or files of unknown type check using
+	stat whether the file exists.
+
+	* posix/tst-gnuglob.c (find_file): Handle leading "./".  Fix
+	recognition of files.
+
+2004-09-29  Jakub Jelinek  <jakub@redhat.com>
+
+	* time/tzfile.c (tzfile_mtime): New variable.
+	(__tzfile_read): Reread the file if mtime is different.
+
+2004-09-28  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/alpha/fpu/bits/mathinline.h (__fdimf, __fdim, fdimf, fdim):
+	Handle +inf/+inf.
+	* sysdeps/powerpc/fpu/bits/mathinline.h (fdim, fdimf): Likewise.
+	* sysdeps/sparc/fpu/bits/mathinline.h (fdim, fdimf): Likewise.
+
+2004-09-29  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd_gethst_r.c (nscd_gethst_r): Use correct constant for
+	testing result of __nscd_get_map_ref.
+
+2004-09-29  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/i386/fpu/s_fdim.S (__fdim): Handle +inf/+inf.
+	* sysdeps/i386/fpu/s_fdimf.S (__fdimf): Likewise.
+	* sysdeps/i386/fpu/s_fdiml.S (__fdiml): Likewise.
+
+2004-09-29  Ulrich Drepper  <drepper@redhat.com>
+
+	* grp/initgroups.c: Move compat_call implementation...
+	* grp/compat-initgroups.c: ...to here.  New file.
+	* grp/Makefile (distribute): Add compat-initgroups.c.
+	(CFLAGS-initgroups.c): Add -DUSE_NSCD=1.
+	* nscd/initgrcache.c: New file.
+	* nscd/nscd_initgroups.c: New file.
+	* nscd/Makefile (routines): Add nscd_initgroups.
+	(nscd-modules): Add initgrcache.
+	* nscd/cache.c (prune_cache): Add support for INITGROUPS entries.
+	* nscd/connections.c: Handle INITGROUPS requests.
+	* nscd/nscd-client.h: Define INITGROUPS, initgr_response_header.
+	Add initgrdata element to struct datahead.  Fix typo in comment.
+	* nscd/nscd_proto.h: Declare __nscd_getgrouplist.  Fix parameter
+	type in __nscd_getgrgid_r.
+	* nscd/selinux.c (perms): Add INITGROUPS entry.
+
+	* nscd/nscd_getai.c: No need to include <sys/mman.h>.
+
+	* sunrpc/get_myaddr.c (get_myaddress): Account for interfaces without
+	assigned addresses.
+	* sunrpc/pmap_clnt.c (__get_myaddress): Likewise.
+	* sunrpc/pmap_rmt.c (getbroadcastnets): Likewise.
+	* sunrpc/clnt_udp.c (is_network_up): Likewise.
+
+	* nscd/nscd.c: Define getaddrinfo hidden so that it is never found
+	outside.
+
+	* sysdeps/unix/sysv/linux/ia64/bits/siginfo.h (si_segvflags):
+	Renamed from si_flags due to conflict with si_flags from <elf.h>.
+
+2004-09-28  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd_getai.c: Use NO_MAPPING instead of MAP_FAILED for test
+	of failing __nscd_get_map_ref.  Fix a few typos.
+
+	* sysdeps/unix/sysv/linux/sigwait.c (do_sigwait): Make sure
+	SIGSETXID is not blocked.
+	* sysdeps/unix/sysv/linux/sigwaitinfo.c (do_sigwaitinfo): Likewise.
+	* sysdeps/unix/sysv/linux/sigtimedwait.c (do_sigtimedwait): Likewise.
+	* sysdeps/unix/sysv/linux/sigprocmask.c (__sigprocmask): Likewise.
+	* sysdeps/generic/sigfillset.c (sigfillset): Don't set SIGSETXID.
+
+	* sunrpc/get_myaddr.c (get_myaddress): Fix test for failing
+	getifaddrs call.
+	* sunrpc/pmap_clnt.c (__get_myaddress): Likewise.
+	* sunrpc/pmap_rmt.c (getbroadcastnets): Likewise.
+	* sunrpc/Makefile (xtests): Add tst-getmyaddr.
+	* sunrpc/tst-getmyaddr.c: New file.
+
+	* malloc/arena.c (ptmalloc_init): Allow MALLOC_CHECK_==0 to
+	disable all checking.
+
+	* sysdeps/unix/sysv/linux/ia64/bits/siginfo.h (si_flags): Add
+	support to get this value.
+
+2004-09-28  Jakub Jelinek  <jakub@redhat.com>
+
+	* io/utime.h (utime): Allow second argument to be NULL.
+
+2004-09-28  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): If NAME is a numerical IP
+	address and AI_CANONNAME is set, return copy of NAME as ai_canonname.
+
+2004-09-27  Andreas Jaeger  <aj@suse.de>
+
+	* sysdeps/i386/dl-machine.h (elf_machine_rela_relative): Replace
+	static inline by auto inline, add always_inline attribute.
+	(elf_machine_rel): Likewise.
+	(elf_machine_rela): Likewise.
+	(elf_machine_lazy_rel): Likewise.
+	(elf_machine_lazy_rela): Likewise.
+	(elf_machine_rel_relative): Likewise.
+
+2004-09-27  Ulrich Drepper  <drepper@redhat.com>
+
+	* sunrpc/xdr_intXX_t.c: Add xdr_quad_t and xdr_u_quad_t aliases.
+	* sunrpc/rpc/xdr.h: Declare xdr_quad_t and xdr_u_quad_t.
+	* sunrpc/rpc_parse.c (get_type): Use "quad_t" for TOK_HYPER.
+	Otherwise isvectordef will loop infinitely if typedef hyper int64_t
+	is seen.
+	(unsigned_dec): Use "u_quad_t" for similar reasons.
+	* sunrpc/Versions: Export xdr_quad_t and xdr_u_quad_t.
+
+2004-09-27  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/generic/bits/waitstatus.h (__WIFSIGNALED): Simplify bit
+	twiddling in last change.
+
+	* posix/tst-waitid.c (do_test): Add tests for waitpid with WCONTINUED.
+
+	[BZ #409]
+	* posix/sys/wait.h [__WIFCONTINUED] (WIFCONTINUED): New macro.
+	* stdlib/stdlib.h [__WIFCONTINUED] (WIFCONTINUED): New macro.
+	* sysdeps/generic/bits/waitstatus.h (__W_CONTINUED): New macro.
+	[WCONTINUED] (__WIFCONTINUED): New macro.
+	(__WIFSIGNALED): Rewritten to exclude __W_CONTINUED value, and have no
+	branches.
+
+	* sysdeps/unix/sysv/linux/waitid.c (do_waitid): Add fifth argument to
+	all three syscall uses, not just one!
+
+2004-09-26  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/alphaev6/memcpy.S: Mark .prologue.
+	* sysdeps/unix/alpha/sysdep.h (LEAF, ENTRY): Align entry points
+	to 16 byte boundaries.
+
+2004-09-26  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/readlib.c (process_file): Before complaining about too-short
+	file, check that it could potentially be an ELF file.  Also complain about
+	empty files.  [BZ #151].
+
+	* scripts/test-installation.pl: Fix ld.so recognition for new
+	LD_TRACE_LOADED_OBJECTS output format.
+	Patch by <jsberg04+computing.glibc@ftml.net>  [BZ #407].
+
+	* elf/dl-support.c (_dl_non_dynamic_init): Fix cleaning of
+	environment.  [BZ #384]
+
+	* sunrpc/clnt_udp.c (is_network_up): Use getifaddrs instead of ioctl.
+	* sunrpc/get_myaddr.c (get_myaddress): Likewise.
+	* sunrpc/pmap_clnt.c (__get_myaddress): Likewise.
+	* sunrpc/pmap_rmt.c (getbroadcastnets): Likewise.  Change interface
+	to avoid buffer overrun and remove now useless parameters.
+	(clnt_broadcast): Adjust caller.  [BZ #381].
+
+	* sysdeps/generic/s_fdim.c: Handle +inf/+inf.
+	* sysdeps/generic/s_fdimf.c: Likewise.
+	* sysdeps/generic/s_fdiml.c: Likewise.
+	* sysdeps/i386/i686/fpu/s_fdim.S: Likewise.
+	* sysdeps/i386/i686/fpu/s_fdimf.S: Likewise.
+	* sysdeps/i386/i686/fpu/s_fdiml.S: Likewise.
+	* sysdeps/powerpc/fpu/s_fdim.c: Likewise.
+	* sysdeps/powerpc/fpu/s_fdimf.c: Likewise.
+	* sysdeps/x86_64/fpu/s_fdiml.S: Likewise.
+	* math/libm-test.inc (fdim_test): Add test case.  [BZ #376].
+
+	* sysdeps/generic/bits/types.h: Fix __SQUAD_TYPE and __UQUAD_TYPE
+	for compilers without __GLIBC_HAVE_LONG_LONG.  [BZ #362]
+
+	* sysdeps/posix/getaddrinfo.c (getaddrinfo): Remove incorrect
+	requirement on socktype and protocol.
+	(gaih_inet): If numeric port number is given, return records for all
+	possible socket types.
+	* posix/tst-getaddrinfo2.c: New file.
+	* posix/Makefile (tests): Add tst-getaddrinfo2.  [BZ #358]
+
+2004-09-25  Ulrich Drepper  <drepper@redhat.com>
+
+	* locale/loadlocale.c (_nl_intern_locale_data): Recognize LC_CTYPE
+	data where _nl_value_type_LC_CTYPE does not contain the type
+	information.  Add range checks.
+	Reported by John Lumby <johnlumby@hotmail.com> [BZ #356].
+
+	* libio/vasprintf.c (_IO_vasprintf): Fix condition to decide
+	whether to realloc or not.
+	Reported by Pavel Kankovsky <peak@argo.troja.mff.cuni.cz> [BZ #346].
+
+	* intl/dcigettext.c (DCIGETTEXT): Protect tfind/tsearch calls.
+	* intl/dcigettext.c (_nl_find_msg): Call _nl_load_domain also if
+	decided < 0.
+	* intl/finddomain.c (_nl_find_domain): Likewise.
+	* intl/loadmsgcat.c (_nl_load_domain): Set decided to 1 only once we
+	are done.  First set to -1 to signal initialization is ongoing.
+	Protect against concurrent callers with recursive lock.
+	* intl/finddomain.c (_nl_find_domain): Protect calls to
+	_nl_make_l10nflist.  [BZ #322]
+
+	* sysdeps/posix/getaddrinfo.c (getaddrinfo): If determination of
+	source address fails, initialize source_addr_len field so that
+	duplicate address recognition does not copy junk.
+
+2004-09-25  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/i386/setuid.c (__setuid): Remove second
+	result declaration.
+
+2004-09-22  Andreas Schwab  <schwab@suse.de>
+
+	* sysdeps/unix/sysv/linux/ia64/sysdep.h: Adjust whitespace.
+
+2004-09-24  Ulrich Drepper  <drepper@redhat.com>
+
+	* misc/daemon.c (daemon): Don't succeed if /dev/null cannot be
+	opened.
+
+	* nis/ypclnt.c (do_ypcall): Add one missing unlock.  Simplify the
+	code a bit.
+
+	* misc/daemon.c (daemon): Define errno in case /dev/null is not
+	the correct device.
+
+	* nis/ypclnt.c (yp_bind_file): Optimize a bit.  Minimal cleanups.
+
+2004-09-23  Andreas Jaeger  <aj@suse.de>
+
+	* locale/weight.h (findidx): Remove static, it's not supported
+	anymore with GCC 4.0 in a block scope.
+	* locale/weightwc.h (findidx): Likewise.
+	* posix/regcomp.c (seek_collating_symbol_entry): Likewise.
+	(lookup_collation_sequence_value): Likewise.
+	(build_range_exp): Likewise.
+	(build_collating_symbol): Likewise.
+	* iconv/iconvconfig.c (write_output): Likewise.
+	* elf/do-rel.h (elf_dynamic_do_rel): Likewise.
+
+	* sysdeps/x86_64/dl-machine.h (elf_machine_rela_relative): Remove
+	static, add always_inline attribute.
+	(elf_machine_rela): Likewise.
+	(elf_machine_lazy_rel): Likewise.
+
+	* elf/dynamic-link.h (elf_get_dynamic_info): Make static dependent
+	on !RESOLVE so that it's not defined in local scope.
+
+2004-09-23  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/unix/sysv/linux/sh/sysdep.h (INTERNAL_SYSCALL_NCS): Define.
+
+2004-09-23  Thorsten Kukuk  <kukuk@suse.de>
+
+	* sysdeps/unix/sysv/linux/sys/mount.h: Sync MS_RMT_MASK flag
+	and BLK* ioctls with linux kernel headers.
+
+2004-09-23  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/generic/bits/dlfcn.h: Add RTLD_DEEPBIND.
+	* elf/dl-object.c (_dl_new_object): Add new parameter mode.  If mode
+	has RTLD_DEEPBIND set add local searchlist before global scope.
+	* sysdeps/generic/ldsodefs.h (_dl_new_object): Adjust prototype.
+	* elf/rtld.c: Adjust callers of _dl_new_object.
+	* elf/dl-load.c: Likewise.
+	(_dl_map_object_from_fd): If RTLD_DEEPBIND is used, don't do anything
+	for DF_SYMBOLIC.
+	* elf/dl-open.c (dl_open_writer): Pass RTLD_DEEPBIND flag on to
+	_dl_map_object_deps.
+	* elf/tst-deep1.c: New file.
+	* elf/tst-deep1mod1.c: New file.
+	* elf/tst-deep1mod2.c: New file.
+	* elf/tst-deep1mod3.c: New file.
+	* elf/Makefile: Add rules to build and run new tests.
+
+	* elf/dl-deps.c: Pretty printing.
+
+2004-09-23  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/alpha/sysdep.h (inline_syscall[0-6]): Change name
+	argument to numbers from syscall names.
+	(INLINE_SYSCALL1): Pass __NR_##name to inline_syscall##nr.
+	(INTERNAL_SYSCALL_NCS): Renamed from...
+	(INTERNAL_SYSCALL_1): ... this.  Use INTERNAL_SYSCALL_NCS.
+	* sysdeps/unix/sysv/linux/s390/s390-32/sysdep.h
+	(INTERNAL_SYSCALL_NCS): Define.
+	* sysdeps/unix/sysv/linux/s390/s390-64/sysdep.h
+	(INTERNAL_SYSCALL_NCS): Likewise.
+	* sysdeps/unix/sysv/linux/sparc/sysdep.h (inline_syscall[0-6]):
+	Change name argument to numbers from syscall names.
+	(INLINE_SYSCALL, INTERNAL_SYSCALL): Adjust.
+	(INTERNAL_SYSCALL_NCS): Define.
+
+2004-09-22  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.c (malloc_printerr): Use syslog if writev failed.
+
+	* string/string.h: Add __nonnull annotations.
+	* stdlib/stdlib.h: Likewise.
+
+2004-09-20  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/unix/sysv/linux/ia64/sysdep.h (DO_INLINE_SYSCALL):
+	Renamed to ...
+	(DO_INLINE_SYSCALL_NCS): This.
+	(DO_INLINE_SYSCALL): New.
+	(INLINE_SYSCALL): Updated.
+	(INTERNAL_SYSCALL_NCS): Updated.
+
+2004-09-21  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/sprof.c (load_shobj): Add support for reading symbol table
+	from debuginfo file.
+
+	* elf/ldd.bash.in: Fix syntax errors.
+
+2004-09-20  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/dl-execstack.c
+	(_dl_make_stack_executable): Remove some duplication.
+
+	* nscd/nscd.c (options): Mark S option as hidden.
+	(parse_opt): When S option is used, print warning message.
+	* nscd/grpcache.c (addgrbyX): Don't handle secure mode.
+	* nscd/hstcache.c (addhstbyX): Don't handle secure mode.
+	* nscd/aicache.c (addhstaiX): Don't handle secure mode.
+	* nscd/pwdcache.c (addpwbyX): Don't handle secure mode.
+
+2004-09-20  Roland McGrath  <roland@frob.com>
+
+	* elf/dl-load.c (__stack_prot): Only use PROT_GROWSUP/PROT_GROWSDOWN
+	in initializer #if defined.
+
+2004-09-18  Paul Eggert  <eggert@cs.ucla.edu>
+
+	[BZ #391]
+	* stdlib/getsubopt.c: Merge fixes from gnulib.
+	(__strchrnul) [!_LIBC]: Define and include "strchrnul.c".
+	(getsubopt): Use prototypes, not K&R style.
+	Fix bug: memcmp(A,B,N) was being invoked on a memory block B
+	whose size might be smaller than N.  Use strncmp to avoid the bug.
+
+2004-09-20  Ulrich Drepper  <drepper@redhat.com>
+
+	* configure.in: If selinux has not explicitly been requested, don't
+	comment on it missing.
+
+	* elf/dl-load.c: Define __stack_prot.
+	* sysdeps/unix/sysv/linux/dl-execstack.c: Don't define
+	__stack_prot here, just declare it.
+
+2004-09-20  Jakub Jelinek  <jakub@redhat.com>
+
+	* configure.in (libc_cv_z_relro): Only set to yes if linker script
+	contains DATA_SEGMENT_RELRO_END.
+
+2004-09-20  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-load.c (_dl_map_object_from_fd): Add some error checking.
+	Reorder code slightly.
+	* elf/rtld.c (dl_main): No need to check whether l_info[DT_HASH]
+	is non-null, _dl_setup_hash will do that.
+
+2004-09-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/setegid.c [HAVE_PTR__NPTL_SETXID]: Call
+	callback to set IDs in all other threads as well.
+	* sysdeps/unix/sysv/linux/seteuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setegid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/seteuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setgid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setreuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setregid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setresuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/setresgid.c: Likewise.
+	* sysdeps/unix/sysv/linux/setuid.c: New file.
+	* sysdeps/unix/sysv/linux/setgid.c: New file.
+	* sysdeps/unix/sysv/linux/setreuid.c: New file.
+	* sysdeps/unix/sysv/linux/setregid.c: New file.
+	* sysdeps/unix/sysv/linux/setresuid.c: New file.
+	* sysdeps/unix/sysv/linux/setresgid.c: New file.
+	* sysdeps/unix/sysv/linux/i386/sysdep.h: Define INTERNAL_SYSCALL_NCS.
+	* sysdeps/unix/sysv/linux/ia64/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/sysdep.h: Likewise.
+	* sysdeps/unix/sysv/linux/sparc/sparc32/setegid.c: Use x86 version.
+	* sysdeps/unix/sysv/linux/sparc/sparc32/seteuid.c: Likewise.
+	* sysdeps/unix/sysv/linux/sparc/sparc32/setresgid.c: New file.
+	* sysdeps/unix/sysv/linux/sparc/sparc32/setresuid.c: New file.
+	* sysdeps/unix/sysv/linux/sparc/sparc32/syscalls.list: Remove setresgid
+	and setresuid.
+	* nscd/aicache.c: Use pthread_seteuid_np instead of seteuid.
+	* nscd/grpcache.c: Likewise.
+	* nscd/hstcache.c: Likewise.
+	* nscd/pwdcache.c: Likewise.
+
+	* resolv/res_mkquery.c (res_nmkquery): Fix typo.
+
+2004-09-18  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/dl-execstack.c: Get protection flag
+	from memory.
+	* elf/dl-load.c (_dl_map_object_from_fd): Add PROT_EXEC flag to
+	__stack_flags.
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Do not use
+	gethostbyname3_r if we are not looking for the canonical name.
+
+	* resolv/res_mkquery.c (res_nmkquery): Randomize request ID every
+	time.
+
+2004-09-18  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/unix/sysv/linux/waitid.c (do_waitid): Pass fifth argument to
+	system call.
+
+2004-09-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* include/link.h (struct link_map): Add l_used element.
+	* sysdeps/generic/ldsodefs.h: Define DL_DEBUG_UNUSED.
+	* elf/rtld.c (process_dl_debug): Recognize unused.
+	(dl_main): When unused debug flag is set check for unused direct
+	dependencies.
+	When printing dependencies and SONAME starts with /, omit the SONAME =>
+	part.
+	* elf/dl-lookup.c (_dl_lookup_symbol_x): Mark object in which the
+	symbol has been found as used.
+	* elf/ldd.bash.in: Add -u option.
+
+2004-09-18  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/nscd_setup_thread.c (setup_thread):
+	Do nothing if __NR_set_tid_address is not defined.  [BZ #390]
+
+2004-09-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c: Use gethostbyname3_r NSS function
+	in case it is available.
+
+2004-09-17  Jakub Jelinek  <jakub@redhat.com>
+
+	* nscd/nscd.c (parse_opt): Write arg string instead of (void *)
+	key to the socket.
+
+2004-09-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* misc/sys/cdefs.h: Define __nonnull using nonnull function attribute
+	for gcc 3.3 and higher.
+	* io/fcntl.h: Add __nonnull where appropriate.
+	* io/ftw.h: Likewise.
+	* io/utime.h: Likewise.
+	* io/sys/poll.h: Likewise.
+	* io/sys/sendfile.h: Likewise.
+	* io/sys/stat.h: Likewise.
+	* io/sys/statfs.h: Likewise.
+	* io/sys/statvfs.h: Likewise.
+	* posix/unistd.h: Likewise.
+	* catgets/nl_types.h: Likewise.
+	* crypt/crypt.h: Likewise.
+	* debug/execinfo.h: Likewise.
+
+2004-09-16  Ulrich Drepper  <drepper@redhat.com>
+
+	* posix/glob.h: Remove cruft to make header usable outside glibc.
+	The maintenance headache is too big.
+
+	* configure.in: Add test for required SELinux features.
+	* config.make.in: Add have-selinux entry.
+	* config.h.in: Add HAVE_SELINUX entry.
+	* nscd/Makefile (nscd-modules): Add selinux.
+	(CFLAGS-selinux.c): Add -fpie.
+	Define selinux-LIBS and use in link line.
+	* nscd/connections.c (handle_request): Check access SELinux permissions
+	before processing request.
+	* nscd/nscd.c (main): Initialize selinux_enabled and stop avc thread.
+	* nscd/nscd_stat.c: Transmit and print AVC statistics.
+	* nscd/selinux.c: New file.
+	* nscd/selinux.h: New file.
+	Patch mostly by Matthew Rickard <mjricka@epoch.ncsc.mil>.
+
+2004-09-16  Jakub Jelinek  <jakub@redhat.com>
+
+	* nscd/nscd_helper.c (__nscd_unmap, get_mapping): Use __munmap
+	instead of munmap.
+
+	* nscd/Makefile (CFLAGS-aicache.c): Set to -fpie.
+
+2004-09-16  Thorsten Kukuk  <kukuk@suse.de>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Check
+	__nss_not_use_nscd_hosts variable if nscd should be used or not.
+
+2004-09-16  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd_proto.h: Define NSS_NSCD_RETRY.
+	Add __nscd_getai prototype.
+	* nss/getXXbyYY_r.c: Remove definition of NSS_NSCD_RETRY.
+	* nscd/nscd-client.h: Remove __nscd_getai prototype.
+	* nscd/nscd_getai.c: Include nscd_proto.h.
+
+	* elf/ldd.bash.in: Add support for SELinux environments.
+	Patch by Stephen Smalley <sds@epoch.ncsc.mil>.
+
+2004-09-16  Roland McGrath  <roland@redhat.com>
+
+	* configure.in (--with-headers): Let argument contain a : separated
+	list of directories to use, not just one.
+	* configure: Regenerated.
+
+2004-09-15  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/fpu/libm-test-ulps: Update.
+	* scripts/data/c++-types-alpha-linux-gnu.data: New file.
+
+2004-09-15  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/aicache.c: Prefer using gethostbyname3_r NSS callback to also
+	get ttl and canonical name.  Use these two values.
+	* resolv/Versions: Export _nss_dns_gethostbyname3_r from libnss_dns.
+	* resolv/nss_dns/dns-host.c (getanswer_r): Take two new parameters.
+	If nonnull fill with TTL and pointer to canonical name respectively.
+	(_nss_dns_gethostbyaddr_r): Pass NULL in new parameters of getanswer_r.
+	(_nss_dns_gethostbyname2_r): Just wrapper around
+	_nss_dns_gethostbyname3_r.
+	(_nss_dns_gethostbyname3_r): Renamed from _nss_dns_gethostbyname2_r.
+	Take two new parameters which are passed to getanswer_r.
+
+	* nscd/Makefile (routines): Add nscd_getai.
+	(nscd-modules): Add aicache.
+	* nscd/aicache.c: New file.
+	* nscd/nscd_getai.c: New file.
+	* nscd/cache.c (prune_cache): Handle GETAI request type.
+	* nscd/connections.c: Add GETAI support in request handling.
+	* nscd/nscd-client.h (request_type): Add GETAI.
+	Define ai_response_header and struct nscd_ai_result types.
+	(struct datahead): Add aidata field.
+	Declare __nscd_getai.
+	* nscd/nscd.c: Add getaddrinfo definition to catch problems.
+	* nscd/nscd.h: Declare addhstai and readdhstai.
+
+	* sysdeps/posix/getaddrinfo.c: Add support for using cached results.
+	* posix/Makefile (CFLAGS-getaddrinfo.c): Add -DUSE_NSCD.
+
+	* nscd/nscd-client.h (struct datahead): Use uint8_t instead of bool.
+
+2004-09-14  Ulrich Drepper  <drepper@redhat.com>
+
+	* misc/sys/cdefs.h: Remove debugging text from __P and __PMT.
+
+2004-09-13  Ulrich Drepper  <drepper@redhat.com>
+
+	* misc/sys/cdefs.h: Restore old definition of __P.
+	* csu/munch.awk: Remove uses of __P and __PMT.
+	* gmon/gmon.c: Likewise.
+	* hesiod/hesiod.h: Likewise.
+	* include/stdio.h: Likewise.
+	* io/fts.c: Likewise.
+	* libio/genops.c: Likewise.
+	* libio/libioP.h: Likewise.
+	* libio/fileops.c: Likewise.
+	* libio/iolibio.h: Likewise.
+	* libio/libio.h: Likewise.
+	* libio/memstream.c: Likewise.
+	* libio/oldfileops.c: Likewise.
+	* libio/oldstdfiles.c: Likewise.
+	* libio/iopopen.c: Likewise.
+	* libio/vsnprintf.c: Likewise.
+	* libio/vswprintf.c: Likewise.
+	* libio/wgenops.c: Likewise.
+	* libio/oldiopopen.c: Likewise.
+	* locale/programs/xmalloc.c: Likewise.
+	* locale/programs/xstrdup.c: Likewise.
+	* malloc/mtrace.c: Likewise.
+	* misc/getttyent.c: Likewise.
+	* misc/getusershell.c: Likewise.
+	* nis/rpcsvc/ypupd.h: Likewise.
+	* posix/fnmatch.h: Likewise.
+	* posix/glob.h: Likewise.
+	* resolv/arpa/nameser.h: Likewise.
+	* resolv/gethnamaddr.c: Likewise.
+	* resolv/resolv.h: Likewise.
+	* resolv/inet_net_ntop.c: Likewise.
+	* resolv/inet_net_pton.c: Likewise.
+	* resolv/res_init.c: Likewise.
+	* resolv/nss_dns/dns-network.c: Likewise.
+	* stdio-common/vfprintf.c: Likewise.
+	* stdio-common/reg-printf.c: Likewise.
+	* sysdeps/generic/chflags.c: Likewise.
+	* sysdeps/generic/fchflags.c: Likewise.
+	* sysdeps/generic/glob.c: Likewise.
+	* sysdeps/generic/printf_fphex.c: Likewise.
+	* sysdeps/generic/memcmp.c: Likewise.
+	* sysdeps/generic/memcopy.h: Likewise.
+	* sysdeps/generic/morecore.c: Likewise.
+	* sysdeps/generic/sstk.c: Likewise.
+	* sysdeps/posix/sigvec.c: Likewise.
+	* sysdeps/posix/ttyname_r.c: Likewise.
+	* sysdeps/standalone/arm/bits/errno.h: Likewise.
+	* sysdeps/standalone/standalone.h: Likewise.
+	* sysdeps/standalone/i386/force_cpu386/brdinit.c: Likewise.
+	* sysdeps/standalone/i386/force_cpu386/_exit.c: Likewise.
+	* sysdeps/unix/arm/start.c: Likewise.
+	* sysdeps/unix/bsd/sigaction.c: Likewise.
+	* sysdeps/unix/bsd/sun/m68k/sigtramp.c: Likewise.
+	* sysdeps/unix/bsd/sun/sparc/sigtramp.c: Likewise.
+	* sysdeps/unix/bsd/sun/sunos4/wait4.c: Likewise.
+	* sysdeps/unix/bsd/ultrix4/mips/sigvec.c: Likewise.
+	* sysdeps/unix/bsd/ultrix4/sysconf.c: Likewise.
+	* sysdeps/unix/sparc/start.c: Likewise.
+	* sysdeps/unix/sysv/getdents.c: Likewise.
+	* sysdeps/unix/sysv/irix4/fpathconf.c: Likewise.
+	* sysdeps/unix/sysv/irix4/getgroups.c: Likewise.
+	* sysdeps/unix/sysv/irix4/getpriority.c: Likewise.
+	* sysdeps/unix/sysv/irix4/getrusage.c: Likewise.
+	* sysdeps/unix/sysv/irix4/pathconf.c: Likewise.
+	* sysdeps/unix/sysv/irix4/setgroups.c: Likewise.
+	* sysdeps/unix/sysv/irix4/sigtramp.c: Likewise.
+	* sysdeps/unix/sysv/irix4/start.c: Likewise.
+	* sysdeps/unix/sysv/irix4/sysconf.c: Likewise.
+	* sysdeps/unix/sysv/sco3.2.4/__setpgid.c: Likewise.
+	* sysdeps/unix/sysv/sco3.2.4/getgroups.c: Likewise.
+	* sysdeps/unix/sysv/sysv4/__getpgid.c: Likewise.
+	* sysdeps/unix/sysv/sysv4/__setpgid.c: Likewise.
+	* sysdeps/unix/sysv/sysv4/getpgid.c: Likewise.
+	* sysdeps/unix/sysv/sysv4/setpgid.c: Likewise.
+	* sysdeps/unix/sysv/sysv4/sethostname.c: Likewise.
+	* sysdeps/unix/sysv/sysv4/setsid.c: Likewise.
+	* sysdeps/unix/sysv/sysv4/sysconf.c: Likewise.
+	* sysdeps/unix/sysv/sysv4/waitpid.c: Likewise.
+	* sysdeps/unix/sysv/sysv4/solaris2/getdents.c: Likewise.
+	* time/tzset.c: Likewise.
+	* time/strftime_l.c: Likewise.
+	* time/strptime_l.c: Likewise.
+	* crypt/md5.h: Likewise.
+
+2004-09-13  Andreas Jaeger  <aj@suse.de>
+
+	* configure.in: Support GCC 4.x.
+	* configure: Regenerated.
+
+2004-09-13  Thorsten Kukuk  <kukuk@suse.de>
+
+	* nscd/nscd_stat.c: Don't access dbs[cnt].head for disabled services.
+
+	* nscd/nscd.init: Fix path to socket.
+
+2004-09-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd_helper.c (get_mapping): Correctly check cmsg length.
+	Avoid file descriptor leak in case of size mismatch.
+
+	* nscd/nscd-client.h: Fix database structure layout for biarch.
+	* nscd/mem.c (gc): Add casts to avoid warnings.
+
+	* nss/getent.c: Don't preconstruct help message.  Do it only when
+	needed.
+
+	* locale/programs/locale.c: Simplify help message printing.
+
+2004-09-12  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/hurd/i386/init-first.c (init1) [! SHARED]: Add decls
+	missing in last change.
+
+2004-09-11  Thorsten Kukuk  <kukuk@suse.de>
+
+	* nis/nss_compat/compat-grp.c: Check that buflen is greater than zero
+	before writing data into the buffer with negative offset.
+	* nis/nss_compat/compat-initgroups.c: Likewise.
+	* nis/nss_compat/compat-pwd.c: Likewise.
+	* nis/nss_compat/compat-spwd.c: Likewise.
+
+2004-09-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* misc/syslog.c (vsyslog): Fix copying of PID in case of
+	out-of-memory situation.  [BZ #365].
+
+	* sysdeps/alpha/fpu/bits/mathinline.h: Use __NTH instead of
+	__THROW in inline function definitions.
+
+	* posix/spawn.h [__USE_GNU]: Define POSIX_SPAWN_USEVFORK.
+	* posix/spawnattr_setflags.c: Check whether any unknown bit is set
+	in FLAGS parameter and fail if this is the case.
+	* sysdeps/posix/spawni.c: Use vfork if POSIX_SPAWN_USEVFORK flag is
+	set.
+
+	* nscd/pwdcache.c (cache_addpw): Sync also negative results to disk.
+	* nscd/grpcache.c (cache_addgr): Likewise.
+	* nscd/hstcache.c (cache_addhst): Likewise.
+
+2004-09-11  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/hurd/i386/init-first.c (init1) [! SHARED]:
+	Set _dl_phdr and _dl_phnum.
+	(init1): When bootstrap task, bail early and never examine *D.
+
+2004-09-11  Alfred M. Szmidt  <ams@kemisten.nu>
+
+	* sysdeps/mach/hurd/i386/tls.h (__i386_set_gdt) [!HAVE_I386_SET_GDT]:
+	Cast THR, SEL and DESC to `void'.
+
+2004-09-11  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/connections.c (nscd_run): Call setup_thread only for enabled
+	databases.
+
+	* sysdeps/unix/bsd/bsd4.4/bits/socket.h: Use __NTH for __cmsg_nxthdr.
+
+2004-09-10  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd.c (pagesize_m1): New variable.
+	(main): Initialize it.
+	* nscd/nscd.h: Declare pagesize_m1.
+	* nscd/hstcache.c: Pass correctly aligned address to msync.
+	* nscd/grpcache.c: Likewise.
+	* nscd/pwdcache.c: Likewise.
+
+2004-09-10  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/unix/sysv/linux/sh/pthread_cond_wait.S: Decrement
+	__nwaiters.  If pthread_cond_destroy has been called and this is
+	the last waiter, signal pthread_cond_destroy caller and avoid
+	using the pthread_cond_t structure after unlock.
+	* sysdeps/unix/sysv/linux/sh/pthread_cond_timedwait.S: Likewise.
+
+2004-09-10  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/kernel-features.h: Don't define
+	__ASSUME_CLONE_STOPPED.
+
+2004-09-10  Jakub Jelinek  <jakub@redhat.com>
+
+	* misc/sys/cdefs.h (__REDIRECT_NTH): Change order of __THROW and
+	__asm__ for C++.  [BZ #377]
+
+2004-09-10  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd_stat.c: Improve output by also printing .shared and
+	.persistent.
+
+	* nscd/connections.c: Allow cache sharing to be really disabled.
+
+2004-09-10  Jakub Jelinek  <jakub@redhat.com>
+
+	* malloc/malloc.c (_int_free): Only do arena boundary check for
+	contiguous arenas.
+
+2004-09-10  Kazuhiro Inaoka  <inaoka.kazuhiro@renesas.com>
+
+	* stdlib/longlong.h [__M32R__] (add_ssaaaa, sub_ddmmss): Fix broken
+	instruction operands.
+	* elf/elf.h: Add R_M32R_* relocs.
+
+2004-09-09  Ulrich Drepper  <drepper@redhat.com>
+
+	* misc/sys/cdefs.h: Despite what the gcc manual says, gcc 3.2
+	seems not to support the nothrow attribute.  Use it only for gcc
+	3.3 and higher.
+
+	* malloc/hooks.c (top_check): Print top chunk corruption as normal
+	error message.
+
+	* malloc/malloc.c (malloc_printerr): Don't make informational
+	message look like error message.
+
+2004-09-09  Andreas Jaeger  <aj@suse.de>
+
+	* nscd/Makefile (CFLAGS-nscd_setup_thread.c): Set to -fpie.
+
+2004-09-08  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/bits/socket.h (__SCM_CONNECT): Removed.
+
+	* malloc/malloc.c (_int_free): Add inexpensive double free and
+	memory corruption tests.
+	(malloc_printf_nc): Renamed to malloc_printerr.  Second parameter
+	is no format string anymore.  Don't use stdio.  Adjust all callers.
+	* malloc/hooks.c: Adjust malloc_printf_nc callers.
+
+2004-09-08  Roland McGrath  <roland@redhat.com>
+
+	* malloc/mcheck.c: Don't use __P.
+	Use prototype definitions for static functions.
+
+2004-09-08  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/ia64/fpu/bits/mathinline.h: Use __NTH instead of __THROW.
+
+2004-09-08  Ulrich Drepper  <drepper@redhat.com>
+	    Jakub Jelinek  <jakub@redhat.com>
+
+	* nscd/nscd-client.h: Add a few #includes.
+
+	* nscd/Makefile (nscd-modules): Add nscd_setup_thread.
+	* nscd/connections.c (nscd_run): Call setup_thread for maintenance
+	threads.
+	* nscd/nscd-client.h (struct database_pers_head): Add
+	nscd_certainly_running field.
+	* nscd/nscd.h: Declare setup_thread.
+	* nscd/nscd_helper.c (__nscd_get_map_ref): Avoid the time test if
+	nscd_certainly_running is nonzero.
+	* sysdeps/generic/nscd_setup_thread.c: New file.
+	* sysdeps/unix/sysv/linux/nscd_setup_thread.c: New file.
+
+2004-09-08  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/connections.c: Implement r/o sharing of nscd's cache with client
+	processes via shared memory.
+	* nscd/nscd-client.h: Likewise.
+	* nscd/nscd.h: Likewise.
+	* nscd/nscd_conf.c: Likewise.
+	* nscd/nscd_getgr_r.c: Likewise.
+	* nscd/nscd_getpw_r.c: Likewise.
+	* nscd/nscd_gethst_r.c: Likewise.
+	* nscd/nscd.conf: Add new config parameters.
+	* nscd/Makefile (aux): Add nscd_helper.
+	* nscd/nscd_helper.c: New file.
+	* nscd/mem.c (gc): Indicate beginning and end of the gc cycle.
+
+	* nscd/hstcache.c: Simplify a lot.  We cache only the request itself,
+	no derived information.
+	* connections.c (nscd_init): Fix bug in testing size of the persistent.
+
+	* nis/Makefile (aux): Add nis_hash.
+	* nis/nis_hash.c: New file.  Split out from nis_util.c.
+	* nis/nis_util.c: Move __nis_hash code in separate file.
+
+	* csu/tst-atomic.c: Improve atomic_increment_val test which would
+	not have found a ppc bug.
+
+	* sysdeps/s390/fpu/bits/mathinline.h: Remove unnecessary includes.
+
+	* malloc/arena.c: Remove __MALLOC_P uses.
+	* malloc/malloc.c: Likewise.
+
+	* malloc/mtrace.c: Remove __P uses.
+	* malloc/mcheck-init.c: Likewise.
+
+2004-09-07  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/powerpc/powerpc64/configure.in: New file.
+	* config.h.in (USE_PPC64_OVERLAPPING_OPD): Add.
+	* configure.in (HAVE_ASM_GLOBAL_DOT_NAME): Remove.
+	* sysdeps/powerpc/powerpc64/sysdep.h: Formatting.
+	(OPD_ENT, BODY_LABEL, ENTRY_1, ENTRY_2, END_2, DOT_PREFIX,
+	BODY_PREFIX): Define.
+	(ENTRY, DOT_LABEL, END, TRACEBACK, END_GEN_TB, EALIGN): Support
+	HAVE_ASM_GLOBAL_DOT_NAME or no dot symbols,
+	USE_PPC64_OVERLAPPING_OPD or never overlapping .opd entries.
+	* sysdeps/powerpc/powerpc64/dl-machine.h: Include sysdep.h.
+	(TRAMPOLINE_TEMPLATE, RTLD_START): Use the new sysdep.h macros.
+
+2004-09-07  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/malloc.h: Don't define __THROW if it is already defined.
+
+	* sysdeps/powerpc/bits/atomic.h (atomic_increment): Define.
+	(atomic_decrement): Define.
+
+	* sysdeps/powerpc/bits/atomic.h: Implement atomic_increment_val and
+	atomic_decrement_val.
+	* sysdeps/powerpc/powerpc32/bits/atomic.h: Likewise.
+	* sysdeps/powerpc/powerpc64/bits/atomic.h: Likewise.
+
+	* csu/tst-atomic.c (do_test): Add tests of atomic_increment_val
+	and atomic_decrement_val.
+
+	* include/atomic.h: Define atomic_increment_val, atomic_decrement_val,
+	and atomic_delay if not already defined.
+	* sysdeps/i386/i486/bits/atomic.h: Define atomic_delay.
+	* sysdeps/x86_64/bits/atomic.h: Likewise.
+
+	* misc/sys/cdefs.h (__NTH): New macro.
+	(__THROW): Define using nothrow attribute for C code and gcc >= 3.2.
+	(__REDIRECT_NTH): New macro.
+	* argp/argp.h: Use __NTH and __REDIRECT_NTH where necessary.
+	* ctype/ctype.h: Likewise.
+	* dirent/dirent.h: Likewise.
+	* io/fcntl.h: Likewise.
+	* io/sys/sendfile.h: Likewise.
+	* io/sys/stat.h: Likewise.
+	* io/sys/statfs.h: Likewise.
+	* io/sys/statvfs.h: Likewise.
+	* libio/bits/stdio.h: Likewise.
+	* misc/sys/mman.h: Likewise.
+	* posix/unistd.h: Likewise.
+	* resource/sys/resource.h: Likewise.
+	* rt/aio.h: Likewise.
+	* signal/signal.h: Likewise.
+	* stdlib/stdlib.h: Likewise.
+	* string/argz.h: Likewise.
+	* string/string.h: Likewise.
+	* sysdeps/generic/inttypes.h: Likewise.
+	* sysdeps/i386/fpu/bits/mathinline.h: Likewise.
+	* sysdeps/powerpc/fpu/bits/mathinline.h: Likewise.
+	* sysdeps/s390/fpu/bits/mathinline.h: Likewise.
+	* sysdeps/x86_64/fpu/bits/mathinline.h: Likewise.
+	* sysdeps/unix/sysv/linux/bits/socket.h: Likewise.
+	* sysdeps/unix/sysv/linux/bits/sys/sysmacros.h: Likewise.
+	* wcsmbs/wchar.h: Likewise.
+
+	* sysdeps/generic/glob.c: Use __PMT instead of __P where appropriate.
+
+	* resolv/gethnamaddr.c (getanswer): Remove __P use in variable
+	definition.
+
+	* io/sys/poll.h: Remove __THROW from poll prototype, it's a
+	cancellation point.
+
+	* io/fts.c (fts_open): Remove uses of __P.
+
+	* include/stdlib.h: No need to use __THROW in this header.
+
+2004-09-06  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/hurd/dl-sysdep.c (__writev): Don't use assert on FD
+	validity, since __assert_fail gets to here anyway.  Just fail.
+
+2004-09-06  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Fix problem with
+	AF_UNSPEC lookup with AI_CANONNAME of name which has only IPv6
+	addresses.
+
+2004-09-05  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/fpu/fraiseexcpt.c: Remove file.
+	* sysdeps/unix/sysv/linux/kernel-features.h
+	(__ASSUME_IEEE_RAISE_EXCEPTION): New.
+	* sysdeps/unix/sysv/linux/alpha/fraiseexcpt.c: New file.
+	* sysdeps/unix/sysv/linux/alpha/kernel_sysinfo.h: New file.
+	* sysdeps/unix/sysv/linux/alpha/ieee_get_fp_control.S: Use it.
+	* sysdeps/unix/sysv/linux/alpha/ieee_set_fp_control.S: Likewise.
+
+2004-09-05  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/div.S: Save and restore FPCR around fp operations.
+	* sysdeps/alpha/divl.S, sysdeps/alpha/divq.S, sysdeps/alpha/divqu.S,
+	sysdeps/alpha/ldiv.S, sysdeps/alpha/reml.S, sysdeps/alpha/remq.S,
+	sysdeps/alpha/remqu.S: Likewise.
+	* sysdeps/alpha/div_libc.h (FRAME): Increase to 64.
+
+2004-09-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/cache.c (cache_add): Correctly log GETHOSTBYADDR and
+	GETHOSTBYADDRv6 requests.
+
+2004-09-04  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/pwdcache.c (cache_addpw): Use correct key length in
+	cache_add calls.
+	* nscd/grpcache.c (cache_addgr): Likewise.
+
+2004-09-03  Alfred M. Szmidt  <ams@kemisten.nu>
+
+	* sysdeps/mach/hurd/i386/tls.h (THREAD_DTV): Changed type of _DTV
+	to `dtv_t *'.
+
+2004-09-03  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd.c (parse_opt): Use writev instead of two write for
+	invalidate command.
+
+2004-09-02  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/connections.c (nscd_run): Check early for invalid request types.
+
+2004-09-02  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/hurd/i386/tls.h (TLS_INIT_TP_EXPENSIVE): New macro.
+	(INSTALL_NEW_DTV, THREAD_DTV): Rewritten to fetch the right word.
+	(THREAD_SELF): New macro.
+
+2004-09-02  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	[BZ #357]
+	* stdlib/tst-setcontext.c (test_stack): Added test for stack clobber.
+	(main): Call test_stack.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S
+	(__getcontext): Push stack frame then save parms in local frame.
+	Improve instruction scheduling.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S
+	(__swapcontext): Likewise.
+
+2004-09-01  Andreas Schwab  <schwab@suse.de>
+
+	* sysdeps/unix/sysv/linux/ia64/sys/ucontext.h [g++ >= 3.5]: Use
+	__builtin_offsetof.
+
+2004-09-01  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #361]
+	* posix/fnmatch_loop.c (FCT): For backslash between brackets, branch
+	to normal_bracket after fetching the next character.
+	* posix/tst-fnmatch.input: Add 25 new tests.
+	Reported by Markus Oberhumer <markus@oberhumer.com>.
+
+2004-09-01  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/rtld.c (dl_main): First check existence of ld.so.preload
+	with access.
+
+2004-09-01  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/unix/sysv/linux/bits/waitflags.h
+	(WSTOPPED, WEXITED, WCONTINUED, WNOWAIT): New macros.
+	* sysdeps/unix/sysv/linux/kernel-features.h (__ASSUME_WAITID_SYSCALL):
+	New macro.
+	* sysdeps/unix/sysv/linux/waitid.c: New file.  Use new syscall when
+	available, or fall back to the waitpid-based generic code.
+
+2004-08-14  Alfred M. Szmidt  <ams@kemisten.nu>
+
+	* sysdeps/mach/hurd/i386/init-first.c (_hurd_stack_setup): Let gcc
+	clobber the `ebp' register.
+	* sysdeps/mach/hurd/i386/Makefile (CFLAGS-init-first.c): Removed.
+	Reverts change from 2004-05-07 by Jeroen Dekkers.
+
+	* sysdeps/mach/hurd/i386/init-first.c (init): Changed the type of
+	NEWSP from `void *' to `int *'.  Changed all casts accordingly.
+
+2004-08-31  Jakub Jelinek  <jakub@redhat.com>
+
+	* wcsmbs/wcsmbsload.c (__wcsmbs_getfct): Move attribute_hidden
+	before return type.
+	* locale/localename.c (__current_locale_name): Likewise.
+
+2004-08-30  Roland McGrath  <roland@frob.com>
+
+	* scripts/extract-abilist.awk: If `lastversion' variable defined, omit
+	later sets from output.
+	* Makerules (check-abi): Pass option to set that with value of
+	LIB-abi-frozen variable if one is set.
+
+	* abilist/libcidn.abilist: New file (empty).
+
+2004-08-30  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/bits/posix1_lim.h (_POSIX_CHILD_MAX, _POSIX_OPEN_MAX): If
+	not __USE_XOPEN2K, use the Unix98 mandated values.
+
+2004-08-27  Roland McGrath  <roland@redhat.com>
+
+	* configure.in (usetls): Default to yes.
+	* configure: Regenerated.
+
+2004-08-26  Roland McGrath  <roland@redhat.com>
+
+	* configure.in (add_ons_automatic): New variable, set to yes or no
+	indicating --enable-add-ons with no explicit list.
+	(running add-on fragments): Allow a fragment to modify $libc_add_on
+	and have that affect its place in the list of add-ons to use.
+	* configure: Regenerated.
+
+2004-08-26  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/cache.c: Major rewrite.  The data is now optionally kept in
+	a mmaped memory region which is automatically mirrored on disk.
+	This implements persistent data storage.  The memory handling
+	needed to be completely revamped, it now uses a garbage collection
+	mechanism instead of malloc.
+	* nscd/connections.c: Likewise.
+	* nscd/nscd.c: Likewise.
+	* nscd/nscd.h: Likewise.
+	* nscd/nscd_conf.c: Likewise.
+	* nscd/nscd_stat.c: Likewise.
+	* nscd/grpcache.c: Likewise.
+	* nscd/hstcache.c: Likewise.
+	* nscd/pwdcache.c: Likewise.
+	* nscd/Makefile: Add rules to build mem.c.
+	* nscd/mem.c: New file.
+	* nscd/nscd.conf: Describe new configuration options.
+
+2004-08-26  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/unix/sysv/linux/mips/pread.c: Include sgidefs.h only if
+	NO_SGIDEFS_H isn't defined.
+	* sysdeps/unix/sysv/linux/mips/pwrite.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pread64.c: Likewise.
+
+	* sysdeps/unix/sysv/linux/sh/pread.c: Define NO_SGIDEFS_H and
+	_MIPS_SIM.
+	* sysdeps/unix/sysv/linux/sh/pwrite.c: Likewise.
+	* sysdeps/unix/sysv/linux/sh/pread64.c: Likewise.
+	* sysdeps/unix/sysv/linux/sh/pwrite64.c: Likewise.
+
+2004-08-26  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/connections.c (nscd_run): atomic_increment was not missing.
+
+	* sysdeps/gnu/Makefile (libdl-sysdep_routines): Don't add eval.
+	Patch by Greg Schafer.
+
+2004-08-25  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/elf/start.S (_start): Use $15 as frame unwind
+	instead of $31.  Zero $15.
+	* sysdeps/unix/sysv/linux/alpha/clone.S (thread_start): Likewise.
+
+2004-08-25  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/powerpc/powerpc64/bits/atomic.h
+	(__arch_compare_and_exchange_bool_32_acq): Fix case where oldval
+	is negative.
+	(__arch_compare_and_exchange_bool_32_rel): Likewise.
+
+	* nscd/connections.c: Make socket nonblocking so that threads
+	don't get stuck on accept.  Fix locking.
+
+	* nscd/grpcache.c (cache_addgr): Use copy of original key in hash
+	entry with alternative key.
+	* nscd/pwdcache.c (cache_addpw): Likewise.
+
+2004-08-25  Richard Sandiford  <rsandifo@redhat.com>
+
+	* sysdeps/mips/dl-machine.h (_dl_start_user): Don't set
+	__libc_stack_end.
+
+2004-08-23  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/gnu/netinet/udp.h: Cosmetic changes.  Remove unnecessary
+	__BEGIN_DECLS/__END_DECLS.
+
+2004-08-23  Andreas Jaeger  <aj@suse.de>
+
+	[BZ #341]
+	* sysdeps/unix/sysv/linux/alpha/bits/fcntl.h (O_NOATIME): Define.
+	* sysdeps/unix/sysv/linux/arm/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/cris/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/hppa/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/i386/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/ia64/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/m68k/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/s390/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/sh/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (O_NOATIME): Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h (O_NOATIME): Likewise.
+
+2004-08-21  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/hooks.c (DEFAULT_CHECK_ACTION): Moved to malloc.c.
+	(check_action): Likewise.
+	When printing error messages, use malloc_printf_nc now instead of
+	fiddling with the streams cancellation flag in every place.
+	* malloc/malloc.c (DEFAULT_CHECK_ACTION): New definition.  Change
+	default to 3.
+	(check_action): New variable.
+	(unlink): Print error message and eventually terminate in case list
+	is corrupted.
+	(malloc_printf_nc): New function.  Use it in _int_free.
+	Change proposed by Arjan van de Ven.
+
+	* dlfcn/Makefile: Don't build eval.c anymore.
+
+2004-08-20  Roland McGrath  <roland@frob.com>
+
+	* csu/Makefile ($(objpfx)version-info.h): Use printf in place
+	of echo -e for POSIX.2 portability.
+	Reported by Paul Jarc <prj@po.cwru.edu>.
+
+2004-08-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Minor optimizations in
+	list generation.
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Don't use
+	getcanonname_r function if AI_CANONNAME flag is not set in
+	request.
+
+	* nis/nss_compat/compat-initgroups.c (getgrent_next_nss):
+	Initialize mysize with limits only if latter is >= 0.  Use mysize
+	in malloc call.
+
+2004-08-19  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Cast canon to (char *)
+	to avoid warning.
+
+	* resolv/nss_dns/dns-canon.c (_nss_dns_getcanonname_r): Initialize
+	status to NSS_STATUS_UNAVAIL.
+
+2004-08-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Use h->h_name in the
+	canonname lookup since it has the FQDN even if the original NAME
+	value has not.
+
+2004-08-18  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Store NAME parameter
+	pointer in new variable ORIG_NAME and use this pointer when
+	determination of canonical name failed, not the possibly IDN
+	translated value of NAME.
+
+2004-08-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* resolv/resolv.h (RES_DEFAULT): Add RES_NOIP6DOTINT.
+	* resolv/res_init.c (res_setoptions): Recognize ip6-dotint option.
+	Reset RES_NOIP6DOTINT flag in this case.
+
+	* sysdeps/posix/getaddrinfo.c: Fix memory handling of
+	ai_canonname.
+
+2004-08-16  Ulrich Drepper  <drepper@redhat.com>
+
+	* resolv/nss_dns/dns-canon.c (_nss_dns_getcanonname_r): Don't use
+	CNAME records, we better follow the chain of CNAME records which
+	can be accomplished with A/AAAA lookups.
+
+2004-08-15  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/unix/sysv/linux/bits/resource.h (enum __rusage_who):
+	Remove __RUSAGE_BOTH constant and RUSAGE_BOTH macro.
+	* sysdeps/unix/sysv/linux/alpha/bits/resource.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/resource.h: Likewise.
+	* sysdeps/unix/sysv/linux/sparc/bits/resource.h: Likewise.
+
+2004-08-15  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/i386/i686/Makefile (elide-routines.os): Append hp-timing to
+	this, not ...
+	(static-only-routines): ... this.
+	* sysdeps/ia64/Makefile: Likewise.
+	* sysdeps/sparc/sparc32/sparcv9/Makefile: Likewise.
+	* sysdeps/sparc/sparc64/Makefile: Likewise.
+	* sysdeps/x86_64/Makefile: Likewise.
+	* sysdeps/i386/i686/hp-timing.c: Revert copyright terms change.
+	* sysdeps/ia64/hp-timing.c: Likewise.
+	* sysdeps/sparc/sparc32/sparcv9/hp-timing.c: Likewise.
+	* sysdeps/sparc/sparc64/hp-timing.c: Likewise.
+
+	* csu/elf-init.c: Update copyright terms including special exception
+	for these trivial files, which are statically linked into executables
+	that use dynamic linking for the significant library code.
+	* io/fstat.c: Likewise.
+	* io/fstat64.c: Likewise.
+	* io/lstat.c: Likewise.
+	* io/lstat64.c: Likewise.
+	* io/stat.c: Likewise.
+	* io/stat64.c: Likewise.
+	* stdlib/atexit.c: Likewise.
+	* sysdeps/alpha/elf/initfini.c: Likewise.
+	* sysdeps/alpha/elf/start.S: Likewise.
+	* sysdeps/arm/elf/start.S: Likewise.
+	* sysdeps/cris/elf/start.S: Likewise.
+	* sysdeps/generic/initfini.c: Likewise.
+	* sysdeps/generic/mknod.c: Likewise.
+	* sysdeps/hppa/elf/initfini.c: Likewise.
+	* sysdeps/hppa/elf/start.S: Likewise.
+	* sysdeps/i386/elf/start.S: Likewise.
+	* sysdeps/i386/i686/hp-timing.c: Likewise.
+	* sysdeps/ia64/elf/initfini.c: Likewise.
+	* sysdeps/ia64/elf/start.S: Likewise.
+	* sysdeps/ia64/hp-timing.c: Likewise.
+	* sysdeps/m68k/elf/start.S: Likewise.
+	* sysdeps/mach/start.c: Likewise.
+	* sysdeps/mips/elf/start.S: Likewise.
+	* sysdeps/powerpc/powerpc32/elf/start.S: Likewise.
+	* sysdeps/powerpc/powerpc64/elf/start.S: Likewise.
+	* sysdeps/s390/s390-32/elf/start.S: Likewise.
+	* sysdeps/s390/s390-32/initfini.c: Likewise.
+	* sysdeps/s390/s390-64/elf/start.S: Likewise.
+	* sysdeps/s390/s390-64/initfini.c: Likewise.
+	* sysdeps/sh/elf/initfini.c: Likewise.
+	* sysdeps/sh/elf/start.S: Likewise.
+	* sysdeps/sparc/sparc32/elf/start.S: Likewise.
+	* sysdeps/sparc/sparc32/sparcv9/hp-timing.c: Likewise.
+	* sysdeps/sparc/sparc64/elf/start.S: Likewise.
+	* sysdeps/sparc/sparc64/hp-timing.c: Likewise.
+	* sysdeps/standalone/i386/start.S: Likewise.
+	* sysdeps/standalone/i960/start.S: Likewise.
+	* sysdeps/standalone/m68k/m68020/start.S: Likewise.
+	* sysdeps/unix/arm/start.c: Likewise.
+	* sysdeps/unix/bsd/osf/alpha/start.S: Likewise.
+	* sysdeps/unix/bsd/ultrix4/mips/start.S: Likewise.
+	* sysdeps/unix/sparc/start.c: Likewise.
+	* sysdeps/unix/start.c: Likewise.
+	* sysdeps/unix/sysv/aix/start.s: Likewise.
+	* sysdeps/unix/sysv/irix4/start.c: Likewise.
+	* sysdeps/x86_64/elf/initfini.c: Likewise.
+	* sysdeps/x86_64/elf/start.S: Likewise.
+
+2004-08-15  Roland McGrath  <roland@redhat.com>
+
+	[BZ #227]
+	* sysdeps/unix/sysv/linux/kernel-features.h
+	(__ASSUME_BRK_PAGE_ROUNDED): New macro.
+	* sysdeps/unix/sysv/linux/dl-sysdep.c (frob_brk)
+	[! __ASSUME_BRK_PAGE_ROUNDED]: Adjust the break up if it falls within
+	the partial page after the dynamic linker's own data segment.
+
+2004-08-15  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Optimize generation of
+	v4-mapped addresses a bit.
+	(gethosts): Move alloca out of macro, so that it is done only once.
+
+	* sysdeps/posix/getaddrinfo.c (gaih_addrtuple): Change type of
+	addr to avoid casts.
+	(gethosts): Removed.
+	(gethosts2): Renamed to gethosts.  Make it usable for family !=
+	AF_UNSPEC.  Fix AI_V4MAPPED.
+	(gaih_inet): Remove use of old gethosts.  Always use what used to be
+	gethosts2.  If entry is found, try to use the same NSS module's
+	getcanonname_r function.  Use gethostbyaddr for AI_CANONNAME only
+	if getcanonname_r was not available.  Fix filtering of AI_V4MAPPED
+	addresses.  Numerous cleanups.
+	* resolv/nss_dns/dns-canon.c: New file.
+	* resolv/Makefile (libnss_dns-routines): Add dns-canon.
+	* resolv/Versions (libnss_dns): Add _nss_dns_getcanonname_r.
+
+	* elf/Makefile: Add rules to build and run tst-dlopenrpath.
+	* elf/tst-dlopenrpath.c: New file.
+	* elf/tst-dlopenrpathmod.c: New file.
+
+	* intl/tst-gettext.sh: Adjust for change of de.po file to UTF-8.
+	* intl/tst-gettext.c: Likewise.
+
+	* nss/getent.c (ahosts_keys_int): Correctly print IPv6 addresses.
+
+	* nss/getent.c: Allow queries for getaddrinfo with AF_INET and
+	AF_INET6.
+
+2004-08-14  Ulrich Drepper  <drepper@redhat.com>
+
+	* po/de.po: Update from translation team.
+
+2004-08-14  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/mach/configure: Regenerated.
+	* sysdeps/mach/hurd/configure: Regenerated.
+
+2004-08-14  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (getaddrinfo): If RFC3484 sorting is
+	performed, make sure it is still the first entry after sorting
+	that has the ai_canonname information.
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Really set ai_canonname
+	only in one entry.
+
+2004-08-13  Daniel Jacobowitz  <dan@debian.org>
+
+	* scripts/output-format.sed: Handle default case of three-argument
+	OUTPUT_FORMAT.
+
+	* sysdeps/arm/machine-gmon.h (mcount_internal): Mark as
+	__attribute_used__.
+
+2004-08-13  Ulrich Drepper  <drepper@redhat.com>
+
+	* nss/getent.c (ahosts_keys): ai_canonname is NULL for all but the
+	first returned entry.  Print name only if not NULL.
+
+	* nis/nss_nis/nis-netgrp.c: Remove locking by using data in struct
+	__netgrent object passed in instead of global variables.
+	Optimize.
+	* nis/nss_nisplus/nisplus-netgrp.c: Remove locking by using data
+	in struct __netgrent object passed in instead of global variables.
+	* inet/netgroup.h (struct __netgrent): Add service_user field.
+	Move cursor in anonymous union, add new field location to that
+	union.
+	* inet/getnetgrent_r.c: Extensive rewrite to really enable
+	concurrent use of set/get/endnetgrent and innetgr.
+	Reported by Chuck Simmons.
+
+	* inet/netgroup.h (struct name_list): Replace name pointer with
+	zero-sized array.
+	* inet/getnetgrent_r.c: Adjust code for change in name_list
+	layout.  Numerous strdup and free calls removed.
+
+	* elf/sprof.c (read_symbols): When comparing aliases, prefer
+	non-hidden over hidden symbols and strong over weak symbols
+	if both don't start with '_'.
+
+	* malloc/malloc.c: Use strong_alias instead of weak_alias wherever
+	possible.
+
+2004-08-12  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/bits/resource.h: Define non-standard
+	RUSAGE_ enums as __RUSAGE_ and adjust macros accordingly.
+	* sysdeps/unix/sysv/linux/alpha/bits/resource.h: Likewise.
+	* sysdeps/unix/sysv/linux/sparc/bits/resource.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/resource.h: Likewise.
+	Define non-standard RLIMIT_ enums as __RLIMIT_ and adjust macros
+	accordingly.
+
+2004-08-11  Andreas Schwab  <schwab@suse.de>
+
+	* resolv/res_libc.c: Move definition of __res_initstamp ...
+	* resolv/res_init.c: ... here.
+
+2004-08-10  GOTO Masanori  <gotom@debian.or.jp>
+
+	* locale/C-time.c: Change default ERA value from NULL to "".
+	* locale/tst-C-locale.c: Add test case for ERA keywords.
+
+2004-08-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/bits/resource.h: Define non-standard
+	RLIMIT_ enums as __RLIMIT_ and adjust macros accordingly.
+	* sysdeps/unix/sysv/linux/alpha/bits/resource.h: Likewise.
+	* sysdeps/unix/sysv/linux/sparc/bits/resource.h: Likewise.
+
+2004-08-12  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/bits/resource.h (RLIMIT_SIGPENDING,
+	RLIMIT_MSGQUEUE): Add.
+	(RLIMIT_NLIMITS, RLIM_NLIMITS): Adjust.
+	* sysdeps/unix/sysv/linux/alpha/bits/resource.h (RLIMIT_SIGPENDING,
+	RLIMIT_MSGQUEUE, RLIMIT_NLIMITS): Add.
+	(RLIM_NLIMITS): Adjust.
+	* sysdeps/unix/sysv/linux/sparc/bits/resource.h (RLIMIT_SIGPENDING,
+	RLIMIT_MSGQUEUE, RLIMIT_NLIMITS): Add.
+	(RLIM_NLIMITS): Adjust.
+	* sysdeps/unix/sysv/linux/mips/bits/resource.h (RLIMIT_SIGPENDING,
+	RLIMIT_MSGQUEUE, RLIMIT_NLIMITS): Add.
+	(RLIM_NLIMITS): Adjust.
+
+2004-08-12  Jakub Jelinek  <jakub@redhat.com>
+
+	* resolv/res_query.c (__libc_res_nsearch): Protect the debugging
+	printf with #ifdef DEBUG and RES_DEBUG check.
+
+	* sysdeps/unix/sysv/linux/bits/shm.h: Move __END_DECLS after
+	__USE_MISC #endif.
+	* sysdeps/generic/bits/shm.h: Add __BEGIN_DECLS for __getpagesize
+	declaration.
+	* sysdeps/gnu/bits/shm.h: Likewise.
+	* sysdeps/unix/sysv/linux/alpha/bits/shm.h: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/bits/shm.h: Likewise.
+	* sysdeps/unix/sysv/linux/s390/bits/shm.h: Likewise.
+	* sysdeps/unix/sysv/linux/sparc/bits/shm.h: Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/bits/shm.h: Likewise.
+
+2004-08-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* po/ca.po: Update from translation team.
+
+2004-08-11  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/bits/shm.h: Add __BEGIN_DECLS for
+	__getpagesize declaration.
+
+2004-08-11  Roland McGrath  <roland@redhat.com>
+
+	* configure.in (libc_cv_cpp_asm_debuginfo): Add missing braces around
+	commands inside &&.
+	Reported by Andreas Schwab <schwab@suse.de>.
+	* configure: Regenerated.
+
+	* posix/tst-waitid.c (do_test): Ignore SIGCHLD before cleanup SIGKILL.
+
+2004-08-11  Ulrich Drepper  <drepper@redhat.com>
+
+	* time/tzset.c (tzset_internal): Add new parameter which is
+	nonzero if called through tzset.  Use TZDEFAULT name including
+	name comparison if the new parameter is zero.  This means implicit
+	tzset calls will not cause files to be opened and read by tzfile.c
+	all the time.
+
+2004-08-11  Jakub Jelinek  <jakub@redhat.com>
+
+	* rt/tst-timer2.c (do_test): If timer_create fails, just continue.
+	* rt/tst-timer4.c (do_test): If one of the timer_create calls fails,
+	return 1 immediately.
+
+	* time/tzfile.c (__tzfile_read): Free transitions only if it will
+	not be reused.
+
+	* sysdeps/ieee754/dbl-64/mpa.c: Include <sys/param.h>.
+	* sysdeps/ieee754/dbl-64/mpa.h (MAX, MIN): Macros removed.
+
+	* stdio-common/tst-popen.c: Include <string.h>.
+
+	* resolv/res_send.c (__libc_res_nsend): Only define TMPBUF #if DEBUG.
+
+	* sysdeps/pthread/aio_misc.c (handle_fildes_io): Remove noreturn
+	attribute.  Return NULL instead of calling pthread_exit at the end.
+
+2004-08-11  Roland McGrath  <roland@redhat.com>
+
+	* iconvdata/testdata/ISO-2022-JP-3: Regenerated.
+
+2004-08-10  Alfred M. Szmidt  <ams@kemisten.nu>
+
+	* sysdeps/generic/bits/in.h (struct ip_mreq): Remove definition.
+
+2004-08-10  Jakub Jelinek  <jakub@redhat.com>
+
+	* libio/bits/stdio.h (fread_unlocked): Cast 0 to (size_t).
+	(fwrite_unlocked): When checking if size * n is <= 8, cast each
+	argument to size_t individually.  Cast n to (void) instead of
+	(size_t), surround with (), return (size_t) 0 if one of n or size
+	is 0.  [BZ #316]
+	* stdio-common/Makefile (tests): Add tst-unlockedio.
+	* stdio-common/tst-unlockedio.c: New test.
+
+2004-08-09  Roland McGrath  <roland@frob.com>
+
+	* manual/install.texi (Supported Configurations): Replace bug-glibc
+	mention with web URL.
+	* INSTALL: Regenerated.
+	* locale/iso-3166.def: Likewise, in comment.
+	* locale/iso-4217.def: Likewise.
+	* locale/iso-639.def: Likewise.
+	* posix/cpio.h: Remove bug reporting comment.
+
+2004-08-09  Ulrich Drepper  <drepper@redhat.com>
+
+	* libio/bits/stdio.h (fread_unlocked): Add a couple of (size_t)
+	casts to handle funny calls with floating point argument values
+	and signed values correctly and without warning.
+	(fwrite_unlocked): Likewise.  [BZ #309]
+
+	* malloc/memusage.c (me): Use creat64, not creat.
+	* malloc/memusagestat.c: Fix handling of very large sizes.  [BZ #285]
+	Patch by Guy Maor <guymaor@yahoo.com>.
+
+	* elf/ldconfig.c (options): Mark parameter option names as
+	translatable.  [BZ #253]  Patch by Jakub Bogusz <qboosh@pld-linux.org>.
+
+	* iconv/gconv_charset.h (strip): Also allow comma which is what is
+	used to separate options.  [BZ #194]
+
+2004-08-09  Roland McGrath  <roland@redhat.com>
+
+	* FAQ.in: Refer to web pages instead of bug-glibc.
+	* FAQ: Regenerated.
+
+	* time/strptime_l.c: #include <stdbool.h>, `bool' used in last change.
+
+2004-08-09  Ulrich Drepper  <drepper@redhat.com>
+
+	* time/tzset.c (tzset_internal): If TZ is not set do not compare
+	old and new tz value since it might be /etc/localtime in both
+	cases although the file changed.  [BZ #154]
+	Patch by Christian Franke <franke@computer.org>.
+
+	* time/tzfile.c (__tzfile_read): Determine dev/ino of file.
+	Compare with values of previously opened file.  Don't do anything
+	if they match.
+
+2004-08-08  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-load.c (_dl_map_object): If __RTLD_CALLMAP flag is set,
+	reset loader before the actual loading.
+	* elf/dl-open.c (dl_open_worker): If file name contains no path
+	element determine map of caller.  Pass caller map in this case to
+	_dl_map_object.  Set __RTLD_CALLMAP in mode.
+	* include/dlfcn.h (__RTLD_CALLMAP): Define.  [BZ #116]
+	Patch by Greg Wolodkin <greg@mathworks.com>.
+
+	* misc/syslog.c (openlog_internal): Always try both UDP and TCP.
+	[BZ #108]  Patch mainly by Bjorn Andersson <bjorn@iki.fi>.
+
+	* configure.in: Also recognize i786. [BZ #106]
+	Patch by <pluto@pld-linux.org>.
+
+	* resolv/res_query.c (__libc_res_nsearch): Correctly test whether
+	name contains any dots. [BZ #95]
+
+	* resolv/res_send.c: Compiling with DEBUG defined works again.
+	* resolv/gethnamaddr.c (dprintf): Renamed to Dprintf.  Adjust all
+	callers.
+
+	* resolv/tst-leaks.c (TIMEOUT): Define so that if no server is
+	available the process is not killed. [BZ #41]
+
+	* intl/tst-gettext.c (main): Improve some messages. [BZ #33]
+
+	* time/strptime_l.c (__strptime_internal): Fix handling of %Ey.
+	[BZ #28]
+
+	* po/sv.po: Update from translation team.
+
+2004-08-07  Ulrich Drepper  <drepper@redhat.com>
+
+	* inet/netinet/in.h: Add more const to the setipv4sourcefilter,
+	getsourcefilter, and setsourcefilter parameter list.
+	* sysdeps/generic/setipv4sourcefilter.c: Likewise.
+	* sysdeps/generic/getsourcefilter.c: Likewise.
+	* sysdeps/generic/setsourcefilter.c: Likewise.
+	* sysdeps/unix/sysv/linux/setipv4sourcefilter.c: Likewise.
+	* sysdeps/unix/sysv/linux/getsourcefilter.c: Likewise.
+	* sysdeps/unix/sysv/linux/setsourcefilter.c: Likewise.
+
+	* po/tr.po: Update from translation team.
+
+2004-08-06  Ulrich Drepper  <drepper@redhat.com>
+
+	* iconvdata/jisx0213.h (jisx0213_added_in_2004_p): Fix typo.
+	Reported by Paolo Bonzini.
+
+2004-08-06  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/ia64/dl-machine.h (elf_machine_fixup_plt): Add
+	always_inline.
+	* sysdeps/powerpc/powerpc64/dl-machine.h (elf_machine_runtime_setup,
+	elf_machine_fixup_plt, elf_machine_plt_conflict): Likewise.
+
+	* sysdeps/unix/sysv/linux/netatalk/at.h: Include bits/sockaddr.h
+	before including linux/atalk.h.
+
+	* resolv/res_libc.c: Include atomic.h.
+
+	* intl/finddomain.c (free_mem): Rename to...
+	(_nl_finddomain_subfreeres): ... this.  Add
+	__libc_freeres_fn_section.
+	* intl/loadmsgcat.c (_nl_unload_domain): Add
+	__libc_freeres_fn_section.
+	* intl/gettextP.h (_nl_unload_domain): Move into #ifdef _LIBC.
+	Add attribute_hidden.
+	(_nl_finddomain_subfreeres): New prototype.
+	* iconv/gconv_db.c (free_mem): Call _nl_finddomain_subfreeres.
+
+2004-07-30  Guido Guenther  <agx@sigxcpu.org>
+
+	* nss/getent.c (passwd_keys): Use strtoul instead of isdigit to
+	test if the key is numeric or not.
+	(group_keys): Likewise.
+
+2004-08-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* inet/netinet/in.h: Define struct ip_msfilter, IP_MSFILTER_SIZE,
+	struct group_filter, and GROUP_FILTER_SIZE.
+	* include/sys/socket.h: Declare __getsockopt.
+	* sysdeps/unix/sysv/linux/setipv4sourcefilter.c: New file.
+	* sysdeps/unix/sysv/linux/getipv4sourcefilter.c: New file.
+	* sysdeps/unix/sysv/linux/setsourcefilter.c: New file.
+	* sysdeps/unix/sysv/linux/getsourcefilter.c: New file. [BZ #211]
+
+	* po/ko.po: Update from translation team.
+
+2004-08-04  Jakub Jelinek  <jakub@redhat.com>
+
+	* hesiod/hesiod.c (__hesiod_res_get): Use calloc instead of malloc +
+	memset.
+	(__hesiod_res_set): Free nsaddrs.
+
+	* include/resolv.h (__res_maybe_init): Add prototype.
+	* resolv/resolv.h (struct __res_state): Add _u._ext.initstamp field.
+	* resolv/Versions (libc): Add __res_maybe_init@@GLIBC_PRIVATE.
+	* resolv/res_libc.c (__res_initstamp, lock): New variables.
+	(res_init): Increase __res_initstamp.
+	(__res_maybe_init): New function.
+	* resolv/res_init.c (__res_vinit): Initialize _u._ext.initstamp.
+	* hesiod/hesiod.c (__hesiod_res_get): Use __res_maybe_init instead
+	of RES_INIT check and {res_ninit,__res_ninit,res_init} call.
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Likewise.
+	* resolv/nss_dns/dns-host.c (_nss_dns_gethostbyname2_r,
+	_nss_dns_gethostbyaddr_r): Likewise.
+	* resolv/nss_dns/dns-network.c (_nss_dns_getnetbyname_r,
+	_nss_dns_getnetbyaddr_r): Likewise.
+	* resolv/gethnamaddr.c (gethostbyname, gethostbyname2,
+	gethostbyaddr): Likewise.
+	* resolv/res_data.c (fp_nquery, res_mkquery, res_mkupdate,
+	res_isourserver, res_sendsigned, res_update, res_search,
+	res_querydomain): Likewise.
+	* nss/getXXbyYY_r.c (INTERNAL (REENTRANT_NAME)): Likewise.
+	* nss/digits_dots.c (__nss_hostname_digits_dots): Likewise.
+	* nss/getnssent_r.c (__nss_setent, __nss_endent, __nss_getent_r):
+	Likewise.
+
+2004-08-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Set ai_family for
+	V4-mapped IPv6 addresses and req->ai_family==AF_INET.
+	Reported by A. Guru <a.guru@sympatico.ca>.
+
+	* po/sv.po: Update from translation team.
+	* po/sk.po: Likewise.
+	* po/pl.po: Likewise.
+
+2004-08-04  Jakub Jelinek  <jakub@redhat.com>
+	    Ulrich Drepper  <drepper@redhat.com>
+
+	* wcsmbs/mbsrtowcs_l.c (__mbsrtowcs_l): Don't read more input
+	characters than necessary.
+
+2004-08-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* wcsmbs/Makefile (tests): Add tst-mbsrtowcs.
+	* wcsmbs/tst-mbsrtowcs.c: New file.
+
+	* po/fr.po: Update from translation team.
+
+2004-08-04  Jakub Jelinek  <jakub@redhat.com>
+
+	* resolv/inet_pton.c (inet_pton4): Disallow octal numbers.  Reported
+	by A. Guru <a.guru@sympatico.ca>.  [BZ #295]
+
+2004-08-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* po/nl.po: Update from translation team.
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): Recognize all the IPv4
+	numeric address formats inet_addr knows.
+	(getaddrinfo): Allow AI_NUMERICSERV flag.
+	If neither IPv4 nor IPv6 interface is present we cannot make any
+	decision for AI_ADDRCONFIG.  Fail if AI_NUMERICSERV is set and the
+	string is not just a number.  Remove useless freeaddrinfo call.
+	* resolv/netdb.h (AI_NUMERICSERV): Define.
+	Based on a patch by a.guru@sympatico.ca.
+
+2004-08-04  Jakub Jelinek  <jakub@redhat.com>
+
+	* stdlib/strfmon_l.c (__vstrfmon_l): Memset whole info structure
+	instead of trying to initialize some, but not all, fields one by
+	one.
+	* stdio-common/printf_size.c (printf_size): Initialize fb_info
+	structure with *info instead of trying to initialize some, but not
+	all, fields from it.
+
+	* nscd/connections.c (handle_request): Check if req->type is in
+	LASTDBREQ .. LASTREQ range instead of req.
+
+	* locale/programs/linereader.c (lr_create): Initialize
+	lr->return_widestr to 0.
+
+	* elf/dl-close.c (free_slotinfo): Add __libc_freeres_fn_section.
+	(free_mem): Call free_slotinfo just once.
+
+	* stdio-common/tst-fmemopen.c (main): Check for MAP_FAILED instead
+	of NULL.
+
+	* locale/localeinfo.h (_nl_locale_subfreeres): New prototype.
+	* locale/setlocale.c (free_category): Add __libc_freeres_fn_section.
+	(free_mem): Rename to _nl_locale_subfreeres.
+	* iconv/gconv_db.c: Include locale/localeinfo.h.
+	(free_derivation, free_modules_db): Add __libc_freeres_fn_section.
+	(free_mem): Call _nl_locale_subfreeres.
+	* iconv/gconv_dl.c (do_release_all): Add __libc_freeres_fn_section.
+
+2004-08-04  Roland McGrath  <roland@frob.com>
+
+	* Makeconfig ($(common-objpfx)config.status):
+	Fix typo: $(add_ons) -> $(add-ons).
+	(Makeconfig-add-on): New variable.  When doing $(sysdep-makeconfigs)
+	include, use black magic to get it set to an add-on's name during the
+	include of the add-on's Makeconfig.
+
+	* configure.in: Use variable name `libc_add_on' when sourcing add-on
+	configure fragments, so they can refer to this.
+	* configure: Regenerated.
+
+2004-08-04  Roland McGrath  <roland@redhat.com>
+
+	* posix/tst-waitid.c (test_child): Sleep a second before stopping.
+	(do_test): Bump sleep to three seconds.
+	(sigchld, check_sigchld): New functions.
+	(do_test): Handle SIGCHLD and check for getting the right details.
+
+	* posix/tst-waitid.c (do_test): Kill the child process when bailing
+	out early on some failure.
+	[WCONTINUED]: Test WCONTINUED functionality.
+
+2004-08-03  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/connections.c (handle_request): Print more descriptive
+	message for invalid request types.
+
+2004-08-02  Jakub Jelinek  <jakub@redhat.com>
+
+	* iconvdata/ibm932.c (BODY): Avoid binary search for ch >= 0xffff.
+	Always treat high as highest number in range + 1.
+	* iconvdata/ibm943.c (BODY): Likewise.
+
+2004-07-31  Bruno Haible  <bruno@clisp.org>
+
+	* iconvdata/JISX0213.TXT: Updated to JISX0213 plane 1 version 2004.
+	* iconvdata/jisx0213.c (__jisx0213_to_ucs_main,
+	__jisx0213_to_ucs_pagestart, __jisx0213_from_ucs_level1,
+	__jisx0213_from_ucs_level2): Regenerated.
+	* iconvdata/jisx0213.h (jisx0213_added_in_2004_p): New function.
+	* iconvdata/iso-2022-jp-3.c (JISX0213_1_2000_set): Renamed from
+	JISX0213_1_set.
+	(JISX0213_1_2004_set): New enum value.
+	(BODY for FROM_LOOP): Treat ESC $ ( Q like ESC $ ( O.
+	(BODY for TO_LOOP): For JISX 0213 plane 1 characters, emit ESC $ ( O
+	when possible, ESC $ ( Q when needed.
+	* iconvdata/testdata/EUC-JISX0213: Add the 10 new characters.
+	* iconvdata/testdata/EUC-JISX0213..UTF8: Update.
+	* iconvdata/testdata/SHIFT_JISX0213: Add the 10 new characters.
+	* iconvdata/testdata/SHIFT_JISX0213..UTF8: Update.
+	* iconvdata/testdata/ISO-2022-JP-3: Add the 10 new JISX0213 characters.
+	* iconvdata/testdata/ISO-2022-JP-3..UTF8: Update.
+
+2004-07-22  Bruno Haible  <bruno@clisp.org>
+
+	* iconvdata/gconv-modules (ISO-8859-7): Add alias ISO_8859-7:2003.
+
+2004-07-29  David S. Miller  <davem@redhat.com>
+
+	* sysdeps/sparc/sparc64/sparcv9b/memcpy.S (memcpy): Optimize
+	better for smaller than 256 byte copies.  Also, use only one
+	unrolled loop instead of two for the large copy case.
+
+2004-07-30  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/divq.S: Save t3 before it gets clobbered.
+	* sysdeps/alpha/remq.S: Likewise.
+	* sysdeps/alpha/div.S, sysdeps/alpha/ldiv.S: Rewrite with the
+	new division algorithms in divl.S and divq.S respectively.
+
+2004-07-28  GOTO Masanori  <gotom@debian.or.jp>
+
+	* timezone/asia: Update from tzdata2004b.
+	* timezone/backward: Likewise.
+	* timezone/europe: Likewise.
+	* timezone/iso3166.tab: Likewise.
+	* timezone/leapseconds: Likewise.
+	* timezone/northamerica: Likewise.
+	* timezone/southamerica: Likewise.
+	* timezone/zone.tab: Likewise.
+
+	* timezone/private.h: Update from tzcode2004b.
+	* timezone/zic.c: Likewise.
+
+2004-07-27  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/grpcache.c (cache_addgr): If necessary, add entry also
+	under the name the user provided.
+	* nscd/pwdcache.c (cache_addpw): Likewise.
+
+2004-07-26  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/posix/waitid.c [WEXITED]: Clear WEXITED bit in OPTIONS for
+	call to __waitpid.
+
+2004-07-26  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/hstcache.c (cache_addhst): Fix two scenarios which lead to
+	memory leaks.
+
+	* sysdeps/unix/sysv/linux/ifreq.c (__ifreq): Assign pointer for
+	new buffer at the right time.
+	Reported by Jakub Bogusz <qboosh@pld-linux.org>.
+
+2004-07-25  Ulrich Drepper  <drepper@redhat.com>
+
+	* inet/Versions [libc, GLIBC_2.3.4]: Add getipv4sourcefilter,
+	getsourcefilter, setipv4sourcefilter, and setsourcefilter.
+	* inet/Makefile (routines): Likewise.
+	* inet/netinet/in.h: Add prototypes for getipv4sourcefilter,
+	getsourcefilter, setipv4sourcefilter, and setsourcefilter.
+	* sysdeps/generic/getipv4sourcefilter.c: New file.
+	* sysdeps/generic/setipv4sourcefilter.c: New file.
+	* sysdeps/generic/getsourcefilter.c: New file.
+	* sysdeps/generic/setsourcefilter.c: New file.
+
+2004-07-17  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/memcpy.S: Improve instruction scheduling
+	for POWER4 machines.
+
+2004-07-21  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #274]
+	* stdlib/strtod_l.c (INTERNAL (__STRTOF)): Fix used >=
+	BITS_PER_MP_LIMB shifting up.
+	* stdlib/tst-strtod.c (main): Add new tests.
+
+2004-07-23  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #282]
+	* libio/iopopen.c (_IO_new_popen): Use _IO_init instead of
+	_IO_no_init.  Remove wd from struct locked_FILE.
+	(_IO_wproc_jumps): Remove.
+	Reported by Andrew Josey <a.josey@opengroup.org>.
+	* stdio-common/Makefile (tests): Add tst-popen.
+	* stdio-common/tst-popen.c: New test.
+
+2004-07-23  Ulrich Drepper  <drepper@redhat.com>
+
+	* posix/bits/posix1_lim.h: Fix values for _POSIX_CHILD_MAX and
+	_POSIX_OPEN_MAX.  Add _POSIX_HOST_NAME_MAX, _POSIX_SYMLINK_MAX,
+	_POSIX_SYMLOOP_MAX, and _POSIX_RE_DUP_MAX.
+	Reported by Andrew Josey.
+
+	* include/features.h: Document _POSIX_C_SOURCE == 200112L.
+
+	* grp/grp.h: Define gid_t if __USE_XOPEN2K is defined.
+	* pwd/pwd.h: Define uid_t and gid_t if __USE_XOPEN2K is defined.
+	* io/sys/stat.h: Define dev_t, gid_t, ino_t, mode_t, nlink_t,
+	off_t, time_t, and uid_t if __USE_XOPEN2K is defined.
+	* signal/signal.h: Define pid_t if __USE_XOPEN2K is defined.
+	* posix/unistd.h: Define gid_t, off_t, pid_t, uid_t, and
+	useconds_t if __USE_XOPEN2K is defined.
+	* io/utime.h: Define time_t if __USE_XOPEN2K is defined.
+	* libio/stdio.h: Declare fseeko and ftello if __USE_XOPEN2K is defined.
+
+2004-07-19  Thorsten Kukuk  <kukuk@suse.de>
+
+	* nis/nss_compat/compat-initgroups.c (getgrent_next_nss): Don't
+	allocate memory for large temporary variables with alloca.
+
+2004-07-22  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S: Compatibility
+	code must have version GLIBC_2.0.  Patch by Dwayne McConnell.
+
+	* nscd/nscd_getgr_r.c (nscd_getgr_r): Avoid read call with NULL
+	pointer and zero length.
+
+2004-07-21  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/cache.c (prune_cache): Print correct list when debugging.
+
+2004-07-21  Jakub Jelinek  <jakub@redhat.com>
+
+	* resolv/res_libc.c (res_init): If RES_INIT is set and
+	_res.nscount > 0, call __res_nclose and free nsaddrs.
+	* resolv/Makefile: Add rules to build and run tst-leaks2.
+	* resolv/tst-leaks2.c: New test.
+
+2004-07-21  Ulrich Drepper  <drepper@redhat.com>
+
+	* intl/libintl.h: Don't define macros for C++.
+	Patch by Goto Masanori.
+
+2004-07-22  GOTO Masanori  <gotom@debian.or.jp>
+
+	[BZ #276]
+	* include/arpa/inet.h: Change inet_aton type from in_addr_t to int.
+	* inet/arpa/inet.h: Likewise.
+	* resolv/inet_addr.c: Likewise.
+
+2004-07-21  Alexandre Oliva  <aoliva@redhat.com>
+
+	* sysdeps/unix/sysv/linux/mips/mips64/syscalls.list: Add semtimedop.
+
+2004-07-20  Roland McGrath  <roland@redhat.com>
+
+	* configure.in (add_ons): Substitute this.  Move $add_ons handling
+	after AC_CANONICAL_HOST and default setting of $machine et al.
+	Don't set $subdirs from $add_ons.
+	Instead, source add-on/configure fragments early on.
+	(base_machine): If it's already set, don't set it based on $machine.
+	(libc_config_ok): New variable, set to no.  If an add-on fragment sets
+	it to yes, skip the tuple sanity check as if --enable-hacker-mode.
+	(sysnames): Try appending add-on names after machine as well.
+	* config.make.in (add-ons): Set from @add_ons@ instead of @subdirs@.
+	* Makeconfig ($(common-objpfx)config.status): Also depend on configure
+	files in $(add_ons) dirs.
+
+	* sysdeps/unix/sysv/linux/bits/in.h (struct ip_mreq): Remove
+	definition, now in netinet/in.h proper.
+
+2004-07-20  Alexandre Oliva  <aoliva@redhat.com>
+
+	* sysdeps/unix/sysv/linux/mips/Makefile ($(objpfx)syscall-%.h):
+	Sort by syscalls.  Make sure we get headers such as sgidefs.h from
+	the build tree before just-installed ones.
+
+	* sysdeps/mips/atomicity.h: Use standard names for ABI macros,
+	include sgidefs.h where appropriate.
+	* sysdeps/mips/dl-machine.h: Likewise.
+	* sysdeps/mips/machine-gmon.h: Likewise.
+	* sysdeps/mips/bits/setjmp.h: Likewise.
+	* sysdeps/mips/fpu/bits/mathdef.h: Likewise.
+	* sysdeps/mips/mips64/__longjmp.c: Likewise.
+	* sysdeps/mips/mips64/setjmp_aux.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/kernel_stat.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pread.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pread64.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/ptrace.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pwrite.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/pwrite64.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sigaction.c: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sigcontextinfo.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/fcntl.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/sigcontext.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/stat.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/procfs.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/ptrace.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/tas.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/ucontext.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/sys/user.h: Likewise.
+	* sysdeps/unix/sysv/linux/mips/Makefile ($(objpfx)syscall-%.h):
+	Likewise.
+	* sysdeps/unix/sysv/linux/mips/configure.in (asm-unistd.h):
+	Likewise.
+
+	* sysdeps/mips/dl-machine.h (__dl_runtime_resolve): Update to use
+	_dl_lookup_symbol_x.
+	(elf_machine_runtime_link_map): Don't INTUSE _dl_signal_error.
+
+2004-07-20  Ulrich Drepper  <drepper@redhat.com>
+
+	* inet/netinet/in.h: Define struct ip_mreq and struct
+	ip_mreq_source.
+	Define struct group_req and struct group_source_req.
+	* sysdeps/unix/sysv/linux/bits/in.h: Define IP_UNBLOCK_SOURCE,
+	IP_BLOCK_SOURCE, IP_ADD_SOURCE_MEMBERSHIP,
+	IP_DROP_SOURCE_MEMBERSHIP, IP_MSFILTER, MCAST_JOIN_GROUP,
+	MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_LEAVE_GROUP,
+	MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, and
+	MCAST_MSFILTER.
+	Define MCAST_INCLUDE and MCAST_EXCLUDE.
+
+	* iconvdata/gconv-modules: Add alias for IBM874 [BZ #244].
+
+2004-07-19  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #258]
+	* math/libm-test.inc (max_value, min_value): New variables.
+	(initialize): Initialize them.
+	(pow_test): Add a couple of new tests.
+	* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Don't generate invalid
+	exception if |y| >= 1U<<31.
+	* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Don't generate invalid
+	exception if |y| >= 1L<<63.
+	* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
+	If y*log2(x) overflows to +-inf, return still +inf/+0 instead of NaN.
+	* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Likewise.
+
+2004-07-18  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/pwdcache.c (cache_addpw): Optimize case of unsuccessful
+	lookup a bit.
+	* nscd/grpcache.c (cache_addgr): Likewise.
+	* nscd/hstcache.c (cache_addhst): Likewise.
+
+2004-07-10  GOTO Masanori  <gotom@debian.or.jp>
+
+	* sysdeps/s390/s390-32/elf/start.S: Remove symbol _fp_hw.
+	* sysdeps/s390/s390-64/elf/start.S: Likewise.
+
+2004-04-16  Andreas Schwab  <schwab@suse.de>
+
+	* sysdeps/ia64/bits/atomic.h: Cast first argument of
+	__sync_bool_compare_and_swap_si correctly to void*.
+
+2004-07-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* iconv/iconv_prog.c (print_known_names): Make machine-readable
+	output even less cluttered.
+
+2004-07-16  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	[BZ #269]
+	* setjmp/Makefile (tests): Add bug269-setjmp.
+	* setjmp/bug269-setjmp.c: New file.
+	* sysdeps/powerpc/powerpc64/__longjmp-common.S [SHARED && !IS_IN_rtld]:
+	Store R2 from jmpbuf in callers TOC save area.
+	* sysdeps/powerpc/powerpc64/bsd-_setjmp.S [SHARED && !IS_IN_rtld]:
+	Store R2 in TOC save area.
+	* sysdeps/powerpc/powerpc64/setjmp-common.S [SHARED && !IS_IN_rtld]:
+	Copy TOC save area from previous frame as R2 (TOC) in jmpbuf.
+
+2004-07-16  Jakub Jelinek  <jakub@redhat.com>
+
+	* locale/newlocale.c: Include bits/libc-lock.h.
+	(__libc_setlocale_lock): Extern decl.
+	(__newlocale): Use it.
+	Reported by Ulrich Weigand <Ulrich.Weigand@de.ibm.com>.
+
+2004-07-15  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/fcntl.c (__fcntl_nocancel): Remove
+	static inline __attribute ((always_inline)).  Don't define if
+	NO_CANCELLATION.
+	(__libc_fcntl): Use INLINE_SYSCALL directly instead of
+	__fcntl_nocancel.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/fcntl.c (__fcntl_nocancel):
+	Remove static inline __attribute ((always_inline)).  Don't define
+	if NO_CANCELLATION.
+	(__libc_fcntl): Use INLINE_SYSCALL directly instead of
+	__fcntl_nocancel.
+	* sysdeps/unix/sysv/linux/i386/fcntl.c (__fcntl_nocancel): Define to
+	__libc_fcntl if NO_CANCELLATION and __ASSUME_FCNTL64 == 0.
+	Don't define at all if NO_CANCELLATION and __ASSUME_FCNTL64 > 0.
+	(__libc_fcntl): Don't define if __fcntl_nocancel is a macro.
+
+	[BZ #262]
+	* sysdeps/i386/elf/start.S (_start): Use @GOT instead of @GOTOFF
+	for main.
+	* elf/Makefile: Add rules to build and run tst-pie1.
+	* elf/tst-pie1.c: New test.
+	* elf/tst-piemod1.c: New file.
+
+2004-07-14  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #266]
+	* manual/string.texi (l64a): Note that the static buffer is 7 bytes
+	long.  Rewrite example code so that it takes into account l64a output
+	shorter than 6 characters.
+	Reported by Julian Graham <julian.graham@aya.yale.edu>.
+
+2004-07-14  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/sh/dl-machine.h: Don't reset _dl_starting_up here.
+	(elf_machine_rela_relative): Remove unused variable.
+
+2004-07-12  Paul Eggert  <eggert@cs.ucla.edu>
+
+	[BZ #263]
+	* sysdeps/unix/sysv/linux/getloadavg.c (getloadavg): Don't store
+	outside the buffer if the read returns 0.  __strtod_l can't set
+	endp to NULL, so remove a test for that case.
+
+2004-07-12  Roland McGrath  <roland@redhat.com>
+
+	* manual/signal.texi (Interrupted Primitives): Make clear that
+	TEMP_FAILURE_RETRY evaluates its expression as long int and compares
+	it to -1 to define "failure".
+
+2004-07-10  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/ldconfig.c: Define PROCINFO_CLASS as static before including
+	ldsodefs.h.
+	* sysdeps/generic/ldsodefs.h: Only define PROCINFO_CLASS if it is not
+	already defined.
+	* sysdeps/i386/dl-procinfo.c: Define PROCINFO_CALLS only if not
+	already defined.
+
+	* elf/rtld.c (print_statistics): Mark with noinline attribute.
+	* sysdeps/i386/dl-machine.h (elf_machine_rel): Mark with always_inline
+	attribute.
+	(elf_machine_rel_relative): Likewise.
+
+	* include/string.h: Add libc_hidden_proto for __strtok_r and
+	__strsep_g.
+	* sysdeps/generic/strsep.c: Add libc_hidden_def.
+	* sysdeps/generic/strtok_r.c: Likewise.
+	* sysdeps/i386/strtok_r.S: Add alias for internal symbol.
+	* sysdeps/i386/i686/strtok_r.S: Likewise.
+	* sysdeps/x86_64/strtok_r.S: Likewise.
+
+2004-07-09  Alexandre Oliva  <aoliva@redhat.com>
+
+	* inet/getnetgrent_r.c (internal_setnetgrent): Make it hidden
+	instead of internal-linkage, such that we can alias to it.
+	(internal_endnetgrent, internal_getnetgrent_r): Likewise.
+
+2004-07-09  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/fcntl.c (__fcntl_nocancel): Move attribute
+	to the front for gcc 3.5+.
+	* sysdeps/unix/sysv/linux/i386/fcntl.c: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/fcntl.c: Likewise.
+	* include/fcntl.h: Don't declare __fcntl_nocancel here if
+	NO_CANCELLATION is defined.
+
+2004-07-07  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/dl-fini.c (_dl_fini): Add nloaded variable, load
+	GL(dl_nloaded) into it while still in critical section.
+
+2004-07-06  Thorsten Kukuk  <kukuk@suse.de>
+
+	* nss/getent.c (print_group, print_passwd): Use %lu format for
+	`unsigned long int' values.
+
+	* sysdeps/unix/sysv/linux/i386/fcntl.c (__fcntl_nocancel): Fix syntax
+	error typos in goto statements.
+
+2004-07-07  Roland McGrath  <roland@frob.com>
+
+	* Makefile (dist-separate): New variable.
+	(glibc-%.tar rule): Make separate tar files for add-ons listed there.
+	Depend on their configure files.
+	(dist-do-separate-dirs): New canned sequence to do that.
+	(dist-separate-libidn, dist-separate-linuxthreads): New variables.
+	(dist): Depend on add-on tar files based on $(dist-separate).
+	(dist-version): New variable, default to $(version).
+	(tag-for-dist, dist): Use that in place of $(version) in deps.
+
+2004-07-07  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-fini.c (_dl_fini): Move the unlock of the ld.so lock
+	before the loop running the destructors.
+
+2004-05-18  Andreas Schwab  <schwab@suse.de>
+
+	* elf/dl-load.c (_dl_map_object_from_fd): Use the end address of
+	the first segment for mprotect, not l_text_end.
+
+2004-07-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-profile.c (_dl_start_profile): Compact error handling.
+	Remove hashfraction variable.  Make kcount and kcountsize local
+	variables.
+
+	* elf/dl-init.c: Don't define and use _dl_starting_up if
+	HAVE_INLINED_SYSCALLS is defined and the variable is not used.
+	* elf/dl-support.c: Likewise.
+	* elf/rtld.c: Likewise.
+	* elf/dl-misc.c (_dl_debug_vdprintf): Use writev syscall directly
+	if HAVE_INLINED_SYSCALLS is defined.
+	* sysdeps/powerpc/powerpc64/dl-machine.h: Don't reset _dl_starting_up
+	here.
+	* sysdeps/powerpc/powerpc32/dl-start.S: Likewise.
+	* sysdeps/unix/sysv/linux/configure.in: Define HAVE_INLINED_SYSCALLS.
+	* config.h.in: Add entry for HAVE_INLINED_SYSCALLS.
+
+	* sysdeps/posix/profil.c: If compiled for ld.so, omit code which
+	is needed to stop profiling.
+	* elf/dl-open.c (dl_open_worker): If a newly opened object is to be
+	profiled make sure it cannot be unloaded.
+
+	* sysdeps/unix/sysv/linux/dl-origin.c: Inline readlink syscall.
+
+	* sysdeps/unix/sysv/linux/fcntl.c: If compiled without cancellation
+	support, make sure the helper function is inlined.
+	* sysdeps/unix/sysv/linux/pread.c: Likewise.
+	* sysdeps/unix/sysv/linux/pwrite.c: Likewise.
+	* sysdeps/unix/sysv/linux/i386/fcntl.c: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/fcntl.c: Likewise.
+
+2004-07-05  Jakub Jelinek  <jakub@redhat.com>
+
+	* include/string.h (ffs): Add libc_hidden_builtin_proto.
+	* sysdeps/rs6000/ffs.c (ffs): Add libc_hidden_builtin_def.
+	* sysdeps/alpha/alphaev67/ffs.S (ffs): Likewise.
+	* sysdeps/alpha/ffs.S (ffs): Likewise.
+	* sysdeps/s390/ffs.c (ffs): Likewise.
+	* sysdeps/powerpc/ffs.c (ffs): Likewise.
+	* sysdeps/i386/ffs.c (ffs): Likewise.
+	* sysdeps/i386/i686/ffs.c (ffs): Likewise.
+	* sysdeps/m68k/ffs.c (ffs): Likewise.
+	* sysdeps/generic/ffs.c (ffs): Likewise.
+	* sysdeps/m88k/ffs.c (ffs): Likewise.
+	* sysdeps/am29k/ffs.c (ffs): Likewise.
+	* sysdeps/i960/ffs.c (ffs): Likewise.
+	* sysdeps/x86_64/ffs.c (ffs): Likewise.
+
+	* Makerules (check-abi): Use diff -p -U 0 instead of diff -pu0.
+
+	* sysdeps/powerpc/novmx-longjmp.c (__libc_longjmp,
+	__libc_siglongjmp): Remove symbol_version.
+	* sysdeps/powerpc/longjmp.c (__libc_longjmp, __libc_siglongjmp):
+	Export @@GLIBC_PRIVATE, not @@GLIBC_2.3.4.
+	* sysdeps/powerpc/sigjmp.c (__sigjmp_save): Use strong_alias
+	unconditionally.
+	* sysdeps/powerpc/novmx-sigjmp.c (__sigjmp_save): Remove.
+	* sysdeps/powerpc/powerpc32/__longjmp.S (__longjmp): Use
+	strong_alias instead of default_symbol_version, remove
+	symbol_version.
+	* sysdeps/powerpc/powerpc64/__longjmp.S (__longjmp): Likewise.
+	* sysdeps/powerpc/powerpc32/bsd-setjmp.S (__novmx__setjmp): Change
+	into strong_alias to __novmxsetjmp.
+	(__vmx__setjmp): Similarly with __vmxsetjmp.
+	(__setjmp): Make it strong_alias to __vmx__setjmp, remove
+	default_symbol_version and symbol_version.
+	* sysdeps/powerpc/powerpc64/bsd-setjmp.S (__novmx__setjmp): Change
+	into strong_alias to __novmxsetjmp.
+	(__vmx__setjmp): Similarly with __vmxsetjmp.
+	(__setjmp): Make it strong_alias to __vmx__setjmp, remove
+	default_symbol_version and symbol_version.
+
+	* nscd/nscd_getgr_r.c: Include stdio-common/_itoa.h.
+
+2004-07-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/rtld.c (dl_main): Mark dyn_temp with attribute_relro.
+
+2004-07-04  Matthew Reppert  <arashi@kai.vm.bytemark.co.uk>
+
+	* sysdeps/unix/sysv/linux/i386/glob64.c (glob64): Use libc_hidden_ver
+	instead of libc_hidden_def.
+
+2004-07-01  Roland McGrath  <roland@redhat.com>
+
+	* aclocal.m4: Add provide for AC_CONFIG_AUX_DIR_DEFAULT.
+
+	* Makeconfig ($(common-objpfx)shlib-versions.v.i): Move top-level
+	$(..)shlib-versions file to last in deps list.  This lets add-ons give
+	more-specific matches that override defaults in the top-level file.
+
+	* sysdeps/unix/sysv/linux/configure.in: If arch_minimum_kernel was
+	already set, don't set it or libc_cv_gcc_unwind_find_fde.
+	* sysdeps/unix/sysv/linux/configure: Regenerated.
+
+2004-07-01  Martin Schwidefsky  <schwidefsky@de.ibm.com>
+
+	* sysdeps/s390/fpu/bits/mathinline.h [__LIBC_INTERNAL_MATH_INLINES]
+	(__ieee754_sqrt): Define as __MATH_INLINE using sqdbr instruction.
+	(__ieee754_sqrtf): Define as __MATH_INLINE using sqebr instruction.
+	* sysdeps/s390/fpu/e_sqrt.c: New file.
+	* sysdeps/s390/fpu/e_sqrtf.c: New file.
+	* sysdeps/s390/Implies: New file.
+	* sysdeps/s390/s390-32/Implies: Remove ieee754, move
+	ieee754/dbl-64 and ieee754/flt-32 to s390/Implies.
+	* sysdeps/s390/s390-64/Implies: Likewise.
+
+2004-06-30  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/s390/s390-32/sysdep.S: Include tls.h.
+	* sysdeps/unix/sysv/linux/s390/s390-64/sysdep.S: Likewise.
+
+	* sysdeps/unix/sysv/linux/s390/s390-32/clone.S (thread_start):
+	DO_CALL (exit, 1) instead of branching to _exit.
+	* sysdeps/unix/sysv/linux/s390/s390-64/clone.S (thread_start):
+	Likewise.
+
+	* sysdeps/s390/fpu/bits/mathinline.h: New file.
+
+	* include/glob.h (glob64): Add libc_hidden_proto.
+	* sysdeps/generic/glob64.c (glob64): Add libc_hidden_def.
+	* sysdeps/gnu/glob64.c (glob64): Likewise.
+	* sysdeps/unix/sysv/linux/i386/glob64.c (glob64): Likewise.
+
+2004-06-11  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/i386/fpu/libm-test-ulps: Update for GCC 3.4.
+	* sysdeps/s390/fpu/libm-test-ulps: Likewise.
+
+2004-06-11  Dwayne Grant McConnell  <dgm69@us.ibm.com>
+
+	* sysdeps/powerpc/fpu/s_lround.c: Removed.
+	* sysdeps/powerpc/fpu/s_lroundf.c: Removed.
+	* sysdeps/powerpc/powerpc32/fpu/s_ceilf.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_ceil.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_floorf.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_floor.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_lrint.c: Removed.
+	* sysdeps/powerpc/powerpc32/fpu/s_lrint.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_lroundf.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_lround.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_rintf.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_rint.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_roundf.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_round.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_truncf.S: New file.
+	* sysdeps/powerpc/powerpc32/fpu/s_trunc.S: New file.
+
+2004-06-30  Ulrich Drepper  <drepper@redhat.com>
+
+	* include/net/if.h: Handle if_nameindex and if_freenameindex with
+	libc_hidden_proto.
+	* sysdeps/unix/sysv/linux/netlinkaccess.h: New file.
+	* sysdeps/unix/sysv/linux/ifaddrs.c: Export netlink handling functions.
+	* sysdeps/unix/sysv/linux/if_index.c (if_nameindex): Implement using
+	netlink if possible.  Fall back on ioctl method if necessary.
+	* sysdeps/unix/sysv/linux/Dist: Add netlinkaccess.h.
+
+	* include/unistd.h: Declare __truncate.
+	* sysdeps/generic/truncate.c: Also define __truncate.
+	* sysdeps/mach/hurd/truncate.c: Likewise.
+	* sysdeps/unix/common/syscalls.list: Likewise.
+	* sysdeps/unix/sysv/linux/truncate64.c: Use __truncate, not truncate.
+
+2004-06-29  Ulrich Drepper  <drepper@redhat.com>
+
+	* stdio-common/printf-parsemb.c (__parse_one_specmb): Initialize
+	info.extra.  Patch by Marcus Meissner.
+
+2004-06-29  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
+
+	* sysdeps/unix/sysv/linux/sh/sysdep.h (SYSCALL_ERROR_HANDLER):
+	Fix branch offset for a PLT entry.
+
+2004-06-20  Jim Meyering  <jim@meyering.net>
+
+	* malloc/obstack.h (obstack_base): Cast to `void *', to align with
+	documentation.
+
+2004-06-28  Ulrich Drepper  <drepper@redhat.com>
+
+	* inet/bug-if1.c (do_test): Simply use 0 as invalid index.
+	Workaround for [BZ #232].
+
+	* sysdeps/unix/sysv/linux/if_index.c (if_nameindex): Use extend_alloca.
+
+2004-06-28  GOTO Masanori  <gotom@debian.or.jp>
+
+	* iconv/gconv_simple.c: Use get16/put16 for user given buffer
+	in ucs2/ucs2reverse when unaligned memory access is attempted.
+	* iconv/tst-iconv5.c: New file.
+	* iconv/Makefile (tests): Add tst-iconv5.
+
+2004-06-28  Jakub Jelinek  <jakub@redhat.com>
+
+	* inet/bug-if1.c: Include <string.h>.
+
+2004-06-19  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/posix/waitid.c (do_waitid) [DO_WAITID]: Define function
+	under this macro name instead.
+	[NO_DO_WAITID]: Don't define it at all.
+	(do_waitid) [WNOWAIT, WEXITED]: If these POSIX.1 waitid flag bits are
+	defined, then return ENOTSUP for combinations of selection bits other
+	than WEXITED and WEXITED|WSTOPPED, which this version cannot support.
+
+	* posix/tst-waitid.c: New file.
+	* posix/Makefile (tests): Add it.
+
+2004-06-28  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/alpha/sysdep.h (inline_syscall6): Fix a typo.
+
+	[BZ #231]
+	* sysdeps/unix/alpha/sysdep.S (__syscall_error): Avoid !samegp
+	relocation in librt.so.
+
+	[BZ #230]
+	* sysdeps/alpha/dl-machine.h (_dl_start_user): Use ldah/ldl to load
+	_dl_skip_args.  Patch by Jakub Bogusz <qboosh@pld-linux.org>.
+
+2004-06-27  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/ldconfig.c (add_dir): Take chroot into account.
+	Based on changes by HJ Lu and Hideki Iwamoto.
+
+	* nscd/connections.c (invalidate_cache): If the host cache has to
+	be invalidated, re-read resolv.conf.
+
+	* resolv/resolv.h (RES_NOIP6DOTINT): Define.
+	* resolv/res_init.c (res_setoptions): Handle no-ip6-dotint option.
+	* resolv/gethnamaddr.c (gethostbyaddr): Don't lookup with .ip6.int
+	if RES_NOIP6DOTINT flag is set.
+	* resolv/nss_dns/dns-host.c (_nss_dns_gethostbyaddr_r): Likewise.
+
+2004-06-25  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/powerpc/fcntl.c: Move to...
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/fcntl.c: ... here.
+	* sysdeps/unix/sysv/linux/powerpc/lockf64.c: Move to...
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/lockf64.c: ... here.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/fcntl.c: New file.
+
+	* sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h (F_GETLK, F_SETLK,
+	F_SETLKW): Fix values for -m32 -D_FILE_OFFSET_BITS=64.
+
+2004-06-21  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #231]
+	* sysdeps/unix/alpha/Makefile: New file.
+	* sysdeps/unix/alpha/rt-sysdep.S: New file.
+	Reported by Jakub Bogusz <qboosh@pld-linux.org>.
+
+2004-06-18  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/posix/getaddrinfo.c (gaih_inet): For AI_CANONNAME,
+	determine the canonical name only for the first returned entry.
+
+	* inet/bug-if1.c: New file.
+	* inet/Makefile (tests): Add bug-if1.
+
+2004-06-18  Roland McGrath  <roland@frob.com>
+
+	* Makerules (compile-mkdep-flags): Add -MT $@.
+
+	* Makefile (dist, tag-for-dist): New targets.
+	(files-for-dist, tag-of-stem): New variables.
+	(glibc-%.tar glibc-linuxthreads-%.tar): New pattern rule.
+	Make tar files using cvs export.
+	(%.bz2, %.gz, tag-%): New pattern rules.
+	(distribute): Variable removed.
+	(+subdir_targets): Remove distinfo targets.
+	(echo_subdirs, echo-distinfo, parent_echo-distinfo): Targets removed.
+	(rpm/%): Pattern rule removed.
+	* rpm/Makefile, rpm/rpmrc, rpm/template: Ancient cruft files removed.
+	* Rules (subdir_echo-headers, subdir_echo-distinfo, subdir_dist):
+	Targets removed.
+	* Makerules (dist, subdir_distinfo): Targets removed.
+	($(objpfx)distinfo): Depend on $(distribute).
+	* Make-dist: File removed.
+
+	* NEWS: Fix a typo.  Update bug-reporting instructions.
+
+2004-06-17  Thorsten Kukuk  <kukuk@suse.de>
+
+	* sysdeps/unix/sysv/linux/sched_setaffinity.c
+	(__sched_setaffinity_new): Set errno to EINVAL and return -1 if
+	cpuset is wrongly set.
+
+2004-06-15  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/memcpy.S: Improve instruction scheduling
+	for POWER4 machines.
+
+2004-06-14  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #218]
+	* sunrpc/pmap_prot2.c (xdr_pmaplist): When freeing, remember pml_next
+	in a local variable, point rp to that local variable afterwards.
+
+2004-06-11  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/ieee754/dbl-64/e_sqrt.c (__ieee754_sqrt): Handle special
+	cases properly.
+
+2004-06-17  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/if_index.c (if_indextoname): Correct
+	error value for unknown interface. [BZ #198]
+
+2004-06-13  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/i386/bits/string.h (memcpy): Add () around arguments.
+	(memchr, __memrchr, strlen, strcmp, strncmp, __strchr_g, __strchr_c,
+	__strchrnul_g, __strchrnul_c, strspn, strcspn, strpbrk): Add memory
+	the asm uses as its input, either of size __n where __n is known or
+	0xfffffff.
+	(strstr): Add "memory" clobber.
+
+2004-06-14  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #219]
+	* nss/nsswitch.c (free_mem): Don't try to close a library handle
+	if the handle is invalid.  Patch by David Kimdon <kimdon@esrf.fr>.
+
+2004-06-11  Dmitry V. Levin  <ldv@altlinux.org>
+
+	[BZ #217]
+	* debug/xtrace.sh: Fix typo in error diagnostics.
+
+2004-06-14  Andreas Schwab  <schwab@suse.de>
+
+	* stdio-common/psignal.c (psignal): Don't use BUF when asprintf
+	failed.
+
+2004-06-15  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S: Fix pasto
+	that clobbers r19.  Fix pasto that overflowed sigcontext.v_reserve.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S: Fix pasto
+	that clobbers r19.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S: Fix pasto
+	that clobbers r19.  Fix pasto that overflowed sigcontext.v_reserve.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/getcontext.S:
+	Fix setting of sigcontext.v_regs.  Fix pasto that clobbers r19.
+	Fix pasto that overflowed sigcontext.v_reserve.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S:
+	Fix pasto that clobbers r19.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S: Fix setting
+	of sigcontext.v_regs.  Fix pasto that clobbers r19.  Fix pasto that
+	overflowed sigcontext.v_reserve.
+
+2004-05-04  H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #150]
+	* sysdeps/generic/strtol_l.c (DEF): Use ".gnu.linkonce.r."
+	instead of ".gnu.linkonce.ro." as the prefix for linkonce
+	read-only section name.
+
+2004-06-14  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/div_libc.h (_ITOFS): Use "sp" not "$sp".
+	(_ITOFT, _FTOIT, _ITOFT2): Likewise.
+
+2004-06-08  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/s390/s390-32/backtrace.c (init): Guard with #ifdef SHARED.
+	(unwind_backtrace, unwind_getip): Define #ifndef SHARED.
+	(__backtrace): Don't call init #ifndef SHARED.
+	* sysdeps/s390/s390-64/backtrace.c (init): Guard with #ifdef SHARED.
+	(unwind_backtrace, unwind_getip): Define #ifndef SHARED.
+	(__backtrace): Don't call init #ifndef SHARED.
+	* sysdeps/ia64/backtrace.c (init): Guard with #ifdef SHARED.
+	(unwind_backtrace, unwind_getip): Define #ifndef SHARED.
+	(__backtrace): Don't call init #ifndef SHARED.
+
+2004-06-11  Roland McGrath  <roland@redhat.com>
+
+	* sysdeps/arm/strlen.S [__ARMEB__]: Compute correctly for big-endian.
+	From Krzysztof Halasa <khc@pm.waw.pl>.
+
+2004-06-10  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/tls-macros.h [__s390x__] (TLS_LD, TLS_GD): Clobber also r14.
+
+2004-06-08  Jakub Jelinek  <jakub@redhat.com>
+
+	[BZ #199]
+	* crypt/md5-crypt.c (__md5_crypt): Only update buflen if realloc
+	succeeds.  Reported by Miles Ohlrich <miles@cray.com>.
+
+	* elf/chroot_canon.c (chroot_canon): Avoid segfault if first malloc
+	fails.  Avoid memory leak if realloc fails.
+
+2004-06-09  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/generic/setenv.c (setenv): Return -1/EINVAL if name is
+	NULL, "" or contains '=' character in it.  Reported by
+	Michael T Kerrisk <mtk-lists@gmx.net>.
+	* stdlib/tst-environ.c: Include errno.h.
+	(main): Add tests for these arguments to setenv/unsetenv.
+
+2004-06-07  Roland McGrath  <roland@frob.com>
+
+	* NEWS: Update bug reporting instructions.  Fix some typos.
+
+2004-06-05  Ulrich Drepper  <drepper@redhat.com>
+
+	* stdio-common/_itoa.h: Don't expand _itoa inline for libc.
+	* stdio-common/_itoa.c: Add _itoa implementation.
+
+	* nscd/nscd_gethst_r.c (__nscd_open_socket): Change implementation
+	to also send request.  Add parameter to allow this.
+	Change callers.
+	* nscd/nscd_getgr_r.c: Change __nscd_open_socket caller.
+	* nscd/nscd_getpw_r.c: Likewise.
+	* nscd/nscd-client.h: Change __nscd_open_socket prototype.
+
+2004-06-05  Andreas Jaeger  <aj@suse.de>
+
+	* sysdeps/unix/sysv/linux/x86_64/makecontext.c (__makecontext):
+	Handle more than 6 args correctly.  Based on a patch by Masahide
+	Washizawa <washi@jp.ibm.com>.
+
+2004-06-04  Ulrich Drepper  <drepper@redhat.com>
+
+	* nscd/nscd_gethst_r.c (nscd_gethst_r): Use __nss_not_use_nscd_hosts
+	in all places, not __nss_not_use_nscd_group.
+	Reported by Philippe Gregoire.
+
+2004-06-03  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/i386/i486/bits/atomic.h: Optimize a bit.
+
+2004-05-07  Dmitry V. Levin  <ldv@altlinux.org>
+
+	* argp/argp-help.c (__argp_error, __argp_failure): Check result
+	of __asprintf call and don't use string if it failed.
+	* stdio-common/psignal.c (psignal): Likewise.
+	* locale/programs/localedef.c (more_help): Likewise.
+	* resolv/res_hconf.c (arg_service_list, arg_trimdomain_list,
+	arg_bool, parse_line): Check result of __asprintf calls and
+	don't use string if they failed.
+	* sunrpc/svc_simple.c (registerrpc, universal): Likewise.
+	* elf/ldconfig.c (parse_conf_include): Check result of __asprintf
+	call and exit if it failed.
+
+2004-05-10  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/posix/sysconf.c (__sysconf) <cases _SC_REALTIME_SIGNALS,
+	_SC_PRIORITY_SCHEDULING, _SC_TIMERS, _SC_ASYNCHRONOUS_IO,
+	_SC_PRIORITIZED_IO, _SC_SYNCHRONIZED_IO, _SC_FSYNC, _SC_MAPPED_FILES,
+	_SC_MEMLOCK, _SC_MEMLOCK_RANGE, _SC_MEMORY_PROTECTION,
+	_SC_MESSAGE_PASSING, _SC_SEMAPHORES, _SC_SHARED_MEMORY_OBJECTS,
+	_SC_THREADS, _SC_THREAD_SAFE_FUNCTIONS, _SC_THREAD_ATTR_STACKADDR,
+	_SC_THREAD_ATTR_STACKSIZE, _SC_THREAD_PRIORITY_SCHEDULING,
+	_SC_THREAD_PRIO_INHERIT, _SC_THREAD_PRIO_PROTECT,
+	_SC_THREAD_PROCESS_SHARED>: Return _POSIX_* value instead of 1.
+	* sysdeps/unix/sysv/linux/sysconf.c (__sysconf)
+	<case _SC_MONOTONIC_CLOCK>: Return _POSIX_VERSION instead of 1.
+
+2004-05-07  Jeroen Dekkers  <jeroen@dekkers.cx>
+
+	* sysdeps/mach/hurd/i386/Makefile (CFLAGS-init-first.c): Add
+	-momit-leaf-frame-pointer.
+
+	* inet/test-ifaddrs.c (addr_string): Surround AF_PACKET case with
+	#ifdef AF_PACKET.
+
+	* sysdeps/mach/hurd/getcwd.c
+	(_hurd_canonicalize_directory_name_intern): Only realloc when
+	size is <= 0.
+
+	* sysdeps/mach/hurd/mmap.c (__mmap): Fail when addr or offset
+	isn't page aligned.
+
+	* sysdeps/mach/hurd/spawni.c (EXPAND_DTABLE): Set dtablesize to
+	new size.
+
+	* sysdeps/mach/hurd/Versions (GLIBC_PRIVATE): Add __libc_read,
+	__libc_write and __libc_lseek64.
+
+2004-05-29  Roland McGrath  <roland@redhat.com>
+
+	* elf/Makefile (shared-only-routines): Add dl-caller.
+
+2004-05-28  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/powerpc/configure.in: New file.
+
+2004-05-28  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/powerpc/powerpc32/fpu/__longjmp-common.S: Add .machine
+	"altivec" to enable VMX instructions.
+	* sysdeps/powerpc/powerpc32/fpu/setjmp-common.S: Likewise.
+	* sysdeps/powerpc/powerpc64/__longjmp-common.S: Likewise.
+	* sysdeps/powerpc/powerpc64/setjmp-common.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/getcontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/setcontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/swapcontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/getcontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S: Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S: Likewise.
+
+2004-05-27  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/i386/fpu/bits/mathinline.h (__expm1_code): Define using
+	__builtin_expm1l for GCC 3.5+.
+	(__expl): Define using __builtin_expl for GCC 3.4+.
+	(exp, expf, expl): Don't define for GCC 3.4+.
+	(tan, tanf, tanl): Don't define for GCC 3.5+.
+	(__atan2l): Define using __builtin_atan2l for GCC 3.4+.
+	(atan2, atan2f, atan2l): Don't define for GCC 3.4+ or !__FAST_MATH__.
+	(fmod, fmodf, fmodl): Don't define for GCC 3.5+ or !__FAST_MATH__.
+	(fabsf, fabsl): Only provide if __USE_MISC or __USE_ISOC99.
+	(sin, sinf, sinl, cos, cosf, cosl, log, logf, logl): Don't define
+	for GCC 3.4+.
+	(log10, log10f, log10l, asin, asinf, asinl, acos, acosf, acosl):
+	Don't define for GCC 3.5+.
+	(atan, atanf, atanl): Don't define for GCC 3.4+ or !__FAST_MATH__.
+	(log1p, log1pf, log1pl, logb, logbf, logbl, log2, log2f, log2l): Don't
+	define for GCC 3.5+.
+	(drem, dremf, dreml): Don't define for GCC 3.5+ or !__FAST_MATH__.
+	* sysdeps/sparc/fpu/bits/mathinline.h (sqrt, sqrtf, sqrtl): Don't
+	define for GCC 3.2+.
+
+2004-05-27  Jakub Jelinek  <jakub@redhat.com>
+
+	* string/bits/string2.h (__bzero): Define even for GCC 3.0+.
+	* sysdeps/alpha/stpcpy.S (stpcpy): Add libc_hidden_builtin_def.
+	* sysdeps/alpha/alphaev67/stpcpy.S (stpcpy): Likewise.
+	* sysdeps/powerpc/powerpc32/stpcpy.S (stpcpy): Likewise.
+	* sysdeps/powerpc/powerpc64/stpcpy.S (stpcpy): Likewise.
+	* sysdeps/sparc/sparc32/stpcpy.S (stpcpy): Likewise.
+	* sysdeps/sparc/sparc64/stpcpy.S (stpcpy): Likewise.
+	* sysdeps/i386/stpcpy.S (stpcpy): Likewise.
+	* sysdeps/i386/i586/stpcpy.S (stpcpy): Likewise.
+	* sysdeps/generic/stpcpy.c (stpcpy): Likewise.
+	* sysdeps/x86_64/stpcpy.S (stpcpy): Likewise.
+	* sysdeps/i386/i586/memcpy.S (memcpy): Remove
+	libc_hidden_builtin_def if MEMPCPY_P.
+	* sysdeps/x86_64/memcpy.S (memcpy): Likewise.
+	* sysdeps/i386/i686/mempcpy.S (mempcpy): Add libc_hidden_builtin_def.
+	* sysdeps/i386/i586/mempcpy.S (mempcpy): Likewise.
+	* sysdeps/generic/mempcpy.c (mempcpy): Likewise.
+	* sysdeps/x86_64/mempcpy.S (mempcpy): Likewise.
+
+2004-05-26  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/generic/bits/in.h
+	(IMPLINK_IP, IMPLINK_LOWEXPER, IMPLINK_HIGHEXPER): Macros removed.
+	These are long obsolete in BSD systems where they originated.
+	* conform/data/netinet/in.h-data: Remove `allow IMPLINK_*'.
+
+2004-05-26  Jakub Jelinek  <jakub@redhat.com>
+
+	* include/string.h (mempcpy, stpcpy): Add libc_hidden_builtin_proto.
+	* string/bits/string2.h (memset): Disable macro for GCC 3.0+.
+	(__mempcpy): Use __builtin_mempcpy for GCC 3.4+.
+	(strchr): For GCC 3.2+, only use __rawmemchr if second argument is
+	constant '\0' and first argument is not constant.
+	(__stpcpy): Use __builtin_stpcpy for GCC 3.4+.
+	(strncpy): Remove #ifdef _USE_STRING_ARCH_mempcpy variant.
+	For GCC 3.2+ use __builtin_strncpy.
+	(strncat): For GCC 3.2+ use __builtin_strncat.
+	(strcmp): For GCC 3.2+ use __builtin_strcmp if both arguments are
+	constant.
+	(strcspn, strspn, strpbrk): For GCC 3.2+, use builtin function
+	if both arguments are constant.
+
+2004-05-26  Ulrich Drepper  <drepper@redhat.com>
+
+	* nss/nss_files/files-hosts.c: Fix condition for looking up IPv4
+	mapped addresses in gethostbyaddr.
+
+2004-05-25  Ulrich Drepper  <drepper@redhat.com>
+
+	* nss/digits_dots.c (__nss_hostname_digits_dots): Remove typep and
+	flags parameter, convert afp to simple int parameter.  Adjust code.
+	typep was never != NULL and flags therefore also unused.  *afp is
+	never modified.
+	* nss/nsswitch.h: Adjust __nss_hostname_digits_dots prototype.
+	* nss/getXXbyYY.c: Remove HAVE_TYPE handling.  Adjust af parameter
+	handling for __nss_hostname_digits_dots calls.
+	* nss/getXXbyYY_r.c: Likewise.
+
+	* elf/dl-load.c (_dl_map_object_from_fd): Map DSOs with MAP_DENYWRITE.
+
+2004-05-25  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/powerpc/fpu/Makefile: Make ld.so a dependency of libm.so.
+	* sysdeps/powerpc/fpu/bits/mathinline.h [__LIBC_INTERNAL_MATH_INLINES]
+	(__ieee754_sqrt): Define as __MATH_INLINE using fsqrt instruction.
+	(__ieee754_sqrtf): Define as __MATH_INLINE using fsqrts instruction.
+	* sysdeps/powerpc/fpu/e_sqrt.c (__slow_ieee754_sqrt): Moved
+	implementation from w_sqrt.c.
+	* sysdeps/powerpc/fpu/e_sqrtf.c (__slow_ieee754_sqrtf): Moved
+	implementation from w_sqrtf.c.
+	* sysdeps/powerpc/fpu/w_sqrt.c (__sqrt): Wrapper implementation
+	using inline __ieee754_sqrt().
+	* sysdeps/powerpc/fpu/w_sqrtf.c (__sqrtf): Wrapper implementation
+	using inline __ieee754_sqrtf().
+	* sysdeps/powerpc/powerpc32/sysdep.h [__ASSEMBLER__]: Include
+	<sysdeps/powerpc/sysdep.h> independent of __ASSEMBLER__.
+	* sysdeps/powerpc/sysdep.h [__ASSEMBLER__] (PPC_FEATURE_*): Define
+	PPC_FEATURE_* independent of __ASSEMBLER__.
+
+2004-05-25  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/pthread/aio_notify.c: Use <> instead of "" for aio_misc.h
+	include.
+	(aio_start_notify_thread): Define if not defined.
+	(notify_func_wrapper): Use it.
+	* sysdeps/pthread/aio_misc.c: Use <> instead of "" for aio_misc.h
+	include.
+	(aio_create_helper_thread): Define if not defined.
+	(__aio_create_helper_thread): New function.
+	(__aio_enqueue_request): Use aio_create_helper_thread.
+
+	* nis/ypclnt.c (ypall_data, ypall_foreach): Remove.
+	(struct ypresp_all_data): New type.
+	(__xdr_ypresp_all): Change second argument to
+	struct ypresp_all_data *.  Replace ypall_foreach and
+	ypall_data with objp->foreach and objp->data.
+	(yp_all): Remove status variable, add data.  Replace
+	all uses of status with data.status.  Initialize data.foreach
+	and data.data instead of ypall_foreach and ypall_data.
+
+2004-05-24  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/dl-lookup.c (add_dependency): Set DF_1_NODELETE bit
+	in l_flags_1, not in l_flags.
+
+2004-04-10  Robert Millan  <robertmh@gnu.org>
+
+	* sysdeps/unix/sysv/linux/bits/in.h: Cosmetic fixes to get in sync
+	with sysdeps/generic/bits/in.h.
+
+2004-05-25  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/generic/unwind-dw2-fde-glibc.c: Change copyright terms from
+	GCC GPL to standard glibc LGPL.
+
+2004-05-24  Ulrich Drepper  <drepper@redhat.com>
+
+	* manual/string.texi (Copying and Concatenation): Fixed second
+	concat example.
+	Reported by Fabian Pietsch <fabian@zzznowman.dyndns.org>.
+
+2004-05-23  Ulrich Drepper  <drepper@redhat.com>
+
+	* malloc/obstack.c: Don't allow linking with _obstack.
+
+2004-05-23  Andreas Schwab  <schwab@suse.de>
+
+	* sysdeps/m68k/m68020/bits/atomic.h: Use "+m" constraint instead
+	of separate "m" constraints.
+
+2004-05-15  Chris Demetriou  <cgd@broadcom.com>
+
+	* sysdeps/mips/dl-machine.h (ELF_DL_FRAME_SIZE)
+	(ELF_DL_SAVE_ARG_REGS, ELF_DL_RESTORE_ARG_REGS): For the N32
+	and 64 ABIs, save and restore regs $10 and $11 (a6 and a7).
+
+2004-05-20  Paul Eggert  <eggert@cs.ucla.edu>
+
+	* malloc/obstack.c (_obstack) [!defined _LIBC]: Remove; not used.
+	Add comment explaining why libc still defines it.
+
+2004-05-19  Paul Eggert  <eggert@cs.ucla.edu>
+
+	* malloc/obstack.h (_obstack_free, obstack_1grow, obstack_1grow_fast,
+	obstack_alignment_mask, obstack_alloc, obstack_base,
+	obstack_blank, obstack_blank_fast, obstack_chunk_size,
+	obstack_copy, obstack_copy0, obstack_finish, obstack_grow,
+	obstack_grow0, obstack_init, obstack_int_grow,
+	obstack_int_grow_fast, obstack_make_room, obstack_memory_used,
+	obstack_next_free, obstack_object_size, obstack_ptr_grow,
+	obstack_ptr_grow_fast, obstack_room): Remove declarations of
+	nonexistent functions.
+
+2004-05-18  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/sysdep.h [__ASSEMBLER__]: Include
+	<sysdeps/powerpc/sysdep.h> independent of __ASSEMBLER__.
+	[PIC]: Redundant for powerpc64, removed.
+	(ENTRY): Generate size and alignment for opd entry.
+	(EALIGN): Generate size and alignment for opd entry.
+	(END): Use DOT_LABEL in ASM_SIZE_DIRECTIVE.
+	(END_GEN_TB): Use DOT_LABEL in ASM_SIZE_DIRECTIVE.
+
+2004-05-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* misc/regexp.h: Say the functions have been withdrawn.
+
+	* wcsmbs/tst-wcpncpy.c: Add more tests.
+
+2004-05-18  Roland McGrath  <roland@frob.com>
+
+	* sysdeps/generic/unwind.h: Change copyright terms from GCC GPL to
+	standard glibc LGPL.
+	* sysdeps/generic/unwind-dw2.c: Likewise.
+	* sysdeps/generic/unwind-dw2-fde.c: Likewise.
+	* sysdeps/generic/unwind-dw2-fde.h: Likewise.
+	* sysdeps/generic/unwind-pe.h: Likewise.
+
+2004-05-15  Petter Reinholdtsen  <pere@hungry.com>
+
+	* locale/iso-3166.def: Remove YUGOSLAVIA and insert "SERBIA AND
+	MONTENEGRO" which has taken over the code 891.  Patch from
+	Danilo Segan. [BZ #40]
+
+2004-05-15  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/sparc/sparc32/sysdep.h
+	(SYSCALL_ERROR_HANDLER): Rename __sparc.get_pic.l7 to
+	__sparc_get_pic_l7.
+
+2004-05-15  Joseph S. Myers  <jsm@polyomino.org.uk>
+
+	* catgets/gencat.c: Update bug reporting instructions.
+	* csu/version.c: Likewise.
+	* debug/catchsegv.sh: Likewise.
+	* debug/pcprofiledump.c: Likewise.
+	* debug/xtrace.sh: Likewise.
+	* elf/ldd.bash.in: Likewise.
+	* iconv/iconv_prog.c: Likewise.
+	* iconv/iconvconfig.c: Likewise.
+	* locale/programs/locale.c: Likewise.
+	* locale/programs/localedef.c: Likewise.
+	* login/programs/pt_chown.c: Likewise.
+	* malloc/memusage.sh: Likewise.
+	* malloc/memusagestat.c: Likewise.
+	* malloc/mtrace.pl: Likewise.
+	* manual/crypt.texi: Likewise.
+	* manual/install.texi: Likewise.
+	* nss/makedb.c: Likewise.
+
+2004-05-14  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/sparc/sparc32/dl-machine.h (elf_machine_rela): Only
+	CHECK_STATIC_TLS if sym != NULL.
+	* sysdeps/sh/dl-machine.h (elf_machine_rela): Likewise.
+	* sysdeps/i386/dl-machine.h (elf_machine_rela): Likewise.
+
+2004-05-12  Andreas Schwab  <schwab@suse.de>
+
+	* posix/regex_internal.c (build_wcs_buffer): Also set pstr->mbs
+	when translating.
+
+2004-05-13  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* Rules (xtests): Depend on tests.
+
+2004-05-13  Jakub Jelinek  <jakub@redhat.com>
+
+	* libio/genops.c (_IO_default_xsputn): Avoid one overflow per char if
+	count is negative, yet write_ptr < write_end.
+	(_IO_default_xsgetn): Avoid one underflow per char if count is
+	negative, yet read_ptr < read_end.
+
+2004-05-12  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/unix/sysv/linux/powerpc/bits/termios.h (XTABS): Define XTABS
+	equal to TAB3.
+
+2004-05-12  Ulrich Drepper  <drepper@redhat.com>
+
+	* elf/dl-lookup.c (_dl_lookup_symbol_x): Correct _dl_signal_cerror
+	call.
+
+2004-05-10  Roland McGrath  <roland@frob.com>
+
+	* hurd/hurdlookup.c (__hurd_file_name_split): Return ENOENT for "".
+	(__hurd_directory_name_split): Likewise.
+
+2004-05-10  Ulrich Drepper  <drepper@redhat.com>
+
+	* po/fr.po: Update from translation team.
+
+2004-05-10  Andreas Jaeger  <aj@suse.de>
+
+	* sysdeps/unix/sysv/linux/x86_64/makecontext.c (__makecontext):
+	Correctly compute alignment.
+	Patch by Michael Matz <matz@suse.de>.
+
+2004-05-09  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/generic/sigpause.c: Prevent sigpause prototype.
+	* sysdeps/posix/sigpause.c: Likewise.
+	* signal/signal.h: Don't define sigpause macro unless needed.
+
+2004-05-08  Jakub Jelinek  <jakub@redhat.com>
+
+	* configure.in (libc_cv_libgcc_s_suffix): New check.
+	(libc_cv_as_needed): Use -lgcc_s$libc_cv_libgcc_s_suffix.
+	* config.make.in (libgcc_s_suffix): Set.
+	* Makeconfig (libgcc_eh): Use -lgcc_s$(libgcc_s_suffix).
+
+2004-05-08  Ulrich Drepper  <drepper@redhat.com>
+
+	* signal/signal.h: Use BSD sigpause only if BSD behavior is preferred.
+
+2004-04-29  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/unix/sysv/linux/powerpc/powerpc32/sysdep.h (LOADARGS_1,
+	LOADARGS_2, LOADARGS_3, LOADARGS_4, LOADARGS_5, LOADARGS_6):
+	Load argument values into temporary variables.
+	* sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h (LOADARGS_1,
+	LOADARGS_2, LOADARGS_3, LOADARGS_4, LOADARGS_5, LOADARGS_6):
+	Likewise.
+
+2004-05-07  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/Makefile: mq_send.c and mq_receive.c
+	need to be compiled with exceptions.
+
+2004-05-06  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/ifreq.c (__ifreq): Fix memory handling.
+	* sysdeps/generic/ifreq.c (__ifreq): Fix memory handling.
+
+	* resolv/res_hconf.c (_res_hconf_reorder_addrs): Make clear that
+	realloc cannot fail.
+
+	* nss/nss_files/files-netgrp.c (EXPAND): Free buffer which cannot
+	be expanded.
+
+	* nis/nis_table.c: Clean up memory handling.
+	* nis/nis_subr.c (nis_getnames): Clean up memory handling.
+	* nis/nis_removemember.c (nis_removemember): Add comment
+	explaining use of realloc.
+
+	* math/tgmath.h (fabs): Use __TGMATH_UNARY_REAL_IMAG_RET_REAL.
+	(carg): Likewise.
+	Patch by Lev S Bishop <lev.bishop@yale.edu>.
+
+	* math/bug-tgmath1.c (main): Test fabs and carg as well.
+
+2004-05-06  Richard Henderson  <rth@redhat.com>
+
+	* elf/elf.h (AT_L1I_CACHESHAPE, AT_L1D_CACHESHAPE,
+	AT_L2_CACHESHAPE, AT_L3_CACHESHAPE): New.
+	* sysdeps/unix/sysv/linux/alpha/Versions: Export
+	__libc_alpha_cache_shape as a private symbol.
+	* sysdeps/unix/sysv/linux/alpha/dl-sysdep.c: New file.
+	* sysdeps/unix/sysv/linux/alpha/sysconf.c: New file.
+
+2004-05-06  Richard Henderson  <rth@redhat.com>
+
+	* sysdeps/alpha/div_libc.h (_ITOFS, _ITOFT, _FTOIT, _ITOFT2): New.
+	* sysdeps/alpha/divl.S: Use them.
+	* sysdeps/alpha/divq.S: Likewise.
+	* sysdeps/alpha/divqu.S: Likewise.
+	* sysdeps/alpha/reml.S: Likewise.
+	* sysdeps/alpha/remq.S: Likewise.
+	* sysdeps/alpha/remqu.S: Likewise.
+
+2004-05-06  Ulrich Drepper  <drepper@redhat.com>
+
+	* math/tgmath.h (__TGMATH_UNARY_REAL_IMAG_RET_REAL): Define.
+	(cimag): Use it.
+	(creal): Likewise.
+	* math/Makefile (tests): Add bug-tgmath1.
+	* math/bug-tgmath1.c: New file.
+
+2004-05-05  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/alpha/atomicity.h: Remove.
+	* sysdeps/generic/atomicity.h: Remove.
+
+2004-05-05  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/i386/i486/bits/string.h (strpbrk): Cast return to
+	char *.
+
+2004-04-22  David Mosberger  <davidm@hpl.hp.com>
+
+	* sysdeps/unix/sysv/linux/ia64/Makefile (librt-routines): Mention
+	rt-sysdep.
+	* sysdeps/unix/sysv/linux/ia64/rt-sysdep.S: New file.
+
+	* sysdeps/ia64/strcat.c: New file.
+	* sysdeps/ia64/strcat.S: Delete.
+
+2004-05-04  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/ia64/sysdep.S (USE___THREAD): Remove
+	defined.
+
+2004-04-23  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/ia64/fpu/libm_support.h (__libm_error_support): Add
+	libc_hidden_proto.  Define to __GI___libm_error_support for
+	assembly going into libc.so.
+	* sysdeps/ia64/fpu/libc_libm_error.c (__libm_error_support): Add
+	libc_hidden_def.
+
+	* include/libc-symbols.h (HIDDEN_BUILTIN_JUMPTARGET): Define.
+	* sysdeps/ia64/bcopy.S (bcopy): Use it for jump to memmove.
+
+	* sysdeps/unix/sysv/linux/ia64/sysdep.S (__syscall_error): Access
+	gprel errno if RTLD_PRIVATE_ERRNO or __thread __libc_errno/errno
+	if USE___THREAD.
+
+2004-05-03  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/bsd/bits/fcntl.h (F_SETOWN, F_GETOWN): Define if
+	__USE_BSD or __USE_UNIX98.
+	* sysdeps/unix/bsd/ultrix4/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
+	* sysdeps/unix/bsd/bsd4.4/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
+	* sysdeps/unix/bsd/sun/sunos4/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/common/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
+	* sysdeps/unix/sysv/aix/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
+	* sysdeps/unix/sysv/irix4/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
+	* sysdeps/unix/sysv/linux/alpha/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/s390/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/cris/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/sparc/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/mips/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/sh/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/i386/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/m68k/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/ia64/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/arm/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/hppa/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/unix/sysv/linux/x86_64/bits/fcntl.h (F_SETOWN, F_GETOWN):
+	Likewise.
+	* sysdeps/generic/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
+	* sysdeps/mach/hurd/bits/fcntl.h (F_SETOWN, F_GETOWN): Likewise.
+	* io/sys/stat.h (S_ISSOCK, S_IFSOCK): Likewise.
+
+2004-05-01  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/Versions (libc): Remove __libc_wait, __libc_waitpid,
+	__libc_pause, __libc_nanosleep, __libc_pread, __libc_pread64,
+	__libc_pwrite64, __waitid and __pselect @@GLIBC_PRIVATE.
+	* stdlib/Versions (libc): Remove __on_exit@@GLIBC_PRIVATE.
+	* sysdeps/unix/sysv/linux/Versions (libc): Remove
+	__libc_sigaction@@GLIBC_PRIVATE.
+	* sysdeps/unix/sysv/linux/x86_64/Versions (libc): Remove
+	__modify_ldt@@GLIBC_PRIVATE.
+	* socket/Versions (libc): Remove __libc_accept, __libc_send,
+	__libc_recvfrom, __libc_recvmsg, __libc_sendmsg, __libc_recv,
+	__libc_sendto and __libc_connect @@GLIBC_PRIVATE.
+	* stdio-common/Versions (libc): Remove
+	_itoa_upper_digits@@GLIBC_PRIVATE.
+	* resolv/Versions (libresolv): Remove __ns_samename@@GLIBC_PRIVATE.
+	* misc/Versions (libc): Remove __libc_fsync, __libc_msync,
+	__libc_readv and __libc_writev @@GLIBC_PRIVATE.
+	* termios/Versions (libc): Remove __libc_tcdrain@@GLIBC_PRIVATE.
+	* io/Versions (libc): Remove __libc_open, __libc_close, __libc_read,
+	__libc_write, __libc_lseek, __libc_fcntl, __libc_open64 and
+	__libc_lseek64 @@GLIBC_PRIVATE.
+
+2004-04-30  Jakub Jelinek  <jakub@redhat.com>
+
+	* elf/dl-load.c (open_verify): Move e_phentsize check after e_type
+	check.
+
+2004-04-29  Steven Munroe  <sjmunroe@us.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/fpu/s_ceil.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_ceilf.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_floor.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_floorf.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_llrint.c: Removed.
+	* sysdeps/powerpc/powerpc64/fpu/s_llrint.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_llrintf.c: Removed.
+	* sysdeps/powerpc/powerpc64/fpu/s_llrintf.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_llround.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_llroundf.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_lround.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_lroundf.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_rint.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_rintf.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_round.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_roundf.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_trunc.S: New file.
+	* sysdeps/powerpc/powerpc64/fpu/s_truncf.S: New file.
+
+	* math/test-misc.c [LDBL_MANT_DIG == 106] (main): Correct LDBL_MAX
+	mantissa for AIX long double format.
+	* misc/qefgcvt.c [LDBL_MANT_DIG == 106] (NDIGIT_MAX): Define for AIX
+	long double format.
+	* misc/qefgcvt_r.c [LDBL_MANT_DIG == 106] (NDIGIT_MAX): Likewise.
+	* stdlib/fpioconst.c [!__NO_LONG_DOUBLE_MATH && __LDBL_MAX_EXP__>1024]
+	(_fpioconst_pow10): AIX long double format has same exponent range as
+	double.
+	* stdlib/fpioconst.h [!__NO_LONG_DOUBLE_MATH && __LDBL_MAX_EXP__>1024]
+	(LDBL_MAX_10_EXP_LOG): AIX long double format has same exponent range
+	as double.
+
+2004-04-23  Art Haas  <ahaas@airmail.net>
+
+	* sysdeps/unix/sysv/linux/kernel-features.h: Add 'defined'.
+
+2004-04-28  Carlos O'Donell  <carlos@baldric.uwo.ca>
+
+	* sysdeps/unix/sysv/linux/mq_getattr.c: Include <stddef.h>.
+	* sysdeps/unix/sysv/linux/mq_notify.c: Likewise.
+	* sysdeps/unix/sysv/linux/mq_open.c: Likewise.
+	* sysdeps/unix/sysv/linux/mq_receive.c: Likewise.
+	* sysdeps/unix/sysv/linux/mq_send.c: Likewise.
+
+2004-04-29  Philip Blundell  <pb@nexus.co.uk>
+
+	* sysdeps/arm/dl-machine.h (RTLD_START): Avoid unnecessary GOT
+	entries.
+
+2004-04-29  Jakub Jelinek  <jakub@redhat.com>
+
+	* manual/resource.texi (sched_setaffinity, sched_getaffinity): Fix
+	prototypes and description [BZ #131].
+
+	* string/bits/string2.h (strpbrk): Cast NULL to char * [BZ #130].
+	Patch by Ed Catmur <ed@catmur.co.uk>.
+	* string/tst-inlcall.c (main): Add test for strpbrk.
+
+	[BZ #140]
+	* sysdeps/unix/sysv/linux/sys/sysctl.h: Remove linux/compiler.h
+	include.
+	(_LINUX_KERNEL_H, _LINUX_TYPES_H, _LINUX_LIST_H): Only define if not
+	yet defined, #undef back after including linux/sysctl.h if defined
+	here.
+	(__LINUX_COMPILER_H, __user): Define if not yet defined, #undef
+	back after including linux/sysctl.h if defined here.
+
+	* sysdeps/sparc/sparc64/soft-fp/qp_qtoi.c (_Qp_qtoi): Use %f31
+	for single precision register, add it to __asm clobbers [BZ #139].
+	* sysdeps/sparc/sparc64/soft-fp/qp_qtoui.c (_Qp_qtoui): Use %f31
+	for single precision register, add it to __asm clobbers.
+	* sysdeps/sparc/sparc64/soft-fp/qp_qtoux.c (_Qp_qtoux): Use fqtox
+	instead of fqtoi in QP_HANDLE_EXCEPTIONS.
+	* sysdeps/sparc/sparc64/soft-fp/qp_qtox.c (_Qp_qtox): Likewise.
+	Reported by M. H. VanLeeuwen <vanl@megsinet.net>.
+
+2004-04-23  Andreas Schwab  <schwab@suse.de>
+
+	* sysdeps/unix/sysv/linux/m68k/register-dump.h: Use
+	__attribute_used__.
+
+2004-04-22  Philip Blundell  <philb@gnu.org>
+
+	* sysdeps/arm/dl-machine.h (elf_machine_rela): Don't use INTUSE
+	when calling _dl_signal_error.
+	(elf_machine_rel): Likewise.
+
+2004-04-21  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/ia64/setjmp.S: Define _GI___sigsetjmp
+	and use it internally instead of __sigsetjmp.
+	* sysdeps/ia64/fpu/s_frexpf.c (frexpf): Use _GI___libm_frexp_4f.
+	* sysdeps/ia64/fpu/s_frexpl.c (frexpl): Use _GI___libm_frexp_4l.
+	* sysdeps/ia64/fpu/libm_frexp4.S: Define _GI___libm_frexp_4.
+	* sysdeps/ia64/fpu/libm_frexp4f.S: Define _GI___libm_frexp_4f.
+	* sysdeps/ia64/fpu/libm_frexp4l.S: Define _GI___libm_frexp_4l.
+	* sysdeps/ia64/fpu/s_frexp.c (frexp): Use _GI___libm_frexp_4.
+	* sysdeps/ia64/fpu/libm_support.h: Declare _GI___libm_frexp_4,
+	_GI___libm_frexp_4f, _GI___libm_frexp_4l.
+	* sysdeps/ia64/fpu/bits/mathinline.h: New file.
+	* sysdeps/unix/sysv/linux/ia64/__start_context.S: Use
+	HIDDEN_JUMPTARGET for exit call.
+	* sysdeps/unix/sysv/linux/ia64/clone2.S: Use HIDDEN_JUMPTARGET for
+	_exit call.
+	* sysdeps/ia64/bcopy.S: Use HIDDEN_JUMPTARGET for memmove call.
+	* sysdeps/ia64/strcat.S: Use HIDDEN_JUMPTARGET for strlen and
+	strcpy calls.
+
+2004-04-21  Jakub Jelinek  <jakub@redhat.com>
+
+	* posix/tst-chmod.c (do_test): Fix a typo.
+
+	* elf/lateglobal.c (main): Fix error checks.
+	Patch by Stephen Clarke <stephen.clarke@st.com>.
+
+	* manual/ctype.texi (isblank, iswblank): Mark as ISO functions,
+	mention they have been added in ISO C99.
+	Reported by Ben Pfaff <blp@cs.stanford.edu>.
+
+2004-03-31  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* sysdeps/ieee754/ldbl-128/bits/huge_vall.h: Fix typo.
+
+2004-04-20  Jakub Jelinek  <jakub@redhat.com>
+
+	* sysdeps/unix/sysv/linux/shm_open.c (shm_unlink): Change EPERM into
+	EACCES.
+
+2004-04-20  Jakub Jelinek  <jakub@redhat.com>
+
+	* stdio-common/vfscanf.c (_IO_vfscanf): Revert last %% whitespace
+	handling change.
+	* stdio-common/tst-sscanf.c (int_tests): Adjust.
+
+	* nis/nss-nis.c: Include stdlib.h.
+
+	* sysdeps/sparc/sparc32/dl-machine.h (elf_machine_rela): Shut up a
+	warning.
+	* sysdeps/sparc/sparc64/memcmp.S (memcmp): Remove BP_SYM () from
+	libc_hidden_builtin_def.
+
+2004-04-20  Jim Meyering  <jim@meyering.net>
+
+	* misc/error.c (error_tail): Don't leak upon realloc failure.
+
+2004-04-20  Martin Schwidefsky  <schwidefsky@de.ibm.com>
+
+	* sysdeps/unix/sysv/linux/dl-execstack.c (_dl_make_stack_executable):
+	Use RETURN_ADDRESS instead of __builtin_return_address.
+
+2004-04-19  Ulrich Drepper  <drepper@redhat.com>
+
+	* sysdeps/unix/sysv/linux/mq_unlink.c: Rewrite to produce more
+	compact code.
+
+2004-04-20  Jakub Jelinek  <jakub@redhat.com>
+
+	* stdio-common/vfscanf.c (_IO_vfscanf): When skipping whitespace,
+	do input_error () instead of conv_error () and don't look at errno.
+	Don't eat any whitespace before %% if skip_space == 0.
+	* stdio-common/tst-sscanf.c (int_tests): New array.
+	(main): Run int_tests.
+
+
+See ChangeLog.14 for earlier changes.
diff --git a/Makeconfig b/Makeconfig
index 4c3a4e344c..d267e5af21 100644
--- a/Makeconfig
+++ b/Makeconfig
@@ -1,4 +1,4 @@
-# Copyright (C) 1991-2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1991-2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -480,21 +480,32 @@ default-rpath = $(libdir)
 endif
 
 ifndef link-extra-libs
-link-extra-libs = $(LDLIBS-$(@F))
-link-extra-libs-static = $(link-extra-libs)
-link-extra-libs-bounded = $(link-extra-libs)
+ifeq (yes,$(build-shared))
+ifneq ($(common-objpfx),$(objpfx))
+link-extra-libs = $(foreach lib,$(LDLIBS-$(@F)),\
+	$(wildcard $(common-objpfx)$(lib).so$($(notdir $(lib)).so-version) \
+		   $(objpfx)$(lib).so$($(notdir $(lib)).so-version)))
+else
+link-extra-libs = $(foreach lib,$(LDLIBS-$(@F)),$(common-objpfx)$(lib).so$($(notdir $(lib)).so-version))
+endif
+else
+link-extra-libs = $(foreach lib,$(LDLIBS-$(@F)),$(common-objpfx)$(lib).a)
+endif
 endif
 
 # The static libraries.
 ifeq (yes,$(build-static))
 link-libc-static = $(common-objpfx)libc.a $(static-gnulib) $(common-objpfx)libc.a
+link-extra-libs-static = $(foreach lib,$(LDLIBS-$(@F)),$(common-objpfx)$(lib).a)
 else
 ifeq (yes,$(build-shared))
 # We can try to link the programs with lib*_pic.a...
 link-libc-static = $(static-gnulib) $(common-objpfx)libc_pic.a
+link-extra-libs-static = $(link-extra-libs)
 endif
 endif
 link-libc-bounded = $(common-objpfx)libc_b.a $(gnulib) $(common-objpfx)libc_b.a
+link-extra-libs-bounded = $(foreach lib,$(LDLIBS-$(@F:%-bp=%)),$(common-objpfx)$(lib)_b.a)
 
 ifndef gnulib
 ifneq ($(have-cc-with-libunwind),yes)
@@ -787,16 +798,11 @@ soversions-default-setname = $(patsubst %, %,\
 $(common-objpfx)soversions.i: $(..)scripts/soversions.awk \
 			      $(common-objpfx)shlib-versions.v
 	$(AWK) -v default_setname='$(soversions-default-setname)' \
-	       -v cpu='$(config-machine)' \
-	       -v vendor='$(config-vendor)' \
-	       -v os='$(config-os)' \
+	       -v config='$(config-machine)-$(config-vendor)-$(config-os)' \
 	       -f $^ > $@T
 	mv -f $@T $@
-$(common-objpfx)soversions.mk: $(common-objpfx)soversions.i $(..)Makeconfig
-	(seen_DEFAULT=0; seen_WORDSIZE32=0; seen_WORDSIZE64=0; \
-	 while read which lib number setname; do \
-	   eval seen_$$which=1; \
-	   test x"$$which" = xDEFAULT || continue; \
+$(common-objpfx)soversions.mk: $(common-objpfx)soversions.i
+	(while read lib number setname; do \
 	   case $$number in \
 	     [0-9]*) echo "$$lib.so-version=.$$number"; \
 		     echo "all-sonames+=$$lib=$$lib.so\$$($$lib.so-version)";;\
@@ -804,12 +810,6 @@ $(common-objpfx)soversions.mk: $(common-objpfx)soversions.i $(..)Makeconfig
 		     echo "all-sonames+=$$lib=\$$($$lib.so-version)";;\
 	   esac; \
 	 done; \
-	 case "$$seen_DEFAULT$$seen_WORDSIZE32$$seen_WORDSIZE64" in \
-	   100) echo biarch = no;; \
-	   101) echo biarch = 32;; \
-	   ?1?) echo biarch = 64;; \
-	   *) echo >&2 BUG; exit 2;; \
-	 esac; \
 	 echo soversions.mk-done = t;) < $< > $@T; exit 0
 	mv -f $@T $@
 endif
@@ -823,11 +823,31 @@ postclean-generated += soversions.mk soversions.i \
 before-compile += $(common-objpfx)gnu/lib-names.h
 ifeq ($(soversions.mk-done),t)
 $(common-objpfx)gnu/lib-names.h: $(common-objpfx)gnu/lib-names.stmp; @:
-$(common-objpfx)gnu/lib-names.stmp: $(..)scripts/lib-names.awk \
-				    $(common-objpfx)soversions.i
+$(common-objpfx)gnu/lib-names.stmp: $(common-objpfx)soversions.mk
 	$(make-target-directory)
 	@rm -f ${@:stmp=T} $@
-	$(AWK) -f $^ > ${@:stmp=T}
+	(echo '/* This file is automatically generated.';\
+	 echo '   It defines macros to allow user program to find the shared';\
+	 echo '   library files which come as part of GNU libc.  */';\
+	 echo '#ifndef __GNU_LIB_NAMES_H'; \
+	 echo '#define __GNU_LIB_NAMES_H	1'; \
+	 echo; \
+	 (libs='$(all-sonames)';\
+	  for l in $$libs; do \
+	    name=`echo $$l | sed 's/.*=//'`; \
+	    upname=`echo $$l | sed 's/=.*//' | \
+		    tr 'abcdefghijklmnopqrstuvwxyz-' \
+		       'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'`; \
+	    upname2=`echo $$name | sed 's/[.]so.*//' | \
+		     tr 'abcdefghijklmnopqrstuvwxyz-' \
+		        'ABCDEFGHIJKLMNOPQRSTUVWXYZ_'`; \
+	    echo "#define	$${upname}_SO	\"$$name\""; \
+	    if test $$upname != $$upname2; then \
+	      echo "#define	$${upname2}_SO	\"$$name\""; \
+	    fi; \
+	  done;) | sort; \
+	 echo; \
+	 echo '#endif	/* gnu/lib-names.h */';) > ${@:stmp=T}
 	$(move-if-change) ${@:stmp=T} ${@:stmp=h}
 	touch $@
 endif
diff --git a/Makefile b/Makefile
index dc7a6f177a..50ee40c26a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1991-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -64,7 +64,7 @@ endif # $(AUTOCONF) = no
 		   subdir_update-abi subdir_check-abi 			\
 		   subdir_echo-headers 					\
 		   subdir_install					\
-		   subdir_objs subdir_stubs subdir_testclean		\
+		   subdir_testclean					\
 		   $(addprefix install-, no-libc.a bin lib data headers others)
 
 headers := limits.h values.h features.h gnu-versions.h bits/libc-lock.h \
@@ -157,25 +157,13 @@ others: $(common-objpfx)testrun.sh
 
 subdir-stubs := $(foreach dir,$(subdirs),$(common-objpfx)$(dir)/stubs)
 
-ifeq ($(biarch),no)
-installed-stubs = $(inst_includedir)/gnu/stubs.h
-else
-installed-stubs = $(inst_includedir)/gnu/stubs-$(biarch).h
-
-$(inst_includedir)/gnu/stubs.h: include/stubs-biarch.h $(+force)
-	$(INSTALL_DATA) $< $@
-
-install-others-nosubdir: $(installed-stubs)
-endif
-
-
 # Since stubs.h is never needed when building the library, we simplify the
 # hairy installation process by producing it in place only as the last part
 # of the top-level `make install'.  It depends on subdir_install, which
 # iterates over all the subdirs; subdir_install in each subdir depends on
 # the subdir's stubs file.  Having more direct dependencies would result in
 # extra iterations over the list for subdirs and many recursive makes.
-$(installed-stubs): include/stubs-prologue.h subdir_install
+$(inst_includedir)/gnu/stubs.h: include/stubs-prologue.h subdir_install
 	$(make-target-directory)
 	@rm -f $(objpfx)stubs.h
 	(sed '/^@/d' $<; LC_ALL=C sort $(subdir-stubs)) > $(objpfx)stubs.h
diff --git a/Makerules b/Makerules
index 887d7275c3..5d7b24f31f 100644
--- a/Makerules
+++ b/Makerules
@@ -1,4 +1,4 @@
-# Copyright (C) 1991-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -338,8 +338,7 @@ $(common-objpfx)Versions.def.v.i: $(..)Versions.def \
 $(common-objpfx)Versions.all: $(..)scripts/firstversions.awk \
 			      $(common-objpfx)soversions.i \
 			      $(common-objpfx)Versions.def.v
-	{ while read which lib version setname; do \
-	    test x"$$which" = xDEFAULT || continue; \
+	{ while read lib version setname; do \
 	    test -z "$$setname" || echo "$$lib : $$setname"; \
 	  done < $(word 2,$^); \
 	  cat $(word 3,$^); \
@@ -492,7 +491,7 @@ $(common-objpfx)shlib.lds: $(common-objpfx)config.make $(..)Makerules
 		 PROVIDE(__start___libc_freeres_ptrs = .); \
 		 *(__libc_freeres_ptrs) \
 		 PROVIDE(__stop___libc_freeres_ptrs = .);/'\
-	      -e 's@^.*\*(\.jcr).*$$@& \
+	      -e 's/^.*\*(\.jcr).*$$/& \
 		 PROVIDE(__start___libc_subfreeres = .);\
 		 __libc_subfreeres : { *(__libc_subfreeres) }\
 		 PROVIDE(__stop___libc_subfreeres = .);\
@@ -501,8 +500,7 @@ $(common-objpfx)shlib.lds: $(common-objpfx)config.make $(..)Makerules
 		 PROVIDE(__stop___libc_atexit = .);\
 		 PROVIDE(__start___libc_thread_subfreeres = .);\
 		 __libc_thread_subfreeres : { *(__libc_thread_subfreeres) }\
-		 PROVIDE(__stop___libc_thread_subfreeres = .);\
-		 /DISCARD/ : { *(.gnu.glibc-stub.*) }@'
+		 PROVIDE(__stop___libc_thread_subfreeres = .);/'
 	mv -f $@T $@
 common-generated += shlib.lds
 
@@ -1386,15 +1384,19 @@ ifdef objpfx
 .PHONY: stubs # The parent Makefile calls this target.
 stubs: $(objpfx)stubs
 endif
-objs-for-stubs := $(foreach o,$(object-suffixes-for-libc),$(o-objects)) \
-		  $(addprefix $(objpfx),$(extra-objs))
-$(objpfx)stubs: $(objs-for-stubs)
-ifneq (,$(strip $(objs-for-stubs)))
-	$(OBJDUMP) -h $^ | \
-	$(AWK) '/\.gnu\.glibc-stub\./ { \
-	          sub(/\.gnu\.glibc-stub\./, "", $$2); \
-		  stubs[$$2] = 1; } \
-		END { for (s in stubs) print "#define __stub_" s }' > $@T
+$(objpfx)stubs: $(+depfiles)
+ifneq (,$(strip $(+depfiles)))
+# Use /dev/null since `...` might expand to empty.
+	c=`($(patsubst %/,cd % &&,$(objpfx)) \
+	    sed -n -e 's@\$$(common-objpfx)@$(common-objpfx)@g' \
+		   -e 's@\$$(objpfx)@$(objpfx)@g' \
+		   -e '/stub-tag\.h/{; g; s/./&/p; }' \
+		   -e '/:/{x; s/^.*$$//; x; }' \
+		   -e 's/^.*://;s/\\$$//;s/^ *\([^ ][^ ]*\) .*$$/\1/' \
+		   -e '/^[^ ][^ ]*$$/{G;s/^.*\n\(..*\)/\1/;s/\n//;h; }' \
+		   $(patsubst $(objpfx)%,%,$^)) | sort | uniq`; \
+	sed -n 's/^stub_warning *(\([^)]*\).*$$/#define __stub_\1/p' \
+	    $$c /dev/null > $@T
 	mv -f $@T $@
 else
 	> $@
diff --git a/Rules b/Rules
index 5758611081..a29daaa7b5 100644
--- a/Rules
+++ b/Rules
@@ -1,4 +1,4 @@
-# Copyright (C) 1991-2000,2002,2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1991-2000,2002,2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -165,9 +165,7 @@ distclean: clean
 .PHONY: subdir_install
 subdir_install: install-no-libc.a lib-noranlib stubs
 
-.PHONY: subdir_objs subdir_stubs subdir_TAGS
-subdir_objs: objs
-subdir_stubs: stubs
+.PHONY: subdir_TAGS
 subdir_TAGS: TAGS
 
 # Target required by the Hurd to ensure that all the MiG-generated
diff --git a/WUR-REPORT b/WUR-REPORT
deleted file mode 100644
index 52c4a812d9..0000000000
--- a/WUR-REPORT
+++ /dev/null
@@ -1,45 +0,0 @@
-<unistd.h>:
-
-lssek:   Probably should be __wur but lseek(fd,SEEK_SET,0) will succeed if
-         the descriptor is fine.
-lseek64: same
-
-setuid:  will always succeed given correct privileges, so there might
-         be places which don't check for it.
-setreuid: same
-seteuid: same
-setgid:  same
-setregid: same
-setegid: same
-setresuid: same
-setresgid: same
-
-
-<stdio.h>:
-
-setvbuf:   if stream and buffer are fine and other parameters constant,
-           it cannot really fail.
-fseek:     see lseek
-fseeko:    likewise
-fgetpos:   similarly
-fsetpos:   likewise
-
-
-<stdlib.h>:
-
-atext:     it is guaranteed that a certain number of handlers can be
-           registered, so some calls might need not be checked
-on_exit:   same
-random functions:  one might want to discard a number of results.  In any
-           case, no security problem
-
-
-putenv:    probably SHOULD be marked, but we'll wait a bit.
-setenv:    likewise
-unsetenv:  likewise
-clearenv:  likewise
-
-mbstowcs:  probably SHOULD be marked
-wcstombs:  likewise
-
-ptsname_r: probably SHOULD be marked
diff --git a/bits/atomic.h b/bits/atomic.h
deleted file mode 100644
index 6245130a91..0000000000
--- a/bits/atomic.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright (C) 2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2003.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _BITS_ATOMIC_H
-#define _BITS_ATOMIC_H	1
-
-/* We have by default no support for atomic operations.  So define
-   them non-atomic.  If this is a problem somebody will have to come
-   up with real definitions.  */
-
-/* The only basic operation needed is compare and exchange.  */
-#define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
-  ({ __typeof (mem) __gmemp = (mem);				      \
-     __typeof (*mem) __gret = *__gmemp;				      \
-     __typeof (*mem) __gnewval = (newval);			      \
-								      \
-     if (__gret == (oldval))					      \
-       *__gmemp = __gnewval;					      \
-     __gret; })
-
-#define atomic_compare_and_exchange_bool_acq(mem, newval, oldval) \
-  ({ __typeof (mem) __gmemp = (mem);				      \
-     __typeof (*mem) __gnewval = (newval);			      \
-								      \
-     *__gmemp == (oldval) ? (*__gmemp = __gnewval, 0) : 1; })
-
-#endif	/* bits/atomic.h */
diff --git a/bits/dirent.h b/bits/dirent.h
deleted file mode 100644
index 3407ebd802..0000000000
--- a/bits/dirent.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Directory entry structure `struct dirent'.  Stub version.
-   Copyright (C) 1996, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _DIRENT_H
-# error "Never use <bits/dirent.h> directly; include <dirent.h> instead."
-#endif
-
-struct dirent
-  {
-    char d_name[1];		/* Variable length.  */
-    int d_fileno;
-  };
-
-#ifdef __USE_LARGEFILE64
-struct dirent64
-  {
-    char d_name[1];		/* Variable length.  */
-    int d_fileno;
-  };
-#endif
-
diff --git a/bits/elfclass.h b/bits/elfclass.h
deleted file mode 100644
index 180227d9e7..0000000000
--- a/bits/elfclass.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* This file specifies the native word size of the machine, which indicates
-   the ELF file class used for executables and shared objects on this
-   machine.  */
-
-#ifndef _LINK_H
-# error "Never use <bits/elfclass.h> directly; include <link.h> instead."
-#endif
-
-#include <bits/wordsize.h>
-
-#define __ELF_NATIVE_CLASS __WORDSIZE
-
-/* The entries in the .hash table always have a size of 32 bits.  */
-typedef uint32_t Elf_Symndx;
diff --git a/bits/endian.h b/bits/endian.h
deleted file mode 100644
index 45afd4ae47..0000000000
--- a/bits/endian.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* This file should define __BYTE_ORDER as appropriate for the machine
-   in question.  See string/endian.h for how to define it.
-
-   If only the stub bits/endian.h applies to a particular configuration,
-   bytesex.h is generated by running a program on the host machine.
-   So if cross-compiling to a machine with a different byte order,
-   the bits/endian.h file for that machine must exist.  */
-
-#ifndef _ENDIAN_H
-# error "Never use <bits/endian.h> directly; include <endian.h> instead."
-#endif
-
-#error Machine byte order unknown.
diff --git a/bits/errno.h b/bits/errno.h
deleted file mode 100644
index 89a5cfddeb..0000000000
--- a/bits/errno.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Copyright (C) 1991, 1994, 1996, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/* This file defines the `errno' constants.  */
-
-#if !defined __Emath_defined && (defined _ERRNO_H || defined __need_Emath)
-#undef	__need_Emath
-#define	__Emath_defined	1
-
-# define EDOM	XXX	<--- fill in what is actually needed
-# define EILSEQ	XXX	<--- fill in what is actually needed
-# define ERANGE	XXX	<--- fill in what is actually needed
-#endif
-
-#ifdef	_ERRNO_H
-# error "Define here all the missing error messages for the port.  These"
-# error "must match the numbers of the kernel."
-# define Exxxx	XXX
-...
-#endif
diff --git a/bits/fcntl.h b/bits/fcntl.h
deleted file mode 100644
index b397f812f1..0000000000
--- a/bits/fcntl.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/* O_*, F_*, FD_* bit values for stub configuration.
-   Copyright (C) 1991, 1992, 1997, 2000, 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/* These values should be changed as appropriate for your system.  */
-
-#ifndef	_FCNTL_H
-# error "Never use <bits/fcntl.h> directly; include <fcntl.h> instead."
-#endif
-
-
-/* File access modes for `open' and `fcntl'.  */
-#define	O_RDONLY	0	/* Open read-only.  */
-#define	O_WRONLY	1	/* Open write-only.  */
-#define	O_RDWR		2	/* Open read/write.  */
-
-
-/* Bits OR'd into the second argument to open.  */
-#define	O_CREAT		0x0200	/* Create file if it doesn't exist.  */
-#define	O_EXCL		0x0800	/* Fail if file already exists.  */
-#define	O_TRUNC		0x0400	/* Truncate file to zero length.  */
-#define	O_NOCTTY	0x0100	/* Don't assign a controlling terminal.  */
-
-/* File status flags for `open' and `fcntl'.  */
-#define	O_APPEND	0x0008	/* Writes append to the file.  */
-#define	O_NONBLOCK	0x0004	/* Non-blocking I/O.  */
-
-#ifdef __USE_BSD
-# define O_NDELAY	O_NONBLOCK
-#endif
-
-/* Mask for file access modes.  This is system-dependent in case
-   some system ever wants to define some other flavor of access.  */
-#define	O_ACCMODE	(O_RDONLY|O_WRONLY|O_RDWR)
-
-/* Values for the second argument to `fcntl'.  */
-#define	F_DUPFD	  	0	/* Duplicate file descriptor.  */
-#define	F_GETFD		1	/* Get file descriptor flags.  */
-#define	F_SETFD		2	/* Set file descriptor flags.  */
-#define	F_GETFL		3	/* Get file status flags.  */
-#define	F_SETFL		4	/* Set file status flags.  */
-#if defined __USE_BSD || defined __USE_UNIX98
-# define F_GETOWN	5	/* Get owner (receiver of SIGIO).  */
-# define F_SETOWN	6	/* Set owner (receiver of SIGIO).  */
-#endif
-#define	F_GETLK		7	/* Get record locking info.  */
-#define	F_SETLK		8	/* Set record locking info.  */
-#define	F_SETLKW	9	/* Set record locking info, wait.  */
-
-/* File descriptor flags used with F_GETFD and F_SETFD.  */
-#define	FD_CLOEXEC	1	/* Close on exec.  */
-
-
-#include <bits/types.h>
-
-/* The structure describing an advisory lock.  This is the type of the third
-   argument to `fcntl' for the F_GETLK, F_SETLK, and F_SETLKW requests.  */
-struct flock
-  {
-    short int l_type;	/* Type of lock: F_RDLCK, F_WRLCK, or F_UNLCK.  */
-    short int l_whence;	/* Where `l_start' is relative to (like `lseek').  */
-    __off_t l_start;	/* Offset where the lock begins.  */
-    __off_t l_len;	/* Size of the locked area; zero means until EOF.  */
-    __pid_t l_pid;	/* Process holding the lock.  */
-  };
-
-/* Values for the `l_type' field of a `struct flock'.  */
-#define	F_RDLCK	1	/* Read lock.  */
-#define	F_WRLCK	2	/* Write lock.  */
-#define	F_UNLCK	3	/* Remove lock.  */
-
-/* Advise to `posix_fadvise'.  */
-#ifdef __USE_XOPEN2K
-# define POSIX_FADV_NORMAL	0 /* No further special treatment.  */
-# define POSIX_FADV_RANDOM	1 /* Expect random page references.  */
-# define POSIX_FADV_SEQUENTIAL	2 /* Expect sequential page references.  */
-# define POSIX_FADV_WILLNEED	3 /* Will need these pages.  */
-# define POSIX_FADV_DONTNEED	4 /* Don't need these pages.  */
-# define POSIX_FADV_NOREUSE	5 /* Data will be accessed once.  */
-#endif
diff --git a/bits/fenv.h b/bits/fenv.h
deleted file mode 100644
index a9cb53b40e..0000000000
--- a/bits/fenv.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _FENV_H
-# error "Never use <bits/fenv.h> directly; include <fenv.h> instead."
-#endif
-
-
-/* Here should be the exception be defined:
-    FE_INVALID
-    FE_DIVBYZERO
-    FE_OVERFLOW
-    FE_UNDERFLOW
-    FE_INEXACT
-   We define no macro which signals no exception is supported.  */
-
-#define FE_ALL_EXCEPT 0
-
-
-/* Here should the rounding modes be defined:
-    FE_TONEAREST
-    FE_DOWNWARD
-    FE_UPWARD
-    FE_TOWARDZERO
-   We define no macro which signals no rounding mode is selectable.  */
-
-
-/* Type representing exception flags.  */
-typedef unsigned int fexcept_t;
-
-
-/* Type representing floating-point environment.  */
-typedef struct
-  {
-    fexcept_t __excepts;
-    /* XXX I don't know what else we should save.  */
-  }
-fenv_t;
-
-/* If the default argument is used we use this value.  */
-#define FE_DFL_ENV	((__const fenv_t *) -1l)
diff --git a/bits/fenvinline.h b/bits/fenvinline.h
deleted file mode 100644
index 42f77b5618..0000000000
--- a/bits/fenvinline.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* This file provides inline versions of floating-pint environment
-   handling functions.  If there were any.  */
-
-#ifndef __NO_MATH_INLINES
-
-/* Here is where the code would go.  */
-
-#endif
diff --git a/bits/huge_val.h b/bits/huge_val.h
deleted file mode 100644
index e102ac3220..0000000000
--- a/bits/huge_val.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Stub `HUGE_VAL' constant.
-   Used by <stdlib.h> and <math.h> functions for overflow.
-   Copyright (C) 1992, 1996, 1997, 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _MATH_H
-# error "Never use <bits/huge_val.h> directly; include <math.h> instead."
-#endif
-
-#if __GNUC_PREREQ(3,3)
-# define HUGE_VAL	(__builtin_huge_val())
-#else
-# define HUGE_VAL	1e37
-#endif
diff --git a/bits/huge_valf.h b/bits/huge_valf.h
deleted file mode 100644
index 4cb5ebdfcb..0000000000
--- a/bits/huge_valf.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Stub `HUGE_VALF' constant.
-   Used by <stdlib.h> and <math.h> functions for overflow.
-   Copyright (C) 1992, 1996, 1997, 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _MATH_H
-# error "Never use <bits/huge_valf.h> directly; include <math.h> instead."
-#endif
-
-#if __GNUC_PREREQ(3,3)
-# define HUGE_VALF	(__builtin_huge_valf())
-#else
-# define HUGE_VALF	1e37f
-#endif
diff --git a/bits/huge_vall.h b/bits/huge_vall.h
deleted file mode 100644
index d5e8e2237b..0000000000
--- a/bits/huge_vall.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Default `HUGE_VALL' constant.
-   Used by <stdlib.h> and <math.h> functions for overflow.
-   Copyright (C) 1992, 1996, 1997, 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _MATH_H
-# error "Never use <bits/huge_vall.h> directly; include <math.h> instead."
-#endif
-
-#if __GNUC_PREREQ(3,3)
-# define HUGE_VALL	(__builtin_huge_vall())
-#else
-# define HUGE_VALL	((long double) HUGE_VAL)
-#endif
diff --git a/bits/in.h b/bits/in.h
deleted file mode 100644
index 31eb0f9fcf..0000000000
--- a/bits/in.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* Copyright (C) 1997, 2000, 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/* Generic version.  */
-
-#ifndef _NETINET_IN_H
-# error "Never use <bits/in.h> directly; include <netinet/in.h> instead."
-#endif
-
-/* Options for use with `getsockopt' and `setsockopt' at the IP level.
-   The first word in the comment at the right is the data type used;
-   "bool" means a boolean value stored in an `int'.  */
-#define	IP_OPTIONS	1	/* ip_opts; IP per-packet options.  */
-#define	IP_HDRINCL	2	/* int; Header is included with data.  */
-#define	IP_TOS		3	/* int; IP type of service and precedence.  */
-#define	IP_TTL		4	/* int; IP time to live.  */
-#define	IP_RECVOPTS	5	/* bool; Receive all IP options w/datagram.  */
-#define	IP_RECVRETOPTS	6	/* bool; Receive IP options for response.  */
-#define	IP_RECVDSTADDR	7	/* bool; Receive IP dst addr w/datagram.  */
-#define	IP_RETOPTS	8	/* ip_opts; Set/get IP per-packet options.  */
-#define IP_MULTICAST_IF 9	/* in_addr; set/get IP multicast i/f */
-#define IP_MULTICAST_TTL 10	/* u_char; set/get IP multicast ttl */
-#define IP_MULTICAST_LOOP 11	/* u_char; set/get IP multicast loopback */
-#define IP_ADD_MEMBERSHIP 12	/* ip_mreq; add an IP group membership */
-#define IP_DROP_MEMBERSHIP 13	/* ip_mreq; drop an IP group membership */
-
-/* Structure used to describe IP options for IP_OPTIONS and IP_RETOPTS.
-   The `ip_dst' field is used for the first-hop gateway when using a
-   source route (this gets put into the header proper).  */
-struct ip_opts
-  {
-    struct in_addr ip_dst;	/* First hop; zero without source route.  */
-    char ip_opts[40];		/* Actually variable in size.  */
-  };
-
-/* IPV6 socket options.  */
-#define IPV6_ADDRFORM		1
-#define IPV6_RXINFO		2
-#define IPV6_HOPOPTS		3
-#define IPV6_DSTOPTS		4
-#define IPV6_RTHDR		5
-#define IPV6_PKTOPTIONS		6
-#define IPV6_CHECKSUM		7
-#define IPV6_HOPLIMIT		8
-
-#define IPV6_TXINFO		IPV6_RXINFO
-#define SCM_SRCINFO		IPV6_TXINFO
-#define SCM_SRCRT		IPV6_RXSRCRT
-
-#define IPV6_UNICAST_HOPS	16
-#define IPV6_MULTICAST_IF	17
-#define IPV6_MULTICAST_HOPS	18
-#define IPV6_MULTICAST_LOOP	19
-#define IPV6_JOIN_GROUP		20
-#define IPV6_LEAVE_GROUP	21
-
-/* Obsolete synonyms for the above.  */
-#define IPV6_ADD_MEMBERSHIP	IPV6_JOIN_GROUP
-#define IPV6_DROP_MEMBERSHIP	IPV6_LEAVE_GROUP
-#define IPV6_RXHOPOPTS		IPV6_HOPOPTS
-#define IPV6_RXDSTOPTS		IPV6_DSTOPTS
-
-/* Routing header options for IPv6.  */
-#define IPV6_RTHDR_LOOSE	0	/* Hop doesn't need to be neighbour. */
-#define IPV6_RTHDR_STRICT	1	/* Hop must be a neighbour.  */
-
-#define IPV6_RTHDR_TYPE_0	0	/* IPv6 Routing header type 0.  */
diff --git a/bits/inf.h b/bits/inf.h
deleted file mode 100644
index 2d526adb23..0000000000
--- a/bits/inf.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Default `INFINITY' constant.
-   Copyright (C) 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _MATH_H
-# error "Never use <bits/inf.h> directly; include <math.h> instead."
-#endif
-
-/* If we don't have real infinity, then we're supposed to produce a float
-   value that overflows at translation time, which is required to produce
-   a diagnostic.  GCC's __builtin_inff produces a quite nice diagnostic
-   that tells the user that the target doesn't support infinities.  */
-
-#if __GNUC_PREREQ(3,3)
-# define INFINITY	(__builtin_inff())
-#else
-# define INFINITY	(1e9999f)
-#endif
diff --git a/bits/ioctl-types.h b/bits/ioctl-types.h
deleted file mode 100644
index 58b78a6af3..0000000000
--- a/bits/ioctl-types.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* Structure types for pre-termios terminal ioctls.  Generic Unix version.
-   Copyright (C) 1996, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_IOCTL_H
-# error "Never use <bits/ioctl-types.h> directly; include <sys/ioctl.h> instead."
-#endif
-
-#if defined TIOCGETC || defined TIOCSETC
-/* Type of ARG for TIOCGETC and TIOCSETC requests.  */
-struct tchars
-{
-  char t_intrc;			/* Interrupt character.  */
-  char t_quitc;			/* Quit character.  */
-  char t_startc;		/* Start-output character.  */
-  char t_stopc;			/* Stop-output character.  */
-  char t_eofc;			/* End-of-file character.  */
-  char t_brkc;			/* Input delimiter character.  */
-};
-
-#define	_IOT_tchars	/* Hurd ioctl type field.  */ \
-  _IOT (_IOTS (char), 6, 0, 0, 0, 0)
-#endif
-
-#if defined TIOCGLTC || defined TIOCSLTC
-/* Type of ARG for TIOCGLTC and TIOCSLTC requests.  */
-struct ltchars
-{
-  char t_suspc;			/* Suspend character.  */
-  char t_dsuspc;		/* Delayed suspend character.  */
-  char t_rprntc;		/* Reprint-line character.  */
-  char t_flushc;		/* Flush-output character.  */
-  char t_werasc;		/* Word-erase character.  */
-  char t_lnextc;		/* Literal-next character.  */
-};
-
-#define	_IOT_ltchars	/* Hurd ioctl type field.  */ \
-  _IOT (_IOTS (char), 6, 0, 0, 0, 0)
-#endif
-
-/* Type of ARG for TIOCGETP and TIOCSETP requests (and gtty and stty).  */
-struct sgttyb
-{
-  char sg_ispeed;		/* Input speed.  */
-  char sg_ospeed;		/* Output speed.  */
-  char sg_erase;		/* Erase character.  */
-  char sg_kill;			/* Kill character.  */
-  short int sg_flags;		/* Mode flags.  */
-};
-
-#define	_IOT_sgttyb	/* Hurd ioctl type field.  */ \
-  _IOT (_IOTS (char), 6, _IOTS (short int), 1, 0, 0)
-
-#if defined TIOCGWINSZ || defined TIOCSWINSZ
-/* Type of ARG for TIOCGWINSZ and TIOCSWINSZ requests.  */
-struct winsize
-{
-  unsigned short int ws_row;	/* Rows, in characters.  */
-  unsigned short int ws_col;	/* Columns, in characters.  */
-
-  /* These are not actually used.  */
-  unsigned short int ws_xpixel;	/* Horizontal pixels.  */
-  unsigned short int ws_ypixel;	/* Vertical pixels.  */
-};
-
-#define	_IOT_winsize	/* Hurd ioctl type field.  */ \
-  _IOT (_IOTS (unsigned short int), 4, 0, 0, 0, 0)
-#endif
-
-#if defined TIOCGSIZE || defined TIOCSSIZE
-/* The BSD-style ioctl constructor macros use `sizeof', which can't be used
-   in a preprocessor conditional.  Since the commands are always unique
-   regardless of the size bits, we can safely define away `sizeof' for the
-   purpose of the conditional.  */
-#  define sizeof(type) 0
-#  if defined TIOCGWINSZ && TIOCGSIZE == TIOCGWINSZ
-/* Many systems that have TIOCGWINSZ define TIOCGSIZE for source
-   compatibility with Sun; they define `struct ttysize' to have identical
-   layout as `struct winsize' and #define TIOCGSIZE to be TIOCGWINSZ
-   (likewise TIOCSSIZE and TIOCSWINSZ).  */
-struct ttysize
-{
-  unsigned short int ts_lines;
-  unsigned short int ts_cols;
-  unsigned short int ts_xxx;
-  unsigned short int ts_yyy;
-};
-#define	_IOT_ttysize	_IOT_winsize
-#  else
-/* Suns use a different layout for `struct ttysize', and TIOCGSIZE and
-   TIOCGWINSZ are separate commands that do the same thing with different
-   structures (likewise TIOCSSIZE and TIOCSWINSZ).  */
-struct ttysize
-{
-  int ts_lines, ts_cols;	/* Lines and columns, in characters.  */
-};
-#  endif
-#  undef sizeof			/* See above.  */
-#endif
diff --git a/bits/ioctls.h b/bits/ioctls.h
deleted file mode 100644
index d3ecad9515..0000000000
--- a/bits/ioctls.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef _SYS_IOCTL_H
-# error "Never use <bits/ioctls.h> directly; include <sys/ioctl.h> instead."
-#endif
-
-/* This space intentionally left blank.  */
diff --git a/bits/ipc.h b/bits/ipc.h
deleted file mode 100644
index 5ba227d36b..0000000000
--- a/bits/ipc.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* Copyright (C) 1995, 1996, 1997, 1999 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_IPC_H
-# error "Never use <bits/ipc.h> directly; include <sys/ipc.h> instead."
-#endif
-
-#include <bits/types.h>
-
-/* Mode bits for `msgget', `semget', and `shmget'.  */
-#define IPC_CREAT	01000		/* create key if key does not exist */
-#define IPC_EXCL	02000		/* fail if key exists */
-#define IPC_NOWAIT	04000		/* return error on wait */
-
-/* Control commands for `msgctl', `semctl', and `shmctl'.  */
-#define IPC_RMID	0		/* remove identifier */
-#define IPC_SET		1		/* set `ipc_perm' options */
-#define IPC_STAT	2		/* get `ipc_perm' options */
-
-/* Special key values.  */
-#define IPC_PRIVATE	((key_t) 0)	/* private key */
-
-
-/* Data structure used to pass permission information to IPC operations.  */
-struct ipc_perm
-  {
-    __uid_t uid;			/* owner's user ID */
-    __gid_t gid;			/* owner's group ID */
-    __uid_t cuid;			/* creator's user ID */
-    __gid_t cgid;			/* creator's group ID */
-    __mode_t mode;			/* read/write permission */
-  };
diff --git a/bits/ipctypes.h b/bits/ipctypes.h
deleted file mode 100644
index b88ca1d87b..0000000000
--- a/bits/ipctypes.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/* bits/ipctypes.h -- Define some types used by SysV IPC/MSG/SHM.  Generic.
-   Copyright (C) 2002 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/*
- * Never include <bits/ipctypes.h> directly.
- */
-
-#ifndef _BITS_IPCTYPES_H
-#define _BITS_IPCTYPES_H	1
-
-#include <bits/types.h>
-
-/* Used in `struct shmid_ds'.  */
-# if __WORDSIZE == 32
-typedef unsigned short int __ipc_pid_t;
-# else
-typedef int __ipc_pid_t;
-# endif
-
-
-#endif /* bits/ipctypes.h */
diff --git a/bits/libc-lock.h b/bits/libc-lock.h
deleted file mode 100644
index 3f9c211e0a..0000000000
--- a/bits/libc-lock.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/* libc-internal interface for mutex locks.  Stub version.
-   Copyright (C) 1996,97,99,2000-2002,2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _BITS_LIBC_LOCK_H
-#define _BITS_LIBC_LOCK_H 1
-
-
-/* Define a lock variable NAME with storage class CLASS.  The lock must be
-   initialized with __libc_lock_init before it can be used (or define it
-   with __libc_lock_define_initialized, below).  Use `extern' for CLASS to
-   declare a lock defined in another module.  In public structure
-   definitions you must use a pointer to the lock structure (i.e., NAME
-   begins with a `*'), because its storage size will not be known outside
-   of libc.  */
-#define __libc_lock_define(CLASS,NAME)
-#define __libc_lock_define_recursive(CLASS,NAME)
-#define __rtld_lock_define_recursive(CLASS,NAME)
-#define __libc_rwlock_define(CLASS,NAME)
-
-/* Define an initialized lock variable NAME with storage class CLASS.  */
-#define __libc_lock_define_initialized(CLASS,NAME)
-#define __libc_rwlock_define_initialized(CLASS,NAME)
-
-/* Define an initialized recursive lock variable NAME with storage
-   class CLASS.  */
-#define __libc_lock_define_initialized_recursive(CLASS,NAME)
-#define __rtld_lock_define_initialized_recursive(CLASS,NAME)
-
-/* Initialize the named lock variable, leaving it in a consistent, unlocked
-   state.  */
-#define __libc_lock_init(NAME)
-#define __libc_rwlock_init(NAME)
-
-/* Same as last but this time we initialize a recursive mutex.  */
-#define __libc_lock_init_recursive(NAME)
-#define __rtld_lock_init_recursive(NAME)
-
-/* Finalize the named lock variable, which must be locked.  It cannot be
-   used again until __libc_lock_init is called again on it.  This must be
-   called on a lock variable before the containing storage is reused.  */
-#define __libc_lock_fini(NAME)
-#define __libc_rwlock_fini(NAME)
-
-/* Finalize recursive named lock.  */
-#define __libc_lock_fini_recursive(NAME)
-
-/* Lock the named lock variable.  */
-#define __libc_lock_lock(NAME)
-#define __libc_rwlock_rdlock(NAME)
-#define __libc_rwlock_wrlock(NAME)
-
-/* Lock the recursive named lock variable.  */
-#define __libc_lock_lock_recursive(NAME)
-#define __rtld_lock_lock_recursive(NAME)
-
-/* Try to lock the named lock variable.  */
-#define __libc_lock_trylock(NAME) 0
-#define __libc_rwlock_tryrdlock(NAME) 0
-#define __libc_rwlock_trywrlock(NAME) 0
-
-/* Try to lock the recursive named lock variable.  */
-#define __libc_lock_trylock_recursive(NAME) 0
-
-/* Unlock the named lock variable.  */
-#define __libc_lock_unlock(NAME)
-#define __libc_rwlock_unlock(NAME)
-
-/* Unlock the recursive named lock variable.  */
-#define __libc_lock_unlock_recursive(NAME)
-#define __rtld_lock_unlock_recursive(NAME)
-
-
-/* Define once control variable.  */
-#define __libc_once_define(CLASS, NAME) CLASS int NAME = 0
-
-/* Call INIT_FUNCTION only if this is the first call for ONCE_CONTROL.  */
-#define __libc_once(ONCE_CONTROL, INIT_FUNCTION) \
-  do {									      \
-    if ((ONCE_CONTROL) == 0) {						      \
-      INIT_FUNCTION ();							      \
-      (ONCE_CONTROL) = 1;						      \
-    }									      \
-  } while (0)
-
-
-/* Start a critical region with a cleanup function */
-#define __libc_cleanup_region_start(DOIT, FCT, ARG)			    \
-{									    \
-  typeof (***(FCT)) *__save_FCT = (DOIT) ? (FCT) : 0;			    \
-  typeof (ARG) __save_ARG = ARG;					    \
-  /* close brace is in __libc_cleanup_region_end below. */
-
-/* End a critical region started with __libc_cleanup_region_start. */
-#define __libc_cleanup_region_end(DOIT)					    \
-  if ((DOIT) && __save_FCT != 0)					    \
-    (*__save_FCT)(__save_ARG);						    \
-}
-
-/* Sometimes we have to exit the block in the middle.  */
-#define __libc_cleanup_end(DOIT)					    \
-  if ((DOIT) && __save_FCT != 0)					    \
-    (*__save_FCT)(__save_ARG);						    \
-
-#define __libc_cleanup_push(fct, arg) __libc_cleanup_region_start (1, fct, arg)
-#define __libc_cleanup_pop(execute) __libc_cleanup_region_end (execute)
-
-/* We need portable names for some of the functions.  */
-#define __libc_mutex_unlock
-
-/* Type for key of thread specific data.  */
-typedef int __libc_key_t;
-
-/* Create key for thread specific data.  */
-#define __libc_key_create(KEY,DEST) -1
-
-/* Set thread-specific data associated with KEY to VAL.  */
-#define __libc_setspecific(KEY,VAL) ((void)0)
-
-/* Get thread-specific data associated with KEY.  */
-#define __libc_getspecific(KEY) 0
-
-#endif	/* bits/libc-lock.h */
diff --git a/bits/libc-tsd.h b/bits/libc-tsd.h
deleted file mode 100644
index d39382952a..0000000000
--- a/bits/libc-tsd.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* libc-internal interface for thread-specific data.  Stub or TLS version.
-   Copyright (C) 1998,2001,02 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _GENERIC_BITS_LIBC_TSD_H
-#define _GENERIC_BITS_LIBC_TSD_H 1
-
-/* This file defines the following macros for accessing a small fixed
-   set of thread-specific `void *' data used only internally by libc.
-
-   __libc_tsd_define(CLASS, KEY)	-- Define or declare a `void *' datum
-   					   for KEY.  CLASS can be `static' for
-					   keys used in only one source file,
-					   empty for global definitions, or
-					   `extern' for global declarations.
-   __libc_tsd_address(KEY)		-- Return the `void **' pointing to
-   					   the current thread's datum for KEY.
-   __libc_tsd_get(KEY)			-- Return the `void *' datum for KEY.
-   __libc_tsd_set(KEY, VALUE)		-- Set the datum for KEY to VALUE.
-
-   The set of available KEY's will usually be provided as an enum,
-   and contains (at least):
-		_LIBC_TSD_KEY_MALLOC
-		_LIBC_TSD_KEY_DL_ERROR
-		_LIBC_TSD_KEY_RPC_VARS
-   All uses must be the literal _LIBC_TSD_* name in the __libc_tsd_* macros.
-   Some implementations may not provide any enum at all and instead
-   use token pasting in the macros.  */
-
-#include <tls.h>
-
-/* When full support for __thread variables is available, this interface is
-   just a trivial wrapper for it.  Without TLS, this is the generic/stub
-   implementation for wholly single-threaded systems.
-
-   We don't define an enum for the possible key values, because the KEYs
-   translate directly into variables by macro magic.  */
-
-#if USE___THREAD
-# define __libc_tsd_define(CLASS, KEY)	\
-  CLASS __thread void *__libc_tsd_##KEY attribute_tls_model_ie;
-
-# define __libc_tsd_address(KEY)	(&__libc_tsd_##KEY)
-# define __libc_tsd_get(KEY)		(__libc_tsd_##KEY)
-# define __libc_tsd_set(KEY, VALUE)	(__libc_tsd_##KEY = (VALUE))
-#else
-# define __libc_tsd_define(CLASS, KEY)	CLASS void *__libc_tsd_##KEY##_data;
-
-# define __libc_tsd_address(KEY)	(&__libc_tsd_##KEY##_data)
-# define __libc_tsd_get(KEY)		(__libc_tsd_##KEY##_data)
-# define __libc_tsd_set(KEY, VALUE)	(__libc_tsd_##KEY##_data = (VALUE))
-#endif
-
-#endif	/* bits/libc-tsd.h */
diff --git a/bits/local_lim.h b/bits/local_lim.h
deleted file mode 100644
index 42cc7ebbc9..0000000000
--- a/bits/local_lim.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/* This file should define the implementation-specific limits described
-   in posix[12]_lim.h.  If there are no useful values to give a limit,
-   don't define it.  */
diff --git a/bits/mathdef.h b/bits/mathdef.h
deleted file mode 100644
index 00c67241a0..0000000000
--- a/bits/mathdef.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright (C) 1997, 1998, 1999, 2000, 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#if !defined _MATH_H && !defined _COMPLEX_H
-# error "Never use <bits/mathdef.h> directly; include <math.h> instead"
-#endif
-
-#if defined  __USE_ISOC99 && defined _MATH_H && !defined _MATH_H_MATHDEF
-# define _MATH_H_MATHDEF	1
-
-/* Normally, there is no long double type and the `float' and `double'
-   expressions are evaluated as `double'.  */
-typedef double float_t;		/* `float' expressions are evaluated as
-				   `double'.  */
-typedef double double_t;	/* `double' expressions are evaluated as
-				   `double'.  */
-
-/* The values returned by `ilogb' for 0 and NaN respectively.  */
-# define FP_ILOGB0	(-2147483647)
-# define FP_ILOGBNAN	2147483647
-
-#endif	/* ISO C99 */
-
-#ifndef __NO_LONG_DOUBLE_MATH
-/* Signal that we do not really have a `long double'.  This disables the
-   declaration of all the `long double' function variants.  */
-# define __NO_LONG_DOUBLE_MATH	1
-#endif
diff --git a/bits/mathinline.h b/bits/mathinline.h
deleted file mode 100644
index 5498af6b63..0000000000
--- a/bits/mathinline.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* This file should provide inline versions of math functions.
-
-   Surround GCC-specific parts with #ifdef __GNUC__, and use `extern __inline'.
-
-   This file should define __MATH_INLINES if functions are actually defined as
-   inlines.  */
-
-#if !defined __NO_MATH_INLINES && defined __OPTIMIZE__
-
-/* Here goes the real code.  */
-
-#endif
diff --git a/bits/mqueue.h b/bits/mqueue.h
deleted file mode 100644
index 27bb4824b8..0000000000
--- a/bits/mqueue.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Copyright (C) 2004 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _MQUEUE_H
-# error "Never use <bits/mqueue.h> directly; include <mqueue.h> instead."
-#endif
-
-typedef int mqd_t;
-
-struct mq_attr
-{
-  long int mq_flags;	/* Message queue flags.  */
-  long int mq_maxmsg;	/* Maximum number of messages.  */
-  long int mq_msgsize;	/* Maximum message size.  */
-  long int mq_curmsgs;	/* Number of messages currently queued.  */
-};
diff --git a/bits/msq.h b/bits/msq.h
deleted file mode 100644
index 0125c43dc3..0000000000
--- a/bits/msq.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Copyright (C) 1995, 1997, 2000 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_MSG_H
-#error "Never use <bits/msq.h> directly; include <sys/msg.h> instead."
-#endif
-
-#include <bits/types.h>
-
-/* Define options for message queue functions.  */
-#define MSG_NOERROR	010000	/* no error if message is too big */
-
-/* Types used in the structure definition.  */
-typedef unsigned short int msgqnum_t;
-typedef unsigned short int msglen_t;
-
-
-/* Structure of record for one message inside the kernel.
-   The type `struct __msg' is opaque.  */
-struct msqid_ds
-{
-  struct ipc_perm msg_perm;	/* structure describing operation permission */
-  __time_t msg_stime;		/* time of last msgsnd command */
-  __time_t msg_rtime;		/* time of last msgrcv command */
-  __time_t msg_ctime;		/* time of last change */
-  msgqnum_t msg_qnum;		/* number of messages currently on queue */
-  msglen_t msg_qbytes;		/* max number of bytes allowed on queue */
-  __pid_t msg_lspid;		/* pid of last msgsnd() */
-  __pid_t msg_lrpid;		/* pid of last msgrcv() */
-};
diff --git a/bits/nan.h b/bits/nan.h
deleted file mode 100644
index ab38168ea4..0000000000
--- a/bits/nan.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef _MATH_H
-#error "Never use <bits/nan.h> directly; include <math.h> instead."
-#endif
-
-/* This file should define `NAN' on machines that have such things.  */
diff --git a/bits/netdb.h b/bits/netdb.h
deleted file mode 100644
index 41dc731931..0000000000
--- a/bits/netdb.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Copyright (C) 1996, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _NETDB_H
-# error "Never include <bits/netdb.h> directly; use <netdb.h> instead."
-#endif
-
-
-/* Description of data base entry for a single network.  NOTE: here a
-   poor assumption is made.  The network number is expected to fit
-   into a 32-bit unsigned integer (`uint32_t') variable.  */
-struct netent
-{
-  char *n_name;			/* Official name of network.  */
-  char **n_aliases;		/* Alias list.  */
-  int n_addrtype;		/* Net address type.  */
-  uint32_t n_net;		/* Network number.  */
-};
diff --git a/bits/poll.h b/bits/poll.h
deleted file mode 100644
index 022a06cc1b..0000000000
--- a/bits/poll.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* Copyright (C) 1997, 2000, 2001 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_POLL_H
-# error "Never use <bits/poll.h> directly; include <sys/poll.h> instead."
-#endif
-
-/* Event types that can be polled for.  These bits may be set in `events'
-   to indicate the interesting event types; they will appear in `revents'
-   to indicate the status of the file descriptor.  */
-#define POLLIN		01              /* There is data to read.  */
-#define POLLPRI		02              /* There is urgent data to read.  */
-#define POLLOUT		04              /* Writing now will not block.  */
-
-#ifdef __USE_XOPEN
-/* These values are defined in XPG4.2.  */
-# define POLLRDNORM	POLLIN		/* Normal data may be read.  */
-# define POLLRDBAND	POLLPRI		/* Priority data may be read.  */
-# define POLLWRNORM	POLLOUT		/* Writing now will not block.  */
-# define POLLWRBAND	POLLOUT		/* Priority data may be written.  */
-#endif
-
-/* Event types always implicitly polled for.  These bits need not be set in
-   `events', but they will appear in `revents' to indicate the status of
-   the file descriptor.  */
-#define POLLERR         010             /* Error condition.  */
-#define POLLHUP         020             /* Hung up.  */
-#define POLLNVAL        040             /* Invalid polling request.  */
diff --git a/bits/posix_opt.h b/bits/posix_opt.h
deleted file mode 100644
index 54f5a79aa2..0000000000
--- a/bits/posix_opt.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* This file should define the POSIX options described in <unistd.h>,
-   or leave them undefined, as appropriate.  */
diff --git a/bits/pthreadtypes.h b/bits/pthreadtypes.h
deleted file mode 100644
index 0e26952c96..0000000000
--- a/bits/pthreadtypes.h
+++ /dev/null
@@ -1 +0,0 @@
-/* No thread support.  */
diff --git a/bits/resource.h b/bits/resource.h
deleted file mode 100644
index 05b28dfccd..0000000000
--- a/bits/resource.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/* Bit values & structures for resource limits.  4.4 BSD/generic GNU version.
-   Copyright (C) 1994, 1996, 1997, 1998 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_RESOURCE_H
-# error "Never use <bits/resource.h> directly; include <sys/resource.h> instead."
-#endif
-
-#include <bits/types.h>
-
-/* These are the values for 4.4 BSD and GNU.  Earlier BSD systems have a
-   subset of these kinds of resource limit.  In systems where `getrlimit'
-   and `setrlimit' are not system calls, these are the values used by the C
-   library to emulate them.  */
-
-/* Kinds of resource limit.  */
-enum __rlimit_resource
-  {
-    /* Per-process CPU limit, in seconds.  */
-    RLIMIT_CPU,
-#define	RLIMIT_CPU	RLIMIT_CPU
-    /* Largest file that can be created, in bytes.  */
-    RLIMIT_FSIZE,
-#define	RLIMIT_FSIZE	RLIMIT_FSIZE
-    /* Maximum size of data segment, in bytes.  */
-    RLIMIT_DATA,
-#define	RLIMIT_DATA	RLIMIT_DATA
-    /* Maximum size of stack segment, in bytes.  */
-    RLIMIT_STACK,
-#define	RLIMIT_STACK	RLIMIT_STACK
-    /* Largest core file that can be created, in bytes.  */
-    RLIMIT_CORE,
-#define	RLIMIT_CORE	RLIMIT_CORE
-    /* Largest resident set size, in bytes.
-       This affects swapping; processes that are exceeding their
-       resident set size will be more likely to have physical memory
-       taken from them.  */
-    RLIMIT_RSS,
-#define	RLIMIT_RSS	RLIMIT_RSS
-    /* Locked-in-memory address space.  */
-    RLIMIT_MEMLOCK,
-#define	RLIMIT_MEMLOCK	RLIMIT_MEMLOCK
-    /* Number of processes.  */
-    RLIMIT_NPROC,
-#define	RLIMIT_NPROC	RLIMIT_NPROC
-    /* Number of open files.  */
-    RLIMIT_OFILE,
-    RLIMIT_NOFILE = RLIMIT_OFILE, /* Another name for the same thing.  */
-#define	RLIMIT_OFILE	RLIMIT_OFILE
-#define	RLIMIT_NOFILE	RLIMIT_NOFILE
-
-    RLIMIT_NLIMITS,		/* Number of limit flavors.  */
-    RLIM_NLIMITS = RLIMIT_NLIMITS /* Traditional name for same.  */
-  };
-
-/* Value to indicate that there is no limit.  */
-#ifndef __USE_FILE_OFFSET64
-# define RLIM_INFINITY 0x7fffffff
-#else
-# define RLIM_INFINITY 0x7fffffffffffffffLL
-#endif
-
-#ifdef __USE_LARGEFILE64
-# define RLIM64_INFINITY 0x7fffffffffffffffLL
-#endif
-
-
-/* Type for resource quantity measurement.  */
-#ifndef __USE_FILE_OFFSET64
-typedef __rlim_t rlim_t;
-#else
-typedef __rlim64_t rlim_t;
-#endif
-#ifdef __USE_LARGEFILE64
-typedef __rlim64_t rlim64_t;
-#endif
-
-struct rlimit
-  {
-    /* The current (soft) limit.  */
-    rlim_t rlim_cur;
-    /* The hard limit.  */
-    rlim_t rlim_max;
-  };
-
-#ifdef __USE_LARGEFILE64
-struct rlimit64
-  {
-    /* The current (soft) limit.  */
-    rlim64_t rlim_cur;
-    /* The hard limit.  */
-    rlim64_t rlim_max;
- };
-#endif
-
-/* Whose usage statistics do you want?  */
-enum __rusage_who
-/* The macro definitions are necessary because some programs want
-   to test for operating system features with #ifdef RUSAGE_SELF.
-   In ISO C the reflexive definition is a no-op.  */
-  {
-    /* The calling process.  */
-    RUSAGE_SELF = 0,
-#define RUSAGE_SELF     RUSAGE_SELF
-    /* All of its terminated child processes.  */
-    RUSAGE_CHILDREN = -1
-#define RUSAGE_CHILDREN RUSAGE_CHILDREN
-  };
-
-#define __need_timeval
-#include <bits/time.h>           /* For `struct timeval'.  */
-
-/* Structure which says how much of each resource has been used.  */
-struct rusage
-  {
-    /* Total amount of user time used.  */
-    struct timeval ru_utime;
-    /* Total amount of system time used.  */
-    struct timeval ru_stime;
-    /* Maximum resident set size (in kilobytes).  */
-    long int ru_maxrss;
-    /* Amount of sharing of text segment memory
-       with other processes (kilobyte-seconds).  */
-    long int ru_ixrss;
-    /* Amount of data segment memory used (kilobyte-seconds).  */
-    long int ru_idrss;
-    /* Amount of stack memory used (kilobyte-seconds).  */
-    long int ru_isrss;
-    /* Number of soft page faults (i.e. those serviced by reclaiming
-       a page from the list of pages awaiting reallocation.  */
-    long int ru_minflt;
-    /* Number of hard page faults (i.e. those that required I/O).  */
-    long int ru_majflt;
-    /* Number of times a process was swapped out of physical memory.  */
-    long int ru_nswap;
-    /* Number of input operations via the file system.  Note: This
-       and `ru_oublock' do not include operations with the cache.  */
-    long int ru_inblock;
-    /* Number of output operations via the file system.  */
-    long int ru_oublock;
-    /* Number of IPC messages sent.  */
-    long int ru_msgsnd;
-    /* Number of IPC messages received.  */
-    long int ru_msgrcv;
-    /* Number of signals delivered.  */
-    long int ru_nsignals;
-    /* Number of voluntary context switches, i.e. because the process
-       gave up the process before it had to (usually to wait for some
-       resource to be available).  */
-    long int ru_nvcsw;
-    /* Number of involuntary context switches, i.e. a higher priority process
-       became runnable or the current process used up its time slice.  */
-    long int ru_nivcsw;
-  };
-
-/* Priority limits.  */
-#define PRIO_MIN        -20     /* Minimum priority a process can have.  */
-#define PRIO_MAX        20      /* Maximum priority a process can have.  */
-
-/* The type of the WHICH argument to `getpriority' and `setpriority',
-   indicating what flavor of entity the WHO argument specifies.  */
-enum __priority_which
-  {
-    PRIO_PROCESS = 0,           /* WHO is a process ID.  */
-    PRIO_PGRP = 1,              /* WHO is a process group ID.  */
-    PRIO_USER = 2               /* WHO is a user ID.  */
-  };
diff --git a/bits/sched.h b/bits/sched.h
deleted file mode 100644
index 91b6dca0ca..0000000000
--- a/bits/sched.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/* Definitions of constants and data structure for POSIX 1003.1b-1993
-   scheduling interface.
-   Copyright (C) 1996, 1997, 2001, 2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SCHED_H
-# error "Never include <bits/sched.h> directly; use <sched.h> instead."
-#endif
-
-
-/* Scheduling algorithms.  */
-#define SCHED_OTHER	0
-#define SCHED_FIFO	1
-#define SCHED_RR	2
-
-/* Data structure to describe a process' schedulability.  */
-struct sched_param
-{
-  int __sched_priority;
-};
-
-
-#if defined _SCHED_H && !defined __cpu_set_t_defined
-# define __cpu_set_t_defined
-/* Size definition for CPU sets.  */
-# define __CPU_SETSIZE	1024
-# define __NCPUBITS	(8 * sizeof (__cpu_mask))
-
-/* Type for array elements in 'cpu_set'.  */
-typedef unsigned long int __cpu_mask;
-
-/* Basic access functions.  */
-# define __CPUELT(cpu)	((cpu) / __NCPUBITS)
-# define __CPUMASK(cpu)	((__cpu_mask) 1 << ((cpu) % __NCPUBITS))
-
-/* Data structure to describe CPU mask.  */
-typedef struct
-{
-  __cpu_mask __bits[__CPU_SETSIZE / __NCPUBITS];
-} cpu_set_t;
-
-/* Access functions for CPU masks.  */
-# define __CPU_ZERO(cpusetp) \
-  do {									      \
-    unsigned int __i;							      \
-    cpu_set *__arr = (cpusetp);						      \
-    for (__i = 0; __i < sizeof (cpu_set) / sizeof (__cpu_mask); ++__i)	      \
-      __arr->__bits[__i] = 0;						      \
-  } while (0)
-# define __CPU_SET(cpu, cpusetp) \
-  ((cpusetp)->__bits[__CPUELT (cpu)] |= __CPUMASK (cpu))
-# define __CPU_CLR(cpu, cpusetp) \
-  ((cpusetp)->__bits[__CPUELT (cpu)] &= ~__CPUMASK (cpu))
-# define __CPU_ISSET(cpu, cpusetp) \
-  (((cpusetp)->__bits[__CPUELT (cpu)] & __CPUMASK (cpu)) != 0)
-#endif
diff --git a/bits/select.h b/bits/select.h
deleted file mode 100644
index 47e7dedc30..0000000000
--- a/bits/select.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Copyright (C) 1997, 1998, 2001 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_SELECT_H
-# error "Never use <bits/select.h> directly; include <sys/select.h> instead."
-#endif
-
-
-/* We don't use `memset' because this would require a prototype and
-   the array isn't too big.  */
-#define __FD_ZERO(s) \
-  do {									      \
-    unsigned int __i;							      \
-    fd_set *__arr = (s);						      \
-    for (__i = 0; __i < sizeof (fd_set) / sizeof (__fd_mask); ++__i)	      \
-      __FDS_BITS (__arr)[__i] = 0;					      \
-  } while (0)
-#define __FD_SET(d, s)     (__FDS_BITS (s)[__FDELT(d)] |= __FDMASK(d))
-#define __FD_CLR(d, s)     (__FDS_BITS (s)[__FDELT(d)] &= ~__FDMASK(d))
-#define __FD_ISSET(d, s)   ((__FDS_BITS (s)[__FDELT(d)] & __FDMASK(d)) != 0)
diff --git a/bits/sem.h b/bits/sem.h
deleted file mode 100644
index dcb1c3edf7..0000000000
--- a/bits/sem.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Copyright (C) 1995, 1996, 1997, 1998
-   Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_SEM_H
-# error "Never include <bits/sem.h> directly; use <sys/sem.h> instead."
-#endif
-
-#include <sys/types.h>
-
-/* Flags for `semop'.  */
-#define SEM_UNDO	0x1000		/* undo the operation on exit */
-
-/* Commands for `semctl'.  */
-#define GETPID		11		/* get sempid */
-#define GETVAL		12		/* get semval */
-#define GETALL		13		/* get all semval's */
-#define GETNCNT		14		/* get semncnt */
-#define GETZCNT		15		/* get semzcnt */
-#define SETVAL		16		/* set semval */
-#define SETALL		17		/* set all semval's */
-
-
-/* Data structure describing a set of semaphores.  */
-struct semid_ds
-{
-  struct ipc_perm sem_perm;		/* operation permission struct */
-  __time_t sem_otime;			/* last semop() time */
-  __time_t sem_ctime;			/* last time changed by semctl() */
-  unsigned short int sem_nsems;		/* number of semaphores in set */
-};
-
-/* The user should define a union like the following to use it for arguments
-   for `semctl'.
-
-   union semun
-   {
-     int val;				<= value for SETVAL
-     struct semid_ds *buf;		<= buffer for IPC_STAT & IPC_SET
-     unsigned short int *array;		<= array for GETALL & SETALL
-     struct seminfo *__buf;		<= buffer for IPC_INFO
-   };
-
-   Previous versions of this file used to define this union but this is
-   incorrect.  One can test the macro _SEM_SEMUN_UNDEFINED to see whether
-   one must define the union or not.  */
-#define _SEM_SEMUN_UNDEFINED	1
diff --git a/bits/setjmp.h b/bits/setjmp.h
deleted file mode 100644
index 9150d8d764..0000000000
--- a/bits/setjmp.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* Define the machine-dependent type `jmp_buf'.  Stub version.  */
-
-#ifndef _SETJMP_H
-# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead."
-#endif
-
-typedef int __jmp_buf[1];
diff --git a/bits/shm.h b/bits/shm.h
deleted file mode 100644
index 746a863486..0000000000
--- a/bits/shm.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Copyright (C) 1995, 1996, 1997, 2000, 2002, 2004
-   Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_SHM_H
-# error "Never include <bits/shm.h> directly; use <sys/shm.h> instead."
-#endif
-
-#include <bits/types.h>
-
-/* Flags for `shmat'.  */
-#define SHM_RDONLY	010000		/* attach read-only else read-write */
-#define SHM_RND		020000		/* round attach address to SHMLBA */
-#define SHM_REMAP	040000		/* take-over region on attach */
-
-/* Commands for `shmctl'.  */
-#define SHM_LOCK	11		/* lock segment (root only) */
-#define SHM_UNLOCK	12		/* unlock segment (root only) */
-
-__BEGIN_DECLS
-
-/* Segment low boundary address multiple.  */
-#define SHMLBA		(__getpagesize ())
-extern int __getpagesize (void) __THROW __attribute__ ((__const__));
-
-
-/* Type to count number of attaches.  */
-typedef unsigned short int shmatt_t;
-
-/* Data structure describing a set of semaphores.  */
-struct shmid_ds
-  {
-    struct ipc_perm shm_perm;		/* operation permission struct */
-    int shm_segsz;			/* size of segment in bytes */
-    __time_t shm_atime;			/* time of last shmat() */
-    __time_t shm_dtime;			/* time of last shmdt() */
-    __time_t shm_ctime;			/* time of last change by shmctl() */
-    __pid_t shm_cpid;			/* pid of creator */
-    __pid_t shm_lpid;			/* pid of last shmop */
-    shmatt_t shm_nattch;		/* number of current attaches */
-  };
-
-__END_DECLS
diff --git a/bits/sigaction.h b/bits/sigaction.h
deleted file mode 100644
index adcc276cc8..0000000000
--- a/bits/sigaction.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/* Copyright (C) 1991,92,96,97,98,2001 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SIGNAL_H
-# error "Never include <bits/sigaction.h> directly; use <signal.h> instead."
-#endif
-
-/* These definitions match those used by the 4.4 BSD kernel.
-   If the operating system has a `sigaction' system call that correctly
-   implements the POSIX.1 behavior, there should be a system-dependent
-   version of this file that defines `struct sigaction' and the `SA_*'
-   constants appropriately.  */
-
-/* Structure describing the action to be taken when a signal arrives.  */
-struct sigaction
-  {
-    /* Signal handler.  */
-#ifdef __USE_POSIX199309
-    union
-      {
-	/* Used if SA_SIGINFO is not set.  */
-	__sighandler_t sa_handler;
-	/* Used if SA_SIGINFO is set.  */
-	void (*sa_sigaction) (int, siginfo_t *, void *);
-      }
-    __sigaction_handler;
-# define sa_handler	__sigaction_handler.sa_handler
-# define sa_sigaction	__sigaction_handler.sa_sigaction
-#else
-    __sighandler_t sa_handler;
-#endif
-
-    /* Additional set of signals to be blocked.  */
-    __sigset_t sa_mask;
-
-    /* Special flags.  */
-    int sa_flags;
-  };
-
-/* Bits in `sa_flags'.  */
-#if defined __USE_UNIX98 || defined __USE_MISC
-# define SA_ONSTACK	0x0001	/* Take signal on signal stack.  */
-# define SA_RESTART	0x0002	/* Restart syscall on signal return.  */
-# define SA_NODEFER	0x0010	/* Don't automatically block the signal when
-				    its handler is being executed.  */
-# define SA_RESETHAND	0x0004	/* Reset to SIG_DFL on entry to handler.  */
-#endif
-#define	SA_NOCLDSTOP	0x0008	/* Don't send SIGCHLD when children stop.  */
-
-#ifdef __USE_MISC
-# define SA_INTERRUPT	0	/* Historical no-op ("not SA_RESTART").  */
-
-/* Some aliases for the SA_ constants.  */
-# define SA_NOMASK    SA_NODEFER
-# define SA_ONESHOT   SA_RESETHAND
-# define SA_STACK     SA_ONSTACK
-#endif
-
-
-/* Values for the HOW argument to `sigprocmask'.  */
-#define	SIG_BLOCK	1	/* Block signals.  */
-#define	SIG_UNBLOCK	2	/* Unblock signals.  */
-#define	SIG_SETMASK	3	/* Set the set of blocked signals.  */
diff --git a/bits/sigcontext.h b/bits/sigcontext.h
deleted file mode 100644
index 7e90e78c4a..0000000000
--- a/bits/sigcontext.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Structure describing state saved while handling a signal.  Stub version.
-   Copyright (C) 1991, 1994, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SIGNAL_H
-# error "Never use <bits/sigcontext.h> directly; include <signal.h> instead."
-#endif
-
-/* State of this thread when the signal was taken.  */
-struct sigcontext
-  {
-    int sc_onstack;
-    __sigset_t sc_mask;
-
-    /* Registers and such.  */
-  };
-
-/* Signal subcodes should be defined here.  */
diff --git a/bits/siginfo.h b/bits/siginfo.h
deleted file mode 100644
index fe7b3b5f9c..0000000000
--- a/bits/siginfo.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/* siginfo_t, sigevent and constants.  Stub version.
-   Copyright (C) 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#if !defined _SIGNAL_H && !defined __need_siginfo_t \
-    && !defined __need_sigevent_t
-# error "Never include this file directly.  Use <signal.h> instead"
-#endif
-
-#if (!defined __have_sigval_t \
-     && (defined _SIGNAL_H || defined __need_siginfo_t \
-	 || defined __need_sigevent_t))
-# define __have_sigval_t 1
-
-/* Type for data associated with a signal.  */
-typedef union sigval
-  {
-    int sival_int;
-    void *sival_ptr;
-  } sigval_t;
-#endif
-
-#if (!defined __have_siginfo_t \
-     && (defined _SIGNAL_H || defined __need_siginfo_t))
-# define __have_siginfo_t	1
-
-typedef struct siginfo
-  {
-    int si_signo;		/* Signal number.  */
-    int si_errno;		/* If non-zero, an errno value associated with
-				   this signal, as defined in <errno.h>.  */
-    int si_code;		/* Signal code.  */
-    __pid_t si_pid;		/* Sending process ID.  */
-    __uid_t si_uid;		/* Real user ID of sending process.  */
-    void *si_addr;		/* Address of faulting instruction.  */
-    int si_status;		/* Exit value or signal.  */
-    long int si_band;		/* Band event for SIGPOLL.  */
-    union sigval si_value;	/* Signal value.  */
-  } siginfo_t;
-
-
-/* Values for `si_code'.  Positive values are reserved for kernel-generated
-   signals.  */
-enum
-{
-  SI_ASYNCIO = -4,		/* Sent by AIO completion.  */
-# define SI_ASYNCIO	SI_ASYNCIO
-  SI_MESGQ,			/* Sent by real time mesq state change.  */
-# define SI_MESGQ	SI_MESGQ
-  SI_TIMER,			/* Sent by timer expiration.  */
-# define SI_TIMER	SI_TIMER
-  SI_QUEUE,			/* Sent by sigqueue.  */
-# define SI_QUEUE	SI_QUEUE
-  SI_USER			/* Sent by kill, sigsend, raise.  */
-# define SI_USER	SI_USER
-};
-
-
-/* `si_code' values for SIGILL signal.  */
-enum
-{
-  ILL_ILLOPC = 1,		/* Illegal opcode.  */
-# define ILL_ILLOPC	ILL_ILLOPC
-  ILL_ILL_OPN,			/* Illegal operand.  */
-# define ILL_ILLOPN	ILL_ILLOPN
-  ILL_ILLADR,			/* Illegal addressing mode.  */
-# define ILL_ILLADR	ILL_ILLADR
-  ILL_ILLTRP,			/* Illegal trap. */
-# define ILL_ILLTRP	ILL_ILLTRP
-  ILL_PRVOPC,			/* Privileged opcode.  */
-# define ILL_PRVOPC	ILL_PRVOPC
-  ILL_PRVREG,			/* Privileged register.  */
-# define ILL_PRVREG	ILL_PRVREG
-  ILL_COPROC,			/* Coprocessor error.  */
-# define ILL_COPROC	ILL_COPROC
-  ILL_BADSTK			/* Internal stack error.  */
-# define ILL_BADSTK	ILL_BADSTK
-};
-
-/* `si_code' values for SIGFPE signal.  */
-enum
-{
-  FPE_INTDIV = 1,		/* Integer divide by zero.  */
-# define FPE_INTDIV	FPE_INTDIV
-  FPE_INTOVF,			/* Integer overflow.  */
-# define FPE_INTOVF	FPE_INTOVF
-  FPE_FLTDIV,			/* Floating point divide by zero.  */
-# define FPE_FLTDIV	FPE_FLTDIV
-  FPE_FLTOVF,			/* Floating point overflow.  */
-# define FPE_FLTOVF	FPE_FLTOVF
-  FPE_FLTUND,			/* Floating point underflow.  */
-# define FPE_FLTUND	FPE_FLTUND
-  FPE_FLTRES,			/* Floating point inexact result.  */
-# define FPE_FLTRES	FPE_FLTRES
-  FPE_FLTINV,			/* Floating point invalid operation.  */
-# define FPE_FLTINV	FPE_FLTINV
-  FPE_FLTSUB			/* Subscript out of range.  */
-# define FPE_FLTSUB	FPE_FLTSUB
-};
-
-/* `si_code' values for SIGSEGV signal.  */
-enum
-{
-  SEGV_MAPERR = 1,		/* Address not mapped to object.  */
-# define SEGV_MAPERR	SEGV_MAPERR
-  SEGV_ACCERR			/* Invalid permissions for mapped object.  */
-# define SEGV_ACCERR	SEGV_ACCERR
-};
-
-/* `si_code' values for SIGBUS signal.  */
-enum
-{
-  BUS_ADRALN = 1,		/* Invalid address alignment.  */
-# define BUS_ADRALN	BUS_ADRALN
-  BUS_ADRERR,			/* Non-existant physical address.  */
-# define BUS_ADRERR	BUS_ADRERR
-  BUS_OBJERR			/* Object specific hardware error.  */
-# define BUS_OBJERR	BUS_OBJERR
-};
-
-/* `si_code' values for SIGTRAP signal.  */
-enum
-{
-  TRAP_BRKPT = 1,		/* Process breakpoint.  */
-# define TRAP_BRKPT	TRAP_BRKPT
-  TRAP_TRACE			/* Process trace trap.  */
-# define TRAP_TRACE	TRAP_TRACE
-};
-
-/* `si_code' values for SIGCHLD signal.  */
-enum
-{
-  CLD_EXITED = 1,		/* Child has exited.  */
-# define CLD_EXITED	CLD_EXITED
-  CLD_KILLED,			/* Child was killed.  */
-# define CLD_KILLED	CLD_KILLED
-  CLD_DUMPED,			/* Child terminated abnormally.  */
-# define CLD_DUMPED	CLD_DUMPED
-  CLD_TRAPPED,			/* Traced child has trapped.  */
-# define CLD_TRAPPED	CLD_TRAPPED
-  CLD_STOPPED,			/* Child has stopped.  */
-# define CLD_STOPPED	CLD_STOPPED
-  CLD_CONTINUED			/* Stopped child has continued.  */
-# define CLD_CONTINUED	CLD_CONTINUED
-};
-
-/* `si_code' values for SIGPOLL signal.  */
-enum
-{
-  POLL_IN = 1,			/* Data input available.  */
-# define POLL_IN	POLL_IN
-  POLL_OUT,			/* Output buffers available.  */
-# define POLL_OUT	POLL_OUT
-  POLL_MSG,			/* Input message available.   */
-# define POLL_MSG	POLL_MSG
-  POLL_ERR,			/* I/O error.  */
-# define POLL_ERR	POLL_ERR
-  POLL_PRI,			/* High priority input available.  */
-# define POLL_PRI	POLL_PRI
-  POLL_HUP			/* Device disconnected.  */
-# define POLL_HUP	POLL_HUP
-};
-
-# undef __need_siginfo_t
-#endif	/* !have siginfo_t && (have _SIGNAL_H || need siginfo_t).  */
-
-
-#if (defined _SIGNAL_H || defined __need_sigevent_t) \
-    && !defined __have_sigevent_t
-# define __have_sigevent_t	1
-
-/* Structure to transport application-defined values with signals.  */
-# define SIGEV_MAX_SIZE	64
-# define SIGEV_PAD_SIZE	((SIGEV_MAX_SIZE / sizeof (int)) - 3)
-
-typedef struct sigevent
-  {
-    sigval_t sigev_value;
-    int sigev_signo;
-    int sigev_notify;
-    void (*sigev_notify_function) (sigval_t);	    /* Function to start.  */
-    void *sigev_notify_attributes;		    /* Really pthread_attr_t.*/
-  } sigevent_t;
-
-/* `sigev_notify' values.  */
-enum
-{
-  SIGEV_SIGNAL = 0,		/* Notify via signal.  */
-# define SIGEV_SIGNAL	SIGEV_SIGNAL
-  SIGEV_NONE,			/* Other notification: meaningless.  */
-# define SIGEV_NONE	SIGEV_NONE
-  SIGEV_THREAD			/* Deliver via thread creation.  */
-# define SIGEV_THREAD	SIGEV_THREAD
-};
-
-#endif	/* have _SIGNAL_H.  */
diff --git a/bits/signum.h b/bits/signum.h
deleted file mode 100644
index 25331f943c..0000000000
--- a/bits/signum.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Copyright (C) 1991, 1993, 1996, 1998 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifdef	_SIGNAL_H
-
-/* Fake signal functions.  */
-
-#define	SIG_ERR	 ((__sighandler_t) -1)	/* Error return.  */
-#define	SIG_DFL	 ((__sighandler_t)  0)	/* Default action.  */
-#define	SIG_IGN	 ((__sighandler_t)  1)	/* Ignore signal.  */
-
-#ifdef __USE_UNIX98
-# define SIG_HOLD ((__sighandler_t)  2)	/* Add signal to hold mask.  */
-#endif
-
-/* Signals in the 1-15 range are defined with their historical numbers.
-   Signals in the 20-25 range are relatively new and have no ingrained
-   numbers. */
-
-/* ANSI signals.  */
-#define	SIGINT	2	/* Interactive attention signal.  */
-#define	SIGILL	4	/* Illegal instruction.  */
-#define	SIGABRT	6	/* Abnormal termination.  */
-#define	SIGFPE	8	/* Erroneous arithmetic operation.  */
-#define	SIGSEGV	11	/* Invalid access to storage.  */
-#define	SIGTERM	15	/* Termination request.  */
-
-/* Historical signals specified by POSIX. */
-#define	SIGHUP	1	/* Hangup.  */
-#define	SIGQUIT	3	/* Quit.  */
-#define	SIGKILL	9	/* Kill (cannot be blocked, caught, or ignored).  */
-#define	SIGPIPE	13	/* Broken pipe.  */
-#define	SIGALRM	14	/* Alarm clock.  */
-
-/* New(er) POSIX signals. */
-#define	SIGSTOP	20	/* Stop (cannot be blocked, caught, or ignored).  */
-#define	SIGCONT	21	/* Continue.  */
-#define	SIGTSTP	22	/* Keyboard stop.  */
-#define	SIGTTIN	23	/* Background read from control terminal.  */
-#define	SIGTTOU	24	/* Background write to control terminal.  */
-#define	SIGCHLD	25	/* Child terminated or stopped.  */
-
-#define	_NSIG	26
-
-/* Archaic names for compatibility. */
-#define	SIGIOT  SIGABRT	/* IOT instruction, abort() on a PDP11 */
-#define	SIGCLD  SIGCHLD	/* Old System V name */
-
-#endif	/* <signal.h> included.  */
diff --git a/bits/sigset.h b/bits/sigset.h
deleted file mode 100644
index 5fc8692df0..0000000000
--- a/bits/sigset.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* __sig_atomic_t, __sigset_t, and related definitions.  Generic/BSD version.
-   Copyright (C) 1991, 1992, 1994, 1996, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef	_SIGSET_H_types
-#define	_SIGSET_H_types	1
-
-typedef int __sig_atomic_t;
-
-/* A `sigset_t' has a bit for each signal.  */
-typedef unsigned long int __sigset_t;
-
-#endif
-
-
-/* We only want to define these functions if <signal.h> was actually
-   included; otherwise we were included just to define the types.  Since we
-   are namespace-clean, it wouldn't hurt to define extra macros.  But
-   trouble can be caused by functions being defined (e.g., any global
-   register vars declared later will cause compilation errors).  */
-
-#if !defined _SIGSET_H_fns && defined _SIGNAL_H
-#define _SIGSET_H_fns 1
-
-#ifndef _EXTERN_INLINE
-# define _EXTERN_INLINE extern __inline
-#endif
-
-/* Return a mask that includes SIG only.  The cast to `sigset_t' avoids
-   overflow if `sigset_t' is wider than `int'.  */
-#define	__sigmask(sig)	(((__sigset_t) 1) << ((sig) - 1))
-
-#define	__sigemptyset(set)	((*(set) = (__sigset_t) 0), 0)
-#define	__sigfillset(set)	((*(set) = ~(__sigset_t) 0), 0)
-
-#ifdef _GNU_SOURCE
-# define __sigisemptyset(set)	(*(set) == (__sigset_t) 0)
-# define __sigandset(dest, left, right) \
-				((*(dest) = (*(left) & *(right))), 0)
-# define __sigorset(dest, left, right) \
-				((*(dest) = (*(left) | *(right))), 0)
-#endif
-
-/* These functions needn't check for a bogus signal number -- error
-   checking is done in the non __ versions.  */
-
-extern int __sigismember (__const __sigset_t *, int);
-extern int __sigaddset (__sigset_t *, int);
-extern int __sigdelset (__sigset_t *, int);
-
-#ifdef __USE_EXTERN_INLINES
-# define __SIGSETFN(NAME, BODY, CONST)					      \
-  _EXTERN_INLINE int							      \
-  NAME (CONST __sigset_t *__set, int __sig)				      \
-  {									      \
-    __sigset_t __mask = __sigmask (__sig);				      \
-    return BODY;							      \
-  }
-
-__SIGSETFN (__sigismember, (*__set & __mask) ? 1 : 0, __const)
-__SIGSETFN (__sigaddset, ((*__set |= __mask), 0), )
-__SIGSETFN (__sigdelset, ((*__set &= ~__mask), 0), )
-
-# undef __SIGSETFN
-#endif
-
-
-#endif /* ! _SIGSET_H_fns.  */
diff --git a/bits/sigstack.h b/bits/sigstack.h
deleted file mode 100644
index de79c90afa..0000000000
--- a/bits/sigstack.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* sigstack, sigaltstack definitions.
-   Copyright (C) 1998, 1999 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SIGNAL_H
-# error "Never include this file directly.  Use <signal.h> instead"
-#endif
-
-
-/* Structure describing a signal stack (obsolete).  */
-struct sigstack
-  {
-    __ptr_t ss_sp;		/* Signal stack pointer.  */
-    int ss_onstack;		/* Nonzero if executing on this stack.  */
-  };
-
-
-/* Alternate, preferred interface.  */
-typedef struct sigaltstack
-  {
-    __ptr_t ss_sp;
-    size_t ss_size;
-    int ss_flags;
-  } stack_t;
-
-
-/* Possible values for `ss_flags.'.  */
-enum
-{
-  SS_ONSTACK = 0x0001,
-#define SS_ONSTACK	SS_ONSTACK
-  SS_DISABLE = 0x0004
-#define SS_DISABLE	SS_DISABLE
-};
-
-/* Minumum stack size for a signal handler.  */
-#define MINSIGSTKSZ	8192
-
-/* System default stack size.  */
-#define SIGSTKSZ	(MINSIGSTKSZ + 32768)
diff --git a/bits/sigthread.h b/bits/sigthread.h
deleted file mode 100644
index 2edb58cd51..0000000000
--- a/bits/sigthread.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* Signal handling function for threaded programs.  Generic version.
-   Copyright (C) 2000 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _BITS_SIGTHREAD_H
-#define _BITS_SIGTHREAD_H	1
-
-#if !defined _SIGNAL_H && !defined _PTHREAD_H
-# error "Never include this file directly.  Use <pthread.h> instead"
-#endif
-
-/* Modify the signal mask for the calling thread.  The arguments have the
-   same meaning as for sigprocmask; in fact, this and sigprocmask might be
-   the same function.  We declare this the same on all platforms, since it
-   doesn't use any thread-related types.  */
-extern int pthread_sigmask (int __how, __const __sigset_t *__newmask,
-			    __sigset_t *__oldmask) __THROW;
-
-
-#endif	/* bits/sigthread.h */
diff --git a/bits/sockaddr.h b/bits/sockaddr.h
deleted file mode 100644
index 3e1d1312d8..0000000000
--- a/bits/sockaddr.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Definition of `struct sockaddr_*' common members.  Generic/4.2 BSD version.
-   Copyright (C) 1995,1996,1997,1998,2000,2001 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/*
- * Never include this file directly; use <sys/socket.h> instead.
- */
-
-#ifndef _BITS_SOCKADDR_H
-#define _BITS_SOCKADDR_H	1
-
-
-/* POSIX.1g specifies this type name for the `sa_family' member.  */
-typedef unsigned short int sa_family_t;
-
-/* This macro is used to declare the initial common members
-   of the data types used for socket addresses, `struct sockaddr',
-   `struct sockaddr_in', `struct sockaddr_un', etc.  */
-
-#define	__SOCKADDR_COMMON(sa_prefix) \
-  sa_family_t sa_prefix##family
-
-#define __SOCKADDR_COMMON_SIZE	(sizeof (unsigned short int))
-
-#endif	/* bits/sockaddr.h */
diff --git a/bits/socket.h b/bits/socket.h
deleted file mode 100644
index 20e6f0337f..0000000000
--- a/bits/socket.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/* System-specific socket constants and types.  Generic/4.3 BSD version.
-   Copyright (C) 1991,92,1994-1999,2000,2001 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef __BITS_SOCKET_H
-#define __BITS_SOCKET_H	1
-
-#if !defined _SYS_SOCKET_H && !defined _NETINET_IN_H
-# error "Never include <bits/socket.h> directly; use <sys/socket.h> instead."
-#endif
-
-#include <limits.h>
-#include <bits/types.h>
-
-#define	__need_size_t
-#include <stddef.h>
-
-/* Type for length arguments in socket calls.  */
-#ifndef __socklen_t_defined
-typedef __socklen_t socklen_t;
-# define __socklen_t_defined
-#endif
-
-
-/* Types of sockets.  */
-enum __socket_type
-{
-  SOCK_STREAM = 1,		/* Sequenced, reliable, connection-based
-				   byte streams.  */
-#define SOCK_STREAM SOCK_STREAM
-  SOCK_DGRAM = 2,		/* Connectionless, unreliable datagrams
-				   of fixed maximum length.  */
-#define SOCK_DGRAM SOCK_DGRAM
-  SOCK_RAW = 3,			/* Raw protocol interface.  */
-#define SOCK_RAW SOCK_RAW
-  SOCK_RDM = 4,			/* Reliably-delivered messages.  */
-#define SOCK_RDM SOCK_RDM
-  SOCK_SEQPACKET = 5		/* Sequenced, reliable, connection-based,
-				   datagrams of fixed maximum length.  */
-#define SOCK_SEQPACKET SOCK_SEQPACKET
-};
-
-/* Protocol families.  */
-#define	PF_UNSPEC	0	/* Unspecified.  */
-#define	PF_LOCAL	1	/* Local to host (pipes and file-domain).  */
-#define	PF_UNIX		PF_LOCAL /* Old BSD name for PF_LOCAL.  */
-#define	PF_FILE		PF_LOCAL /* POSIX name for PF_LOCAL.  */
-#define	PF_INET		2	/* IP protocol family.  */
-#define	PF_IMPLINK	3	/* ARPAnet IMP protocol.  */
-#define	PF_PUP		4	/* PUP protocols.  */
-#define	PF_CHAOS	5	/* MIT Chaos protocols.  */
-#define	PF_NS		6	/* Xerox NS protocols.  */
-#define	PF_ISO		7	/* ISO protocols.  */
-#define	PF_OSI		PF_ISO
-#define	PF_ECMA		8	/* ECMA protocols.  */
-#define	PF_DATAKIT	9	/* AT&T Datakit protocols.  */
-#define	PF_CCITT	10	/* CCITT protocols (X.25 et al).  */
-#define	PF_SNA		11	/* IBM SNA protocol.  */
-#define	PF_DECnet	12	/* DECnet protocols.  */
-#define	PF_DLI		13	/* Direct data link interface.  */
-#define	PF_LAT		14	/* DEC Local Area Transport protocol.  */
-#define	PF_HYLINK	15	/* NSC Hyperchannel protocol.  */
-#define	PF_APPLETALK	16	/* Don't use this.  */
-#define	PF_ROUTE	17	/* Internal Routing Protocol.  */
-#define	PF_LINK		18	/* Link layer interface.  */
-#define	PF_XTP		19	/* eXpress Transfer Protocol (no AF).  */
-#define	PF_COIP		20	/* Connection-oriented IP, aka ST II.  */
-#define	PF_CNT		21	/* Computer Network Technology.  */
-#define PF_RTIP		22	/* Help Identify RTIP packets.  **/
-#define	PF_IPX		23	/* Novell Internet Protocol.  */
-#define	PF_SIP		24	/* Simple Internet Protocol.  */
-#define PF_PIP		25	/* Help Identify PIP packets.  */
-#define PF_INET6	26	/* IP version 6.  */
-#define	PF_MAX		27
-
-/* Address families.  */
-#define	AF_UNSPEC	PF_UNSPEC
-#define	AF_LOCAL	PF_LOCAL
-#define	AF_UNIX		PF_UNIX
-#define	AF_FILE		PF_FILE
-#define	AF_INET		PF_INET
-#define	AF_IMPLINK	PF_IMPLINK
-#define	AF_PUP		PF_PUP
-#define	AF_CHAOS	PF_CHAOS
-#define	AF_NS		PF_NS
-#define	AF_ISO		PF_ISO
-#define	AF_OSI		PF_OSI
-#define	AF_ECMA		PF_ECMA
-#define	AF_DATAKIT	PF_DATAKIT
-#define	AF_CCITT	PF_CCITT
-#define	AF_SNA		PF_SNA
-#define	AF_DECnet	PF_DECnet
-#define	AF_DLI		PF_DLI
-#define	AF_LAT		PF_LAT
-#define	AF_HYLINK	PF_HYLINK
-#define	AF_APPLETALK	PF_APPLETALK
-#define	AF_ROUTE	PF_ROUTE
-#define	AF_LINK		PF_LINK
-#define	pseudo_AF_XTP	PF_XTP
-#define	AF_COIP		PF_COIP
-#define	AF_CNT		PF_CNT
-#define pseudo_AF_RTIP	PF_RTIP
-#define	AF_IPX		PF_IPX
-#define	AF_SIP		PF_SIP
-#define pseudo_AF_PIP	PF_PIP
-#define AF_INET6	PF_INET6
-#define	AF_MAX		PF_MAX
-
-
-/* Get the definition of the macro to define the common sockaddr members.  */
-#include <bits/sockaddr.h>
-
-/* Structure describing a generic socket address.  */
-struct sockaddr
-  {
-    __SOCKADDR_COMMON (sa_);	/* Common data: address family and length.  */
-    char sa_data[14];		/* Address data.  */
-  };
-
-
-/* Structure large enough to hold any socket address (with the historical
-   exception of AF_UNIX).  We reserve 128 bytes.  */
-#if ULONG_MAX > 0xffffffff
-# define __ss_aligntype	__uint64_t
-#else
-# define __ss_aligntype	__uint32_t
-#endif
-#define _SS_SIZE	128
-#define _SS_PADSIZE	(_SS_SIZE - (2 * sizeof (__ss_aligntype)))
-
-struct sockaddr_storage
-  {
-    __SOCKADDR_COMMON (ss_);	/* Address family, etc.  */
-    __ss_aligntype __ss_align;	/* Force desired alignment.  */
-    char __ss_padding[_SS_PADSIZE];
-  };
-
-
-/* Bits in the FLAGS argument to `send', `recv', et al.  */
-enum
-  {
-    MSG_OOB		= 0x01,	/* Process out-of-band data.  */
-#define MSG_OOB MSG_OOB
-    MSG_PEEK		= 0x02,	/* Peek at incoming messages.  */
-#define MSG_PEEK MSG_PEEK
-    MSG_DONTROUTE	= 0x04,	/* Don't use local routing.  */
-#define MSG_DONTROUTE MSG_DONTROUTE
-    MSG_EOR		= 0x08,	/* Data completes record.  */
-#define MSG_EOR MSG_EOR
-    MSG_TRUNC		= 0x10,	/* Data discarded before delivery.  */
-#define MSG_TRUNC MSG_TRUNC
-    MSG_CTRUNC		= 0x20,	/* Control data lost before delivery.  */
-#define MSG_CTRUNC MSG_CTRUNC
-    MSG_WAITALL		= 0x40,	/* Wait for full request or error.  */
-#define MSG_WAITALL MSG_WAITALL
-    MSG_DONTWAIT	= 0x80	/* This message should be nonblocking.  */
-#define MSG_DONTWAIT MSG_DONTWAIT
-  };
-
-
-/* Structure describing messages sent by
-   `sendmsg' and received by `recvmsg'.  */
-struct msghdr
-  {
-    __ptr_t msg_name;		/* Address to send to/receive from.  */
-    socklen_t msg_namelen;	/* Length of address data.  */
-
-    struct iovec *msg_iov;	/* Vector of data to send/receive into.  */
-    int msg_iovlen;		/* Number of elements in the vector.  */
-
-    __ptr_t msg_accrights;	/* Access rights information.  */
-    socklen_t msg_accrightslen;	/* Length of access rights information.  */
-
-    int msg_flags;		/* Flags in received message.  */
-  };
-
-
-/* Protocol number used to manipulate socket-level options
-   with `getsockopt' and `setsockopt'.  */
-#define	SOL_SOCKET	0xffff
-
-/* Socket-level options for `getsockopt' and `setsockopt'.  */
-enum
-  {
-    SO_DEBUG = 0x0001,		/* Record debugging information.  */
-#define SO_DEBUG SO_DEBUG
-    SO_ACCEPTCONN = 0x0002,	/* Accept connections on socket.  */
-#define SO_ACCEPTCONN SO_ACCEPTCONN
-    SO_REUSEADDR = 0x0004,	/* Allow reuse of local addresses.  */
-#define SO_REUSEADDR SO_REUSEADDR
-    SO_KEEPALIVE = 0x0008,	/* Keep connections alive and send
-				   SIGPIPE when they die.  */
-#define SO_KEEPALIVE SO_KEEPALIVE
-    SO_DONTROUTE = 0x0010,	/* Don't do local routing.  */
-#define SO_DONTROUTE SO_DONTROUTE
-    SO_BROADCAST = 0x0020,	/* Allow transmission of
-				   broadcast messages.  */
-#define SO_BROADCAST SO_BROADCAST
-    SO_USELOOPBACK = 0x0040,	/* Use the software loopback to avoid
-				   hardware use when possible.  */
-#define SO_USELOOPBACK SO_USELOOPBACK
-    SO_LINGER = 0x0080,		/* Block on close of a reliable
-				   socket to transmit pending data.  */
-#define SO_LINGER SO_LINGER
-    SO_OOBINLINE = 0x0100,	/* Receive out-of-band data in-band.  */
-#define SO_OOBINLINE SO_OOBINLINE
-    SO_REUSEPORT = 0x0200,	/* Allow local address and port reuse.  */
-#define SO_REUSEPORT SO_REUSEPORT
-    SO_SNDBUF = 0x1001,		/* Send buffer size.  */
-#define SO_SNDBUF SO_SNDBUF
-    SO_RCVBUF = 0x1002,		/* Receive buffer.  */
-#define SO_RCVBUF SO_RCVBUF
-    SO_SNDLOWAT = 0x1003,	/* Send low-water mark.  */
-#define SO_SNDLOWAT SO_SNDLOWAT
-    SO_RCVLOWAT = 0x1004,	/* Receive low-water mark.  */
-#define SO_RCVLOWAT SO_RCVLOWAT
-    SO_SNDTIMEO = 0x1005,	/* Send timeout.  */
-#define SO_SNDTIMEO SO_SNDTIMEO
-    SO_RCVTIMEO = 0x1006,	/* Receive timeout.  */
-#define SO_RCVTIMEO SO_RCVTIMEO
-    SO_ERROR = 0x1007,		/* Get and clear error status.  */
-#define SO_ERROR SO_ERROR
-    SO_STYLE = 0x1008,		/* Get socket connection style.  */
-#define SO_STYLE SO_STYLE
-    SO_TYPE = SO_STYLE		/* Compatible name for SO_STYLE.  */
-#define SO_TYPE SO_TYPE
-  };
-
-/* Structure used to manipulate the SO_LINGER option.  */
-struct linger
-  {
-    int l_onoff;		/* Nonzero to linger on close.  */
-    int l_linger;		/* Time to linger.  */
-  };
-
-#endif	/* bits/socket.h */
diff --git a/bits/stat.h b/bits/stat.h
deleted file mode 100644
index c8391fc87f..0000000000
--- a/bits/stat.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/* Copyright (C) 1992, 1996, 1997, 2000 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_STAT_H
-# error "Never include <bits/stat.h> directly; use <sys/stat.h> instead."
-#endif
-
-/* This structure needs to be defined in accordance with the
-   implementation of __stat, __fstat, and __lstat.  */
-
-#include <bits/types.h>
-
-/* Structure describing file characteristics.  */
-struct stat
-  {
-    /* These are the members that POSIX.1 requires.  */
-
-    __mode_t st_mode;		/* File mode.  */
-#ifndef __USE_FILE_OFFSET64
-    __ino_t st_ino;		/* File serial number.  */
-#else
-    __ino64_t st_ino;		/* File serial number.	*/
-#endif
-    __dev_t st_dev;		/* Device containing the file.  */
-    __nlink_t st_nlink;		/* Link count.  */
-
-    __uid_t st_uid;		/* User ID of the file's owner.  */
-    __gid_t st_gid;		/* Group ID of the file's group.  */
-#ifndef __USE_FILE_OFFSET64
-    __off_t st_size;		/* Size of file, in bytes.  */
-#else
-    __off64_t st_size;		/* Size of file, in bytes.  */
-#endif
-
-    __time_t st_atime;		/* Time of last access.  */
-    __time_t st_mtime;		/* Time of last modification.  */
-    __time_t st_ctime;		/* Time of last status change.  */
-
-    /* This should be defined if there is a `st_blksize' member.  */
-#undef	_STATBUF_ST_BLKSIZE
-  };
-
-/* Encoding of the file mode.  These are the standard Unix values,
-   but POSIX.1 does not specify what values should be used.  */
-
-#define	__S_IFMT	0170000	/* These bits determine file type.  */
-
-/* File types.  */
-#define	__S_IFDIR	0040000	/* Directory.  */
-#define	__S_IFCHR	0020000	/* Character device.  */
-#define	__S_IFBLK	0060000	/* Block device.  */
-#define	__S_IFREG	0100000	/* Regular file.  */
-#define	__S_IFIFO	0010000	/* FIFO.  */
-
-/* POSIX.1b objects.  */
-#define __S_TYPEISMQ(buf) 0
-#define __S_TYPEISSEM(buf) 0
-#define __S_TYPEISSHM(buf) 0
-
-/* Protection bits.  */
-
-#define	__S_ISUID	04000	/* Set user ID on execution.  */
-#define	__S_ISGID	02000	/* Set group ID on execution.  */
-#define	__S_IREAD	0400	/* Read by owner.  */
-#define	__S_IWRITE	0200	/* Write by owner.  */
-#define	__S_IEXEC	0100	/* Execute by owner.  */
-
-#ifdef __USE_LARGEFILE64
-struct stat64
-  {
-    __mode_t st_mode;		/* File mode.  */
-    __ino64_t st_ino;		/* File serial number.	*/
-    __dev_t st_dev;		/* Device.  */
-    __nlink_t st_nlink;		/* Link count.  */
-
-    __uid_t st_uid;		/* User ID of the file's owner.	*/
-    __gid_t st_gid;		/* Group ID of the file's group.*/
-    __off64_t st_size;		/* Size of file, in bytes.  */
-
-    __time_t st_atime;		/* Time of last access.  */
-    __time_t st_mtime;		/* Time of last modification.  */
-    __time_t st_ctime;		/* Time of last status change.  */
-  };
-#endif
diff --git a/bits/statfs.h b/bits/statfs.h
deleted file mode 100644
index 851e464fcc..0000000000
--- a/bits/statfs.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/* Definition of `struct statfs', information about a filesystem.
-   Copyright (C) 1996, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_STATFS_H
-# error "Never include <bits/statfs.h> directly; use <sys/statfs.h> instead."
-#endif
-
-#include <bits/types.h>
-
-/* GNU Hurd NOTE: The size of this structure (16 ints) is known in
-   <hurd/hurd_types.defs>, since it is used in the `file_statfs' RPC.  MiG
-   does not cope at all well with the passed C structure not being of the
-   expected size.  There are some filler words at the end to allow for
-   future expansion.  To increase the size of the structure used in the RPC
-   and retain binary compatibility, we would need to assign a new message
-   number.  */
-
-struct statfs
-  {
-    unsigned int f_type;
-    unsigned int f_bsize;
-#ifndef __USE_FILE_OFFSET64
-    __fsblkcnt_t f_blocks;
-    __fsblkcnt_t f_bfree;
-    __fsblkcnt_t f_bavail;
-    __fsblkcnt_t f_files;
-    __fsblkcnt_t f_ffree;
-#else
-    __fsblkcnt64_t f_blocks;
-    __fsblkcnt64_t f_bfree;
-    __fsblkcnt64_t f_bavail;
-    __fsblkcnt64_t f_files;
-    __fsblkcnt64_t f_ffree;
-#endif
-    __fsid_t f_fsid;
-    unsigned int f_namelen;
-    unsigned int f_spare[6];
-  };
-
-#ifdef __USE_LARGEFILE64
-struct statfs64
-  {
-    unsigned int f_type;
-    unsigned int f_bsize;
-    __fsblkcnt64_t f_blocks;
-    __fsblkcnt64_t f_bfree;
-    __fsblkcnt64_t f_bavail;
-    __fsblkcnt64_t f_files;
-    __fsblkcnt64_t f_ffree;
-    __fsid_t f_fsid;
-    unsigned int f_namelen;
-    unsigned int f_spare[6];
-  };
-#endif
diff --git a/bits/statvfs.h b/bits/statvfs.h
deleted file mode 100644
index 66841bf09a..0000000000
--- a/bits/statvfs.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Definition of `struct statvfs', information about a filesystem.
-   Copyright (C) 1998 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_STATVFS_H
-# error "Never include <bits/statvfs.h> directly; use <sys/statvfs.h> instead."
-#endif
-
-#include <bits/types.h>
-
-/* GNU Hurd NOTE: The size of this structure (16 ints) is known in
-   <hurd/hurd_types.defs>, since it is used in the `file_statfs' RPC.  MiG
-   does not cope at all well with the passed C structure not being of the
-   expected size.  There are some filler words at the end to allow for
-   future expansion.  To increase the size of the structure used in the RPC
-   and retain binary compatibility, we would need to assign a new message
-   number.  */
-
-struct statvfs
-  {
-    unsigned long int f_bsize;
-    unsigned long int f_frsize;
-#ifndef __USE_FILE_OFFSET64
-    __fsblkcnt_t f_blocks;
-    __fsblkcnt_t f_bfree;
-    __fsblkcnt_t f_bavail;
-    __fsfilcnt_t f_files;
-    __fsfilcnt_t f_ffree;
-    __fsfilcnt_t f_favail;
-#else
-    __fsblkcnt64_t f_blocks;
-    __fsblkcnt64_t f_bfree;
-    __fsblkcnt64_t f_bavail;
-    __fsfilcnt64_t f_files;
-    __fsfilcnt64_t f_ffree;
-    __fsfilcnt64_t f_favail;
-#endif
-    __fsid_t f_fsid;
-    unsigned long int f_flag;
-    unsigned long int f_namemax;
-    unsigned int f_spare[6];
-  };
-
-#ifdef __USE_LARGEFILE64
-struct statvfs64
-  {
-    unsigned long int f_bsize;
-    unsigned long int f_frsize;
-    __fsblkcnt64_t f_blocks;
-    __fsblkcnt64_t f_bfree;
-    __fsblkcnt64_t f_bavail;
-    __fsfilcnt64_t f_files;
-    __fsfilcnt64_t f_ffree;
-    __fsfilcnt64_t f_favail;
-    __fsid_t f_fsid;
-    unsigned long int f_flag;
-    unsigned long int f_namemax;
-    unsigned int f_spare[6];
-  };
-#endif
-
-/* Definitions for the flag in `f_flag'.  */
-enum
-{
-  ST_RDONLY = 1,
-#define ST_RDONLY	ST_RDONLY
-  ST_NOSUID = 2
-#define ST_NOSUID	ST_NOSUID
-};
diff --git a/bits/stdio-lock.h b/bits/stdio-lock.h
deleted file mode 100644
index d7c1db0132..0000000000
--- a/bits/stdio-lock.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Thread package specific definitions of stream lock type.  Generic version.
-   Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _BITS_STDIO_LOCK_H
-#define _BITS_STDIO_LOCK_H 1
-
-#include <bits/libc-lock.h>
-
-__libc_lock_define_recursive (typedef, _IO_lock_t)
-
-/* We need recursive (counting) mutexes.  */
-#ifdef _LIBC_LOCK_RECURSIVE_INITIALIZER
-# define _IO_lock_initializer _LIBC_LOCK_RECURSIVE_INITIALIZER
-#elif _IO_MTSAFE_IO
- #error libio needs recursive mutexes for _IO_MTSAFE_IO
-#endif
-
-#define _IO_lock_init(_name)	__libc_lock_init_recursive (_name)
-#define _IO_lock_fini(_name)	__libc_lock_fini_recursive (_name)
-#define _IO_lock_lock(_name)	__libc_lock_lock_recursive (_name)
-#define _IO_lock_trylock(_name)	__libc_lock_trylock_recursive (_name)
-#define _IO_lock_unlock(_name)	__libc_lock_unlock_recursive (_name)
-
-
-#define _IO_cleanup_region_start(_fct, _fp) \
-  __libc_cleanup_region_start (((_fp)->_flags & _IO_USER_LOCK) == 0, _fct, _fp)
-#define _IO_cleanup_region_start_noarg(_fct) \
-  __libc_cleanup_region_start (1, _fct, NULL)
-#define _IO_cleanup_region_end(_doit) \
-  __libc_cleanup_region_end (_doit)
-
-#if defined _LIBC && !defined NOT_IN_libc
-# define _IO_acquire_lock(_fp) \
-  _IO_cleanup_region_start ((void (*) (void *)) _IO_funlockfile, (_fp));      \
-  _IO_flockfile (_fp)
-
-# define _IO_release_lock(_fp) \
-  _IO_funlockfile (_fp);						      \
-  _IO_cleanup_region_end (0)
-#endif
-
-#endif /* bits/stdio-lock.h */
diff --git a/bits/string.h b/bits/string.h
deleted file mode 100644
index ad68b038b6..0000000000
--- a/bits/string.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* This file should provide inline versions of string functions.
-
-   Surround GCC-specific parts with #ifdef __GNUC__, and use `extern __inline'.
-
-   This file should define __STRING_INLINES if functions are actually defined
-   as inlines.  */
-
-#ifndef _BITS_STRING_H
-#define _BITS_STRING_H	1
-
-
-#endif /* bits/string.h */
diff --git a/bits/stropts.h b/bits/stropts.h
deleted file mode 100644
index c8e2c2c034..0000000000
--- a/bits/stropts.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _STROPTS_H
-# error "Never include <bits/stropts.h> directly; use <stropts.h> instead."
-#endif
-
-#ifndef _BITS_STROPTS_H
-#define _BITS_STROPTS_H	1
-
-#include <bits/types.h>
-
-/* Macros used as `request' argument to `ioctl'.  */
-#define __SID		('S' << 8)
-
-#define I_NREAD	    (__SID | 1)	/* Counts the number of data bytes in the data
-				   block in the first message.  */
-#define I_PUSH	    (__SID | 2)	/* Push STREAMS module onto top of the current
-				   STREAM, just below the STREAM head.  */
-#define I_POP	    (__SID | 3)	/* Remove STREAMS module from just below the
-				   STREAM head.  */
-#define I_LOOK	    (__SID | 4)	/* Retrieve the name of the module just below
-				   the STREAM head and place it in a character
-				   string.  */
-#define I_FLUSH	    (__SID | 5)	/* Flush all input and/or output.  */
-#define I_SRDOPT    (__SID | 6)	/* Sets the read mode.  */
-#define I_GRDOPT    (__SID | 7)	/* Returns the current read mode setting.  */
-#define I_STR	    (__SID | 8)	/* Construct an internal STREAMS `ioctl'
-				   message and send that message downstream. */
-#define I_SETSIG    (__SID | 9)	/* Inform the STREAM head that the process
-				   wants the SIGPOLL signal issued.  */
-#define I_GETSIG    (__SID |10) /* Return the events for which the calling
-				   process is currently registered to be sent
-				   a SIGPOLL signal.  */
-#define I_FIND	    (__SID |11) /* Compares the names of all modules currently
-				   present in the STREAM to the name pointed to
-				   by `arg'.  */
-#define I_LINK	    (__SID |12) /* Connect two STREAMs.  */
-#define I_UNLINK    (__SID |13) /* Disconnects the two STREAMs.  */
-#define I_PEEK	    (__SID |15) /* Allows a process to retrieve the information
-				   in the first message on the STREAM head read
-				   queue without taking the message off the
-				   queue.  */
-#define I_FDINSERT  (__SID |16) /* Create a message from the specified
-				   buffer(s), adds information about another
-				   STREAM, and send the message downstream.  */
-#define I_SENDFD    (__SID |17) /* Requests the STREAM associated with `fildes'
-				   to send a message, containing a file
-				   pointer, to the STREAM head at the other end
-				   of a STREAMS pipe.  */
-#define I_RECVFD    (__SID |14) /* Non-EFT definition.  */
-#define I_SWROPT    (__SID |19) /* Set the write mode.  */
-#define I_GWROPT    (__SID |20) /* Return the current write mode setting.  */
-#define I_LIST	    (__SID |21) /* List all the module names on the STREAM, up
-				   to and including the topmost driver name. */
-#define I_PLINK	    (__SID |22) /* Connect two STREAMs with a persistent
-				   link.  */
-#define I_PUNLINK   (__SID |23) /* Disconnect the two STREAMs that were
-				   connected with a persistent link.  */
-#define I_FLUSHBAND (__SID |28) /* Flush only band specified.  */
-#define I_CKBAND    (__SID |29) /* Check if the message of a given priority
-				   band exists on the STREAM head read
-				   queue.  */
-#define I_GETBAND   (__SID |30) /* Return the priority band of the first
-				   message on the STREAM head read queue.  */
-#define I_ATMARK    (__SID |31) /* See if the current message on the STREAM
-				   head read queue is "marked" by some module
-				   downstream.  */
-#define I_SETCLTIME (__SID |32) /* Set the time the STREAM head will delay when
-				   a STREAM is closing and there is data on
-				   the write queues.  */
-#define I_GETCLTIME (__SID |33) /* Get current value for closing timeout.  */
-#define I_CANPUT    (__SID |34) /* Check if a certain band is writable.  */
-
-
-/* Used in `I_LOOK' request.  */
-#define FMNAMESZ	8	/* compatibility w/UnixWare/Solaris.  */
-
-/* Flush options.  */
-#define FLUSHR		0x01	/* Flush read queues.  */
-#define FLUSHW		0x02	/* Flush write queues.  */
-#define FLUSHRW		0x03	/* Flush read and write queues.  */
-#ifdef __USE_GNU
-# define FLUSHBAND	0x04	/* Flush only specified band.  */
-#endif
-
-/* Possible arguments for `I_SETSIG'.  */
-#define S_INPUT		0x0001	/* A message, other than a high-priority
-				   message, has arrived.  */
-#define S_HIPRI		0x0002	/* A high-priority message is present.  */
-#define S_OUTPUT	0x0004	/* The write queue for normal data is no longer
-				   full.  */
-#define S_MSG		0x0008	/* A STREAMS signal message that contains the
-				   SIGPOLL signal reaches the front of the
-				   STREAM head read queue.  */
-#define S_ERROR		0x0010	/* Notification of an error condition.  */
-#define S_HANGUP	0x0020	/* Notification of a hangup.  */
-#define S_RDNORM	0x0040	/* A normal message has arrived.  */
-#define S_WRNORM	S_OUTPUT
-#define S_RDBAND	0x0080	/* A message with a non-zero priority has
-				   arrived.  */
-#define S_WRBAND	0x0100	/* The write queue for a non-zero priority
-				   band is no longer full.  */
-#define S_BANDURG	0x0200	/* When used in conjunction with S_RDBAND,
-				   SIGURG is generated instead of SIGPOLL when
-				   a priority message reaches the front of the
-				   STREAM head read queue.  */
-
-/* Option for `I_PEEK'.  */
-#define RS_HIPRI	0x01	/* Only look for high-priority messages.  */
-
-/* Options for `I_SRDOPT'.  */
-#define RNORM		0x0000	/* Byte-STREAM mode, the default.  */
-#define RMSGD		0x0001	/* Message-discard mode.   */
-#define RMSGN		0x0002	/* Message-nondiscard mode.   */
-#define RPROTDAT	0x0004	/* Deliver the control part of a message as
-				   data.  */
-#define RPROTDIS	0x0008	/* Discard the control part of a message,
-				   delivering any data part.  */
-#define RPROTNORM	0x0010	/* Fail `read' with EBADMSG if a message
-				   containing a control part is at the front
-				   of the STREAM head read queue.  */
-#ifdef __USE_GNU
-# define RPROTMASK	0x001C	/* The RPROT bits */
-#endif
-
-/* Possible mode for `I_SWROPT'.  */
-#define SNDZERO		0x001	/* Send a zero-length message downstream when a
-				   `write' of 0 bytes occurs.  */
-#ifdef __USE_GNU
-# define SNDPIPE	0x002	/* Send SIGPIPE on write and putmsg if
-				   sd_werror is set.  */
-#endif
-
-/* Arguments for `I_ATMARK'.  */
-#define ANYMARK		0x01	/* Check if the message is marked.  */
-#define LASTMARK	0x02	/* Check if the message is the last one marked
-				   on the queue.  */
-
-/* Argument for `I_UNLINK'.  */
-#ifdef __USE_GNU
-# define MUXID_ALL	(-1)	/* Unlink all STREAMs linked to the STREAM
-				   associated with `fildes'.  */
-#endif
-
-
-/* Macros for `getmsg', `getpmsg', `putmsg' and `putpmsg'.  */
-#define MSG_HIPRI	0x01	/* Send/receive high priority message.  */
-#define MSG_ANY		0x02	/* Receive any message.  */
-#define MSG_BAND	0x04	/* Receive message from specified band.  */
-
-/* Values returned by getmsg and getpmsg */
-#define MORECTL		1	/* More control information is left in
-				   message.  */
-#define MOREDATA	2	/* More data is left in message.  */
-
-
-/* Structure used for the I_FLUSHBAND ioctl on streams.  */
-struct bandinfo
-  {
-    unsigned char bi_pri;
-    int bi_flag;
-  };
-
-struct strbuf
-  {
-    int maxlen;		/* Maximum buffer length.  */
-    int len;		/* Length of data.  */
-    char *buf;		/* Pointer to buffer.  */
-  };
-
-struct strpeek
-  {
-    struct strbuf ctlbuf;
-    struct strbuf databuf;
-    t_uscalar_t flags;			/* UnixWare/Solaris compatibility.  */
-  };
-
-struct strfdinsert
-  {
-    struct strbuf ctlbuf;
-    struct strbuf databuf;
-    t_uscalar_t flags;			/* UnixWare/Solaris compatibility.  */
-    int fildes;
-    int offset;
-  };
-
-struct strioctl
-  {
-    int ic_cmd;
-    int ic_timout;
-    int ic_len;
-    char *ic_dp;
-  };
-
-struct strrecvfd
-  {
-    int fd;
-    uid_t uid;
-    gid_t gid;
-    char __fill[8];			/* UnixWare/Solaris compatibility */
-  };
-
-
-struct str_mlist
-  {
-    char l_name[FMNAMESZ + 1];
-  };
-
-struct str_list
-  {
-    int sl_nmods;
-    struct str_mlist *sl_modlist;
-  };
-
-#endif /* bits/stropts.h */
diff --git a/bits/termios.h b/bits/termios.h
deleted file mode 100644
index 43bb1ce52d..0000000000
--- a/bits/termios.h
+++ /dev/null
@@ -1,256 +0,0 @@
-/* termios type and macro definitions.  4.4 BSD/generic GNU version.
-   Copyright (C) 1993,94,96,97,99,2001 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _TERMIOS_H
-# error "Never include <bits/termios.h> directly; use <termios.h> instead."
-#endif
-
-/* These macros are also defined in some <bits/ioctls.h> files (with
-   numerically identical values), but this serves to shut up cpp's
-   complaining. */
-#ifdef __USE_BSD
-
-# ifdef MDMBUF
-#  undef MDMBUF
-# endif
-# ifdef FLUSHO
-#  undef FLUSHO
-# endif
-# ifdef PENDIN
-#  undef PENDIN
-# endif
-
-#endif /* __USE_BSD */
-
-#ifdef ECHO
-# undef ECHO
-#endif
-#ifdef TOSTOP
-# undef TOSTOP
-#endif
-#ifdef NOFLSH
-# undef NOFLSH
-#endif
-
-
-/* These definitions match those used by the 4.4 BSD kernel.
-   If the operating system has termios system calls or ioctls that
-   correctly implement the POSIX.1 behavior, there should be a
-   system-dependent version of this file that defines `struct termios',
-   `tcflag_t', `cc_t', `speed_t' and the `TC*' constants appropriately.  */
-
-/* Type of terminal control flag masks.  */
-typedef unsigned long int tcflag_t;
-
-/* Type of control characters.  */
-typedef unsigned char cc_t;
-
-/* Type of baud rate specifiers.  */
-typedef long int speed_t;
-
-/* Terminal control structure.  */
-struct termios
-{
-  /* Input modes.  */
-  tcflag_t c_iflag;
-#define	IGNBRK	(1 << 0)	/* Ignore break condition.  */
-#define	BRKINT	(1 << 1)	/* Signal interrupt on break.  */
-#define	IGNPAR	(1 << 2)	/* Ignore characters with parity errors.  */
-#define	PARMRK	(1 << 3)	/* Mark parity and framing errors.  */
-#define	INPCK	(1 << 4)	/* Enable input parity check.  */
-#define	ISTRIP	(1 << 5)	/* Strip 8th bit off characters.  */
-#define	INLCR	(1 << 6)	/* Map NL to CR on input.  */
-#define	IGNCR	(1 << 7)	/* Ignore CR.  */
-#define	ICRNL	(1 << 8)	/* Map CR to NL on input.  */
-#define	IXON	(1 << 9)	/* Enable start/stop output control.  */
-#define	IXOFF	(1 << 10)	/* Enable start/stop input control.  */
-#ifdef	__USE_BSD
-# define IXANY	(1 << 11)	/* Any character will restart after stop.  */
-# define IMAXBEL (1 << 13)	/* Ring bell when input queue is full.  */
-#endif
-#ifdef __USE_GNU
-# define IUCLC	(1 << 14)	/* Translate upper case input to lower case. */
-#endif
-
-  /* Output modes.  */
-  tcflag_t c_oflag;
-#define	OPOST	(1 << 0)	/* Perform output processing.  */
-#ifdef	__USE_BSD
-# define ONLCR	(1 << 1)	/* Map NL to CR-NL on output.  */
-# define OXTABS	(1 << 2)	/* Expand tabs to spaces.  */
-# define ONOEOT	(1 << 3)	/* Discard EOT (^D) on output.  */
-#endif
-#ifdef __USE_GNU
-# define OLCUC	(1 << 9)	/* Translate lower case output to upper case */
-#endif
-
-  /* Control modes.  */
-  tcflag_t c_cflag;
-#ifdef	__USE_BSD
-# define CIGNORE	(1 << 0)	/* Ignore these control flags.  */
-#endif
-#define	CSIZE	(CS5|CS6|CS7|CS8)	/* Number of bits per byte (mask).  */
-#define	CS5	0		/* 5 bits per byte.  */
-#define	CS6	(1 << 8)	/* 6 bits per byte.  */
-#define	CS7	(1 << 9)	/* 7 bits per byte.  */
-#define	CS8	(CS6|CS7)	/* 8 bits per byte.  */
-#define	CSTOPB	(1 << 10)	/* Two stop bits instead of one.  */
-#define	CREAD	(1 << 11)	/* Enable receiver.  */
-#define	PARENB	(1 << 12)	/* Parity enable.  */
-#define	PARODD	(1 << 13)	/* Odd parity instead of even.  */
-#define	HUPCL	(1 << 14)	/* Hang up on last close.  */
-#define	CLOCAL	(1 << 15)	/* Ignore modem status lines.  */
-#ifdef	__USE_BSD
-# define CCTS_OFLOW	(1 << 16)	/* CTS flow control of output.  */
-# define CRTS_IFLOW	(1 << 17)	/* RTS flow control of input.  */
-# define CRTSCTS	(CCTS_OFLOW|CRTS_IFLOW)	/* CTS/RTS flow control.  */
-# define MDMBUF		(1 << 20)	/* Carrier flow control of output.  */
-#endif
-
-  /* Local modes.  */
-  tcflag_t c_lflag;
-#ifdef	__USE_BSD
-# define ECHOKE	(1 << 0)	/* Visual erase for KILL.  */
-#endif
-#define	_ECHOE	(1 << 1)	/* Visual erase for ERASE.  */
-#define	ECHOE	_ECHOE
-#define	_ECHOK	(1 << 2)	/* Echo NL after KILL.  */
-#define	ECHOK	_ECHOK
-#define	_ECHO	(1 << 3)	/* Enable echo.  */
-#define	ECHO	_ECHO
-#define	_ECHONL	(1 << 4)	/* Echo NL even if ECHO is off.  */
-#define	ECHONL	_ECHONL
-#ifdef	__USE_BSD
-# define ECHOPRT	(1 << 5)	/* Hardcopy visual erase.  */
-# define ECHOCTL	(1 << 6)	/* Echo control characters as ^X.  */
-#endif
-#define	_ISIG	(1 << 7)	/* Enable signals.  */
-#define	ISIG	_ISIG
-#define	_ICANON	(1 << 8)	/* Do erase and kill processing.  */
-#define	ICANON	_ICANON
-#ifdef	__USE_BSD
-# define ALTWERASE (1 << 9)	/* Alternate WERASE algorithm.  */
-#endif
-#define	_IEXTEN	(1 << 10)	/* Enable DISCARD and LNEXT.  */
-#define	IEXTEN	_IEXTEN
-#define	EXTPROC	(1 << 11)	/* External processing.  */
-#define	_TOSTOP	(1 << 22)	/* Send SIGTTOU for background output.  */
-#define	TOSTOP	_TOSTOP
-#ifdef	__USE_BSD
-# define FLUSHO	(1 << 23)	/* Output being flushed (state).  */
-# define NOKERNINFO (1 << 25)	/* Disable VSTATUS.  */
-# define PENDIN	(1 << 29)	/* Retype pending input (state).  */
-#endif
-#define	_NOFLSH	(1 << 31)	/* Disable flush after interrupt.  */
-#define	NOFLSH	_NOFLSH
-
-  /* Control characters.  */
-#define	VEOF	0		/* End-of-file character [ICANON].  */
-#define	VEOL	1		/* End-of-line character [ICANON].  */
-#ifdef	__USE_BSD
-# define VEOL2	2		/* Second EOL character [ICANON].  */
-#endif
-#define	VERASE	3		/* Erase character [ICANON].  */
-#ifdef	__USE_BSD
-# define VWERASE	4		/* Word-erase character [ICANON].  */
-#endif
-#define	VKILL	5		/* Kill-line character [ICANON].  */
-#ifdef	__USE_BSD
-# define VREPRINT 6		/* Reprint-line character [ICANON].  */
-#endif
-#define	VINTR	8		/* Interrupt character [ISIG].  */
-#define	VQUIT	9		/* Quit character [ISIG].  */
-#define	VSUSP	10		/* Suspend character [ISIG].  */
-#ifdef	__USE_BSD
-# define VDSUSP	11		/* Delayed suspend character [ISIG].  */
-#endif
-#define	VSTART	12		/* Start (X-ON) character [IXON, IXOFF].  */
-#define	VSTOP	13		/* Stop (X-OFF) character [IXON, IXOFF].  */
-#ifdef	__USE_BSD
-# define VLNEXT	14		/* Literal-next character [IEXTEN].  */
-# define VDISCARD 15		/* Discard character [IEXTEN].  */
-#endif
-#define	VMIN	16		/* Minimum number of bytes read at once [!ICANON].  */
-#define	VTIME	17		/* Time-out value (tenths of a second) [!ICANON].  */
-#ifdef	__USE_BSD
-# define VSTATUS	18		/* Status character [ICANON].  */
-#endif
-#define	NCCS	20		/* Value duplicated in <hurd/tioctl.defs>.  */
-  cc_t c_cc[NCCS];
-
-  /* Input and output baud rates.  */
-  speed_t __ispeed, __ospeed;
-#define	B0	0		/* Hang up.  */
-#define	B50	50		/* 50 baud.  */
-#define	B75	75		/* 75 baud.  */
-#define	B110	110		/* 110 baud.  */
-#define	B134	134		/* 134.5 baud.  */
-#define	B150	150		/* 150 baud.  */
-#define	B200	200		/* 200 baud.  */
-#define	B300	300		/* 300 baud.  */
-#define	B600	600		/* 600 baud.  */
-#define	B1200	1200		/* 1200 baud.  */
-#define	B1800	1800		/* 1800 baud.  */
-#define	B2400	2400		/* 2400 baud.  */
-#define	B4800	4800		/* 4800 baud.  */
-#define	B9600	9600		/* 9600 baud.  */
-#define	B19200	19200		/* 19200 baud.  */
-#define	B38400	38400		/* 38400 baud.  */
-#ifdef	__USE_MISC
-# define EXTA	19200
-# define EXTB	38400
-#endif
-#define	B57600	57600
-#define	B115200	115200
-#define	B230400	230400
-#define	B460800	460800
-#define	B500000	500000
-#define	B576000	576000
-#define	B921600	921600
-#define	B1000000 1000000
-#define	B1152000 1152000
-#define	B1500000 1500000
-#define	B2000000 2000000
-#define	B2500000 2500000
-#define	B3000000 3000000
-#define	B3500000 3500000
-#define	B4000000 4000000
-};
-
-#define _IOT_termios /* Hurd ioctl type field.  */ \
-  _IOT (_IOTS (tcflag_t), 4, _IOTS (cc_t), NCCS, _IOTS (speed_t), 2)
-
-/* Values for the OPTIONAL_ACTIONS argument to `tcsetattr'.  */
-#define	TCSANOW		0	/* Change immediately.  */
-#define	TCSADRAIN	1	/* Change when pending output is written.  */
-#define	TCSAFLUSH	2	/* Flush pending input before changing.  */
-#ifdef	__USE_BSD
-# define TCSASOFT	0x10	/* Flag: Don't alter hardware state.  */
-#endif
-
-/* Values for the QUEUE_SELECTOR argument to `tcflush'.  */
-#define	TCIFLUSH	1	/* Discard data received but not yet read.  */
-#define	TCOFLUSH	2	/* Discard data written but not yet sent.  */
-#define	TCIOFLUSH	3	/* Discard all pending data.  */
-
-/* Values for the ACTION argument to `tcflow'.  */
-#define	TCOOFF	1		/* Suspend output.  */
-#define	TCOON	2		/* Restart suspended output.  */
-#define	TCIOFF	3		/* Send a STOP character.  */
-#define	TCION	4		/* Send a START character.  */
diff --git a/bits/time.h b/bits/time.h
deleted file mode 100644
index b3184d1de9..0000000000
--- a/bits/time.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* System-dependent timing definitions.  Generic version.
-   Copyright (C) 1996,1997,1999-2002,2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/*
- * Never include this file directly; use <time.h> instead.
- */
-
-#ifndef __need_timeval
-# ifndef _BITS_TIME_H
-#  define _BITS_TIME_H	1
-
-/* ISO/IEC 9899:1990 7.12.1: <time.h>
-   The macro `CLOCKS_PER_SEC' is the number per second of the value
-   returned by the `clock' function. */
-/* CAE XSH, Issue 4, Version 2: <time.h>
-   The value of CLOCKS_PER_SEC is required to be 1 million on all
-   XSI-conformant systems. */
-#  define CLOCKS_PER_SEC  1000000l
-
-#  if !defined __STRICT_ANSI__ && !defined __USE_XOPEN2K
-/* Even though CLOCKS_PER_SEC has such a strange value CLK_TCK
-   presents the real value for clock ticks per second for the system.  */
-#   include <bits/types.h>
-extern long int __sysconf (int);
-#   define CLK_TCK ((__clock_t) __sysconf (2))	/* 2 is _SC_CLK_TCK */
-#  endif
-
-#  ifdef __USE_POSIX199309
-/* Identifier for system-wide realtime clock.  */
-#   define CLOCK_REALTIME		0
-/* Monotonic system-wide clock.  */
-#   define CLOCK_MONOTONIC		1
-/* High-resolution timer from the CPU.  */
-#   define CLOCK_PROCESS_CPUTIME_ID	2
-/* Thread-specific CPU-time clock.  */
-#   define CLOCK_THREAD_CPUTIME_ID	3
-
-/* Flag to indicate time is absolute.  */
-#   define TIMER_ABSTIME		1
-#  endif
-
-# endif	/* bits/time.h */
-#endif
-
-#ifdef __need_timeval
-# undef __need_timeval
-# ifndef _STRUCT_TIMEVAL
-#  define _STRUCT_TIMEVAL	1
-#  include <bits/types.h>
-
-/* A time value that is accurate to the nearest
-   microsecond but also has a range of years.  */
-struct timeval
-  {
-    __time_t tv_sec;		/* Seconds.  */
-    __suseconds_t tv_usec;	/* Microseconds.  */
-  };
-# endif	/* struct timeval */
-#endif	/* need timeval */
diff --git a/bits/typesizes.h b/bits/typesizes.h
deleted file mode 100644
index e9226c4174..0000000000
--- a/bits/typesizes.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* bits/typesizes.h -- underlying types for *_t.  Generic version.
-   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _BITS_TYPES_H
-# error "Never include <bits/typesizes.h> directly; use <sys/types.h> instead."
-#endif
-
-#ifndef	_BITS_TYPESIZES_H
-#define	_BITS_TYPESIZES_H	1
-
-/* See <bits/types.h> for the meaning of these macros.  This file exists so
-   that <bits/types.h> need not vary across different GNU platforms.  */
-
-#define __DEV_T_TYPE		__UQUAD_TYPE
-#define __UID_T_TYPE		__U32_TYPE
-#define __GID_T_TYPE		__U32_TYPE
-#define __INO_T_TYPE		__ULONGWORD_TYPE
-#define __INO64_T_TYPE		__UQUAD_TYPE
-#define __MODE_T_TYPE		__U32_TYPE
-#define __NLINK_T_TYPE		__UWORD_TYPE
-#define __OFF_T_TYPE		__SLONGWORD_TYPE
-#define __OFF64_T_TYPE		__SQUAD_TYPE
-#define __PID_T_TYPE		__S32_TYPE
-#define __RLIM_T_TYPE		__ULONGWORD_TYPE
-#define __RLIM64_T_TYPE		__UQUAD_TYPE
-#define	__BLKCNT_T_TYPE		__SLONGWORD_TYPE
-#define	__BLKCNT64_T_TYPE	__SQUAD_TYPE
-#define	__FSBLKCNT_T_TYPE	__ULONGWORD_TYPE
-#define	__FSBLKCNT64_T_TYPE	__UQUAD_TYPE
-#define	__FSFILCNT_T_TYPE	__ULONGWORD_TYPE
-#define	__FSFILCNT64_T_TYPE	__UQUAD_TYPE
-#define	__ID_T_TYPE		__U32_TYPE
-#define __CLOCK_T_TYPE		__SLONGWORD_TYPE
-#define __TIME_T_TYPE		__SLONGWORD_TYPE
-#define __USECONDS_T_TYPE	__U32_TYPE
-#define __SUSECONDS_T_TYPE	__SLONGWORD_TYPE
-#define __DADDR_T_TYPE		__S32_TYPE
-#define __SWBLK_T_TYPE		__SLONGWORD_TYPE
-#define __KEY_T_TYPE		__S32_TYPE
-#define __CLOCKID_T_TYPE	__S32_TYPE
-#define __TIMER_T_TYPE		void *
-#define __BLKSIZE_T_TYPE	__SLONGWORD_TYPE
-#define __FSID_T_TYPE		struct { int __val[2]; }
-#define __SSIZE_T_TYPE		__SWORD_TYPE
-
-/* Number of descriptors that can fit in an `fd_set'.  */
-#define	__FD_SETSIZE		1024
-
-
-#endif /* bits/typesizes.h */
diff --git a/bits/uio.h b/bits/uio.h
deleted file mode 100644
index 410ce2a98a..0000000000
--- a/bits/uio.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_UIO_H
-# error "Never include <bits/uio.h> directly; use <sys/uio.h> instead."
-#endif
-
-
-/* `struct iovec' -- Structure describing a section of memory.  */
-
-struct iovec
-{
-  /* Starting address.  */
-  __ptr_t iov_base;
-  /* Length in bytes.  */
-  size_t iov_len;
-};
diff --git a/bits/ustat.h b/bits/ustat.h
deleted file mode 100644
index 69c6b72270..0000000000
--- a/bits/ustat.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* Copyright (C) 1997, 2002 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_USTAT_H
-# error "Never include <bits/ustat.h> directly; use <sys/ustat.h> instead."
-#endif
-
-#include <sys/types.h>
-
-struct ustat
-  {
-    __daddr_t f_tfree;		/* Number of free blocks.  */
-    __ino_t f_tinode;		/* Number of free inodes.  */
-    char f_fname[6];
-    char f_fpack[6];
-  };
diff --git a/bits/utmp.h b/bits/utmp.h
deleted file mode 100644
index 03a2b1f40b..0000000000
--- a/bits/utmp.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* The `struct utmp' type, describing entries in the utmp file.  Generic/BSDish
-   Copyright (C) 1993, 1996, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _UTMP_H
-# error "Never include <bits/utmp.h> directly; use <utmp.h> instead."
-#endif
-
-#include <paths.h>
-#include <time.h>
-
-
-#define	UT_NAMESIZE	8
-#define	UT_LINESIZE	8
-#define	UT_HOSTSIZE	16
-
-
-struct lastlog
-  {
-    time_t ll_time;
-    char ll_line[UT_LINESIZE];
-    char ll_host[UT_HOSTSIZE];
-  };
-
-struct utmp
-  {
-    char ut_line[UT_LINESIZE];
-    char ut_user[UT_NAMESIZE];
-#define ut_name ut_user
-    char ut_host[UT_HOSTSIZE];
-    long int ut_time;
-  };
-
-
-#define _HAVE_UT_HOST 1		/* We have the ut_host field.  */
diff --git a/bits/utsname.h b/bits/utsname.h
deleted file mode 100644
index 48e8a8e9b6..0000000000
--- a/bits/utsname.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Copyright (C) 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _SYS_UTSNAME_H
-# error "Never include <bits/utsname.h> directly; use <sys/utsname.h> instead."
-#endif
-
-/* The size of the character arrays used to hold the information
-   in a `struct utsname'.  Enlarge this as necessary.  */
-#define	_UTSNAME_LENGTH	1024
diff --git a/bits/waitflags.h b/bits/waitflags.h
deleted file mode 100644
index 157dd1211a..0000000000
--- a/bits/waitflags.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Definitions of flag bits for `waitpid' et al.
-   Copyright (C) 1992, 1996, 1997, 2000 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#if !defined _SYS_WAIT_H && !defined _STDLIB_H
-# error "Never include <bits/waitflags.h> directly; use <sys/wait.h> instead."
-#endif
-
-
-/* Bits in the third argument to `waitpid'.  */
-#define	WNOHANG		1	/* Don't block waiting.  */
-#define	WUNTRACED	2	/* Report status of stopped children.  */
diff --git a/bits/wchar.h b/bits/wchar.h
deleted file mode 100644
index ef1f56363a..0000000000
--- a/bits/wchar.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* wchar_t type related definitions.
-   Copyright (C) 2000 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _BITS_WCHAR_H
-#define _BITS_WCHAR_H	1
-
-#define __WCHAR_MIN	(-2147483647 - 1)
-#define __WCHAR_MAX	(2147483647)
-
-#endif	/* bits/wchar.h */
diff --git a/bits/wordsize.h b/bits/wordsize.h
deleted file mode 100644
index 9ef0e8526a..0000000000
--- a/bits/wordsize.h
+++ /dev/null
@@ -1 +0,0 @@
-#error "This file must be written based on the data type sizes of the target"
diff --git a/bits/xtitypes.h b/bits/xtitypes.h
deleted file mode 100644
index c21bfb036c..0000000000
--- a/bits/xtitypes.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* bits/xtitypes.h -- Define some types used by <bits/stropts.h>.  Generic.
-   Copyright (C) 2002 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _STROPTS_H
-# error "Never include <bits/xtitypes.h> directly; use <stropts.h> instead."
-#endif
-
-#ifndef _BITS_XTITYPES_H
-#define _BITS_XTITYPES_H	1
-
-#include <bits/types.h>
-
-/* This type is used by some structs in <bits/stropts.h>.  */
-typedef __SLONGWORD_TYPE __t_scalar_t;
-typedef __ULONGWORD_TYPE __t_uscalar_t;
-
-
-#endif /* bits/xtitypes.h */
diff --git a/catgets/gencat.c b/catgets/gencat.c
index 39938097d3..2f6c81dc1d 100644
--- a/catgets/gencat.c
+++ b/catgets/gencat.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1996-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 1996.
 
@@ -248,7 +248,7 @@ print_version (FILE *stream, struct argp_state *state)
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
 }
 
diff --git a/configure b/configure
index 1e9e50c491..93fff2a64d 100755
--- a/configure
+++ b/configure
@@ -313,7 +313,7 @@ ac_includes_default="\
 # include <unistd.h>
 #endif"
 
-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS with_fp with_cvs enable_check_abi oldest_abi bindnow force_install all_warnings build build_cpu build_vendor build_os host host_cpu host_vendor host_os subdirs add_ons base_machine sysnames INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT BUILD_CC cross_compiling CPP CXX CXXFLAGS ac_ct_CXX AR OBJDUMP RANLIB ac_ct_RANLIB MIG AS LD PWD_P MAKE MSGFMT MAKEINFO SED AUTOCONF SYSINCLUDES libc_cv_gcc_static_libgcc BASH libc_cv_have_bash2 KSH libc_cv_have_ksh AWK PERL INSTALL_INFO BISON VERSIONING libc_cv_asm_protected_directive libc_cv_initfinit_array libc_cv_cc_with_libunwind libc_cv_z_nodelete libc_cv_z_nodlopen libc_cv_z_initfirst libc_cv_z_relro libc_cv_Bgroup libc_cv_libgcc_s_suffix libc_cv_as_needed ASFLAGS_config libc_cv_z_combreloc libc_cv_z_execstack libc_cv_fpie fno_unit_at_a_time libc_cv_have_initfini no_whole_archive exceptions LIBGD have_selinux EGREP sizeof_long_double libc_cv_gcc_unwind_find_fde uname_sysname uname_release uname_version old_glibc_headers libc_cv_slibdir libc_cv_localedir libc_cv_sysconfdir libc_cv_rootsbindir libc_cv_forced_unwind use_ldconfig ldd_rewrite_script gnu_ld gnu_as elf xcoff static shared pic_default profile omitfp bounded static_nss nopic_initfini DEFINES linux_doors mach_interface_list VERSION RELEASE LIBOBJS LTLIBOBJS'
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS with_fp with_cvs enable_check_abi oldest_abi bindnow force_install all_warnings build build_cpu build_vendor build_os host host_cpu host_vendor host_os subdirs add_ons base_machine sysnames INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC OBJEXT BUILD_CC cross_compiling CPP CXX CXXFLAGS ac_ct_CXX AR OBJDUMP RANLIB ac_ct_RANLIB MIG AS LD PWD_P MAKE MSGFMT MAKEINFO SED AUTOCONF SYSINCLUDES libc_cv_gcc_static_libgcc BASH libc_cv_have_bash2 KSH libc_cv_have_ksh AWK PERL INSTALL_INFO BISON VERSIONING libc_cv_asm_protected_directive libc_cv_initfinit_array libc_cv_cc_with_libunwind libc_cv_z_nodelete libc_cv_z_nodlopen libc_cv_z_initfirst libc_cv_z_relro libc_cv_Bgroup libc_cv_libgcc_s_suffix libc_cv_as_needed ASFLAGS_config libc_cv_z_combreloc libc_cv_z_execstack libc_cv_fpie fno_unit_at_a_time libc_cv_have_initfini libc_cv_cpp_asm_debuginfo no_whole_archive exceptions LIBGD have_selinux EGREP sizeof_long_double libc_cv_gcc_unwind_find_fde uname_sysname uname_release uname_version old_glibc_headers libc_cv_slibdir libc_cv_localedir libc_cv_sysconfdir libc_cv_rootsbindir libc_cv_forced_unwind use_ldconfig ldd_rewrite_script gnu_ld gnu_as elf xcoff static shared pic_default profile omitfp bounded static_nss nopic_initfini DEFINES linux_doors mach_interface_list VERSION RELEASE LIBOBJS LTLIBOBJS'
 ac_subst_files=''
 
 # Initialize some variables set by options.
@@ -2093,14 +2093,6 @@ for d in $add_ons_pfx ''; do
       done
     done
   done
-  if test -n "$d"; then
-    try="${d}sysdeps/generic"
-    test -n "$enable_debug_configure" &&
-    echo "$0 DEBUG: try $try" >&2
-    if test -d $srcdir/$try; then
-      sysnames="$sysnames $try"
-    fi
-  fi
 done
 IFS="$ac_save_ifs"
 
@@ -6093,6 +6085,58 @@ _ACEOF
 
 fi
 
+echo "$as_me:$LINENO: checking if -g produces usable source locations for assembler-with-cpp" >&5
+echo $ECHO_N "checking if -g produces usable source locations for assembler-with-cpp... $ECHO_C" >&6
+if test "${libc_cv_cpp_asm_debuginfo+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat > conftest.S <<EOF
+#include "confdefs.h"
+
+/* comment on
+   two lines */
+	${libc_cv_dot_text}
+	${libc_cv_asm_global_directive} foo
+foo:
+	/* Unfortunately this test only works for a real instruction,
+	   not for any of the machine-independent pseudo-ops.
+	   So we just have to assume everybody has a "nop".  */
+	nop
+	/* comment */
+	nop
+	/* comment */
+	nop
+EOF
+if { ac_try='${CC-cc} $CPPFLAGS $ASFLAGS -g -c conftest.S 1>&5'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } && {
+   ac_pattern='conftest\.S'
+   { ac_try='readelf --debug-dump=line conftest.o |
+		   grep $ac_pattern 1>&5'
+  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }
+  }; then
+  libc_cv_cpp_asm_debuginfo=yes
+else
+  libc_cv_cpp_asm_debuginfo=no
+fi
+rm -f conftest*
+fi
+echo "$as_me:$LINENO: result: $libc_cv_cpp_asm_debuginfo" >&5
+echo "${ECHO_T}$libc_cv_cpp_asm_debuginfo" >&6
+if test $libc_cv_cpp_asm_debuginfo = yes; then
+  cat >>confdefs.h <<\_ACEOF
+#define HAVE_CPP_ASM_DEBUGINFO 1
+_ACEOF
+
+fi
+
 echo "$as_me:$LINENO: checking for ld --no-whole-archive" >&5
 echo $ECHO_N "checking for ld --no-whole-archive... $ECHO_C" >&6
 if test "${libc_cv_ld_no_whole_archive+set}" = set; then
@@ -8346,6 +8390,7 @@ s,@libc_cv_z_execstack@,$libc_cv_z_execstack,;t t
 s,@libc_cv_fpie@,$libc_cv_fpie,;t t
 s,@fno_unit_at_a_time@,$fno_unit_at_a_time,;t t
 s,@libc_cv_have_initfini@,$libc_cv_have_initfini,;t t
+s,@libc_cv_cpp_asm_debuginfo@,$libc_cv_cpp_asm_debuginfo,;t t
 s,@no_whole_archive@,$no_whole_archive,;t t
 s,@exceptions@,$exceptions,;t t
 s,@LIBGD@,$LIBGD,;t t
diff --git a/configure.in b/configure.in
index 11f78b0ec3..559bee0173 100644
--- a/configure.in
+++ b/configure.in
@@ -593,14 +593,6 @@ for d in $add_ons_pfx ''; do
       done
     done
   done
-  if test -n "$d"; then
-    try="${d}sysdeps/generic"
-    test -n "$enable_debug_configure" &&
-    echo "$0 DEBUG: try $try" >&2
-    if test -d $srcdir/$try; then
-      sysnames="$sysnames $try"
-    fi
-  fi
 done
 IFS="$ac_save_ifs"
 
@@ -1649,6 +1641,39 @@ if test $libc_cv_asm_cfi_directives = yes; then
   AC_DEFINE(HAVE_ASM_CFI_DIRECTIVES)
 fi
 
+AC_CACHE_CHECK(if -g produces usable source locations for assembler-with-cpp,
+	       libc_cv_cpp_asm_debuginfo, [dnl
+cat > conftest.S <<EOF
+#include "confdefs.h"
+
+/* comment on
+   two lines */
+	${libc_cv_dot_text}
+	${libc_cv_asm_global_directive} foo
+foo:
+	/* Unfortunately this test only works for a real instruction,
+	   not for any of the machine-independent pseudo-ops.
+	   So we just have to assume everybody has a "nop".  */
+	nop
+	/* comment */
+	nop
+	/* comment */
+	nop
+EOF
+if AC_TRY_COMMAND([${CC-cc} $CPPFLAGS $ASFLAGS -g -c conftest.S 1>&AS_MESSAGE_LOG_FD]) && {
+   ac_pattern='conftest\.S'
+   AC_TRY_COMMAND([readelf --debug-dump=line conftest.o |
+		   grep $ac_pattern 1>&AS_MESSAGE_LOG_FD])
+  }; then
+  libc_cv_cpp_asm_debuginfo=yes
+else
+  libc_cv_cpp_asm_debuginfo=no
+fi
+rm -f conftest*])AC_SUBST(libc_cv_cpp_asm_debuginfo)
+if test $libc_cv_cpp_asm_debuginfo = yes; then
+  AC_DEFINE(HAVE_CPP_ASM_DEBUGINFO)
+fi
+
 AC_CACHE_CHECK(for ld --no-whole-archive, libc_cv_ld_no_whole_archive, [dnl
 cat > conftest.c <<\EOF
 _start () {}
diff --git a/csu/Makefile b/csu/Makefile
index 20709c3c8d..fbbfe0050a 100644
--- a/csu/Makefile
+++ b/csu/Makefile
@@ -1,5 +1,5 @@
 # Makefile for csu code for GNU C library.
-# Copyright (C) 1995-2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1995-2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -54,13 +54,11 @@ include ../Makeconfig
 ifeq (yes,$(build-shared))
 extra-objs += S$(start-installed-name)
 install-lib += S$(start-installed-name)
-generated += start.os
 endif
 
 ifeq (yes,$(build-bounded))
 extra-objs += b$(start-installed-name)
 install-lib += b$(start-installed-name)
-generated += start.ob
 endif
 
 ifneq ($(start-installed-name),$(static-start-installed-name))
diff --git a/csu/elf-init.c b/csu/elf-init.c
index dbd2a91cb7..c538627dfd 100644
--- a/csu/elf-init.c
+++ b/csu/elf-init.c
@@ -1,5 +1,5 @@
 /* Startup support for ELF initializers/finalizers in the main executable.
-   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -104,13 +104,9 @@ __libc_csu_init (void)
 #endif
 }
 
-/* This function should not be used anymore.  We run the executable's
-   destructor now just like any other.  We cannot remove the function,
-   though.  */
 void
 __libc_csu_fini (void)
 {
-#if 0
 #ifdef HAVE_INITFINI_ARRAY
   size_t i = __fini_array_end - __fini_array_start;
   while (i-- > 0)
@@ -118,5 +114,4 @@ __libc_csu_fini (void)
 #endif
 
   _fini ();
-#endif
 }
diff --git a/csu/version.c b/csu/version.c
index 606246a6e1..1104fa9137 100644
--- a/csu/version.c
+++ b/csu/version.c
@@ -25,7 +25,7 @@ static const char __libc_version[] = VERSION;
 
 static const char banner[] =
 "GNU C Library "RELEASE" release version "VERSION", by Roland McGrath et al.\n\
-Copyright (C) 2005 Free Software Foundation, Inc.\n\
+Copyright (C) 2004 Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.\n\
 There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A\n\
 PARTICULAR PURPOSE.\n\
diff --git a/debug/catchsegv.sh b/debug/catchsegv.sh
index f7e79bce42..14556f712a 100755
--- a/debug/catchsegv.sh
+++ b/debug/catchsegv.sh
@@ -39,7 +39,7 @@ if test $# -eq 0; then
       ;;
     --v | --ve | --ver | --vers | --versi | --versio | --version)
       echo 'catchsegv (GNU libc) @VERSION@'
-      echo 'Copyright (C) 2005 Free Software Foundation, Inc.
+      echo 'Copyright (C) 2004 Free Software Foundation, Inc.
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 Written by Ulrich Drepper.'
diff --git a/debug/chk_fail.c b/debug/chk_fail.c
index 6921ca4686..dc1c3d70b6 100644
--- a/debug/chk_fail.c
+++ b/debug/chk_fail.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+
+/* Copyright (C) 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,15 +21,10 @@
 #include <stdlib.h>
 
 
-extern char **__libc_argv attribute_hidden;
-
 void
 __attribute__ ((noreturn))
 __chk_fail (void)
 {
-  /* The loop is added only to keep gcc happy.  */
-  while (1)
-    __libc_message (1, "*** buffer overflow detected ***: %s terminated\n",
-		    __libc_argv[0] ?: "<unknown>");
+  __libc_fatal ("*** buffer overflow detected ***\n");
 }
 libc_hidden_def (__chk_fail)
diff --git a/debug/xtrace.sh b/debug/xtrace.sh
index 811d6ba9a6..4ce8888490 100755
--- a/debug/xtrace.sh
+++ b/debug/xtrace.sh
@@ -1,5 +1,5 @@
 #! @BASH@
-# Copyright (C) 1999, 2001-2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1999, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 # Contributed by Ulrich Drepper <drepper@gnu.org>, 1999.
 
@@ -64,7 +64,7 @@ do_version() {
   printf $"Copyright (C) %s Free Software Foundation, Inc.
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-" "2005"
+" "2004"
   printf $"Written by %s.
 " "Ulrich Drepper"
   exit 0
diff --git a/dirent/tst-seekdir.c b/dirent/tst-seekdir.c
index 43808fecb5..b833c30705 100644
--- a/dirent/tst-seekdir.c
+++ b/dirent/tst-seekdir.c
@@ -11,23 +11,8 @@ main (int argc, char *argv[])
   int i = 0;
   int result = 0;
   struct dirent *dp;
-  long int save0;
-  long int rewind;
 
   dirp = opendir (".");
-  if (dirp == NULL)
-    {
-      printf ("opendir failed: %m\n");
-      return 1;
-    }
-
-  save0 = telldir (dirp);
-  if (save0 == -1)
-    {
-      printf ("telldir failed: %m\n");
-      result = 1;
-    }
-
   for (dp = readdir (dirp); dp != NULL; dp = readdir (dirp))
     {
       /* save position 3 (after fourth entry) */
@@ -59,19 +44,6 @@ main (int argc, char *argv[])
   for (dp = readdir (dirp); dp != NULL; dp = readdir (dirp))
     printf ("%s\n", dp->d_name);
 
-  /* Check rewinddir */
-  rewinddir (dirp);
-  rewind = telldir (dirp);
-  if (rewind == -1)
-    {
-      printf ("telldir failed: %m\n");
-      result = 1;
-    }
-  else if (save0 != rewind)
-    {
-      printf ("rewinddir didn't reset directory stream\n");
-      result = 1;
-    }
 
   closedir (dirp);
   return result;
diff --git a/dlfcn/Makefile b/dlfcn/Makefile
index 7b538fed2b..ed20ae5ccd 100644
--- a/dlfcn/Makefile
+++ b/dlfcn/Makefile
@@ -20,8 +20,8 @@ subdir		:= dlfcn
 headers		:= bits/dlfcn.h dlfcn.h
 extra-libs	:= libdl
 libdl-routines	:= dlopen dlclose dlsym dlvsym dlerror dladdr dladdr1 dlinfo \
-		   dlmopen dlfcn
-routines	:= $(patsubst %,s%,$(filter-out dlfcn,$(libdl-routines)))
+		   dlmopen
+routines	:= $(patsubst %,s%,$(libdl-routines))
 elide-routines.os := $(routines)
 distribute	:= dlopenold.c glreflib1.c glreflib2.c failtestmod.c \
 		   defaultmod1.c defaultmod2.c errmsg1mod.c modatexit.c \
@@ -34,7 +34,7 @@ include ../Makeconfig
 
 ifeq ($(versioning),yes)
 libdl-routines	+= dlopenold
-libdl-shared-only-routines := dlopenold dlfcn
+libdl-shared-only-routines := dlopenold
 endif
 
 ifeq (yes,$(build-shared))
@@ -65,6 +65,8 @@ generated := $(modules-names:=.so)
 
 include ../Rules
 
+LDFLAGS-dl.so = -Wl,-dynamic-linker,$(slibdir)/$(rtld-installed-name)
+
 test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names)))
 $(test-modules): $(objpfx)%.so: $(objpfx)%.os $(common-objpfx)shlib.lds
 	$(build-module)
diff --git a/dlfcn/dlclose.c b/dlfcn/dlclose.c
index 5a344f31ca..3ddedcffbe 100644
--- a/dlfcn/dlclose.c
+++ b/dlfcn/dlclose.c
@@ -19,7 +19,6 @@
    02111-1307 USA.  */
 
 #include <dlfcn.h>
-#include <ldsodefs.h>
 
 #if !defined SHARED && defined IS_IN_libdl
 
@@ -34,7 +33,7 @@ dlclose (void *handle)
 static void
 dlclose_doit (void *handle)
 {
-  GLRO(dl_close) (handle);
+  _dl_close (handle);
 }
 
 int
diff --git a/dlfcn/dlfcn.c b/dlfcn/dlfcn.c
deleted file mode 100644
index 8047150763..0000000000
--- a/dlfcn/dlfcn.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Load a shared object at run time.
-   Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <dlfcn.h>
-
-
-int __dlfcn_argc attribute_hidden;
-char **__dlfcn_argv attribute_hidden;
-
-#ifdef HAVE_INITFINI_ARRAY
-static void
-init (int argc, char *argv[])
-{
-  __dlfcn_argc = argc;
-  __dlfcn_argv = argv;
-}
-
-static void (*const init_array []) (int argc, char *argv[])
-     __attribute__ ((section (".init_array"), aligned (sizeof (void *))))
-     __attribute_used__ = { init };
-#else
-# error "Need linker with .init_array support."
-#endif
diff --git a/dlfcn/dlmopen.c b/dlfcn/dlmopen.c
index 0a6d47ea2e..5fd6543655 100644
--- a/dlfcn/dlmopen.c
+++ b/dlfcn/dlmopen.c
@@ -21,7 +21,6 @@
 #include <errno.h>
 #include <libintl.h>
 #include <stddef.h>
-#include <unistd.h>
 #include <ldsodefs.h>
 
 #if !defined SHARED && defined IS_IN_libdl
@@ -62,10 +61,8 @@ dlmopen_doit (void *a)
 # endif
       GLRO(dl_signal_error) (EINVAL, NULL, NULL, N_("invalid namespace"));
 
-  args->new = GLRO(dl_open) (args->file ?: "", args->mode | __RTLD_DLOPEN,
-			     args->caller,
-			     args->nsid, __dlfcn_argc, __dlfcn_argv,
-			     __environ);
+  args->new = _dl_open (args->file ?: "", args->mode | __RTLD_DLOPEN,
+			args->caller, args->nsid);
 }
 
 
diff --git a/dlfcn/dlopen.c b/dlfcn/dlopen.c
index bffb512aa3..6381ffc9b1 100644
--- a/dlfcn/dlopen.c
+++ b/dlfcn/dlopen.c
@@ -1,5 +1,5 @@
 /* Load a shared object at run time.
-   Copyright (C) 1995-2000,2003,2004,2005 Free Software Foundation, Inc.
+   Copyright (C) 1995,96,97,98,99,2000,2003,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,10 +18,7 @@
    02111-1307 USA.  */
 
 #include <dlfcn.h>
-#include <libintl.h>
 #include <stddef.h>
-#include <unistd.h>
-#include <ldsodefs.h>
 
 #if !defined SHARED && defined IS_IN_libdl
 
@@ -59,14 +56,8 @@ dlopen_doit (void *a)
 {
   struct dlopen_args *args = (struct dlopen_args *) a;
 
-  if (args->mode & ~(RTLD_BINDING_MASK | RTLD_NOLOAD | RTLD_DEEPBIND
-		     | RTLD_GLOBAL | RTLD_LOCAL | RTLD_NODELETE))
-    GLRO(dl_signal_error) (0, NULL, NULL, _("invalid mode parameter"));
-
-  args->new = GLRO(dl_open) (args->file ?: "", args->mode | __RTLD_DLOPEN,
-			     args->caller,
-			     args->file == NULL ? LM_ID_BASE : NS,
-			     __dlfcn_argc, __dlfcn_argv, __environ);
+  args->new = _dl_open (args->file ?: "", args->mode | __RTLD_DLOPEN,
+			args->caller, args->file == NULL ? LM_ID_BASE : NS);
 }
 
 
diff --git a/dlfcn/dlopenold.c b/dlfcn/dlopenold.c
index 8dae1c40ce..148716cdb0 100644
--- a/dlfcn/dlopenold.c
+++ b/dlfcn/dlopenold.c
@@ -19,8 +19,6 @@
 
 #include <dlfcn.h>
 #include <stddef.h>
-#include <unistd.h>
-#include <ldsodefs.h>
 
 /* This file is for compatibility with glibc 2.0.  Compile it only if
    versioning is used.  */
@@ -52,10 +50,8 @@ dlopen_doit (void *a)
 {
   struct dlopen_args *args = (struct dlopen_args *) a;
 
-  args->new = GLRO(dl_open) (args->file ?: "", args->mode | __RTLD_DLOPEN,
-			     args->caller,
-			     args->file == NULL ? LM_ID_BASE : NS,
-			     __dlfcn_argc, __dlfcn_argv, __environ);
+  args->new = _dl_open (args->file ?: "", args->mode | __RTLD_DLOPEN,
+			args->caller, args->file == NULL ? LM_ID_BASE : NS);
 }
 
 extern void *__dlopen_nocheck (const char *file, int mode);
diff --git a/elf/Makefile b/elf/Makefile
index 47e6ea4d58..028be25b2d 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1995-2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -20,8 +20,8 @@
 
 subdir		:= elf
 
-headers		= elf.h bits/elfclass.h link.h bits/link.h
-routines	= $(dl-routines) dl-support dl-iteratephdr \
+headers		= elf.h bits/elfclass.h link.h
+routines	= $(dl-routines) dl-open dl-close dl-support dl-iteratephdr \
 		  dl-addr enbl-secure dl-profstub \
 		  dl-origin dl-libc dl-sym dl-tsd
 
@@ -30,7 +30,7 @@ routines	= $(dl-routines) dl-support dl-iteratephdr \
 dl-routines	= $(addprefix dl-,load cache lookup object reloc deps \
 			          runtime error init fini debug misc \
 				  version profile conflict tls origin \
-				  execstack caller open close trampoline)
+				  execstack caller)
 all-dl-routines = $(dl-routines) $(sysdep-dl-routines)
 # But they are absent from the shared libc, because that code is in ld.so.
 elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin
@@ -72,7 +72,7 @@ distribute	:= rtld-Rules \
 		   tst-tlsmod1.c tst-tlsmod2.c tst-tlsmod3.c tst-tlsmod4.c \
 		   tst-tlsmod5.c tst-tlsmod6.c tst-tlsmod7.c tst-tlsmod8.c \
 		   tst-tlsmod9.c tst-tlsmod10.c tst-tlsmod11.c \
-		   tst-tlsmod12.c tst-tls10.h tst-alignmod.c tst-alignmod2.c \
+		   tst-tlsmod12.c tst-tls10.h tst-alignmod.c \
 		   circlemod1.c circlemod1a.c circlemod2.c circlemod2a.c \
 		   circlemod3.c circlemod3a.c nodlopenmod2.c \
 		   tls-macros.h \
@@ -83,8 +83,7 @@ distribute	:= rtld-Rules \
 		   tst-array2dep.c tst-piemod1.c \
 		   tst-execstack-mod.c tst-dlmodcount.c \
 		   check-textrel.c dl-sysdep.h test-dlopenrpathmod.c \
-		   tst-deep1mod1.c tst-deep1mod2.c tst-deep1mod3.c \
-		   tst-auditmod1.c
+		   tst-deep1mod1.c tst-deep1mod2.c tst-deep1mod3.c
 
 CFLAGS-dl-runtime.c = -fexceptions -fasynchronous-unwind-tables
 CFLAGS-dl-lookup.c = -fexceptions -fasynchronous-unwind-tables
@@ -154,9 +153,8 @@ tests += loadtest restest1 preloadtest loadfail multiload origtest resolvfail \
 	 restest2 next dblload dblunload reldep5 reldep6 reldep7 reldep8 \
 	 circleload1 tst-tls3 tst-tls4 tst-tls5 tst-tls6 tst-tls7 tst-tls8 \
 	 tst-tls10 tst-tls11 tst-tls12 tst-tls13 tst-tls14 tst-align \
-	 tst-align2 $(tests-execstack-$(have-z-execstack)) tst-dlmodcount \
-	 tst-dlopenrpath tst-deep1 tst-dlmopen1 tst-dlmopen2 tst-dlmopen3 \
-	 tst-audit1
+	 $(tests-execstack-$(have-z-execstack)) tst-dlmodcount \
+	 tst-dlopenrpath tst-deep1 tst-dlmopen1 tst-dlmopen2 tst-dlmopen3
 #	 reldep9
 test-srcs = tst-pathopt
 tests-vis-yes = vismain
@@ -188,10 +186,9 @@ modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \
 		circlemod3 circlemod3a \
 		reldep8mod1 reldep8mod2 reldep8mod3 \
 		reldep9mod1 reldep9mod2 reldep9mod3 \
-		tst-alignmod tst-alignmod2 \
-		$(modules-execstack-$(have-z-execstack)) \
+		tst-alignmod $(modules-execstack-$(have-z-execstack)) \
 		tst-dlopenrpathmod tst-deep1mod1 tst-deep1mod2 tst-deep1mod3 \
-		tst-dlmopen1mod tst-auditmod1
+		tst-dlmopen1mod
 ifeq (yes,$(have-initfini-array))
 modules-names += tst-array2dep
 endif
@@ -671,12 +668,9 @@ $(objpfx)tst-tls14:  $(objpfx)tst-tlsmod14a.so $(libdl)
 $(objpfx)tst-tls14.out:$(objpfx)tst-tlsmod14b.so
 
 CFLAGS-tst-align.c = $(stack-align-test-flags)
-CFLAGS-tst-align2.c = $(stack-align-test-flags)
 CFLAGS-tst-alignmod.c = $(stack-align-test-flags)
-CFLAGS-tst-alignmod2.c = $(stack-align-test-flags)
 $(objpfx)tst-align: $(libdl)
 $(objpfx)tst-align.out: $(objpfx)tst-alignmod.so
-$(objpfx)tst-align2: $(objpfx)tst-alignmod2.so
 
 ifdef libdl
 $(objpfx)tst-tls9-static: $(common-objpfx)dlfcn/libdl.a
@@ -738,8 +732,6 @@ $(objpfx)tst-pie1: $(objpfx)tst-pie1.o $(objpfx)tst-piemod1.so
 	  -L$(subst :, -L,$(rpath-link)) -Wl,-rpath-link=$(rpath-link) \
 	  -o $@ $(objpfx)tst-pie1.o $(objpfx)tst-piemod1.so \
 	  $(common-objpfx)libc_nonshared.a
-
-generated += tst-pie1 tst-pie1.out tst-pie1.o
 endif
 
 check-textrel-CFLAGS = -O -Wall -D_XOPEN_SOURCE=600 -D_BSD_SOURCE
@@ -781,6 +773,3 @@ $(objpfx)tst-dlmopen2.out: $(objpfx)tst-dlmopen1mod.so
 
 $(objpfx)tst-dlmopen3: $(libdl)
 $(objpfx)tst-dlmopen3.out: $(objpfx)tst-dlmopen1mod.so
-
-$(objpfx)tst-audit1.out: $(objpfx)tst-auditmod1.so
-tst-audit1-ENV = LD_AUDIT=$(objpfx)tst-auditmod1.so
diff --git a/elf/Versions b/elf/Versions
index aaacf3e4ea..e24b2de04c 100644
--- a/elf/Versions
+++ b/elf/Versions
@@ -19,7 +19,7 @@ libc {
 %endif
   GLIBC_PRIVATE {
     # functions used in other libraries
-    _dl_addr;
+    _dl_open; _dl_close; _dl_addr;
     _dl_sym; _dl_vsym;
     _dl_open_hook;
     __libc_dlopen_mode; __libc_dlsym; __libc_dlclose;
diff --git a/elf/dl-close.c b/elf/dl-close.c
index eb5e805dd4..c823b17642 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -1,5 +1,5 @@
 /* Close a shared object opened by `_dl_open'.
-   Copyright (C) 1996-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1996-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <unistd.h>
 #include <bits/libc-lock.h>
 #include <ldsodefs.h>
 #include <sys/types.h>
@@ -100,6 +99,7 @@ remove_slotinfo (size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
 
 
 void
+internal_function
 _dl_close (void *_map)
 {
   struct reldep_list
@@ -112,7 +112,6 @@ _dl_close (void *_map)
   } *reldeps = NULL;
   struct link_map **list;
   struct link_map *map = _map;
-  Lmid_t ns = map->l_ns;
   unsigned int i;
   unsigned int *new_opencount;
 #ifdef USE_TLS
@@ -140,8 +139,8 @@ _dl_close (void *_map)
     {
       /* There are still references to this object.  Do nothing more.  */
       if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FILES, 0))
-	_dl_debug_printf ("\nclosing file=%s; opencount == %u\n",
-			  map->l_name, map->l_opencount);
+	GLRO(dl_debug_printf) ("\nclosing file=%s; opencount == %u\n",
+				map->l_name, map->l_opencount);
 
       /* Decrement the object's reference counter, not the dependencies'.  */
       --map->l_opencount;
@@ -266,23 +265,16 @@ _dl_close (void *_map)
   assert (new_opencount[0] == 0);
 
   /* Call all termination functions at once.  */
-#ifdef SHARED
-  bool do_audit = GLRO(dl_naudit) > 0 && !GL(dl_ns)[ns]._ns_loaded->l_auditing;
-#endif
   for (i = 0; list[i] != NULL; ++i)
     {
       struct link_map *imap = list[i];
-
-      /* All elements must be in the same namespace.  */
-      assert (imap->l_ns == ns);
-
       if (new_opencount[i] == 0 && imap->l_type == lt_loaded
 	  && (imap->l_flags_1 & DF_1_NODELETE) == 0)
 	{
 	  /* When debugging print a message first.  */
 	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS, 0))
-	    _dl_debug_printf ("\ncalling fini: %s [%lu]\n\n",
-			      imap->l_name, ns);
+	    GLRO(dl_debug_printf) ("\ncalling fini: %s [%lu]\n\n",
+				   imap->l_name, imap->l_ns);
 
 	  /* Call its termination function.  Do not do it for
 	     half-cooked objects.  */
@@ -307,22 +299,6 @@ _dl_close (void *_map)
 			 + imap->l_info[DT_FINI]->d_un.d_ptr))) ();
 	    }
 
-#ifdef SHARED
-	  /* Auditing checkpoint: we have a new object.  */
-	  if (__builtin_expect (do_audit, 0))
-	    {
-	      struct audit_ifaces *afct = GLRO(dl_audit);
-	      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-		{
-		  if (afct->objclose != NULL)
-		    /* Return value is ignored.  */
-		    (void) afct->objclose (&imap->l_audit[cnt].cookie);
-
-		  afct = afct->next;
-		}
-	    }
-#endif
-
 	  /* This object must not be used anymore.  We must remove the
 	     reference from the scope.  */
 	  unsigned int j;
@@ -389,30 +365,9 @@ _dl_close (void *_map)
       assert (imap->l_type == lt_loaded || imap->l_opencount > 0);
     }
 
-#ifdef SHARED
-  /* Auditing checkpoint: we will start deleting objects.  */
-  if (__builtin_expect (do_audit, 0))
-    {
-      struct link_map *head = GL(dl_ns)[ns]._ns_loaded;
-      struct audit_ifaces *afct = GLRO(dl_audit);
-      /* Do not call the functions for any auditing object.  */
-      if (head->l_auditing == 0)
-	{
-	  for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	    {
-	      if (afct->activity != NULL)
-		afct->activity (&head->l_audit[cnt].cookie, LA_ACT_DELETE);
-
-	      afct = afct->next;
-	    }
-	}
-    }
-#endif
-
   /* Notify the debugger we are about to remove some loaded objects.  */
-  struct r_debug *r = _dl_debug_initialize (0, ns);
-  r->r_state = RT_DELETE;
-  _dl_debug_state ();
+  _r_debug.r_state = RT_DELETE;
+  GLRO(dl_debug_state) ();
 
 #ifdef USE_TLS
   size_t tls_free_start;
@@ -434,19 +389,21 @@ _dl_close (void *_map)
 	  if (__builtin_expect (imap->l_global, 0))
 	    {
 	      /* This object is in the global scope list.  Remove it.  */
-	      unsigned int cnt = GL(dl_ns)[ns]._ns_main_searchlist->r_nlist;
+	      unsigned int cnt
+		= GL(dl_ns)[imap->l_ns]._ns_main_searchlist->r_nlist;
 
 	      do
 		--cnt;
-	      while (GL(dl_ns)[ns]._ns_main_searchlist->r_list[cnt] != imap);
+	      while (GL(dl_ns)[imap->l_ns]._ns_main_searchlist->r_list[cnt]
+		     != imap);
 
 	      /* The object was already correctly registered.  */
 	      while (++cnt
-		     < GL(dl_ns)[ns]._ns_main_searchlist->r_nlist)
-		GL(dl_ns)[ns]._ns_main_searchlist->r_list[cnt - 1]
-		  = GL(dl_ns)[ns]._ns_main_searchlist->r_list[cnt];
+		     < GL(dl_ns)[imap->l_ns]._ns_main_searchlist->r_nlist)
+		GL(dl_ns)[imap->l_ns]._ns_main_searchlist->r_list[cnt - 1]
+		  = GL(dl_ns)[imap->l_ns]._ns_main_searchlist->r_list[cnt];
 
-	      --GL(dl_ns)[ns]._ns_main_searchlist->r_nlist;
+	      --GL(dl_ns)[imap->l_ns]._ns_main_searchlist->r_nlist;
 	    }
 
 #ifdef USE_TLS
@@ -455,10 +412,9 @@ _dl_close (void *_map)
 	    {
 	      any_tls = true;
 
-	      if (GL(dl_tls_dtv_slotinfo_list) != NULL
-		  && ! remove_slotinfo (imap->l_tls_modid,
-					GL(dl_tls_dtv_slotinfo_list), 0,
-					imap->l_init_called))
+	      if (! remove_slotinfo (imap->l_tls_modid,
+				     GL(dl_tls_dtv_slotinfo_list), 0,
+				     imap->l_init_called))
 		/* All dynamically loaded modules with TLS are unloaded.  */
 		GL(dl_tls_max_dtv_idx) = GL(dl_tls_static_nelem);
 
@@ -543,12 +499,12 @@ _dl_close (void *_map)
 	  else
 	    {
 #ifdef SHARED
-	      assert (ns != LM_ID_BASE);
+	      assert (imap->l_ns != LM_ID_BASE);
 #endif
-	      GL(dl_ns)[ns]._ns_loaded = imap->l_next;
+	      GL(dl_ns)[imap->l_ns]._ns_loaded = imap->l_next;
 	    }
 
-	  --GL(dl_ns)[ns]._ns_nloaded;
+	  --GL(dl_ns)[imap->l_ns]._ns_nloaded;
 	  if (imap->l_next != NULL)
 	    imap->l_next->l_prev = imap->l_prev;
 
@@ -623,36 +579,16 @@ _dl_close (void *_map)
   if (any_tls)
     {
       if (__builtin_expect (++GL(dl_tls_generation) == 0, 0))
-	_dl_fatal_printf ("TLS generation counter wrapped!  Please report as described in <http://www.gnu.org/software/libc/bugs.html>.\n");
+	__libc_fatal (_("TLS generation counter wrapped!  Please report as described in <http://www.gnu.org/software/libc/bugs.html>."));
 
       if (tls_free_end == GL(dl_tls_static_used))
 	GL(dl_tls_static_used) = tls_free_start;
     }
 #endif
 
-#ifdef SHARED
-  /* Auditing checkpoint: we have deleted all objects.  */
-  if (__builtin_expect (do_audit, 0))
-    {
-      struct link_map *head = GL(dl_ns)[ns]._ns_loaded;
-      /* Do not call the functions for any auditing object.  */
-      if (head->l_auditing == 0)
-	{
-	  struct audit_ifaces *afct = GLRO(dl_audit);
-	  for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	    {
-	      if (afct->activity != NULL)
-		afct->activity (&head->l_audit[cnt].cookie, LA_ACT_CONSISTENT);
-
-	      afct = afct->next;
-	    }
-	}
-    }
-#endif
-
   /* Notify the debugger those objects are finalized and gone.  */
-  r->r_state = RT_CONSISTENT;
-  _dl_debug_state ();
+  _r_debug.r_state = RT_CONSISTENT;
+  GLRO(dl_debug_state) ();
 
   /* Now we can perhaps also remove the modules for which we had
      dependencies because of symbol lookup.  */
@@ -676,6 +612,7 @@ _dl_close (void *_map)
   /* Release the lock.  */
   __rtld_lock_unlock_recursive (GL(dl_load_lock));
 }
+libc_hidden_def (_dl_close)
 
 
 #ifdef USE_TLS
diff --git a/elf/dl-debug.c b/elf/dl-debug.c
index bc7d793435..bd6ee69189 100644
--- a/elf/dl-debug.c
+++ b/elf/dl-debug.c
@@ -1,5 +1,5 @@
 /* Communicate dynamic linker state to the debugger at runtime.
-   Copyright (C) 1996, 1998,2000,2002,2004,2005 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1998, 2000, 2002, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -32,25 +32,20 @@ struct r_debug _r_debug;
 
 struct r_debug *
 internal_function
-_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns)
+_dl_debug_initialize (ElfW(Addr) ldbase)
 {
-  struct r_debug *r;
-
-  if (ns == LM_ID_BASE)
-    r = &_r_debug;
-  else
-    r = &GL(dl_ns)[ns]._ns_debug;
-
-  if (r->r_brk == 0 || ldbase != 0)
+  if (_r_debug.r_brk == 0)
     {
       /* Tell the debugger where to find the map of loaded objects.  */
-      r->r_version = 1	/* R_DEBUG_VERSION XXX */;
-      r->r_ldbase = ldbase ?: _r_debug.r_ldbase;
-      r->r_map = GL(dl_ns)[ns]._ns_loaded;
-      r->r_brk = (ElfW(Addr)) &_dl_debug_state;
+      _r_debug.r_version = 1	/* R_DEBUG_VERSION XXX */;
+      _r_debug.r_ldbase = ldbase;
+      // XXX This is problematic.  It means we cannot tell the debugger
+      // XXX about namespaces other than the main one.
+      _r_debug.r_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
+      _r_debug.r_brk = (ElfW(Addr)) &_dl_debug_state;
     }
 
-  return r;
+  return &_r_debug;
 }
 
 
diff --git a/elf/dl-fini.c b/elf/dl-fini.c
index b3282089a9..f43f4a00ed 100644
--- a/elf/dl-fini.c
+++ b/elf/dl-fini.c
@@ -1,5 +1,5 @@
 /* Call the termination functions of loaded shared objects.
-   Copyright (C) 1995,96,1998-2002,2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995,96,1998-2002,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -48,25 +48,12 @@ _dl_fini (void)
   /* We run the destructors of the main namespaces last.  As for the
      other namespaces, we pick run the destructors in them in reverse
      order of the namespace ID.  */
-#ifdef SHARED
-  int do_audit = 0;
- again:
-#endif
   for (Lmid_t cnt = DL_NNS - 1; cnt >= 0; --cnt)
     {
       /* Protect against concurrent loads and unloads.  */
       __rtld_lock_lock_recursive (GL(dl_load_lock));
 
-      unsigned int nmaps = 0;
       unsigned int nloaded = GL(dl_ns)[cnt]._ns_nloaded;
-      /* No need to do anything for empty namespaces or those used for
-	 auditing DSOs.  */
-      if (nloaded == 0
-#ifdef SHARED
-	  || GL(dl_ns)[cnt]._ns_loaded->l_auditing != do_audit
-#endif
-	  )
-	goto out;
 
       /* XXX Could it be (in static binaries) that there is no object
 	 loaded?  */
@@ -89,7 +76,6 @@ _dl_fini (void)
 
       unsigned int i;
       struct link_map *l;
-      assert (nloaded != 0 || GL(dl_ns)[cnt]._ns_loaded == NULL);
       for (l = GL(dl_ns)[cnt]._ns_loaded, i = 0; l != NULL; l = l->l_next)
 	/* Do not handle ld.so in secondary namespaces.  */
 	if (l == l->l_real)
@@ -104,7 +90,7 @@ _dl_fini (void)
 	  }
       assert (cnt != LM_ID_BASE || i == nloaded);
       assert (cnt == LM_ID_BASE || i == nloaded || i == nloaded - 1);
-      nmaps = i;
+      unsigned int nmaps = i;
 
       if (nmaps != 0)
 	{
@@ -177,7 +163,6 @@ _dl_fini (void)
 	 high and will be decremented in this loop.  So we release the
 	 lock so that some code which might be called from a destructor
 	 can directly or indirectly access the lock.  */
-    out:
       __rtld_lock_unlock_recursive (GL(dl_load_lock));
 
       /* 'maps' now contains the objects in the right order.  Now call the
@@ -191,49 +176,38 @@ _dl_fini (void)
 	      /* Make sure nothing happens if we are called twice.  */
 	      l->l_init_called = 0;
 
+	      /* Don't call the destructors for objects we are not
+		 supposed to.  */
+	      if (l->l_name[0] == '\0' && l->l_type == lt_executable)
+		continue;
+
 	      /* Is there a destructor function?  */
-	      if (l->l_info[DT_FINI_ARRAY] != NULL
-		  || l->l_info[DT_FINI] != NULL)
+	      if (l->l_info[DT_FINI_ARRAY] == NULL
+		  && l->l_info[DT_FINI] == NULL)
+		continue;
+
+	      /* When debugging print a message first.  */
+	      if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS,
+				    0))
+		_dl_debug_printf ("\ncalling fini: %s [%lu]\n\n",
+				  l->l_name[0] ? l->l_name : rtld_progname,
+				  cnt);
+
+	      /* First see whether an array is given.  */
+	      if (l->l_info[DT_FINI_ARRAY] != NULL)
 		{
-		  /* When debugging print a message first.  */
-		  if (__builtin_expect (GLRO(dl_debug_mask)
-					& DL_DEBUG_IMPCALLS, 0))
-		    _dl_debug_printf ("\ncalling fini: %s [%lu]\n\n",
-				      l->l_name[0] ? l->l_name : rtld_progname,
-				      cnt);
-
-		  /* First see whether an array is given.  */
-		  if (l->l_info[DT_FINI_ARRAY] != NULL)
-		    {
-		      ElfW(Addr) *array =
-			(ElfW(Addr) *) (l->l_addr
-					+ l->l_info[DT_FINI_ARRAY]->d_un.d_ptr);
-		      unsigned int i = (l->l_info[DT_FINI_ARRAYSZ]->d_un.d_val
-					/ sizeof (ElfW(Addr)));
-		      while (i-- > 0)
-			((fini_t) array[i]) ();
-		    }
-
-		  /* Next try the old-style destructor.  */
-		  if (l->l_info[DT_FINI] != NULL)
-		    ((fini_t) DL_DT_FINI_ADDRESS (l, l->l_addr + l->l_info[DT_FINI]->d_un.d_ptr)) ();
+		  ElfW(Addr) *array =
+		    (ElfW(Addr) *) (l->l_addr
+				    + l->l_info[DT_FINI_ARRAY]->d_un.d_ptr);
+		  unsigned int i = (l->l_info[DT_FINI_ARRAYSZ]->d_un.d_val
+				    / sizeof (ElfW(Addr)));
+		  while (i-- > 0)
+		    ((fini_t) array[i]) ();
 		}
 
-#ifdef SHARED
-	      /* Auditing checkpoint: another object closed.  */
-	      if (!do_audit && __builtin_expect (GLRO(dl_naudit) > 0, 0))
-		{
-		  struct audit_ifaces *afct = GLRO(dl_audit);
-		  for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-		    {
-		      if (afct->objclose != NULL)
-			/* Return value is ignored.  */
-			(void) afct->objclose (&l->l_audit[cnt].cookie);
-
-		      afct = afct->next;
-		    }
-		}
-#endif
+	      /* Next try the old-style destructor.  */
+	      if (l->l_info[DT_FINI] != NULL)
+		((fini_t) DL_DT_FINI_ADDRESS (l, l->l_addr + l->l_info[DT_FINI]->d_un.d_ptr)) ();
 	    }
 
 	  /* Correct the previous increment.  */
@@ -241,14 +215,6 @@ _dl_fini (void)
 	}
     }
 
-#ifdef SHARED
-  if (! do_audit && GLRO(dl_naudit) > 0)
-    {
-      do_audit = 1;
-      goto again;
-    }
-#endif
-
   if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_STATISTICS, 0))
     _dl_debug_printf ("\nruntime linker statistics:\n"
 		      "           final number of relocations: %lu\n"
diff --git a/elf/dl-init.c b/elf/dl-init.c
index e7b67570fd..e700dffb3a 100644
--- a/elf/dl-init.c
+++ b/elf/dl-init.c
@@ -93,6 +93,7 @@ _dl_init (struct link_map *main_map, int argc, char **argv, char **env)
 {
   ElfW(Dyn) *preinit_array = main_map->l_info[DT_PREINIT_ARRAY];
   ElfW(Dyn) *preinit_array_size = main_map->l_info[DT_PREINIT_ARRAYSZ];
+  struct r_debug *r;
   unsigned int i;
 
   if (__builtin_expect (GL(dl_initfirst) != NULL, 0))
@@ -119,6 +120,13 @@ _dl_init (struct link_map *main_map, int argc, char **argv, char **env)
 	((init_t) addrs[cnt]) (argc, argv, env);
     }
 
+  /* Notify the debugger we have added some objects.  We need to call
+     _dl_debug_initialize in a static program in case dynamic linking has
+     not been used before.  */
+  r = _dl_debug_initialize (0);
+  r->r_state = RT_ADD;
+  _dl_debug_state ();
+
   /* Stupid users forced the ELF specification to be changed.  It now
      says that the dynamic loader is responsible for determining the
      order in which the constructors have to run.  The constructors
@@ -133,6 +141,10 @@ _dl_init (struct link_map *main_map, int argc, char **argv, char **env)
   while (i-- > 0)
     call_init (main_map->l_initfini[i], argc, argv, env);
 
+  /* Notify the debugger all new objects are now ready to go.  */
+  r->r_state = RT_CONSISTENT;
+  _dl_debug_state ();
+
 #ifndef HAVE_INLINED_SYSCALLS
   /* Finished starting up.  */
   INTUSE(_dl_starting_up) = 0;
diff --git a/elf/dl-libc.c b/elf/dl-libc.c
index 5e76069139..8a3f542e01 100644
--- a/elf/dl-libc.c
+++ b/elf/dl-libc.c
@@ -22,11 +22,6 @@
 #include <stdlib.h>
 #include <ldsodefs.h>
 
-extern int __libc_argc attribute_hidden;
-extern char **__libc_argv attribute_hidden;
-
-extern char **__environ;
-
 /* The purpose of this file is to provide wrappers around the dynamic
    linker error mechanism (similar to dlopen() et al in libdl) which
    are usable from within libc.  Generally we want to throw away the
@@ -82,8 +77,7 @@ do_dlopen (void *ptr)
 {
   struct do_dlopen_args *args = (struct do_dlopen_args *) ptr;
   /* Open and relocate the shared object.  */
-  args->map = GLRO(dl_open) (args->name, args->mode, NULL, __LM_ID_CALLER,
-			     __libc_argc, __libc_argv, __environ);
+  args->map = _dl_open (args->name, args->mode, NULL, __LM_ID_CALLER);
 }
 
 static void
@@ -99,7 +93,7 @@ do_dlsym (void *ptr)
 static void
 do_dlclose (void *ptr)
 {
-  GLRO(dl_close) ((struct link_map *) ptr);
+  _dl_close ((struct link_map *) ptr);
 }
 
 /* This code is to support __libc_dlopen from __libc_dlopen'ed shared
@@ -115,7 +109,7 @@ struct dl_open_hook
 #ifdef SHARED
 extern struct dl_open_hook *_dl_open_hook;
 libc_hidden_proto (_dl_open_hook);
-struct dl_open_hook *_dl_open_hook __attribute__ ((nocommon));
+struct dl_open_hook *_dl_open_hook __attribute__((nocommon));
 libc_hidden_data_def (_dl_open_hook);
 #else
 static void
@@ -125,7 +119,7 @@ do_dlsym_private (void *ptr)
   struct r_found_version vers;
   vers.name = "GLIBC_PRIVATE";
   vers.hidden = 1;
-  /* vers.hash = _dl_elf_hash (vers.name);  */
+  /* vers.hash = _dl_elf_hash (vers.name);  */
   vers.hash = 0x0963cf85;
   vers.filename = NULL;
 
diff --git a/elf/dl-load.c b/elf/dl-load.c
index 088b2224e2..eb1a7919fb 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1,5 +1,5 @@
 /* Map in a shared object's segments from the file.
-   Copyright (C) 1995-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -36,7 +36,6 @@
 #include <dl-osinfo.h>
 #include <stackinfo.h>
 #include <caller.h>
-#include <sysdep.h>
 
 #include <dl-dst.h>
 
@@ -106,11 +105,13 @@ ELF_PREFERRED_ADDRESS_DATA;
 
 
 int __stack_prot attribute_hidden attribute_relro
+  = (PROT_READ|PROT_WRITE
 #if _STACK_GROWS_DOWN && defined PROT_GROWSDOWN
-  = PROT_GROWSDOWN;
+     |PROT_GROWSDOWN
 #elif _STACK_GROWS_UP && defined PROT_GROWSUP
-  = PROT_GROWSUP;
+     |PROT_GROWSUP
 #endif
+     );
 
 
 /* Type for the buffer we put the ELF header and hopefully the program
@@ -826,8 +827,6 @@ _dl_map_object_from_fd (const char *name, int fd, struct filebuf *fbp,
   /* Initialize to keep the compiler happy.  */
   const char *errstring = NULL;
   int errval = 0;
-  struct r_debug *r = _dl_debug_initialize (0, nsid);
-  bool make_consistent = false;
 
   /* Get file information.  */
   if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st) < 0, 0))
@@ -836,12 +835,6 @@ _dl_map_object_from_fd (const char *name, int fd, struct filebuf *fbp,
     call_lose_errno:
       errval = errno;
     call_lose:
-      if (make_consistent)
-	{
-	  r->r_state = RT_CONSISTENT;
-	  _dl_debug_state ();
-	}
-
       lose (errval, fd, name, realname, l, errstring);
     }
 
@@ -912,39 +905,6 @@ _dl_map_object_from_fd (const char *name, int fd, struct filebuf *fbp,
     }
 #endif
 
-  /* Signal that we are going to add new objects.  */
-  if (r->r_state == RT_CONSISTENT)
-    {
-#ifdef SHARED
-      /* Auditing checkpoint: we are going to add new objects.  */
-      if (__builtin_expect (GLRO(dl_naudit) > 0, 0))
-	{
-	  struct link_map *head = GL(dl_ns)[nsid]._ns_loaded;
-	  /* Do not call the functions for any auditing object.  */
-	  if (head->l_auditing == 0)
-	    {
-	      struct audit_ifaces *afct = GLRO(dl_audit);
-	      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-		{
-		  if (afct->activity != NULL)
-		    afct->activity (&head->l_audit[cnt].cookie, LA_ACT_ADD);
-
-		  afct = afct->next;
-		}
-	    }
-	}
-#endif
-
-      /* Notify the debugger we have added some objects.  We need to
-	 call _dl_debug_initialize in a static program in case dynamic
-	 linking has not been used before.  */
-      r->r_state = RT_ADD;
-      _dl_debug_state ();
-      make_consistent = true;
-    }
-  else
-    assert (r->r_state == RT_ADD);
-
   /* Enter the new object in the list of loaded objects.  */
   l = _dl_new_object (realname, name, l_type, loader, mode, nsid);
   if (__builtin_expect (l == NULL, 0))
@@ -1084,7 +1044,7 @@ _dl_map_object_from_fd (const char *name, int fd, struct filebuf *fbp,
 	    }
 
 # ifdef SHARED
-	  if (l->l_prev == NULL || (mode & __RTLD_AUDIT) != 0)
+	  if (l->l_prev == NULL)
 	    /* We are loading the executable itself when the dynamic linker
 	       was executed directly.  The setup will happen later.  */
 	    break;
@@ -1358,25 +1318,21 @@ cannot allocate TLS data structures for initial thread");
 	 protection of the variable which contains the flags used in
 	 the mprotect calls.  */
 #ifdef HAVE_Z_RELRO
-      if ((mode & (__RTLD_DLOPEN | __RTLD_AUDIT)) == __RTLD_DLOPEN)
+      if (mode & __RTLD_DLOPEN)
 	{
 	  uintptr_t p = ((uintptr_t) &__stack_prot) & ~(GLRO(dl_pagesize) - 1);
 	  size_t s = (uintptr_t) &__stack_prot - p + sizeof (int);
 
 	  __mprotect ((void *) p, s, PROT_READ|PROT_WRITE);
 	  if (__builtin_expect (__check_caller (RETURN_ADDRESS (0),
-						allow_ldso) == 0,
+						allow_ldso|allow_libc) == 0,
 				0))
-	    __stack_prot |= PROT_READ|PROT_WRITE|PROT_EXEC;
+	    __stack_prot |= PROT_EXEC;
 	  __mprotect ((void *) p, s, PROT_READ);
 	}
       else
 #endif
-	__stack_prot |= PROT_READ|PROT_WRITE|PROT_EXEC;
-
-#ifdef check_consistency
-      check_consistency ();
-#endif
+	__stack_prot |= PROT_EXEC;
 
       errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
       if (errval)
@@ -1468,27 +1424,6 @@ cannot enable executable stack as shared object requires");
     add_name_to_object (l, ((const char *) D_PTR (l, l_info[DT_STRTAB])
 			    + l->l_info[DT_SONAME]->d_un.d_val));
 
-#ifdef SHARED
-  /* Auditing checkpoint: we have a new object.  */
-  if (__builtin_expect (GLRO(dl_naudit) > 0, 0)
-      && !GL(dl_ns)[l->l_ns]._ns_loaded->l_auditing)
-    {
-      struct audit_ifaces *afct = GLRO(dl_audit);
-      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	{
-	  if (afct->objopen != NULL)
-	    {
-	      l->l_audit[cnt].bindflags
-		= afct->objopen (l, nsid, &l->l_audit[cnt].cookie);
-
-	      l->l_audit_any_plt |= l->l_audit[cnt].bindflags != 0;
-	    }
-
-	  afct = afct->next;
-	}
-    }
-#endif
-
   return l;
 }
 
@@ -1536,8 +1471,7 @@ print_search_path (struct r_search_path_elem **list,
    this could mean there is something wrong in the installation and the
    user might want to know about this.  */
 static int
-open_verify (const char *name, struct filebuf *fbp, struct link_map *loader,
-	     int whatcode)
+open_verify (const char *name, struct filebuf *fbp)
 {
   /* This is the expected ELF header.  */
 #define ELF32_CLASS ELFCLASS32
@@ -1566,34 +1500,13 @@ open_verify (const char *name, struct filebuf *fbp, struct link_map *loader,
     ElfW(Word) type;
     char vendor[4];
   } expected_note = { 4, 16, 1, "GNU" };
+  int fd;
   /* Initialize it to make the compiler happy.  */
   const char *errstring = NULL;
   int errval = 0;
 
-#ifdef SHARED
-  /* Give the auditing libraries a chance.  */
-  if (__builtin_expect (GLRO(dl_naudit) > 0, 0) && whatcode != 0
-      && loader->l_auditing == 0)
-    {
-      struct audit_ifaces *afct = GLRO(dl_audit);
-      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	{
-	  if (afct->objsearch != NULL)
-	    {
-	      name = afct->objsearch (name, &loader->l_audit[cnt].cookie,
-				      whatcode);
-	      if (name == NULL)
-		/* Ignore the path.  */
-		return -1;
-	    }
-
-	  afct = afct->next;
-	}
-    }
-#endif
-
   /* Open the file.  We always open files read-only.  */
-  int fd = __open (name, O_RDONLY);
+  fd = __open (name, O_RDONLY);
   if (fd != -1)
     {
       ElfW(Ehdr) *ehdr;
@@ -1751,7 +1664,7 @@ open_verify (const char *name, struct filebuf *fbp, struct link_map *loader,
 static int
 open_path (const char *name, size_t namelen, int preloaded,
 	   struct r_search_path_struct *sps, char **realname,
-	   struct filebuf *fbp, struct link_map *loader, int whatcode)
+	   struct filebuf *fbp)
 {
   struct r_search_path_elem **dirs = sps->dirs;
   char *buf;
@@ -1795,16 +1708,12 @@ open_path (const char *name, size_t namelen, int preloaded,
 	  if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_LIBS, 0))
 	    _dl_debug_printf ("  trying file=%s\n", buf);
 
-	  fd = open_verify (buf, fbp, loader, whatcode);
+	  fd = open_verify (buf, fbp);
 	  if (this_dir->status[cnt] == unknown)
 	    {
 	      if (fd != -1)
 		this_dir->status[cnt] = existing;
-	      /* Do not update the directory information when loading
-		 auditing code.  We must try to disturb the program as
-		 little as possible.  */
-	      else if (loader == NULL
-		       || GL(dl_ns)[loader->l_ns]._ns_loaded->l_audit == 0)
+	      else
 		{
 		  /* We failed to open machine dependent library.  Let's
 		     test whether there is any directory at all.  */
@@ -1822,7 +1731,7 @@ open_path (const char *name, size_t namelen, int preloaded,
 	    }
 
 	  /* Remember whether we found any existing directory.  */
-	  here_any |= this_dir->status[cnt] != nonexisting;
+	  here_any |= this_dir->status[cnt] == existing;
 
 	  if (fd != -1 && __builtin_expect (preloaded, 0)
 	      && INTUSE(__libc_enable_secure))
@@ -1879,12 +1788,7 @@ open_path (const char *name, size_t namelen, int preloaded,
 	 must not be freed using the general free() in libc.  */
       if (sps->malloced)
 	free (sps->dirs);
-#ifdef HAVE_Z_RELRO
-      /* rtld_search_dirs is attribute_relro, therefore avoid writing
-	 into it.  */
-      if (sps != &rtld_search_dirs)
-#endif
-	sps->dirs = (void *) -1;
+      sps->dirs = (void *) -1;
     }
 
   return -1;
@@ -1943,32 +1847,6 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded,
 			      loader->l_name[0]
 			      ? loader->l_name : rtld_progname, loader->l_ns);
 
-#ifdef SHARED
-  /* Give the auditing libraries a chance to change the name before we
-     try anything.  */
-  if (__builtin_expect (GLRO(dl_naudit) > 0, 0)
-      && (loader == NULL || loader->l_auditing == 0))
-    {
-      struct audit_ifaces *afct = GLRO(dl_audit);
-      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	{
-	  if (afct->objsearch != NULL)
-	    {
-	      name = afct->objsearch (name, &loader->l_audit[cnt].cookie,
-				      LA_SER_ORIG);
-	      if (name == NULL)
-		{
-		  /* Do not try anything further.  */
-		  fd = -1;
-		  goto no_file;
-		}
-	    }
-
-	  afct = afct->next;
-	}
-    }
-#endif
-
   if (strchr (name, '/') == NULL)
     {
       /* Search for NAME in several places.  */
@@ -1989,7 +1867,7 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded,
 	  for (l = loader; fd == -1 && l; l = l->l_loader)
 	    if (cache_rpath (l, &l->l_rpath_dirs, DT_RPATH, "RPATH"))
 	      fd = open_path (name, namelen, preloaded, &l->l_rpath_dirs,
-			      &realname, &fb, loader, LA_SER_RUNPATH);
+			      &realname, &fb);
 
 	  /* If dynamically linked, try the DT_RPATH of the executable
              itself.  NB: we do this for lookups in any namespace.  */
@@ -1999,24 +1877,21 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded,
 	      if (l && l->l_type != lt_loaded && l != loader
 		  && cache_rpath (l, &l->l_rpath_dirs, DT_RPATH, "RPATH"))
 		fd = open_path (name, namelen, preloaded, &l->l_rpath_dirs,
-				&realname, &fb, loader ?: l, LA_SER_RUNPATH);
+				&realname, &fb);
 	    }
 	}
 
       /* Try the LD_LIBRARY_PATH environment variable.  */
       if (fd == -1 && env_path_list.dirs != (void *) -1)
 	fd = open_path (name, namelen, preloaded, &env_path_list,
-			&realname, &fb,
-			loader ?: GL(dl_ns)[LM_ID_BASE]._ns_loaded,
-			LA_SER_LIBPATH);
+			&realname, &fb);
 
       /* Look at the RUNPATH information for this binary.  */
       if (fd == -1 && loader != NULL
 	  && cache_rpath (loader, &loader->l_runpath_dirs,
 			  DT_RUNPATH, "RUNPATH"))
 	fd = open_path (name, namelen, preloaded,
-			&loader->l_runpath_dirs, &realname, &fb, loader,
-			LA_SER_RUNPATH);
+			&loader->l_runpath_dirs, &realname, &fb);
 
       if (fd == -1
 	  && (__builtin_expect (! preloaded, 1)
@@ -2064,9 +1939,7 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded,
 
 	      if (cached != NULL)
 		{
-		  fd = open_verify (cached,
-				    &fb, loader ?: GL(dl_ns)[nsid]._ns_loaded,
-				    LA_SER_CONFIG);
+		  fd = open_verify (cached, &fb);
 		  if (__builtin_expect (fd != -1, 1))
 		    {
 		      realname = local_strdup (cached);
@@ -2086,7 +1959,7 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded,
 	      || __builtin_expect (!(l->l_flags_1 & DF_1_NODEFLIB), 1))
 	  && rtld_search_dirs.dirs != (void *) -1)
 	fd = open_path (name, namelen, preloaded, &rtld_search_dirs,
-			&realname, &fb, l, LA_SER_DEFAULT);
+			&realname, &fb);
 
       /* Add another newline when we are tracing the library loading.  */
       if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_LIBS, 0))
@@ -2102,16 +1975,12 @@ _dl_map_object (struct link_map *loader, const char *name, int preloaded,
 	fd = -1;
       else
 	{
-	  fd = open_verify (realname, &fb,
-			    loader ?: GL(dl_ns)[nsid]._ns_loaded, 0);
+	  fd = open_verify (realname, &fb);
 	  if (__builtin_expect (fd, 0) == -1)
 	    free (realname);
 	}
     }
 
-#ifdef SHARED
- no_file:
-#endif
   /* In case the LOADER information has only been provided to get to
      the appropriate RUNPATH/RPATH information we do not need it
      anymore.  */
diff --git a/elf/dl-object.c b/elf/dl-object.c
index 86f7a8e4d9..b46ebdc33f 100644
--- a/elf/dl-object.c
+++ b/elf/dl-object.c
@@ -39,24 +39,14 @@ _dl_new_object (char *realname, const char *libname, int type,
   size_t libname_len = strlen (libname) + 1;
   struct link_map *new;
   struct libname_list *newname;
-#ifdef SHARED
-  /* We create the map for the executable before we know whether we have
-     auditing libraries and if yes, how many.  Assume the worst.  */
-  unsigned int naudit = GLRO(dl_naudit) ?: ((mode & __RTLD_OPENEXEC)
-					    ? DL_NNS : 0);
-  size_t audit_space = naudit * sizeof (new->l_audit[0]);
-#else
-# define audit_space 0
-#endif
 
-  new = (struct link_map *) calloc (sizeof (*new) + audit_space
-				    + sizeof (*newname) + libname_len, 1);
+  new = (struct link_map *) calloc (sizeof (*new) + sizeof (*newname)
+				    + libname_len, 1);
   if (new == NULL)
     return NULL;
 
   new->l_real = new;
-  new->l_libname = newname = (struct libname_list *) ((char *) (new + 1)
-						      + audit_space);
+  new->l_libname = newname = (struct libname_list *) (new + 1);
   newname->name = (char *) memcpy (newname + 1, libname, libname_len);
   /* newname->next = NULL;	We use calloc therefore not necessary.  */
   newname->dont_free = 1;
@@ -69,14 +59,6 @@ _dl_new_object (char *realname, const char *libname, int type,
 #endif
   new->l_ns = nsid;
 
-#ifdef SHARED
-  for (unsigned int cnt = 0; cnt < naudit; ++cnt)
-    {
-      new->l_audit[cnt].cookie = (uintptr_t) new;
-      /* new->l_audit[cnt].bindflags = 0; */
-    }
-#endif
-
   /* new->l_global = 0;	We use calloc therefore not necessary.  */
 
   /* Use the 'l_scope_mem' array by default for the the 'l_scope'
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 4de20720ed..7e890ad7f7 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -1,5 +1,5 @@
 /* Load a shared object at runtime, relocate it, and run its initializer.
-   Copyright (C) 1996-2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1996-2001, 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -49,6 +49,11 @@ weak_extern (BP_SYM (_dl_sysdep_start))
 
 extern int __libc_multiple_libcs;	/* Defined in init-first.c.  */
 
+extern int __libc_argc attribute_hidden;
+extern char **__libc_argv attribute_hidden;
+
+extern char **__environ;
+
 /* Undefine the following for debugging.  */
 /* #define SCOPE_DEBUG 1 */
 #ifdef SCOPE_DEBUG
@@ -69,10 +74,6 @@ struct dl_open_args
   struct link_map *map;
   /* Namespace ID.  */
   Lmid_t nsid;
-  /* Original parameters to the program and the current environment.  */
-  int argc;
-  char **argv;
-  char **env;
 };
 
 
@@ -114,8 +115,8 @@ add_to_global (struct link_map *new)
 	{
 	  GL(dl_ns)[new->l_ns]._ns_global_scope_alloc = 0;
 	nomem:
-	  _dl_signal_error (ENOMEM, new->l_libname->name, NULL,
-			    N_("cannot extend global scope"));
+	  GLRO(dl_signal_error) (ENOMEM, new->l_libname->name, NULL,
+				 N_("cannot extend global scope"));
 	  return 1;
 	}
 
@@ -170,14 +171,13 @@ dl_open_worker (void *a)
   int lazy;
   unsigned int i;
 #ifdef USE_TLS
-  bool any_tls = false;
+  bool any_tls;
 #endif
   struct link_map *call_map = NULL;
 
   /* Check whether _dl_open() has been called from a valid DSO.  */
-  if (__check_caller (args->caller_dl_open,
-		      allow_libc|allow_libdl|allow_ldso) != 0)
-    _dl_signal_error (0, "dlopen", NULL, N_("invalid caller"));
+  if (__check_caller (args->caller_dl_open, allow_libc|allow_libdl) != 0)
+    GLRO(dl_signal_error) (0, "dlopen", NULL, N_("invalid caller"));
 
   /* Determine the caller's map if necessary.  This is needed in case
      we have a DST, when we don't know the namespace ID we have to put
@@ -218,8 +218,6 @@ dl_open_worker (void *a)
 	}
     }
 
-  assert (_dl_debug_initialize (0, args->nsid)->r_state == RT_CONSISTENT);
-
   /* Maybe we have to expand a DST.  */
   if (__builtin_expect (dst != NULL, 0))
     {
@@ -228,10 +226,10 @@ dl_open_worker (void *a)
       char *new_file;
 
       /* DSTs must not appear in SUID/SGID programs.  */
-      if (INTUSE(__libc_enable_secure))
+      if (__libc_enable_secure)
 	/* This is an error.  */
-	_dl_signal_error (0, "dlopen", NULL,
-			  N_("DST not allowed in SUID/SGID programs"));
+	GLRO(dl_signal_error) (0, "dlopen", NULL,
+			       N_("DST not allowed in SUID/SGID programs"));
 
 
       /* Determine how much space we need.  We have to allocate the
@@ -246,8 +244,8 @@ dl_open_worker (void *a)
 
       /* If the substitution failed don't try to load.  */
       if (*new_file == '\0')
-	_dl_signal_error (0, "dlopen", NULL,
-			  N_("empty dynamic string token substitution"));
+	GLRO(dl_signal_error) (0, "dlopen", NULL,
+			       N_("empty dynamic string token substitution"));
 
       /* Now we have a new file name.  */
       file = new_file;
@@ -258,8 +256,8 @@ dl_open_worker (void *a)
     }
 
   /* Load the named object.  */
-  args->map = new = _dl_map_object (call_map, file, 0, lt_loaded, 0,
-				    mode | __RTLD_CALLMAP, args->nsid);
+  args->map = new = GLRO(dl_map_object) (call_map, file, 0, lt_loaded, 0,
+					 mode | __RTLD_CALLMAP, args->nsid);
 
   /* If the pointer returned is NULL this means the RTLD_NOLOAD flag is
      set and the object is not already loaded.  */
@@ -281,8 +279,8 @@ dl_open_worker (void *a)
     {
       /* Let the user know about the opencount.  */
       if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FILES, 0))
-	_dl_debug_printf ("opening file=%s [%lu]; opencount=%u\n\n",
-			  new->l_name, new->l_ns, new->l_opencount);
+	GLRO(dl_debug_printf) ("opening file=%s [%lu]; opencount=%u\n\n",
+			       new->l_name, new->l_ns, new->l_opencount);
 
       /* If the user requested the object to be in the global namespace
 	 but it is not so far, add it now.  */
@@ -298,50 +296,23 @@ dl_open_worker (void *a)
 	/* Increment just the reference counter of the object.  */
 	++new->l_opencount;
 
-      assert (_dl_debug_initialize (0, args->nsid)->r_state == RT_CONSISTENT);
-
       return;
     }
 
   /* Load that object's dependencies.  */
-  _dl_map_object_deps (new, NULL, 0, 0,
-		       mode & (__RTLD_DLOPEN | RTLD_DEEPBIND | __RTLD_AUDIT));
+  GLRO(dl_map_object_deps) (new, NULL, 0, 0,
+			    mode & (__RTLD_DLOPEN | RTLD_DEEPBIND));
 
   /* So far, so good.  Now check the versions.  */
   for (i = 0; i < new->l_searchlist.r_nlist; ++i)
     if (new->l_searchlist.r_list[i]->l_real->l_versions == NULL)
-      (void) _dl_check_map_versions (new->l_searchlist.r_list[i]->l_real,
-				     0, 0);
+      (void) GLRO(dl_check_map_versions) (new->l_searchlist.r_list[i]->l_real,
+					  0, 0);
 
 #ifdef SCOPE_DEBUG
   show_scope (new);
 #endif
 
-#ifdef SHARED
-  /* Auditing checkpoint: we have added all objects.  */
-  if (__builtin_expect (GLRO(dl_naudit) > 0, 0))
-    {
-      struct link_map *head = GL(dl_ns)[new->l_ns]._ns_loaded;
-      /* Do not call the functions for any auditing object.  */
-      if (head->l_auditing == 0)
-	{
-	  struct audit_ifaces *afct = GLRO(dl_audit);
-	  for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	    {
-	      if (afct->activity != NULL)
-		afct->activity (&head->l_audit[cnt].cookie, LA_ACT_CONSISTENT);
-
-	      afct = afct->next;
-	    }
-	}
-    }
-#endif
-
-  /* Notify the debugger all new objects are now ready to go.  */
-  struct r_debug *r = _dl_debug_initialize (0, args->nsid);
-  r->r_state = RT_CONSISTENT;
-  _dl_debug_state ();
-
   /* Only do lazy relocation if `LD_BIND_NOW' is not set.  */
   lazy = (mode & RTLD_BINDING_MASK) == RTLD_LAZY && GLRO(dl_lazy);
 
@@ -365,12 +336,12 @@ dl_open_worker (void *a)
 		 start the profiling.  */
 	      struct link_map *old_profile_map = GL(dl_profile_map);
 
-	      _dl_relocate_object (l, l->l_scope, 1, 1);
+	      GLRO(dl_relocate_object) (l, l->l_scope, 1, 1);
 
 	      if (old_profile_map == NULL && GL(dl_profile_map) != NULL)
 		{
 		  /* We must prepare the profiling.  */
-		  _dl_start_profile ();
+		  GLRO(dl_start_profile) ();
 
 		  /* Prevent unloading the object.  */
 		  GL(dl_profile_map)->l_flags_1 |= DF_1_NODELETE;
@@ -378,7 +349,7 @@ dl_open_worker (void *a)
 	    }
 	  else
 #endif
-	    _dl_relocate_object (l, l->l_scope, lazy, 0);
+	    GLRO(dl_relocate_object) (l, l->l_scope, lazy, 0);
 	}
 
       if (l == new)
@@ -386,6 +357,22 @@ dl_open_worker (void *a)
       l = l->l_prev;
     }
 
+#ifdef USE_TLS
+  /* Do static TLS initialization now if it has been delayed because
+     the TLS template might not be fully relocated at _dl_allocate_static_tls
+     time.  */
+  for (l = new; l; l = l->l_next)
+    if (l->l_need_tls_init)
+      {
+	l->l_need_tls_init = 0;
+	GL(dl_init_static_tls) (l);
+      }
+
+  /* We normally don't bump the TLS generation counter.  There must
+     actually be a need to do this.  */
+  any_tls = false;
+#endif
+
   /* Increment the open count for all dependencies.  If the file is
      not loaded as a dependency here add the search list of the newly
      loaded object to the scope.  */
@@ -425,8 +412,8 @@ dl_open_worker (void *a)
 		newp = (struct r_scope_elem **)
 		  malloc (new_size * sizeof (struct r_scope_elem *));
 		if (newp == NULL)
-		  _dl_signal_error (ENOMEM, "dlopen", NULL,
-				    N_("cannot create scope list"));
+		  GLRO(dl_signal_error) (ENOMEM, "dlopen", NULL,
+					 N_("cannot create scope list"));
 		imap->l_scope = memcpy (newp, imap->l_scope,
 					cnt * sizeof (imap->l_scope[0]));
 	      }
@@ -436,8 +423,8 @@ dl_open_worker (void *a)
 		  realloc (imap->l_scope,
 			   new_size * sizeof (struct r_scope_elem *));
 		if (newp == NULL)
-		  _dl_signal_error (ENOMEM, "dlopen", NULL,
-				    N_("cannot create scope list"));
+		  GLRO(dl_signal_error) (ENOMEM, "dlopen", NULL,
+					 N_("cannot create scope list"));
 		imap->l_scope = newp;
 	      }
 
@@ -454,35 +441,76 @@ dl_open_worker (void *a)
 				  > 0, 0))
       {
 	/* Now that we know the object is loaded successfully add
-	   modules containing TLS data to the slot info table.  We
+	   modules containing TLS data to the dtv info table.  We
 	   might have to increase its size.  */
-	_dl_add_to_slotinfo (new->l_searchlist.r_list[i]);
+	struct dtv_slotinfo_list *listp;
+	struct dtv_slotinfo_list *prevp;
+	size_t idx = new->l_searchlist.r_list[i]->l_tls_modid;
+
+	assert (new->l_searchlist.r_list[i]->l_type == lt_loaded);
 
-	if (new->l_searchlist.r_list[i]->l_need_tls_init)
+	/* Find the place in the dtv slotinfo list.  */
+	listp = GL(dl_tls_dtv_slotinfo_list);
+	prevp = NULL;		/* Needed to shut up gcc.  */
+	do
 	  {
-	    new->l_searchlist.r_list[i]->l_need_tls_init = 0;
-# ifdef SHARED
-	    /* Update the slot information data for at least the
-	       generation of the DSO we are allocating data for.  */
-	    _dl_update_slotinfo (new->l_searchlist.r_list[i]->l_tls_modid);
-# endif
-
-	    GL(dl_init_static_tls) (new->l_searchlist.r_list[i]);
-	    assert (new->l_searchlist.r_list[i]->l_need_tls_init == 0);
+	    /* Does it fit in the array of this list element?  */
+	    if (idx < listp->len)
+	      break;
+	    idx -= listp->len;
+	    prevp = listp;
+	    listp = listp->next;
 	  }
+	while (listp != NULL);
+
+	if (listp == NULL)
+	  {
+	    /* When we come here it means we have to add a new element
+	       to the slotinfo list.  And the new module must be in
+	       the first slot.  */
+	    assert (idx == 0);
+
+	    listp = prevp->next = (struct dtv_slotinfo_list *)
+	      malloc (sizeof (struct dtv_slotinfo_list)
+		      + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+	    if (listp == NULL)
+	      {
+		/* We ran out of memory.  We will simply fail this
+		   call but don't undo anything we did so far.  The
+		   application will crash or be terminated anyway very
+		   soon.  */
+
+		/* We have to do this since some entries in the dtv
+		   slotinfo array might already point to this
+		   generation.  */
+		++GL(dl_tls_generation);
+
+		GLRO(dl_signal_error) (ENOMEM, "dlopen", NULL, N_("\
+cannot create TLS data structures"));
+	      }
+
+	    listp->len = TLS_SLOTINFO_SURPLUS;
+	    listp->next = NULL;
+	    memset (listp->slotinfo, '\0',
+		    TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+	  }
+
+	/* Add the information into the slotinfo data structure.  */
+	listp->slotinfo[idx].map = new->l_searchlist.r_list[i];
+	listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1;
 
 	/* We have to bump the generation counter.  */
 	any_tls = true;
       }
 
   /* Bump the generation number if necessary.  */
-  if (any_tls && __builtin_expect (++GL(dl_tls_generation) == 0, 0))
-    _dl_fatal_printf (N_("\
-TLS generation counter wrapped!  Please report this."));
+  if (any_tls)
+    if (__builtin_expect (++GL(dl_tls_generation) == 0, 0))
+      __libc_fatal (_("TLS generation counter wrapped!  Please report this."));
 #endif
 
   /* Run the initializer functions of new objects.  */
-  _dl_init (new, args->argc, args->argv, args->env);
+  GLRO(dl_init) (new, __libc_argc, __libc_argv, __environ);
 
   /* Now we can make the new map available in the global scope.  */
   if (mode & RTLD_GLOBAL)
@@ -504,14 +532,14 @@ TLS generation counter wrapped!  Please report this."));
 
   /* Let the user know about the opencount.  */
   if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_FILES, 0))
-    _dl_debug_printf ("opening file=%s [%lu]; opencount=%u\n\n",
-		      new->l_name, new->l_ns, new->l_opencount);
+    GLRO(dl_debug_printf) ("opening file=%s [%lu]; opencount=%u\n\n",
+			   new->l_name, new->l_ns, new->l_opencount);
 }
 
 
 void *
-_dl_open (const char *file, int mode, const void *caller_dlopen, Lmid_t nsid,
-	  int argc, char *argv[], char *env[])
+internal_function
+_dl_open (const char *file, int mode, const void *caller_dlopen, Lmid_t nsid)
 {
   struct dl_open_args args;
   const char *objname;
@@ -520,7 +548,8 @@ _dl_open (const char *file, int mode, const void *caller_dlopen, Lmid_t nsid,
 
   if ((mode & RTLD_BINDING_MASK) == 0)
     /* One of the flags must be set.  */
-    _dl_signal_error (EINVAL, file, NULL, N_("invalid mode for dlopen()"));
+    GLRO(dl_signal_error) (EINVAL, file, NULL,
+			   N_("invalid mode for dlopen()"));
 
   /* Make sure we are alone.  */
   __rtld_lock_lock_recursive (GL(dl_load_lock));
@@ -537,20 +566,16 @@ _dl_open (const char *file, int mode, const void *caller_dlopen, Lmid_t nsid,
 	  /* No more namespace available.  */
 	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
 
-	  _dl_signal_error (EINVAL, file, NULL, N_("\
+	  GLRO(dl_signal_error) (EINVAL, file, NULL, N_("\
 no more namespaces available for dlmopen()"));
 	}
-
-      _dl_debug_initialize (0, nsid)->r_state = RT_CONSISTENT;
     }
   /* Never allow loading a DSO in a namespace which is empty.  Such
-     direct placements is only causing problems.  Also don't allow
-     loading into a namespace used for auditing.  */
+     direct placement only causes problems.  */
   else if (nsid != LM_ID_BASE && nsid != __LM_ID_CALLER
-	   && (GL(dl_ns)[nsid]._ns_nloaded == 0
-	       || GL(dl_ns)[nsid]._ns_loaded->l_auditing))
-    _dl_signal_error (EINVAL, file, NULL,
-		      N_("invalid target namespace in dlmopen()"));
+	   && GL(dl_ns)[nsid]._ns_nloaded == 0)
+    GLRO(dl_signal_error) (EINVAL, file, NULL,
+			   N_("invalid target namespace in dlmopen()"));
 
   args.file = file;
   args.mode = mode;
@@ -558,14 +583,11 @@ no more namespaces available for dlmopen()"));
   args.caller_dl_open = RETURN_ADDRESS (0);
   args.map = NULL;
   args.nsid = nsid;
-  args.argc = argc;
-  args.argv = argv;
-  args.env = env;
-  errcode = _dl_catch_error (&objname, &errstring, dl_open_worker, &args);
+  errcode = GLRO(dl_catch_error) (&objname, &errstring, dl_open_worker, &args);
 
 #ifndef MAP_COPY
   /* We must munmap() the cache file.  */
-  _dl_unload_cache ();
+  GLRO(dl_unload_cache) ();
 #endif
 
   /* Release the lock.  */
@@ -581,22 +603,21 @@ no more namespaces available for dlmopen()"));
 	 state if relocation failed, for example.  */
       if (args.map)
 	{
+	  unsigned int i;
+
 	  /* Increment open counters for all objects since this
 	     sometimes has not happened yet.  */
 	  if (args.map->l_searchlist.r_list[0]->l_opencount == 0)
-	    for (unsigned int i = 0; i < args.map->l_searchlist.r_nlist; ++i)
+	    for (i = 0; i < args.map->l_searchlist.r_nlist; ++i)
 	      ++args.map->l_searchlist.r_list[i]->l_opencount;
 
 #ifdef USE_TLS
-	  /* Maybe some of the modules which were loaded use TLS.
+	  /* Maybe some of the modules which were loaded use TLS.
 	     Since it will be removed in the following _dl_close call
-	     we have to mark the dtv array as having gaps to fill the
-	     holes.  This is a pessimistic assumption which won't hurt
-	     if not true.  There is no need to do this when we are
-	     loading the auditing DSOs since TLS has not yet been set
-	     up.  */
-	  if ((mode & __RTLD_AUDIT) == 0)
-	    GL(dl_tls_dtv_gaps) = true;
+	     we have to mark the dtv array as having gaps to fill
+	     the holes.  This is a pessimistic assumption which won't
+	     hurt if not true.  */
+	  GL(dl_tls_dtv_gaps) = true;
 #endif
 
 	  _dl_close (args.map);
@@ -618,23 +639,20 @@ no more namespaces available for dlmopen()"));
 	  memcpy (local_errstring, errstring, len_errstring);
 	}
 
-      if (errstring != INTUSE(_dl_out_of_memory))
+      if (errstring != _dl_out_of_memory)
 	free ((char *) errstring);
 
-      assert (_dl_debug_initialize (0, args.nsid)->r_state == RT_CONSISTENT);
-
       /* Reraise the error.  */
-      _dl_signal_error (errcode, objname, NULL, local_errstring);
+      GLRO(dl_signal_error) (errcode, objname, NULL, local_errstring);
     }
 
-  assert (_dl_debug_initialize (0, args.nsid)->r_state == RT_CONSISTENT);
-
 #ifndef SHARED
   DL_STATIC_INIT (args.map);
 #endif
 
   return args.map;
 }
+libc_hidden_def (_dl_open)
 
 
 #ifdef SCOPE_DEBUG
diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c
index b680683198..4004316c5a 100644
--- a/elf/dl-reloc.c
+++ b/elf/dl-reloc.c
@@ -48,6 +48,8 @@ void
 internal_function __attribute_noinline__
 _dl_allocate_static_tls (struct link_map *map)
 {
+  size_t offset;
+
   /* If the alignment requirements are too high fail.  */
   if (map->l_tls_align > GL(dl_tls_static_align))
     {
@@ -69,15 +71,15 @@ cannot allocate memory in static TLS block"));
 
   n = (freebytes - blsize) / map->l_tls_align;
 
-  size_t offset = GL(dl_tls_static_used) + (freebytes - n * map->l_tls_align
-					    - map->l_tls_firstbyte_offset);
+  offset = GL(dl_tls_static_used) + (freebytes - n * map->l_tls_align
+				     - map->l_tls_firstbyte_offset);
 
   map->l_tls_offset = GL(dl_tls_static_used) = offset;
 # elif TLS_DTV_AT_TP
   size_t used;
   size_t check;
 
-  size_t offset = roundup (GL(dl_tls_static_used), map->l_tls_align);
+  offset = roundup (GL(dl_tls_static_used), map->l_tls_align);
   used = offset + map->l_tls_blocksize;
   check = used;
   /* dl_tls_static_used includes the TCB at the beginning.  */
@@ -91,20 +93,8 @@ cannot allocate memory in static TLS block"));
 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 # endif
 
-  /* If the object is not yet relocated we cannot initialize the
-     static TLS region.  Delay it.  */
-  if (map->l_real->l_relocated)
-    {
-#ifdef SHARED
-      if (__builtin_expect (THREAD_DTV()[0].counter != GL(dl_tls_generation),
-			    0))
-	/* Update the slot information data for at least the generation of
-	   the DSO we are allocating data for.  */
-	(void) _dl_update_slotinfo (map->l_tls_modid);
-#endif
-
-      GL(dl_init_static_tls) (map);
-    }
+  if (map->l_relocated)
+    GL(dl_init_static_tls) (map);
   else
     map->l_need_tls_init = 1;
 }
@@ -124,8 +114,7 @@ _dl_nothread_init_static_tls (struct link_map *map)
 # endif
 
   /* Fill in the DTV slot so that a later LD/GD access will find it.  */
-  THREAD_DTV ()[map->l_tls_modid].pointer.val = dest;
-  THREAD_DTV ()[map->l_tls_modid].pointer.is_static = true;
+  THREAD_DTV ()[map->l_tls_modid].pointer = dest;
 
   /* Initialize the memory.  */
   memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
@@ -148,17 +137,11 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[],
   /* Initialize it to make the compiler happy.  */
   const char *errstring = NULL;
 
-#ifdef SHARED
-  /* If we are auditing, install the same handlers we need for profiling.  */
-  consider_profiling |= GLRO(dl_audit) != NULL;
-#endif
-
   if (l->l_relocated)
     return;
 
   /* If DT_BIND_NOW is set relocate all references in this object.  We
      do not do this if we are profiling, of course.  */
-  // XXX Correct for auditing?
   if (!consider_profiling
       && __builtin_expect (l->l_info[DT_BIND_NOW] != NULL, 0))
     lazy = 0;
@@ -242,6 +225,29 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[],
 	     l->l_lookup_cache.ret = (*ref);				      \
 	     l->l_lookup_cache.value = _lr; }))				      \
      : l)
+#define RESOLVE(ref, version, r_type) \
+    (ELFW(ST_BIND) ((*ref)->st_info) != STB_LOCAL			      \
+     ? ((__builtin_expect ((*ref) == l->l_lookup_cache.sym, 0)		      \
+	 && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class)  \
+		? (bump_num_cache_relocations (),			      \
+	   (*ref) = l->l_lookup_cache.ret,				      \
+	   l->l_lookup_cache.value)					      \
+	: ({ lookup_t _lr;						      \
+	     int _tc = elf_machine_type_class (r_type);			      \
+	     l->l_lookup_cache.type_class = _tc;			      \
+	     l->l_lookup_cache.sym = (*ref);				      \
+	     const struct r_found_version *v = NULL;			      \
+	     int flags = DL_LOOKUP_ADD_DEPENDENCY;			      \
+	     if ((version) != NULL && (version)->hash != 0)		      \
+	       {							      \
+		 v = (version);						      \
+		 flags = 0;						      \
+	       }							      \
+	     _lr = _dl_lookup_symbol_x (strtab + (*ref)->st_name, l, (ref),   \
+					scope, v, _tc, flags, NULL);	      \
+	     l->l_lookup_cache.ret = (*ref);				      \
+	     l->l_lookup_cache.value = _lr; }))				      \
+     : l->l_addr)
 
     /* This macro is used as a callback from elf_machine_rel{a,} when a
        static TLS reloc is about to be performed.  Since (in dl-load.c) we
@@ -270,19 +276,20 @@ _dl_relocate_object (struct link_map *l, struct r_scope_elem *scope[],
 	   will be NULL.  */
 	if (l->l_info[DT_PLTRELSZ] == NULL)
 	  {
-	    errstring = N_("%s: no PLTREL found in object %s\n");
+	    errstring = N_("%s: profiler found no PLTREL in object %s\n");
 	  fatal:
 	    _dl_fatal_printf (errstring,
 			      rtld_progname ?: "<program name unknown>",
 			      l->l_name);
 	  }
 
-	l->l_reloc_result = calloc (sizeof (l->l_reloc_result[0]),
-				    l->l_info[DT_PLTRELSZ]->d_un.d_val);
+	l->l_reloc_result =
+	  (ElfW(Addr) *) calloc (sizeof (ElfW(Addr)),
+				 l->l_info[DT_PLTRELSZ]->d_un.d_val);
 	if (l->l_reloc_result == NULL)
 	  {
 	    errstring = N_("\
-%s: out of memory to store relocation results for %s\n");
+%s: profiler out of memory shadowing PLTREL of %s\n");
 	    goto fatal;
 	  }
       }
diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c
index f92cbe26bd..a0aecda2e8 100644
--- a/elf/dl-runtime.c
+++ b/elf/dl-runtime.c
@@ -1,5 +1,5 @@
 /* On-demand PLT fixup for shared objects.
-   Copyright (C) 1995-2002,2003,2004,2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,6 @@
 #include <alloca.h>
 #include <stdlib.h>
 #include <unistd.h>
-#include <sys/param.h>
 #include <ldsodefs.h>
 #include "dynamic-link.h"
 
@@ -52,15 +51,15 @@
    function.  */
 
 #ifndef ELF_MACHINE_NO_PLT
-DL_FIXUP_VALUE_TYPE
-__attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE
-_dl_fixup (
+static ElfW(Addr)
+__attribute ((used, noinline)) ARCH_FIXUP_ATTRIBUTE
+fixup (
 # ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS
-	   ELF_MACHINE_RUNTIME_FIXUP_ARGS,
+        ELF_MACHINE_RUNTIME_FIXUP_ARGS,
 # endif
-	   /* GKM FIXME: Fix trampoline to pass bounds so we can do
-	      without the `__unbounded' qualifier.  */
-	   struct link_map *__unbounded l, ElfW(Word) reloc_offset)
+	/* GKM FIXME: Fix trampoline to pass bounds so we can do
+	   without the `__unbounded' qualifier.  */
+       struct link_map *__unbounded l, ElfW(Word) reloc_offset)
 {
   const ElfW(Sym) *const symtab
     = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
@@ -71,7 +70,7 @@ _dl_fixup (
   const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
   void *const rel_addr = (void *)(l->l_addr + reloc->r_offset);
   lookup_t result;
-  DL_FIXUP_VALUE_TYPE value;
+  ElfW(Addr) value;
 
   /* Sanity check that we're really looking at a PLT relocation.  */
   assert (ELFW(R_TYPE)(reloc->r_info) == ELF_MACHINE_JMP_SLOT);
@@ -81,6 +80,8 @@ _dl_fixup (
   if (__builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
     {
       const struct r_found_version *version = NULL;
+      // XXX Why exactly do we have the differentiation of the flags here?
+      int flags = DL_LOOKUP_ADD_DEPENDENCY;
 
       if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
 	{
@@ -90,6 +91,8 @@ _dl_fixup (
 	  version = &l->l_versions[ndx];
 	  if (version->hash == 0)
 	    version = NULL;
+	  else
+	    flags = 0;
 	}
 
       result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
@@ -99,16 +102,16 @@ _dl_fixup (
       /* Currently result contains the base load address (or link map)
 	 of the object that defines sym.  Now add in the symbol
 	 offset.  */
-      value = DL_FIXUP_MAKE_VALUE (result,
-				   sym ? LOOKUP_VALUE_ADDRESS (result)
-					 + sym->st_value : 0);
+      value = (sym ? LOOKUP_VALUE_ADDRESS (result) + sym->st_value : 0);
     }
   else
     {
       /* We already found the symbol.  The module (and therefore its load
 	 address) is also known.  */
-      value = DL_FIXUP_MAKE_VALUE (l, l->l_addr + sym->st_value);
+      value = l->l_addr + sym->st_value;
+#ifdef DL_LOOKUP_RETURNS_MAP
       result = l;
+#endif
     }
 
   /* And now perhaps the relocation addend.  */
@@ -124,45 +127,45 @@ _dl_fixup (
 
 #if !defined PROF && !defined ELF_MACHINE_NO_PLT && !__BOUNDED_POINTERS__
 
-DL_FIXUP_VALUE_TYPE
-__attribute ((noinline)) ARCH_FIXUP_ATTRIBUTE
-_dl_profile_fixup (
+static ElfW(Addr)
+__attribute ((used, noinline)) ARCH_FIXUP_ATTRIBUTE
+profile_fixup (
 #ifdef ELF_MACHINE_RUNTIME_FIXUP_ARGS
-		   ELF_MACHINE_RUNTIME_FIXUP_ARGS,
+       ELF_MACHINE_RUNTIME_FIXUP_ARGS,
 #endif
-		   struct link_map *l, ElfW(Word) reloc_offset,
-		   ElfW(Addr) retaddr, void *regs, long int *framesizep)
+       struct link_map *l, ElfW(Word) reloc_offset, ElfW(Addr) retaddr)
 {
   void (*mcount_fct) (ElfW(Addr), ElfW(Addr)) = INTUSE(_dl_mcount);
+  ElfW(Addr) *resultp;
+  lookup_t result;
+  ElfW(Addr) value;
 
   /* This is the address in the array where we store the result of previous
      relocations.  */
-  struct reloc_result *reloc_result
-    = &l->l_reloc_result[reloc_offset / sizeof (PLTREL)];
-  DL_FIXUP_VALUE_TYPE *resultp = &reloc_result->addr;
+  resultp = &l->l_reloc_result[reloc_offset / sizeof (PLTREL)];
 
-  DL_FIXUP_VALUE_TYPE value = *resultp;
-  if (DL_FIXUP_VALUE_CODE_ADDR (value) == 0)
+  value = *resultp;
+  if (value == 0)
     {
       /* This is the first time we have to relocate this object.  */
       const ElfW(Sym) *const symtab
 	= (const void *) D_PTR (l, l_info[DT_SYMTAB]);
-      const char *strtab = (const char *) D_PTR (l, l_info[DT_STRTAB]);
+      const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
 
       const PLTREL *const reloc
 	= (const void *) (D_PTR (l, l_info[DT_JMPREL]) + reloc_offset);
-      const ElfW(Sym) *refsym = &symtab[ELFW(R_SYM) (reloc->r_info)];
-      const ElfW(Sym) *defsym = refsym;
-      lookup_t result;
+      const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
 
       /* Sanity check that we're really looking at a PLT relocation.  */
       assert (ELFW(R_TYPE)(reloc->r_info) == ELF_MACHINE_JMP_SLOT);
 
       /* Look up the target symbol.  If the symbol is marked STV_PROTECTED
 	 don't look in the global scope.  */
-      if (__builtin_expect (ELFW(ST_VISIBILITY) (refsym->st_other), 0) == 0)
+      if (__builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
 	{
 	  const struct r_found_version *version = NULL;
+	  // XXX Why exactly do we have the differentiation of the flags here?
+	  int flags = DL_LOOKUP_ADD_DEPENDENCY;
 
 	  if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
 	    {
@@ -172,9 +175,11 @@ _dl_profile_fixup (
 	      version = &l->l_versions[ndx];
 	      if (version->hash == 0)
 		version = NULL;
+	      else
+		flags = 0;
 	    }
 
-	  result = _dl_lookup_symbol_x (strtab + refsym->st_name, l, &defsym,
+	  result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
 					l->l_scope, version,
 					ELF_RTYPE_CLASS_PLT,
 					DL_LOOKUP_ADD_DEPENDENCY, NULL);
@@ -182,188 +187,26 @@ _dl_profile_fixup (
 	  /* Currently result contains the base load address (or link map)
 	     of the object that defines sym.  Now add in the symbol
 	     offset.  */
-	  value = DL_FIXUP_MAKE_VALUE (result,
-				       defsym != NULL
-				       ? LOOKUP_VALUE_ADDRESS (result)
-					 + defsym->st_value : 0);
+	  value = (sym ? LOOKUP_VALUE_ADDRESS (result) + sym->st_value : 0);
 	}
       else
 	{
 	  /* We already found the symbol.  The module (and therefore its load
 	     address) is also known.  */
-	  value = DL_FIXUP_MAKE_VALUE (l, l->l_addr + refsym->st_value);
+	  value = l->l_addr + sym->st_value;
+#ifdef DL_LOOKUP_RETURNS_MAP
 	  result = l;
+#endif
 	}
       /* And now perhaps the relocation addend.  */
       value = elf_machine_plt_value (l, reloc, value);
 
-#ifdef SHARED
-      /* Auditing checkpoint: we have a new binding.  Provide the
-	 auditing libraries the possibility to change the value and
-	 tell us whether further auditing is wanted.  */
-      if (defsym != NULL && GLRO(dl_naudit) > 0)
-	{
-	  reloc_result->bound = result;
-	  /* Compute index of the symbol entry in the symbol table of
-	     the DSO with the definition.  */
-	  reloc_result->boundndx = (defsym
-				    - (ElfW(Sym) *) D_PTR (result,
-							   l_info[DT_SYMTAB]));
-
-	  /* Determine whether any of the two participating DSOs is
-	     interested in auditing.  */
-	  if ((l->l_audit_any_plt | result->l_audit_any_plt) != 0)
-	    {
-	      unsigned int altvalue = 0;
-	      struct audit_ifaces *afct = GLRO(dl_audit);
-	      /* Synthesize a symbol record where the st_value field is
-		 the result.  */
-	      ElfW(Sym) sym = *defsym;
-	      sym.st_value = DL_FIXUP_VALUE_ADDR (value);
-
-	      /* Keep track whether there is any interest in tracing
-		 the call in the lower two bits.  */
-	      assert (DL_NNS * 2 <= sizeof (reloc_result->flags) * 8);
-	      assert ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT) == 3);
-	      reloc_result->enterexit = LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT;
-
-	      const char *strtab2 = (const void *) D_PTR (result,
-							  l_info[DT_STRTAB]);
-
-	      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-		{
-		  /* XXX Check whether both DSOs must request action or
-		     only one */
-		  if ((l->l_audit[cnt].bindflags & LA_FLG_BINDFROM) != 0
-		      && (result->l_audit[cnt].bindflags & LA_FLG_BINDTO) != 0)
-		    {
-		      unsigned int flags = altvalue;
-		      if (afct->symbind != NULL)
-			{
-			  uintptr_t new_value
-			    = afct->symbind (&sym, reloc_result->boundndx,
-					     &l->l_audit[cnt].cookie,
-					     &result->l_audit[cnt].cookie,
-					     &flags,
-					     strtab2 + defsym->st_name);
-			  if (new_value != (uintptr_t) sym.st_value)
-			    {
-			      altvalue = LA_SYMB_ALTVALUE;
-			      sym.st_value = new_value;
-			    }
-			}
-
-		      /* Remember the results for every audit library and
-			 store a summary in the first two bits.  */
-		      reloc_result->enterexit
-			&= flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT);
-		      reloc_result->enterexit
-			|= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT))
-			    << ((cnt + 1) * 2));
-		    }
-		  else
-		    /* If the bind flags say this auditor is not interested,
-		       set the bits manually.  */
-		    reloc_result->enterexit
-		      |= ((LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT)
-			  << ((cnt + 1) * 2));
-
-		  afct = afct->next;
-		}
-
-	      reloc_result->flags = altvalue;
-	      value = DL_FIXUP_ADDR_VALUE (sym.st_value);
-	    }
-	  else
-	    /* Set all bits since this symbol binding is not interesting.  */
-	    reloc_result->enterexit = (1u << DL_NNS) - 1;
-	}
-#endif
-
       /* Store the result for later runs.  */
       if (__builtin_expect (! GLRO(dl_bind_not), 1))
 	*resultp = value;
     }
 
-  /* By default we do not call the pltexit function.  */
-  long int framesize = -1;
-
-#ifdef SHARED
-  /* Auditing checkpoint: report the PLT entering and allow the
-     auditors to change the value.  */
-  if (DL_FIXUP_VALUE_CODE_ADDR (value) != 0 && GLRO(dl_naudit) > 0
-      /* Don't do anything if no auditor wants to intercept this call.  */
-      && (reloc_result->enterexit & LA_SYMB_NOPLTENTER) == 0)
-    {
-      ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound,
-						l_info[DT_SYMTAB])
-			   + reloc_result->boundndx);
-
-      /* Set up the sym parameter.  */
-      ElfW(Sym) sym = *defsym;
-      sym.st_value = DL_FIXUP_VALUE_ADDR (value);
-
-      /* Get the symbol name.  */
-      const char *strtab = (const void *) D_PTR (reloc_result->bound,
-						 l_info[DT_STRTAB]);
-      const char *symname = strtab + sym.st_name;
-
-      /* Keep track of overwritten addresses.  */
-      unsigned int altvalue = reloc_result->flags;
-
-      struct audit_ifaces *afct = GLRO(dl_audit);
-      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	{
- 	  if (afct->ARCH_LA_PLTENTER != NULL
-	      && (reloc_result->enterexit
-		  & (LA_SYMB_NOPLTENTER << (2 * (cnt + 1)))) == 0)
-	    {
-	      unsigned int flags = altvalue;
-	      long int new_framesize = -1;
-	      uintptr_t new_value
-		= afct->ARCH_LA_PLTENTER (&sym, reloc_result->boundndx,
-					  &l->l_audit[cnt].cookie,
-					  &reloc_result->bound->l_audit[cnt].cookie,
-					  regs, &flags, symname,
-					  &new_framesize);
-	      if (new_value != (uintptr_t) sym.st_value)
-		{
-		  altvalue = LA_SYMB_ALTVALUE;
-		  sym.st_value = new_value;
-		}
-
-	      /* Remember the results for every audit library and
-		 store a summary in the first two bits.  */
-	      reloc_result->enterexit
-		|= ((flags & (LA_SYMB_NOPLTENTER | LA_SYMB_NOPLTEXIT))
-		    << (2 * (cnt + 1)));
-
-	      if ((reloc_result->enterexit & (LA_SYMB_NOPLTEXIT
-					      << (2 * (cnt + 1))))
-		  == 0 && new_framesize != -1 && framesize != -2)
-		{
-		  /* If this is the first call providing information,
-		     use it.  */
-		  if (framesize == -1)
-		    framesize = new_framesize;
-		  /* If two pltenter calls provide conflicting information,
-		     use the larger value.  */
-		  else if (new_framesize != framesize)
-		    framesize = MAX (new_framesize, framesize);
-		}
-	    }
-
-	  afct = afct->next;
-	}
-
-      value = DL_FIXUP_ADDR_VALUE (sym.st_value);
-    }
-#endif
-
-  /* Store the frame size information.  */
-  *framesizep = framesize;
-
-  (*mcount_fct) (retaddr, DL_FIXUP_VALUE_CODE_ADDR (value));
+  (*mcount_fct) (retaddr, value);
 
   return value;
 }
@@ -371,45 +214,9 @@ _dl_profile_fixup (
 #endif /* PROF && ELF_MACHINE_NO_PLT */
 
 
-#include <stdio.h>
-void
-ARCH_FIXUP_ATTRIBUTE
-_dl_call_pltexit (struct link_map *l, ElfW(Word) reloc_offset,
-		  const void *inregs, void *outregs)
-{
-#ifdef SHARED
-  /* This is the address in the array where we store the result of previous
-     relocations.  */
-  // XXX Maybe the bound information must be stored on the stack since
-  // XXX with bind_not a new value could have been stored in the meantime.
-  struct reloc_result *reloc_result
-    = &l->l_reloc_result[reloc_offset / sizeof (PLTREL)];
-  ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound,
-					    l_info[DT_SYMTAB])
-		       + reloc_result->boundndx);
-
-  /* Set up the sym parameter.  */
-  ElfW(Sym) sym = *defsym;
-
-  /* Get the symbol name.  */
-  const char *strtab = (const void *) D_PTR (reloc_result->bound,
-					     l_info[DT_STRTAB]);
-  const char *symname = strtab + sym.st_name;
-
-  struct audit_ifaces *afct = GLRO(dl_audit);
-  for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-    {
-      if (afct->ARCH_LA_PLTEXIT != NULL
-	  && (reloc_result->enterexit
-	      & (LA_SYMB_NOPLTEXIT >> (2 * cnt))) == 0)
-	{
-	  afct->ARCH_LA_PLTEXIT (&sym, reloc_result->boundndx,
-				 &l->l_audit[cnt].cookie,
-				 &reloc_result->bound->l_audit[cnt].cookie,
-				 inregs, outregs, symname);
-	}
+/* This macro is defined in dl-machine.h to define the entry point called
+   by the PLT.  The `fixup' function above does the real work, but a little
+   more twiddling is needed to get the stack right and jump to the address
+   finally resolved.  */
 
-      afct = afct->next;
-    }
-#endif
-}
+ELF_MACHINE_RUNTIME_TRAMPOLINE
diff --git a/elf/dl-sym.c b/elf/dl-sym.c
index ca83daf21d..ba00ef56f4 100644
--- a/elf/dl-sym.c
+++ b/elf/dl-sym.c
@@ -116,69 +116,14 @@ RTLD_NEXT used in code not dynamically loaded"));
 
   if (ref != NULL)
     {
-      void *value;
-
 #if defined USE_TLS && defined SHARED
       if (ELFW(ST_TYPE) (ref->st_info) == STT_TLS)
 	/* The found symbol is a thread-local storage variable.
 	   Return the address for to the current thread.  */
-	value = _dl_tls_symaddr (result, ref);
-      else
-#endif
-	value = DL_SYMBOL_ADDRESS (result, ref);
-
-#ifdef SHARED
-      /* Auditing checkpoint: we have a new binding.  Provide the
-	 auditing libraries the possibility to change the value and
-	 tell us whether further auditing is wanted.  */
-      if (__builtin_expect (GLRO(dl_naudit) > 0, 0))
-	{
-	  const char *strtab = (const char *) D_PTR (result,
-						     l_info[DT_STRTAB]);
-	  /* Compute index of the symbol entry in the symbol table of
-	     the DSO with the definition.  */
-	  unsigned int ndx = (ref - (ElfW(Sym) *) D_PTR (result,
-							 l_info[DT_SYMTAB]));
-
-	  if ((match->l_audit_any_plt | result->l_audit_any_plt) != 0)
-	    {
-	      unsigned int altvalue = 0;
-	      struct audit_ifaces *afct = GLRO(dl_audit);
-	      /* Synthesize a symbol record where the st_value field is
-		 the result.  */
-	      ElfW(Sym) sym = *ref;
-	      sym.st_value = (ElfW(Addr)) value;
-
-	      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-		{
-		  if (afct->symbind != NULL
-		      && ((match->l_audit[cnt].bindflags & LA_FLG_BINDFROM)
-			  != 0
-			  || ((result->l_audit[cnt].bindflags & LA_FLG_BINDTO)
-			      != 0)))
-		    {
-		      unsigned int flags = altvalue | LA_SYMB_DLSYM;
-		      uintptr_t new_value
-			= afct->symbind (&sym, ndx,
-					 &match->l_audit[cnt].cookie,
-					 &result->l_audit[cnt].cookie,
-					 &flags, strtab + ref->st_name);
-		      if (new_value != (uintptr_t) sym.st_value)
-			{
-			  altvalue = LA_SYMB_ALTVALUE;
-			  sym.st_value = new_value;
-			}
-		    }
-
-		  afct = afct->next;
-		}
-
-	      value = (void *) sym.st_value;
-	    }
-	}
+	return _dl_tls_symaddr (result, ref);
 #endif
 
-      return value;
+      return DL_SYMBOL_ADDRESS (result, ref);
     }
 
   return NULL;
diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
index 5118144174..f9559dc59c 100644
--- a/elf/dynamic-link.h
+++ b/elf/dynamic-link.h
@@ -20,7 +20,7 @@
 #include <elf.h>
 #include <assert.h>
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 /* We pass reloc_addr as a pointer to void, as opposed to a pointer to
    ElfW(Addr), because not all architectures can assume that the
    relocated address is properly aligned, whereas the compiler is
@@ -64,7 +64,7 @@ elf_machine_lazy_rel (struct link_map *map,
 
 
 /* Read the dynamic section at DYN and fill in INFO with indices DT_*.  */
-#ifndef RESOLVE_MAP
+#ifndef RESOLVE
 static
 #else
 auto
@@ -199,7 +199,7 @@ elf_get_dynamic_info (struct link_map *l, ElfW(Dyn) *temp)
 #endif
 }
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 
 # ifdef RTLD_BOOTSTRAP
 #  define ELF_DURING_STARTUP (1)
diff --git a/elf/ldconfig.c b/elf/ldconfig.c
index 70f3a8f402..f8504fb95e 100644
--- a/elf/ldconfig.c
+++ b/elf/ldconfig.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999-2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 1999.
 
@@ -279,7 +279,7 @@ print_version (FILE *stream, struct argp_state *state)
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
   fprintf (stream, gettext ("Written by %s.\n"),
 	   "Andreas Jaeger");
 }
diff --git a/elf/ldd.bash.in b/elf/ldd.bash.in
index 4587efd1cb..4d7c33c728 100644
--- a/elf/ldd.bash.in
+++ b/elf/ldd.bash.in
@@ -1,5 +1,5 @@
 #! @BASH@
-# Copyright (C) 1996-2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1996-2001, 2002, 2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -39,7 +39,7 @@ while test $# -gt 0; do
     printf $"Copyright (C) %s Free Software Foundation, Inc.
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-" "2005"
+" "2004"
     printf $"Written by %s and %s.
 " "Roland McGrath" "Ulrich Drepper"
     exit 0
diff --git a/elf/link.h b/elf/link.h
index fdda019cbe..6d5ad9d98c 100644
--- a/elf/link.h
+++ b/elf/link.h
@@ -1,6 +1,6 @@
 /* Data structure for communication from the run-time dynamic linker for
    loaded ELF shared objects.
-   Copyright (C) 1995-2001, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-1999, 2000, 2001, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -33,7 +33,6 @@
 #define _ElfW_1(e,w,t)	e##w##t
 
 #include <bits/elfclass.h>		/* Defines __ELF_NATIVE_CLASS.  */
-#include <bits/link.h>
 
 /* Rendezvous structure used by the run-time dynamic linker to communicate
    details of shared object loading to the debugger.  If the executable's
@@ -95,47 +94,6 @@ struct link_map
 
 #ifdef __USE_GNU
 
-/* Version numbers for la_version handshake interface.  */
-#define LAV_CURRENT	1
-
-/* Activity types signaled through la_activity.  */
-enum
-  {
-    LA_ACT_CONSISTENT,		/* Link map consistent again.  */
-    LA_ACT_ADD,			/* New object will be added.  */
-    LA_ACT_DELETE		/* Objects will be removed.  */
-  };
-
-/* Values representing origin of name for dynamic loading.  */
-enum
-  {
-    LA_SER_ORIG = 0x01,		/* Original name.  */
-    LA_SER_LIBPATH = 0x02,	/* Directory from LD_LIBRARY_PATH.  */
-    LA_SER_RUNPATH = 0x04,	/* Directory from RPATH/RUNPATH.  */
-    LA_SER_CONFIG = 0x08,	/* Found through ldconfig.  */
-    LA_SER_DEFAULT = 0x40,	/* Default directory.  */
-    LA_SER_SECURE = 0x80	/* Unused.  */
-  };
-
-/* Values for la_objopen return value.  */
-enum
-  {
-    LA_FLG_BINDTO = 0x01,	/* Audit symbols bound to this object.  */
-    LA_FLG_BINDFROM = 0x02	/* Audit symbols bound from this object.  */
-  };
-
-/* Values for la_symbind flags parameter.  */
-enum
-  {
-    LA_SYMB_NOPLTENTER = 0x01,	/* la_pltenter will not be called.  */
-    LA_SYMB_NOPLTEXIT = 0x02,	/* la_pltexit will not be called.  */
-    LA_SYMB_STRUCTCALL = 0x04,	/* Return value is a structure.  */
-    LA_SYMB_DLSYM = 0x08,	/* Binding due to dlsym call.  */
-    LA_SYMB_ALTVALUE = 0x10	/* Value has been changed by a previous
-				   la_symbind call.  */
-  };
-
-
 struct dl_phdr_info
   {
     ElfW(Addr) dlpi_addr;
@@ -156,28 +114,9 @@ struct dl_phdr_info
 
 __BEGIN_DECLS
 
-extern int dl_iterate_phdr (int (*__callback) (struct dl_phdr_info *,
-					       size_t, void *),
-			    void *__data);
-
-
-/* Prototypes for the ld.so auditing interfaces.  These are not
-   defined anywhere in ld.so but instead have to be provided by the
-   auditing DSO.  */
-extern unsigned int la_version (unsigned int __version);
-extern void la_activity (uintptr_t *__cookie, unsigned int __flag);
-extern char *la_objsearch (const char *__name, uintptr_t *__cookie,
-			   unsigned int __flag);
-extern unsigned int la_objopen (struct link_map *__map, Lmid_t __lmid,
-				uintptr_t *__cookie);
-extern void la_preinit (uintptr_t *__cookie);
-extern uintptr_t la_symbind32 (Elf32_Sym *__sym, unsigned int __ndx,
-			       uintptr_t *__refcook, uintptr_t *__defcook,
-			       unsigned int *__flags, const char *__symname);
-extern uintptr_t la_symbind64 (Elf64_Sym *__sym, unsigned int __ndx,
-			       uintptr_t *__refcook, uintptr_t *__defcook,
-			       unsigned int *__flags, const char *__symname);
-extern unsigned int la_objclose (uintptr_t *__cookie);
+extern int dl_iterate_phdr (int (*callback) (struct dl_phdr_info *info,
+					     size_t size, void *data),
+			    void *data);
 
 __END_DECLS
 
diff --git a/elf/rtld.c b/elf/rtld.c
index b72216ba96..ee7291477f 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -1,5 +1,5 @@
 /* Run time dynamic linker.
-   Copyright (C) 1995-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -79,13 +79,6 @@ INTDEF(_dl_argv)
 /* Nonzero if we were run directly.  */
 unsigned int _dl_skip_args attribute_relro attribute_hidden;
 
-/* List of auditing DSOs.  */
-static struct audit_list
-{
-  const char *name;
-  struct audit_list *next;
-} *audit_list;
-
 #ifndef HAVE_INLINED_SYSCALLS
 /* Set nonzero during loading and initialization of executable and
    libraries, cleared before the executable's entry point runs.  This
@@ -133,14 +126,25 @@ struct rtld_global_ro _rtld_global_ro attribute_relro =
     ._dl_fpu_control = _FPU_DEFAULT,
 
     /* Function pointers.  */
+    ._dl_get_origin = _dl_get_origin,
+    ._dl_dst_count = _dl_dst_count,
+    ._dl_dst_substitute = _dl_dst_substitute,
+    ._dl_map_object = _dl_map_object,
+    ._dl_map_object_deps = _dl_map_object_deps,
+    ._dl_relocate_object = _dl_relocate_object,
+    ._dl_check_map_versions = _dl_check_map_versions,
+    ._dl_init = _dl_init,
+    ._dl_debug_state = _dl_debug_state,
+#ifndef MAP_COPY
+    ._dl_unload_cache = _dl_unload_cache,
+#endif
     ._dl_debug_printf = _dl_debug_printf,
     ._dl_catch_error = _dl_catch_error,
     ._dl_signal_error = _dl_signal_error,
+    ._dl_start_profile = _dl_start_profile,
     ._dl_mcount = _dl_mcount_internal,
     ._dl_lookup_symbol_x = _dl_lookup_symbol_x,
-    ._dl_check_caller = _dl_check_caller,
-    ._dl_open = _dl_open,
-    ._dl_close = _dl_close
+    ._dl_check_caller = _dl_check_caller
   };
 /* If we would use strong_alias here the compiler would see a
    non-hidden definition.  This would undo the effect of the previous
@@ -263,7 +267,6 @@ _dl_start_final (void *arg, struct dl_start_final_info *info)
   memcpy (GL(dl_rtld_map).l_info, info->l.l_info,
 	  sizeof GL(dl_rtld_map).l_info);
   GL(dl_rtld_map).l_mach = info->l.l_mach;
-  GL(dl_rtld_map).l_relocated = 1;
 #endif
   _dl_setup_hash (&GL(dl_rtld_map));
   GL(dl_rtld_map).l_real = &GL(dl_rtld_map);
@@ -345,6 +348,8 @@ _dl_start (void *arg)
 #define RTLD_BOOTSTRAP
 #define RESOLVE_MAP(sym, version, flags) \
   ((*(sym))->st_shndx == SHN_UNDEF ? 0 : &bootstrap_map)
+#define RESOLVE(sym, version, flags) \
+  ((*(sym))->st_shndx == SHN_UNDEF ? 0 : bootstrap_map.l_addr)
 #include "dynamic-link.h"
 
   if (HP_TIMING_INLINE && HP_TIMING_AVAIL)
@@ -467,7 +472,7 @@ _dl_start (void *arg)
 	  while (remaining-- > 0)
 	    *p++ = '\0';
 	}
-# endif
+#endif
 
 	/* Install the pointer to the dtv.  */
 
@@ -510,7 +515,6 @@ _dl_start (void *arg)
 
       ELF_DYNAMIC_RELOCATE (&bootstrap_map, 0, 0);
     }
-  bootstrap_map.l_relocated = 1;
 
   /* Please note that we don't allow profiling of this object and
      therefore need not test whether we have to allocate the array
@@ -562,19 +566,6 @@ struct map_args
   struct link_map *map;
 };
 
-struct dlmopen_args
-{
-  const char *fname;
-  struct link_map *map;
-};
-
-struct lookup_args
-{
-  const char *name;
-  struct link_map *map;
-  void *result;
-};
-
 /* Arguments to version_check_doit.  */
 struct version_check_args
 {
@@ -600,28 +591,6 @@ map_doit (void *a)
 }
 
 static void
-dlmopen_doit (void *a)
-{
-  struct dlmopen_args *args = (struct dlmopen_args *) a;
-  args->map = _dl_open (args->fname, RTLD_LAZY | __RTLD_DLOPEN | __RTLD_AUDIT,
-			dl_main, LM_ID_NEWLM, _dl_argc, INTUSE(_dl_argv),
-			__environ);
-}
-
-static void
-lookup_doit (void *a)
-{
-  struct lookup_args *args = (struct lookup_args *) a;
-  const ElfW(Sym) *ref = NULL;
-  args->result = NULL;
-  lookup_t l = _dl_lookup_symbol_x (args->name, args->map, &ref,
-				    args->map->l_local_scope, NULL, 0,
-				    DL_LOOKUP_RETURN_NEWEST, NULL);
-  if (ref != NULL)
-    args->result = DL_SYMBOL_ADDRESS (l, ref);
-}
-
-static void
 version_check_doit (void *a)
 {
   struct version_check_args *args = (struct version_check_args *) a;
@@ -679,80 +648,6 @@ match_version (const char *string, struct link_map *map)
   return 0;
 }
 
-#ifdef USE_TLS
-static bool tls_init_tp_called;
-
-static void *
-init_tls (void)
-{
-  /* Number of elements in the static TLS block.  */
-  GL(dl_tls_static_nelem) = GL(dl_tls_max_dtv_idx);
-
-  /* Do not do this twice.  The audit interface might have required
-     the DTV interfaces to be set up early.  */
-  if (GL(dl_initial_dtv) != NULL)
-    return NULL;
-
-  /* Allocate the array which contains the information about the
-     dtv slots.  We allocate a few entries more than needed to
-     avoid the need for reallocation.  */
-  size_t nelem = GL(dl_tls_max_dtv_idx) + 1 + TLS_SLOTINFO_SURPLUS;
-
-  /* Allocate.  */
-  GL(dl_tls_dtv_slotinfo_list) = (struct dtv_slotinfo_list *)
-    calloc (sizeof (struct dtv_slotinfo_list)
-	    + nelem * sizeof (struct dtv_slotinfo), 1);
-  /* No need to check the return value.  If memory allocation failed
-     the program would have been terminated.  */
-
-  struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
-  GL(dl_tls_dtv_slotinfo_list)->len = nelem;
-  GL(dl_tls_dtv_slotinfo_list)->next = NULL;
-
-  /* Fill in the information from the loaded modules.  No namespace
-     but the base one can be filled at this time.  */
-  assert (GL(dl_ns)[LM_ID_BASE + 1]._ns_loaded == NULL);
-  int i = 0;
-  for (struct link_map *l = GL(dl_ns)[LM_ID_BASE]._ns_loaded; l != NULL;
-       l = l->l_next)
-    if (l->l_tls_blocksize != 0)
-      {
-	/* This is a module with TLS data.  Store the map reference.
-	   The generation counter is zero.  */
-	slotinfo[i].map = l;
-	/* slotinfo[i].gen = 0; */
-	++i;
-      }
-  assert (i == GL(dl_tls_max_dtv_idx));
-
-  /* Compute the TLS offsets for the various blocks.  */
-  _dl_determine_tlsoffset ();
-
-  /* Construct the static TLS block and the dtv for the initial
-     thread.  For some platforms this will include allocating memory
-     for the thread descriptor.  The memory for the TLS block will
-     never be freed.  It should be allocated accordingly.  The dtv
-     array can be changed if dynamic loading requires it.  */
-  void *tcbp = _dl_allocate_tls_storage ();
-  if (tcbp == NULL)
-    _dl_fatal_printf ("\
-cannot allocate TLS data structures for initial thread");
-
-  /* Store for detection of the special case by __tls_get_addr
-     so it knows not to pass this dtv to the normal realloc.  */
-  GL(dl_initial_dtv) = GET_DTV (tcbp);
-
-  /* And finally install it for the main thread.  If ld.so itself uses
-     TLS we know the thread pointer was initialized earlier.  */
-  const char *lossage = TLS_INIT_TP (tcbp, USE___THREAD);
-  if (__builtin_expect (lossage != NULL, 0))
-    _dl_fatal_printf ("cannot set up thread-local storage: %s\n", lossage);
-  tls_init_tp_called = true;
-
-  return tcbp;
-}
-#endif
-
 #ifdef _LIBC_REENTRANT
 /* _dl_error_catch_tsd points to this for the single-threaded case.
    It's reset by the thread library for multithreaded programs.  */
@@ -807,7 +702,7 @@ dl_main (const ElfW(Phdr) *phdr,
   hp_timing_t diff;
 #endif
 #ifdef USE_TLS
-  void *tcbp = NULL;
+  void *tcbp;
 #endif
 
 #ifdef _LIBC_REENTRANT
@@ -931,7 +826,6 @@ of this helper program; chances are you did not intend to run this program.\n\
 	 objects.  */
       _dl_init_paths (library_path);
 
-
       /* The initialization of _dl_stack_flags done below assumes the
 	 executable's PT_GNU_STACK may have been honored by the kernel, and
 	 so a PT_GNU_STACK with PF_X set means the stack started out with
@@ -993,10 +887,10 @@ of this helper program; chances are you did not intend to run this program.\n\
     {
       /* Create a link_map for the executable itself.
 	 This will be what dlopen on "" returns.  */
-      main_map = _dl_new_object ((char *) "", "", lt_executable, NULL,
-				 __RTLD_OPENEXEC, LM_ID_BASE);
-      assert (main_map != NULL);
-      assert (main_map == GL(dl_ns)[LM_ID_BASE]._ns_loaded);
+      _dl_new_object ((char *) "", "", lt_executable, NULL, 0, LM_ID_BASE);
+      main_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
+      if (main_map == NULL)
+	_dl_fatal_printf ("cannot allocate memory for link map\n");
       main_map->l_phdr = phdr;
       main_map->l_phnum = phnum;
       main_map->l_entry = *user_entry;
@@ -1097,9 +991,8 @@ of this helper program; chances are you did not intend to run this program.\n\
 	    main_map->l_text_end = allocend;
 	}
 	break;
-
-      case PT_TLS:
 #ifdef USE_TLS
+      case PT_TLS:
 	if (ph->p_memsz > 0)
 	  {
 	    /* Note that in the case the dynamic linker we duplicate work
@@ -1119,12 +1012,8 @@ of this helper program; chances are you did not intend to run this program.\n\
 	    /* This image gets the ID one.  */
 	    GL(dl_tls_max_dtv_idx) = main_map->l_tls_modid = 1;
 	  }
-#else
-	_dl_fatal_printf ("\
-ld.so does not support TLS, but program uses it!\n");
-#endif
 	break;
-
+#endif
       case PT_GNU_STACK:
 	GL(dl_stack_flags) = ph->p_flags;
 	break;
@@ -1156,26 +1045,6 @@ ld.so does not support TLS, but program uses it!\n");
   else
     assert (GL(dl_rtld_map).l_libname); /* How else did we get here?  */
 
-  /* If the current libname is different from the SONAME, add the
-     latter as well.  */
-  if (GL(dl_rtld_map).l_info[DT_SONAME] != NULL
-      && strcmp (GL(dl_rtld_map).l_libname->name,
-		 (const char *) D_PTR (&GL(dl_rtld_map), l_info[DT_STRTAB])
-		 + GL(dl_rtld_map).l_info[DT_SONAME]->d_un.d_val) != 0)
-    {
-      static struct libname_list newname;
-      newname.name = ((char *) D_PTR (&GL(dl_rtld_map), l_info[DT_STRTAB])
-		      + GL(dl_rtld_map).l_info[DT_SONAME]->d_un.d_ptr);
-      newname.next = NULL;
-      newname.dont_free = 1;
-
-      assert (GL(dl_rtld_map).l_libname->next == NULL);
-      GL(dl_rtld_map).l_libname->next = &newname;
-    }
-  /* The ld.so must be relocated since otherwise loading audit modules
-     will fail since they reuse the very same ld.so.  */
-  assert (GL(dl_rtld_map).l_relocated);
-
   if (! rtld_is_main)
     {
       /* Extract the contents of the dynamic section for easy access.  */
@@ -1205,11 +1074,6 @@ ld.so does not support TLS, but program uses it!\n");
        objects.  */
     _dl_init_paths (library_path);
 
-  /* Initialize _r_debug.  */
-  struct r_debug *r = _dl_debug_initialize (GL(dl_rtld_map).l_addr,
-					    LM_ID_BASE);
-  r->r_state = RT_CONSISTENT;
-
   /* Put the link_map for ourselves on the chain so it can be found by
      name.  Note that at this point the global chain of link maps contains
      exactly one element, which is pointed to by dl_loaded.  */
@@ -1284,7 +1148,6 @@ ld.so does not support TLS, but program uses it!\n");
   GL(dl_rtld_map).l_phdr = rtld_phdr;
   GL(dl_rtld_map).l_phnum = rtld_ehdr->e_phnum;
 
-
   /* PT_GNU_RELRO is usually the last phdr.  */
   size_t cnt = rtld_ehdr->e_phnum;
   while (cnt-- > 0)
@@ -1295,204 +1158,6 @@ ld.so does not support TLS, but program uses it!\n");
 	break;
       }
 
-#ifdef USE_TLS
-  /* Add the dynamic linker to the TLS list if it also uses TLS.  */
-  if (GL(dl_rtld_map).l_tls_blocksize != 0)
-    /* Assign a module ID.  Do this before loading any audit modules.  */
-    GL(dl_rtld_map).l_tls_modid = _dl_next_tls_modid ();
-#endif
-
-  /* If we have auditing DSOs to load, do it now.  */
-  if (__builtin_expect (audit_list != NULL, 0))
-    {
-      /* Iterate over all entries in the list.  The order is important.  */
-      struct audit_ifaces *last_audit = NULL;
-      struct audit_list *al = audit_list->next;
-      do
-	{
-#ifdef USE_TLS
-	  int tls_idx = GL(dl_tls_max_dtv_idx);
-
-	  /* Now it is time to determine the layout of the static TLS
-	     block and allocate it for the initial thread.  Note that we
-	     always allocate the static block, we never defer it even if
-	     no DF_STATIC_TLS bit is set.  The reason is that we know
-	     glibc will use the static model.  */
-# ifndef TLS_INIT_TP_EXPENSIVE
-#  define TLS_INIT_TP_EXPENSIVE 0
-# endif
-
-	  /* Since we start using the auditing DSOs right away we need to
-	     initialize the data structures now.  */
-	  if (!TLS_INIT_TP_EXPENSIVE)
-	    tcbp = init_tls ();
-#endif
-	  struct dlmopen_args dlmargs;
-	  dlmargs.fname = al->name;
-	  dlmargs.map = NULL;
-
-	  const char *objname;
-	  const char *err_str = NULL;
-	  (void) _dl_catch_error (&objname, &err_str, dlmopen_doit, &dlmargs);
-	  if (__builtin_expect (err_str != NULL, 0))
-	    {
-	    not_loaded:
-	      _dl_error_printf ("\
-ERROR: ld.so: object '%s' cannot be loaded as audit interface: %s; ignored.\n",
-				al->name, err_str);
-	      free ((char *) err_str);
-	    }
-	  else
-	    {
-	      struct lookup_args largs;
-	      largs.name = "la_version";
-	      largs.map = dlmargs.map;
-
-	      /* Check whether the interface version matches.  */
-	      (void) _dl_catch_error (&objname, &err_str, lookup_doit, &largs);
-
-	      unsigned int (*laversion) (unsigned int);
-	      unsigned int lav;
-	      if  (err_str == NULL
-		   && (laversion = largs.result) != NULL
-		   && (lav = laversion (LAV_CURRENT)) > 0
-		   && lav <= LAV_CURRENT)
-		{
-		  /* Allocate structure for the callback function pointers.
-		     This call can never fail.  */
-		  union
-		  {
-		    struct audit_ifaces ifaces;
-#define naudit_ifaces 8
-		    void (*fptr[naudit_ifaces]) (void);
-		  } *newp = malloc (sizeof (*newp));
-
-		  /* Names of the auditing interfaces.  All in one
-		     long string.  */
-		  static const char audit_iface_names[] =
-		    "la_activity\0"
-		    "la_objsearch\0"
-		    "la_objopen\0"
-		    "la_preinit\0"
-#if __ELF_NATIVE_CLASS == 32
-		    "la_symbind32\0"
-#elif __ELF_NATIVE_CLASS == 64
-		    "la_symbind64\0"
-#else
-# error "__ELF_NATIVE_CLASS must be defined"
-#endif
-#define STRING(s) __STRING (s)
-		    "la_" STRING (ARCH_LA_PLTENTER) "\0"
-		    "la_" STRING (ARCH_LA_PLTEXIT) "\0"
-		    "la_objclose\0";
-		  unsigned int cnt = 0;
-		  const char *cp = audit_iface_names;
-		  do
-		    {
-		      largs.name = cp;
-		      (void) _dl_catch_error (&objname, &err_str, lookup_doit,
-					      &largs);
-
-		      /* Store the pointer.  */
-		      if (err_str == NULL && largs.result != NULL)
-			{
-			  newp->fptr[cnt] = largs.result;
-
-			  /* The dynamic linker link map is statically
-			     allocated, initialize the data now.   */
-			  GL(dl_rtld_map).l_audit[cnt].cookie
-			    = (intptr_t) &GL(dl_rtld_map);
-			}
-		      else
-			newp->fptr[cnt] = NULL;
-		      ++cnt;
-
-		      cp = (char *) rawmemchr (cp, '\0') + 1;
-		    }
-		  while (*cp != '\0');
-		  assert (cnt == naudit_ifaces);
-
-		  /* Now append the new auditing interface to the list.  */
-		  newp->ifaces.next = NULL;
-		  if (last_audit == NULL)
-		    last_audit = GLRO(dl_audit) = &newp->ifaces;
-		  else
-		    last_audit = last_audit->next = &newp->ifaces;
-		  ++GLRO(dl_naudit);
-
-		  /* Mark the DSO as being used for auditing.  */
-		  dlmargs.map->l_auditing = 1;
-		}
-	      else
-		{
-		  /* We cannot use the DSO, it does not have the
-		     appropriate interfaces or it expects something
-		     more recent.  */
-#ifndef NDEBUG
-		  Lmid_t ns = dlmargs.map->l_ns;
-#endif
-		  _dl_close (dlmargs.map);
-
-		  /* Make sure the namespace has been cleared entirely.  */
-		  assert (GL(dl_ns)[ns]._ns_loaded == NULL);
-		  assert (GL(dl_ns)[ns]._ns_nloaded == 0);
-
-#ifdef USE_TLS
-		  GL(dl_tls_max_dtv_idx) = tls_idx;
-#endif
-		  goto not_loaded;
-		}
-	    }
-
-	  al = al->next;
-	}
-      while (al != audit_list->next);
-
-      /* If we have any auditing modules, announce that we already
-	 have two objects loaded.  */
-      if (__builtin_expect (GLRO(dl_naudit) > 0, 0))
-	{
-	  struct link_map *ls[2] = { main_map, &GL(dl_rtld_map) };
-
-	  for (unsigned int outer = 0; outer < 2; ++outer)
-	    {
-	      struct audit_ifaces *afct = GLRO(dl_audit);
-	      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-		{
-		  if (afct->objopen != NULL)
-		    {
-		      ls[outer]->l_audit[cnt].bindflags
-			= afct->objopen (ls[outer], LM_ID_BASE,
-					 &ls[outer]->l_audit[cnt].cookie);
-
-		      ls[outer]->l_audit_any_plt
-			|= ls[outer]->l_audit[cnt].bindflags != 0;
-		    }
-
-		  afct = afct->next;
-		}
-	    }
-	}
-    }
-
-  /* We start adding objects.  */
-  r->r_state = RT_ADD;
-  _dl_debug_state ();
-
-  /* Auditing checkpoint: we are ready to signal that the initial map
-     is being constructed.  */
-  if (__builtin_expect (GLRO(dl_naudit) > 0, 0))
-    {
-      struct audit_ifaces *afct = GLRO(dl_audit);
-      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	{
-	  if (afct->activity != NULL)
-	    afct->activity (&main_map->l_audit[cnt].cookie, LA_ACT_ADD);
-
-	  afct = afct->next;
-	}
-    }
-
   /* We have two ways to specify objects to preload: via environment
      variable and via the file /etc/ld.so.preload.  The latter can also
      be used when security is enabled.  */
@@ -1747,15 +1412,12 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
 		{
 		  if (! l->l_addr)
 		    l->l_addr = ph->p_vaddr;
-		  if (ph->p_vaddr + ph->p_memsz >= l->l_map_end)
+		  else if (ph->p_vaddr + ph->p_memsz >= l->l_map_end)
 		    l->l_map_end = ph->p_vaddr + ph->p_memsz;
-		  if ((ph->p_flags & PF_X)
+		  else if ((ph->p_flags & PF_X)
 			   && ph->p_vaddr + ph->p_memsz >= l->l_text_end)
 		    l->l_text_end = ph->p_vaddr + ph->p_memsz;
 		}
-	      else
-		/* There must be no TLS segment.  */
-		assert (ph->p_type != PT_TLS);
 	    }
 	  l->l_map_start = (ElfW(Addr)) GLRO(dl_sysinfo_dso);
 	  l->l_addr = l->l_map_start - l->l_addr;
@@ -1871,6 +1533,20 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
   }
 
 #ifdef USE_TLS
+  /* Now it is time to determine the layout of the static TLS block
+     and allocate it for the initial thread.  Note that we always
+     allocate the static block, we never defer it even if no
+     DF_STATIC_TLS bit is set.  The reason is that we know glibc will
+     use the static model.  First add the dynamic linker to the list
+     if it also uses TLS.  */
+  if (GL(dl_rtld_map).l_tls_blocksize != 0)
+    /* Assign a module ID.  */
+    GL(dl_rtld_map).l_tls_modid = _dl_next_tls_modid ();
+
+# ifndef TLS_INIT_TP_EXPENSIVE
+#  define TLS_INIT_TP_EXPENSIVE 0
+# endif
+
   /* We do not initialize any of the TLS functionality unless any of the
      initial modules uses TLS.  This makes dynamic loading of modules with
      TLS impossible, but to support it requires either eagerly doing setup
@@ -1878,9 +1554,57 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
      an old kernel that can't perform TLS_INIT_TP, even if no TLS is ever
      used.  Trying to do it lazily is too hairy to try when there could be
      multiple threads (from a non-TLS-using libpthread).  */
-  bool was_tls_init_tp_called = tls_init_tp_called;
-  if (tcbp == NULL && (!TLS_INIT_TP_EXPENSIVE || GL(dl_tls_max_dtv_idx) > 0))
-    tcbp = init_tls ();
+  if (!TLS_INIT_TP_EXPENSIVE || GL(dl_tls_max_dtv_idx) > 0)
+    {
+      struct link_map *l;
+      size_t nelem;
+      struct dtv_slotinfo *slotinfo;
+
+      /* Number of elements in the static TLS block.  */
+      GL(dl_tls_static_nelem) = GL(dl_tls_max_dtv_idx);
+
+      /* Allocate the array which contains the information about the
+	 dtv slots.  We allocate a few entries more than needed to
+	 avoid the need for reallocation.  */
+      nelem = GL(dl_tls_max_dtv_idx) + 1 + TLS_SLOTINFO_SURPLUS;
+
+      /* Allocate.  */
+      GL(dl_tls_dtv_slotinfo_list) = (struct dtv_slotinfo_list *)
+	malloc (sizeof (struct dtv_slotinfo_list)
+		+ nelem * sizeof (struct dtv_slotinfo));
+      /* No need to check the return value.  If memory allocation failed
+	 the program would have been terminated.  */
+
+      slotinfo = memset (GL(dl_tls_dtv_slotinfo_list)->slotinfo, '\0',
+			 nelem * sizeof (struct dtv_slotinfo));
+      GL(dl_tls_dtv_slotinfo_list)->len = nelem;
+      GL(dl_tls_dtv_slotinfo_list)->next = NULL;
+
+      /* Fill in the information from the loaded modules.  */
+      for (l = main_map, i = 0; l != NULL; l = l->l_next)
+	if (l->l_tls_blocksize != 0)
+	  /* This is a module with TLS data.  Store the map reference.
+	     The generation counter is zero.  */
+	  slotinfo[++i].map = l;
+      assert (i == GL(dl_tls_max_dtv_idx));
+
+      /* Compute the TLS offsets for the various blocks.  */
+      _dl_determine_tlsoffset ();
+
+      /* Construct the static TLS block and the dtv for the initial
+	 thread.  For some platforms this will include allocating memory
+	 for the thread descriptor.  The memory for the TLS block will
+	 never be freed.  It should be allocated accordingly.  The dtv
+	 array can be changed if dynamic loading requires it.  */
+      tcbp = _dl_allocate_tls_storage ();
+      if (tcbp == NULL)
+	_dl_fatal_printf ("\
+cannot allocate TLS data structures for initial thread");
+
+      /* Store for detection of the special case by __tls_get_addr
+	 so it knows not to pass this dtv to the normal realloc.  */
+      GL(dl_initial_dtv) = GET_DTV (tcbp);
+    }
 #endif
 
   if (__builtin_expect (mode, normal) != normal)
@@ -2027,13 +1751,10 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
 
 	      if ((GLRO(dl_debug_mask) & DL_DEBUG_PRELINK)
 		  && GL(dl_rtld_map).l_opencount > 1)
-		{
-		  /* Mark the link map as not yet relocated again.  */
-		  GL(dl_rtld_map).l_relocated = 0;
-		  _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope,
-				       0, 0);
-		}
-            }
+		_dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope,
+				     0, 0);
+	    }
+
 #define VERNEEDTAG (DT_NUM + DT_THISPROCNUM + DT_VERSIONTAGIDX (DT_VERNEED))
 	  if (version_info)
 	    {
@@ -2164,6 +1885,8 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
     }
 
 
+  /* Initialize _r_debug.  */
+  struct r_debug *r = _dl_debug_initialize (GL(dl_rtld_map).l_addr);
   {
     struct link_map *l = main_map;
 
@@ -2198,6 +1921,8 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
 
   if (prelinked)
     {
+      struct link_map *l;
+
       if (main_map->l_info [ADDRIDX (DT_GNU_CONFLICT)] != NULL)
 	{
 	  ElfW(Rela) *conflict, *conflictend;
@@ -2220,17 +1945,11 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
 
 
       /* Mark all the objects so we know they have been already relocated.  */
-      for (struct link_map *l = main_map; l != NULL; l = l->l_next)
+      for (l = main_map; l != NULL; l = l->l_next)
 	{
 	  l->l_relocated = 1;
 	  if (l->l_relro_size)
 	    _dl_protect_relro (l);
-
-#ifdef USE_TLS
-	  /* Add object to slot information data if necessasy.  */
-	  if (l->l_tls_blocksize != 0 && tls_init_tp_called)
-	    _dl_add_to_slotinfo (l);
-#endif
 	}
 
       _dl_sysdep_start_cleanup ();
@@ -2246,6 +1965,7 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
 	 the dynamic linker out of order because it has no copy relocs (we
 	 know that because it is self-contained).  */
 
+      struct link_map *l;
       int consider_profiling = GLRO(dl_profile) != NULL;
 #ifndef HP_TIMING_NONAVAIL
       hp_timing_t start;
@@ -2256,7 +1976,7 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
       /* If we are profiling we also must do lazy reloaction.  */
       GLRO(dl_lazy) |= consider_profiling;
 
-      struct link_map *l = main_map;
+      l = main_map;
       while (l->l_next)
 	l = l->l_next;
 
@@ -2278,12 +1998,6 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
 	    _dl_relocate_object (l, l->l_scope, GLRO(dl_lazy),
 				 consider_profiling);
 
-#ifdef USE_TLS
-	  /* Add object to slot information data if necessasy.  */
-	  if (l->l_tls_blocksize != 0 && tls_init_tp_called)
-	    _dl_add_to_slotinfo (l);
-#endif
-
 	  l = l->l_prev;
 	}
       while (l);
@@ -2311,8 +2025,6 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
 	  /* There was an explicit ref to the dynamic linker as a shared lib.
 	     Re-relocate ourselves with user-controlled symbol definitions.  */
 	  HP_TIMING_NOW (start);
-	  /* Mark the link map as not yet relocated again.  */
-	  GL(dl_rtld_map).l_relocated = 0;
 	  _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope, 0, 0);
 	  HP_TIMING_NOW (stop);
 	  HP_TIMING_DIFF (add, start, stop);
@@ -2327,9 +2039,6 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
 #ifdef USE_TLS
   if (GL(dl_tls_max_dtv_idx) > 0 || USE___THREAD || !TLS_INIT_TP_EXPENSIVE)
     {
-      if (!was_tls_init_tp_called && GL(dl_tls_max_dtv_idx) > 0)
-	++GL(dl_tls_generation);
-
       /* Now that we have completed relocation, the initializer data
 	 for the TLS blocks has its final values and we can copy them
 	 into the main thread's TLS area, which we allocated above.  */
@@ -2337,42 +2046,16 @@ ERROR: ld.so: object '%s' from %s cannot be preloaded: ignored.\n",
 
       /* And finally install it for the main thread.  If ld.so itself uses
 	 TLS we know the thread pointer was initialized earlier.  */
-      if (! tls_init_tp_called)
-	{
-	  const char *lossage = TLS_INIT_TP (tcbp, USE___THREAD);
-	  if (__builtin_expect (lossage != NULL, 0))
-	    _dl_fatal_printf ("cannot set up thread-local storage: %s\n",
-			      lossage);
-	}
+      const char *lossage = TLS_INIT_TP (tcbp, USE___THREAD);
+      if (__builtin_expect (lossage != NULL, 0))
+	_dl_fatal_printf ("cannot set up thread-local storage: %s\n", lossage);
     }
   else
 #endif
     NONTLS_INIT_TP;
 
-#ifdef SHARED
-  /* Auditing checkpoint: we have added all objects.  */
-  if (__builtin_expect (GLRO(dl_naudit) > 0, 0))
-    {
-      struct link_map *head = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
-      /* Do not call the functions for any auditing object.  */
-      if (head->l_auditing == 0)
-	{
-	  struct audit_ifaces *afct = GLRO(dl_audit);
-	  for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	    {
-	      if (afct->activity != NULL)
-		afct->activity (&head->l_audit[cnt].cookie, LA_ACT_CONSISTENT);
-
-	      afct = afct->next;
-	    }
-	}
-    }
-#endif
-
-  /* Notify the debugger all new objects are now ready to go.  We must re-get
-     the address since by now the variable might be in another object.  */
-  r = _dl_debug_initialize (0, LM_ID_BASE);
-  r->r_state = RT_CONSISTENT;
+  /* Notify the debugger that all objects are now mapped in.  */
+  r->r_state = RT_ADD;
   _dl_debug_state ();
 
 #ifndef MAP_COPY
@@ -2504,32 +2187,6 @@ a filename can be specified using the LD_DEBUG_OUTPUT environment variable.\n");
     }
 }
 
-static void
-process_dl_audit (char *str)
-{
-  /* The parameter is a colon separated list of DSO names.  */
-  char *p;
-
-  while ((p = (strsep) (&str, ":")) != NULL)
-    if (p[0] != '\0'
-	&& (__builtin_expect (! INTUSE(__libc_enable_secure), 1)
-	    || strchr (p, '/') == NULL))
-      {
-	/* This is using the local malloc, not the system malloc.  The
-	   memory can never be freed.  */
-	struct audit_list *newp = malloc (sizeof (*newp));
-	newp->name = p;
-
-	if (audit_list == NULL)
-	  audit_list = newp->next = newp;
-	else
-	  {
-	    newp->next = audit_list->next;
-	    audit_list = audit_list->next = newp;
-	  }
-      }
-}
-
 /* Process all environments variables the dynamic linker must recognize.
    Since all of them start with `LD_' we are a bit smarter while finding
    all the entries.  */
@@ -2572,12 +2229,7 @@ process_envvars (enum mode *modep)
 	case 5:
 	  /* Debugging of the dynamic linker?  */
 	  if (memcmp (envline, "DEBUG", 5) == 0)
-	    {
-	      process_dl_debug (&envline[6]);
-	      break;
-	    }
-	  if (memcmp (envline, "AUDIT", 5) == 0)
-	    process_dl_audit (&envline[6]);
+	    process_dl_debug (&envline[6]);
 	  break;
 
 	case 7:
diff --git a/elf/sprof.c b/elf/sprof.c
index 9567e4689f..afe3955ad8 100644
--- a/elf/sprof.c
+++ b/elf/sprof.c
@@ -1,5 +1,5 @@
 /* Read and display shared object profiling data.
-   Copyright (C) 1997-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1997-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -357,7 +357,7 @@ Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
 "),
-	   "2005");
+	   "2004");
   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
 }
 
diff --git a/elf/tst-audit1.c b/elf/tst-audit1.c
deleted file mode 100644
index 63656b4ee9..0000000000
--- a/elf/tst-audit1.c
+++ /dev/null
@@ -1 +0,0 @@
-#include "../io/pwd.c"
diff --git a/elf/tst-auditmod1.c b/elf/tst-auditmod1.c
deleted file mode 100644
index 9b07588288..0000000000
--- a/elf/tst-auditmod1.c
+++ /dev/null
@@ -1,190 +0,0 @@
-#include <dlfcn.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <bits/wordsize.h>
-#include <gnu/lib-names.h>
-
-
-unsigned int
-la_version (unsigned int v)
-{
-  setlinebuf (stdout);
-
-  printf ("version: %u\n", v);
-
-  char buf[20];
-  sprintf (buf, "%u", v);
-
-  return v;
-}
-
-void
-la_activity (uintptr_t *cookie, unsigned int flag)
-{
-  if (flag == LA_ACT_CONSISTENT)
-    printf ("activity: consistent\n");
-  else if (flag == LA_ACT_ADD)
-    printf ("activity: add\n");
-  else if (flag == LA_ACT_DELETE)
-    printf ("activity: delete\n");
-  else
-    printf ("activity: unknown activity %u\n", flag);
-}
-
-char *
-la_objsearch (const char *name, uintptr_t *cookie, unsigned int flag)
-{
-  char buf[100];
-  const char *flagstr;
-  if (flag == LA_SER_ORIG)
-    flagstr = "LA_SET_ORIG";
-  else if (flag == LA_SER_LIBPATH)
-    flagstr = "LA_SER_LIBPATH";
-  else if (flag == LA_SER_RUNPATH)
-    flagstr = "LA_SER_RUNPATH";
-  else if (flag == LA_SER_CONFIG)
-    flagstr = "LA_SER_CONFIG";
-  else if (flag == LA_SER_DEFAULT)
-    flagstr = "LA_SER_DEFAULT";
-  else if (flag == LA_SER_SECURE)
-    flagstr = "LA_SER_SECURE";
-  else
-    {
-       sprintf (buf, "unknown flag %d", flag);
-       flagstr = buf;
-    }
-  printf ("objsearch: %s, %s\n", name, flagstr);
-
-  return (char *) name;
-}
-
-unsigned int
-la_objopen (struct link_map *l, Lmid_t lmid, uintptr_t *cookie)
-{
-  printf ("objopen: %ld, %s\n", lmid, l->l_name);
-
-  return 3;
-}
-
-void
-la_preinit (uintptr_t *cookie)
-{
-  printf ("preinit\n");
-}
-
-unsigned int
-la_objclose  (uintptr_t *cookie)
-{
-  printf ("objclose\n");
-  return 0;
-}
-
-uintptr_t
-la_symbind32 (Elf32_Sym *sym, unsigned int ndx, uintptr_t *refcook,
-	      uintptr_t *defcook, unsigned int *flags, const char *symname)
-{
-  printf ("symbind32: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
-	  symname, (long int) sym->st_value, ndx, *flags);
-
-  return sym->st_value;
-}
-
-uintptr_t
-la_symbind64 (Elf64_Sym *sym, unsigned int ndx, uintptr_t *refcook,
-	      uintptr_t *defcook, unsigned int *flags, const char *symname)
-{
-  printf ("symbind64: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
-	  symname, (long int) sym->st_value, ndx, *flags);
-
-  return sym->st_value;
-}
-
-#ifdef __i386__
-# define pltenter la_i86_gnu_pltenter
-# define pltexit la_i86_gnu_pltexit
-# define La_regs La_i86_regs
-# define La_retval La_i86_retval
-# define int_retval lrv_eax
-#elif defined __x86_64__
-# define pltenter la_x86_64_gnu_pltenter
-# define pltexit la_x86_64_gnu_pltexit
-# define La_regs La_x86_64_regs
-# define La_retval La_x86_64_retval
-# define int_retval lrv_rax
-#elif defined __powerpc__ && __WORDSIZE == 32
-# define pltenter la_ppc32_gnu_pltenter
-# define pltexit la_ppc32_gnu_pltexit
-# define La_regs La_ppc32_regs
-# define La_retval La_ppc32_retval
-# define int_retval lrv_r3
-#elif defined __powerpc__ && __WORDSIZE == 64
-# define pltenter la_ppc64_gnu_pltenter
-# define pltexit la_ppc64_gnu_pltexit
-# define La_regs La_ppc64_regs
-# define La_retval La_ppc64_retval
-# define int_retval lrv_r3
-#elif defined __sh__
-# define pltenter la_sh_gnu_pltenter
-# define pltexit la_sh_gnu_pltexit
-# define La_regs La_sh_regs
-# define La_retval La_sh_retval
-# define int_retval lrv_r0
-#elif defined __mc68000__
-# define pltenter la_m68k_gnu_pltenter
-# define pltexit la_m68k_gnu_pltexit
-# define La_regs La_m68k_regs
-# define La_retval La_m68k_retval
-# define int_retval lrv_d0
-#elif defined __alpha__
-# define pltenter la_alpha_gnu_pltenter
-# define pltexit la_alpha_gnu_pltexit
-# define La_regs La_alpha_regs
-# define La_retval La_alpha_retval
-# define int_retval lrv_r0
-#elif defined __s390__ && __WORDSIZE == 32
-# define pltenter la_s390_32_gnu_pltenter
-# define pltexit la_s390_32_gnu_pltexit
-# define La_regs La_s390_32_regs
-# define La_retval La_s390_32_retval
-# define int_retval lrv_r2
-#elif defined __s390__ && __WORDSIZE == 64
-# define pltenter la_s390_64_gnu_pltenter
-# define pltexit la_s390_64_gnu_pltexit
-# define La_regs La_s390_64_regs
-# define La_retval La_s390_64_retval
-# define int_retval lrv_r2
-#elif defined __ia64__
-# define pltenter la_ia64_gnu_pltenter
-# define pltexit la_ia64_gnu_pltexit
-# define La_regs La_ia64_regs
-# define La_retval La_ia64_retval
-# define int_retval lrv_r8
-#else
-# error "architecture specific code needed"
-#endif
-
-
-ElfW(Addr)
-pltenter (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
-	  uintptr_t *defcook, La_regs *regs, unsigned int *flags,
-	  const char *symname, long int *framesizep)
-{
-  printf ("pltenter: symname=%s, st_value=%#lx, ndx=%u, flags=%u\n",
-	  symname, (long int) sym->st_value, ndx, *flags);
-
-  return sym->st_value;
-}
-
-unsigned int
-pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
-	 uintptr_t *defcook, const La_regs *inregs, La_retval *outregs,
-	 const char *symname)
-{
-  printf ("pltexit: symname=%s, st_value=%#lx, ndx=%u, retval=%tu\n",
-	  symname, (long int) sym->st_value, ndx, outregs->int_retval);
-
-  return 0;
-}
diff --git a/fedora/branch.mk b/fedora/branch.mk
index 2ec1f17148..20986645ab 100644
--- a/fedora/branch.mk
+++ b/fedora/branch.mk
@@ -1,5 +1,5 @@
 # This file is updated automatically by Makefile.
 glibc-branch := fedora
 glibc-base := HEAD
-fedora-sync-date := 2005-02-11 10:37 UTC
-fedora-sync-tag := fedora-glibc-20050211T1037
+fedora-sync-date := 2004-12-19 23:31 UTC
+fedora-sync-tag := fedora-glibc-20041219T2331
diff --git a/fedora/glibc.spec.in b/fedora/glibc.spec.in
index 9eb9370722..1a9a960012 100644
--- a/fedora/glibc.spec.in
+++ b/fedora/glibc.spec.in
@@ -1,4 +1,4 @@
-%define glibcrelease 10
+%define glibcrelease 2
 %define auxarches i586 i686 athlon sparcv9 alphaev6
 %define prelinkarches noarch
 %define nptlarches i386 i686 athlon x86_64 ia64 s390 s390x sparcv9 ppc ppc64
@@ -10,12 +10,12 @@ Summary: The GNU libc libraries.
 Name: glibc
 Version: %{glibcversion}
 Release: %{glibcrelease}
-License: LGPL
+Copyright: LGPL
 Group: System Environment/Libraries
-%define glibcsrcdir %{glibcname}-%{glibcdate}
+%define glibcsrcdir %{name}-%{glibcdate}
 Source0: %{glibcsrcdir}.tar.bz2
-Source1: %{glibcname}-fedora-%{glibcdate}.tar.bz2
-Patch0: %{glibcname}-fedora.patch
+Source1: %{name}-fedora-%{glibcdate}.tar.bz2
+Patch0: %{name}-fedora.patch
 Patch1: %{name}-nptl-check.patch
 Patch2: %{name}-ppc-assume.patch
 Patch3: %{name}-ia64-lib64.patch
@@ -840,12 +840,9 @@ for i in $RPM_BUILD_ROOT%{_prefix}/bin/{xtrace,memusage}; do
 done
 
 grep '%{_prefix}/%{_lib}/lib.*_p\.a' < rpm.filelist > profile.filelist || :
-grep '%{_infodir}' < rpm.filelist | grep -v '%{_infodir}/dir' > devel.filelist
-grep '%{_prefix}/include/gnu/stubs-[32164]\+\.h' < rpm.filelist >> devel.filelist || :
-
-grep '%{_prefix}/include' < rpm.filelist |
-	egrep -v '%{_prefix}/include/(nptl|gnu/stubs-[32164]+\.h)' \
-		> headers.filelist
+egrep "(%{_prefix}/include)|(%{_infodir})" < rpm.filelist |
+	grep -v %{_prefix}/include/nptl |
+	grep -v %{_infodir}/dir > devel.filelist
 
 mv rpm.filelist rpm.filelist.full
 grep -v '%{_prefix}/%{_lib}/lib.*_p.a' rpm.filelist.full |
@@ -856,6 +853,10 @@ grep '%{_prefix}/%{_lib}/.*\.o' < rpm.filelist >> devel.filelist
 grep '%{_prefix}/%{_lib}/lib.*\.so' < rpm.filelist >> devel.filelist
 grep '%{_mandir}' < rpm.filelist >> devel.filelist
 
+grep '%{_prefix}/include' < devel.filelist > headers.filelist
+grep -v '%{_prefix}/include' < devel.filelist > devel.filelist.tmp
+mv -f devel.filelist.tmp devel.filelist
+
 mv rpm.filelist rpm.filelist.full
 grep -v '%{_prefix}/%{_lib}/lib.*\.a' < rpm.filelist.full |
 	grep -v '%{_prefix}/%{_lib}/.*\.o' |
@@ -1056,11 +1057,11 @@ for f in `find $RPM_BUILD_ROOT/%{_lib} -type l`; do
 done
 
 echo Sorting source file lists. Might take a while...
-xargs -0 -n 1 echo < $sf | LC_ALL=C grep -v '/<internal>$' | LC_ALL=C sort -u > $sf.sorted
-xargs -0 -n 1 echo < $csf | LC_ALL=C grep -v '/<internal>$' | LC_ALL=C sort -u > $csf.sorted
+xargs -0 -n 1 echo < $sf | LANG=C sort -u > $sf.sorted
+xargs -0 -n 1 echo < $csf | LANG=C sort -u > $csf.sorted
 mkdir -p $RPM_BUILD_ROOT/usr/src/debug
 cat $sf.sorted $csf.sorted \
-  | (cd $RPM_BUILD_DIR; LC_ALL=C sort -u | cpio -pdm ${RPM_BUILD_ROOT}/usr/src/debug)
+  | (cd $RPM_BUILD_DIR; LANG=C sort -u | cpio -pdm ${RPM_BUILD_ROOT}/usr/src/debug)
 # stupid cpio creates new directories in mode 0700, fixup
 find $RPM_BUILD_ROOT/usr/src/debug -type d -print | xargs chmod a+rx
 
@@ -1269,81 +1270,11 @@ rm -f *.filelist*
 %endif
 
 %changelog
-* Sat Feb 12 2005 Jakub Jelinek <jakub@redhat.com> 2.3.4-10
-- hopefully fix interaction with prelink (#147655)
-
-* Fri Feb 11 2005 Jakub Jelinek <jakub@redhat.com> 2.3.4-9
-- update from CVS
-  - bi-arch <gnu/stubs.h> (BZ#715)
-
-* Fri Feb 11 2005 Jakub Jelinek <jakub@redhat.com> 2.3.4-8
-- update from CVS
-  - bi-arch <gnu/lib-names.h> (BZ#632)
-  - fix libdl on s390 and maybe other platforms
-  - fix initstate{,_r} (BZ#710)
-  - fix <gnu/stubs.h> generation (BZ#157)
-- define CMSPAR in bits/termios.h (#147533)
-
-* Tue Feb  8 2005 Jakub Jelinek <jakub@redhat.com> 2.3.4-7
-- update from CVS
-  - fix TLS handling in linuxthreads
-
-* Tue Feb  8 2005 Jakub Jelinek <jakub@redhat.com> 2.3.4-6
-- update from CVS
-  - ld.so auditing
-  - fix segfault if chrooted app attempts to dlopen a library
-    and no standard library directory exists at all (#147067, #144303)
-  - fix initgroups when nscd is running, but has group caching disabled
-    (#146588)
-  - fix pthread_key_{create,destroy} in LinuxThreads when pthread_create
-    has not been called yet (#146710)
-  - fix ppc64 swapcontext and setcontext (#146736, BZ#700)
-  - service nscd cosmetic fixes (#146776)
-  - fix IA-32 and x86-64 stack alignment in DSO constructors (#145689)
-  - fix zdump -v segfaults on x86-64 (#146210)
-  - avoid calling sigaction (SIGPIPE, ...) inside syslog (#146021, IT#56686)
-  - fix errno values for futimes (BZ#633)
-  - unconditionally include <features.h> in malloc.h (BZ#650)
-  - change regex \B handling to match old GNU regex as well as perl/grep's dfa
-    (from empty string inside of word to empty string not at a word boundary,
-     BZ#693)
-  - slightly optimize i686 TLS accesses, use direct TLS %gs access in sem_*
-    and allow building -mno-tls-direct-seg-refs glibc that is free of direct TLS
-    %gs access with negative offsets
-  - fix addseverity
-  - fix fmemopen
-  - fix rewinddir
-  - increase svc{tcp,unix}_create listen backlog
-
-* Thu Jan  6 2005 Jakub Jelinek <jakub@redhat.com> 2.3.4-5
-- update from CVS
-  - add some warn_unused_result marking
-  - make ftruncate available even for just -D_POSIX_C_SOURCE=200112L
-    (BZ#640)
-
-* Thu Jan  6 2005 Jakub Jelinek <jakub@redhat.com> 2.3.4-4
-- update from CVS
-  - fix IA-32 stack alignment for LinuxThreads thread functions
-    and functions passed to clone(2) directly
-  - fix ecvt{,_r} on denormals (#143279)
-  - fix __tls_get_addr typo
-  - fix rounding in IA-64 alarm (#143710)
-  - don't reinitialize __environ in __libc_start_main, so that
-    effects of setenv/putenv done in DSO initializers are preserved
-    (#144037, IT#57403)
-  - fix fmemopen
-  - fix vDSO l_map_end and l_text_end values
-  - IA64 libm update (#142494)
-- fix ppc rint/ceil etc. (BZ#602)
-
-* Tue Dec 21 2004 Jakub Jelinek <jakub@redhat.com> 2.3.4-3
-- rebuilt
-
-* Mon Dec 20 2004 Jakub Jelinek <jakub@redhat.com> 2.3.4-2
+* Mon Dec 20 2004 Jakub Jelinek <jakub@redhat.com> 2.3.4-2
 - work around rpm bug some more, this time by copying
   iconvconfig to iconvconfig.%%{_target_cpu}.
 
-* Mon Dec 20 2004 Jakub Jelinek <jakub@redhat.com> 2.3.4-1
+* Mon Dec 19 2004 Jakub Jelinek <jakub@redhat.com> 2.3.4-1
 - update from CVS
   - glibc 2.3.4 release
   - add -o and --nostdlib options to iconvconfig
@@ -1643,7 +1574,7 @@ rm -f *.filelist*
 - update from CVS
   - fix BZ #151, #362, #381, #407
   - fdim fix for +inf/+inf (BZ #376)
-
+ 
 * Sun Sep 26 2004 Jakub Jelinek <jakub@redhat.com> 2.3.3-58
 - update from CVS
   - vasprintf fix (BZ #346)
diff --git a/grp/putgrent.c b/grp/putgrent.c
index 382d6a4daa..cb9b6b1f79 100644
--- a/grp/putgrent.c
+++ b/grp/putgrent.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,92,96,98,99,2000,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 92, 96, 98, 99, 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -44,13 +44,8 @@ putgrent (gr, stream)
 
   flockfile (stream);
 
-  if (gr->gr_name[0] == '+' || gr->gr_name[0] == '-')
-    retval = fprintf (stream, "%s:%s::",
-		      gr->gr_name, _S (gr->gr_passwd));
-  else
-    retval = fprintf (stream, "%s:%s:%lu:",
-		      gr->gr_name, _S (gr->gr_passwd),
-		      (unsigned long int) gr->gr_gid);
+  retval = fprintf (stream, "%s:%s:%u:",
+		    gr->gr_name, _S (gr->gr_passwd), gr->gr_gid);
   if (__builtin_expect (retval, 0) < 0)
     {
       funlockfile (stream);
diff --git a/hurd/sigunwind.c b/hurd/sigunwind.c
index 1c6489b3b3..450a385a2a 100644
--- a/hurd/sigunwind.c
+++ b/hurd/sigunwind.c
@@ -1,5 +1,5 @@
 /* longjmp cleanup function for unwinding past signal handlers.
-   Copyright (C) 1995, 1996, 1997, 1998, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -111,7 +111,7 @@ _hurdsig_longjmp_from_handler (void *data, jmp_buf env, int val)
       link = (void *) &scp[1];
       assert (! link->resource.next && ! link->resource.prevp);
       assert (link->thread.next == ss->active_resources);
-      assert (link->thread.prevp == &ss->active_resources);
+      assert (link->thread.prevp = &ss->active_resources);
       if (link->thread.next)
 	link->thread.next->thread.prevp = &link->thread.next;
       ss->active_resources = link;
diff --git a/iconv/Makefile b/iconv/Makefile
index 40c7cbcdd3..fe0c453e7b 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -78,15 +78,3 @@ $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force)
 
 $(objpfx)iconv_prog: $(iconv_prog-modules:%=$(objpfx)%.o)
 $(objpfx)iconvconfig: $(iconvconfig-modules:%=$(objpfx)%.o)
-
-ifneq ($(cross-compiling),yes)
-xtests: test-iconvconfig
-endif
-
-.PHONY: test-iconvconfig
-test-iconvconfig: /dev/null $(objpfx)iconvconfig
-	tmp=$(objpfx)gconv-modules.cache.$$$$; \
-	rm -f $$tmp; \
-	$(make-test-out) --output=$$tmp --nostdlib $(inst_gconvdir) && \
-	cmp $$tmp $(inst_gconvdir)/gconv-modules.cache && \
-	rm -f $$tmp
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 5b3d35ecaa..86852857a3 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -1,5 +1,5 @@
 /* Convert text in given files from the specified from-set to the to-set.
-   Copyright (C) 1998-2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1998-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -239,30 +239,26 @@ main (int argc, char *argv[])
 	      if (from_wrong)
 		{
 		  if (to_wrong)
-		    error (0, 0,
+		    error (EXIT_FAILURE, 0,
 			   _("\
 conversion from `%s' and to `%s' are not supported"),
 			   from_pretty, to_pretty);
 		  else
-		    error (0, 0,
+		    error (EXIT_FAILURE, 0,
 			   _("conversion from `%s' is not supported"),
 			   from_pretty);
 		}
 	      else
 		{
 		  if (to_wrong)
-		    error (0, 0,
+		    error (EXIT_FAILURE, 0,
 			   _("conversion to `%s' is not supported"),
 			   to_pretty);
 		  else
-		    error (0, 0,
+		    error (EXIT_FAILURE, 0,
 			   _("conversion from `%s' to `%s' is not supported"),
 			   from_pretty, to_pretty);
 		}
-
-	      argp_help (&argp, stderr, ARGP_HELP_SEE,
-			 program_invocation_short_name);
-	      exit (1);
 	    }
 	  else
 	    error (EXIT_FAILURE, errno,
@@ -428,7 +424,7 @@ print_version (FILE *stream, struct argp_state *state)
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
 }
 
diff --git a/iconv/iconvconfig.c b/iconv/iconvconfig.c
index 8c56d29bb0..0ed210b794 100644
--- a/iconv/iconvconfig.c
+++ b/iconv/iconvconfig.c
@@ -1,5 +1,5 @@
 /* Generate fastloading iconv module configuration files.
-   Copyright (C) 2000-2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2000.
 
@@ -397,7 +397,7 @@ print_version (FILE *stream, struct argp_state *state)
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
 }
 
diff --git a/iconv/strtab.c b/iconv/strtab.c
index 4a8f29a675..e6feb2cf24 100644
--- a/iconv/strtab.c
+++ b/iconv/strtab.c
@@ -1,5 +1,5 @@
 /* C string table handling.
-   Copyright (C) 2000, 2001, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001 Free Software Foundation, Inc.
    Written by Ulrich Drepper <drepper@redhat.com>, 2000.
 
    This program is free software; you can redistribute it and/or modify
@@ -326,7 +326,7 @@ strtabfinalize (struct Strtab *st, size_t *size)
   copylen = 1;
   copystrings (st->root, &endp, &copylen);
   assert (copylen == st->total + 1);
-  assert (endp == retval + st->total + 1);
+  assert (endp = retval + st->total + 1);
   *size = copylen;
 
   return retval;
diff --git a/iconvdata/jis0208.h b/iconvdata/jis0208.h
index 8255bafc62..9dea38973c 100644
--- a/iconvdata/jis0208.h
+++ b/iconvdata/jis0208.h
@@ -1,5 +1,5 @@
 /* Access functions for JISX0208 conversion.
-   Copyright (C) 1997,1998,1999,2000,2003,2005 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -24,15 +24,6 @@
 #include <gconv.h>
 #include <stdint.h>
 
-/* Struct for table with indeces in UCS mapping table.  */
-struct jisx0208_ucs_idx
-{
-  uint16_t start;
-  uint16_t end;
-  uint16_t idx;
-};
-
-
 /* Conversion table.  */
 extern const uint16_t __jis0208_to_ucs[];
 
@@ -42,6 +33,15 @@ extern const struct jisx0208_ucs_idx __jisx0208_from_ucs_idx[];
 extern const char __jisx0208_from_ucs_tab[][2];
 
 
+/* Struct for table with indices into the UCS mapping table.  */
+struct jisx0208_ucs_idx
+{
+  uint16_t start;
+  uint16_t end;
+  uint16_t idx;
+};
+
+
 static inline uint32_t
 __attribute ((always_inline))
 jisx0208_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset)
diff --git a/include/dlfcn.h b/include/dlfcn.h
index 460c037ed1..bfa1b9041b 100644
--- a/include/dlfcn.h
+++ b/include/dlfcn.h
@@ -7,24 +7,9 @@
 #define __RTLD_SPROF	0x40000000
 #define __RTLD_OPENEXEC	0x20000000
 #define __RTLD_CALLMAP	0x10000000
-#define __RTLD_AUDIT	0x08000000
 
 #define __LM_ID_CALLER	-2
 
-#ifdef SHARED
-/* Locally stored program arguments.  */
-extern int __dlfcn_argc attribute_hidden;
-extern char **__dlfcn_argv attribute_hidden;
-#else
-/* These variables are defined and initialized in the startup code.  */
-extern int __libc_argc attribute_hidden;
-extern char **__libc_argv attribute_hidden;
-
-# define __dlfcn_argc __libc_argc
-# define __dlfcn_argv __libc_argv
-#endif
-
-
 /* Now define the internal interfaces.  */
 
 #define __libc_dlopen(name) \
@@ -44,8 +29,18 @@ extern int _dl_addr (const void *address, Dl_info *info,
 libc_hidden_proto (_dl_addr)
 #endif
 
+/* Open the shared object NAME, relocate it, and run its initializer if it
+   hasn't already been run.  MODE is as for `dlopen' (see <dlfcn.h>).  If
+   the object is already opened, returns its existing map.  */
+extern void *_dl_open (const char *name, int mode, const void *caller,
+		       Lmid_t nsid)
+     internal_function;
+libc_hidden_proto (_dl_open)
+
 /* Close an object previously opened by _dl_open.  */
-extern void _dl_close (void *map) attribute_hidden;
+extern void _dl_close (void *map)
+     internal_function;
+libc_hidden_proto (_dl_close)
 
 /* Look up NAME in shared object HANDLE (which may be RTLD_DEFAULT or
    RTLD_NEXT).  WHO is the calling function, for RTLD_NEXT.  Returns
diff --git a/include/features.h b/include/features.h
index 5e6cca5c86..ff1de8f472 100644
--- a/include/features.h
+++ b/include/features.h
@@ -295,7 +295,7 @@
 /* Major and minor version number of the GNU C library package.  Use
    these macros to test for features in specific releases.  */
 #define	__GLIBC__	2
-#define	__GLIBC_MINOR__	4
+#define	__GLIBC_MINOR__	3
 
 #define __GLIBC_PREREQ(maj, min) \
 	((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min))
diff --git a/include/libc-symbols.h b/include/libc-symbols.h
index f3526ca053..dcc46cc80f 100644
--- a/include/libc-symbols.h
+++ b/include/libc-symbols.h
@@ -1,6 +1,6 @@
 /* Support macros for making weak and strong aliases for symbols,
    and for using symbol sets and linker warnings with GNU ld.
-   Copyright (C) 1995-1998,2000-2003,2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-1998,2000-2003,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -279,7 +279,6 @@
 
 /* A canned warning for sysdeps/stub functions.  */
 #define	stub_warning(name) \
-  __make_section_unallocated (".gnu.glibc-stub." #name) \
   link_warning (name, \
 		"warning: " #name " is not implemented and will always fail")
 
diff --git a/include/link.h b/include/link.h
index 965419126d..3078b72a87 100644
--- a/include/link.h
+++ b/include/link.h
@@ -1,6 +1,6 @@
 /* Data structure for communication from the run-time dynamic linker for
    loaded ELF shared objects.
-   Copyright (C) 1995-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -34,7 +34,6 @@
 
 #include <bits/elfclass.h>		/* Defines __ELF_NATIVE_CLASS.  */
 #include <bits/link.h>
-#include <bits/linkmap.h>
 #include <dl-lookupcfg.h>
 #include <tls.h>		/* Defines USE_TLS.  */
 
@@ -200,10 +199,6 @@ struct link_map
 				       should be called on this link map
 				       when relocation finishes.  */
     unsigned int l_used:1;	/* Nonzero if the DSO is used.  */
-    unsigned int l_auditing:1;	/* Nonzero if the DSO is used in auditing.  */
-    unsigned int l_audit_any_plt:1; /* Nonzero if at least one audit module
-				       is interested in the PLT interception.*/
-
     /* Array with version names.  */
     unsigned int l_nversions;
     struct r_found_version *l_versions;
@@ -212,14 +207,7 @@ struct link_map
     struct r_search_path_struct l_rpath_dirs;
 
     /* Collected results of relocation while profiling.  */
-    struct reloc_result
-    {
-      DL_FIXUP_VALUE_TYPE addr;
-      struct link_map *bound;
-      unsigned int boundndx;
-      uint32_t enterexit;
-      unsigned int flags;
-    } *l_reloc_result;
+    ElfW(Addr) *l_reloc_result;
 
     /* Pointer to the version information if available.  */
     ElfW(Versym) *l_versyms;
@@ -275,7 +263,11 @@ struct link_map
     {
       const ElfW(Sym) *sym;
       int type_class;
+#ifdef DL_LOOKUP_RETURNS_MAP
       struct link_map *value;
+#else
+      ElfW(Addr) value;
+#endif
       const ElfW(Sym) *ret;
     } l_lookup_cache;
 
@@ -305,64 +297,8 @@ struct link_map
        done.  */
     ElfW(Addr) l_relro_addr;
     size_t l_relro_size;
-
-    /* Audit information.  This array apparent must be the last in the
-       structure.  Never add something after it.  */
-    struct auditstate
-    {
-      uintptr_t cookie;
-      unsigned int bindflags;
-    } l_audit[0];
-  };
-
-/* Version numbers for la_version handshake interface.  */
-#define LAV_CURRENT	1
-
-/* Activity types signaled through la_activity.  */
-enum
-  {
-    LA_ACT_CONSISTENT,
-    LA_ACT_ADD,
-    LA_ACT_DELETE
-  };
-
-/* Values representing origin of name for dynamic loading.  */
-enum
-  {
-    LA_SER_ORIG = 0x01,		/* Original name.  */
-    LA_SER_LIBPATH = 0x02,	/* Directory from LD_LIBRARY_PATH.  */
-    LA_SER_RUNPATH = 0x04,	/* Directory from RPATH/RUNPATH.  */
-    LA_SER_CONFIG = 0x08,	/* Found through ldconfig.  */
-    LA_SER_DEFAULT = 0x40,	/* Default directory.  */
-    LA_SER_SECURE = 0x80	/* Unused.  */
-  };
-
-/* Values for la_objopen return value.  */
-enum
-  {
-    LA_FLG_BINDTO = 0x01,	/* Audit symbols bound to this object.  */
-    LA_FLG_BINDFROM = 0x02	/* Audit symbols bound from this object.  */
   };
 
-/* Values for la_symbind flags parameter.  */
-enum
-  {
-    LA_SYMB_NOPLTENTER = 0x01,	/* la_pltenter will not be called.  */
-    LA_SYMB_NOPLTEXIT = 0x02,	/* la_pltexit will not be called.  */
-    LA_SYMB_STRUCTCALL = 0x04,	/* Return value is a structure.  */
-    LA_SYMB_DLSYM = 0x08,	/* Binding due to dlsym call.  */
-    LA_SYMB_ALTVALUE = 0x10	/* Value has been changed by a previous
-				   la_symbind call.  */
-  };
-
-#if __ELF_NATIVE_CLASS == 32
-# define symbind symbind32
-#elif __ELF_NATIVE_CLASS == 64
-# define symbind symbind64
-#else
-# error "__ELF_NATIVE_CLASS must be defined"
-#endif
-
 struct dl_phdr_info
   {
     ElfW(Addr) dlpi_addr;
diff --git a/include/signal.h b/include/signal.h
index dc1e0a12e5..104ea8f83a 100644
--- a/include/signal.h
+++ b/include/signal.h
@@ -48,9 +48,6 @@ extern int __sigpause (int sig_or_mask, int is_sig);
 extern int __default_sigpause (int mask);
 extern int __xpg_sigpause (int sig);
 
-/* Simplified sigemptyset() implementation without the parameter checking.  */
-#undef __sigemptyset
-#define __sigemptyset(ss) (memset (ss, '\0', sizeof (sigset_t)), 0)
 
 
 /* Allocate real-time signal with highest/lowest available priority.  */
diff --git a/include/stub-tag.h b/include/stub-tag.h
index d4d10c8341..3004d6f429 100644
--- a/include/stub-tag.h
+++ b/include/stub-tag.h
@@ -1,4 +1,5 @@
 /* This header is included into every file that declares a stub function.
-   It is obsolete now, but hasn't died to avoid removing all those
-   #include's.  Every such file should use the `stub_warning' macro for
-   each of its functions, and that is sufficient.  */
+ * The build process looks for this header in .d files to decide whether
+ * or not it needs to scan the corresponding .c file for entries to add to
+ * <gnu/stubs.h>.
+ */
diff --git a/include/stubs-biarch.h b/include/stubs-biarch.h
deleted file mode 100644
index fc086bd894..0000000000
--- a/include/stubs-biarch.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* This file selects the right generated file of `__stub_FUNCTION' macros
-   based on the architecture being compiled for.  */
-
-#include <bits/wordsize.h>
-
-#if __WORDSIZE == 32
-# include <gnu/stubs-32.h>
-#elif __WORDSIZE == 64
-# include <gnu/stubs-64.h>
-#else
-# error "unexpected value for __WORDSIZE macro"
-#endif
diff --git a/libio/fmemopen.c b/libio/fmemopen.c
index 7c8769a130..ab6ffdd678 100644
--- a/libio/fmemopen.c
+++ b/libio/fmemopen.c
@@ -1,5 +1,5 @@
 /* Fmemopen implementation.
-   Copyright (C) 2000, 2002, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by  Hanno Mueller, kontakt@hanno.de, 2000.
 
@@ -27,6 +27,8 @@
  * but couldn't find it in libio. The following snippet of code is an
  * attempt to implement what glibc's documentation describes.
  *
+ * No, it isn't really tested yet. :-)
+ *
  *
  *
  * I already see some potential problems:
@@ -71,7 +73,6 @@
 #include <libio.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdint.h>
 #include <string.h>
 #include <sys/types.h>
 #include "libioP.h"
@@ -165,7 +166,7 @@ fmemopen_seek (void *cookie, _IO_off64_t *p, int w)
       break;
 
     case SEEK_END:
-      np = c->maxpos - *p;
+      np = c->size - *p;
       break;
 
     default:
@@ -175,9 +176,9 @@ fmemopen_seek (void *cookie, _IO_off64_t *p, int w)
   if (np < 0 || (size_t) np > c->size)
     return -1;
 
-  *p = c->pos = np;
+  c->pos = np;
 
-  return 0;
+  return np;
 }
 
 
@@ -202,13 +203,6 @@ fmemopen (void *buf, size_t len, const char *mode)
   cookie_io_functions_t iof;
   fmemopen_cookie_t *c;
 
-  if (len == 0)
-    {
-    einval:
-      __set_errno (EINVAL);
-      return NULL;
-    }
-
   c = (fmemopen_cookie_t *) malloc (sizeof (fmemopen_cookie_t));
   if (c == NULL)
     return NULL;
@@ -226,12 +220,7 @@ fmemopen (void *buf, size_t len, const char *mode)
       c->buffer[0] = '\0';
     }
   else
-    {
-      if ((uintptr_t) len > -(uintptr_t) buf)
-	goto einval;
-
-      c->buffer = buf;
-    }
+    c->buffer = buf;
 
   c->size = len;
 
diff --git a/libio/iofopncook.c b/libio/iofopncook.c
index 9c5503d1f2..321eb67b8d 100644
--- a/libio/iofopncook.c
+++ b/libio/iofopncook.c
@@ -1,5 +1,4 @@
-/* Copyright (C) 1993,95,97,99,2000,2002,2004, 2005
-   Free Software Foundation, Inc.
+/* Copyright (C) 1993,95,97,99,2000,2002,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -37,8 +36,6 @@ static _IO_ssize_t _IO_cookie_read (register _IO_FILE* fp, void* buf,
 static _IO_ssize_t _IO_cookie_write (register _IO_FILE* fp,
 				     const void* buf, _IO_ssize_t size);
 static _IO_off64_t _IO_cookie_seek (_IO_FILE *fp, _IO_off64_t offset, int dir);
-static _IO_off64_t _IO_cookie_seekoff (_IO_FILE *fp, _IO_off64_t offset,
-				       int dir, int mode);
 static int _IO_cookie_close (_IO_FILE* fp);
 
 static _IO_ssize_t
@@ -97,20 +94,6 @@ _IO_cookie_close (fp)
 }
 
 
-static _IO_off64_t
-_IO_cookie_seekoff (fp, offset, dir, mode)
-     _IO_FILE *fp;
-     _IO_off64_t offset;
-     int dir;
-     int mode;
-{
-  /* We must force the fileops code to always use seek to determine
-     the position.  */
-  fp->_offset = _IO_pos_BAD;
-  return INTUSE(_IO_file_seekoff) (fp, offset, dir, mode);
-}
-
-
 static const struct _IO_jump_t _IO_cookie_jumps = {
   JUMP_INIT_DUMMY,
   JUMP_INIT(finish, INTUSE(_IO_file_finish)),
@@ -120,7 +103,7 @@ static const struct _IO_jump_t _IO_cookie_jumps = {
   JUMP_INIT(pbackfail, INTUSE(_IO_default_pbackfail)),
   JUMP_INIT(xsputn, INTUSE(_IO_file_xsputn)),
   JUMP_INIT(xsgetn, INTUSE(_IO_default_xsgetn)),
-  JUMP_INIT(seekoff, _IO_cookie_seekoff),
+  JUMP_INIT(seekoff, INTUSE(_IO_file_seekoff)),
   JUMP_INIT(seekpos, _IO_default_seekpos),
   JUMP_INIT(setbuf, INTUSE(_IO_file_setbuf)),
   JUMP_INIT(sync, INTUSE(_IO_file_sync)),
@@ -240,7 +223,7 @@ static const struct _IO_jump_t _IO_old_cookie_jumps = {
   JUMP_INIT(pbackfail, INTUSE(_IO_default_pbackfail)),
   JUMP_INIT(xsputn, INTUSE(_IO_file_xsputn)),
   JUMP_INIT(xsgetn, INTUSE(_IO_default_xsgetn)),
-  JUMP_INIT(seekoff, _IO_cookie_seekoff),
+  JUMP_INIT(seekoff, INTUSE(_IO_file_seekoff)),
   JUMP_INIT(seekpos, _IO_default_seekpos),
   JUMP_INIT(setbuf, INTUSE(_IO_file_setbuf)),
   JUMP_INIT(sync, INTUSE(_IO_file_sync)),
diff --git a/libio/stdio.h b/libio/stdio.h
index ac42b3e2ac..941a2afc74 100644
--- a/libio/stdio.h
+++ b/libio/stdio.h
@@ -1,5 +1,5 @@
 /* Define ISO C stdio on top of C++ iostreams.
-   Copyright (C) 1991,1994-2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1991,1994-2002,2003,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -393,15 +393,15 @@ __BEGIN_NAMESPACE_STD
    This function is a possible cancellation point and therefore not
    marked with __THROW.  */
 extern int fscanf (FILE *__restrict __stream,
-		   __const char *__restrict __format, ...) __wur;
+		   __const char *__restrict __format, ...);
 /* Read formatted input from stdin.
 
    This function is a possible cancellation point and therefore not
    marked with __THROW.  */
-extern int scanf (__const char *__restrict __format, ...) __wur;
+extern int scanf (__const char *__restrict __format, ...);
 /* Read formatted input from S.  */
 extern int sscanf (__const char *__restrict __s,
-		   __const char *__restrict __format, ...) __THROW __wur;
+		   __const char *__restrict __format, ...) __THROW;
 __END_NAMESPACE_STD
 
 #ifdef	__USE_ISOC99
@@ -412,19 +412,19 @@ __BEGIN_NAMESPACE_C99
    marked with __THROW.  */
 extern int vfscanf (FILE *__restrict __s, __const char *__restrict __format,
 		    _G_va_list __arg)
-     __attribute__ ((__format__ (__scanf__, 2, 0))) __wur;
+     __attribute__ ((__format__ (__scanf__, 2, 0)));
 
 /* Read formatted input from stdin into argument list ARG.
 
    This function is a possible cancellation point and therefore not
    marked with __THROW.  */
 extern int vscanf (__const char *__restrict __format, _G_va_list __arg)
-     __attribute__ ((__format__ (__scanf__, 1, 0))) __wur;
+     __attribute__ ((__format__ (__scanf__, 1, 0)));
 
 /* Read formatted input from S into argument list ARG.  */
 extern int vsscanf (__const char *__restrict __s,
 		    __const char *__restrict __format, _G_va_list __arg)
-     __THROW __attribute__ ((__format__ (__scanf__, 2, 0))) __wur;
+     __THROW __attribute__ ((__format__ (__scanf__, 2, 0)));
 __END_NAMESPACE_C99
 #endif /* Use ISO C9x.  */
 
@@ -525,15 +525,14 @@ __BEGIN_NAMESPACE_STD
 
    This function is a possible cancellation point and therefore not
    marked with __THROW.  */
-extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream)
-     __wur;
+extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream);
 
 /* Get a newline-terminated string from stdin, removing the newline.
    DO NOT USE THIS FUNCTION!!  There is no limit on how much it will read.
 
    This function is a possible cancellation point and therefore not
    marked with __THROW.  */
-extern char *gets (char *__s) __wur;
+extern char *gets (char *__s);
 __END_NAMESPACE_STD
 
 #ifdef __USE_GNU
@@ -544,7 +543,7 @@ __END_NAMESPACE_STD
    or due to the implementation it is a cancellation point and
    therefore not marked with __THROW.  */
 extern char *fgets_unlocked (char *__restrict __s, int __n,
-			     FILE *__restrict __stream) __wur;
+			     FILE *__restrict __stream);
 #endif
 
 
@@ -561,10 +560,10 @@ extern char *fgets_unlocked (char *__restrict __s, int __n,
    therefore not marked with __THROW.  */
 extern _IO_ssize_t __getdelim (char **__restrict __lineptr,
 			       size_t *__restrict __n, int __delimiter,
-			       FILE *__restrict __stream) __wur;
+			       FILE *__restrict __stream);
 extern _IO_ssize_t getdelim (char **__restrict __lineptr,
 			     size_t *__restrict __n, int __delimiter,
-			     FILE *__restrict __stream) __wur;
+			     FILE *__restrict __stream);
 
 /* Like `getdelim', but reads up to a newline.
 
@@ -574,7 +573,7 @@ extern _IO_ssize_t getdelim (char **__restrict __lineptr,
    therefore not marked with __THROW.  */
 extern _IO_ssize_t getline (char **__restrict __lineptr,
 			    size_t *__restrict __n,
-			    FILE *__restrict __stream) __wur;
+			    FILE *__restrict __stream);
 #endif
 
 
@@ -596,7 +595,7 @@ extern int puts (__const char *__s);
 
    This function is a possible cancellation points and therefore not
    marked with __THROW.  */
-extern int ungetc (int __c, FILE *__stream) __wur;
+extern int ungetc (int __c, FILE *__stream);
 
 
 /* Read chunks of generic data from STREAM.
@@ -604,13 +603,13 @@ extern int ungetc (int __c, FILE *__stream) __wur;
    This function is a possible cancellation points and therefore not
    marked with __THROW.  */
 extern size_t fread (void *__restrict __ptr, size_t __size,
-		     size_t __n, FILE *__restrict __stream) __wur;
+		     size_t __n, FILE *__restrict __stream);
 /* Write chunks of generic data to STREAM.
 
    This function is a possible cancellation points and therefore not
    marked with __THROW.  */
 extern size_t fwrite (__const void *__restrict __ptr, size_t __size,
-		      size_t __n, FILE *__restrict __s) __wur;
+		      size_t __n, FILE *__restrict __s);
 __END_NAMESPACE_STD
 
 #ifdef __USE_GNU
@@ -632,9 +631,9 @@ extern int fputs_unlocked (__const char *__restrict __s,
    or due to the implementation they are cancellation points and
    therefore not marked with __THROW.  */
 extern size_t fread_unlocked (void *__restrict __ptr, size_t __size,
-			      size_t __n, FILE *__restrict __stream) __wur;
+			      size_t __n, FILE *__restrict __stream);
 extern size_t fwrite_unlocked (__const void *__restrict __ptr, size_t __size,
-			       size_t __n, FILE *__restrict __stream) __wur;
+			       size_t __n, FILE *__restrict __stream);
 #endif
 
 
@@ -648,7 +647,7 @@ extern int fseek (FILE *__stream, long int __off, int __whence);
 
    This function is a possible cancellation point and therefore not
    marked with __THROW.  */
-extern long int ftell (FILE *__stream) __wur;
+extern long int ftell (FILE *__stream);
 /* Rewind to the beginning of STREAM.
 
    This function is a possible cancellation point and therefore not
@@ -672,7 +671,7 @@ extern int fseeko (FILE *__stream, __off_t __off, int __whence);
 
    This function is a possible cancellation point and therefore not
    marked with __THROW.  */
-extern __off_t ftello (FILE *__stream) __wur;
+extern __off_t ftello (FILE *__stream);
 # else
 #  ifdef __REDIRECT
 extern int __REDIRECT (fseeko,
@@ -713,7 +712,7 @@ __END_NAMESPACE_STD
 
 #ifdef __USE_LARGEFILE64
 extern int fseeko64 (FILE *__stream, __off64_t __off, int __whence);
-extern __off64_t ftello64 (FILE *__stream) __wur;
+extern __off64_t ftello64 (FILE *__stream);
 extern int fgetpos64 (FILE *__restrict __stream, fpos64_t *__restrict __pos);
 extern int fsetpos64 (FILE *__stream, __const fpos64_t *__pos);
 #endif
@@ -811,7 +810,7 @@ extern void flockfile (FILE *__stream) __THROW;
 
 /* Try to acquire ownership of STREAM but do not block if it is not
    possible.  */
-extern int ftrylockfile (FILE *__stream) __THROW __wur;
+extern int ftrylockfile (FILE *__stream) __THROW;
 
 /* Relinquish the ownership granted for STREAM.  */
 extern void funlockfile (FILE *__stream) __THROW;
diff --git a/linuxthreads/ChangeLog b/linuxthreads/ChangeLog
index 4cbcfbb7a3..9577130e10 100644
--- a/linuxthreads/ChangeLog
+++ b/linuxthreads/ChangeLog
@@ -1,38 +1,3 @@
-2005-02-09  Daniel Jacobowitz  <dan@codesourcery.com>
-
-	* descr.h (__pthread_find_self, thread_self): Mark as pure
-	instead of const.
-
-2005-02-07  Jakub Jelinek  <jakub@redhat.com>
-
-	* specific.c (pthread_key_delete): If pthread_create has not been
-	called yet, clear p_specific for the current thread.
-	* Makefile (tests): Add tst-tsd1.
-	* tst-tsd1.c: New test.
-
-2005-01-08  Andreas Jaeger  <aj@suse.de>
-
-	* pthread.c (init_one_static_tls): Adjust initialization of DTV
-	entry for static tls deallocation fix.
-
-	* sysdeps/alpha/tls.h (dtv_t): Change pointer type to be struct which
-	also contains information whether the memory pointed to is static
-	TLS or not, include <stdbool.h>.
-	* sysdeps/i386/tls.h: Likewise.
-	* sysdeps/ia64/tls.h: Likewise.
-	* sysdeps/powerpc/tls.h: Likewise.
-	* sysdeps/s390/tls.h: Likewise.
-	* sysdeps/sh/tls.h: Likewise.
-	* sysdeps/sparc/tls.h: Likewise.
-	* sysdeps/x86_64/tls.h: Likewise.
-
-2004-12-21  Jakub Jelinek  <jakub@redhat.com>
-
-	* Makefile (tests): Add tst-align.
-	* tst-align.c: New test.
-	* sysdeps/i386/Makefile (CFLAGS-tst-align.c): Add
-	-mpreferred-stack-boundary=4.
-
 2004-12-12  Ulrich Drepper  <drepper@redhat.com>
 
 	* internals.h: Include <stdbool.h> to match includes used in nptl.
diff --git a/linuxthreads/Makefile b/linuxthreads/Makefile
index c2b548b38b..f4c9f2a916 100644
--- a/linuxthreads/Makefile
+++ b/linuxthreads/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1996-2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1996-2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -111,7 +111,7 @@ tests = ex1 ex2 ex3 ex4 ex5 ex6 ex7 ex8 ex9 $(librt-tests) ex12 ex13 joinrace \
 	ex17 ex18 tst-cancel tst-context bug-sleep \
 	tst-cancel1 tst-cancel2 tst-cancel3 tst-cancel4 tst-cancel5 \
 	tst-cancel6 tst-cancel7 tst-cancel8 tst-popen tst-popen2 tst-attr1 \
-	tst-stack1 tst-align tst-tsd1
+	tst-stack1
 test-srcs = tst-signal
 # These tests are linked with libc before libpthread
 tests-reverse += tst-cancel5
diff --git a/linuxthreads/descr.h b/linuxthreads/descr.h
index 2b1e49a0ba..bea8b912f7 100644
--- a/linuxthreads/descr.h
+++ b/linuxthreads/descr.h
@@ -239,9 +239,9 @@ extern int __pthread_nonstandard_stacks;
 
 /* Recover thread descriptor for the current thread */
 
-extern pthread_descr __pthread_find_self (void) __attribute__ ((pure));
+extern pthread_descr __pthread_find_self (void) __attribute__ ((const));
 
-static inline pthread_descr thread_self (void) __attribute__ ((pure));
+static inline pthread_descr thread_self (void) __attribute__ ((const));
 static inline pthread_descr thread_self (void)
 {
 #ifdef THREAD_SELF
diff --git a/linuxthreads/pthread.c b/linuxthreads/pthread.c
index 39863f2b54..24f0eb02b0 100644
--- a/linuxthreads/pthread.c
+++ b/linuxthreads/pthread.c
@@ -482,8 +482,7 @@ init_one_static_tls (pthread_descr descr, struct link_map *map)
 # endif
 
   /* Fill in the DTV slot so that a later LD/GD access will find it.  */
-  dtv[map->l_tls_modid].pointer.val = dest;
-  dtv[map->l_tls_modid].pointer.is_static = true;
+  dtv[map->l_tls_modid].pointer = dest;
 
   /* Initialize the memory.  */
   memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
diff --git a/linuxthreads/specific.c b/linuxthreads/specific.c
index 92eec3d99a..f54fabaeb9 100644
--- a/linuxthreads/specific.c
+++ b/linuxthreads/specific.c
@@ -104,16 +104,15 @@ int pthread_key_delete(pthread_key_t key)
      that if the key is reallocated later by pthread_key_create, its
      associated values will be NULL in all threads.
 
-     If no threads have been created yet, clear it just in the
-     current thread.  */
+     Do nothing if no threads have been created yet.  */
 
-  struct pthread_key_delete_helper_args args;
-  args.idx1st = key / PTHREAD_KEY_2NDLEVEL_SIZE;
-  args.idx2nd = key % PTHREAD_KEY_2NDLEVEL_SIZE;
   if (__pthread_manager_request != -1)
     {
+      struct pthread_key_delete_helper_args args;
       struct pthread_request request;
 
+      args.idx1st = key / PTHREAD_KEY_2NDLEVEL_SIZE;
+      args.idx2nd = key % PTHREAD_KEY_2NDLEVEL_SIZE;
       args.self = 0;
 
       request.req_thread = self;
@@ -125,11 +124,6 @@ int pthread_key_delete(pthread_key_t key)
 					  (char *) &request, sizeof(request)));
       suspend(self);
     }
-  else
-    {
-      if (self->p_specific[args.idx1st] != NULL)
-	self->p_specific[args.idx1st][args.idx2nd] = NULL;
-    }
 
   pthread_mutex_unlock(&pthread_keys_mutex);
   return 0;
diff --git a/linuxthreads/sysdeps/alpha/tls.h b/linuxthreads/sysdeps/alpha/tls.h
index 1eabf5b890..9e15318b79 100644
--- a/linuxthreads/sysdeps/alpha/tls.h
+++ b/linuxthreads/sysdeps/alpha/tls.h
@@ -1,5 +1,5 @@
 /* Definitions for thread-local data handling.  linuxthreads/Alpha version.
-   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,18 +23,13 @@
 #ifndef __ASSEMBLER__
 
 # include <pt-machine.h>
-# include <stdbool.h>
 # include <stddef.h>
 
 /* Type for the dtv.  */
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 
diff --git a/linuxthreads/sysdeps/i386/Makefile b/linuxthreads/sysdeps/i386/Makefile
index 418fa5c6ef..45183d1cd3 100644
--- a/linuxthreads/sysdeps/i386/Makefile
+++ b/linuxthreads/sysdeps/i386/Makefile
@@ -15,7 +15,6 @@ CFLAGS-pthread.c += -fno-omit-frame-pointer -mpreferred-stack-boundary=4
 CFLAGS-ptlongjmp.c += -fno-omit-frame-pointer
 CFLAGS-semaphore.c += -fno-omit-frame-pointer
 CFLAGS-sighandler.c += -fno-omit-frame-pointer -mpreferred-stack-boundary=4
-CFLAGS-tst-align.c += -mpreferred-stack-boundary=4
 endif
 
 ifeq ($(subdir),csu)
diff --git a/linuxthreads/sysdeps/i386/tls.h b/linuxthreads/sysdeps/i386/tls.h
index f5d555baec..d296340ad8 100644
--- a/linuxthreads/sysdeps/i386/tls.h
+++ b/linuxthreads/sysdeps/i386/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  linuxthreads/i386 version.
-   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
 # include <pt-machine.h>
 
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 
@@ -32,11 +31,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 
diff --git a/linuxthreads/sysdeps/ia64/tls.h b/linuxthreads/sysdeps/ia64/tls.h
index fe8fcd15f8..6bdf819d8d 100644
--- a/linuxthreads/sysdeps/ia64/tls.h
+++ b/linuxthreads/sysdeps/ia64/tls.h
@@ -1,5 +1,5 @@
 /* Definitions for thread-local data handling.  linuxthreads/IA-64 version.
-   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -24,18 +24,13 @@
 
 # include <dl-sysdep.h>
 # include <pt-machine.h>
-# include <stdbool.h>
 # include <stddef.h>
 
 /* Type for the dtv.  */
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 #else /* __ASSEMBLER__ */
diff --git a/linuxthreads/sysdeps/powerpc/tls.h b/linuxthreads/sysdeps/powerpc/tls.h
index 35472081d6..fda953976e 100644
--- a/linuxthreads/sysdeps/powerpc/tls.h
+++ b/linuxthreads/sysdeps/powerpc/tls.h
@@ -1,5 +1,5 @@
 /* Definitions for thread-local data handling.  linuxthreads/PPC version.
-   Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,18 +23,13 @@
 #ifndef __ASSEMBLER__
 
 # include <pt-machine.h>
-# include <stdbool.h>
 # include <stddef.h>
 
 /* Type for the dtv.  */
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 #else /* __ASSEMBLER__ */
@@ -112,9 +107,9 @@ typedef struct
 /* Code to initially initialize the thread pointer.  This might need
    special attention since 'errno' is not yet available and if the
    operation can cause a failure 'errno' must not be touched.
-
-   The global register variable is declared in pt-machine.h with the
-   wrong type, so we need some extra casts to get the desired result.
+   
+   The global register variable is declared in pt-machine.h with the 
+   wrong type, so we need some extra casts to get the desired result.  
    This avoids a lvalue cast that gcc-3.4 does not like.  */
 # define TLS_INIT_TP(TCBP, SECONDCALL) \
     (__thread_self = (struct _pthread_descr_struct *) \
diff --git a/linuxthreads/sysdeps/s390/tls.h b/linuxthreads/sysdeps/s390/tls.h
index 00a4df9382..f420195bd2 100644
--- a/linuxthreads/sysdeps/s390/tls.h
+++ b/linuxthreads/sysdeps/s390/tls.h
@@ -1,5 +1,5 @@
 /* Definitions for thread-local data handling.  linuxthreads/s390 version.
-   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,18 +23,13 @@
 #ifndef __ASSEMBLER__
 
 # include <pt-machine.h>
-# include <stdbool.h>
 # include <stddef.h>
 
 /* Type for the dtv.  */
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 typedef struct
diff --git a/linuxthreads/sysdeps/sh/tls.h b/linuxthreads/sysdeps/sh/tls.h
index fbc6f01151..bc9d44fa85 100644
--- a/linuxthreads/sysdeps/sh/tls.h
+++ b/linuxthreads/sysdeps/sh/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  linuxthreads/SH version.
-   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
 # include <pt-machine.h>
 
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 
@@ -32,11 +31,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 #else /* __ASSEMBLER__ */
diff --git a/linuxthreads/sysdeps/sparc/tls.h b/linuxthreads/sysdeps/sparc/tls.h
index 3e550fa7c7..5053824058 100644
--- a/linuxthreads/sysdeps/sparc/tls.h
+++ b/linuxthreads/sysdeps/sparc/tls.h
@@ -1,5 +1,5 @@
 /* Definitions for thread-local data handling.  linuxthreads/sparc version.
-   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,18 +23,13 @@
 #ifndef __ASSEMBLER__
 
 # include <pt-machine.h>
-# include <stdbool.h>
 # include <stddef.h>
 
 /* Type for the dtv.  */
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 typedef struct
diff --git a/linuxthreads/sysdeps/x86_64/tls.h b/linuxthreads/sysdeps/x86_64/tls.h
index 67d80b28d9..d979bbd896 100644
--- a/linuxthreads/sysdeps/x86_64/tls.h
+++ b/linuxthreads/sysdeps/x86_64/tls.h
@@ -1,5 +1,5 @@
 /* Definitions for thread-local data handling.  linuxthreads/x86-64 version.
-   Copyright (C) 2002, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,18 +23,13 @@
 #ifndef __ASSEMBLER__
 
 # include <pt-machine.h>
-# include <stdbool.h>
 # include <stddef.h>
 
 /* Type for the dtv.  */
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 
diff --git a/linuxthreads_db/ChangeLog b/linuxthreads_db/ChangeLog
index 4bda6fb47a..338bf8207f 100644
--- a/linuxthreads_db/ChangeLog
+++ b/linuxthreads_db/ChangeLog
@@ -1,7 +1,3 @@
-2005-01-09  Andreas Jaeger  <aj@suse.de>
-
-	* td_thr_tlsbase.c (td_thr_tlsbase): Adjust for dtv change.
-
 2004-05-01  Jakub Jelinek  <jakub@redhat.com>
 
 	* thread_dbP.h (LOG): Use write instead of __libc_write.
diff --git a/linuxthreads_db/td_thr_tlsbase.c b/linuxthreads_db/td_thr_tlsbase.c
index 5a7e31b9e1..081e8d0e70 100644
--- a/linuxthreads_db/td_thr_tlsbase.c
+++ b/linuxthreads_db/td_thr_tlsbase.c
@@ -1,5 +1,5 @@
 /* Locate TLS data for a thread.
-   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -59,10 +59,10 @@ td_thr_tlsbase (const td_thrhandle_t *th,
 
   /* It could be that the memory for this module is not allocated for
      the given thread.  */
-  if (pdtv.pointer.val == TLS_DTV_UNALLOCATED)
+  if (pdtv.pointer == TLS_DTV_UNALLOCATED)
     return TD_TLSDEFER;
 
-  *base = (char *) pdtv.pointer.val;
+  *base = (char *) pdtv.pointer;
 
   return TD_OK;
 #else
diff --git a/locale/programs/locale.c b/locale/programs/locale.c
index d64bb0b560..adf9944578 100644
--- a/locale/programs/locale.c
+++ b/locale/programs/locale.c
@@ -1,5 +1,5 @@
 /* Implementation of the locale program according to POSIX 9945-2.
-   Copyright (C) 1995-1997, 1999-2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-1997, 1999-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
 
@@ -277,7 +277,7 @@ print_version (FILE *stream, struct argp_state *state)
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
 }
 
diff --git a/locale/programs/localedef.c b/locale/programs/localedef.c
index 326113c175..28cb7b316e 100644
--- a/locale/programs/localedef.c
+++ b/locale/programs/localedef.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
 
@@ -389,7 +389,7 @@ print_version (FILE *stream, struct argp_state *state)
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
   fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
 }
 
diff --git a/localedata/ChangeLog b/localedata/ChangeLog
index 8c8616e7cf..42ce0637ae 100644
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,7 +1,3 @@
-2004-12-19  Roland McGrath  <roland@frob.com>
-
-	* gen-unicode-ctype.c (output_tables): Fix email address in output.
-
 2004-10-02  Petter Reinholdtsen  <pere@hungry.com>
 
 	[BZ #82]
diff --git a/localedata/gen-unicode-ctype.c b/localedata/gen-unicode-ctype.c
index 849f272ed5..a9c51b3f48 100644
--- a/localedata/gen-unicode-ctype.c
+++ b/localedata/gen-unicode-ctype.c
@@ -638,7 +638,7 @@ output_tables (const char *filename, const char *version)
   fprintf (stream, "source    \"UnicodeData.txt, PropList.txt\"\n");
   fprintf (stream, "address   \"\"\n");
   fprintf (stream, "contact   \"\"\n");
-  fprintf (stream, "email     \"bug-glibc-locales@gnu.org\"\n");
+  fprintf (stream, "email     \"bug-glibc@gnu.org\"\n");
   fprintf (stream, "tel       \"\"\n");
   fprintf (stream, "fax       \"\"\n");
   fprintf (stream, "language  \"\"\n");
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 4d2169bf78..e3ccbde7b5 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1,5 +1,5 @@
   /* Malloc implementation for multiple threads without lock contention.
-   Copyright (C) 1996-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1996-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Wolfram Gloger <wg@malloc.de>
    and Doug Lea <dl@cs.oswego.edu>, 2001.
@@ -5508,8 +5508,6 @@ int mALLOPt(param_number, value) int param_number; int value;
 
 /* Helper code.  */
 
-extern char **__libc_argv attribute_hidden;
-
 static void
 malloc_printerr(int action, const char *str, void *ptr)
 {
@@ -5524,8 +5522,8 @@ malloc_printerr(int action, const char *str, void *ptr)
 
       __libc_message (action & 2,
 		      action & 4
-		      ? "%s\n" : "*** glibc detected *** %s: %s: 0x%s ***\n",
-		      __libc_argv[0] ?: "<unknown>", str, cp);
+		      ? "%s\n" : "*** glibc detected *** %s: 0x%s ***\n",
+		      str, cp);
     }
   else if (action & 2)
     abort ();
diff --git a/malloc/malloc.h b/malloc/malloc.h
index 0f99e837c3..753539e7b0 100644
--- a/malloc/malloc.h
+++ b/malloc/malloc.h
@@ -1,5 +1,5 @@
 /* Prototypes and definition for malloc implementation.
-   Copyright (C) 1996,97,99,2000,2002-2004,2005 Free Software Foundation, Inc.
+   Copyright (C) 1996,97,99,2000,2002,2003,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,16 +20,59 @@
 #ifndef _MALLOC_H
 #define _MALLOC_H 1
 
+#ifdef _LIBC
 #include <features.h>
-#include <stddef.h>
+#endif
+
+/*
+  $Id$
+  `ptmalloc2', a malloc implementation for multiple threads without
+  lock contention, by Wolfram Gloger <wg@malloc.de>.
+
+  VERSION 2.7.0
+
+  This work is mainly derived from malloc-2.7.0 by Doug Lea
+  <dl@cs.oswego.edu>, which is available from:
+
+                 ftp://gee.cs.oswego.edu/pub/misc/malloc.c
+
+  This trimmed-down header file only provides function prototypes and
+  the exported data structures.  For more detailed function
+  descriptions and compile-time options, see the source file
+  `malloc.c'.
+*/
+
+#if defined(__STDC__) || defined (__cplusplus)
+# include <stddef.h>
 # define __malloc_ptr_t  void *
+#else
+# undef  size_t
+# define size_t          unsigned int
+# undef  ptrdiff_t
+# define ptrdiff_t       int
+# define __malloc_ptr_t  char *
+#endif
 
+#ifdef _LIBC
 /* Used by GNU libc internals. */
-#define __malloc_size_t size_t
-#define __malloc_ptrdiff_t ptrdiff_t
+# define __malloc_size_t size_t
+# define __malloc_ptrdiff_t ptrdiff_t
+#elif !defined __attribute_malloc__
+# define __attribute_malloc__
+#endif
 
 #ifdef __GNUC__
 
+/* GCC can always grok prototypes.  For C++ programs we add throw()
+   to help it optimize the function calls.  But this works only with
+   gcc 2.8.x and egcs.  */
+# ifndef __THROW
+#  if defined __cplusplus && (__GNUC__ >= 3 || __GNUC_MINOR__ >= 8)
+#   define __THROW	throw ()
+#  else
+#   define __THROW
+#  endif
+# endif
 # define __MALLOC_P(args)	args __THROW
 /* This macro will be used for functions which might take C++ callback
    functions.  */
@@ -37,51 +80,78 @@
 
 #else	/* Not GCC.  */
 
-# define __MALLOC_P(args)	args
-# define __MALLOC_PMT(args)	args
+# define __THROW
+
+# if (defined __STDC__ && __STDC__) || defined __cplusplus
+
+#  define __MALLOC_P(args)	args
+#  define __MALLOC_PMT(args)	args
+
+#  ifndef __const
+#   define __const	 const
+#  endif
+
+# else	/* Not ANSI C or C++.  */
+
+#  define __MALLOC_P(args)	()	/* No prototypes.  */
+#  define __MALLOC_PMT(args)	()
+
+#  ifndef __const
+#   define __const
+#  endif
+
+# endif	/* ANSI C or C++.  */
 
 #endif	/* GCC.  */
 
+#ifndef NULL
+# ifdef __cplusplus
+#  define NULL	0
+# else
+#  define NULL	((__malloc_ptr_t) 0)
+# endif
+#endif
 
-__BEGIN_DECLS
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 /* Allocate SIZE bytes of memory.  */
-extern void *malloc __MALLOC_P ((size_t __size)) __attribute_malloc__ __wur;
+extern __malloc_ptr_t malloc __MALLOC_P ((size_t __size)) __attribute_malloc__;
 
 /* Allocate NMEMB elements of SIZE bytes each, all initialized to 0.  */
-extern void *calloc __MALLOC_P ((size_t __nmemb, size_t __size))
-       __attribute_malloc__ __wur;
+extern __malloc_ptr_t calloc __MALLOC_P ((size_t __nmemb, size_t __size))
+       __attribute_malloc__;
 
 /* Re-allocate the previously allocated block in __ptr, making the new
    block SIZE bytes long.  */
-extern void *realloc __MALLOC_P ((void *__ptr, size_t __size))
-       __attribute_malloc__ __attribute_warn_unused_result__;
+extern __malloc_ptr_t realloc __MALLOC_P ((__malloc_ptr_t __ptr,
+					   size_t __size))
+       __attribute_malloc__;
 
 /* Free a block allocated by `malloc', `realloc' or `calloc'.  */
-extern void free __MALLOC_P ((void *__ptr));
+extern void free __MALLOC_P ((__malloc_ptr_t __ptr));
 
 /* Free a block allocated by `calloc'. */
-extern void cfree __MALLOC_P ((void *__ptr));
+extern void cfree __MALLOC_P ((__malloc_ptr_t __ptr));
 
 /* Allocate SIZE bytes allocated to ALIGNMENT bytes.  */
-extern void *memalign __MALLOC_P ((size_t __alignment, size_t __size))
-       __attribute_malloc__ __wur;
+extern __malloc_ptr_t memalign __MALLOC_P ((size_t __alignment, size_t __size));
 
 /* Allocate SIZE bytes on a page boundary.  */
-extern void *valloc __MALLOC_P ((size_t __size))
-       __attribute_malloc__ __wur;
+extern __malloc_ptr_t valloc __MALLOC_P ((size_t __size)) __attribute_malloc__;
 
 /* Equivalent to valloc(minimum-page-that-holds(n)), that is, round up
    __size to nearest pagesize. */
-extern void * pvalloc __MALLOC_P ((size_t __size))
-       __attribute_malloc__ __wur;
+extern __malloc_ptr_t  pvalloc __MALLOC_P ((size_t __size))
+       __attribute_malloc__;
 
 /* Underlying allocation function; successive calls should return
    contiguous pieces of memory.  */
-extern void *(*__morecore) __MALLOC_PMT ((ptrdiff_t __size));
+extern __malloc_ptr_t (*__morecore) __MALLOC_PMT ((ptrdiff_t __size));
 
 /* Default value of `__morecore'.  */
-extern void *__default_morecore __MALLOC_P ((ptrdiff_t __size))
+extern __malloc_ptr_t __default_morecore __MALLOC_P ((ptrdiff_t __size))
        __attribute_malloc__;
 
 /* SVID2/XPG mallinfo structure */
@@ -132,38 +202,41 @@ extern int malloc_trim __MALLOC_P ((size_t __pad));
 
 /* Report the number of usable allocated bytes associated with allocated
    chunk __ptr. */
-extern size_t malloc_usable_size __MALLOC_P ((void *__ptr));
+extern size_t malloc_usable_size __MALLOC_P ((__malloc_ptr_t __ptr));
 
 /* Prints brief summary statistics on stderr. */
 extern void malloc_stats __MALLOC_P ((void));
 
 /* Record the state of all malloc variables in an opaque data structure. */
-extern void *malloc_get_state __MALLOC_P ((void));
+extern __malloc_ptr_t malloc_get_state __MALLOC_P ((void));
 
 /* Restore the state of all malloc variables from data obtained with
    malloc_get_state(). */
-extern int malloc_set_state __MALLOC_P ((void *__ptr));
+extern int malloc_set_state __MALLOC_P ((__malloc_ptr_t __ptr));
 
 /* Called once when malloc is initialized; redefining this variable in
    the application provides the preferred way to set up the hook
    pointers. */
 extern void (*__malloc_initialize_hook) __MALLOC_PMT ((void));
 /* Hooks for debugging and user-defined versions. */
-extern void (*__free_hook) __MALLOC_PMT ((void *__ptr,
+extern void (*__free_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr,
 					__const __malloc_ptr_t));
-extern void *(*__malloc_hook) __MALLOC_PMT ((size_t __size,
-					     __const __malloc_ptr_t));
-extern void *(*__realloc_hook) __MALLOC_PMT ((void *__ptr, size_t __size,
-					      __const __malloc_ptr_t));
-extern void *(*__memalign_hook) __MALLOC_PMT ((size_t __alignment,
-					       size_t __size,
-					       __const __malloc_ptr_t));
+extern __malloc_ptr_t (*__malloc_hook) __MALLOC_PMT ((size_t __size,
+						    __const __malloc_ptr_t));
+extern __malloc_ptr_t (*__realloc_hook) __MALLOC_PMT ((__malloc_ptr_t __ptr,
+						     size_t __size,
+						     __const __malloc_ptr_t));
+extern __malloc_ptr_t (*__memalign_hook) __MALLOC_PMT ((size_t __alignment,
+						      size_t __size,
+						      __const __malloc_ptr_t));
 extern void (*__after_morecore_hook) __MALLOC_PMT ((void));
 
 /* Activate a standard set of debugging hooks. */
 extern void __malloc_check_init __MALLOC_P ((void));
 
 
-__END_DECLS
+#ifdef __cplusplus
+} /* end of extern "C" */
+#endif
 
 #endif /* malloc.h */
diff --git a/malloc/memusage.sh b/malloc/memusage.sh
index b2e08c6039..be8f755a20 100755
--- a/malloc/memusage.sh
+++ b/malloc/memusage.sh
@@ -1,5 +1,5 @@
 #! @BASH@
-# Copyright (C) 1999-2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1999,2000,2001,2002,2003,2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 # Contributed by Ulrich Drepper <drepper@gnu.org>, 1999.
 
@@ -71,7 +71,7 @@ do_version() {
   printf $"Copyright (C) %s Free Software Foundation, Inc.
 This is free software; see the source for copying conditions.  There is NO
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-" "2005"
+" "2004"
   printf $"Written by %s.
 " "Ulrich Drepper"
   exit 0
diff --git a/malloc/mtrace.pl b/malloc/mtrace.pl
index 0036f33f59..1640fa652d 100644
--- a/malloc/mtrace.pl
+++ b/malloc/mtrace.pl
@@ -45,7 +45,7 @@ arglist: while (@ARGV) {
 	$ARGV[0] eq "--vers" || $ARGV[0] eq "--versi" ||
 	$ARGV[0] eq "--versio" || $ARGV[0] eq "--version") {
 	print "mtrace (GNU $PACKAGE) $VERSION\n";
-	print "Copyright (C) 2005 Free Software Foundation, Inc.\n";
+	print "Copyright (C) 2004 Free Software Foundation, Inc.\n";
 	print "This is free software; see the source for copying conditions.  There is NO\n";
 	print "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n";
 	print "Written by Ulrich Drepper <drepper\@gnu.org>\n";
diff --git a/manual/.cvsignore b/manual/.cvsignore
index 54abbbb5a4..f1254e92d3 100644
--- a/manual/.cvsignore
+++ b/manual/.cvsignore
@@ -9,6 +9,6 @@ glibc-*
 *.cp *.cps *.fn *.fns *.vr *.vrs *.tp *.tps *.ky *.kys *.pg *.pgs
 
 texis top-menu.texi chapters.texi summary.texi stamp-*
-distinfo dir-add.texinfo dir-add.texi
+distinfo dir-add.texinfo
 
 libm-err.texi
diff --git a/manual/memory.texi b/manual/memory.texi
index ee2cd75c44..91abb7f5d4 100644
--- a/manual/memory.texi
+++ b/manual/memory.texi
@@ -2384,7 +2384,7 @@ exceed the process' data storage limit.
 
 @comment unistd.h
 @comment BSD
-@deftypefun void *sbrk (ptrdiff_t @var{delta})
+@deftypefun void *sbrk (ptrdiff_t @var{delta})
 This function is the same as @code{brk} except that you specify the new
 end of the data segment as an offset @var{delta} from the current end
 and on success the return value is the address of the resulting end of
diff --git a/manual/string.texi b/manual/string.texi
index d9de12996e..21ab71461a 100644
--- a/manual/string.texi
+++ b/manual/string.texi
@@ -1781,9 +1781,9 @@ uppercase and lowercase characters are related.
 
 For example,
 @smallexample
-strcasestr ("hello, world", "L")
+strcasestr ("hello, world", "L")
     @result{} "llo, world"
-strcasestr ("hello, World", "wo")
+strstr ("hello, World", "wo")
     @result{} "World"
 @end smallexample
 @end deftypefun
diff --git a/manual/users.texi b/manual/users.texi
index b52ee44439..20deeabdd2 100644
--- a/manual/users.texi
+++ b/manual/users.texi
@@ -1690,9 +1690,6 @@ extended information about users, adding an entry using this function
 would inevitably leave out much of the important information.
 @c Then how are programmers to modify the password file? -zw
 
-The group and user ID fields are left empty if the group or user name
-starts with a - or +.
-
 The function @code{putpwent} is declared in @file{pwd.h}.
 @end deftypefun
 
diff --git a/math/libm-test.inc b/math/libm-test.inc
index be05222629..60711fabdc 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -3795,114 +3795,6 @@ rint_test (void)
 }
 
 static void
-rint_test_tonearest (void)
-{
-  int save_round_mode;
-  START (rint_tonearest);
-
-  save_round_mode = fegetround();
-
-  if (!fesetround (FE_TONEAREST))
-  {
-    TEST_f_f (rint, 2.0, 2.0);
-    TEST_f_f (rint, 1.5, 2.0);
-    TEST_f_f (rint, 1.0, 1.0);
-    TEST_f_f (rint, 0.5, 0.0);
-    TEST_f_f (rint, 0.0, 0.0);
-    TEST_f_f (rint, minus_zero, minus_zero);
-    TEST_f_f (rint, -0.5, -0.0);
-    TEST_f_f (rint, -1.0, -1.0);
-    TEST_f_f (rint, -1.5, -2.0);
-    TEST_f_f (rint, -2.0, -2.0);
-  }
-
-  fesetround(save_round_mode);
-
-  END (rint_tonearest);
-}
-
-static void
-rint_test_towardzero (void)
-{
-  int save_round_mode;
-  START (rint_towardzero);
-
-  save_round_mode = fegetround();
-
-  if (!fesetround (FE_TOWARDZERO))
-  {
-    TEST_f_f (rint, 2.0, 2.0);
-    TEST_f_f (rint, 1.5, 1.0);
-    TEST_f_f (rint, 1.0, 1.0);
-    TEST_f_f (rint, 0.5, 0.0);
-    TEST_f_f (rint, 0.0, 0.0);
-    TEST_f_f (rint, minus_zero, minus_zero);
-    TEST_f_f (rint, -0.5, -0.0);
-    TEST_f_f (rint, -1.0, -1.0);
-    TEST_f_f (rint, -1.5, -1.0);
-    TEST_f_f (rint, -2.0, -2.0);
-  }
-
-  fesetround(save_round_mode);
-
-  END (rint_towardzero);
-}
-
-static void
-rint_test_downward (void)
-{
-  int save_round_mode;
-  START (rint_downward);
-
-  save_round_mode = fegetround();
-
-  if (!fesetround (FE_DOWNWARD))
-  {
-    TEST_f_f (rint, 2.0, 2.0);
-    TEST_f_f (rint, 1.5, 1.0);
-    TEST_f_f (rint, 1.0, 1.0);
-    TEST_f_f (rint, 0.5, 0.0);
-    TEST_f_f (rint, 0.0, 0.0);
-    TEST_f_f (rint, minus_zero, minus_zero);
-    TEST_f_f (rint, -0.5, -1.0);
-    TEST_f_f (rint, -1.0, -1.0);
-    TEST_f_f (rint, -1.5, -2.0);
-    TEST_f_f (rint, -2.0, -2.0);
-  }
-
-  fesetround(save_round_mode);
-
-  END (rint_downward);
-}
-
-static void
-rint_test_upward (void)
-{
-  int save_round_mode;
-  START (rint_upward);
-
-  save_round_mode = fegetround();
-
-  if (!fesetround (FE_UPWARD))
-  {
-    TEST_f_f (rint, 2.0, 2.0);
-    TEST_f_f (rint, 1.5, 2.0);
-    TEST_f_f (rint, 1.0, 1.0);
-    TEST_f_f (rint, 0.5, 1.0);
-    TEST_f_f (rint, 0.0, 0.0);
-    TEST_f_f (rint, minus_zero, minus_zero);
-    TEST_f_f (rint, -0.5, -0.0);
-    TEST_f_f (rint, -1.0, -1.0);
-    TEST_f_f (rint, -1.5, -1.0);
-    TEST_f_f (rint, -2.0, -2.0);
-  }
-
-  fesetround(save_round_mode);
-
-  END (rint_upward);
-}
-
-static void
 round_test (void)
 {
   START (round);
@@ -4665,10 +4557,6 @@ main (int argc, char **argv)
   floor_test ();
   nearbyint_test ();
   rint_test ();
-  rint_test_tonearest ();
-  rint_test_towardzero ();
-  rint_test_downward ();
-  rint_test_upward ();
   lrint_test ();
   llrint_test ();
   round_test ();
diff --git a/math/math_private.h b/math/math_private.h
index a6a85d3b1e..f545841df0 100644
--- a/math/math_private.h
+++ b/math/math_private.h
@@ -192,10 +192,6 @@ extern int    __kernel_rem_pio2 (double*,double*,int,int,int, const int32_t*);
 /* internal functions.  */
 extern double __copysign (double x, double __y);
 
-#if __GNUC_PREREQ (4, 0)
-extern inline double __copysign (double x, double y)
-{ return __builtin_copysign (x, y); }
-#endif
 
 /* ieee style elementary float functions */
 extern float __ieee754_sqrtf (float);
@@ -239,10 +235,6 @@ extern int   __kernel_rem_pio2f (float*,float*,int,int,int, const int32_t*);
 /* internal functions.  */
 extern float __copysignf (float x, float __y);
 
-#if __GNUC_PREREQ (4, 0)
-extern inline float __copysignf (float x, float y)
-{ return __builtin_copysignf (x, y); }
-#endif
 
 /* ieee style elementary long double functions */
 extern long double __ieee754_sqrtl (long double);
@@ -306,12 +298,6 @@ extern long double fabsl (long double x);
 extern void __sincosl (long double, long double *, long double *);
 extern long double __logbl (long double x);
 extern long double __significandl (long double x);
-
-#if __GNUC_PREREQ (4, 0)
-extern inline long double __copysignl (long double x, long double y)
-{ return __builtin_copysignl (x, y); }
-#endif
-
 #endif
 
 /* Prototypes for functions of the IBM Accurate Mathematical Library.  */
diff --git a/misc/efgcvt_r.c b/misc/efgcvt_r.c
index 69cca9038f..ac2a5c45bf 100644
--- a/misc/efgcvt_r.c
+++ b/misc/efgcvt_r.c
@@ -1,5 +1,5 @@
 /* Compatibility functions for floating point formatting, reentrant versions.
-   Copyright (C) 1995,96,97,98,99,2000,01,02,04 Free Software Foundation, Inc.
+   Copyright (C) 1995,96,97,98,99,2000,01,02 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -31,7 +31,6 @@
 # define FUNC_PREFIX
 # define FLOAT_FMT_FLAG
 # define FLOAT_NAME_EXT
-# define FLOAT_MIN_10_EXP DBL_MIN_10_EXP
 # if DBL_MANT_DIG == 53
 #  define NDIGIT_MAX 17
 # elif DBL_MANT_DIG == 24
@@ -44,17 +43,6 @@
 #  error "NDIGIT_MAX must be precomputed"
 #  define NDIGIT_MAX (lrint (ceil (M_LN2 / M_LN10 * DBL_MANT_DIG + 1.0)))
 # endif
-# if DBL_MIN_10_EXP == -37
-#  define FLOAT_MIN_10_NORM	1.0e-37
-# elif DBL_MIN_10_EXP == -307
-#  define FLOAT_MIN_10_NORM	1.0e-307
-# elif DBL_MIN_10_EXP == -4931
-#  define FLOAT_MIN_10_NORM	1.0e-4931
-# else
-/* libc can't depend on libm.  */
-#  error "FLOAT_MIN_10_NORM must be precomputed"
-#  define FLOAT_MIN_10_NORM	exp10 (DBL_MIN_10_EXP)
-# endif
 #endif
 
 #define APPEND(a, b) APPEND2 (a, b)
@@ -183,17 +171,6 @@ APPEND (FUNC_PREFIX, ecvt_r) (value, ndigit, decpt, sign, buf, len)
 	d = -value;
       else
 	d = value;
-      /* For denormalized numbers the d < 1.0 case below won't work,
-	 as f can overflow to +Inf.  */
-      if (d < FLOAT_MIN_10_NORM)
-	{
-	  value /= FLOAT_MIN_10_NORM;
-	  if (value < 0.0)
-	    d = -value;
-	  else
-	    d = value;
-	  exponent += FLOAT_MIN_10_EXP;
-	}
       if (d < 1.0)
 	{
 	  do
diff --git a/misc/qefgcvt_r.c b/misc/qefgcvt_r.c
index d5b2a799b3..66cc049ec8 100644
--- a/misc/qefgcvt_r.c
+++ b/misc/qefgcvt_r.c
@@ -24,7 +24,6 @@
 #define FUNC_PREFIX q
 #define FLOAT_FMT_FLAG "L"
 #define FLOAT_NAME_EXT l
-#define FLOAT_MIN_10_EXP LDBL_MIN_10_EXP
 #if LDBL_MANT_DIG == 64
 # define NDIGIT_MAX 21
 #elif LDBL_MANT_DIG == 53
@@ -41,16 +40,5 @@
 # error "NDIGIT_MAX must be precomputed"
 # define NDIGIT_MAX (lrint (ceil (M_LN2 / M_LN10 * LDBL_MANT_DIG + 1.0)))
 #endif
-#if LDBL_MIN_10_EXP == -37
-# define FLOAT_MIN_10_NORM	1.0e-37L
-#elif LDBL_MIN_10_EXP == -307
-# define FLOAT_MIN_10_NORM	1.0e-307L
-#elif LDBL_MIN_10_EXP == -4931
-# define FLOAT_MIN_10_NORM	1.0e-4931L
-#else
-/* libc can't depend on libm.  */
-# error "FLOAT_MIN_10_NORM must be precomputed"
-# define FLOAT_MIN_10_NORM	exp10l (LDBL_MIN_10_EXP)
-#endif
 
 #include "efgcvt_r.c"
diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h
index 8d529bfadc..475cf62961 100644
--- a/misc/sys/cdefs.h
+++ b/misc/sys/cdefs.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1992-2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1992-2001, 2002, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -255,21 +255,6 @@
 # define __nonnull(params)
 #endif
 
-/* If fortification mode, we warn about unused results of certain
-   function calls which can lead to problems.  */
-#if __GNUC_PREREQ (3,4)
-# define __attribute_warn_unused_result__ \
-   __attribute__ ((__warn_unused_result__))
-# if __USE_FORTIFY_LEVEL > 0
-#  define __wur __attribute_warn_unused_result__
-# endif
-#else
-# define __attribute_warn_unused_result__ /* empty */
-#endif
-#ifndef __wur
-# define __wur /* Ignore */
-#endif
-
 /* It is possible to compile containing GCC extensions even if GCC is
    run in pedantic mode if the uses are carefully marked using the
    `__extension__' keyword.  But this is not generally available before
diff --git a/misc/syslog.c b/misc/syslog.c
new file mode 100644
index 0000000000..6916356da7
--- /dev/null
+++ b/misc/syslog.c
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 1983, 1988, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char sccsid[] = "@(#)syslog.c	8.4 (Berkeley) 3/18/94";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/syslog.h>
+#include <sys/uio.h>
+#include <netdb.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdio_ext.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <bits/libc-lock.h>
+#include <signal.h>
+#include <locale.h>
+
+#if __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include <libio/iolibio.h>
+#define ftell(s) INTUSE(_IO_ftell) (s)
+
+static int	LogType = SOCK_DGRAM;	/* type of socket connection */
+static int	LogFile = -1;		/* fd for log */
+static int	connected;		/* have done connect */
+static int	LogStat;		/* status bits, set by openlog() */
+static const char *LogTag;		/* string to tag the entry with */
+static int	LogFacility = LOG_USER;	/* default facility code */
+static int	LogMask = 0xff;		/* mask of priorities to be logged */
+extern char	*__progname;		/* Program name, from crt0. */
+
+/* Define the lock.  */
+__libc_lock_define_initialized (static, syslog_lock)
+
+static void openlog_internal(const char *, int, int) internal_function;
+static void closelog_internal(void);
+static void sigpipe_handler (int);
+
+
+struct cleanup_arg
+{
+  void *buf;
+  struct sigaction *oldaction;
+};
+
+static void
+cancel_handler (void *ptr)
+{
+  /* Restore the old signal handler.  */
+  struct cleanup_arg *clarg = (struct cleanup_arg *) ptr;
+
+  if (clarg != NULL && clarg->oldaction != NULL)
+    __sigaction (SIGPIPE, clarg->oldaction, NULL);
+
+  /* Free the lock.  */
+  __libc_lock_unlock (syslog_lock);
+}
+
+
+/*
+ * syslog, vsyslog --
+ *	print message on log file; output is intended for syslogd(8).
+ */
+void
+#if __STDC__
+syslog(int pri, const char *fmt, ...)
+#else
+syslog(pri, fmt, va_alist)
+	int pri;
+	char *fmt;
+	va_dcl
+#endif
+{
+	va_list ap;
+
+#if __STDC__
+	va_start(ap, fmt);
+#else
+	va_start(ap);
+#endif
+	vsyslog(pri, fmt, ap);
+	va_end(ap);
+}
+libc_hidden_def (syslog)
+
+void
+vsyslog(pri, fmt, ap)
+	int pri;
+	register const char *fmt;
+	va_list ap;
+{
+	struct tm now_tm;
+	time_t now;
+	int fd;
+	FILE *f;
+	char *buf = 0;
+	size_t bufsize = 0;
+	size_t prioff, msgoff;
+ 	struct sigaction action, oldaction;
+ 	int sigpipe;
+	int saved_errno = errno;
+	char failbuf[3 * sizeof (pid_t) + sizeof "out of memory []"];
+
+#define	INTERNALLOG	LOG_ERR|LOG_CONS|LOG_PERROR|LOG_PID
+	/* Check for invalid bits. */
+	if (pri & ~(LOG_PRIMASK|LOG_FACMASK)) {
+		syslog(INTERNALLOG,
+		    "syslog: unknown facility/priority: %x", pri);
+		pri &= LOG_PRIMASK|LOG_FACMASK;
+	}
+
+	/* Check priority against setlogmask values. */
+	if ((LOG_MASK (LOG_PRI (pri)) & LogMask) == 0)
+		return;
+
+	/* Set default facility if none specified. */
+	if ((pri & LOG_FACMASK) == 0)
+		pri |= LogFacility;
+
+	/* Build the message in a memory-buffer stream.  */
+	f = open_memstream (&buf, &bufsize);
+	if (f == NULL)
+	  {
+	    /* We cannot get a stream.  There is not much we can do but
+	       emit an error message.  */
+	    char numbuf[3 * sizeof (pid_t)];
+	    char *nump;
+	    char *endp = __stpcpy (failbuf, "out of memory [");
+	    pid_t pid = __getpid ();
+
+	    nump = numbuf + sizeof (numbuf);
+	    /* The PID can never be zero.  */
+	    do
+	      *--nump = '0' + pid % 10;
+	    while ((pid /= 10) != 0);
+
+	    endp = __mempcpy (endp, nump, (numbuf + sizeof (numbuf)) - nump);
+	    *endp++ = ']';
+	    *endp = '\0';
+	    buf = failbuf;
+	    bufsize = endp - failbuf;
+	    msgoff = 0;
+	  }
+	else
+	  {
+	    __fsetlocking (f, FSETLOCKING_BYCALLER);
+	    prioff = fprintf (f, "<%d>", pri);
+	    (void) time (&now);
+	    f->_IO_write_ptr += __strftime_l (f->_IO_write_ptr,
+					      f->_IO_write_end
+					      - f->_IO_write_ptr,
+					      "%h %e %T ",
+					      __localtime_r (&now, &now_tm),
+					      &_nl_C_locobj);
+	    msgoff = ftell (f);
+	    if (LogTag == NULL)
+	      LogTag = __progname;
+	    if (LogTag != NULL)
+	      fputs_unlocked (LogTag, f);
+	    if (LogStat & LOG_PID)
+	      fprintf (f, "[%d]", (int) __getpid ());
+	    if (LogTag != NULL)
+	      {
+		putc_unlocked (':', f);
+		putc_unlocked (' ', f);
+	      }
+
+	    /* Restore errno for %m format.  */
+	    __set_errno (saved_errno);
+
+	    /* We have the header.  Print the user's format into the
+               buffer.  */
+	    vfprintf (f, fmt, ap);
+
+	    /* Close the memory stream; this will finalize the data
+	       into a malloc'd buffer in BUF.  */
+	    fclose (f);
+	  }
+
+	/* Output to stderr if requested. */
+	if (LogStat & LOG_PERROR) {
+		struct iovec iov[2];
+		register struct iovec *v = iov;
+
+		v->iov_base = buf + msgoff;
+		v->iov_len = bufsize - msgoff;
+		/* Append a newline if necessary.  */
+		if (buf[bufsize - 1] != '\n')
+		  {
+		    ++v;
+		    v->iov_base = (char *) "\n";
+		    v->iov_len = 1;
+		  }
+
+		__libc_cleanup_push (free, buf == failbuf ? NULL : buf);
+
+		/* writev is a cancellation point.  */
+		(void)__writev(STDERR_FILENO, iov, v - iov + 1);
+
+		__libc_cleanup_pop (0);
+	}
+
+	/* Prepare for multiple users.  We have to take care: open and
+	   write are cancellation points.  */
+	struct cleanup_arg clarg;
+	clarg.buf = buf;
+	clarg.oldaction = NULL;
+	__libc_cleanup_push (cancel_handler, &clarg);
+	__libc_lock_lock (syslog_lock);
+
+	/* Prepare for a broken connection.  */
+ 	memset (&action, 0, sizeof (action));
+ 	action.sa_handler = sigpipe_handler;
+ 	sigemptyset (&action.sa_mask);
+ 	sigpipe = __sigaction (SIGPIPE, &action, &oldaction);
+	if (sigpipe == 0)
+	  clarg.oldaction = &oldaction;
+
+	/* Get connected, output the message to the local logger. */
+	if (!connected)
+		openlog_internal(LogTag, LogStat | LOG_NDELAY, 0);
+
+	/* If we have a SOCK_STREAM connection, also send ASCII NUL as
+	   a record terminator.  */
+	if (LogType == SOCK_STREAM)
+	  ++bufsize;
+
+	if (!connected || __send(LogFile, buf, bufsize, 0) < 0)
+	  {
+	    if (connected)
+	      {
+		/* Try to reopen the syslog connection.  Maybe it went
+		   down.  */
+		closelog_internal ();
+		openlog_internal(LogTag, LogStat | LOG_NDELAY, 0);
+	      }
+
+	    if (!connected || __send(LogFile, buf, bufsize, 0) < 0)
+	      {
+		closelog_internal ();	/* attempt re-open next time */
+		/*
+		 * Output the message to the console; don't worry
+		 * about blocking, if console blocks everything will.
+		 * Make sure the error reported is the one from the
+		 * syslogd failure.
+		 */
+		if (LogStat & LOG_CONS &&
+		    (fd = __open(_PATH_CONSOLE, O_WRONLY|O_NOCTTY, 0)) >= 0)
+		  {
+		    dprintf (fd, "%s\r\n", buf + msgoff);
+		    (void)__close(fd);
+		  }
+	      }
+	  }
+
+	if (sigpipe == 0)
+		__sigaction (SIGPIPE, &oldaction, (struct sigaction *) NULL);
+
+	/* End of critical section.  */
+	__libc_cleanup_pop (0);
+	__libc_lock_unlock (syslog_lock);
+
+	if (buf != failbuf)
+		free (buf);
+}
+libc_hidden_def (vsyslog)
+
+static struct sockaddr SyslogAddr;	/* AF_UNIX address of local logger */
+
+
+static void
+internal_function
+openlog_internal(const char *ident, int logstat, int logfac)
+{
+	if (ident != NULL)
+		LogTag = ident;
+	LogStat = logstat;
+	if (logfac != 0 && (logfac &~ LOG_FACMASK) == 0)
+		LogFacility = logfac;
+
+	int retry = 0;
+	while (retry < 2) {
+		if (LogFile == -1) {
+			SyslogAddr.sa_family = AF_UNIX;
+			(void)strncpy(SyslogAddr.sa_data, _PATH_LOG,
+				      sizeof(SyslogAddr.sa_data));
+			if (LogStat & LOG_NDELAY) {
+				if ((LogFile = __socket(AF_UNIX, LogType, 0))
+				    == -1)
+					return;
+				(void)__fcntl(LogFile, F_SETFD, 1);
+			}
+		}
+		if (LogFile != -1 && !connected)
+		{
+			int old_errno = errno;
+			if (__connect(LogFile, &SyslogAddr, sizeof(SyslogAddr))
+			    == -1)
+			{
+				int saved_errno = errno;
+				int fd = LogFile;
+				LogFile = -1;
+				(void)__close(fd);
+				__set_errno (old_errno);
+				if (saved_errno == EPROTOTYPE)
+				{
+					/* retry with the other type: */
+					LogType = (LogType == SOCK_DGRAM
+						   ? SOCK_STREAM : SOCK_DGRAM);
+					++retry;
+					continue;
+				}
+			} else
+				connected = 1;
+		}
+		break;
+	}
+}
+
+void
+openlog (const char *ident, int logstat, int logfac)
+{
+  /* Protect against multiple users and cancellation.  */
+  __libc_cleanup_push (cancel_handler, NULL);
+  __libc_lock_lock (syslog_lock);
+
+  openlog_internal (ident, logstat, logfac);
+
+  __libc_cleanup_pop (1);
+}
+
+static void
+sigpipe_handler (int signo)
+{
+  closelog_internal ();
+}
+
+static void
+closelog_internal()
+{
+  if (!connected)
+    return;
+
+  __close (LogFile);
+  LogFile = -1;
+  connected = 0;
+}
+
+void
+closelog ()
+{
+  /* Protect against multiple users and cancellation.  */
+  __libc_cleanup_push (cancel_handler, NULL);
+  __libc_lock_lock (syslog_lock);
+
+  closelog_internal ();
+  LogTag = NULL;
+  LogType = SOCK_DGRAM; /* this is the default */
+
+  /* Free the lock.  */
+  __libc_cleanup_pop (1);
+}
+
+/* setlogmask -- set the log mask level */
+int
+setlogmask(pmask)
+	int pmask;
+{
+	int omask;
+
+	omask = LogMask;
+	if (pmask != 0)
+		LogMask = pmask;
+	return (omask);
+}
diff --git a/misc/tst-efgcvt.c b/misc/tst-efgcvt.c
index 30ab0f17a0..91e5cf929e 100644
--- a/misc/tst-efgcvt.c
+++ b/misc/tst-efgcvt.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998, 1999, 2000, 2004 Free Software Foundation, Inc.
+/* Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,6 @@
 # define _GNU_SOURCE	1
 #endif
 
-#include <float.h>
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -60,10 +59,6 @@ static testcase ecvt_tests[] =
   { 123.01, -4, 3, "" },
   { 126.71, -4, 3, "" },
   { 0.0, 4, 1, "0000" },
-#if DBL_MANT_DIG == 53
-  { 0x1p-1074, 3, -323, "494" },
-  { -0x1p-1074, 3, -323, "494" },
-#endif
   /* -1.0 is end marker.  */
   { -1.0, 0, 0, "" }
 };
diff --git a/nptl/ChangeLog b/nptl/ChangeLog
index f372bde30e..de90f2a66d 100644
--- a/nptl/ChangeLog
+++ b/nptl/ChangeLog
@@ -1,53 +1,3 @@
-2005-02-07  Richard Henderson  <rth@redhat.com>
-
-	* sysdeps/pthread/pthread.h (__sigsetjmp): Use pointer as first
-	argument.
-
-2004-11-03  Marcus Brinkmann  <marcus@gnu.org>
-
-	* sysdeps/generic/lowlevellock.h (__generic_mutex_unlock): Fix
-	order of arguments in invocation of atomic_add_zero.
-
-2005-01-26  Jakub Jelinek  <jakub@redhat.com>
-
-	* sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S (__new_sem_trywait):
-	Use direct %gs segment access or, if NO_TLS_DIRECT_SEG_REFS,
-	at least gotntpoff relocation and addition.
-	* sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S (sem_timedwait):
-	Likewise.
-	* sysdeps/unix/sysv/linux/i386/i486/sem_post.S (__new_sem_post):
-	Likewise.
-	* sysdeps/unix/sysv/linux/i386/i486/sem_wait.S (__new_sem_wait):
-	Likewise.
-
-2005-01-06  Ulrich Drepper  <drepper@redhat.com>
-
-	* allocatestack.c (init_one_static_tls): Adjust initialization of DTV
-	entry for static tls deallocation fix.
-	* sysdeps/alpha/tls.h (dtv_t): Change pointer type to be struct which
-	also contains information whether the memory pointed to is static
-	TLS or not.
-	* sysdeps/i386/tls.h: Likewise.
-	* sysdeps/ia64/tls.h: Likewise.
-	* sysdeps/powerpc/tls.h: Likewise.
-	* sysdeps/s390/tls.h: Likewise.
-	* sysdeps/sh/tls.h: Likewise.
-	* sysdeps/sparc/tls.h: Likewise.
-	* sysdeps/x86_64/tls.h: Likewise.
-
-2004-12-27  Ulrich Drepper  <drepper@redhat.com>
-
-	* init.c (__pthread_initialize_minimal_internal): Use __sigemptyset.
-
-2004-12-21  Jakub Jelinek  <jakub@redhat.com>
-
-	* sysdeps/i386/tls.h (CALL_THREAD_FCT): Maintain 16 byte alignment of
-	%esp.
-	* Makefile (tests): Add tst-align2.
-	* tst-align2.c: New test.
-	* sysdeps/i386/Makefile (CFLAGS-tst-align{,2}.c): Add
-	-mpreferred-stack-boundary=4.
-
 2004-12-18  Roland McGrath  <roland@redhat.com>
 
 	* sysdeps/unix/sysv/linux/powerpc/powerpc64/bits/local_lim.h:
@@ -106,7 +56,7 @@
 	* sysdeps/pthread/posix-timer.h (__timer_signal_thread_pclk,
 	__timer_signal_thread_tclk): Remove.
 	* sysdeps/unix/sysv/linux/i386/bits/posix_opt.h: Removed.
-	* sysdeps/unix/sysv/linux/ia64/bits/posix_opt.h: Removed.
+	* sysdeps/unix/sysv/linux/ia64/bits/posix_opt.h: Removed.
 	* sysdeps/unix/sysv/linux/x86_64/bits/posix_opt.h: Removed.
 
 2004-12-07  Jakub Jelinek  <jakub@redhat.com>
diff --git a/nptl/Makefile b/nptl/Makefile
index 8d18946e6f..d42f356131 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -205,7 +205,7 @@ tests = tst-attr1 tst-attr2 tst-attr3 \
 	tst-sem1 tst-sem2 tst-sem3 tst-sem4 tst-sem5 tst-sem6 tst-sem7 \
 	tst-sem8 tst-sem9 \
 	tst-barrier1 tst-barrier2 tst-barrier3 tst-barrier4 \
-	tst-align tst-align2 \
+	tst-align \
 	tst-basic1 tst-basic2 tst-basic3 tst-basic4 tst-basic5 tst-basic6 \
 	tst-kill1 tst-kill2 tst-kill3 tst-kill4 tst-kill5 tst-kill6 \
 	tst-raise1 \
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index fcb6c6e475..8875209a11 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -920,8 +920,7 @@ init_one_static_tls (struct pthread *curp, struct link_map *map)
 # endif
 
   /* Fill in the DTV slot so that a later LD/GD access will find it.  */
-  dtv[map->l_tls_modid].pointer.val = dest;
-  dtv[map->l_tls_modid].pointer.is_static = true;
+  dtv[map->l_tls_modid].pointer = dest;
 
   /* Initialize the memory.  */
   memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
diff --git a/nptl/init.c b/nptl/init.c
index 86745af8d1..3751e6be77 100644
--- a/nptl/init.c
+++ b/nptl/init.c
@@ -262,7 +262,7 @@ __pthread_initialize_minimal_internal (void)
   struct sigaction sa;
   sa.sa_sigaction = sigcancel_handler;
   sa.sa_flags = SA_SIGINFO;
-  __sigemptyset (&sa.sa_mask);
+  sigemptyset (&sa.sa_mask);
 
   (void) __libc_sigaction (SIGCANCEL, &sa, NULL);
 
diff --git a/nptl/sysdeps/alpha/tls.h b/nptl/sysdeps/alpha/tls.h
index fa3c832a68..bc6630953f 100644
--- a/nptl/sysdeps/alpha/tls.h
+++ b/nptl/sysdeps/alpha/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  NPTL/Alpha version.
-   Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
 # include <dl-sysdep.h>
 
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 
@@ -31,11 +30,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 #else /* __ASSEMBLER__ */
diff --git a/nptl/sysdeps/generic/lowlevellock.h b/nptl/sysdeps/generic/lowlevellock.h
index 7f95daadad..9cffca83e6 100644
--- a/nptl/sysdeps/generic/lowlevellock.h
+++ b/nptl/sysdeps/generic/lowlevellock.h
@@ -76,7 +76,7 @@ __generic_mutex_unlock (int *mutex)
   /* Adding 0x80000000 to the counter results in 0 if and only if
      there are not other interested threads - we can return (this is
      the fastpath).  */
-  if (atomic_add_zero (mutex, 0x80000000))
+  if (atomic_add_zero (0x80000000, mutex))
     return;
 
   /* There are other threads waiting for this mutex, wake one of them
diff --git a/nptl/sysdeps/i386/Makefile b/nptl/sysdeps/i386/Makefile
index 2f0d88f303..693fb0569f 100644
--- a/nptl/sysdeps/i386/Makefile
+++ b/nptl/sysdeps/i386/Makefile
@@ -22,6 +22,4 @@ endif
 
 ifeq ($(subdir),nptl)
 CFLAGS-pthread_create.c += -mpreferred-stack-boundary=4
-CFLAGS-tst-align.c += -mpreferred-stack-boundary=4
-CFLAGS-tst-align2.c += -mpreferred-stack-boundary=4
 endif
diff --git a/nptl/sysdeps/i386/tls.h b/nptl/sysdeps/i386/tls.h
index e243f8b2cf..18b038f93e 100644
--- a/nptl/sysdeps/i386/tls.h
+++ b/nptl/sysdeps/i386/tls.h
@@ -22,7 +22,6 @@
 
 #include <dl-sysdep.h>
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 # include <stdlib.h>
@@ -33,11 +32,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 
@@ -402,12 +397,9 @@ union user_desc_init
 #define CALL_THREAD_FCT(descr) \
   ({ void *__res;							      \
      int __ignore1, __ignore2;						      \
-     asm volatile ("pushl %%eax\n\t"					      \
-		   "pushl %%eax\n\t"					      \
-		   "pushl %%eax\n\t"					      \
-		   "pushl %%gs:%P4\n\t"					      \
+     asm volatile ("pushl %%gs:%P4\n\t"					      \
 		   "call *%%gs:%P3\n\t"					      \
-		   "addl $16, %%esp"					      \
+		   "addl $4, %%esp"					      \
 		   : "=a" (__res), "=c" (__ignore1), "=d" (__ignore2)	      \
 		   : "i" (offsetof (struct pthread, start_routine)),	      \
 		     "i" (offsetof (struct pthread, arg)));		      \
diff --git a/nptl/sysdeps/ia64/tls.h b/nptl/sysdeps/ia64/tls.h
index a435f966ca..4591a415c0 100644
--- a/nptl/sysdeps/ia64/tls.h
+++ b/nptl/sysdeps/ia64/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  nptl/IA-64 version.
-   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,6 @@
 
 #include <dl-sysdep.h>
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 # include <stdlib.h>
@@ -33,11 +32,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 
diff --git a/nptl/sysdeps/powerpc/tls.h b/nptl/sysdeps/powerpc/tls.h
index a7f69074e9..ce7f5bd53d 100644
--- a/nptl/sysdeps/powerpc/tls.h
+++ b/nptl/sysdeps/powerpc/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  NPTL/PowerPC version.
-   Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
 # include <dl-sysdep.h>
 
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 
@@ -31,11 +30,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 #else /* __ASSEMBLER__ */
diff --git a/nptl/sysdeps/pthread/pthread.h b/nptl/sysdeps/pthread/pthread.h
index 57024e1e84..5046a6976b 100644
--- a/nptl/sysdeps/pthread/pthread.h
+++ b/nptl/sysdeps/pthread/pthread.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -661,7 +661,7 @@ extern void __pthread_unwind_next (__pthread_unwind_buf_t *__buf)
 
 /* Function used in the macros.  */
 struct __jmp_buf_tag;
-extern int __sigsetjmp (struct __jmp_buf_tag *__env, int __savemask) __THROW;
+extern int __sigsetjmp (struct __jmp_buf_tag __env[1], int __savemask) __THROW;
 
 
 /* Mutex handling.  */
diff --git a/nptl/sysdeps/s390/tls.h b/nptl/sysdeps/s390/tls.h
index e93f3d080c..c9b991df32 100644
--- a/nptl/sysdeps/s390/tls.h
+++ b/nptl/sysdeps/s390/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  NPTL/s390 version.
-   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,6 @@
 
 #include <dl-sysdep.h>
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 # include <stdlib.h>
@@ -33,11 +32,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 
diff --git a/nptl/sysdeps/sh/tls.h b/nptl/sysdeps/sh/tls.h
index e883bae993..db490ab7ee 100644
--- a/nptl/sysdeps/sh/tls.h
+++ b/nptl/sysdeps/sh/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  NPTL/SH version.
-   Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
 # include <dl-sysdep.h>
 
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 
@@ -31,11 +30,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 typedef struct
diff --git a/nptl/sysdeps/sparc/tls.h b/nptl/sysdeps/sparc/tls.h
index 8f54a0bb23..8980f9fc0c 100644
--- a/nptl/sysdeps/sparc/tls.h
+++ b/nptl/sysdeps/sparc/tls.h
@@ -1,5 +1,5 @@
 /* Definitions for thread-local data handling.  NPTL/sparc version.
-   Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,6 @@
 
 #include <dl-sysdep.h>
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 # include <stdlib.h>
@@ -32,11 +31,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 typedef struct
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
index 71e96d2228..3d67329bd1 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_post.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -65,14 +65,9 @@ __new_sem_post:
 #endif
 	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
 #if USE___THREAD
-# ifdef NO_TLS_DIRECT_SEG_REFS
-	movl	errno@gotntpoff(%ebx), %edx
-	addl	%gs:0, %edx
+	movl	%gs:0, %edx
+	subl	errno@gottpoff(%ebx), %edx
 	movl	$EINVAL, (%edx)
-# else
-	movl	errno@gotntpoff(%ebx), %edx
-	movl	$EINVAL, %gs:(%edx)
-# endif
 #else
 	call	__errno_location@plt
 	movl	$EINVAL, (%eax)
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S
index 318b53a873..5b24476936 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_timedwait.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -158,14 +158,9 @@ sem_timedwait:
 #endif
 	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
 #if USE___THREAD
-# ifdef NO_TLS_DIRECT_SEG_REFS
-	movl	errno@gotntpoff(%ebx), %edx
-	addl	%gs:0, %edx
+	movl	%gs:0, %edx
+	subl	errno@gottpoff(%ebx), %edx
 	movl	%esi, (%edx)
-# else
-	movl	errno@gotntpoff(%ebx), %edx
-	movl	%esi, %gs:(%edx)
-# endif
 #else
 	call	__errno_location@plt
 	movl	%esi, (%eax)
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S
index d36a1088fb..a7c405d95f 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_trywait.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -55,14 +55,9 @@ __new_sem_trywait:
 #endif
 	addl	$_GLOBAL_OFFSET_TABLE_, %ecx
 #if USE___THREAD
-# ifdef NO_TLS_DIRECT_SEG_REFS
-	movl	errno@gotntpoff(%ecx), %edx
-	addl	%gs:0, %edx
+	movl	%gs:0, %edx
+	subl	errno@gottpoff(%ecx), %edx
 	movl	$EAGAIN, (%edx)
-# else
-	movl	errno@gotntpoff(%ecx), %edx
-	movl	$EAGAIN, %gs:(%edx)
-# endif
 #else
 	call	__errno_location@plt
 	movl	$EAGAIN, (%eax)
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S
index 1605b69b69..b7674dc3ba 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/sem_wait.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -102,14 +102,9 @@ __new_sem_wait:
 #endif
 	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
 #if USE___THREAD
-# ifdef NO_TLS_DIRECT_SEG_REFS
-	movl	errno@gotntpoff(%ebx), %edx
-	addl	%gs:0, %edx
+	movl	%gs:0, %edx
+	subl	errno@gottpoff(%ebx), %edx
 	movl	%esi, (%edx)
-# else
-	movl	errno@gotntpoff(%ebx), %edx
-	movl	%esi, %gs:(%edx)
-# endif
 #else
 	call	__errno_location@plt
 	movl	%esi, (%eax)
diff --git a/nptl/sysdeps/x86_64/tls.h b/nptl/sysdeps/x86_64/tls.h
index 12da9dc81f..3d6111f4e3 100644
--- a/nptl/sysdeps/x86_64/tls.h
+++ b/nptl/sysdeps/x86_64/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  nptl/x86_64 version.
-   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,6 @@
 
 #include <asm/prctl.h>	/* For ARCH_SET_FS.  */
 #ifndef __ASSEMBLER__
-# include <stdbool.h>
 # include <stddef.h>
 # include <stdint.h>
 # include <stdlib.h>
@@ -32,11 +31,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 
diff --git a/nscd/Makefile b/nscd/Makefile
index 9c32c68681..70a35198c2 100644
--- a/nscd/Makefile
+++ b/nscd/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1998,2000,2002,2003,2004,2005 Free Software Foundation, Inc.
+# Copyright (C) 1998, 2000, 2002, 2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -56,8 +56,6 @@ nscd-modules += selinux
 selinux-LIBS := -lselinux
 endif
 
-LDLIBS-nscd = $(selinux-LIBS)
-
 distribute := nscd.h nscd-client.h dbg_log.h \
 	      $(addsuffix .c, $(filter-out xmalloc, $(nscd-modules))) \
 	      nscd_nischeck.c nscd.conf nscd.init nscd_proto.h \
@@ -112,7 +110,7 @@ $(objpfx)nscd: $(addprefix $(objpfx),$(nscd-modules:=.o))
 	  $(extra-B-$(@F:lib%.so=%).so) $(load-map-file) \
 	  $(LDFLAGS) $(LDFLAGS-$(@F)) \
 	  -L$(subst :, -L,$(rpath-link)) -Wl,-rpath-link=$(rpath-link) \
-	  -o $@ $^ $(LDLIBS-nscd) $(common-objpfx)libc_nonshared.a
+	  -o $@ $^ $(selinux-LIBS) $(common-objpfx)libc_nonshared.a
 endif
 
 # This makes sure -DNOT_IN_libc is passed for all these modules.
diff --git a/nscd/connections.c b/nscd/connections.c
index fe4989d6ef..aa760e0252 100644
--- a/nscd/connections.c
+++ b/nscd/connections.c
@@ -1,5 +1,5 @@
 /* Inner loops of cache daemon.
-   Copyright (C) 1998-2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1998-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -605,10 +605,7 @@ send_ro_fd (struct database_dyn *db, char *key, int fd)
 
   /* Send the control message.  We repeat when we are interrupted but
      everything else is ignored.  */
-#ifndef MSG_NOSIGNAL
-# define MSG_NOSIGNAL 0
-#endif
-  (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
+  (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, 0));
 
   if (__builtin_expect (debug_level > 0, 0))
     dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
diff --git a/nscd/nscd.c b/nscd/nscd.c
index 3ae401f1f7..5cca127f91 100644
--- a/nscd/nscd.c
+++ b/nscd/nscd.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 1998-2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (c) 1998-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Thorsten Kukuk <kukuk@suse.de>, 1998.
 
@@ -403,7 +403,7 @@ print_version (FILE *stream, struct argp_state *state)
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
   fprintf (stream, gettext ("Written by %s.\n"),
 	   "Thorsten Kukuk and Ulrich Drepper");
 }
@@ -450,9 +450,6 @@ termination_handler (int signum)
   /* Synchronize memory.  */
   for (int cnt = 0; cnt < lastdb; ++cnt)
     {
-      if (!dbs[cnt].enabled)
-	continue;
-
       /* Make sure nobody keeps using the database.  */
       dbs[cnt].head->timestamp = 0;
 
diff --git a/nscd/nscd.init b/nscd/nscd.init
index 23e20c3a82..d5c1cb9ae3 100644
--- a/nscd/nscd.init
+++ b/nscd/nscd.init
@@ -88,9 +88,9 @@ case "$1" in
 	RETVAL=$?
 	;;
     status)
-	status nscd
+        status nscd
 	RETVAL=$?
-	;;
+        ;;
     restart)
 	restart
 	RETVAL=$?
@@ -100,11 +100,9 @@ case "$1" in
 	RETVAL=$?
 	;;
     reload)
-    	echo -n $"Reloading $prog: "
-	killproc /usr/sbin/nscd -HUP
+  	killproc /usr/sbin/nscd -HUP
 	RETVAL=$?
-	echo
-	;;
+        ;;
     *)
 	echo $"Usage: $0 {start|stop|status|restart|reload|condrestart}"
 	RETVAL=1
diff --git a/nscd/nscd_getai.c b/nscd/nscd_getai.c
index 866f7b2a5f..24b374b0dc 100644
--- a/nscd/nscd_getai.c
+++ b/nscd/nscd_getai.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.
 
@@ -78,7 +78,7 @@ __nscd_getai (const char *key, struct nscd_ai_result **result, int *h_errnop)
 				 sizeof (ai_resp_mem));
       if (sock == -1)
 	{
-	  /* nscd not running or wrong version.  */
+	  /* nscd not running or wrong version or hosts caching disabled.  */
 	  __nss_not_use_nscd_hosts = 1;
 	  goto out;
 	}
@@ -151,13 +151,6 @@ __nscd_getai (const char *key, struct nscd_ai_result **result, int *h_errnop)
     }
   else
     {
-      if (__builtin_expect (ai_resp->found == -1, 0))
-	{
-	  /* The daemon does not cache this database.  */
-	  __nss_not_use_nscd_hosts = 1;
-	  goto out_close;
-	}
-
       /* Store the error number.  */
       *h_errnop = ai_resp->error;
 
diff --git a/nscd/nscd_helper.c b/nscd/nscd_helper.c
index ea4fb968db..0e16cb8aeb 100644
--- a/nscd/nscd_helper.c
+++ b/nscd/nscd_helper.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
 
@@ -138,10 +138,7 @@ get_mapping (request_type type, const char *key,
     /* Failure or timeout.  */
     goto out_close2;
 
-#ifndef MSG_NOSIGNAL
-# define MSG_NOSIGNAL 0
-#endif
-  if (TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, MSG_NOSIGNAL)) != keylen)
+  if (TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, 0)) != keylen)
     goto out_close2;
 
   mapfd = *(int *) CMSG_DATA (cmsg);
diff --git a/nscd/nscd_initgroups.c b/nscd/nscd_initgroups.c
index daddf2e164..2ea9e7f862 100644
--- a/nscd/nscd_initgroups.c
+++ b/nscd/nscd_initgroups.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.
 
@@ -75,7 +75,7 @@ __nscd_getgrouplist (const char *user, gid_t group, long int *size,
 				 sizeof (initgr_resp_mem));
       if (sock == -1)
 	{
-	  /* nscd not running or wrong version.  */
+	  /* nscd not running or wrong version or hosts caching disabled.  */
 	  __nss_not_use_nscd_group = 1;
 	  goto out;
 	}
@@ -101,7 +101,7 @@ __nscd_getgrouplist (const char *user, gid_t group, long int *size,
 				 (initgr_resp->ngrps + 1) * sizeof (gid_t));
 	  if (newp == NULL)
 	    /* We cannot increase the buffer size.  */
-	    goto out_close;
+	    goto out;
 
 	  *groupsp = newp;
 	  *size = initgr_resp->ngrps + 1;
@@ -125,13 +125,6 @@ __nscd_getgrouplist (const char *user, gid_t group, long int *size,
     }
   else
     {
-      if (__builtin_expect (initgr_resp->found == -1, 0))
-	{
-	  /* The daemon does not cache this database.  */
-	  __nss_not_use_nscd_group = 1;
-	  goto out_close;
-	}
-
       /* No group found yet.   */
       retval = 0;
 
@@ -150,7 +143,6 @@ __nscd_getgrouplist (const char *user, gid_t group, long int *size,
 	(*groupsp)[retval++] = group;
     }
 
- out_close:
   if (sock != -1)
     close_not_cancel_no_status (sock);
  out:
diff --git a/nscd/nscd_nischeck.c b/nscd/nscd_nischeck.c
index 20f7bb0620..a6817cf79e 100644
--- a/nscd/nscd_nischeck.c
+++ b/nscd/nscd_nischeck.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 1999, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (c) 1999, 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Thorsten Kukuk <kukuk@suse.de>, 1999.
 
@@ -91,6 +91,6 @@ print_version (FILE *stream, struct argp_state *state)
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
   fprintf (stream, gettext ("Written by %s.\n"), "Thorsten Kukuk");
 }
diff --git a/nss/getent.c b/nss/getent.c
index 0aea5eed2a..c0a273241f 100644
--- a/nss/getent.c
+++ b/nss/getent.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 1998-2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (c) 1998-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Thorsten Kukuk <kukuk@suse.de>, 1998.
 
@@ -83,7 +83,7 @@ print_version (FILE *stream, struct argp_state *state)
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
   fprintf (stream, gettext ("Written by %s.\n"), "Thorsten Kukuk");
 }
 
diff --git a/po/es.po b/po/es.po
index ba6a1fbd83..eb79bd3d02 100644
--- a/po/es.po
+++ b/po/es.po
@@ -1,14 +1,13 @@
 # Mensajes en español para GNU libc.
-# Copyright (C) 1996, 1997, 1998, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# Copyright (C) 1996, 1997, 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
 # Enrique Melero Gómez <melero@eurolands.com>, 1996, 1997.
-# Santiago Vila Doncel <sanvila@unex.es>, 1997, 1998, 2001, 2002, 2003, 2004.
+# Santiago Vila Doncel <sanvila@unex.es>, 1997, 1998, 2001, 2002, 2003.
 #
 msgid ""
 msgstr ""
-"Project-Id-Version: GNU libc 2.3.3\n"
-"Report-Msgid-Bugs-To: \n"
-"POT-Creation-Date: 2004-08-05 09:16+0200\n"
-"PO-Revision-Date: 2004-12-28 19:46+0100\n"
+"Project-Id-Version: GNU libc 2.3.2\n"
+"POT-Creation-Date: 2003-02-22 15:34-0800\n"
+"PO-Revision-Date: 2003-03-03 17:20+0100\n"
 "Last-Translator: Santiago Vila Doncel <sanvila@unex.es>\n"
 "Language-Team: Spanish <es@li.org>\n"
 "MIME-Version: 1.0\n"
@@ -220,12 +219,12 @@ msgstr "no se puede abrir `%s'"
 msgid "cannot read header from `%s'"
 msgstr "no se puede leer la cabecera de `%s'"
 
-#: iconv/iconv_charmap.c:159 iconv/iconv_prog.c:295 catgets/gencat.c:288
+#: iconv/iconv_charmap.c:159 iconv/iconv_prog.c:293 catgets/gencat.c:288
 #, c-format
 msgid "cannot open input file `%s'"
 msgstr "no se puede abrir el fichero de entrada `%s'"
 
-#: iconv/iconv_charmap.c:177 iconv/iconv_prog.c:313
+#: iconv/iconv_charmap.c:177 iconv/iconv_prog.c:311
 #, c-format
 msgid "error while closing input `%s'"
 msgstr "error al cerrar la entrada `%s'"
@@ -235,16 +234,16 @@ msgstr "error al cerrar la entrada `%s'"
 msgid "illegal input sequence at position %Zd"
 msgstr "secuencia de entrada ilegal en la posición %Zd"
 
-#: iconv/iconv_charmap.c:462 iconv/iconv_prog.c:506
+#: iconv/iconv_charmap.c:462 iconv/iconv_prog.c:503
 msgid "incomplete character or shift sequence at end of buffer"
 msgstr "carácter o secuencia de desplazamiento incompleta al final del búfer"
 
-#: iconv/iconv_charmap.c:507 iconv/iconv_charmap.c:543 iconv/iconv_prog.c:549
-#: iconv/iconv_prog.c:585
+#: iconv/iconv_charmap.c:507 iconv/iconv_charmap.c:543 iconv/iconv_prog.c:546
+#: iconv/iconv_prog.c:582
 msgid "error while reading the input"
 msgstr "error al leer la entrada"
 
-#: iconv/iconv_charmap.c:525 iconv/iconv_prog.c:567
+#: iconv/iconv_charmap.c:525 iconv/iconv_prog.c:564
 msgid "unable to allocate buffer for input"
 msgstr "no se puede asignar espacio para el búfer de entrada"
 
@@ -296,47 +295,47 @@ msgstr "Convierte la codificación de los ficheros dados de una codificación a ot
 msgid "[FILE...]"
 msgstr "[FICHERO...]"
 
-#: iconv/iconv_prog.c:201
+#: iconv/iconv_prog.c:199
 msgid "cannot open output file"
 msgstr "no se puede abrir el fichero de salida"
 
-#: iconv/iconv_prog.c:243
+#: iconv/iconv_prog.c:241
 #, c-format
 msgid "conversion from `%s' and to `%s' are not supported"
 msgstr "no se admite la conversión de `%s' a `%s'"
 
-#: iconv/iconv_prog.c:248
+#: iconv/iconv_prog.c:246
 #, c-format
 msgid "conversion from `%s' is not supported"
 msgstr "no se admite la conversi�n de `%s'"
 
-#: iconv/iconv_prog.c:255
+#: iconv/iconv_prog.c:253
 #, c-format
 msgid "conversion to `%s' is not supported"
 msgstr "no se admite la conversi�n a `%s'"
 
-#: iconv/iconv_prog.c:259
+#: iconv/iconv_prog.c:257
 #, c-format
 msgid "conversion from `%s' to `%s' is not supported"
 msgstr "no se admite la conversi�n de `%s' a `%s'"
 
-#: iconv/iconv_prog.c:265
+#: iconv/iconv_prog.c:263
 msgid "failed to start conversion processing"
 msgstr "fallo al comenzar el proceso de conversi�n"
 
-#: iconv/iconv_prog.c:360
+#: iconv/iconv_prog.c:358
 msgid "error while closing output file"
 msgstr "error al cerrar el fichero de salida"
 
-#: iconv/iconv_prog.c:409 iconv/iconvconfig.c:357 locale/programs/locale.c:279
+#: iconv/iconv_prog.c:407 iconv/iconvconfig.c:357 locale/programs/locale.c:274
 #: locale/programs/localedef.c:372 catgets/gencat.c:233
 #: malloc/memusagestat.c:602 debug/pcprofiledump.c:199
 msgid "Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"
 msgstr "Comunicar bichos usando el programa `glibcbug' a <bugs@gnu.org>.\n"
 
-#: iconv/iconv_prog.c:423 iconv/iconvconfig.c:371 locale/programs/locale.c:292
-#: locale/programs/localedef.c:386 catgets/gencat.c:246 posix/getconf.c:913
-#: nss/getent.c:74 nscd/nscd.c:355 nscd/nscd_nischeck.c:90 elf/ldconfig.c:274
+#: iconv/iconv_prog.c:421 iconv/iconvconfig.c:371 locale/programs/locale.c:287
+#: locale/programs/localedef.c:386 catgets/gencat.c:246 posix/getconf.c:910
+#: nss/getent.c:74 nscd/nscd.c:330 nscd/nscd_nischeck.c:90 elf/ldconfig.c:271
 #: elf/sprof.c:349
 #, c-format
 msgid ""
 msgstr ""
 "No hay NINGUNA garantía; ni siquiera de COMERCIABILIDAD o IDONEIDAD PARA UN\n"
 "FIN DETERMINADO.\n"
 
-#: iconv/iconv_prog.c:428 iconv/iconvconfig.c:376 locale/programs/locale.c:297
-#: locale/programs/localedef.c:391 catgets/gencat.c:251 posix/getconf.c:918
-#: nss/getent.c:79 nscd/nscd.c:360 nscd/nscd_nischeck.c:95 elf/ldconfig.c:279
+#: iconv/iconv_prog.c:426 iconv/iconvconfig.c:376 locale/programs/locale.c:292
+#: locale/programs/localedef.c:391 catgets/gencat.c:251 posix/getconf.c:915
+#: nss/getent.c:79 nscd/nscd.c:335 nscd/nscd_nischeck.c:95 elf/ldconfig.c:276
 #: elf/sprof.c:355
 #, c-format
 msgid "Written by %s.\n"
 msgstr "Escrito por %s.\n"
 
-#: iconv/iconv_prog.c:458 iconv/iconv_prog.c:484
+#: iconv/iconv_prog.c:456 iconv/iconv_prog.c:482
 msgid "conversion stopped due to problem in writing the output"
 msgstr "la conversión se ha detenido debido a un problema al escribir el resultado"
 
-#: iconv/iconv_prog.c:502
+#: iconv/iconv_prog.c:499
 #, c-format
 msgid "illegal input sequence at position %ld"
 msgstr "secuencia de entrada ilegal en la posición %ld"
 
-#: iconv/iconv_prog.c:510
+#: iconv/iconv_prog.c:507
 msgid "internal error (illegal descriptor)"
 msgstr "error interno (descriptor ilegal)"
 
-#: iconv/iconv_prog.c:513
+#: iconv/iconv_prog.c:510
 #, c-format
 msgid "unknown iconv() error %d"
 msgstr "error de iconv() desconocido %d"
 
 # FIXME: Espacio en blanco final.
-#: iconv/iconv_prog.c:756
+#: iconv/iconv_prog.c:753
 msgid ""
 "The following list contain all the coded character sets known.  This does\n"
 "not necessarily mean that all combinations of these names can be used for\n"
@@ -427,169 +426,169 @@ msgstr "no se puede generar el fichero de salida"
 msgid "cannot read character map directory `%s'"
 msgstr "no se puede leer el directorio de tablas de caracteres `%s'"
 
-#: locale/programs/charmap.c:136
+#: locale/programs/charmap.c:135
 #, c-format
 msgid "character map file `%s' not found"
 msgstr "el fichero de tabla de caracteres `%s' no se encontró"
 
-#: locale/programs/charmap.c:194
+#: locale/programs/charmap.c:193
 #, c-format
 msgid "default character map file `%s' not found"
 msgstr "no se encontró el fichero de tabla de caracteres predeterminado `%s'"
 
-#: locale/programs/charmap.c:257
+#: locale/programs/charmap.c:255
 #, c-format
 msgid "character map `%s' is not ASCII compatible, locale not ISO C compliant\n"
 msgstr ""
 "la tabla de caracteres `%s' no es compatible con ASCII, el local no cumple\n"
 "con ISO C\n"
 
-#: locale/programs/charmap.c:336
+#: locale/programs/charmap.c:332
 #, c-format
 msgid "%s: <mb_cur_max> must be greater than <mb_cur_min>\n"
-msgstr "%s: <mb_cur_max> debe ser mayor que <mb_cur_min>\n"
+msgstr "%s: <mb_cur_max> debe ser más grande que <mb_cur_min>\n"
 
-#: locale/programs/charmap.c:356 locale/programs/charmap.c:373
+#: locale/programs/charmap.c:352 locale/programs/charmap.c:369
 #: locale/programs/repertoire.c:175
 #, c-format
 msgid "syntax error in prolog: %s"
 msgstr "error de sintaxis en el prólogo: %s"
 
-#: locale/programs/charmap.c:357
+#: locale/programs/charmap.c:353
 msgid "invalid definition"
 msgstr "definición inválida"
 
-#: locale/programs/charmap.c:374 locale/programs/locfile.c:126
+#: locale/programs/charmap.c:370 locale/programs/locfile.c:126
 #: locale/programs/locfile.c:153 locale/programs/repertoire.c:176
 msgid "bad argument"
-msgstr "argumento erróneo"
+msgstr "Argumento erróneo"
 
-#: locale/programs/charmap.c:402
+#: locale/programs/charmap.c:398
 #, c-format
 msgid "duplicate definition of <%s>"
 msgstr "definición duplicada de <%s>"
 
-#: locale/programs/charmap.c:409
+#: locale/programs/charmap.c:405
 #, c-format
 msgid "value for <%s> must be 1 or greater"
 msgstr "el valor para <%s> debe ser 1 o mayor"
 
 # Milagro, por una vez es más corto en español :-) sv
-#: locale/programs/charmap.c:421
+#: locale/programs/charmap.c:417
 #, c-format
 msgid "value of <%s> must be greater or equal than the value of <%s>"
 msgstr "el valor de <%s> debe ser mayor o igual que el valor de <%s>"
 
-#: locale/programs/charmap.c:444 locale/programs/repertoire.c:184
+#: locale/programs/charmap.c:440 locale/programs/repertoire.c:184
 #, c-format
 msgid "argument to <%s> must be a single character"
 msgstr "el argumento para <%s> debe ser un único carácter"
 
-#: locale/programs/charmap.c:470
+#: locale/programs/charmap.c:466
 msgid "character sets with locking states are not supported"
 msgstr "los conjuntos de caracteres con estados de bloqueo no están soportados"
 
-#: locale/programs/charmap.c:497 locale/programs/charmap.c:551
-#: locale/programs/charmap.c:583 locale/programs/charmap.c:677
-#: locale/programs/charmap.c:732 locale/programs/charmap.c:773
-#: locale/programs/charmap.c:814
+#: locale/programs/charmap.c:493 locale/programs/charmap.c:547
+#: locale/programs/charmap.c:579 locale/programs/charmap.c:673
+#: locale/programs/charmap.c:728 locale/programs/charmap.c:769
+#: locale/programs/charmap.c:810
 #, c-format
 msgid "syntax error in %s definition: %s"
 msgstr "error de sintaxis en la definición de %s: %s"
 
-#: locale/programs/charmap.c:498 locale/programs/charmap.c:678
-#: locale/programs/charmap.c:774 locale/programs/repertoire.c:231
+#: locale/programs/charmap.c:494 locale/programs/charmap.c:674
+#: locale/programs/charmap.c:770 locale/programs/repertoire.c:231
 msgid "no symbolic name given"
 msgstr "no se ha especificado ningún nombre simbólico"
 
-#: locale/programs/charmap.c:552
+#: locale/programs/charmap.c:548
 msgid "invalid encoding given"
 msgstr "especificada una codificación inválida"
 
-#: locale/programs/charmap.c:561
+#: locale/programs/charmap.c:557
 msgid "too few bytes in character encoding"
 msgstr "insuficiente número de bytes en la codificación del carácter"
 
-#: locale/programs/charmap.c:563
+#: locale/programs/charmap.c:559
 msgid "too many bytes in character encoding"
 msgstr "demasiados bytes en la codificación del carácter"
 
-#: locale/programs/charmap.c:585 locale/programs/charmap.c:733
-#: locale/programs/charmap.c:816 locale/programs/repertoire.c:297
+#: locale/programs/charmap.c:581 locale/programs/charmap.c:729
+#: locale/programs/charmap.c:812 locale/programs/repertoire.c:297
 msgid "no symbolic name given for end of range"
 msgstr "no se ha especificado ningún nombre simbólico para el final del rango"
 
-#: locale/programs/charmap.c:609 locale/programs/locfile.c:818
+#: locale/programs/charmap.c:605 locale/programs/locfile.h:96
 #: locale/programs/repertoire.c:314
 #, c-format
 msgid "`%1$s' definition does not end with `END %1$s'"
 msgstr "La definición `%1$s' no termina con `END %1$s'"
 
-#: locale/programs/charmap.c:642
+#: locale/programs/charmap.c:638
 msgid "only WIDTH definitions are allowed to follow the CHARMAP definition"
 msgstr "solamente se permiten ANCHO definiciones después de la definición CHARMAP"
 
-#: locale/programs/charmap.c:650 locale/programs/charmap.c:713
+#: locale/programs/charmap.c:646 locale/programs/charmap.c:709
 #, c-format
 msgid "value for %s must be an integer"
 msgstr "el valor para %s debe ser un número entero"
 
 # Para entender este mensaje, pensar en Turing.
-#: locale/programs/charmap.c:841
+#: locale/programs/charmap.c:837
 #, c-format
 msgid "%s: error in state machine"
 msgstr "%s: error en la máquina de estados"
 
-#: locale/programs/charmap.c:849 locale/programs/ld-address.c:605
-#: locale/programs/ld-collate.c:2650 locale/programs/ld-collate.c:3818
-#: locale/programs/ld-ctype.c:2225 locale/programs/ld-ctype.c:2994
+#: locale/programs/charmap.c:845 locale/programs/ld-address.c:605
+#: locale/programs/ld-collate.c:2635 locale/programs/ld-collate.c:3793
+#: locale/programs/ld-ctype.c:2216 locale/programs/ld-ctype.c:2977
 #: locale/programs/ld-identification.c:469
 #: locale/programs/ld-measurement.c:255 locale/programs/ld-messages.c:349
-#: locale/programs/ld-monetary.c:958 locale/programs/ld-name.c:324
+#: locale/programs/ld-monetary.c:952 locale/programs/ld-name.c:324
 #: locale/programs/ld-numeric.c:392 locale/programs/ld-paper.c:258
-#: locale/programs/ld-telephone.c:330 locale/programs/ld-time.c:1219
-#: locale/programs/locfile.c:825 locale/programs/repertoire.c:325
+#: locale/programs/ld-telephone.c:330 locale/programs/ld-time.c:1217
+#: locale/programs/locfile.h:103 locale/programs/repertoire.c:325
 #, c-format
 msgid "%s: premature end of file"
 msgstr "%s: fin de fichero no esperado"
 
-#: locale/programs/charmap.c:868 locale/programs/charmap.c:879
+#: locale/programs/charmap.c:864 locale/programs/charmap.c:875
 #, c-format
 msgid "unknown character `%s'"
 msgstr "carácter desconocido `%s'"
 
-#: locale/programs/charmap.c:887
+#: locale/programs/charmap.c:883
 #, c-format
 msgid "number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"
 msgstr ""
 "el número de bytes para la sucesión de bytes de comienzo y final del rango\n"
 "no es el mismo: %d vs %d"
 
-#: locale/programs/charmap.c:991 locale/programs/ld-collate.c:2930
+#: locale/programs/charmap.c:987 locale/programs/ld-collate.c:2915
 #: locale/programs/repertoire.c:420
 msgid "invalid names for character range"
 msgstr "nombres inválidos para el rango de caracteres"
 
-#: locale/programs/charmap.c:1003 locale/programs/repertoire.c:432
+#: locale/programs/charmap.c:999 locale/programs/repertoire.c:432
 msgid "hexadecimal range format should use only capital characters"
-msgstr "el formato de rango hexadecimal debe usar solamente caracteres en mayúsculas"
+msgstr "el formato de rango hexadecimal debe usar solamente caracteres en mayúscula"
 
-#: locale/programs/charmap.c:1021
+#: locale/programs/charmap.c:1017
 #, c-format
 msgid "<%s> and <%s> are illegal names for range"
 msgstr "<%s> y <%s> son nombres no permitidos para el rango de caracteres"
 
 # FIXME: then -> than
-#: locale/programs/charmap.c:1027
+#: locale/programs/charmap.c:1023
 msgid "upper limit in range is not higher then lower limit"
 msgstr "el límite superior del rango no es mayor que el límite inferior"
 
-#: locale/programs/charmap.c:1085
+#: locale/programs/charmap.c:1081
 msgid "resulting bytes for range not representable."
 msgstr "los bytes resultantes para el rango no son representables."
 
-#: locale/programs/ld-address.c:134 locale/programs/ld-collate.c:1534
-#: locale/programs/ld-ctype.c:421 locale/programs/ld-identification.c:134
+#: locale/programs/ld-address.c:134 locale/programs/ld-collate.c:1519
+#: locale/programs/ld-ctype.c:416 locale/programs/ld-identification.c:134
 #: locale/programs/ld-measurement.c:95 locale/programs/ld-messages.c:98
 #: locale/programs/ld-monetary.c:194 locale/programs/ld-name.c:95
 #: locale/programs/ld-numeric.c:99 locale/programs/ld-paper.c:92
@@ -604,8 +603,8 @@ msgstr "No se encontr� ninguna definici�n para la categor�a %s"
 #: locale/programs/ld-address.c:290 locale/programs/ld-address.c:309
 #: locale/programs/ld-address.c:322 locale/programs/ld-identification.c:147
 #: locale/programs/ld-measurement.c:106 locale/programs/ld-monetary.c:206
-#: locale/programs/ld-monetary.c:250 locale/programs/ld-monetary.c:266
-#: locale/programs/ld-monetary.c:278 locale/programs/ld-name.c:106
+#: locale/programs/ld-monetary.c:244 locale/programs/ld-monetary.c:260
+#: locale/programs/ld-monetary.c:272 locale/programs/ld-name.c:106
 #: locale/programs/ld-name.c:143 locale/programs/ld-numeric.c:113
 #: locale/programs/ld-numeric.c:127 locale/programs/ld-paper.c:103
 #: locale/programs/ld-paper.c:112 locale/programs/ld-telephone.c:106
@@ -649,503 +648,503 @@ msgid "%s: numeric country code `%d' not valid"
 msgstr "%s: el c�digo num�rico de pa�s `%d' no es v�lido"
 
 #: locale/programs/ld-address.c:497 locale/programs/ld-address.c:534
-#: locale/programs/ld-address.c:572 locale/programs/ld-ctype.c:2601
+#: locale/programs/ld-address.c:572 locale/programs/ld-ctype.c:2592
 #: locale/programs/ld-identification.c:365
 #: locale/programs/ld-measurement.c:222 locale/programs/ld-messages.c:302
-#: locale/programs/ld-monetary.c:700 locale/programs/ld-monetary.c:735
-#: locale/programs/ld-monetary.c:776 locale/programs/ld-name.c:281
+#: locale/programs/ld-monetary.c:694 locale/programs/ld-monetary.c:729
+#: locale/programs/ld-monetary.c:770 locale/programs/ld-name.c:281
 #: locale/programs/ld-numeric.c:264 locale/programs/ld-paper.c:225
-#: locale/programs/ld-telephone.c:289 locale/programs/ld-time.c:1108
-#: locale/programs/ld-time.c:1150
+#: locale/programs/ld-telephone.c:289 locale/programs/ld-time.c:1106
+#: locale/programs/ld-time.c:1148
 #, c-format
 msgid "%s: field `%s' declared more than once"
 msgstr "%s: el campo `%s' ha sido declarado m�s de una vez"
 
 #: locale/programs/ld-address.c:501 locale/programs/ld-address.c:539
 #: locale/programs/ld-identification.c:369 locale/programs/ld-messages.c:312
-#: locale/programs/ld-monetary.c:704 locale/programs/ld-monetary.c:739
+#: locale/programs/ld-monetary.c:698 locale/programs/ld-monetary.c:733
 #: locale/programs/ld-name.c:285 locale/programs/ld-numeric.c:268
-#: locale/programs/ld-telephone.c:293 locale/programs/ld-time.c:1002
-#: locale/programs/ld-time.c:1071 locale/programs/ld-time.c:1113
+#: locale/programs/ld-telephone.c:293 locale/programs/ld-time.c:1000
+#: locale/programs/ld-time.c:1069 locale/programs/ld-time.c:1111
 #, c-format
 msgid "%s: unknown character in field `%s'"
 msgstr "%s: car�cter desconocido en el campo `%s'"
 
-#: locale/programs/ld-address.c:586 locale/programs/ld-collate.c:3800
-#: locale/programs/ld-ctype.c:2974 locale/programs/ld-identification.c:450
+#: locale/programs/ld-address.c:586 locale/programs/ld-collate.c:3775
+#: locale/programs/ld-ctype.c:2957 locale/programs/ld-identification.c:450
 #: locale/programs/ld-measurement.c:236 locale/programs/ld-messages.c:331
-#: locale/programs/ld-monetary.c:940 locale/programs/ld-name.c:306
+#: locale/programs/ld-monetary.c:934 locale/programs/ld-name.c:306
 #: locale/programs/ld-numeric.c:374 locale/programs/ld-paper.c:240
-#: locale/programs/ld-telephone.c:312 locale/programs/ld-time.c:1201
+#: locale/programs/ld-telephone.c:312 locale/programs/ld-time.c:1199
 #, c-format
 msgid "%s: incomplete `END' line"
 msgstr "%s: l�nea `END' incompleta"
 
-#: locale/programs/ld-address.c:589 locale/programs/ld-collate.c:2653
-#: locale/programs/ld-collate.c:3802 locale/programs/ld-ctype.c:2228
-#: locale/programs/ld-ctype.c:2977 locale/programs/ld-identification.c:453
+#: locale/programs/ld-address.c:589 locale/programs/ld-collate.c:2638
+#: locale/programs/ld-collate.c:3777 locale/programs/ld-ctype.c:2219
+#: locale/programs/ld-ctype.c:2960 locale/programs/ld-identification.c:453
 #: locale/programs/ld-measurement.c:239 locale/programs/ld-messages.c:333
-#: locale/programs/ld-monetary.c:942 locale/programs/ld-name.c:308
+#: locale/programs/ld-monetary.c:936 locale/programs/ld-name.c:308
 #: locale/programs/ld-numeric.c:376 locale/programs/ld-paper.c:242
-#: locale/programs/ld-telephone.c:314 locale/programs/ld-time.c:1203
+#: locale/programs/ld-telephone.c:314 locale/programs/ld-time.c:1201
 #, c-format
 msgid "%1$s: definition does not end with `END %1$s'"
 msgstr "%1$s: la definici�n no termina con `END %1$s'"
 
-#: locale/programs/ld-address.c:596 locale/programs/ld-collate.c:523
-#: locale/programs/ld-collate.c:575 locale/programs/ld-collate.c:871
-#: locale/programs/ld-collate.c:884 locale/programs/ld-collate.c:2640
-#: locale/programs/ld-collate.c:3809 locale/programs/ld-ctype.c:1956
-#: locale/programs/ld-ctype.c:2215 locale/programs/ld-ctype.c:2799
-#: locale/programs/ld-ctype.c:2985 locale/programs/ld-identification.c:460
+#: locale/programs/ld-address.c:596 locale/programs/ld-collate.c:520
+#: locale/programs/ld-collate.c:572 locale/programs/ld-collate.c:869
+#: locale/programs/ld-collate.c:882 locale/programs/ld-collate.c:2625
+#: locale/programs/ld-collate.c:3784 locale/programs/ld-ctype.c:1947
+#: locale/programs/ld-ctype.c:2206 locale/programs/ld-ctype.c:2782
+#: locale/programs/ld-ctype.c:2968 locale/programs/ld-identification.c:460
 #: locale/programs/ld-measurement.c:246 locale/programs/ld-messages.c:340
-#: locale/programs/ld-monetary.c:949 locale/programs/ld-name.c:315
+#: locale/programs/ld-monetary.c:943 locale/programs/ld-name.c:315
 #: locale/programs/ld-numeric.c:383 locale/programs/ld-paper.c:249
-#: locale/programs/ld-telephone.c:321 locale/programs/ld-time.c:1210
+#: locale/programs/ld-telephone.c:321 locale/programs/ld-time.c:1208
 #, c-format
 msgid "%s: syntax error"
 msgstr "%s: error de sintaxis"
 
-#: locale/programs/ld-collate.c:398
+#: locale/programs/ld-collate.c:395
 #, c-format
 msgid "`%.*s' already defined in charmap"
 msgstr "`%.*s' ya est� definido en la tabla de caracteres"
 
-#: locale/programs/ld-collate.c:407
+#: locale/programs/ld-collate.c:404
 #, c-format
 msgid "`%.*s' already defined in repertoire"
 msgstr "`%.*s' ya est� definido en el repertorio"
 
-#: locale/programs/ld-collate.c:414
+#: locale/programs/ld-collate.c:411
 #, c-format
 msgid "`%.*s' already defined as collating symbol"
 msgstr "`%.*s' ya est� definido como s�mbolo de ordenaci�n"
 
-#: locale/programs/ld-collate.c:421
+#: locale/programs/ld-collate.c:418
 #, c-format
 msgid "`%.*s' already defined as collating element"
 msgstr "`%.*s' ya est� definido como elemento de ordenaci�n"
 
-#: locale/programs/ld-collate.c:452 locale/programs/ld-collate.c:478
+#: locale/programs/ld-collate.c:449 locale/programs/ld-collate.c:475
 #, c-format
 msgid "%s: `forward' and `backward' are mutually excluding each other"
 msgstr "%s: `forward' y `backward' se excluyen mutuamente"
 
-#: locale/programs/ld-collate.c:462 locale/programs/ld-collate.c:488
-#: locale/programs/ld-collate.c:504
+#: locale/programs/ld-collate.c:459 locale/programs/ld-collate.c:485
+#: locale/programs/ld-collate.c:501
 #, c-format
 msgid "%s: `%s' mentioned more than once in definition of weight %d"
 msgstr "%s: `%s' mencionado m�s de una vez en la definici�n del peso %d"
 
-#: locale/programs/ld-collate.c:560
+#: locale/programs/ld-collate.c:557
 #, c-format
 msgid "%s: too many rules; first entry only had %d"
 msgstr "%s: demasiadas reglas; la primera entrada solamente ten�a %d"
 
-#: locale/programs/ld-collate.c:596
+#: locale/programs/ld-collate.c:593
 #, c-format
 msgid "%s: not enough sorting rules"
 msgstr "%s: no hay suficientes reglas de ordenaci�n"
 
-#: locale/programs/ld-collate.c:761
+#: locale/programs/ld-collate.c:759
 #, c-format
 msgid "%s: empty weight string not allowed"
 msgstr "%s: no se permite una cadena de peso vac�a"
 
-#: locale/programs/ld-collate.c:856
+#: locale/programs/ld-collate.c:854
 #, c-format
 msgid "%s: weights must use the same ellipsis symbol as the name"
 msgstr "%s: los pesos deben usar el mismo s�mbolo de elipsis que el nombre"
 
-#: locale/programs/ld-collate.c:912
+#: locale/programs/ld-collate.c:910
 #, c-format
 msgid "%s: too many values"
 msgstr "%s: demasiados valores"
 
-#: locale/programs/ld-collate.c:1031 locale/programs/ld-collate.c:1206
+#: locale/programs/ld-collate.c:1023 locale/programs/ld-collate.c:1194
 #, c-format
 msgid "order for `%.*s' already defined at %s:%Zu"
 msgstr "el orden para `%.*s' ya est� definido en %s:%Zu"
 
-#: locale/programs/ld-collate.c:1081
+#: locale/programs/ld-collate.c:1073
 #, c-format
 msgid "%s: the start and the end symbol of a range must stand for characters"
 msgstr "%s: los s�mbolos de comienzo y de final de un rango deben representar caracteres"
 
-#: locale/programs/ld-collate.c:1108
+#: locale/programs/ld-collate.c:1100
 #, c-format
 msgid "%s: byte sequences of first and last character must have the same length"
 msgstr ""
 "%s: los �rdenes de byte de los caracteres primero y �ltimo deben tener\n"
 "la misma longitud"
 
-#: locale/programs/ld-collate.c:1150
+#: locale/programs/ld-collate.c:1142
 #, c-format
 msgid "%s: byte sequence of first character of sequence is not lower than that of the last character"
 msgstr ""
 "%s: el orden de byte del primer car�cter de la sucesi�n no es menor que\n"
 "el del �ltimo car�cter"
 
-#: locale/programs/ld-collate.c:1275
+#: locale/programs/ld-collate.c:1263
 #, c-format
 msgid "%s: symbolic range ellipsis must not directly follow `order_start'"
 msgstr "%s: el rango simb�lico de la elipsis no debe seguir directamente a `order_start'"
 
-#: locale/programs/ld-collate.c:1279
+#: locale/programs/ld-collate.c:1267
 #, c-format
 msgid "%s: symbolic range ellipsis must not be directly followed by `order_end'"
 msgstr "%s: el rango simb�lico de la elipsis no debe estar directamente seguido por `order_end'"
 
-#: locale/programs/ld-collate.c:1299 locale/programs/ld-ctype.c:1476
+#: locale/programs/ld-collate.c:1287 locale/programs/ld-ctype.c:1467
 #, c-format
 msgid "`%s' and `%.*s' are no valid names for symbolic range"
 msgstr "`%s' y `%.*s' no son nombres v�lidos para el rango simb�lico"
 
-#: locale/programs/ld-collate.c:1348 locale/programs/ld-collate.c:3737
+#: locale/programs/ld-collate.c:1333 locale/programs/ld-collate.c:3712
 #, c-format
 msgid "%s: order for `%.*s' already defined at %s:%Zu"
 msgstr "%s: el orden para `%.*s' ya est� definido en %s:%Zu"
 
-#: locale/programs/ld-collate.c:1357
+#: locale/programs/ld-collate.c:1342
 #, c-format
 msgid "%s: `%s' must be a character"
 msgstr "%s: `%s' debe ser un car�cter"
 
-#: locale/programs/ld-collate.c:1550
+#: locale/programs/ld-collate.c:1535
 #, c-format
 msgid "%s: `position' must be used for a specific level in all sections or none"
 msgstr "%s: `position' debe utilizarse para un nivel espec�fico en todas las secciones o en ninguna"
 
-#: locale/programs/ld-collate.c:1575
+#: locale/programs/ld-collate.c:1560
 #, c-format
 msgid "symbol `%s' not defined"
 msgstr "el s�mbolo `%s' no est� definido"
 
-#: locale/programs/ld-collate.c:1651 locale/programs/ld-collate.c:1757
+#: locale/programs/ld-collate.c:1636 locale/programs/ld-collate.c:1742
 #, c-format
 msgid "symbol `%s' has the same encoding as"
 msgstr "el s�mbolo `%s' tiene la misma codificaci�n que"
 
-#: locale/programs/ld-collate.c:1655 locale/programs/ld-collate.c:1761
+#: locale/programs/ld-collate.c:1640 locale/programs/ld-collate.c:1746
 #, c-format
 msgid "symbol `%s'"
 msgstr "el s�mbolo `%s'"
 
-#: locale/programs/ld-collate.c:1803
+#: locale/programs/ld-collate.c:1788
 msgid "no definition of `UNDEFINED'"
 msgstr "no hay definici�n para `UNDEFINED'"
 
-#: locale/programs/ld-collate.c:1832
+#: locale/programs/ld-collate.c:1817
 msgid "too many errors; giving up"
 msgstr "demasiados errores; abandono"
 
-#: locale/programs/ld-collate.c:2735
+#: locale/programs/ld-collate.c:2720
 #, c-format
 msgid "%s: duplicate definition of `%s'"
 msgstr "%s: definici�n duplicada de `%s'"
 
-#: locale/programs/ld-collate.c:2771
+#: locale/programs/ld-collate.c:2756
 #, c-format
 msgid "%s: duplicate declaration of section `%s'"
 msgstr "%s: declaración duplicada de la sección `%s'"
 
-#: locale/programs/ld-collate.c:2910
+#: locale/programs/ld-collate.c:2895
 #, c-format
 msgid "%s: unknown character in collating symbol name"
 msgstr "%s: car�cter desconocido en el nombre de un s�mbolo de ordenaci�n"
 
-#: locale/programs/ld-collate.c:3042
+#: locale/programs/ld-collate.c:3027
 #, c-format
 msgid "%s: unknown character in equivalent definition name"
 msgstr "%s: car�cter desconocido en el nombre de definici�n equivalente"
 
-#: locale/programs/ld-collate.c:3055
+#: locale/programs/ld-collate.c:3040
 #, c-format
 msgid "%s: unknown character in equivalent definition value"
 msgstr "%s: car�cter desconocido en el valor de definici�n equivalente"
 
-#: locale/programs/ld-collate.c:3065
+#: locale/programs/ld-collate.c:3050
 #, c-format
 msgid "%s: unknown symbol `%s' in equivalent definition"
 msgstr "%s: s�mbolo desconocido `%s' en la definici�n equivalente"
 
-#: locale/programs/ld-collate.c:3074
+#: locale/programs/ld-collate.c:3059
 msgid "error while adding equivalent collating symbol"
 msgstr "error al a�adir s�mbolo de ordenaci�n equivalente"
 
-#: locale/programs/ld-collate.c:3104
+#: locale/programs/ld-collate.c:3089
 #, c-format
 msgid "duplicate definition of script `%s'"
 msgstr "definici�n duplicada de `script' `%s'"
 
-#: locale/programs/ld-collate.c:3152
+#: locale/programs/ld-collate.c:3137
 #, c-format
 msgid "%s: unknown section name `%s'"
 msgstr "%s: nombre de secci�n desconocido `%s'"
 
-#: locale/programs/ld-collate.c:3180
+#: locale/programs/ld-collate.c:3165
 #, c-format
 msgid "%s: multiple order definitions for section `%s'"
 msgstr "%s: hay varias definiciones de orden para la secci�n `%s'"
 
-#: locale/programs/ld-collate.c:3205
+#: locale/programs/ld-collate.c:3190
 #, c-format
 msgid "%s: invalid number of sorting rules"
 msgstr "%s: n�mero inv�lido de reglas de ordenaci�n"
 
-#: locale/programs/ld-collate.c:3232
+#: locale/programs/ld-collate.c:3217
 #, c-format
 msgid "%s: multiple order definitions for unnamed section"
 msgstr "%s: varias definiciones de orden para la secci�n sin nombre"
 
-#: locale/programs/ld-collate.c:3286 locale/programs/ld-collate.c:3414
-#: locale/programs/ld-collate.c:3778
+#: locale/programs/ld-collate.c:3271 locale/programs/ld-collate.c:3394
+#: locale/programs/ld-collate.c:3753
 #, c-format
 msgid "%s: missing `order_end' keyword"
 msgstr "%s: falta la palabra clave `order_end'"
 
-#: locale/programs/ld-collate.c:3347
+#: locale/programs/ld-collate.c:3329
 #, c-format
 msgid "%s: order for collating symbol %.*s not yet defined"
 msgstr "%s: el orden para el s�mbolo de ordenaci�n %.*s todav�a no est� definido"
 
 # FIXME: ¿Por qué este y el siguiente no son iguales?
-#: locale/programs/ld-collate.c:3365
+#: locale/programs/ld-collate.c:3345
 #, c-format
 msgid "%s: order for collating element %.*s not yet defined"
 msgstr "%s: el orden para el elemento de ordenaci�n %.*s todav�a no est� definido"
 
-#: locale/programs/ld-collate.c:3376
+#: locale/programs/ld-collate.c:3356
 #, c-format
 msgid "%s: cannot reorder after %.*s: symbol not known"
 msgstr "%s: no se puede reordenar despu�s de %.*s: s�mbolo desconocido"
 
-#: locale/programs/ld-collate.c:3428 locale/programs/ld-collate.c:3790
+#: locale/programs/ld-collate.c:3408 locale/programs/ld-collate.c:3765
 #, c-format
 msgid "%s: missing `reorder-end' keyword"
 msgstr "%s: falta la palabra clave `reorder-end'"
 
-#: locale/programs/ld-collate.c:3462 locale/programs/ld-collate.c:3662
+#: locale/programs/ld-collate.c:3442 locale/programs/ld-collate.c:3637
 #, c-format
 msgid "%s: section `%.*s' not known"
 msgstr "%s: la secci�n `%.*s' es desconocida"
 
-#: locale/programs/ld-collate.c:3527
+#: locale/programs/ld-collate.c:3507
 #, c-format
 msgid "%s: bad symbol <%.*s>"
 msgstr "%s: s�mbolo err�neo <%.*s>"
 
-#: locale/programs/ld-collate.c:3725
+#: locale/programs/ld-collate.c:3700
 #, c-format
 msgid "%s: cannot have `%s' as end of ellipsis range"
 msgstr "%s: no puede tener `%s' como final de un rango de elipsis"
 
-#: locale/programs/ld-collate.c:3774
+#: locale/programs/ld-collate.c:3749
 #, c-format
 msgid "%s: empty category description not allowed"
 msgstr "%s: no se permite una descripci�n de categor�a vac�a"
 
-#: locale/programs/ld-collate.c:3793
+#: locale/programs/ld-collate.c:3768
 #, c-format
 msgid "%s: missing `reorder-sections-end' keyword"
 msgstr "%s: falta la palabra clave `reorder-sections-end'"
 
-#: locale/programs/ld-ctype.c:440
+#: locale/programs/ld-ctype.c:435
 msgid "No character set name specified in charmap"
 msgstr ""
 "No se ha especificado ning�n nombre de conjunto de caracteres en la tabla\n"
 "de caracteres"
 
-#: locale/programs/ld-ctype.c:469
+#: locale/programs/ld-ctype.c:464
 #, c-format
 msgid "character L'\\u%0*x' in class `%s' must be in class `%s'"
 msgstr "el car�cter L'\\u%0*x' en la clase `%s' debe estar en la clase `%s'"
 
-#: locale/programs/ld-ctype.c:484
+#: locale/programs/ld-ctype.c:479
 #, c-format
 msgid "character L'\\u%0*x' in class `%s' must not be in class `%s'"
 msgstr "el carácter L'\\u%0*x' en la clase `%s' no debe estar en la clase `%s'"
 
-#: locale/programs/ld-ctype.c:498 locale/programs/ld-ctype.c:556
+#: locale/programs/ld-ctype.c:493 locale/programs/ld-ctype.c:551
 #, c-format
 msgid "internal error in %s, line %u"
 msgstr "error interno en %s, l�nea %u"
 
-#: locale/programs/ld-ctype.c:527
+#: locale/programs/ld-ctype.c:522
 #, c-format
 msgid "character '%s' in class `%s' must be in class `%s'"
 msgstr "el car�cter '%s' en la clase `%s' debe estar en la clase `%s'"
 
-#: locale/programs/ld-ctype.c:543
+#: locale/programs/ld-ctype.c:538
 #, c-format
 msgid "character '%s' in class `%s' must not be in class `%s'"
 msgstr "el carácter '%s' en la clase `%s' no debe estar en la clase `%s'"
 
-#: locale/programs/ld-ctype.c:573 locale/programs/ld-ctype.c:611
+#: locale/programs/ld-ctype.c:568 locale/programs/ld-ctype.c:606
 #, c-format
 msgid "<SP> character not in class `%s'"
 msgstr "El car�cter <SP> no est� en la clase `%s'"
 
-#: locale/programs/ld-ctype.c:585 locale/programs/ld-ctype.c:622
+#: locale/programs/ld-ctype.c:580 locale/programs/ld-ctype.c:617
 #, c-format
 msgid "<SP> character must not be in class `%s'"
 msgstr "El car�cter <SP> no debe estar en la clase `%s'"
 
-#: locale/programs/ld-ctype.c:600
+#: locale/programs/ld-ctype.c:595
 msgid "character <SP> not defined in character map"
 msgstr "el car�cter <SP> no est� definido en la tabla de caracteres"
 
-#: locale/programs/ld-ctype.c:714
+#: locale/programs/ld-ctype.c:709
 msgid "`digit' category has not entries in groups of ten"
 msgstr "la categor�a `digit' no tiene entradas en grupos de diez"
 
 # FIXME: El original no se entiende. ¿Es gramaticalmente correcto? sv
-#: locale/programs/ld-ctype.c:763
+#: locale/programs/ld-ctype.c:758
 msgid "no input digits defined and none of the standard names in the charmap"
 msgstr ""
 "no hay ning�n d�gito de entrada definido y ninguno de los nombres est�ndar\n"
 "en el conjunto de caracteres"
 
-#: locale/programs/ld-ctype.c:828
+#: locale/programs/ld-ctype.c:823
 msgid "not all characters used in `outdigit' are available in the charmap"
 msgstr ""
 "no todos los caracteres usados en `outdigit' est�n disponibles en la tabla\n"
 "de caracteres"
 
-#: locale/programs/ld-ctype.c:845
+#: locale/programs/ld-ctype.c:840
 msgid "not all characters used in `outdigit' are available in the repertoire"
 msgstr "no todos los caracteres usados en `outdigit' est�n disponibles en el repertorio"
 
-#: locale/programs/ld-ctype.c:1244
+#: locale/programs/ld-ctype.c:1235
 #, c-format
 msgid "character class `%s' already defined"
 msgstr "la clase de car�cter `%s' ya fue definida"
 
-#: locale/programs/ld-ctype.c:1250
+#: locale/programs/ld-ctype.c:1241
 #, c-format
 msgid "implementation limit: no more than %Zd character classes allowed"
 msgstr "l�mite de la implementaci�n: no se permiten m�s de %Zd clases de caracteres"
 
-#: locale/programs/ld-ctype.c:1276
+#: locale/programs/ld-ctype.c:1267
 #, c-format
 msgid "character map `%s' already defined"
 msgstr "la tabla de caracteres `%s' ya est� definida"
 
-#: locale/programs/ld-ctype.c:1282
+#: locale/programs/ld-ctype.c:1273
 #, c-format
 msgid "implementation limit: no more than %d character maps allowed"
 msgstr "l�mite de la implementaci�n: no se permiten m�s de %d tablas de caracteres"
 
-#: locale/programs/ld-ctype.c:1547 locale/programs/ld-ctype.c:1672
-#: locale/programs/ld-ctype.c:1778 locale/programs/ld-ctype.c:2464
-#: locale/programs/ld-ctype.c:3460
+#: locale/programs/ld-ctype.c:1538 locale/programs/ld-ctype.c:1663
+#: locale/programs/ld-ctype.c:1769 locale/programs/ld-ctype.c:2455
+#: locale/programs/ld-ctype.c:3443
 #, c-format
 msgid "%s: field `%s' does not contain exactly ten entries"
 msgstr "%s: el campo `%s' no contiene exactamente diez entradas"
 
-#: locale/programs/ld-ctype.c:1575 locale/programs/ld-ctype.c:2146
+#: locale/programs/ld-ctype.c:1566 locale/programs/ld-ctype.c:2137
 #, c-format
 msgid "to-value <U%0*X> of range is smaller than from-value <U%0*X>"
 msgstr "el valor `to' del rango <U%0*X> es m�s peque�o que el valor `from' <U%0*X>"
 
-#: locale/programs/ld-ctype.c:1702
+#: locale/programs/ld-ctype.c:1693
 msgid "start and end character sequence of range must have the same length"
 msgstr "los caracteres de comienzo y final del rango deben tener la misma longitud"
 
-#: locale/programs/ld-ctype.c:1709
+#: locale/programs/ld-ctype.c:1700
 msgid "to-value character sequence is smaller than from-value sequence"
 msgstr "el valor `to' de la sucesi�n de caracteres es m�s peque�o que el valor `from'"
 
-#: locale/programs/ld-ctype.c:2066 locale/programs/ld-ctype.c:2117
+#: locale/programs/ld-ctype.c:2057 locale/programs/ld-ctype.c:2108
 msgid "premature end of `translit_ignore' definition"
 msgstr "Fin no esperado de la definici�n `translit_ignore'"
 
-#: locale/programs/ld-ctype.c:2072 locale/programs/ld-ctype.c:2123
-#: locale/programs/ld-ctype.c:2165
+#: locale/programs/ld-ctype.c:2063 locale/programs/ld-ctype.c:2114
+#: locale/programs/ld-ctype.c:2156
 msgid "syntax error"
 msgstr "error de sintaxis"
 
-#: locale/programs/ld-ctype.c:2296
+#: locale/programs/ld-ctype.c:2287
 #, c-format
 msgid "%s: syntax error in definition of new character class"
 msgstr "%s: error de sintaxis en la definici�n de una nueva clase de caracteres"
 
-#: locale/programs/ld-ctype.c:2311
+#: locale/programs/ld-ctype.c:2302
 #, c-format
 msgid "%s: syntax error in definition of new character map"
 msgstr "%s: error de sintaxis en la definición de una nueva tabla de caracteres"
 
-#: locale/programs/ld-ctype.c:2486
+#: locale/programs/ld-ctype.c:2477
 msgid "ellipsis range must be marked by two operands of same type"
 msgstr "el rango de la elipsis debe estar marcado mediante dos operandos del mismo tipo"
 
-#: locale/programs/ld-ctype.c:2495
+#: locale/programs/ld-ctype.c:2486
 msgid "with symbolic name range values the absolute ellipsis `...' must not be used"
 msgstr "con valores de rango nombre simb�lico la elipsis absoluta `...' no debe usarse"
 
-#: locale/programs/ld-ctype.c:2510
+#: locale/programs/ld-ctype.c:2501
 msgid "with UCS range values one must use the hexadecimal symbolic ellipsis `..'"
 msgstr "con valores de rango UCS se debe utilizar la elipsis simb�lica hexadecimal `..'"
 
-#: locale/programs/ld-ctype.c:2524
+#: locale/programs/ld-ctype.c:2515
 msgid "with character code range values one must use the absolute ellipsis `...'"
 msgstr "con valores de rango c�digo de caracteres se debe utilizar la elipsis absoluta `...'"
 
-#: locale/programs/ld-ctype.c:2675
+#: locale/programs/ld-ctype.c:2666
 #, c-format
 msgid "duplicated definition for mapping `%s'"
 msgstr "definici�n duplicada para la asignaci�n `%s'"
 
-#: locale/programs/ld-ctype.c:2761 locale/programs/ld-ctype.c:2905
+#: locale/programs/ld-ctype.c:2744 locale/programs/ld-ctype.c:2888
 #, c-format
 msgid "%s: `translit_start' section does not end with `translit_end'"
 msgstr "%s: la secci�n `translit_start' no termina con `translit_end'"
 
-#: locale/programs/ld-ctype.c:2856
+#: locale/programs/ld-ctype.c:2839
 #, c-format
 msgid "%s: duplicate `default_missing' definition"
 msgstr "%s: definici�n `default_missing' duplicada"
 
-#: locale/programs/ld-ctype.c:2861
+#: locale/programs/ld-ctype.c:2844
 msgid "previous definition was here"
-msgstr "aqu� estaba la definici�n anterior"
+msgstr "la definici�n anterior estaba aqu�"
 
-#: locale/programs/ld-ctype.c:2883
+#: locale/programs/ld-ctype.c:2866
 #, c-format
 msgid "%s: no representable `default_missing' definition found"
 msgstr "%s: no se ha encontrado ninguna definici�n de `default_missing' representable"
 
-#: locale/programs/ld-ctype.c:3036
+#: locale/programs/ld-ctype.c:3019
 #, c-format
 msgid "%s: character `%s' not defined in charmap while needed as default value"
 msgstr ""
 "%s: el carácter `%s' no está definido en el conjunto de caracteres\n"
 "cuando se necesitó como valor predeterminado"
 
-#: locale/programs/ld-ctype.c:3041 locale/programs/ld-ctype.c:3125
-#: locale/programs/ld-ctype.c:3145 locale/programs/ld-ctype.c:3166
-#: locale/programs/ld-ctype.c:3187 locale/programs/ld-ctype.c:3208
-#: locale/programs/ld-ctype.c:3229 locale/programs/ld-ctype.c:3269
-#: locale/programs/ld-ctype.c:3290 locale/programs/ld-ctype.c:3357
+#: locale/programs/ld-ctype.c:3024 locale/programs/ld-ctype.c:3108
+#: locale/programs/ld-ctype.c:3128 locale/programs/ld-ctype.c:3149
+#: locale/programs/ld-ctype.c:3170 locale/programs/ld-ctype.c:3191
+#: locale/programs/ld-ctype.c:3212 locale/programs/ld-ctype.c:3252
+#: locale/programs/ld-ctype.c:3273 locale/programs/ld-ctype.c:3340
 #, c-format
 msgid "%s: character `%s' in charmap not representable with one byte"
 msgstr "%s: el car�cter `%s' en la tabla de caracteres no es representable con un byte"
 
-#: locale/programs/ld-ctype.c:3120 locale/programs/ld-ctype.c:3140
-#: locale/programs/ld-ctype.c:3182 locale/programs/ld-ctype.c:3203
-#: locale/programs/ld-ctype.c:3224 locale/programs/ld-ctype.c:3264
-#: locale/programs/ld-ctype.c:3285 locale/programs/ld-ctype.c:3352
-#: locale/programs/ld-ctype.c:3394 locale/programs/ld-ctype.c:3419
+#: locale/programs/ld-ctype.c:3103 locale/programs/ld-ctype.c:3123
+#: locale/programs/ld-ctype.c:3165 locale/programs/ld-ctype.c:3186
+#: locale/programs/ld-ctype.c:3207 locale/programs/ld-ctype.c:3247
+#: locale/programs/ld-ctype.c:3268 locale/programs/ld-ctype.c:3335
+#: locale/programs/ld-ctype.c:3377 locale/programs/ld-ctype.c:3402
 #, c-format
 msgid "%s: character `%s' not defined while needed as default value"
 msgstr "%s: el car�cter `%s' no est� definido cuando se necesit� como valor predeterminado"
 
-#: locale/programs/ld-ctype.c:3161
+#: locale/programs/ld-ctype.c:3144
 #, c-format
 msgid "character `%s' not defined while needed as default value"
 msgstr "el carácter `%s' no está definido cuando se necesitó como valor predeterminado"
 
-#: locale/programs/ld-ctype.c:3401 locale/programs/ld-ctype.c:3426
+#: locale/programs/ld-ctype.c:3384 locale/programs/ld-ctype.c:3409
 #, c-format
 msgid "%s: character `%s' needed as default value not representable with one byte"
 msgstr ""
@@ -1153,29 +1152,29 @@ msgstr ""
 "con un byte"
 
 # FIXME: Lo mismo de antes.
-#: locale/programs/ld-ctype.c:3481
+#: locale/programs/ld-ctype.c:3464
 msgid "no output digits defined and none of the standard names in the charmap"
 msgstr ""
 "no hay ning�n d�gito de salida definido y ninguno de los nombres est�ndar\n"
 "en el conjunto de caracteres"
 
 # Pregunta: ¿De verdad existe transliteración en español? sv
-#: locale/programs/ld-ctype.c:3772
+#: locale/programs/ld-ctype.c:3755
 #, c-format
 msgid "%s: transliteration data from locale `%s' not available"
 msgstr "%s: los datos de transliteraci�n del local `%s' no est�n disponibles"
 
-#: locale/programs/ld-ctype.c:3868
+#: locale/programs/ld-ctype.c:3851
 #, c-format
 msgid "%s: table for class \"%s\": %lu bytes\n"
 msgstr "%s: tabla para la clase \"%s\": %lu bytes\n"
 
-#: locale/programs/ld-ctype.c:3937
+#: locale/programs/ld-ctype.c:3920
 #, c-format
 msgid "%s: table for map \"%s\": %lu bytes\n"
 msgstr "%s: tabla para la asignaci�n \"%s\": %lu bytes\n"
 
-#: locale/programs/ld-ctype.c:4070
+#: locale/programs/ld-ctype.c:4053
 #, c-format
 msgid "%s: table for width: %lu bytes\n"
 msgstr "%s: tabla para el ancho: %lu bytes\n"
@@ -1219,39 +1218,39 @@ msgstr "%s: la expresi�n regular para el campo `%s' no es correcta: %s"
 msgid "%s: value of field `int_curr_symbol' has wrong length"
 msgstr "%s: el valor del campo `int_curr_symbol' tiene una longitud err�nea"
 
-#: locale/programs/ld-monetary.c:237
+#: locale/programs/ld-monetary.c:232
 #, c-format
 msgid "%s: value of field `int_curr_symbol' does not correspond to a valid name in ISO 4217"
 msgstr ""
 "%s: el valor del campo `int_curr_symbol' no se corresponde con un nombre\n"
 "v�lido en ISO 4217"
 
-#: locale/programs/ld-monetary.c:256 locale/programs/ld-numeric.c:119
+#: locale/programs/ld-monetary.c:250 locale/programs/ld-numeric.c:119
 #, c-format
 msgid "%s: value for field `%s' must not be the empty string"
 msgstr "%s: el valor para el campo `%s' no debe estar vac�o"
 
-#: locale/programs/ld-monetary.c:284 locale/programs/ld-monetary.c:314
+#: locale/programs/ld-monetary.c:278 locale/programs/ld-monetary.c:308
 #, c-format
 msgid "%s: value for field `%s' must be in range %d...%d"
 msgstr "%s: el valor para el campo `%s' debe estar en el rango %d...%d"
 
-#: locale/programs/ld-monetary.c:746 locale/programs/ld-numeric.c:275
+#: locale/programs/ld-monetary.c:740 locale/programs/ld-numeric.c:275
 #, c-format
 msgid "%s: value for field `%s' must be a single character"
 msgstr "%s: el valor para el campo `%s' debe ser un �nico car�cter"
 
-#: locale/programs/ld-monetary.c:843 locale/programs/ld-numeric.c:319
+#: locale/programs/ld-monetary.c:837 locale/programs/ld-numeric.c:319
 #, c-format
 msgid "%s: `-1' must be last entry in `%s' field"
 msgstr "%s: `-1' debe ser la �ltima entrada del campo `%s'"
 
-#: locale/programs/ld-monetary.c:865 locale/programs/ld-numeric.c:340
+#: locale/programs/ld-monetary.c:859 locale/programs/ld-numeric.c:340
 #, c-format
 msgid "%s: values for field `%s' must be smaller than 127"
 msgstr "%s: los valores para el campo `%s' deben ser menores que 127"
 
-#: locale/programs/ld-monetary.c:908
+#: locale/programs/ld-monetary.c:902
 msgid "conversion rate value cannot be zero"
 msgstr "el valor de la tasa de conversi�n no puede ser cero"
 
@@ -1343,62 +1342,62 @@ msgstr "%s: los valores para el campo `%s' no deben ser mayores que %d"
 msgid "%s: values for field `%s' must not be larger than %d"
 msgstr "%s: los valores para el campo `%s' no deben ser mayores que %d"
 
-#: locale/programs/ld-time.c:986
+#: locale/programs/ld-time.c:984
 #, c-format
 msgid "%s: too few values for field `%s'"
 msgstr "%s: insuficiente n�mero de valores para el campo `%s'"
 
-#: locale/programs/ld-time.c:1031
+#: locale/programs/ld-time.c:1029
 msgid "extra trailing semicolon"
 msgstr "sobra un punto y coma al final"
 
-#: locale/programs/ld-time.c:1034
+#: locale/programs/ld-time.c:1032
 #, c-format
 msgid "%s: too many values for field `%s'"
 msgstr "%s: demasiados valores para el campo `%s'"
 
-#: locale/programs/linereader.c:130
-msgid "trailing garbage at end of line"
-msgstr "hay inconsistencias al final de la l�nea"
-
-#: locale/programs/linereader.c:304
+#: locale/programs/linereader.c:275
 msgid "garbage at end of number"
 msgstr "inconsistencias al final del n�mero"
 
-#: locale/programs/linereader.c:416
+#: locale/programs/linereader.c:387
 msgid "garbage at end of character code specification"
 msgstr "inconsistencia al final de la especificaci�n del c�digo de caracteres"
 
-#: locale/programs/linereader.c:502
+#: locale/programs/linereader.c:473
 msgid "unterminated symbolic name"
 msgstr "nombre simb�lico sin terminar"
 
-#: locale/programs/linereader.c:566 catgets/gencat.c:1195
+#: locale/programs/linereader.c:537 catgets/gencat.c:1195
 msgid "invalid escape sequence"
 msgstr "secuencia de escape inv�lida"
 
-#: locale/programs/linereader.c:629
+#: locale/programs/linereader.c:600
 msgid "illegal escape sequence at end of string"
 msgstr "secuencia de escape ilegal al final de la cadena de caracteres"
 
-#: locale/programs/linereader.c:633 locale/programs/linereader.c:861
+#: locale/programs/linereader.c:604 locale/programs/linereader.c:832
 msgid "unterminated string"
 msgstr "cadena de caracteres sin terminar"
 
-#: locale/programs/linereader.c:675
+#: locale/programs/linereader.c:646
 msgid "non-symbolic character value should not be used"
 msgstr "los valores de caracteres no simb�licos no deben utilizarse"
 
-#: locale/programs/linereader.c:822
+#: locale/programs/linereader.c:793
 #, c-format
 msgid "symbol `%.*s' not in charmap"
 msgstr "el s�mbolo `%.*s' no est� en la tabla de caracteres"
 
-#: locale/programs/linereader.c:843
+#: locale/programs/linereader.c:814
 #, c-format
 msgid "symbol `%.*s' not in repertoire map"
 msgstr "el s�mbolo `%.*s' no est� en el repertorio"
 
+#: locale/programs/linereader.h:162
+msgid "trailing garbage at end of line"
+msgstr "hay inconsistencias al final de la línea"
+
 #: locale/programs/locale.c:75
 msgid "System information:"
 msgstr "Informaci�n del sistema:"
@@ -1429,7 +1428,7 @@ msgstr "Muestra m�s informaci�n"
 
 #: locale/programs/locale.c:88
 msgid "Get locale-specific information."
-msgstr "Obtiene la informaci�n espec�fica del local."
+msgstr "Obtiene la información específica del locale."
 
 #: locale/programs/locale.c:91
 msgid ""
@@ -1439,23 +1438,7 @@ msgstr ""
 "NOMBRE\n"
 "[-a|-m]"
 
-#: locale/programs/locale.c:195
-msgid "Cannot set LC_CTYPE to default locale"
-msgstr "No se puede establecer LC_CTYPE al local predeterminado"
-
-#: locale/programs/locale.c:197
-msgid "Cannot set LC_MESSAGES to default locale"
-msgstr "No se puede establecer LC_MESSAGES al local predeterminado"
-
-#: locale/programs/locale.c:210
-msgid "Cannot set LC_COLLATE to default locale"
-msgstr "No se puede establecer LC_COLLATE al local predeterminado"
-
-#: locale/programs/locale.c:226
-msgid "Cannot set LC_ALL to default locale"
-msgstr "No se puede establecer LC_ALL al local predeterminado"
-
-#: locale/programs/locale.c:517
+#: locale/programs/locale.c:512
 msgid "while preparing output"
 msgstr "al preparar la salida"
 
@@ -1677,7 +1660,7 @@ msgstr "fall� la llamada a `stat' sobre \"%s\": %s: descartado"
 #: locale/programs/locarchive.c:1138
 #, c-format
 msgid "\"%s\" is no directory; ignored"
-msgstr "\"%s\" no es un directorio; descartado"
+msgstr "\"%s\" no es un directorio; descartado"
 
 #: locale/programs/locarchive.c:1145
 #, c-format
@@ -1723,17 +1706,17 @@ msgstr "error al escribir los datos para la categor�a `%s'"
 msgid "cannot create output file `%s' for category `%s'"
 msgstr "no se puede crear el fichero de salida `%s' para la categor�a `%s'"
 
-#: locale/programs/locfile.c:781
+#: locale/programs/locfile.h:59
 msgid "expect string argument for `copy'"
 msgstr "se espera un argumento de cadena de caracteres para `copy'"
 
-#: locale/programs/locfile.c:785
+#: locale/programs/locfile.h:63
 msgid "locale name should consist only of portable characters"
 msgstr "el nombre del local debe estar formado por caracteres portables �nicamente"
 
-#: locale/programs/locfile.c:804
+#: locale/programs/locfile.h:82
 msgid "no other keyword shall be specified when `copy' is used"
-msgstr "cuando se utiliza `copy' no debe especificarse ninguna otra palabra clave"
+msgstr "ninguna otra palabra clave debe ser especificada al usar `copy'"
 
 #: locale/programs/repertoire.c:230 locale/programs/repertoire.c:271
 #: locale/programs/repertoire.c:296
@@ -1767,7 +1750,7 @@ msgid "upper limit in range is not smaller then lower limit"
 msgstr "el l�mite superior del rango no es menor que el l�mite inferior"
 
 #: locale/programs/xmalloc.c:70 malloc/obstack.c:505 malloc/obstack.c:508
-#: posix/getconf.c:1007
+#: posix/getconf.c:1002
 msgid "memory exhausted"
 msgstr "memoria agotada"
 
@@ -1810,7 +1793,7 @@ msgstr "Primera cadena para hacer pruebas."
 msgid "Another string for testing."
 msgstr "Otra cadena para hacer pruebas."
 
-#: catgets/gencat.c:111 catgets/gencat.c:115 nscd/nscd.c:88
+#: catgets/gencat.c:111 catgets/gencat.c:115 nscd/nscd.c:84
 msgid "NAME"
 msgstr "NOMBRE"
 
@@ -1919,7 +1902,6 @@ msgid "cannot determine escape character"
 msgstr "no se puede determinar el car�cter de escape"
 
 #: stdlib/../sysdeps/unix/sysv/linux/ia64/makecontext.c:63
-#, c-format
 msgid "makecontext: does not know how to handle more than 8 arguments\n"
 msgstr "makecontext: no sabe c�mo manejar m�s de 8 argumentos\n"
 
@@ -1927,8 +1909,8 @@ msgstr "makecontext: no sabe c�mo manejar m�s de 8 argumentos\n"
 # me gustar�a que hubiera otra palabra mejor. SV
 # Siempre me han gustado F&C ;-)
 # A m� tambi�n :-) sv
-#: stdio-common/../sysdeps/gnu/errlist.c:12 posix/regcomp.c:147
-#: nis/nis_error.c:29 nis/ypclnt.c:778 nis/ypclnt.c:852
+#: stdio-common/../sysdeps/gnu/errlist.c:12 posix/regcomp.c:133
+#: nis/nis_error.c:29 nis/ypclnt.c:787 nis/ypclnt.c:861
 msgid "Success"
 msgstr "Conseguido"
 
@@ -1957,7 +1939,7 @@ msgstr "No existe el fichero o el directorio"
 #: stdio-common/../sysdeps/gnu/errlist.c:37
 #: stdio-common/../sysdeps/unix/sysv/sysv4/solaris2/sparc/errlist.c:33
 msgid "No such process"
-msgstr "No existe el proceso"
+msgstr "No existe tal proceso"
 
 #. TRANS Interrupted function call; an asynchronous signal occurred and prevented
 #. TRANS completion of the call.  When this happens, you should try the call
@@ -1984,7 +1966,7 @@ msgstr "Error de entrada/salida"
 #: stdio-common/../sysdeps/gnu/errlist.c:74
 #: stdio-common/../sysdeps/unix/sysv/sysv4/solaris2/sparc/errlist.c:36
 msgid "No such device or address"
-msgstr "No existe el dispositivo o la direcci�n"
+msgstr "No existe tal dispositivo o dirección"
 
 #. TRANS Argument list too long; used when the arguments passed to a new program
 #. TRANS being executed with one of the @code{exec} functions (@pxref{Executing a
@@ -2044,7 +2026,7 @@ msgstr "No se pudo asignar memoria"
 #. TRANS Permission denied; the file permissions do not allow the attempted operation.
 #: stdio-common/../sysdeps/gnu/errlist.c:149
 #: stdio-common/../sysdeps/unix/sysv/sysv4/solaris2/sparc/errlist.c:43
-#: nis/nis_error.c:39 nis/ypclnt.c:808
+#: nis/nis_error.c:39 nis/ypclnt.c:817
 msgid "Permission denied"
 msgstr "Permiso denegado"
 
@@ -2075,7 +2057,7 @@ msgstr "Dispositivo o recurso ocupado"
 #: stdio-common/../sysdeps/gnu/errlist.c:191
 #: stdio-common/../sysdeps/unix/sysv/sysv4/solaris2/sparc/errlist.c:47
 msgid "File exists"
-msgstr "El fichero ya existe"
+msgstr "El fichero existe"
 
 #  ??? ver esto.
 #. TRANS An attempt to make an improper link across file systems was detected.
@@ -2090,7 +2072,7 @@ msgstr "Enlace cruzado entre dispositivos no permitido"
 #: stdio-common/../sysdeps/gnu/errlist.c:212
 #: stdio-common/../sysdeps/unix/sysv/sysv4/solaris2/sparc/errlist.c:49
 msgid "No such device"
-msgstr "No existe el dispositivo"
+msgstr "No existe tal dispositivo"
 
 #. TRANS A file that isn't a directory was specified when a directory is required.
 #: stdio-common/../sysdeps/gnu/errlist.c:221
@@ -2189,6 +2171,9 @@ msgstr "Sistema de ficheros de s�lo lectura"
 msgid "Too many links"
 msgstr "Demasiados enlaces"
 
+#  fuentes
+# Sugerencia: Argumento numérico. sv
+#
 #. TRANS Domain error; used by mathematical functions when an argument value does
 #. TRANS not fall into the domain over which the function is defined.
 #: stdio-common/../sysdeps/gnu/errlist.c:361
@@ -2902,11 +2887,10 @@ msgid "Invalid request code"
 msgstr "C�digo de petici�n incorrecto"
 
 # �Ranura no v�lida?, creo que no hay traducci�n para slot :) em+
-# Antes: `slot' incorrecto
 #: stdio-common/../sysdeps/gnu/errlist.c:1205
 #: stdio-common/../sysdeps/unix/sysv/sysv4/solaris2/sparc/errlist.c:85
 msgid "Invalid slot"
-msgstr "Ranura inv�lida"
+msgstr "`slot' incorrecto"
 
 # FUZZY em+
 #: stdio-common/../sysdeps/gnu/errlist.c:1213
@@ -3332,14 +3316,6 @@ msgstr "No se puede enviar despu�s de la destrucci�n del `socket'"
 msgid "%s%sUnknown signal %d\n"
 msgstr "%s%sSe�al desconocida %d\n"
 
-#: dlfcn/dlinfo.c:51
-msgid "RTLD_SELF used in code not dynamically loaded"
-msgstr "Se ha usado RTLD_SELF en una parte del c�digo que no se carg� din�micamente"
-
-#: dlfcn/dlinfo.c:61
-msgid "unsupported dlinfo request"
-msgstr "Petici�n dlinfo no admitida"
-
 #: malloc/mcheck.c:346
 msgid "memory is consistent, library is buggy\n"
 msgstr "la memoria es consistente, la biblioteca tiene un bicho\n"
@@ -3410,114 +3386,114 @@ msgstr "Se�al de tiempo real %d"
 msgid "Unknown signal %d"
 msgstr "Se�al desconocida %d"
 
-#: timezone/zdump.c:176
+#: timezone/zdump.c:175
 #, c-format
-msgid "%s: usage is %s [ --version ] [ -v ] [ -c cutoff ] zonename ...\n"
-msgstr "%s: el modo de empleo es %s [ --version ] [ -v ] [ -c cutoff ] nombrezona ...\n"
+msgid "%s: usage is %s [ -v ] [ -c cutoff ] zonename ...\n"
+msgstr "%s: el modo de empleo es %s [ -v ] [ -c cutoff ] nombrezona ...\n"
 
-#: timezone/zdump.c:269
+#: timezone/zdump.c:268
 msgid "Error writing standard output"
 msgstr "Error al escribir en la salida est�ndar"
 
-#: timezone/zic.c:361
+#: timezone/zic.c:365
 #, c-format
 msgid "%s: Memory exhausted: %s\n"
 msgstr "%s: Memoria agotada: %s\n"
 
-#: timezone/zic.c:386 misc/error.c:129 misc/error.c:157
+#: timezone/zic.c:390 misc/error.c:127 misc/error.c:155
 msgid "Unknown system error"
 msgstr "Error del sistema desconocido"
 
-#: timezone/zic.c:420
+#: timezone/zic.c:424
 #, c-format
 msgid "\"%s\", line %d: %s"
 msgstr "\"%s\", l�nea %d: %s"
 
-#: timezone/zic.c:423
+#: timezone/zic.c:427
 #, c-format
 msgid " (rule from \"%s\", line %d)"
 msgstr " (regla desde \"%s\", l�nea %d)"
 
-#: timezone/zic.c:435
+#: timezone/zic.c:439
 msgid "warning: "
 msgstr "atenci�n: "
 
 # FIXME: Decir al autor que no use tabs. sv
-#: timezone/zic.c:445
+#: timezone/zic.c:449
 #, c-format
 msgid ""
-"%s: usage is %s [ --version ] [ -s ] [ -v ] [ -l localtime ] [ -p posixrules ] \\\n"
+"%s: usage is %s [ -s ] [ -v ] [ -l localtime ] [ -p posixrules ] \\\n"
 "\t[ -d directory ] [ -L leapseconds ] [ -y yearistype ] [ filename ... ]\n"
 msgstr ""
-"%s: el modo de empleo es %s [ --version ] [ -s ] [ -v ] [ -l hora_local ] [ -p reglasposix ] \\\n"
+"%s: el modo de empleo es %s [ -s ] [ -v ] [ -l hora_local ] [ -p reglasposix ] \\\n"
 " [ -d directorio ] [ -L segundos_intercalares ] [ -y tipoa�o ] [ fichero ... ]\n"
 
-#: timezone/zic.c:492
+#: timezone/zic.c:491
 #, c-format
 msgid "%s: More than one -d option specified\n"
 msgstr "%s: La opci�n -d se ha especificado m�s de una vez\n"
 
-#: timezone/zic.c:502
+#: timezone/zic.c:501
 #, c-format
 msgid "%s: More than one -l option specified\n"
 msgstr "%s: La opci�n -l se ha especificado m�s de una vez\n"
 
-#: timezone/zic.c:512
+#: timezone/zic.c:511
 #, c-format
 msgid "%s: More than one -p option specified\n"
 msgstr "%s: La opci�n -p se ha especificado m�s de una vez\n"
 
-#: timezone/zic.c:522
+#: timezone/zic.c:521
 #, c-format
 msgid "%s: More than one -y option specified\n"
 msgstr "%s: La opci�n -y se ha especificado m�s de una vez\n"
 
-#: timezone/zic.c:532
+#: timezone/zic.c:531
 #, c-format
 msgid "%s: More than one -L option specified\n"
 msgstr "%s: La opci�n -L se ha especificado m�s de una vez\n"
 
-#: timezone/zic.c:639
+#: timezone/zic.c:638
 #, c-format
 msgid "%s: Can't unlink  %s: %s\n"
 msgstr "%s: No se puede borrar %s: %s\n"
 
-#: timezone/zic.c:646
+#: timezone/zic.c:645
 msgid "hard link failed, symbolic link used"
 msgstr "el enlace duro fall�, se usar� un enlace simb�lico"
 
-#: timezone/zic.c:654
+#: timezone/zic.c:653
 #, c-format
 msgid "%s: Can't link from %s to %s: %s\n"
 msgstr "%s: No se pudo crear un enlace de %s a %s: %s\n"
 
-#: timezone/zic.c:752 timezone/zic.c:754
+#: timezone/zic.c:751 timezone/zic.c:753
 msgid "same rule name in multiple files"
 msgstr "mismo nombre de regla en varios ficheros"
 
-#: timezone/zic.c:795
+#: timezone/zic.c:794
 msgid "unruly zone"
 msgstr "zona sin reglas"
 
-#: timezone/zic.c:802
+#: timezone/zic.c:801
 #, c-format
 msgid "%s in ruleless zone"
 msgstr "%s en una zona sin reglas"
 
-#: timezone/zic.c:823
+#: timezone/zic.c:822
 msgid "standard input"
 msgstr "entrada est�ndar"
 
-#: timezone/zic.c:828
+#: timezone/zic.c:827
 #, c-format
 msgid "%s: Can't open %s: %s\n"
 msgstr "%s: No se puede abrir %s: %s\n"
 
-#: timezone/zic.c:839
+#: timezone/zic.c:838
 msgid "line too long"
 msgstr "l�nea demasiado larga"
 
-#: timezone/zic.c:859
+#: timezone/zic.c:858
 msgid "input line of unknown type"
 msgstr "l�nea de entrada de tipo desconocido"
 
@@ -3552,7 +3528,7 @@ msgstr "l�nea de entrada de tipo desconocido"
 # Segundo, seg�n he visto en la documentaci�n, s�lo existe un fichero
 # de leap lines, por eso pongo 'el'... em+
 #
-#: timezone/zic.c:875
+#: timezone/zic.c:874
 #, c-format
 msgid "%s: Leap line in non leap seconds file %s\n"
 msgstr ""
@@ -3560,70 +3536,70 @@ msgstr ""
 "ajuste de a�os bisiestos %s\n"
 
 # �dem. 1984.
-#: timezone/zic.c:882 timezone/zic.c:1297 timezone/zic.c:1322
+#: timezone/zic.c:881 timezone/zic.c:1295 timezone/zic.c:1320
 #, c-format
 msgid "%s: panic: Invalid l_value %d\n"
 msgstr "%s: grave: valor_l %d inv�lido\n"
 
-#: timezone/zic.c:890
+#: timezone/zic.c:889
 #, c-format
 msgid "%s: Error reading %s\n"
 msgstr "%s: Error al leer %s\n"
 
-#: timezone/zic.c:897
+#: timezone/zic.c:896
 #, c-format
 msgid "%s: Error closing %s: %s\n"
 msgstr "%s: Error al cerrar %s: %s\n"
 
-#: timezone/zic.c:902
+#: timezone/zic.c:901
 msgid "expected continuation line not found"
 msgstr "la l�nea de continuaci�n esperada no se encuentra"
 
-#: timezone/zic.c:958
+#: timezone/zic.c:957
 msgid "wrong number of fields on Rule line"
 msgstr "n�mero incorrecto de argumentos en la l�nea de regla (Rule)"
 
-#: timezone/zic.c:962
+#: timezone/zic.c:961
 msgid "nameless rule"
 msgstr "regla sin nombre"
 
-#: timezone/zic.c:967
+#: timezone/zic.c:966
 msgid "invalid saved time"
 msgstr "la hora almacenada no es v�lida"
 
-#: timezone/zic.c:986
+#: timezone/zic.c:985
 msgid "wrong number of fields on Zone line"
 msgstr "n�mero de campos incorrecto en la l�nea de zona (Zone)"
 
-#: timezone/zic.c:992
+#: timezone/zic.c:991
 #, c-format
 msgid "\"Zone %s\" line and -l option are mutually exclusive"
 msgstr "la l�nea \"Zone %s\" y la opci�n -l son mutuamente excluyentes"
 
-#: timezone/zic.c:1000
+#: timezone/zic.c:999
 #, c-format
 msgid "\"Zone %s\" line and -p option are mutually exclusive"
 msgstr "la l�nea \"Zone %s\" y la opci�n -p son mutuamente excluyentes"
 
-#: timezone/zic.c:1012
+#: timezone/zic.c:1011
 #, c-format
 msgid "duplicate zone name %s (file \"%s\", line %d)"
 msgstr "nombre de zona %s duplicado (fichero \"%s\", l�nea %d)"
 
-#: timezone/zic.c:1028
+#: timezone/zic.c:1027
 msgid "wrong number of fields on Zone continuation line"
 msgstr "n�mero de campos incorrecto en la l�nea de continuaci�n de zona (Zone)"
 
-#: timezone/zic.c:1068
+#: timezone/zic.c:1067
 msgid "invalid UTC offset"
 msgstr "desplazamiento UTC inv�lido"
 
-#: timezone/zic.c:1071
+#: timezone/zic.c:1070
 msgid "invalid abbreviation format"
 msgstr "formato de abreviatura incorrecto"
 
 # VER
-#: timezone/zic.c:1097
+#: timezone/zic.c:1096
 msgid "Zone continuation line end time is not after end time of previous line"
 msgstr ""
 "La l�nea de continuaci�n de la zona no est� despu�s del tiempo de final\n"
@@ -3639,170 +3615,154 @@ msgstr ""
 # Si es mejor, ponlo en todos los sitios. Y si no, en ninguno.
 # Yo creo que es mucho mejor poner "n�mero incorrecto ..."
 # Si no, queda como "al rev�s". sv+
-#: timezone/zic.c:1124
+#: timezone/zic.c:1123
 msgid "wrong number of fields on Leap line"
 msgstr "n�mero incorrecto de campos en la l�nea de bisiesto (Leap)"
 
-#: timezone/zic.c:1133
+#: timezone/zic.c:1132
 msgid "invalid leaping year"
 msgstr "a�o bisiesto inv�lido"
 
-#: timezone/zic.c:1148 timezone/zic.c:1252
+#: timezone/zic.c:1147 timezone/zic.c:1250
 msgid "invalid month name"
 msgstr "nombre de mes incorrecto"
 
-#: timezone/zic.c:1161 timezone/zic.c:1374 timezone/zic.c:1388
+#: timezone/zic.c:1160 timezone/zic.c:1372 timezone/zic.c:1386
 msgid "invalid day of month"
 msgstr "d�a del mes inv�lido"
 
-#: timezone/zic.c:1166
+#: timezone/zic.c:1165
 msgid "time before zero"
 msgstr "hora antes de cero"
 
-#: timezone/zic.c:1170
-msgid "time too small"
-msgstr "tiempo demasiado peque�o"
-
-#: timezone/zic.c:1174
-msgid "time too large"
-msgstr "tiempo demasiado grande"
+# Sugerencia: Desbordamiento de fecha. (?) sv+
+#: timezone/zic.c:1173 timezone/zic.c:2049 timezone/zic.c:2068
+msgid "time overflow"
+msgstr "desbordamiento horario"
 
-#: timezone/zic.c:1178 timezone/zic.c:1281
+#: timezone/zic.c:1176 timezone/zic.c:1279
 msgid "invalid time of day"
 msgstr "hora del d�a inv�lida"
 
-#: timezone/zic.c:1197
+#: timezone/zic.c:1195
 msgid "illegal CORRECTION field on Leap line"
 msgstr "El campo CORRECTION en la l�nea de a�o bisiesto es ilegal"
 
-#: timezone/zic.c:1201
+#: timezone/zic.c:1199
 msgid "illegal Rolling/Stationary field on Leap line"
 msgstr "Campo Rolling/Stationary ilegal en la l�nea de a�o bisiesto"
 
-#: timezone/zic.c:1216
+#: timezone/zic.c:1214
 msgid "wrong number of fields on Link line"
 msgstr "n�mero incorrecto de campos en la l�nea de enlace (Link)"
 
-#: timezone/zic.c:1220
+#: timezone/zic.c:1218
 msgid "blank FROM field on Link line"
 msgstr "Campo FROM vac�o en la l�nea `Link'"
 
-#: timezone/zic.c:1224
+#: timezone/zic.c:1222
 msgid "blank TO field on Link line"
 msgstr "Campo TO vac�o en la l�nea `Link'"
 
-#: timezone/zic.c:1301
+#: timezone/zic.c:1299
 msgid "invalid starting year"
 msgstr "a�o de comienzo inv�lido"
 
-#: timezone/zic.c:1305
+#: timezone/zic.c:1303 timezone/zic.c:1328
 msgid "starting year too low to be represented"
 msgstr "el a�o de comienzo es demasiado bajo para ser representado"
 
-#: timezone/zic.c:1307
+#: timezone/zic.c:1305 timezone/zic.c:1330
 msgid "starting year too high to be represented"
 msgstr "el a�o de comienzo es demasiado alto para ser representado"
 
-#: timezone/zic.c:1326
+#: timezone/zic.c:1324
 msgid "invalid ending year"
 msgstr "a�o de final inv�lido"
 
-#: timezone/zic.c:1330
-msgid "ending year too low to be represented"
-msgstr "el a�o de final es demasiado bajo para ser representado"
-
-#: timezone/zic.c:1332
-msgid "ending year too high to be represented"
-msgstr "el a�o de final es demasiado alto para ser representado"
-
-#: timezone/zic.c:1335
+#: timezone/zic.c:1333
 msgid "starting year greater than ending year"
 msgstr "a�o de comienzo mayor que a�o de final"
 
-#: timezone/zic.c:1342
+#: timezone/zic.c:1340
 msgid "typed single year"
 msgstr "tecleado un �nico a�o"
 
-#: timezone/zic.c:1379
+#: timezone/zic.c:1377
 msgid "invalid weekday name"
 msgstr "nombre del d�a de la semana incorrecto"
 
-#: timezone/zic.c:1494
+#: timezone/zic.c:1492
 #, c-format
 msgid "%s: Can't remove %s: %s\n"
 msgstr "%s: No se puede eliminar %s: %s\n"
 
-#: timezone/zic.c:1504
+#: timezone/zic.c:1502
 #, c-format
 msgid "%s: Can't create %s: %s\n"
 msgstr "%s: No se puede crear %s: %s\n"
 
-#: timezone/zic.c:1570
+#: timezone/zic.c:1568
 #, c-format
 msgid "%s: Error writing %s\n"
 msgstr "%s: Error al escribir %s\n"
 
 # FUZZY
-#: timezone/zic.c:1760
+#: timezone/zic.c:1758
 msgid "can't determine time zone abbreviation to use just after until time"
 msgstr ""
 "No se puede determinar la abreviaci�n de zona horaria que se usar� justo\n"
 "despu�s"
 
-#: timezone/zic.c:1803
+#: timezone/zic.c:1801
 msgid "too many transitions?!"
 msgstr "��demasiadas transiciones?!"
 
-#: timezone/zic.c:1822
+#: timezone/zic.c:1820
 msgid "internal error - addtype called with bad isdst"
 msgstr "error interno - se llam� a `addtype' con un `isdst' err�neo"
 
-#: timezone/zic.c:1826
+#: timezone/zic.c:1824
 msgid "internal error - addtype called with bad ttisstd"
 msgstr "error interno - se llam� a `addtype' con un `ttisstd' err�neo"
 
-#: timezone/zic.c:1830
+#: timezone/zic.c:1828
 msgid "internal error - addtype called with bad ttisgmt"
 msgstr "error interno - se llam� a `addtype' con un `ttisgmt' err�neo"
 
-#: timezone/zic.c:1849
+#: timezone/zic.c:1847
 msgid "too many local time types"
 msgstr "demasiados tipos de hora local"
 
-#: timezone/zic.c:1877
+#: timezone/zic.c:1875
 msgid "too many leap seconds"
 msgstr "demasiados segundos intercalares"
 
-#: timezone/zic.c:1883
+#: timezone/zic.c:1881
 msgid "repeated leap second moment"
 msgstr "segundo intercalar repetido"
 
 # # Otra opci�n, resultado incongruente al ejecutar la orden em
-#: timezone/zic.c:1935
+#: timezone/zic.c:1933
 msgid "Wild result from command execution"
 msgstr "Resultado salvaje en la ejecuci�n de la orden"
 
 # FIXME: `%s'
-#: timezone/zic.c:1936
+#: timezone/zic.c:1934
 #, c-format
 msgid "%s: command was '%s', result was %d\n"
 msgstr "%s: la orden fue `%s', el resultado fue %d\n"
 
-#: timezone/zic.c:2031
+#: timezone/zic.c:2029
 msgid "Odd number of quotation marks"
 msgstr "N�mero impar de comillas"
 
-# Sugerencia: Desbordamiento de fecha. (?) sv+
-#: timezone/zic.c:2051 timezone/zic.c:2070
-msgid "time overflow"
-msgstr "desbordamiento horario"
-
 # FIXME: non leap-year -> non-leap year.
 # A lo mejor si pones "veintinueve de febrero" o "29 de febrero"
 # se entiende mejor. no s�. sv
 # Si, estas pensando lo mismo que yo, 29 de febrero puede confundir, porque
 # en el fichero pondr� 2/29  em
-#: timezone/zic.c:2117
+#: timezone/zic.c:2115
 msgid "use of 2/29 in non leap-year"
 msgstr "uso de 2/29 en un a�o no bisiesto"
 
@@ -3810,25 +3770,25 @@ msgstr "uso de 2/29 en un a�o no bisiesto"
 # Esto debe tener algo que ver con la funci�n menopausie() em
 # No se me hab�a ocurrido... �Se te ocurre algo mejor, ahora que ya
 # sabemos lo que quiere decir? sv
-#: timezone/zic.c:2151
+#: timezone/zic.c:2149
 msgid "no day in month matches rule"
 msgstr "ning�n d�a del mes coincide con la regla"
 
-#: timezone/zic.c:2175
+#: timezone/zic.c:2172
 msgid "too many, or too long, time zone abbreviations"
 msgstr "demasiadas abreviaturas de zona horaria, o demasiado largas"
 
-#: timezone/zic.c:2216
+#: timezone/zic.c:2213
 #, c-format
 msgid "%s: Can't create directory %s: %s\n"
 msgstr "%s: No se puede crear el directorio %s: %s\n"
 
-#: timezone/zic.c:2238
+#: timezone/zic.c:2235
 #, c-format
 msgid "%s: %d did not sign extend correctly\n"
 msgstr "%s: %d no extendi� el signo correctamente\n"
 
-#: posix/../sysdeps/generic/wordexp.c:1797
+#: posix/../sysdeps/generic/wordexp.c:1801
 msgid "parameter null or not set"
 msgstr "par�metro nulo o no establecido"
 
@@ -3899,57 +3859,57 @@ msgstr "Realizadas todas las peticiones"
 msgid "Interrupted by a signal"
 msgstr "Interrumpido por una se�al"
 
-#: posix/getconf.c:892
+#: posix/getconf.c:889
 #, c-format
 msgid "Usage: %s [-v specification] variable_name [pathname]\n"
 msgstr "Modo de empleo: %s [-v especificaci�n] nombre_de_variable [ruta]\n"
 
-#: posix/getconf.c:950
+#: posix/getconf.c:947
 #, c-format
 msgid "unknown specification \"%s\""
 msgstr "especificaci�n \"%s\" desconocida"
 
-#: posix/getconf.c:979 posix/getconf.c:995
+#: posix/getconf.c:974 posix/getconf.c:990
 msgid "undefined"
 msgstr "sin definir"
 
-#: posix/getconf.c:1017
+#: posix/getconf.c:1012
 #, c-format
 msgid "Unrecognized variable `%s'"
 msgstr "Variable no reconocida `%s'"
 
-#: posix/getopt.c:692 posix/getopt.c:711
+#: posix/getopt.c:692 posix/getopt.c:704
 #, c-format
 msgid "%s: option `%s' is ambiguous\n"
 msgstr "%s: la opci�n `%s' es ambigua\n"
 
-#: posix/getopt.c:744 posix/getopt.c:748
+#: posix/getopt.c:737 posix/getopt.c:741
 #, c-format
 msgid "%s: option `--%s' doesn't allow an argument\n"
 msgstr "%s: la opci�n `--%s' no admite ning�n argumento\n"
 
-#: posix/getopt.c:757 posix/getopt.c:762
+#: posix/getopt.c:750 posix/getopt.c:755
 #, c-format
 msgid "%s: option `%c%s' doesn't allow an argument\n"
 msgstr "%s: la opci�n `%c%s' no admite ning�n argumento\n"
 
-#: posix/getopt.c:807 posix/getopt.c:829 posix/getopt.c:1159
-#: posix/getopt.c:1181
+#: posix/getopt.c:791 posix/getopt.c:804 posix/getopt.c:1093
+#: posix/getopt.c:1106
 #, c-format
 msgid "%s: option `%s' requires an argument\n"
 msgstr "%s: la opci�n `%s' requiere un argumento\n"
 
-#: posix/getopt.c:867 posix/getopt.c:870
+#: posix/getopt.c:842 posix/getopt.c:845
 #, c-format
 msgid "%s: unrecognized option `--%s'\n"
 msgstr "%s: opci�n no reconocida `--%s'\n"
 
-#: posix/getopt.c:878 posix/getopt.c:881
+#: posix/getopt.c:853 posix/getopt.c:856
 #, c-format
 msgid "%s: unrecognized option `%c%s'\n"
 msgstr "%s: opci�n no reconocida `%c%s'\n"
 
-#: posix/getopt.c:936 posix/getopt.c:939
+#: posix/getopt.c:903 posix/getopt.c:906
 #, c-format
 msgid "%s: illegal option -- %c\n"
 msgstr "%s: opci�n ilegal -- %c\n"
@@ -3968,200 +3928,200 @@ msgstr "%s: opci�n ilegal -- %c\n"
 # Despu�s de leer "1984", lo cambio.
 # Aqu� y en todas partes. sv
 #
-#: posix/getopt.c:945 posix/getopt.c:948
+#: posix/getopt.c:912 posix/getopt.c:915
 #, c-format
 msgid "%s: invalid option -- %c\n"
 msgstr "%s: opci�n inv�lida -- %c\n"
 
-#: posix/getopt.c:1003 posix/getopt.c:1022 posix/getopt.c:1234
-#: posix/getopt.c:1255
+#: posix/getopt.c:962 posix/getopt.c:973 posix/getopt.c:1159
+#: posix/getopt.c:1172
 #, c-format
 msgid "%s: option requires an argument -- %c\n"
 msgstr "%s: la opci�n requiere un argumento --%c\n"
 
-#: posix/getopt.c:1074 posix/getopt.c:1093
+#: posix/getopt.c:1025 posix/getopt.c:1036
 #, c-format
 msgid "%s: option `-W %s' is ambiguous\n"
 msgstr "%s: la opci�n `-W %s' es ambigua\n"
 
-#: posix/getopt.c:1117 posix/getopt.c:1138
+#: posix/getopt.c:1060 posix/getopt.c:1072
 #, c-format
 msgid "%s: option `-W %s' doesn't allow an argument\n"
 msgstr "%s: la opci�n `-W %s' no admite ning�n argumento\n"
 
-#: posix/regcomp.c:150
+#: posix/regcomp.c:136
 msgid "No match"
 msgstr "No hay ninguna coincidencia"
 
-#: posix/regcomp.c:153
+#: posix/regcomp.c:139
 msgid "Invalid regular expression"
 msgstr "La expresi�n regular es err�nea"
 
-#: posix/regcomp.c:156
+#: posix/regcomp.c:142
 msgid "Invalid collation character"
 msgstr "Car�cter de uni�n inv�lido"
 
-#: posix/regcomp.c:159
+#: posix/regcomp.c:145
 msgid "Invalid character class name"
 msgstr "Nombre de clase de car�cter inv�lido"
 
-#: posix/regcomp.c:162
+#: posix/regcomp.c:148
 msgid "Trailing backslash"
 msgstr "Barra invertida extra al final `\\'"
 
-#: posix/regcomp.c:165
+#: posix/regcomp.c:151
 msgid "Invalid back reference"
 msgstr "Referencia hacia atr�s inv�lida"
 
-#: posix/regcomp.c:168
+#: posix/regcomp.c:154
 msgid "Unmatched [ or [^"
 msgstr "[ � ^[ desemparejados"
 
-#: posix/regcomp.c:171
+#: posix/regcomp.c:157
 msgid "Unmatched ( or \\("
 msgstr "( � \\( desemparejados"
 
-#: posix/regcomp.c:174
+#: posix/regcomp.c:160
 msgid "Unmatched \\{"
 msgstr "\\{ desemparejado"
 
-#: posix/regcomp.c:177
+#: posix/regcomp.c:163
 msgid "Invalid content of \\{\\}"
 msgstr "Contenido de \\{\\} inv�lido"
 
-#: posix/regcomp.c:180
+#: posix/regcomp.c:166
 msgid "Invalid range end"
 msgstr "Final de rango inv�lido"
 
-#: posix/regcomp.c:183
+#: posix/regcomp.c:169
 msgid "Memory exhausted"
 msgstr "Memoria agotada"
 
-#: posix/regcomp.c:186
+#: posix/regcomp.c:172
 msgid "Invalid preceding regular expression"
 msgstr "La expresi�n regular precedente es inv�lida"
 
-#: posix/regcomp.c:189
+#: posix/regcomp.c:175
 msgid "Premature end of regular expression"
 msgstr "Fin no esperado de la expresi�n regular"
 
-#: posix/regcomp.c:192
+#: posix/regcomp.c:178
 msgid "Regular expression too big"
 msgstr "La expresi�n regular es demasiado grande"
 
-#: posix/regcomp.c:195
+#: posix/regcomp.c:181
 msgid "Unmatched ) or \\)"
 msgstr ") � \\) desemparejados"
 
-#: posix/regcomp.c:661
+#: posix/regcomp.c:615
 msgid "No previous regular expression"
 msgstr "No existe ninguna expresi�n regular anterior"
 
-#: argp/argp-help.c:224
+#: argp/argp-help.c:213
 #, c-format
 msgid "%.*s: ARGP_HELP_FMT parameter requires a value"
 msgstr "%.*s: El argumento ARGP_HELP_FMT requiere un valor"
 
-#: argp/argp-help.c:233
+#: argp/argp-help.c:222
 #, c-format
 msgid "%.*s: Unknown ARGP_HELP_FMT parameter"
 msgstr "%.*s: Par�metro ARGP_HELP_FMT desconocido"
 
-#: argp/argp-help.c:245
+#: argp/argp-help.c:234
 #, c-format
 msgid "Garbage in ARGP_HELP_FMT: %s"
 msgstr "Inconsistencias en ARGP_HELP_FMT: %s"
 
-#: argp/argp-help.c:1205
+#: argp/argp-help.c:1189
 msgid "Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options."
 msgstr ""
 "Los argumentos obligatorios u opcionales para las opciones largas son\n"
 "tambi�n obligatorios u opcionales para las opciones cortas correspondientes."
 
-#: argp/argp-help.c:1592
+#: argp/argp-help.c:1572
 msgid "Usage:"
 msgstr "Modo de empleo:"
 
-#: argp/argp-help.c:1596
+#: argp/argp-help.c:1576
 msgid "  or: "
 msgstr " o: "
 
-#: argp/argp-help.c:1608
+#: argp/argp-help.c:1588
 msgid " [OPTION...]"
 msgstr " [OPCI�N...]"
 
-#: argp/argp-help.c:1635
+#: argp/argp-help.c:1615
 #, c-format
 msgid "Try `%s --help' or `%s --usage' for more information.\n"
 msgstr "Pruebe `%s --help' o `%s --usage' para m�s informaci�n.\n"
 
-#: argp/argp-help.c:1663
+#: argp/argp-help.c:1643
 #, c-format
 msgid "Report bugs to %s.\n"
 msgstr "Comunicar bichos a %s.\n"
 
-#: argp/argp-parse.c:115
+#: argp/argp-parse.c:100
 msgid "Give this help list"
 msgstr "Da esta lista de ayuda"
 
-#: argp/argp-parse.c:116
+#: argp/argp-parse.c:101
 msgid "Give a short usage message"
 msgstr "Da un mensaje corto de uso"
 
-#: argp/argp-parse.c:117
+#: argp/argp-parse.c:102
 msgid "Set the program name"
 msgstr "Establece el nombre del programa"
 
-#: argp/argp-parse.c:119
+#: argp/argp-parse.c:104
 msgid "Hang for SECS seconds (default 3600)"
 msgstr "Cuelga durante SECS segundos (por omisi�n, 3600)"
 
-#: argp/argp-parse.c:180
+#: argp/argp-parse.c:161
 msgid "Print program version"
 msgstr "Muestra la versi�n del programa"
 
-#: argp/argp-parse.c:196
+#: argp/argp-parse.c:177
 msgid "(PROGRAM ERROR) No version known!?"
 msgstr "(ERROR DEL PROGRAMA) ��No se conoce ninguna versi�n!?"
 
-#: argp/argp-parse.c:672
+#: argp/argp-parse.c:653
 #, c-format
 msgid "%s: Too many arguments\n"
 msgstr "%s: Demasiados argumentos\n"
 
-#: argp/argp-parse.c:813
+#: argp/argp-parse.c:794
 msgid "(PROGRAM ERROR) Option should have been recognized!?"
 msgstr "(ERROR DEL PROGRAMA) ��No se deber�a haber reconocido la opci�n!?"
 
 #  ??? resolvedor, determinador, investigador, solucionador ?
 # Me suena que quiz� exista resolvedor. Habr�a que enterarse. sv
-#: resolv/herror.c:68
+#: resolv/herror.c:67
 msgid "Resolver Error 0 (no error)"
 msgstr "Error del determinador de nombres 0 (ning�n error)"
 
 # En el libro de Infov�a traducen host por "anfitri�n"
-#: resolv/herror.c:69
+#: resolv/herror.c:68
 msgid "Unknown host"
 msgstr "`Host' desconocido"
 
-#: resolv/herror.c:70
+#: resolv/herror.c:69
 msgid "Host name lookup failure"
 msgstr "Nombre de `host' no encontrado"
 
-#: resolv/herror.c:71
+#: resolv/herror.c:70
 msgid "Unknown server error"
 msgstr "Error del servidor desconocido"
 
-#: resolv/herror.c:72
+#: resolv/herror.c:71
 msgid "No address associated with name"
 msgstr "No existe ninguna direcci�n asociada al nombre"
 
 #  ??? lo mismo que arriba
-#: resolv/herror.c:108
+#: resolv/herror.c:107
 msgid "Resolver internal error"
 msgstr "Error interno del determinador de nombres"
 
-#: resolv/herror.c:111
+#: resolv/herror.c:110
 msgid "Unknown resolver error"
 msgstr "Error del determinador de nombres desconocido"
 
@@ -4214,24 +4174,24 @@ msgstr "basededatos [clave ...]"
 msgid "Service configuration to be used"
 msgstr "Configuraci�n del servicio"
 
-#: nss/getent.c:136 nss/getent.c:375
+#: nss/getent.c:136 nss/getent.c:308
 #, c-format
 msgid "Enumeration not supported on %s\n"
 msgstr "La enumeraci�n no est� soportada sobre %s\n"
 
-#: nss/getent.c:800
+#: nss/getent.c:732
 msgid "getent - get entries from administrative database."
 msgstr "getent - obtiene entradas de la base de datos administrativa."
 
-#: nss/getent.c:801
+#: nss/getent.c:733
 msgid "Supported databases:"
 msgstr "Bases de datos admitidas:"
 
-#: nss/getent.c:858 nscd/nscd.c:131 nscd/nscd_nischeck.c:64
+#: nss/getent.c:790 nscd/nscd.c:124 nscd/nscd_nischeck.c:64
 msgid "wrong number of arguments"
 msgstr "n�mero incorrecto de argumentos"
 
-#: nss/getent.c:868
+#: nss/getent.c:800
 #, c-format
 msgid "Unknown database: %s\n"
 msgstr "Base de datos desconocida: %s\n"
@@ -4624,12 +4584,10 @@ msgid "illegal nettype :`%s'\n"
 msgstr "tipodered ilegal :`%s'\n"
 
 #: sunrpc/rpc_main.c:1104
-#, c-format
 msgid "rpcgen: too many defines\n"
 msgstr "rpcgen: demasiados defines\n"
 
 #: sunrpc/rpc_main.c:1116
-#, c-format
 msgid "rpcgen: arglist coding error\n"
 msgstr "rpcgen: error de codificaci�n de la lista de argumentos\n"
 
@@ -4641,7 +4599,6 @@ msgid "file `%s' already exists and may be overwritten\n"
 msgstr "el fichero `%s' ya existe y podr�a ser sobreescrito\n"
 
 #: sunrpc/rpc_main.c:1194
-#, c-format
 msgid "Cannot specify more than one input file!\n"
 msgstr "No se puede especificar m�s de un fichero de entrada\n"
 
@@ -4651,7 +4608,6 @@ msgid "This implementation doesn't support newstyle or MT-safe code!\n"
 msgstr "�Esta implementaci�n no admite c�digo de nuevo estilo o `MT-safe'!\n"
 
 #: sunrpc/rpc_main.c:1373
-#, c-format
 msgid "Cannot use netid flag with inetd flag!\n"
 msgstr "No se puede usar la opci�n netid con la opci�n inetd\n"
 
@@ -4664,14 +4620,12 @@ msgid "Cannot use table flags with newstyle!\n"
 msgstr "No se pueden usar las opciones de la tabla con el nuevo estilo\n"
 
 #: sunrpc/rpc_main.c:1411
-#, c-format
 msgid "\"infile\" is required for template generation flags.\n"
 msgstr ""
 "se necesita un \"fichero_de_entrada\" para las opciones de generaci�n\n"
 "de plantillas\n"
 
 #: sunrpc/rpc_main.c:1416
-#, c-format
 msgid "Cannot have more than one file generation flag!\n"
 msgstr "No se puede tener m�s de una opci�n de generaci�n de fichero\n"
 
@@ -4886,7 +4840,7 @@ msgstr "svcudp_create: memoria agotada\n"
 msgid "svcudp_create: xp_pad is too small for IP_PKTINFO\n"
 msgstr "svcudp_create: xp_pad es demasiado peque�o para IP_PKTINFO\n"
 
-#: sunrpc/svc_udp.c:493
+#: sunrpc/svc_udp.c:471
 msgid "enablecache: cache already enabled"
 msgstr "enablecache: el cach� ya estaba activado"
 
@@ -4894,27 +4848,27 @@ msgstr "enablecache: el cach� ya estaba activado"
 # Parece ser indistinto, así que unas veces puede ser "la" y otras "el".
 # dependiendo del caso (lo que mejor suene).
 #
-#: sunrpc/svc_udp.c:499
+#: sunrpc/svc_udp.c:477
 msgid "enablecache: could not allocate cache"
 msgstr "enablecache: no se pudo crear espacio para el cach�"
 
-#: sunrpc/svc_udp.c:507
+#: sunrpc/svc_udp.c:485
 msgid "enablecache: could not allocate cache data"
 msgstr "enablecache: no se pudo crear espacio para los datos del cach�"
 
-#: sunrpc/svc_udp.c:514
+#: sunrpc/svc_udp.c:492
 msgid "enablecache: could not allocate cache fifo"
 msgstr "enablecache: no se pudo crear espacio para la pila del cach�"
 
-#: sunrpc/svc_udp.c:550
+#: sunrpc/svc_udp.c:528
 msgid "cache_set: victim not found"
 msgstr "cache_set: no se encontr� el objetivo"
 
-#: sunrpc/svc_udp.c:561
+#: sunrpc/svc_udp.c:539
 msgid "cache_set: victim alloc failed"
 msgstr "cache_set: fall� la asignaci�n de espacio para el objetivo"
 
-#: sunrpc/svc_udp.c:567
+#: sunrpc/svc_udp.c:545
 msgid "cache_set: could not allocate new rpc_buffer"
 msgstr "cache_set: no se pudo asignar espacio para un nuevo b�fer rpc"
 
@@ -4938,7 +4892,7 @@ msgstr "svc_unix: makefd_xprt: memoria agotada\n"
 msgid "xdr_bytes: out of memory\n"
 msgstr "xdr_bytes: memoria agotada\n"
 
-#: sunrpc/xdr.c:728 sunrpc/xdr.c:731
+#: sunrpc/xdr.c:725 sunrpc/xdr.c:728
 msgid "xdr_string: out of memory\n"
 msgstr "xdr_string: memoria agotada\n"
 
@@ -5363,7 +5317,6 @@ msgid "Access Rights : "
 msgstr "Derechos de acceso : "
 
 #: nis/nis_print.c:326
-#, c-format
 msgid ""
 "\n"
 "Time to Live  : "
@@ -5455,112 +5408,112 @@ msgstr " No hay ning�n no-miembro impl�cito\n"
 msgid "    No recursive nonmembers\n"
 msgstr " No hay ning�n no-miembro recursivo\n"
 
-#: nis/nss_nisplus/nisplus-publickey.c:101
-#: nis/nss_nisplus/nisplus-publickey.c:182
+#: nis/nss_nisplus/nisplus-publickey.c:96
+#: nis/nss_nisplus/nisplus-publickey.c:172
 #, c-format
 msgid "DES entry for netname %s not unique\n"
 msgstr "La entrada DES para el nombre %s no es �nica\n"
 
-#: nis/nss_nisplus/nisplus-publickey.c:218
+#: nis/nss_nisplus/nisplus-publickey.c:208
 #, c-format
 msgid "netname2user: missing group id list in `%s'."
 msgstr "netname2user: falta la lista de ids de grupo en `%s'."
 
-#: nis/nss_nisplus/nisplus-publickey.c:300
-#: nis/nss_nisplus/nisplus-publickey.c:306
-#: nis/nss_nisplus/nisplus-publickey.c:370
-#: nis/nss_nisplus/nisplus-publickey.c:379
+#: nis/nss_nisplus/nisplus-publickey.c:285
+#: nis/nss_nisplus/nisplus-publickey.c:291
+#: nis/nss_nisplus/nisplus-publickey.c:350
+#: nis/nss_nisplus/nisplus-publickey.c:359
 #, c-format
 msgid "netname2user: (nis+ lookup): %s\n"
 msgstr "netname2user: (b�squeda nis+): %s\n"
 
-#: nis/nss_nisplus/nisplus-publickey.c:319
+#: nis/nss_nisplus/nisplus-publickey.c:304
 #, c-format
 msgid "netname2user: DES entry for %s in directory %s not unique"
 msgstr "netname2user: la entrada DES para %s en el directorio %s no es �nica"
 
-#: nis/nss_nisplus/nisplus-publickey.c:337
+#: nis/nss_nisplus/nisplus-publickey.c:322
 #, c-format
 msgid "netname2user: principal name `%s' too long"
 msgstr "netname2user: el nombre principal `%s' es demasiado largo"
 
-#: nis/nss_nisplus/nisplus-publickey.c:392
+#: nis/nss_nisplus/nisplus-publickey.c:372
 #, c-format
 msgid "netname2user: LOCAL entry for %s in directory %s not unique"
 msgstr "netname2user: la entrada LOCAL para %s en el directorio %s no es �nica"
 
-#: nis/nss_nisplus/nisplus-publickey.c:399
+#: nis/nss_nisplus/nisplus-publickey.c:379
 msgid "netname2user: should not have uid 0"
 msgstr "netname2user: no deber�a tener uid 0"
 
-#: nis/ypclnt.c:171
+#: nis/ypclnt.c:174
 #, c-format
 msgid "YPBINDPROC_DOMAIN: %s\n"
 msgstr "YPBINDPROC_DOMAIN: %s\n"
 
-#: nis/ypclnt.c:780
+#: nis/ypclnt.c:789
 msgid "Request arguments bad"
 msgstr "Los argumentos de la petici�n son incorrectos"
 
-#: nis/ypclnt.c:782
+#: nis/ypclnt.c:791
 msgid "RPC failure on NIS operation"
 msgstr "Fallo RPC en una operaci�n NIS"
 
-#: nis/ypclnt.c:784
+#: nis/ypclnt.c:793
 msgid "Can't bind to server which serves this domain"
 msgstr "Ha fallado la llamada a bind() con el servidor que sirve a este dominio"
 
-#: nis/ypclnt.c:786
+#: nis/ypclnt.c:795
 msgid "No such map in server's domain"
 msgstr "No existe esa tabla en el dominio del servidor"
 
-#: nis/ypclnt.c:788
+#: nis/ypclnt.c:797
 msgid "No such key in map"
 msgstr "No existe esta clave en la tabla"
 
-#: nis/ypclnt.c:790
+#: nis/ypclnt.c:799
 msgid "Internal NIS error"
 msgstr "Error interno de NIS"
 
-#: nis/ypclnt.c:792
+#: nis/ypclnt.c:801
 msgid "Local resource allocation failure"
 msgstr "La asignaci�n de recursos locales ha fallado"
 
-#: nis/ypclnt.c:794
+#: nis/ypclnt.c:803
 msgid "No more records in map database"
 msgstr "No hay m�s registros en la base de datos"
 
-#: nis/ypclnt.c:796
+#: nis/ypclnt.c:805
 msgid "Can't communicate with portmapper"
 msgstr "No se puede comunicar con el asignador de puertos"
 
-#: nis/ypclnt.c:798
+#: nis/ypclnt.c:807
 msgid "Can't communicate with ypbind"
 msgstr "No se puede establecer comunicaci�n con `ypbind'"
 
-#: nis/ypclnt.c:800
+#: nis/ypclnt.c:809
 msgid "Can't communicate with ypserv"
 msgstr "No se puede establecer comunicaci�n con `ypserv'"
 
-#: nis/ypclnt.c:802
+#: nis/ypclnt.c:811
 msgid "Local domain name not set"
 msgstr "No se ha establecido el nombre del dominio local"
 
-#: nis/ypclnt.c:804
+#: nis/ypclnt.c:813
 msgid "NIS map database is bad"
 msgstr "La base de datos de la tabla NIS no es correcta"
 
-#: nis/ypclnt.c:806
+#: nis/ypclnt.c:815
 msgid "NIS client/server version mismatch - can't supply service"
 msgstr ""
 "Discordancia en las versiones de NIS del cliente y el servidor.\n"
 "No se puede suministrar el servicio."
 
-#: nis/ypclnt.c:810
+#: nis/ypclnt.c:819
 msgid "Database is busy"
 msgstr "La base de datos est� ocupada"
 
-#: nis/ypclnt.c:812
+#: nis/ypclnt.c:821
 msgid "Unknown NIS error code"
 msgstr "Error de NIS desconocido"
 
@@ -5571,117 +5524,116 @@ msgstr "Error de NIS desconocido"
 # De acuerdo.
 # [ Antes decía ... la llamada a bind para el servicio de páginas amarillas ]
 # Un poco demasiado explicativo. sv
-#: nis/ypclnt.c:854
+#: nis/ypclnt.c:863
 msgid "Internal ypbind error"
 msgstr "Error interno en ypbind"
 
 # FUZZY
-#: nis/ypclnt.c:856
+#: nis/ypclnt.c:865
 msgid "Domain not bound"
 msgstr "No se pudo conectar con el dominio"
 
-#: nis/ypclnt.c:858
+#: nis/ypclnt.c:867
 msgid "System resource allocation failure"
 msgstr "Fallo en la asignaci�n de recursos del sistema"
 
-#: nis/ypclnt.c:860
+#: nis/ypclnt.c:869
 msgid "Unknown ypbind error"
 msgstr "Error desconocido en la llamada a `ypbind()'"
 
-#: nis/ypclnt.c:899
+#: nis/ypclnt.c:908
 msgid "yp_update: cannot convert host to netname\n"
 msgstr "yp_update: no se puede convertir el nombre del `host' a nombre de red\n"
 
-#: nis/ypclnt.c:911
+#: nis/ypclnt.c:920
 msgid "yp_update: cannot get server address\n"
 msgstr "yp_update: no se puede encontrar la direcci�n del servidor\n"
 
-#: nscd/cache.c:94
+#: nscd/cache.c:88
 msgid "while allocating hash table entry"
 msgstr "al asignar espacio para la entrada en la tabla `hash'"
 
-#: nscd/cache.c:162 nscd/connections.c:184
+#: nscd/cache.c:150 nscd/connections.c:187
 #, c-format
 msgid "cannot stat() file `%s': %s"
 msgstr "no se puede ejecutar stat() sobre el fichero `%s': %s"
 
-#: nscd/connections.c:150
+#: nscd/connections.c:146
+msgid "cannot read configuration file; this is fatal"
+msgstr "no se puede leer el fichero de configuraci�n; este error es fatal"
+
+#: nscd/connections.c:153
 msgid "Cannot run nscd in secure mode as unprivileged user"
 msgstr "No se puede ejecutar nscd en modo seguro como usuario no privilegiado"
 
-#: nscd/connections.c:172
+#: nscd/connections.c:175
 #, c-format
 msgid "while allocating cache: %s"
 msgstr "al asignar espacio para el cach�: %s"
 
-#: nscd/connections.c:197
+#: nscd/connections.c:200
 #, c-format
 msgid "cannot open socket: %s"
 msgstr "no se puede abrir el `socket': %s"
 
-#: nscd/connections.c:215
+#: nscd/connections.c:218
 #, c-format
 msgid "cannot enable socket to accept connections: %s"
 msgstr "no se puede activar el `socket' para aceptar conexiones: %s"
 
 #: nscd/connections.c:260
 #, c-format
+msgid "handle_request: request received (Version = %d)"
+msgstr "handle_request: petici�n recibida (Versi�n = %d)"
+
+#: nscd/connections.c:266
+#, c-format
 msgid "cannot handle old request version %d; current version is %d"
 msgstr ""
 "no se pueden manejar peticiones de la versi�n %d, la versi�n\n"
 "actual es %d"
 
-#: nscd/connections.c:298 nscd/connections.c:324
+#: nscd/connections.c:304 nscd/connections.c:326
 #, c-format
 msgid "cannot write result: %s"
 msgstr "no se puede escribir el resultado: %s"
 
-#: nscd/connections.c:392 nscd/connections.c:514
+#: nscd/connections.c:405 nscd/connections.c:499
 #, c-format
 msgid "error getting callers id: %s"
 msgstr "error al obtener el id de los llamantes: %s"
 
-#: nscd/connections.c:485
+#: nscd/connections.c:471
 #, c-format
 msgid "while accepting connection: %s"
 msgstr "al aceptar la conexi�n: %s"
 
-#: nscd/connections.c:498
+#: nscd/connections.c:482
 #, c-format
 msgid "short read while reading request: %s"
 msgstr "lectura insuficiente mientras se le�a la petici�n: %s"
 
-#: nscd/connections.c:542
+#: nscd/connections.c:518
 #, c-format
 msgid "key length in request too long: %d"
 msgstr "la longitud de la clave en la petici�n es demasiado larga: %d"
 
-#: nscd/connections.c:556
+#: nscd/connections.c:532
 #, c-format
 msgid "short read while reading request key: %s"
 msgstr "se acabaron los datos mientras se le�a la clave de petici�n: %s"
 
-#: nscd/connections.c:566
-#, c-format
-msgid "handle_request: request received (Version = %d) from PID %ld"
-msgstr "handle_request: petici�n recibida (Versi�n = %d) del PID %ld"
-
-#: nscd/connections.c:571
-#, c-format
-msgid "handle_request: request received (Version = %d)"
-msgstr "handle_request: petici�n recibida (Versi�n = %d)"
-
-#: nscd/connections.c:635 nscd/connections.c:636 nscd/connections.c:655
-#: nscd/connections.c:668 nscd/connections.c:674 nscd/connections.c:681
+#: nscd/connections.c:591 nscd/connections.c:592 nscd/connections.c:611
+#: nscd/connections.c:624 nscd/connections.c:630 nscd/connections.c:637
 #, c-format
 msgid "Failed to run nscd as user '%s'"
 msgstr "Fallo al ejecutar nscd como usuario `%s'"
 
-#: nscd/connections.c:656
+#: nscd/connections.c:612
 msgid "getgrouplist failed"
 msgstr "fall� `getgrouplist'"
 
-#: nscd/connections.c:669
+#: nscd/connections.c:625
 msgid "setgroups failed"
 msgstr "fall� `setgroups'"
 
@@ -5693,134 +5645,121 @@ msgstr "al asignar espacio para la copia de la clave"
 msgid "while allocating cache entry"
 msgstr "al asignar espacio para la entrada en el cach�"
 
-#: nscd/grpcache.c:197 nscd/hstcache.c:283 nscd/pwdcache.c:193
+#: nscd/grpcache.c:196 nscd/hstcache.c:282 nscd/pwdcache.c:192
 #, c-format
 msgid "short write in %s: %s"
 msgstr "escritura insuficiente en %s: %s"
 
-#: nscd/grpcache.c:219
+#: nscd/grpcache.c:218
 #, c-format
 msgid "Haven't found \"%s\" in group cache!"
 msgstr "No se ha encontrado \"%s\" en el cach� de grupos"
 
-#: nscd/grpcache.c:285
+#: nscd/grpcache.c:284
 #, c-format
 msgid "Invalid numeric gid \"%s\"!"
 msgstr "�gid num�rico inv�lido \"%s\"!"
 
-#: nscd/grpcache.c:292
+#: nscd/grpcache.c:291
 #, c-format
 msgid "Haven't found \"%d\" in group cache!"
 msgstr "No se ha encontrado \"%d\" en el cach� de grupo"
 
-#: nscd/hstcache.c:305 nscd/hstcache.c:371 nscd/hstcache.c:436
-#: nscd/hstcache.c:501
+#: nscd/hstcache.c:304 nscd/hstcache.c:370 nscd/hstcache.c:435
+#: nscd/hstcache.c:500
 #, c-format
 msgid "Haven't found \"%s\" in hosts cache!"
 msgstr "No se ha encontrado \"%s\" en el cach� de `hosts'"
 
-#: nscd/nscd.c:89
+#: nscd/nscd.c:85
 msgid "Read configuration data from NAME"
 msgstr "Lee datos de configuraci�n de NOMBRE"
 
-#: nscd/nscd.c:91
+#: nscd/nscd.c:87
 msgid "Do not fork and display messages on the current tty"
 msgstr "No se divide y muestra los mensajes en la terminal actual"
 
-#: nscd/nscd.c:92
+#: nscd/nscd.c:88
 msgid "NUMBER"
 msgstr "N�MERO"
 
-#: nscd/nscd.c:92
+#: nscd/nscd.c:88
 msgid "Start NUMBER threads"
 msgstr "Comienza N�MERO hilos"
 
-#: nscd/nscd.c:93
+#: nscd/nscd.c:89
 msgid "Shut the server down"
 msgstr "Apagar el servidor"
 
-#: nscd/nscd.c:94
+#: nscd/nscd.c:90
 msgid "Print current configuration statistic"
 msgstr "Muestra una estad�stica sobre la configuraci�n actual"
 
-#: nscd/nscd.c:95
+#: nscd/nscd.c:91
 msgid "TABLE"
 msgstr "TABLA"
 
-#: nscd/nscd.c:96
+#: nscd/nscd.c:92
 msgid "Invalidate the specified cache"
 msgstr "Invalida la cach� especificada"
 
-#: nscd/nscd.c:97
+#: nscd/nscd.c:93
 msgid "TABLE,yes"
 msgstr "TABLA,s�"
 
-#: nscd/nscd.c:97
+#: nscd/nscd.c:93
 msgid "Use separate cache for each user"
 msgstr "Utiliza una cach� separada para cada usuario"
 
-#: nscd/nscd.c:102
+#: nscd/nscd.c:98
 msgid "Name Service Cache Daemon."
 msgstr "Daemon de Cach� del Servicio de Nombres."
 
-#: nscd/nscd.c:141
-msgid "cannot read configuration file; this is fatal"
-msgstr "no se puede leer el fichero de configuraci�n; este error es fatal"
-
-#: nscd/nscd.c:152
+#: nscd/nscd.c:131
 msgid "already running"
 msgstr "ya est� funcionando"
 
-#: nscd/nscd.c:270 nscd/nscd.c:294 nscd/nscd_stat.c:132
+#: nscd/nscd.c:243 nscd/nscd.c:263 nscd/nscd.c:269
 msgid "Only root is allowed to use this option!"
 msgstr "Solamente root puede usar esta opci�n"
 
-#: nscd/nscd_conf.c:88
+#: nscd/nscd_conf.c:83
 #, c-format
 msgid "Parse error: %s"
 msgstr "Error de an�lisis: %s"
 
-#: nscd/nscd_conf.c:171
+#: nscd/nscd_conf.c:166
 #, c-format
 msgid "Could not create log file \"%s\""
 msgstr "No se pudo crear el fichero de registro \"%s\""
 
-#: nscd/nscd_conf.c:187
+#: nscd/nscd_conf.c:182
 msgid "Must specify user name for server-user option"
 msgstr "Debe especificar un nombre de usuario para la opci�n `server-user'"
 
-#: nscd/nscd_conf.c:194
-msgid "Must specify user name for stat-user option"
-msgstr "Debe especificar un nombre de usuario para la opci�n `stat-user'"
-
-#: nscd/nscd_conf.c:205
+#: nscd/nscd_conf.c:187
 #, c-format
 msgid "Unknown option: %s %s %s"
 msgstr "Opci�n desconocida: %s %s %s"
 
-#: nscd/nscd_stat.c:103
+#: nscd/nscd_stat.c:87
 #, c-format
 msgid "cannot write statistics: %s"
 msgstr "no se pueden escribir las estad�sticas: %s"
 
-#: nscd/nscd_stat.c:128
-#, c-format
-msgid "Only root or %s is allowed to use this option!"
-msgstr "Solamente root o %s puede usar esta opci�n"
-
-#: nscd/nscd_stat.c:139
+#: nscd/nscd_stat.c:105
 msgid "nscd not running!\n"
 msgstr "nscd no est� en ejecuci�n\n"
 
-#: nscd/nscd_stat.c:150
+#: nscd/nscd_stat.c:116
 msgid "write incomplete"
 msgstr "escritura incompleta"
 
-#: nscd/nscd_stat.c:162
+#: nscd/nscd_stat.c:128
 msgid "cannot read statistics data"
 msgstr "no se pueden leer los datos de estad�stica"
 
-#: nscd/nscd_stat.c:165
+#: nscd/nscd_stat.c:131
 #, c-format
 msgid ""
 "nscd configuration:\n"
@@ -5831,96 +5770,61 @@ msgstr ""
 "\n"
 "%15d  nivel de depuraci�n del servidor\n"
 
-#: nscd/nscd_stat.c:189
-#, c-format
-msgid "%3ud %2uh %2um %2lus  server runtime\n"
-msgstr "%3ud %2uh %2um %2lus  tiempo de funcionamiento del servidor\n"
-
-#: nscd/nscd_stat.c:192
-#, c-format
-msgid "    %2uh %2um %2lus  server runtime\n"
-msgstr "    %2uh %2um %2lus  tiempo de funcionamiento del servidor\n"
-
-#: nscd/nscd_stat.c:194
-#, c-format
-msgid "        %2um %2lus  server runtime\n"
-msgstr "        %2um %2lus  tiempo de funcionamiento del servidor\n"
-
-#: nscd/nscd_stat.c:196
-#, c-format
-msgid "            %2lus  server runtime\n"
-msgstr "            %2lus  tiempo de funcionamiento del servidor\n"
-
-#: nscd/nscd_stat.c:198
-#, c-format
-msgid "%15lu  number of times clients had to wait\n"
-msgstr "%15lu  n�mero de veces que los clientes tuvieron que esperar\n"
-
-#: nscd/nscd_stat.c:213 nscd/nscd_stat.c:215
+#: nscd/nscd_stat.c:146 nscd/nscd_stat.c:148
 msgid "      no"
 msgstr "      no"
 
-#: nscd/nscd_stat.c:213 nscd/nscd_stat.c:215
+#: nscd/nscd_stat.c:146 nscd/nscd_stat.c:148
 msgid "     yes"
 msgstr "      sí"
 
-#: nscd/nscd_stat.c:221
+#: nscd/nscd_stat.c:154
 #, c-format
 msgid ""
 "\n"
 "%s cache:\n"
 "\n"
 "%15s  cache is enabled\n"
-"%15Zu  suggested size\n"
-"%15lu  seconds time to live for positive entries\n"
-"%15lu  seconds time to live for negative entries\n"
-"%15lu  cache hits on positive entries\n"
-"%15lu  cache hits on negative entries\n"
-"%15lu  cache misses on positive entries\n"
-"%15lu  cache misses on negative entries\n"
-"%15lu%% cache hit rate\n"
-"%15lu  current number of cached values\n"
-"%15lu  maximum number of cached values\n"
-"%15lu  maximum chain length searched\n"
-"%15lu  number of delays on rdlock\n"
-"%15lu  number of delays on wrlock\n"
+"%15Zd  suggested size\n"
+"%15ld  seconds time to live for positive entries\n"
+"%15ld  seconds time to live for negative entries\n"
+"%15ld  cache hits on positive entries\n"
+"%15ld  cache hits on negative entries\n"
+"%15ld  cache misses on positive entries\n"
+"%15ld  cache misses on negative entries\n"
+"%15ld%% cache hit rate\n"
 "%15s  check /etc/%s for changes\n"
 msgstr ""
 "\n"
 "%s cach�:\n"
 "\n"
 "%15s  el cach� est� activado\n"
-"%15Zu  tama�o sugerido\n"
-"%15lu  segundos de vida para las entradas positivas\n"
-"%15lu  segundos de vida para las entradas negativas\n"
-"%15lu  aciertos de cach� en las entradas positivas\n"
-"%15lu  aciertos de cach� en las entradas negativas\n"
-"%15lu  fallos de cach� en las entradas positivas\n"
-"%15lu  fallos de cach� en las entradas negativas\n"
-"%15lu%% tasa de aciertos de cach�\n"
-"%15lu  n�mero actual de valores en cach�\n"
-"%15lu  n�mero m�ximo de valores en cach�\n"
-"%15lu  longitud maxima de la cadena buscada\n"
-"%15lu  n�mero de retardos en rdlock\n"
-"%15lu  n�mero de retardos en wrlock\n"
+"%15Zd  tama�o sugerido\n"
+"%15ld  segundos de vida para las entradas positivas\n"
+"%15ld  segundos de vida para las entradas negativas\n"
+"%15ld  aciertos de cach� en las entradas positivas\n"
+"%15ld  aciertos de cach� en las entradas negativas\n"
+"%15ld  fallos de cach� en las entradas positivas\n"
+"%15ld  fallos de cach� en las entradas negativas\n"
+"%15ld%% tasa de aciertos de cach�\n"
 "%15s  compruebe /etc/%s para cambios\n"
 
-#: nscd/pwdcache.c:215
+#: nscd/pwdcache.c:214
 #, c-format
 msgid "Haven't found \"%s\" in password cache!"
 msgstr "No se ha encontrado \"%s\" en el cach� de contrase�as"
 
-#: nscd/pwdcache.c:281
+#: nscd/pwdcache.c:280
 #, c-format
 msgid "Invalid numeric uid \"%s\"!"
 msgstr "�uid num�rico inv�lido \"%s\"!"
 
-#: nscd/pwdcache.c:288
+#: nscd/pwdcache.c:287
 #, c-format
 msgid "Haven't found \"%d\" in password cache!"
 msgstr "No se ha encontrado \"%d\" en el cach� de contrase�as"
 
-#: elf/../sysdeps/generic/dl-sysdep.c:422
+#: elf/../sysdeps/generic/dl-sysdep.c:357
 msgid "cannot create capability list"
 msgstr "no se puede crear la lista de capacidades"
 
@@ -5959,62 +5863,62 @@ msgid "%s is for unknown machine %d.\n"
 msgstr "%s es para la m�quina desconocida %d.\n"
 
 # FIXME: Falta ver si es niño o niña. sv
-#: elf/cache.c:70
+#: elf/cache.c:69
 msgid "unknown"
 msgstr "desconocido/a"
 
-#: elf/cache.c:111
+#: elf/cache.c:105
 msgid "Unknown OS"
 msgstr "Sistema Operativo desconocido"
 
-#: elf/cache.c:116
+#: elf/cache.c:110
 #, c-format
 msgid ", OS ABI: %s %d.%d.%d"
 msgstr ", ABI del SO: %s %d.%d.%d"
 
-#: elf/cache.c:142 elf/ldconfig.c:1078
+#: elf/cache.c:136 elf/ldconfig.c:1045
 #, c-format
 msgid "Can't open cache file %s\n"
 msgstr "No se puede abrir el fichero de cach� %s\n"
 
-#: elf/cache.c:154
+#: elf/cache.c:148
 msgid "mmap of cache file failed.\n"
 msgstr "fall� la operaci�n `mmap' sobre el fichero de cach�.\n"
 
-#: elf/cache.c:158 elf/cache.c:168
+#: elf/cache.c:152 elf/cache.c:162
 msgid "File is not a cache file.\n"
 msgstr "El fichero no es un fichero de cach�.\n"
 
-#: elf/cache.c:201 elf/cache.c:211
+#: elf/cache.c:195 elf/cache.c:205
 #, c-format
 msgid "%d libs found in cache `%s'\n"
 msgstr "%d bibliotecas se encontraron en la cach� `%s'\n"
 
-#: elf/cache.c:410
+#: elf/cache.c:392
 #, c-format
 msgid "Can't remove old temporary cache file %s"
 msgstr "No se puede borrar el fichero de cach� temporal antiguo %s"
 
-#: elf/cache.c:417
+#: elf/cache.c:399
 #, c-format
 msgid "Can't create temporary cache file %s"
 msgstr "No se puede crear el fichero temporal de cach� %s"
 
-#: elf/cache.c:425 elf/cache.c:434 elf/cache.c:438
+#: elf/cache.c:407 elf/cache.c:416 elf/cache.c:420
 msgid "Writing of cache data failed"
 msgstr "Fall� la escritura de los datos de la cach�"
 
 # FIXME. Merge with previous message (?). sv
-#: elf/cache.c:442
+#: elf/cache.c:424
 msgid "Writing of cache data failed."
 msgstr "Falló la escritura de los datos de la caché."
 
-#: elf/cache.c:449
+#: elf/cache.c:431
 #, c-format
 msgid "Changing access rights of %s to %#o failed"
 msgstr "El cambio de los derechos de acceso de %s a %#o fall�"
 
-#: elf/cache.c:454
+#: elf/cache.c:436
 #, c-format
 msgid "Renaming of %s to %s failed"
 msgstr "Fall� el renombramiento de %s a %s"
@@ -6023,7 +5927,7 @@ msgstr "Fall� el renombramiento de %s a %s"
 msgid "shared object not open"
 msgstr "el objeto compartido no est� abierto"
 
-#: elf/dl-close.c:531 elf/dl-open.c:454
+#: elf/dl-close.c:486 elf/dl-open.c:444
 msgid "TLS generation counter wrapped!  Please send report with the 'glibcbug' script."
 msgstr ""
 "�El contador de generaciones TLS ha vuelto a cero! Por favor env�e un informe\n"
@@ -6068,145 +5972,137 @@ msgstr "���HAY UN BICHO EN EL ENLAZADOR DIN�MICO!!!"
 msgid "error while loading shared libraries"
 msgstr "error al cargar las bibliotecas compartidas"
 
-#: elf/dl-load.c:347
+#: elf/dl-load.c:339
 msgid "cannot allocate name record"
 msgstr "no se puede asignar el registro del nombre"
 
 # He intentado mejorarlo un poco ...
 #
-#: elf/dl-load.c:449 elf/dl-load.c:528 elf/dl-load.c:648 elf/dl-load.c:743
+#: elf/dl-load.c:441 elf/dl-load.c:520 elf/dl-load.c:612 elf/dl-load.c:707
 msgid "cannot create cache for search path"
 msgstr "no se puede crear un cach� para la ruta de b�squeda"
 
-#: elf/dl-load.c:551
+#: elf/dl-load.c:543
 msgid "cannot create RUNPATH/RPATH copy"
 msgstr "no se puede crear una copia RUNPATH/RPATH"
 
-#: elf/dl-load.c:634
+#: elf/dl-load.c:598
 msgid "cannot create search path array"
 msgstr "no se puede crear la matriz de la ruta de b�squeda"
 
-#: elf/dl-load.c:830
+#: elf/dl-load.c:794
 msgid "cannot stat shared object"
 msgstr "no se puede efectuar `stat' sobre el objeto compartido"
 
-#: elf/dl-load.c:874
+#: elf/dl-load.c:838
 msgid "cannot open zero fill device"
 msgstr "no se puede abrir el dispositivo de `zero fill'"
 
-#: elf/dl-load.c:883 elf/dl-load.c:1929
+#: elf/dl-load.c:847 elf/dl-load.c:1902
 msgid "cannot create shared object descriptor"
 msgstr "no se puede crear el descriptor del objeto compartido"
 
-#: elf/dl-load.c:902 elf/dl-load.c:1470 elf/dl-load.c:1553
+#: elf/dl-load.c:866 elf/dl-load.c:1398 elf/dl-load.c:1481
 msgid "cannot read file data"
 msgstr "no se pueden leer los datos del fichero"
 
-#: elf/dl-load.c:946
+#: elf/dl-load.c:906
 msgid "ELF load command alignment not page-aligned"
 msgstr "El alineamiento de la orden de carga ELF no est� alineada a la p�gina"
 
-#: elf/dl-load.c:953
+#: elf/dl-load.c:913
 msgid "ELF load command address/offset not properly aligned"
 msgstr "La direcci�n/desplazamiento de la orden de carga ELF no est� bien alineada"
 
-#: elf/dl-load.c:1037
+#: elf/dl-load.c:988
 msgid "cannot allocate TLS data structures for initial thread"
 msgstr "no se pueden crear las estructuras de datos TLS para el hilo inicial"
 
-#: elf/dl-load.c:1061
+#: elf/dl-load.c:1012
 msgid "cannot handle TLS data"
 msgstr "no se pueden manejar los datos de TLS"
 
-#: elf/dl-load.c:1075
-msgid "object file has no loadable segments"
-msgstr "el fichero objeto no tiene segmentos cargables"
-
-#: elf/dl-load.c:1110
+#: elf/dl-load.c:1047
 msgid "failed to map segment from shared object"
 msgstr "fallo al asignar un segmento del objeto compartido"
 
-#: elf/dl-load.c:1135
+#: elf/dl-load.c:1071
 msgid "cannot dynamically load executable"
 msgstr "no se puede cargar el ejecutable din�micamente"
 
-#: elf/dl-load.c:1191
+#: elf/dl-load.c:1132
 msgid "cannot change memory protections"
 msgstr "no se pueden cambiar las protecciones de memoria"
 
-#: elf/dl-load.c:1210
+#: elf/dl-load.c:1151
 msgid "cannot map zero-fill pages"
 msgstr "no se pueden asignar p�ginas de tipo `zero-fill'"
 
-#: elf/dl-load.c:1228
+#: elf/dl-load.c:1169
 msgid "cannot allocate memory for program header"
 msgstr "no se puede asignar memoria para la cabecera del programa"
 
-#: elf/dl-load.c:1259
+#: elf/dl-load.c:1200
 msgid "object file has no dynamic section"
 msgstr "el fichero objeto no tiene secci�n din�mica"
 
-#: elf/dl-load.c:1299
+#: elf/dl-load.c:1240
 msgid "shared object cannot be dlopen()ed"
 msgstr "no se puede efectuar dlopen() sobre el objeto compartido"
 
-#: elf/dl-load.c:1322
+#: elf/dl-load.c:1263
 msgid "cannot create searchlist"
 msgstr "no se puede crear la lista de b�squeda"
 
-#: elf/dl-load.c:1352
-msgid "cannot enable executable stack as shared object requires"
-msgstr "no se puede activar la pila ejecutable tal y como el objeto compartido necesita"
-
-#: elf/dl-load.c:1470
+#: elf/dl-load.c:1398
 msgid "file too short"
 msgstr "fichero demasiado corto"
 
-#: elf/dl-load.c:1493
+#: elf/dl-load.c:1421
 msgid "invalid ELF header"
 msgstr "cabecera ELF inv�lida"
 
-#: elf/dl-load.c:1502
+#: elf/dl-load.c:1430
 msgid "ELF file data encoding not big-endian"
 msgstr "La codificaci�n de los datos del fichero ELF no es `big-endian'"
 
-#: elf/dl-load.c:1504
+#: elf/dl-load.c:1432
 msgid "ELF file data encoding not little-endian"
 msgstr "La codificaci�n de los datos del fichero ELF no es `little-endian'"
 
-#: elf/dl-load.c:1508
+#: elf/dl-load.c:1436
 msgid "ELF file version ident does not match current one"
 msgstr "La identificaci�n de versi�n del fichero ELF no encaja con la actual"
 
-#: elf/dl-load.c:1512
+#: elf/dl-load.c:1440
 msgid "ELF file OS ABI invalid"
 msgstr "ABI del OS del fichero ELF inv�lida"
 
-#: elf/dl-load.c:1514
+#: elf/dl-load.c:1442
 msgid "ELF file ABI version invalid"
 msgstr "Versi�n de ABI del fichero ELF inv�lida"
 
-#: elf/dl-load.c:1517
+#: elf/dl-load.c:1445
 msgid "internal error"
 msgstr "error interno"
 
-#: elf/dl-load.c:1524
+#: elf/dl-load.c:1452
 msgid "ELF file version does not match current one"
 msgstr "La versi�n del fichero ELF no coincide con la actual"
 
-#: elf/dl-load.c:1532
+#: elf/dl-load.c:1460
 msgid "ELF file's phentsize not the expected size"
 msgstr "El `phentsize' del fichero ELF no es el tama�o esperado"
 
-#: elf/dl-load.c:1538
+#: elf/dl-load.c:1466
 msgid "only ET_DYN and ET_EXEC can be loaded"
 msgstr "solamente pueden cargarse ET_DYN y ET_EXEC"
 
-#: elf/dl-load.c:1944
+#: elf/dl-load.c:1917
 msgid "cannot open shared object file"
 msgstr "no se puede abrir el fichero del objeto compartido"
 
-#: elf/dl-lookup.c:265 elf/dl-lookup.c:443
+#: elf/dl-lookup.c:265 elf/dl-lookup.c:430
 msgid "relocation error"
 msgstr "error de relocalizaci�n"
 
@@ -6218,38 +6114,40 @@ msgstr "no se puede extender el �mbito global"
 msgid "empty dynamic string token substitution"
 msgstr "sustituci�n din�mica de un elemento por una cadena vac�a"
 
-#: elf/dl-open.c:361 elf/dl-open.c:372
+#: elf/dl-open.c:351 elf/dl-open.c:362
 msgid "cannot create scope list"
 msgstr "no se puede crear la lista de �mbito"
 
-#: elf/dl-open.c:434
+#: elf/dl-open.c:424
 msgid "cannot create TLS data structures"
 msgstr "no se pueden crear las estructuras de datos TLS"
 
-#: elf/dl-open.c:496
+#: elf/dl-open.c:486
 msgid "invalid mode for dlopen()"
 msgstr "modo inv�lido para dlopen()"
 
-#: elf/dl-reloc.c:57
-msgid "cannot allocate memory in static TLS block"
-msgstr "No se pudo asignar memoria en el bloque TLS est�tico"
+#: elf/dl-reloc.c:58
+msgid "shared object cannot be dlopen()ed: static TLS memory too small"
+msgstr ""
+"no se puede efectuar dlopen() sobre el objeto compartido: memoria est�tica TLS\n"
+"demasiado peque�a"
 
-#: elf/dl-reloc.c:176
+#: elf/dl-reloc.c:118
 msgid "cannot make segment writable for relocation"
 msgstr "no se puede hacer el segmento escribible para su relocalizaci�n"
 
-#: elf/dl-reloc.c:277
+#: elf/dl-reloc.c:219
 #, c-format
 msgid "%s: profiler found no PLTREL in object %s\n"
 msgstr "%s: el `profiler' no encontró ningún PLTREL en el objeto %s\n"
 
-#: elf/dl-reloc.c:289
+#: elf/dl-reloc.c:231
 #, c-format
 msgid "%s: profiler out of memory shadowing PLTREL of %s\n"
 msgstr "%s: el `profiler' se qued� sin memoria al ocultar el PLTREL de %s\n"
 
 # Se admiten sugerencias. sv
-#: elf/dl-reloc.c:304
+#: elf/dl-reloc.c:246
 msgid "cannot restore segment prot after reloc"
 msgstr "no se puede restaurar el `prot' del segmento despu�s de la relocalizaci�n"
 
@@ -6257,7 +6155,7 @@ msgstr "no se puede restaurar el `prot' del segmento después de la relocalizació
 msgid "RTLD_NEXT used in code not dynamically loaded"
 msgstr "Se ha usado RTLD_NEXT en una parte del código que no se cargó dinámicamente"
 
-#: elf/dl-version.c:303
+#: elf/dl-version.c:302
 msgid "cannot allocate version reference table"
 msgstr "no se puede asignar espacio para la tabla de versiones de referencia"
 
@@ -6304,148 +6202,143 @@ msgid "Format to use: new, old or compat (default)"
 msgstr "Formato utilizado: new, old o compat (predeterminado)"
 
 # FIXME: Why So Many Uppercase Letters? sv
-#: elf/ldconfig.c:139
+#: elf/ldconfig.c:136
 msgid "Configure Dynamic Linker Run Time Bindings."
 msgstr "Configura las asociaciones de tiempo de ejecución del enlazador dinámico"
 
-#: elf/ldconfig.c:297
+#: elf/ldconfig.c:294
 #, c-format
 msgid "Path `%s' given more than once"
 msgstr "Se ha dado la ruta `%s' más de una vez"
 
-#: elf/ldconfig.c:341
+#: elf/ldconfig.c:338
 #, c-format
 msgid "%s is not a known library type"
 msgstr "%s no es un tipo de biblioteca conocido"
 
-#: elf/ldconfig.c:361
+#: elf/ldconfig.c:356
 #, c-format
 msgid "Can't stat %s"
 msgstr "No se puede efectuar `stat' sobre %s"
 
-#: elf/ldconfig.c:431
+#: elf/ldconfig.c:426
 #, c-format
 msgid "Can't stat %s\n"
 msgstr "No se puede efectuar `stat' sobre %s\n"
 
-#: elf/ldconfig.c:441
+#: elf/ldconfig.c:436
 #, c-format
 msgid "%s is not a symbolic link\n"
 msgstr "%s no es un enlace simbólico\n"
 
-#: elf/ldconfig.c:460
+#: elf/ldconfig.c:455
 #, c-format
 msgid "Can't unlink %s"
 msgstr "No se puede efectuar `unlink' sobre %s"
 
-#: elf/ldconfig.c:466
+#: elf/ldconfig.c:461
 #, c-format
 msgid "Can't link %s to %s"
 msgstr "No se puede crear un enlace de %s a %s"
 
-#: elf/ldconfig.c:472
+#: elf/ldconfig.c:467
 msgid " (changed)\n"
 msgstr " (cambiado)\n"
 
-#: elf/ldconfig.c:474
+#: elf/ldconfig.c:469
 msgid " (SKIPPED)\n"
 msgstr " (SALTADO)\n"
 
-#: elf/ldconfig.c:529
+#: elf/ldconfig.c:524
 #, c-format
 msgid "Can't find %s"
 msgstr "No se encuentra %s"
 
-#: elf/ldconfig.c:545
+#: elf/ldconfig.c:540
 #, c-format
 msgid "Can't lstat %s"
 msgstr "No se puede efectuar `lstat' sobre %s"
 
-#: elf/ldconfig.c:552
+#: elf/ldconfig.c:547
 #, c-format
 msgid "Ignored file %s since it is not a regular file."
 msgstr "Descartado el fichero %s dado que no es un fichero regular."
 
-#: elf/ldconfig.c:560
+#: elf/ldconfig.c:555
 #, c-format
 msgid "No link created since soname could not be found for %s"
 msgstr "No se creó el enlace ya que no se encontró el soname para %s"
 
-#: elf/ldconfig.c:651
+#: elf/ldconfig.c:646
 #, c-format
 msgid "Can't open directory %s"
 msgstr "No se puede abrir el directorio %s"
 
-#: elf/ldconfig.c:706 elf/ldconfig.c:753
+#: elf/ldconfig.c:701 elf/ldconfig.c:748
 #, c-format
 msgid "Cannot lstat %s"
 msgstr "No se puede efectuar `lstat' sobre %s"
 
-#: elf/ldconfig.c:718
+#: elf/ldconfig.c:713
 #, c-format
 msgid "Cannot stat %s"
 msgstr "No se puede efectuar `stat' sobre %s"
 
-#: elf/ldconfig.c:775 elf/readlib.c:92
+#: elf/ldconfig.c:770 elf/readlib.c:93
 #, c-format
 msgid "Input file %s not found.\n"
 msgstr "No se encontró el fichero de entrada %s.\n"
 
-#: elf/ldconfig.c:826
+#: elf/ldconfig.c:804
 #, c-format
 msgid "libc5 library %s in wrong directory"
 msgstr "biblioteca libc5 %s en un directorio equivocado"
 
-#: elf/ldconfig.c:829
+#: elf/ldconfig.c:807
 #, c-format
 msgid "libc6 library %s in wrong directory"
 msgstr "biblioteca libc6 %s en un directorio equivocado"
 
-#: elf/ldconfig.c:832
+#: elf/ldconfig.c:810
 #, c-format
 msgid "libc4 library %s in wrong directory"
 msgstr "biblioteca libc4 %s en un directorio equivocado"
 
-#: elf/ldconfig.c:859
+#: elf/ldconfig.c:837
 #, c-format
 msgid "libraries %s and %s in directory %s have same soname but different type."
 msgstr "las bibliotecas %s y %s en el directorio %s tienen el mismo soname pero distinto tipo."
 
-#: elf/ldconfig.c:962
+#: elf/ldconfig.c:940
 #, c-format
 msgid "Can't open configuration file %s"
 msgstr "No se puede abrir el fichero de configuración `%s'"
 
-#: elf/ldconfig.c:1033
-#, c-format
-msgid "relative path `%s' used to build cache"
-msgstr "se usa el camino relativo `%s' para construir el caché"
-
-#: elf/ldconfig.c:1057
+#: elf/ldconfig.c:1024
 msgid "Can't chdir to /"
 msgstr "No se puede cambiar al directorio /"
 
-#: elf/ldconfig.c:1099
+#: elf/ldconfig.c:1066
 #, c-format
 msgid "Can't open cache file directory %s\n"
 msgstr "No se puede leer el directorio de ficheros de caché %s\n"
 
-#: elf/readlib.c:98
+#: elf/readlib.c:99
 #, c-format
 msgid "Cannot fstat file %s.\n"
 msgstr "No se puede efectuar `fstat' sobre el fichero %s.\n"
 
-#: elf/readlib.c:108
+#: elf/readlib.c:109
 #, c-format
 msgid "File %s is too small, not checked."
 msgstr "El fichero %s es demasiado pequeño, no se comprueba."
 
-#: elf/readlib.c:117
+#: elf/readlib.c:118
 #, c-format
 msgid "Cannot mmap file %s.\n"
 msgstr "No se puede efectuar `mmap' sobre el fichero %s.\n"
 
-#: elf/readlib.c:155
+#: elf/readlib.c:158
 #, c-format
 msgid "%s is not an ELF file - it has the wrong magic bytes at the start.\n"
 msgstr "%s no es un fichero ELF - tiene los bytes mágicos equivocados en el comienzo.\n"
@@ -6545,11 +6438,6 @@ msgstr "`%s' no es un fichero de datos para `profile' correcto para `%s'"
 msgid "cannot allocate symbol data"
 msgstr "no se puede asignar espacio para los datos del símbolo"
 
-#~ msgid "shared object cannot be dlopen()ed: static TLS memory too small"
-#~ msgstr ""
-#~ "no se puede efectuar dlopen() sobre el objeto compartido: memoria estática TLS\n"
-#~ "demasiado pequeña"
-
 # FIXME: Decir al autor que no use tabs. sv
 #~ msgid "\t\t\t\t\t\t\t      %s: value for field `%s' must be in range %d...%d"
 #~ msgstr "\t\t\t\t\t\t\t      %s: el valor para el campo `%s' debe estar en el rango %d...%d"
@@ -6647,6 +6535,9 @@ msgstr "no se puede asignar espacio para los datos del símbolo"
 #~ msgid "cannot insert collation element `%.*s'"
 #~ msgstr "no se puede insertar el elemento de unión `%.*s' "
 
+#~ msgid "cannot insert into result table"
+#~ msgstr "no se puede insertar en la tabla de resultados"
+
 # FUZZY
 #~ msgid "cannot insert new collating symbol definition: %s"
 #~ msgstr "no se puede insertar la nueva definición para el símbolo de unión: %s"
diff --git a/posix/Makefile b/posix/Makefile
index 3af9e6681d..149283c65d 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1991-1999, 2000-2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1991-1999, 2000-2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -140,27 +140,16 @@ CFLAGS-waitid.c = -fexceptions
 CFLAGS-waitpid.c = -fexceptions -fasynchronous-unwind-tables
 CFLAGS-getopt.c = -fexceptions
 CFLAGS-wordexp.c = -fexceptions
-CFLAGS-wordexp.os = -fomit-frame-pointer
 CFLAGS-sysconf.c = -fexceptions -DGETCONF_DIR='"$(libexecdir)/getconf"'
 CFLAGS-pathconf.c = -fexceptions
 CFLAGS-fpathconf.c = -fexceptions
 CFLAGS-spawn.c = -fexceptions
-CFLAGS-spawn.os = -fomit-frame-pointer
 CFLAGS-spawnp.c = -fexceptions
-CFLAGS-spawnp.os = -fomit-frame-pointer
 CFLAGS-spawni.c = -fexceptions
-CFLAGS-spawni.os = -fomit-frame-pointer
 CFLAGS-pause.c = -fexceptions
 CFLAGS-glob.c = $(uses-callbacks) -fexceptions
 CFLAGS-glob64.c = $(uses-callbacks) -fexceptions
 CFLAGS-getconf.c = -DGETCONF_DIR='"$(libexecdir)/getconf"'
-CFLAGS-execve.os = -fomit-frame-pointer
-CFLAGS-fexecve.os = -fomit-frame-pointer
-CFLAGS-execv.os = -fomit-frame-pointer
-CFLAGS-execle.os = -fomit-frame-pointer
-CFLAGS-execl.os = -fomit-frame-pointer
-CFLAGS-execvp.os = -fomit-frame-pointer
-CFLAGS-execlp.os = -fomit-frame-pointer
 
 tstgetopt-ARGS = -a -b -cfoobar --required foobar --optional=bazbug \
 		--none random --col --color --colour
diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c
index 3a173a6ca0..4000b19b4d 100644
--- a/posix/bug-regex19.c
+++ b/posix/bug-regex19.c
@@ -1,5 +1,5 @@
 /* Regular expression tests.
-   Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
 
@@ -170,22 +170,22 @@ static struct test_s
   {ERE, "[^k]\\B[^k]", "kBk", 0, -1},
   {ERE, "[^C]\\B[^C]", "CCCABA", 0, 3},
   {ERE, "[^C]\\B[^C]", "CBC", 0, -1},
-  {ERE, ".(\\b|\\B).", "=~AB", 0, 0},
+  {ERE, ".(\\b|\\B).", "=~AB", 0, 1},
   {ERE, ".(\\b|\\B).", "A=C", 0, 0},
   {ERE, ".(\\b|\\B).", "ABC", 0, 0},
-  {ERE, ".(\\b|\\B).", "=~\\!", 0, 0},
-  {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 0},
+  {ERE, ".(\\b|\\B).", "=~\\!", 0, -1},
+  {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 1},
   {ERE, "[^k](\\b|\\B)[^k]", "A=C", 0, 0},
   {ERE, "[^k](\\b|\\B)[^k]", "ABC", 0, 0},
-  {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 0},
-  {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, 0},
-  {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, 0},
-  {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 0},
+  {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 3},
+  {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, -1},
+  {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, -1},
+  {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 1},
   {ERE, "[^C](\\b|\\B)[^C]", "A=C", 0, 0},
   {ERE, "[^C](\\b|\\B)[^C]", "ABC", 0, 0},
-  {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 0},
-  {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, 0},
-  {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, 0},
+  {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 3},
+  {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, -1},
+  {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, -1},
   {ERE, "\\b([A]|[!]|.B)", "A=AC", 0, 0},
   {ERE, "\\b([A]|[!]|.B)", "=AC", 0, 1},
   {ERE, "\\b([A]|[!]|.B)", "!AC", 0, 1},
diff --git a/posix/execl.c b/posix/execl.c
index 12b59f9de3..62fd45db58 100644
--- a/posix/execl.c
+++ b/posix/execl.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,92,94,97,98,99,2002,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991,92,94,97,98,99,2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,10 +16,10 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <alloca.h>
 #include <unistd.h>
 #include <stdarg.h>
 #include <stddef.h>
-#include <stdlib.h>
 #include <string.h>
 
 #include <stackinfo.h>
@@ -33,44 +33,46 @@
 int
 execl (const char *path, const char *arg, ...)
 {
-#define INITIAL_ARGV_MAX 1024
-  size_t argv_max = INITIAL_ARGV_MAX;
-  const char *initial_argv[INITIAL_ARGV_MAX];
-  const char **argv = initial_argv;
+  size_t argv_max = 1024;
+  const char **argv = alloca (argv_max * sizeof (const char *));
+  unsigned int i;
   va_list args;
 
   argv[0] = arg;
 
   va_start (args, arg);
-  unsigned int i = 0;
+  i = 0;
   while (argv[i++] != NULL)
     {
       if (i == argv_max)
 	{
-	  argv_max *= 2;
-	  const char **nptr = realloc (argv == initial_argv ? NULL : argv,
-				       argv_max * sizeof (const char *));
-	  if (nptr == NULL)
+	  const char **nptr = alloca ((argv_max *= 2) * sizeof (const char *));
+
+#ifndef _STACK_GROWS_UP
+	  if ((char *) nptr + argv_max == (char *) argv)
 	    {
-	      if (argv != initial_argv)
-		free (argv);
-	      return -1;
+	      /* Stack grows down.  */
+	      argv = (const char **) memcpy (nptr, argv,
+					     i * sizeof (const char *));
+	      argv_max += i;
 	    }
-	  if (argv == initial_argv)
-	    /* We have to copy the already filled-in data ourselves.  */
-	    memcpy (nptr, argv, i * sizeof (const char *));
-
-	  argv = nptr;
+	  else
+#endif
+#ifndef _STACK_GROWS_DOWN
+	    if ((char *) argv + i == (char *) nptr)
+	    /* Stack grows up.  */
+	    argv_max += i;
+	  else
+#endif
+	    /* We have a hole in the stack.  */
+	    argv = (const char **) memcpy (nptr, argv,
+					   i * sizeof (const char *));
 	}
 
       argv[i] = va_arg (args, const char *);
     }
   va_end (args);
 
-  int ret = __execve (path, (char *const *) argv, __environ);
-  if (argv != initial_argv)
-    free (argv);
-
-  return ret;
+  return __execve (path, (char *const *) argv, __environ);
 }
 libc_hidden_def (execl)
diff --git a/posix/execle.c b/posix/execle.c
index 70522ad2e5..2199ebeb74 100644
--- a/posix/execle.c
+++ b/posix/execle.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,97,98,99,2002,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991,97,98,99,2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,10 +16,10 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <alloca.h>
 #include <unistd.h>
 #include <stdarg.h>
 #include <stddef.h>
-#include <stdlib.h>
 #include <string.h>
 
 #include <stackinfo.h>
@@ -29,45 +29,48 @@
 int
 execle (const char *path, const char *arg, ...)
 {
-#define INITIAL_ARGV_MAX 1024
-  size_t argv_max = INITIAL_ARGV_MAX;
-  const char *initial_argv[INITIAL_ARGV_MAX];
-  const char **argv = initial_argv;
+  size_t argv_max = 1024;
+  const char **argv = alloca (argv_max * sizeof (const char *));
+  const char *const *envp;
+  unsigned int i;
   va_list args;
   argv[0] = arg;
 
   va_start (args, arg);
-  unsigned int i = 0;
+  i = 0;
   while (argv[i++] != NULL)
     {
       if (i == argv_max)
 	{
-	  argv_max *= 2;
-	  const char **nptr = realloc (argv == initial_argv ? NULL : argv,
-				       argv_max * sizeof (const char *));
-	  if (nptr == NULL)
+	  const char **nptr = alloca ((argv_max *= 2) * sizeof (const char *));
+
+#ifndef _STACK_GROWS_UP
+	  if ((char *) nptr + argv_max == (char *) argv)
 	    {
-	      if (argv != initial_argv)
-		free (argv);
-	      return -1;
+	      /* Stack grows down.  */
+	      argv = (const char **) memcpy (nptr, argv,
+					     i * sizeof (const char *));
+	      argv_max += i;
 	    }
-	  if (argv == initial_argv)
-	    /* We have to copy the already filled-in data ourselves.  */
-	    memcpy (nptr, argv, i * sizeof (const char *));
-
-	  argv = nptr;
+	  else
+#endif
+#ifndef _STACK_GROWS_DOWN
+	    if ((char *) argv + i == (char *) nptr)
+	    /* Stack grows up.  */
+	    argv_max += i;
+	  else
+#endif
+	    /* We have a hole in the stack.  */
+	    argv = (const char **) memcpy (nptr, argv,
+					   i * sizeof (const char *));
 	}
 
       argv[i] = va_arg (args, const char *);
     }
 
-  const char *const *envp = va_arg (args, const char *const *);
+  envp = va_arg (args, const char *const *);
   va_end (args);
 
-  int ret = __execve (path, (char *const *) argv, (char *const *) envp);
-  if (argv != initial_argv)
-    free (argv);
-
-  return ret;
+  return __execve (path, (char *const *) argv, (char *const *) envp);
 }
 libc_hidden_def (execle)
diff --git a/posix/execlp.c b/posix/execlp.c
index 66996a9367..ba8fc74c90 100644
--- a/posix/execlp.c
+++ b/posix/execlp.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,93,96,97,98,99,2002,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991,93,96,97,98,99,2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,10 +16,10 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <alloca.h>
 #include <unistd.h>
 #include <stdarg.h>
 #include <stddef.h>
-#include <stdlib.h>
 #include <string.h>
 
 #include <stackinfo.h>
@@ -30,44 +30,46 @@
 int
 execlp (const char *file, const char *arg, ...)
 {
-#define INITIAL_ARGV_MAX 1024
-  size_t argv_max = INITIAL_ARGV_MAX;
-  const char *initial_argv[INITIAL_ARGV_MAX];
-  const char **argv = initial_argv;
+  size_t argv_max = 1024;
+  const char **argv = alloca (argv_max * sizeof (const char *));
+  unsigned int i;
   va_list args;
 
   argv[0] = arg;
 
   va_start (args, arg);
-  unsigned int i = 0;
+  i = 0;
   while (argv[i++] != NULL)
     {
       if (i == argv_max)
 	{
-	  argv_max *= 2;
-	  const char **nptr = realloc (argv == initial_argv ? NULL : argv,
-				       argv_max * sizeof (const char *));
-	  if (nptr == NULL)
+	  const char **nptr = alloca ((argv_max *= 2) * sizeof (const char *));
+
+#ifndef _STACK_GROWS_UP
+	  if ((char *) nptr + argv_max == (char *) argv)
 	    {
-	      if (argv != initial_argv)
-		free (argv);
-	      return -1;
+	      /* Stack grows down.  */
+	      argv = (const char **) memcpy (nptr, argv,
+					     i * sizeof (const char *));
+	      argv_max += i;
 	    }
-	  if (argv == initial_argv)
-	    /* We have to copy the already filled-in data ourselves.  */
-	    memcpy (nptr, argv, i * sizeof (const char *));
-
-	  argv = nptr;
+	  else
+#endif
+#ifndef _STACK_GROWS_DOWN
+	    if ((char *) argv + i == (char *) nptr)
+	    /* Stack grows up.  */
+	    argv_max += i;
+	  else
+#endif
+	    /* We have a hole in the stack.  */
+	    argv = (const char **) memcpy (nptr, argv,
+					   i * sizeof (const char *));
 	}
 
       argv[i] = va_arg (args, const char *);
     }
   va_end (args);
 
-  int ret = execvp (file, (char *const *) argv);
-  if (argv != initial_argv)
-    free (argv);
-
-  return ret;
+  return execvp (file, (char *const *) argv);
 }
 libc_hidden_def (execlp)
diff --git a/posix/execvp.c b/posix/execvp.c
index 9ccfd7fc22..d6f60c02e7 100644
--- a/posix/execvp.c
+++ b/posix/execvp.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,92,1995-99,2002,2004,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991,92,1995-99,2002,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,7 +18,6 @@
 
 #include <unistd.h>
 #include <stdarg.h>
-#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
@@ -27,9 +26,9 @@
 
 /* The file is accessible but it is not an executable file.  Invoke
    the shell to interpret it as a script.  */
-static char **
+static void
 internal_function
-allocate_scripts_argv (const char *file, char *const argv[])
+script_execute (const char *file, char *const argv[])
 {
   /* Count the arguments.  */
   int argc = 0;
@@ -37,19 +36,19 @@ allocate_scripts_argv (const char *file, char *const argv[])
     ;
 
   /* Construct an argument list for the shell.  */
-  char **new_argv = (char **) malloc ((argc + 1) * sizeof (char *));
-  if (new_argv != NULL)
-    {
-      new_argv[0] = (char *) _PATH_BSHELL;
-      new_argv[1] = (char *) file;
-      while (argc > 1)
-	{
-	  new_argv[argc] = argv[argc - 1];
-	  --argc;
-	}
-    }
-
-  return new_argv;
+  {
+    char *new_argv[argc + 1];
+    new_argv[0] = (char *) _PATH_BSHELL;
+    new_argv[1] = (char *) file;
+    while (argc > 1)
+      {
+	new_argv[argc] = argv[argc - 1];
+	--argc;
+      }
+
+    /* Execute the shell.  */
+    __execve (new_argv[0], new_argv, __environ);
+  }
 }
 
 
@@ -67,58 +66,42 @@ execvp (file, argv)
       return -1;
     }
 
-  char **script_argv = NULL;
-
   if (strchr (file, '/') != NULL)
     {
       /* Don't search when it contains a slash.  */
       __execve (file, argv, __environ);
 
       if (errno == ENOEXEC)
-	{
-	  script_argv = allocate_scripts_argv (file, argv);
-	  if (script_argv != NULL)
-	    {
-	      __execve (script_argv[0], script_argv, __environ);
-
-	      free (script_argv);
-	    }
-	}
+	script_execute (file, argv);
     }
   else
     {
-      char *path = getenv ("PATH");
-      bool path_malloc = false;
+      int got_eacces = 0;
+      char *path, *p, *name;
+      size_t len;
+      size_t pathlen;
+
+      path = getenv ("PATH");
       if (path == NULL)
 	{
 	  /* There is no `PATH' in the environment.
 	     The default search path is the current directory
 	     followed by the path `confstr' returns for `_CS_PATH'.  */
-	  size_t len = confstr (_CS_PATH, (char *) NULL, 0);
-	  path = (char *) malloc (1 + len);
-	  if (path == NULL)
-	    return -1;
+	  len = confstr (_CS_PATH, (char *) NULL, 0);
+	  path = (char *) __alloca (1 + len);
 	  path[0] = ':';
 	  (void) confstr (_CS_PATH, path + 1, len);
-	  path_malloc = true;
 	}
 
-      size_t len = strlen (file) + 1;
-      size_t pathlen = strlen (path);
-      char *name = malloc (pathlen + len + 1);
-      if (name == NULL)
-	{
-	  if (path_malloc)
-	    free (path);
-	  return -1;
-	}
+      len = strlen (file) + 1;
+      pathlen = strlen (path);
+      name = __alloca (pathlen + len + 1);
       /* Copy the file name at the top.  */
       name = (char *) memcpy (name + pathlen + 1, file, len);
       /* And add the slash.  */
       *--name = '/';
 
-      bool got_eacces = false;
-      char *p = path;
+      p = path;
       do
 	{
 	  char *startp;
@@ -137,21 +120,7 @@ execvp (file, argv)
 	  __execve (startp, argv, __environ);
 
 	  if (errno == ENOEXEC)
-	    {
-	      if (script_argv == NULL)
-		{
-		  script_argv = allocate_scripts_argv (file, argv);
-		  if (script_argv == NULL)
-		    {
-		      /* A possible EACCES error is not as important as
-			 the ENOMEM.  */
-		      got_eacces = false;
-		      break;
-		    }
-		}
-
-	      __execve (script_argv[0], script_argv, __environ);
-	    }
+	    script_execute (startp, argv);
 
 	  switch (errno)
 	    {
@@ -159,7 +128,7 @@ execvp (file, argv)
 	      /* Record the we got a `Permission denied' error.  If we end
 		 up finding no executable we can use, we want to diagnose
 		 that we did find one but were denied access.  */
-	      got_eacces = true;
+	      got_eacces = 1;
 	    case ENOENT:
 	    case ESTALE:
 	    case ENOTDIR:
@@ -187,11 +156,6 @@ execvp (file, argv)
 	/* At least one failure was due to permissions, so report that
            error.  */
 	__set_errno (EACCES);
-
-      free (script_argv);
-      free (name);
-      if (path_malloc)
-	free (path);
     }
 
   /* Return the error from the last attempt (probably ENOENT).  */
diff --git a/posix/getconf.c b/posix/getconf.c
index 0cc0c0d7b5..4ce4f8e413 100644
--- a/posix/getconf.c
+++ b/posix/getconf.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991, 92, 1995-2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 92, 1995-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -964,7 +964,7 @@ main (int argc, char *argv[])
 Copyright (C) %s Free Software Foundation, Inc.\n\
 This is free software; see the source for copying conditions.  There is NO\n\
 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
       fprintf (stderr, gettext ("Written by %s.\n"), "Roland McGrath");
       return 0;
     }
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 1a5f7952c3..5de5bf725a 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1,5 +1,5 @@
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
 
@@ -33,21 +33,19 @@ static reg_errcode_t create_initial_state (re_dfa_t *dfa);
 #ifdef RE_ENABLE_I18N
 static void optimize_utf8 (re_dfa_t *dfa);
 #endif
-static reg_errcode_t analyze (regex_t *preg);
-static reg_errcode_t create_initial_state (re_dfa_t *dfa);
-static reg_errcode_t preorder (bin_tree_t *root,
-			       reg_errcode_t (fn (void *, bin_tree_t *)),
-			       void *extra);
-static reg_errcode_t postorder (bin_tree_t *root,
-				reg_errcode_t (fn (void *, bin_tree_t *)),
-				void *extra);
-static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
-static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
-static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
-				 bin_tree_t *node);
-static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
-static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
-static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
+struct subexp_optimize
+{
+  re_dfa_t *dfa;
+  re_token_t *nodes;
+  int no_sub, re_nsub;
+};
+static bin_tree_t *optimize_subexps (struct subexp_optimize *so,
+                                     bin_tree_t *node, int sidx, int depth);
+static reg_errcode_t analyze (re_dfa_t *dfa);
+static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
 static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
 					     int top_clone_node, int root_node,
 					     unsigned int constraint);
@@ -58,7 +56,7 @@ static int search_duplicated_node (re_dfa_t *dfa, int org_node,
 static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
 					 int node, int root);
-static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
+static void calc_inveclosure (re_dfa_t *dfa);
 static int fetch_number (re_string_t *input, re_token_t *token,
 			 reg_syntax_t syntax);
 static void fetch_token (re_token_t *result, re_string_t *input,
@@ -140,14 +138,14 @@ static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
 				       int non_match, reg_errcode_t *err);
 static bin_tree_t *create_tree (re_dfa_t *dfa,
 				bin_tree_t *left, bin_tree_t *right,
-				re_token_type_t type);
-static bin_tree_t *create_token_tree (re_dfa_t *dfa,
-				      bin_tree_t *left, bin_tree_t *right,
-				      const re_token_t *token);
+				re_token_type_t type, int index);
+static bin_tree_t *re_dfa_add_tree_node (re_dfa_t *dfa,
+					 bin_tree_t *left, bin_tree_t *right,
+					 const re_token_t *token)
+  __attribute ((noinline));
 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
-static void free_token (re_token_t *node);
-static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
-static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
+static void mark_opt_subexp (const bin_tree_t *src, re_dfa_t *dfa);
+static void mark_opt_subexp_iter (const bin_tree_t *src, re_dfa_t *dfa, int idx);
 
 /* This table gives an error message for each of the error codes listed
    in regex.h.  Obviously the order here has to be same as there.
@@ -600,7 +598,16 @@ free_dfa_content (re_dfa_t *dfa)
 
   if (dfa->nodes)
     for (i = 0; i < dfa->nodes_len; ++i)
-      free_token (dfa->nodes + i);
+      {
+	re_token_t *node = dfa->nodes + i;
+#ifdef RE_ENABLE_I18N
+	if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
+	  free_charset (node->opr.mbcset);
+	else
+#endif /* RE_ENABLE_I18N */
+	  if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+	    re_free (node->opr.sbcset);
+      }
   re_free (dfa->nexts);
   for (i = 0; i < dfa->nodes_len; ++i)
     {
@@ -804,17 +811,29 @@ re_compile_internal (preg, pattern, length, syntax)
   if (BE (dfa->str_tree == NULL, 0))
     goto re_compile_internal_free_return;
 
-  /* Analyze the tree and create the nfa.  */
-  err = analyze (preg);
-  if (BE (err != REG_NOERROR, 0))
-    goto re_compile_internal_free_return;
-
 #ifdef RE_ENABLE_I18N
   /* If possible, do searching in single byte encoding to speed things up.  */
   if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
     optimize_utf8 (dfa);
 #endif
 
+  if (preg->re_nsub > 0)
+    {
+      struct subexp_optimize so;
+
+      so.dfa = dfa;
+      so.nodes = dfa->nodes;
+      so.no_sub = preg->no_sub;
+      so.re_nsub = preg->re_nsub;
+      dfa->str_tree = optimize_subexps (&so, dfa->str_tree, -1, 0);
+    }
+
+  /* Analyze the tree and collect information which is necessary to
+     create the dfa.  */
+  err = analyze (dfa);
+  if (BE (err != REG_NOERROR, 0))
+    goto re_compile_internal_free_return;
+
   /* Then create the initial state of the dfa.  */
   err = create_initial_state (dfa);
 
@@ -875,9 +894,9 @@ init_dfa (dfa, pat_len)
   codeset_name = nl_langinfo (CODESET);
 # else
   codeset_name = getenv ("LC_ALL");
-  if (codeset_name == NULL || codeset_name[0] == '\0')
+  if (codeset_name == NULL || codeset[0] == '\0')
     codeset_name = getenv ("LC_CTYPE");
-  if (codeset_name == NULL || codeset_name[0] == '\0')
+  if (codeset_name == NULL || codeset[0] == '\0')
     codeset_name = getenv ("LANG");
   if (codeset_name == NULL)
     codeset_name = "";
@@ -979,7 +998,7 @@ create_initial_state (dfa)
 
   /* Initial states have the epsilon closure of the node which is
      the first node of the regular expression.  */
-  first = dfa->str_tree->first->node_idx;
+  first = dfa->str_tree->first;
   dfa->init_node = first;
   err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
   if (BE (err != REG_NOERROR, 0))
@@ -1085,11 +1104,10 @@ optimize_utf8 (dfa)
       case OP_ALT:
       case END_OF_RE:
       case OP_DUP_ASTERISK:
+      case OP_DUP_QUESTION:
       case OP_OPEN_SUBEXP:
       case OP_CLOSE_SUBEXP:
 	break;
-      case COMPLEX_BRACKET:
-	return;
       case SIMPLE_BRACKET:
 	/* Just double check.  */
         for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i)
@@ -1097,7 +1115,7 @@ optimize_utf8 (dfa)
 	    return;
 	break;
       default:
-	abort ();
+	return;
       }
 
   if (mb_chars || has_period)
@@ -1117,14 +1135,90 @@ optimize_utf8 (dfa)
 }
 #endif
 
+static bin_tree_t *
+optimize_subexps (so, node, sidx, depth)
+     struct subexp_optimize *so;
+     bin_tree_t *node;
+     int sidx, depth;
+{
+  int idx, new_depth, new_sidx;
+  bin_tree_t *ret;
+  if (node == NULL)
+    return NULL;
+
+  new_depth = 0;
+  new_sidx = sidx;
+  if ((depth & 1) && node->type == CONCAT
+      && node->right && node->right->type == 0
+      && so->nodes[idx = node->right->node_idx].type == OP_CLOSE_SUBEXP)
+    {
+      new_depth = depth + 1;
+      if (new_depth == 2
+          || (so->nodes[idx].opr.idx < 8 * sizeof (so->dfa->used_bkref_map)
+              && so->dfa->used_bkref_map & (1 << so->nodes[idx].opr.idx)))
+        new_sidx = so->nodes[idx].opr.idx;
+    }
+  node->left = optimize_subexps (so, node->left, new_sidx, new_depth);
+  new_depth = (depth & 1) == 0 && node->type == CONCAT
+              && node->left && node->left->type == 0
+              && so->nodes[node->left->node_idx].type == OP_OPEN_SUBEXP
+              ? depth + 1 : 0;
+  node->right = optimize_subexps (so, node->right, sidx, new_depth);
+                                     
+  if (node->type != CONCAT)
+    return node;
+  if ((depth & 1) == 0
+      && node->left
+      && node->left->type == 0
+      && so->nodes[idx = node->left->node_idx].type == OP_OPEN_SUBEXP)
+    ret = node->right;
+  else if ((depth & 1)
+           && node->right
+           && node->right->type == 0
+           && so->nodes[idx = node->right->node_idx].type == OP_CLOSE_SUBEXP)
+    ret = node->left;
+  else
+    return node;
+
+  if (so->nodes[idx].opr.idx < 8 * sizeof (so->dfa->used_bkref_map)
+      && so->dfa->used_bkref_map & (1 << so->nodes[idx].opr.idx))
+    return node;
+
+  if (!so->no_sub)
+    {
+      int i;
+
+      if (depth < 2)
+        return node;
+
+      if (so->dfa->subexp_map == NULL)
+        {
+          so->dfa->subexp_map = re_malloc (int, so->re_nsub);
+          if (so->dfa->subexp_map == NULL)
+            return node;
+
+          for (i = 0; i < so->re_nsub; i++)
+            so->dfa->subexp_map[i] = i;
+        }
+
+      i = so->nodes[idx].opr.idx;
+      assert (sidx < i);
+      so->dfa->subexp_map[i] = sidx;
+    }
+
+  so->nodes[idx].type = OP_DELETED_SUBEXP;
+  ret->parent = node->parent;
+  return ret;
+}
+
 /* Analyze the structure tree, and calculate "first", "next", "edest",
    "eclosure", and "inveclosure".  */
 
 static reg_errcode_t
-analyze (preg)
-     regex_t *preg;
+analyze (dfa)
+     re_dfa_t *dfa;
 {
-  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+  int i;
   reg_errcode_t ret;
 
   /* Allocate arrays.  */
@@ -1132,321 +1226,225 @@ analyze (preg)
   dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
   dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
   dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+  dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
   if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
-	  || dfa->eclosures == NULL, 0))
+	  || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
     return REG_ESPACE;
-
-  dfa->subexp_map = re_malloc (int, preg->re_nsub);
-  if (dfa->subexp_map != NULL)
+  /* Initialize them.  */
+  for (i = 0; i < dfa->nodes_len; ++i)
     {
-      int i;
-      for (i = 0; i < preg->re_nsub; i++)
-	dfa->subexp_map[i] = i;
-      preorder (dfa->str_tree, optimize_subexps, dfa);
-      for (i = 0; i < preg->re_nsub; i++)
-	if (dfa->subexp_map[i] != i)
-	  break;
-      if (i == preg->re_nsub)
-	{
-	  free (dfa->subexp_map);
-	  dfa->subexp_map = NULL;
-	}
+      dfa->nexts[i] = -1;
+      re_node_set_init_empty (dfa->edests + i);
+      re_node_set_init_empty (dfa->eclosures + i);
+      re_node_set_init_empty (dfa->inveclosures + i);
     }
 
-  ret = postorder (dfa->str_tree, lower_subexps, preg);
-  if (BE (ret != REG_NOERROR, 0))
-    return ret;
-  ret = postorder (dfa->str_tree, calc_first, dfa);
-  if (BE (ret != REG_NOERROR, 0))
-    return ret;
-  preorder (dfa->str_tree, calc_next, dfa);
-  ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
-  if (BE (ret != REG_NOERROR, 0))
-    return ret;
-  ret = calc_eclosure (dfa);
-  if (BE (ret != REG_NOERROR, 0))
-    return ret;
-
-  /* We only need this during the prune_impossible_nodes pass in regexec.c;
-     skip it if p_i_n will not run, as calc_inveclosure can be quadratic.  */
-  if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
-      || dfa->nbackref)
+  ret = analyze_tree (dfa, dfa->str_tree);
+  if (BE (ret == REG_NOERROR, 1))
     {
-      dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
-      if (BE (dfa->inveclosures == NULL, 0))
-        return REG_ESPACE;
-      ret = calc_inveclosure (dfa);
+      ret = calc_eclosure (dfa);
+      if (ret == REG_NOERROR)
+	calc_inveclosure (dfa);
     }
-
   return ret;
 }
 
-/* Our parse trees are very unbalanced, so we cannot use a stack to
-   implement parse tree visits.  Instead, we use parent pointers and
-   some hairy code in these two functions.  */
-static reg_errcode_t
-postorder (root, fn, extra)
-     bin_tree_t *root;
-     reg_errcode_t (fn (void *, bin_tree_t *));
-     void *extra;
-{
-  bin_tree_t *node, *prev;
-
-  for (node = root; ; )
-    {
-      /* Descend down the tree, preferably to the left (or to the right
-	 if that's the only child).  */
-      while (node->left || node->right)
-	if (node->left)
-          node = node->left;
-        else
-          node = node->right;
-
-      do
-	{
-	  reg_errcode_t err = fn (extra, node);
-	  if (BE (err != REG_NOERROR, 0))
-	    return err;
-          if (node->parent == NULL)
-	    return REG_NOERROR;
-	  prev = node;
-	  node = node->parent;
-	}
-      /* Go up while we have a node that is reached from the right.  */
-      while (node->right == prev || node->right == NULL);
-      node = node->right;
-    }
-}
-
-static reg_errcode_t
-preorder (root, fn, extra)
-     bin_tree_t *root;
-     reg_errcode_t (fn (void *, bin_tree_t *));
-     void *extra;
-{
-  bin_tree_t *node;
-
-  for (node = root; ; )
-    {
-      reg_errcode_t err = fn (extra, node);
-      if (BE (err != REG_NOERROR, 0))
-	return err;
-
-      /* Go to the left node, or up and to the right.  */
-      if (node->left)
-	node = node->left;
-      else
-	{
-	  bin_tree_t *prev = NULL;
-	  while (node->right == prev || node->right == NULL)
-	    {
-	      prev = node;
-	      node = node->parent;
-	      if (!node)
-	        return REG_NOERROR;
-	    }
-	  node = node->right;
-	}
-    }
-}
+/* Helper function for analyze.
+   This function calculates "first", "next", and "edest" for the subtree
+   whose root is NODE.  */
 
-/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
-   re_search_internal to map the inner one's opr.idx to this one's.  Adjust
-   backreferences as well.  Requires a preorder visit.  */
 static reg_errcode_t
-optimize_subexps (extra, node)
-     void *extra;
+analyze_tree (dfa, node)
+     re_dfa_t *dfa;
      bin_tree_t *node;
 {
-  re_dfa_t *dfa = (re_dfa_t *) extra;
+  reg_errcode_t ret;
+  if (node->first == -1)
+    calc_first (dfa, node);
+  if (node->next == -1)
+    calc_next (dfa, node);
+  calc_epsdest (dfa, node);
 
-  if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+  /* Calculate "first" etc. for the left child.  */
+  if (node->left != NULL)
     {
-      int idx = node->token.opr.idx;
-      node->token.opr.idx = dfa->subexp_map[idx];
-      dfa->used_bkref_map |= 1 << node->token.opr.idx;
+      ret = analyze_tree (dfa, node->left);
+      if (BE (ret != REG_NOERROR, 0))
+	return ret;
     }
-
-  else if (node->token.type == SUBEXP
-           && node->left && node->left->token.type == SUBEXP)
+  /* Calculate "first" etc. for the right child.  */
+  if (node->right != NULL)
     {
-      int other_idx = node->left->token.opr.idx;
-
-      node->left = node->left->left;
-      if (node->left)
-        node->left->parent = node;
-
-      dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
-      if (other_idx < 8 * sizeof (dfa->used_bkref_map))
-	dfa->used_bkref_map &= ~(1 << other_idx);
+      ret = analyze_tree (dfa, node->right);
+      if (BE (ret != REG_NOERROR, 0))
+	return ret;
     }
-
   return REG_NOERROR;
 }
 
-/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
-   of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.  */
-static reg_errcode_t
-lower_subexps (extra, node)
-     void *extra;
+/* Calculate "first" for the node NODE.  */
+static void
+calc_first (dfa, node)
+     re_dfa_t *dfa;
      bin_tree_t *node;
 {
-  regex_t *preg = (regex_t *) extra;
-  reg_errcode_t err = REG_NOERROR;
+  int idx, type;
+  idx = node->node_idx;
+  type = (node->type == 0) ? dfa->nodes[idx].type : node->type;
 
-  if (node->left && node->left->token.type == SUBEXP)
+  switch (type)
     {
-      node->left = lower_subexp (&err, preg, node->left);
-      if (node->left)
-	node->left->parent = node;
-    }
-  if (node->right && node->right->token.type == SUBEXP)
-    {
-      node->right = lower_subexp (&err, preg, node->right);
-      if (node->right)
-	node->right->parent = node;
+#ifdef DEBUG
+    case OP_OPEN_BRACKET:
+    case OP_CLOSE_BRACKET:
+    case OP_OPEN_DUP_NUM:
+    case OP_CLOSE_DUP_NUM:
+    case OP_DUP_PLUS:
+    case OP_NON_MATCH_LIST:
+    case OP_OPEN_COLL_ELEM:
+    case OP_CLOSE_COLL_ELEM:
+    case OP_OPEN_EQUIV_CLASS:
+    case OP_CLOSE_EQUIV_CLASS:
+    case OP_OPEN_CHAR_CLASS:
+    case OP_CLOSE_CHAR_CLASS:
+      /* These must not appear here.  */
+      assert (0);
+#endif
+    case END_OF_RE:
+    case CHARACTER:
+    case OP_PERIOD:
+    case OP_DUP_ASTERISK:
+    case OP_DUP_QUESTION:
+#ifdef RE_ENABLE_I18N
+    case OP_UTF8_PERIOD:
+    case COMPLEX_BRACKET:
+#endif /* RE_ENABLE_I18N */
+    case SIMPLE_BRACKET:
+    case OP_BACK_REF:
+    case ANCHOR:
+    case OP_OPEN_SUBEXP:
+    case OP_CLOSE_SUBEXP:
+      node->first = idx;
+      break;
+    case OP_ALT:
+      node->first = idx;
+      break;
+      /* All other node types take "first" from the left child.  */
+    default:
+#ifdef DEBUG
+      assert (node->left != NULL);
+#endif
+      if (node->left->first == -1)
+	calc_first (dfa, node->left);
+      node->first = node->left->first;
+      break;
     }
-
-  return err;
 }
 
-static bin_tree_t *
-lower_subexp (err, preg, node)
-     reg_errcode_t *err;
-     regex_t *preg;
-     bin_tree_t *node;
-{
-  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-  bin_tree_t *body = node->left;
-  bin_tree_t *op, *cls, *tree1, *tree;
-
-  if (preg->no_sub
-      && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
-	  || !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
-    return node->left;
-
-  /* Convert the SUBEXP node to the concatenation of an
-     OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP.  */
-  op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
-  cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
-  tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
-  tree = create_tree (dfa, op, tree1, CONCAT);
-  if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
-    {
-      *err = REG_ESPACE;
-      return NULL;
-    }
-
-  op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
-  op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
-  return tree;
-}
+/* Calculate "next" for the node NODE.  */
 
-/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
-   nodes.  Requires a postorder visit.  */
-static reg_errcode_t
-calc_first (extra, node)
-     void *extra;
+static void
+calc_next (dfa, node)
+     re_dfa_t *dfa;
      bin_tree_t *node;
 {
-  re_dfa_t *dfa = (re_dfa_t *) extra;
-  if (node->token.type == CONCAT)
-    {
-      node->first = node->left->first;
-      node->node_idx = node->left->node_idx;
-    }
-  else
+  int idx, type;
+  bin_tree_t *parent = node->parent;
+  if (parent == NULL)
     {
-      node->first = node;
-      node->node_idx = re_dfa_add_node (dfa, node->token);
-      if (BE (node->node_idx == -1, 0))
-        return REG_ESPACE;
+      node->next = -1;
+      idx = node->node_idx;
+      if (node->type == 0)
+	dfa->nexts[idx] = node->next;
+      return;
     }
-  return REG_NOERROR;
-}
 
-/* Pass 2: compute NEXT on the tree.  Preorder visit.  */
-static reg_errcode_t
-calc_next (extra, node)
-     void *extra;
-     bin_tree_t *node;
-{
-  switch (node->token.type)
+  idx = parent->node_idx;
+  type = (parent->type == 0) ? dfa->nodes[idx].type : parent->type;
+
+  switch (type)
     {
     case OP_DUP_ASTERISK:
-      node->left->next = node;
+      node->next = idx;
       break;
     case CONCAT:
-      node->left->next = node->right->first;
-      node->right->next = node->next;
-      break;
+      if (parent->left == node)
+	{
+	  if (parent->right->first == -1)
+	    calc_first (dfa, parent->right);
+	  node->next = parent->right->first;
+	  break;
+	}
+      /* else fall through */
     default:
-      if (node->left)
-	node->left->next = node->next;
-      if (node->right)
-        node->right->next = node->next;
+      if (parent->next == -1)
+	calc_next (dfa, parent);
+      node->next = parent->next;
       break;
     }
-  return REG_NOERROR;
+  idx = node->node_idx;
+  if (node->type == 0)
+    dfa->nexts[idx] = node->next;
 }
 
-/* Pass 3: link all DFA nodes to their NEXT node (any order will do).  */
-static reg_errcode_t
-link_nfa_nodes (extra, node)
-     void *extra;
+/* Calculate "edest" for the node NODE.  */
+
+static void
+calc_epsdest (dfa, node)
+     re_dfa_t *dfa;
      bin_tree_t *node;
 {
-  re_dfa_t *dfa = (re_dfa_t *) extra;
-  int idx = node->node_idx;
-  reg_errcode_t err = REG_NOERROR;
-
-  switch (node->token.type)
+  int idx;
+  idx = node->node_idx;
+  if (node->type == 0)
     {
-    case CONCAT:
-      break;
-
-    case END_OF_RE:
-      assert (node->next == NULL);
-      break;
-
-    case OP_DUP_ASTERISK:
-    case OP_ALT:
-      {
-	int left, right;
-	dfa->has_plural_match = 1;
-	if (node->left != NULL)
-	  left = node->left->first->node_idx;
-	else
-	  left = node->next->node_idx;
-	if (node->right != NULL)
-	  right = node->right->first->node_idx;
-	else
-	  right = node->next->node_idx;
-	assert (left > -1);
-	assert (right > -1);
-	err = re_node_set_init_2 (dfa->edests + idx, left, right);
-      }
-      break;
-
-    case ANCHOR:
-    case OP_OPEN_SUBEXP:
-    case OP_CLOSE_SUBEXP:
-      err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
-      break;
-
-    case OP_BACK_REF:
-      dfa->nexts[idx] = node->next->node_idx;
-      if (node->token.type == OP_BACK_REF)
-	re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
-      break;
-
-    default:
-      assert (!IS_EPSILON_NODE (node->token.type));
-      dfa->nexts[idx] = node->next->node_idx;
-      break;
+      if (dfa->nodes[idx].type == OP_DUP_ASTERISK
+	  || dfa->nodes[idx].type == OP_DUP_QUESTION)
+	{
+	  if (node->left->first == -1)
+	    calc_first (dfa, node->left);
+	  if (node->next == -1)
+	    calc_next (dfa, node);
+	  re_node_set_init_2 (dfa->edests + idx, node->left->first,
+			      node->next);
+	}
+      else if (dfa->nodes[idx].type == OP_ALT)
+	{
+	  int left, right;
+	  if (node->left != NULL)
+	    {
+	      if (node->left->first == -1)
+		calc_first (dfa, node->left);
+	      left = node->left->first;
+	    }
+	  else
+	    {
+	      if (node->next == -1)
+		calc_next (dfa, node);
+	      left = node->next;
+	    }
+	  if (node->right != NULL)
+	    {
+	      if (node->right->first == -1)
+		calc_first (dfa, node->right);
+	      right = node->right->first;
+	    }
+	  else
+	    {
+	      if (node->next == -1)
+		calc_next (dfa, node);
+	      right = node->next;
+	    }
+	  re_node_set_init_2 (dfa->edests + idx, left, right);
+	}
+      else if (dfa->nodes[idx].type == ANCHOR
+	       || dfa->nodes[idx].type == OP_OPEN_SUBEXP
+	       || dfa->nodes[idx].type == OP_CLOSE_SUBEXP
+	       || dfa->nodes[idx].type == OP_BACK_REF)
+	re_node_set_init_1 (dfa->edests + idx, node->next);
+      else
+        assert (!IS_EPSILON_NODE (dfa->nodes[idx].type));
     }
-
-  return err;
 }
 
 /* Duplicate the epsilon closure of the node ROOT_NODE.
@@ -1522,7 +1520,7 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
       else /* dfa->edests[org_node].nelem == 2 */
 	{
 	  /* In case of the node can epsilon-transit, and it has two
-	     destinations. In the bin_tree_t and DFA, that's '|' and '*'.   */
+	     destinations. E.g. '|', '*', '+', '?'.   */
 	  org_dest = dfa->edests[org_node].elems[0];
 	  re_node_set_empty (dfa->edests + clone_node);
 	  /* Search for a duplicated node which satisfies the constraint.  */
@@ -1593,13 +1591,16 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
      int *new_idx, org_idx;
      unsigned int constraint;
 {
-  int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
+  int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
   if (BE (dup_idx == -1, 0))
     return REG_ESPACE;
   dfa->nodes[dup_idx].constraint = constraint;
   if (dfa->nodes[org_idx].type == ANCHOR)
     dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
   dfa->nodes[dup_idx].duplicated = 1;
+  re_node_set_init_empty (dfa->edests + dup_idx);
+  re_node_set_init_empty (dfa->eclosures + dup_idx);
+  re_node_set_init_empty (dfa->inveclosures + dup_idx);
 
   /* Store the index of the original node.  */
   dfa->org_indices[dup_idx] = org_idx;
@@ -1607,26 +1608,21 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
   return REG_NOERROR;
 }
 
-static reg_errcode_t
+static void
 calc_inveclosure (dfa)
      re_dfa_t *dfa;
 {
-  int src, idx, ret;
-  for (idx = 0; idx < dfa->nodes_len; ++idx)
-    re_node_set_init_empty (dfa->inveclosures + idx);
-
+  int src, idx, dest;
   for (src = 0; src < dfa->nodes_len; ++src)
     {
-      int *elems = dfa->eclosures[src].elems;
+      if (dfa->nodes[src].type == OP_DELETED_SUBEXP)
+        continue;
       for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
 	{
-	  ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
-	  if (BE (ret == -1, 0))
-	    return REG_ESPACE;
+	  dest = dfa->eclosures[src].elems[idx];
+	  re_node_set_insert_last (dfa->inveclosures + dest, src);
 	}
     }
-
-  return REG_NOERROR;
 }
 
 /* Calculate "eclosure" for all the node in DFA.  */
@@ -1656,6 +1652,8 @@ calc_eclosure (dfa)
 #ifdef DEBUG
       assert (dfa->eclosures[node_idx].nelem != -1);
 #endif
+      if (dfa->nodes[node_idx].type == OP_DELETED_SUBEXP)
+        continue;
 
       /* If we have already calculated, skip it.  */
       if (dfa->eclosures[node_idx].nelem != 0)
@@ -1861,7 +1859,7 @@ peek_token (token, input, syntax)
 	  if (!(syntax & RE_NO_GNU_OPS))
 	    {
 	      token->type = ANCHOR;
-	      token->opr.ctx_type = NOT_WORD_DELIM;
+	      token->opr.ctx_type = INSIDE_WORD;
 	    }
 	  break;
 	case 'w':
@@ -2126,9 +2124,9 @@ parse (regexp, preg, syntax, err)
   tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
   if (BE (*err != REG_NOERROR && tree == NULL, 0))
     return NULL;
-  eor = create_tree (dfa, NULL, NULL, END_OF_RE);
+  eor = re_dfa_add_tree_node (dfa, NULL, NULL, &current_token);
   if (tree != NULL)
-    root = create_tree (dfa, tree, eor, CONCAT);
+    root = create_tree (dfa, tree, eor, CONCAT, 0);
   else
     root = eor;
   if (BE (eor == NULL || root == NULL, 0))
@@ -2165,6 +2163,7 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
 
   while (token->type == OP_ALT)
     {
+      re_token_t alt_token = *token;
       fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
       if (token->type != OP_ALT && token->type != END_OF_RE
 	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
@@ -2175,12 +2174,13 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
 	}
       else
 	branch = NULL;
-      tree = create_tree (dfa, tree, branch, OP_ALT);
+      tree = re_dfa_add_tree_node (dfa, tree, branch, &alt_token);
       if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
 	  return NULL;
 	}
+      dfa->has_plural_match = 1;
     }
   return tree;
 }
@@ -2219,7 +2219,7 @@ parse_branch (regexp, preg, token, syntax, nest, err)
 	}
       if (tree != NULL && exp != NULL)
 	{
-	  tree = create_tree (dfa, tree, exp, CONCAT);
+	  tree = create_tree (dfa, tree, exp, CONCAT, 0);
 	  if (tree == NULL)
 	    {
 	      *err = REG_ESPACE;
@@ -2253,7 +2253,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
   switch (token->type)
     {
     case CHARACTER:
-      tree = create_token_tree (dfa, NULL, NULL, token);
+      tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
       if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
@@ -2267,8 +2267,8 @@ parse_expression (regexp, preg, token, syntax, nest, err)
 	    {
 	      bin_tree_t *mbc_remain;
 	      fetch_token (token, regexp, syntax);
-	      mbc_remain = create_token_tree (dfa, NULL, NULL, token);
-	      tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+	      mbc_remain = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+	      tree = create_tree (dfa, tree, mbc_remain, CONCAT, 0);
 	      if (BE (mbc_remain == NULL || tree == NULL, 0))
 		{
 		  *err = REG_ESPACE;
@@ -2295,7 +2295,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
 	  return NULL;
 	}
       dfa->used_bkref_map |= 1 << token->opr.idx;
-      tree = create_token_tree (dfa, NULL, NULL, token);
+      tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
       if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
@@ -2340,7 +2340,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
       token->type = CHARACTER;
       /* mb_partial and word_char bits should be initialized already
 	 by peek_token.  */
-      tree = create_token_tree (dfa, NULL, NULL, token);
+      tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
       if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
@@ -2349,27 +2349,18 @@ parse_expression (regexp, preg, token, syntax, nest, err)
       break;
     case ANCHOR:
       if ((token->opr.ctx_type
-	   & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
+	   & (WORD_DELIM | INSIDE_WORD | WORD_FIRST | WORD_LAST))
 	  && dfa->word_ops_used == 0)
 	init_word_char (dfa);
-      if (token->opr.ctx_type == WORD_DELIM
-          || token->opr.ctx_type == NOT_WORD_DELIM)
+      if (token->opr.ctx_type == WORD_DELIM)
 	{
 	  bin_tree_t *tree_first, *tree_last;
-	  if (token->opr.ctx_type == WORD_DELIM)
-	    {
-	      token->opr.ctx_type = WORD_FIRST;
-	      tree_first = create_token_tree (dfa, NULL, NULL, token);
-	      token->opr.ctx_type = WORD_LAST;
-            }
-          else
-            {
-	      token->opr.ctx_type = INSIDE_WORD;
-	      tree_first = create_token_tree (dfa, NULL, NULL, token);
-	      token->opr.ctx_type = INSIDE_NOTWORD;
-            }
-	  tree_last = create_token_tree (dfa, NULL, NULL, token);
-	  tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
+	  token->opr.ctx_type = WORD_FIRST;
+	  tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+	  token->opr.ctx_type = WORD_LAST;
+	  tree_last = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+	  token->type = OP_ALT;
+	  tree = re_dfa_add_tree_node (dfa, tree_first, tree_last, token);
 	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
 	    {
 	      *err = REG_ESPACE;
@@ -2378,7 +2369,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
 	}
       else
 	{
-	  tree = create_token_tree (dfa, NULL, NULL, token);
+	  tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
 	  if (BE (tree == NULL, 0))
 	    {
 	      *err = REG_ESPACE;
@@ -2392,7 +2383,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
       fetch_token (token, regexp, syntax);
       return tree;
     case OP_PERIOD:
-      tree = create_token_tree (dfa, NULL, NULL, token);
+      tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
       if (BE (tree == NULL, 0))
 	{
 	  *err = REG_ESPACE;
@@ -2448,6 +2439,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
 	  *err = REG_BADRPT;
 	  return NULL;
 	}
+      dfa->has_plural_match = 1;
     }
 
   return tree;
@@ -2470,10 +2462,17 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
      reg_errcode_t *err;
 {
   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-  bin_tree_t *tree;
+  bin_tree_t *tree, *left_par, *right_par;
   size_t cur_nsub;
   cur_nsub = preg->re_nsub++;
 
+  left_par = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+  if (BE (left_par == NULL, 0))
+    {
+      *err = REG_ESPACE;
+      return NULL;
+    }
+  dfa->nodes[left_par->node_idx].opr.idx = cur_nsub;
   fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
 
   /* The subexpression may be a null string.  */
@@ -2482,20 +2481,26 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
   else
     {
       tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
-      if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
-        *err = REG_EPAREN;
-      if (BE (*err != REG_NOERROR, 0))
+      if (BE (*err != REG_NOERROR && tree == NULL, 0))
 	return NULL;
     }
+  if (BE (token->type != OP_CLOSE_SUBEXP, 0))
+    {
+      *err = REG_EPAREN;
+      return NULL;
+    }
+  right_par = re_dfa_add_tree_node (dfa, NULL, NULL, token);
   dfa->completed_bkref_map |= 1 << cur_nsub;
-
-  tree = create_tree (dfa, tree, NULL, SUBEXP);
-  if (BE (tree == NULL, 0))
+  tree = ((tree == NULL) ? right_par
+	  : create_tree (dfa, tree, right_par, CONCAT, 0));
+  tree = create_tree (dfa, left_par, tree, CONCAT, 0);
+  if (BE (right_par == NULL || tree == NULL, 0))
     {
       *err = REG_ESPACE;
       return NULL;
     }
-  tree->token.opr.idx = cur_nsub;
+  dfa->nodes[right_par->node_idx].opr.idx = cur_nsub;
+
   return tree;
 }
 
@@ -2510,6 +2515,7 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
      reg_syntax_t syntax;
      reg_errcode_t *err;
 {
+  re_token_t dup_token;
   bin_tree_t *tree = NULL, *old_tree = NULL;
   int i, start, end, start_idx = re_string_cur_idx (regexp);
   re_token_t start_token = *token;
@@ -2572,13 +2578,9 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
 
   fetch_token (token, regexp, syntax);
 
-  if (BE (elem == NULL, 0))
+  /* Treat "<re>{0}*" etc. as "<re>{0}".  */
+  if (BE (elem == NULL || (start == 0 && end == 0), 0))
     return NULL;
-  if (BE (start == 0 && end == 0, 0))
-    {
-      postorder (elem, free_tree, NULL);
-      return NULL;
-    }
 
   /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
   if (BE (start > 0, 0))
@@ -2587,7 +2589,7 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
       for (i = 2; i <= start; ++i)
 	{
 	  elem = duplicate_tree (elem, dfa);
-	  tree = create_tree (dfa, tree, elem, CONCAT);
+	  tree = create_tree (dfa, tree, elem, CONCAT, 0);
 	  if (BE (elem == NULL || tree == NULL, 0))
 	    goto parse_dup_op_espace;
 	}
@@ -2602,10 +2604,9 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
   else
     old_tree = NULL;
 
-  if (elem->token.type == SUBEXP)
-    postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
-
-  tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
+  mark_opt_subexp (elem, dfa);
+  dup_token.type = (end == -1 ? OP_DUP_ASTERISK : OP_DUP_QUESTION);
+  tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
   if (BE (tree == NULL, 0))
     goto parse_dup_op_espace;
 
@@ -2615,17 +2616,17 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
   for (i = start + 2; i <= end; ++i)
     {
       elem = duplicate_tree (elem, dfa);
-      tree = create_tree (dfa, tree, elem, CONCAT);
+      tree = create_tree (dfa, tree, elem, CONCAT, 0);
       if (BE (elem == NULL || tree == NULL, 0))
         goto parse_dup_op_espace;
 
-      tree = create_tree (dfa, tree, NULL, OP_ALT);
+      tree = re_dfa_add_tree_node (dfa, tree, NULL, &dup_token);
       if (BE (tree == NULL, 0))
         goto parse_dup_op_espace;
     }
 
   if (old_tree)
-    tree = create_tree (dfa, old_tree, tree, CONCAT);
+    tree = create_tree (dfa, old_tree, tree, CONCAT, 0);
 
   return tree;
 
@@ -3281,59 +3282,57 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
   /* Ensure only single byte characters are set.  */
   if (dfa->mb_cur_max > 1)
     bitset_mask (sbcset, dfa->sb_char);
+#endif /* RE_ENABLE_I18N */
 
+  /* Build a tree for simple bracket.  */
+  br_token.type = SIMPLE_BRACKET;
+  br_token.opr.sbcset = sbcset;
+  work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+  if (BE (work_tree == NULL, 0))
+    goto parse_bracket_exp_espace;
+
+#ifdef RE_ENABLE_I18N
   if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
       || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
 						     || mbcset->non_match)))
     {
+      re_token_t alt_token;
       bin_tree_t *mbc_tree;
       int sbc_idx;
       /* Build a tree for complex bracket.  */
       dfa->has_mb_node = 1;
-      br_token.type = COMPLEX_BRACKET;
-      br_token.opr.mbcset = mbcset;
-      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
-      if (BE (mbc_tree == NULL, 0))
-	goto parse_bracket_exp_espace;
       for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
 	if (sbcset[sbc_idx])
 	  break;
       /* If there are no bits set in sbcset, there is no point
 	 of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
-      if (sbc_idx < BITSET_UINTS)
-	{
-          /* Build a tree for simple bracket.  */
-          br_token.type = SIMPLE_BRACKET;
-          br_token.opr.sbcset = sbcset;
-          work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
-          if (BE (work_tree == NULL, 0))
-            goto parse_bracket_exp_espace;
-
-          /* Then join them by ALT node.  */
-          work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
-          if (BE (work_tree == NULL, 0))
-            goto parse_bracket_exp_espace;
-	}
-      else
+      if (sbc_idx == BITSET_UINTS)
 	{
 	  re_free (sbcset);
-	  work_tree = mbc_tree;
+	  dfa->nodes[work_tree->node_idx].type = COMPLEX_BRACKET;
+	  dfa->nodes[work_tree->node_idx].opr.mbcset = mbcset;
+	  return work_tree;
 	}
+      br_token.type = COMPLEX_BRACKET;
+      br_token.opr.mbcset = mbcset;
+      mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+      if (BE (mbc_tree == NULL, 0))
+	goto parse_bracket_exp_espace;
+      /* Then join them by ALT node.  */
+      alt_token.type = OP_ALT;
+      dfa->has_plural_match = 1;
+      work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token);
+      if (BE (mbc_tree != NULL, 1))
+	return work_tree;
     }
   else
-#endif /* not RE_ENABLE_I18N */
     {
-#ifdef RE_ENABLE_I18N
       free_charset (mbcset);
-#endif
-      /* Build a tree for simple bracket.  */
-      br_token.type = SIMPLE_BRACKET;
-      br_token.opr.sbcset = sbcset;
-      work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
-      if (BE (work_tree == NULL, 0))
-        goto parse_bracket_exp_espace;
+      return work_tree;
     }
+#else /* not RE_ENABLE_I18N */
   return work_tree;
+#endif /* not RE_ENABLE_I18N */
 
  parse_bracket_exp_espace:
   *err = REG_ESPACE;
@@ -3694,23 +3693,26 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err)
   /* Build a tree for simple bracket.  */
   br_token.type = SIMPLE_BRACKET;
   br_token.opr.sbcset = sbcset;
-  tree = create_token_tree (dfa, NULL, NULL, &br_token);
+  tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
   if (BE (tree == NULL, 0))
     goto build_word_op_espace;
 
 #ifdef RE_ENABLE_I18N
   if (dfa->mb_cur_max > 1)
     {
+      re_token_t alt_token;
       bin_tree_t *mbc_tree;
       /* Build a tree for complex bracket.  */
       br_token.type = COMPLEX_BRACKET;
       br_token.opr.mbcset = mbcset;
       dfa->has_mb_node = 1;
-      mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+      mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
       if (BE (mbc_tree == NULL, 0))
 	goto build_word_op_espace;
       /* Then join them by ALT node.  */
-      tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+      alt_token.type = OP_ALT;
+      dfa->has_plural_match = 1;
+      tree = re_dfa_add_tree_node (dfa, tree, mbc_tree, &alt_token);
       if (BE (mbc_tree != NULL, 1))
 	return tree;
     }
@@ -3781,23 +3783,12 @@ free_charset (re_charset_t *cset)
 /* Create a tree node.  */
 
 static bin_tree_t *
-create_tree (dfa, left, right, type)
+create_tree (dfa, left, right, type, index)
      re_dfa_t *dfa;
      bin_tree_t *left;
      bin_tree_t *right;
      re_token_type_t type;
-{
-  re_token_t t;
-  t.type = type;
-  return create_token_tree (dfa, left, right, &t);
-}
-
-static bin_tree_t *
-create_token_tree (dfa, left, right, token)
-     re_dfa_t *dfa;
-     bin_tree_t *left;
-     bin_tree_t *right;
-     const re_token_t *token;
+     int index;
 {
   bin_tree_t *tree;
   if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
@@ -3815,12 +3806,11 @@ create_token_tree (dfa, left, right, token)
   tree->parent = NULL;
   tree->left = left;
   tree->right = right;
-  tree->token = *token;
-  tree->token.duplicated = 0;
-  tree->token.opt_subexp = 0;
-  tree->first = NULL;
-  tree->next = NULL;
-  tree->node_idx = -1;
+  tree->type = type;
+  tree->node_idx = index;
+  tree->first = -1;
+  tree->next = -1;
+  re_node_set_init_empty (&tree->eclosure);
 
   if (left != NULL)
     left->parent = tree;
@@ -3829,89 +3819,103 @@ create_token_tree (dfa, left, right, token)
   return tree;
 }
 
-/* Mark the tree SRC as an optional subexpression.
-   To be called from preorder or postorder.  */
+/* Create both a DFA node and a tree for it.  */
 
-static reg_errcode_t
-mark_opt_subexp (extra, node)
-     void *extra;
-     bin_tree_t *node;
+static bin_tree_t *
+re_dfa_add_tree_node (dfa, left, right, token)
+     re_dfa_t *dfa;
+     bin_tree_t *left;
+     bin_tree_t *right;
+     const re_token_t *token;
 {
-  int idx = (int) (long) extra;
-  if (node->token.type == SUBEXP && node->token.opr.idx == idx)
-    node->token.opt_subexp = 1;
+  int new_idx = re_dfa_add_node (dfa, *token, 0);
 
-  return REG_NOERROR;
+  if (new_idx == -1)
+    return NULL;
+
+  return create_tree (dfa, left, right, 0, new_idx);
 }
 
-/* Free the allocated memory inside NODE. */
+/* Mark the tree SRC as an optional subexpression.  */
 
 static void
-free_token (re_token_t *node)
+mark_opt_subexp (src, dfa)
+     const bin_tree_t *src;
+     re_dfa_t *dfa;
 {
-#ifdef RE_ENABLE_I18N
-  if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
-    free_charset (node->opr.mbcset);
-  else
-#endif /* RE_ENABLE_I18N */
-    if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
-      re_free (node->opr.sbcset);
+  /* Pass an OPT_SUBEXP_IDX which is != 1 if the duplicated tree is
+     a subexpression.  */
+  if (src->type == CONCAT
+      && src->left->type == NON_TYPE
+      && dfa->nodes[src->left->node_idx].type == OP_OPEN_SUBEXP)
+    mark_opt_subexp_iter (src, dfa, dfa->nodes[src->left->node_idx].opr.idx);
 }
 
-/* Worker function for tree walking.  Free the allocated memory inside NODE
-   and its children. */
 
-static reg_errcode_t
-free_tree (void *extra, bin_tree_t *node)
+/* Recursive tree walker for mark_opt_subexp.  */
+
+static void
+mark_opt_subexp_iter (src, dfa, idx)
+     const bin_tree_t *src;
+     re_dfa_t *dfa;
+     int idx;
 {
-  free_token (&node->token);
-  return REG_NOERROR;
+  int node_idx;
+
+  if (src->type == NON_TYPE)
+    {
+      node_idx = src->node_idx;
+      if ((dfa->nodes[node_idx].type == OP_OPEN_SUBEXP
+	   || dfa->nodes[node_idx].type == OP_CLOSE_SUBEXP)
+	  && dfa->nodes[node_idx].opr.idx == idx)
+	dfa->nodes[node_idx].opt_subexp = 1;
+     }
+
+  if (src->left != NULL)
+    mark_opt_subexp_iter (src->left, dfa, idx);
+
+  if (src->right != NULL)
+    mark_opt_subexp_iter (src->right, dfa, idx);
 }
 
 
-/* Duplicate the node SRC, and return new node.  This is a preorder
-   visit similar to the one implemented by the generic visitor, but
-   we need more infrastructure to maintain two parallel trees --- so,
-   it's easier to duplicate.  */
+/* Duplicate the node SRC, and return new node.  */
 
 static bin_tree_t *
-duplicate_tree (root, dfa)
-     const bin_tree_t *root;
+duplicate_tree (src, dfa)
+     const bin_tree_t *src;
      re_dfa_t *dfa;
 {
-  const bin_tree_t *node;
-  bin_tree_t *dup_root;
-  bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
+  bin_tree_t *left = NULL, *right = NULL, *new_tree;
+  int new_node_idx;
+  /* Since node indices must follow the post-order of the tree,
+     we must duplicate the left subtree first.  */
+  if (src->left != NULL)
+    {
+      left = duplicate_tree (src->left, dfa);
+      if (left == NULL)
+	return NULL;
+    }
 
-  for (node = root; ; )
+  /* Secondly, duplicate the right.  */
+  if (src->right != NULL)
     {
-      /* Create a new tree and link it back to the current parent.  */
-      *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
-      if (*p_new == NULL)
+      right = duplicate_tree (src->right, dfa);
+      if (right == NULL)
 	return NULL;
-      (*p_new)->parent = dup_node;
-      (*p_new)->token.duplicated = 1;
-      dup_node = *p_new;
+    }
 
-      /* Go to the left node, or up and to the right.  */
-      if (node->left)
-	{
-	  node = node->left;
-	  p_new = &dup_node->left;
-	}
-      else
-	{
-	  const bin_tree_t *prev = NULL;
-	  while (node->right == prev || node->right == NULL)
-	    {
-	      prev = node;
-	      node = node->parent;
-	      dup_node = dup_node->parent;
-	      if (!node)
-	        return dup_root;
-	    }
-	  node = node->right;
-	  p_new = &dup_node->right;
-	}
+  /* Finally, duplicate the node itself; check for failure first.  */
+  if (src->type == NON_TYPE)
+    {
+      new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0);
+      if (BE (new_node_idx == -1, 0))
+	return NULL;
+      dfa->nodes[new_node_idx].duplicated = 1;
     }
+  else
+    new_node_idx = src->type;
+
+  new_tree = create_tree (dfa, left, right, src->type, new_node_idx);
+  return new_tree;
 }
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index c3295a851c..001b50b134 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -1330,44 +1330,47 @@ re_node_set_remove_at (set, idx)
    Or return -1, if an error will be occured.  */
 
 static int
-re_dfa_add_node (dfa, token)
+re_dfa_add_node (dfa, token, mode)
      re_dfa_t *dfa;
      re_token_t token;
+     int mode;
 {
-  int type = token.type;
   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
     {
       int new_nodes_alloc = dfa->nodes_alloc * 2;
-      int *new_nexts, *new_indices;
-      re_node_set *new_edests, *new_eclosures;
-
       re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
 					  new_nodes_alloc);
       if (BE (new_array == NULL, 0))
 	return -1;
       dfa->nodes = new_array;
-      new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
-      new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
-      new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
-      new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
-      if (BE (new_nexts == NULL || new_indices == NULL
-	      || new_edests == NULL || new_eclosures == NULL, 0))
-	return -1;
-      dfa->nexts = new_nexts;
-      dfa->org_indices = new_indices;
-      dfa->edests = new_edests;
-      dfa->eclosures = new_eclosures;
+      if (mode)
+	{
+	  int *new_nexts, *new_indices;
+	  re_node_set *new_edests, *new_eclosures, *new_inveclosures;
+
+	  new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+	  new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+	  new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+	  new_eclosures = re_realloc (dfa->eclosures, re_node_set,
+				      new_nodes_alloc);
+	  new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
+					 new_nodes_alloc);
+	  if (BE (new_nexts == NULL || new_indices == NULL
+		  || new_edests == NULL || new_eclosures == NULL
+		  || new_inveclosures == NULL, 0))
+	    return -1;
+	  dfa->nexts = new_nexts;
+	  dfa->org_indices = new_indices;
+	  dfa->edests = new_edests;
+	  dfa->eclosures = new_eclosures;
+	  dfa->inveclosures = new_inveclosures;
+	}
       dfa->nodes_alloc = new_nodes_alloc;
     }
   dfa->nodes[dfa->nodes_len] = token;
+  dfa->nodes[dfa->nodes_len].opt_subexp = 0;
+  dfa->nodes[dfa->nodes_len].duplicated = 0;
   dfa->nodes[dfa->nodes_len].constraint = 0;
-#ifdef RE_ENABLE_I18N
-  dfa->nodes[dfa->nodes_len].accept_mb =
-    (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
-#endif
-  dfa->nexts[dfa->nodes_len] = -1;
-  re_node_set_init_empty (dfa->edests + dfa->nodes_len);
-  re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
   return dfa->nodes_len++;
 }
 
@@ -1548,13 +1551,16 @@ create_ci_newstate (dfa, nodes, hash)
       re_token_type_t type = node->type;
       if (type == CHARACTER && !node->constraint)
 	continue;
-#ifdef RE_ENABLE_I18N
-      newstate->accept_mb |= node->accept_mb;
-#endif /* RE_ENABLE_I18N */
 
       /* If the state has the halt node, the state is a halt state.  */
-      if (type == END_OF_RE)
+      else if (type == END_OF_RE)
 	newstate->halt = 1;
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET
+	       || type == OP_UTF8_PERIOD
+	       || (type == OP_PERIOD && dfa->mb_cur_max > 1))
+	newstate->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
       else if (type == OP_BACK_REF)
 	newstate->has_backref = 1;
       else if (type == ANCHOR || node->constraint)
@@ -1605,13 +1611,15 @@ create_cd_newstate (dfa, nodes, context, hash)
 
       if (type == CHARACTER && !constraint)
 	continue;
-#ifdef RE_ENABLE_I18N
-      newstate->accept_mb |= node->accept_mb;
-#endif /* RE_ENABLE_I18N */
-
       /* If the state has the halt node, the state is a halt state.  */
-      if (type == END_OF_RE)
+      else if (type == END_OF_RE)
 	newstate->halt = 1;
+#ifdef RE_ENABLE_I18N
+      else if (type == COMPLEX_BRACKET
+	       || type == OP_UTF8_PERIOD
+	       || (type == OP_PERIOD && dfa->mb_cur_max > 1))
+	newstate->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
       else if (type == OP_BACK_REF)
 	newstate->has_backref = 1;
       else if (type == ANCHOR)
@@ -1660,7 +1668,6 @@ free_state (state)
       re_free (state->entrance_nodes);
     }
   re_node_set_free (&state->nodes);
-  re_free (state->word_trtable);
   re_free (state->trtable);
   re_free (state);
 }
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index f065cf449d..0ccd8d3665 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -1,5 +1,5 @@
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
 
@@ -143,21 +143,18 @@ static inline void bitset_mask (bitset dest, const bitset src);
 #define NEXT_NEWLINE_CONSTRAINT 0x0020
 #define PREV_BEGBUF_CONSTRAINT 0x0040
 #define NEXT_ENDBUF_CONSTRAINT 0x0080
-#define WORD_DELIM_CONSTRAINT 0x0100
-#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+#define DUMMY_CONSTRAINT 0x0100
 
 typedef enum
 {
   INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
   WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
   WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
-  INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
   LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
   LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
   BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
   BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
-  WORD_DELIM = WORD_DELIM_CONSTRAINT,
-  NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+  WORD_DELIM = DUMMY_CONSTRAINT
 } re_context_type;
 
 typedef struct
@@ -189,16 +186,16 @@ typedef enum
   OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
   OP_ALT = EPSILON_BIT | 2,
   OP_DUP_ASTERISK = EPSILON_BIT | 3,
-  ANCHOR = EPSILON_BIT | 4,
+  OP_DUP_PLUS = EPSILON_BIT | 4,
+  OP_DUP_QUESTION = EPSILON_BIT | 5,
+  ANCHOR = EPSILON_BIT | 6,
+  OP_DELETED_SUBEXP = EPSILON_BIT | 7,
 
   /* Tree type, these are used only by tree. */
   CONCAT = 16,
-  SUBEXP = 17,
 
   /* Token type, these are used only by token.  */
-  OP_DUP_PLUS = 18,
-  OP_DUP_QUESTION,
-  OP_OPEN_BRACKET,
+  OP_OPEN_BRACKET = 17,
   OP_CLOSE_BRACKET,
   OP_CHARSET_RANGE,
   OP_OPEN_DUP_NUM,
@@ -287,7 +284,6 @@ typedef struct
   unsigned int duplicated : 1;
   unsigned int opt_subexp : 1;
 #ifdef RE_ENABLE_I18N
-  unsigned int accept_mb : 1;
   /* These 2 bits can be moved into the union if needed (e.g. if running out
      of bits; move opr.c to opr.c.c and move the flags to opr.c.flags).  */
   unsigned int mb_partial : 1;
@@ -296,6 +292,8 @@ typedef struct
 } re_token_t;
 
 #define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+#define ACCEPT_MB_NODE(type) \
+  ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD)
 
 struct re_string_t
 {
@@ -431,14 +429,15 @@ struct bin_tree_t
   struct bin_tree_t *parent;
   struct bin_tree_t *left;
   struct bin_tree_t *right;
-  struct bin_tree_t *first;
-  struct bin_tree_t *next;
-
-  re_token_t token;
 
   /* `node_idx' is the index in dfa->nodes, if `type' == 0.
      Otherwise `type' indicate the type of this node.  */
+  re_token_type_t type;
   int node_idx;
+
+  int first;
+  int next;
+  re_node_set eclosure;
 };
 typedef struct bin_tree_t bin_tree_t;
 
@@ -487,7 +486,7 @@ struct re_dfastate_t
   re_node_set non_eps_nodes;
   re_node_set inveclosure;
   re_node_set *entrance_nodes;
-  struct re_dfastate_t **trtable, **word_trtable;
+  struct re_dfastate_t **trtable;
   unsigned int context : 4;
   unsigned int halt : 1;
   /* If this state can accept `multi byte'.
@@ -497,6 +496,7 @@ struct re_dfastate_t
   /* If this state has backreference node(s).  */
   unsigned int has_backref : 1;
   unsigned int has_constraint : 1;
+  unsigned int word_trtable : 1;
 };
 typedef struct re_dfastate_t re_dfastate_t;
 
@@ -678,7 +678,7 @@ static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
   (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
 #define re_node_set_empty(p) ((p)->nelem = 0)
 #define re_node_set_free(set) re_free ((set)->elems)
-static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function;
+static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode) internal_function;
 static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
 					const re_node_set *nodes) internal_function;
 static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
diff --git a/posix/regexec.c b/posix/regexec.c
index 636396e6f7..91b48dd4a2 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -175,8 +175,8 @@ static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
 					 re_node_set *cur_nodes, int cur_str,
 					 int subexp_num, int type) internal_function;
-static int build_trtable (re_dfa_t *dfa,
-			  re_dfastate_t *state) internal_function;
+static re_dfastate_t **build_trtable (re_dfa_t *dfa,
+				      re_dfastate_t *state) internal_function;
 #ifdef RE_ENABLE_I18N
 static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
 				    const re_string_t *input, int idx) internal_function;
@@ -605,7 +605,6 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
   int left_lim, right_lim, incr;
   int fl_longest_match, match_first, match_kind, match_last = -1;
-  int extra_nmatch;
   int sb, ch;
 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
   re_match_context_t mctx = { .dfa = dfa };
@@ -621,9 +620,6 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
   mctx.dfa = dfa;
 #endif
 
-  extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
-  nmatch -= extra_nmatch;
-
   /* Check if the DFA haven't been compiled.  */
   if (BE (preg->used == 0 || dfa->init_state == NULL
 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
@@ -886,14 +882,11 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
 	    pmatch[reg_idx].rm_so += match_first;
 	    pmatch[reg_idx].rm_eo += match_first;
 	  }
-      for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
-	{
-	  pmatch[nmatch + reg_idx].rm_so = -1;
-	  pmatch[nmatch + reg_idx].rm_eo = -1;
-	}
 
       if (dfa->subexp_map)
-        for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+        for (reg_idx = 0;
+             reg_idx + 1 < nmatch && reg_idx < preg->re_nsub;
+             reg_idx++)
           if (dfa->subexp_map[reg_idx] != reg_idx)
             {
               pmatch[reg_idx + 1].rm_so
@@ -1269,7 +1262,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
       re_token_type_t type = dfa->nodes[node].type;
 
 #ifdef RE_ENABLE_I18N
-      if (dfa->nodes[node].accept_mb)
+      if (ACCEPT_MB_NODE (type))
 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
       else
 #endif /* RE_ENABLE_I18N */
@@ -1378,7 +1371,7 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
      int fl_backtrack;
 {
   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-  int idx, cur_node;
+  int idx, cur_node, real_nmatch;
   re_node_set eps_via_nodes;
   struct re_fail_stack_t *fs;
   struct re_fail_stack_t fs_body = { 0, 2, NULL };
@@ -1399,14 +1392,15 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
     fs = NULL;
 
   cur_node = dfa->init_node;
+  real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
   re_node_set_init_empty (&eps_via_nodes);
 
-  prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch);
-  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+  prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch);
+  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch);
 
   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
     {
-      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch);
 
       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
 	{
@@ -1630,13 +1624,15 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest)
       int naccepted = 0;
       int ret;
 
-#ifdef DEBUG
+#if defined DEBUG || defined RE_ENABLE_I18N
       re_token_type_t type = dfa->nodes[prev_node].type;
+#endif
+#ifdef DEBUG
       assert (!IS_EPSILON_NODE (type));
 #endif
 #ifdef RE_ENABLE_I18N
       /* If the node may accept `multi byte'.  */
-      if (dfa->nodes[prev_node].accept_mb)
+      if (ACCEPT_MB_NODE (type))
 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
 					 str_idx, sctx->last_str_idx);
 #endif /* RE_ENABLE_I18N */
@@ -2222,6 +2218,7 @@ transit_state (err, mctx, state)
      re_match_context_t *mctx;
      re_dfastate_t *state;
 {
+  re_dfa_t *const dfa = mctx->dfa;
   re_dfastate_t **trtable;
   unsigned char ch;
 
@@ -2236,22 +2233,21 @@ transit_state (err, mctx, state)
 #endif /* RE_ENABLE_I18N */
 
   /* Then decide the next state with the single byte.  */
-#if 0
-  if (0)
-    /* don't use transition table  */
-    return transit_state_sb (err, mctx, state);
-#endif
-
-  /* Use transition table  */
-  ch = re_string_fetch_byte (&mctx->input);
-  for (;;)
+  if (1)
     {
+      /* Use transition table  */
+      ch = re_string_fetch_byte (&mctx->input);
       trtable = state->trtable;
-      if (BE (trtable != NULL, 1))
-	return trtable[ch];
-
-      trtable = state->word_trtable;
-      if (BE (trtable != NULL, 1))
+      if (trtable == NULL)
+        {
+          trtable = build_trtable (dfa, state);
+          if (trtable == NULL)
+	    {
+	      *err = REG_ESPACE;
+	      return NULL;
+	    }
+	}
+      if (BE (state->word_trtable, 0))
         {
 	  unsigned int context;
 	  context
@@ -2263,15 +2259,14 @@ transit_state (err, mctx, state)
 	  else
 	    return trtable[ch];
 	}
-
-      if (!build_trtable (mctx->dfa, state))
-	{
-	  *err = REG_ESPACE;
-	  return NULL;
-	}
-
-      /* Retry, we now have a transition table.  */
+      else
+	return trtable[ch];
     }
+#if 0
+  else
+    /* don't use transition table  */
+    return transit_state_sb (err, mctx, state);
+#endif
 }
 
 /* Update the state_log if we need */
@@ -2475,13 +2470,10 @@ transit_state_mb (mctx, pstate)
     {
       re_node_set dest_nodes, *new_nodes;
       int cur_node_idx = pstate->nodes.elems[i];
-      int naccepted, dest_idx;
+      int naccepted = 0, dest_idx;
       unsigned int context;
       re_dfastate_t *dest_state;
 
-      if (!dfa->nodes[cur_node_idx].accept_mb)
-        continue;
-
       if (dfa->nodes[cur_node_idx].constraint)
 	{
 	  context = re_string_context_at (&mctx->input,
@@ -2493,8 +2485,9 @@ transit_state_mb (mctx, pstate)
 	}
 
       /* How many bytes the node can accept?  */
-      naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
-					   re_string_cur_idx (&mctx->input));
+      if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
+	naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+					     re_string_cur_idx (&mctx->input));
       if (naccepted == 0)
 	continue;
 
@@ -2508,7 +2501,9 @@ transit_state_mb (mctx, pstate)
 #ifdef DEBUG
       assert (dfa->nexts[cur_node_idx] != -1);
 #endif
-      new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
+      /* `cur_node_idx' may point to the entity of the OP_CONTEXT_NODE,
+	 then we use pstate->nodes.elems[i] instead.  */
+      new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
 
       dest_state = mctx->state_log[dest_idx];
       if (dest_state == NULL)
@@ -3024,13 +3019,15 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
     {
       int naccepted = 0;
       int cur_node = cur_nodes->elems[cur_idx];
-#ifdef DEBUG
+#if defined DEBUG || defined RE_ENABLE_I18N
       re_token_type_t type = dfa->nodes[cur_node].type;
+#endif
+#ifdef DEBUG
       assert (!IS_EPSILON_NODE (type));
 #endif
 #ifdef RE_ENABLE_I18N
       /* If the node may accept `multi byte'.  */
-      if (dfa->nodes[cur_node].accept_mb)
+      if (ACCEPT_MB_NODE (type))
 	{
 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
 					       str_idx);
@@ -3276,15 +3273,15 @@ expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
 }
 
 /* Build transition table for the state.
-   Return 1 if succeeded, otherwise return NULL.  */
+   Return the new table if succeeded, otherwise return NULL.  */
 
-static int
+static re_dfastate_t **
 build_trtable (dfa, state)
     re_dfa_t *dfa;
     re_dfastate_t *state;
 {
   reg_errcode_t err;
-  int i, j, ch, need_word_trtable = 0;
+  int i, j, ch;
   unsigned int elem, mask;
   int dests_node_malloced = 0, dest_states_malloced = 0;
   int ndests; /* Number of the destination states from `state'.  */
@@ -3301,20 +3298,20 @@ build_trtable (dfa, state)
 #ifdef _LIBC
   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
     dests_node = (re_node_set *)
-      alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+		 alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
   else
 #endif
     {
       dests_node = (re_node_set *)
-	malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+		   malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
       if (BE (dests_node == NULL, 0))
-	return 0;
+	return NULL;
       dests_node_malloced = 1;
     }
   dests_ch = (bitset *) (dests_node + SBC_MAX);
 
   /* Initialize transiton table.  */
-  state->word_trtable = state->trtable = NULL;
+  state->word_trtable = 0;
 
   /* At first, group all nodes belonging to `state' into several
      destinations.  */
@@ -3323,14 +3320,14 @@ build_trtable (dfa, state)
     {
       if (dests_node_malloced)
 	free (dests_node);
-      /* Return 0 in case of an error, 1 otherwise.  */
+      /* Return NULL in case of an error, trtable otherwise.  */
       if (ndests == 0)
 	{
 	  state->trtable = (re_dfastate_t **)
-	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
-	  return 1;
+	    calloc (sizeof (re_dfastate_t *), SBC_MAX);;
+	  return state->trtable;
 	}
-      return 0;
+      return NULL;
     }
 
   err = re_node_set_alloc (&follows, ndests + 1);
@@ -3341,12 +3338,12 @@ build_trtable (dfa, state)
   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
 			 + ndests * 3 * sizeof (re_dfastate_t *)))
     dest_states = (re_dfastate_t **)
-      alloca (ndests * 3 * sizeof (re_dfastate_t *));
+		  alloca (ndests * 3 * sizeof (re_dfastate_t *));
   else
 #endif
     {
       dest_states = (re_dfastate_t **)
-	malloc (ndests * 3 * sizeof (re_dfastate_t *));
+		    malloc (ndests * 3 * sizeof (re_dfastate_t *));
       if (BE (dest_states == NULL, 0))
 	{
 out_free:
@@ -3357,7 +3354,7 @@ out_free:
 	    re_node_set_free (dests_node + i);
 	  if (dests_node_malloced)
 	    free (dests_node);
-	  return 0;
+	  return NULL;
 	}
       dest_states_malloced = 1;
     }
@@ -3393,8 +3390,9 @@ out_free:
 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
 	    goto out_free;
 
-	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
-	    need_word_trtable = 1;
+	  if (dest_states[i] != dest_states_word[i]
+	      && dfa->mb_cur_max > 1)
+	    state->word_trtable = 1;
 
 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
 							CONTEXT_NEWLINE);
@@ -3409,14 +3407,13 @@ out_free:
       bitset_merge (acceptable, dests_ch[i]);
     }
 
-  if (!BE (need_word_trtable, 0))
+  if (!BE (state->word_trtable, 0))
     {
       /* We don't care about whether the following character is a word
 	 character, or we are in a single-byte character set so we can
 	 discern by looking at the character code: allocate a
 	 256-entry transition table.  */
-      trtable = state->trtable =
-	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+      trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
       if (BE (trtable == NULL, 0))
 	goto out_free;
 
@@ -3446,8 +3443,8 @@ out_free:
 	 by looking at the character code: build two 256-entry
 	 transition tables, one starting at trtable[0] and one
 	 starting at trtable[SBC_MAX].  */
-      trtable = state->word_trtable =
-	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
+      trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *),
+					   2 * SBC_MAX);
       if (BE (trtable == NULL, 0))
 	goto out_free;
 
@@ -3478,7 +3475,7 @@ out_free:
 	  {
 	    /* k-th destination accepts newline character.  */
 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
-	    if (need_word_trtable)
+	    if (state->word_trtable)
 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
 	    /* There must be only one destination which accepts
 	       newline.  See group_nodes_into_DFAstates.  */
@@ -3496,7 +3493,8 @@ out_free:
   if (dests_node_malloced)
     free (dests_node);
 
-  return 1;
+  state->trtable = trtable;
+  return trtable;
 }
 
 /* Group all nodes belonging to STATE into several destinations.
diff --git a/posix/rxspencer/tests b/posix/rxspencer/tests
index a8b6e4baa8..a724252d8c 100644
--- a/posix/rxspencer/tests
+++ b/posix/rxspencer/tests
@@ -526,12 +526,3 @@ a((b+|((c)*)))+d	-	abcd	abcd	c,c,c,c
 (((\b))){0}	-	x	@x	-,-,-
 a(((.*)))b((\2)){0}c	-	abc	abc	@bc,@bc,@bc,-,-
 a(((.*)))b((\1)){0}c	-	axbc	axbc	x,x,x,-,-
-
-\b	&	SaT	@aT
-\b	&	aT	@aT
-a.*\b	&	abT	ab
-\b	&	STSS
-\B	&	abc	@bc
-\B	&	aSbTc
-\B	&	SaT	@SaT
-\B	&	aSTSb	@TSb
diff --git a/posix/tst-rxspencer.c b/posix/tst-rxspencer.c
index a68bab2de9..cb40421797 100644
--- a/posix/tst-rxspencer.c
+++ b/posix/tst-rxspencer.c
@@ -1,5 +1,5 @@
 /* Regular expression tests.
-   Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
 
@@ -127,15 +127,14 @@ mb_frob_string (const char *str, const char *letters)
 }
 
 /* Like mb_frob_string, but don't replace anything between
-   [: and :], [. and .] or [= and =] or characters escaped
-   with a backslash.  */
+   [: and :], [. and .] or [= and =].  */
 
 static char *
 mb_frob_pattern (const char *str, const char *letters)
 {
   char *ret, *dst;
   const char *src;
-  int in_class = 0, escaped = 0;
+  int in_class = 0;
 
   if (str == NULL)
     return NULL;
@@ -145,18 +144,7 @@ mb_frob_pattern (const char *str, const char *letters)
     return NULL;
 
   for (src = str, dst = ret; *src; ++src)
-    if (*src == '\\')
-      {
-	escaped ^= 1;
-	*dst++ = *src;
-      }
-    else if (escaped)
-      {
-	escaped = 0;
-	*dst++ = *src;
-	continue;
-      }
-    else if (!in_class && strchr (letters, *src))
+    if (!in_class && strchr (letters, *src))
       dst = mb_replace (dst, *src);
     else
       {
diff --git a/posix/unistd.h b/posix/unistd.h
index 744c10c50b..5d42169e82 100644
--- a/posix/unistd.h
+++ b/posix/unistd.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991-2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -295,8 +295,7 @@ extern __off64_t __REDIRECT_NTH (lseek,
 # endif
 #endif
 #ifdef __USE_LARGEFILE64
-extern __off64_t lseek64 (int __fd, __off64_t __offset, int __whence)
-     __THROW;
+extern __off64_t lseek64 (int __fd, __off64_t __offset, int __whence) __THROW;
 #endif
 
 /* Close the file descriptor FD.
@@ -310,13 +309,13 @@ extern int close (int __fd);
 
    This function is a cancellation point and therefore not marked with
    __THROW.  */
-extern ssize_t read (int __fd, void *__buf, size_t __nbytes) __wur;
+extern ssize_t read (int __fd, void *__buf, size_t __nbytes);
 
 /* Write N bytes of BUF to FD.  Return the number written, or -1.
 
    This function is a cancellation point and therefore not marked with
    __THROW.  */
-extern ssize_t write (int __fd, __const void *__buf, size_t __n) __wur;
+extern ssize_t write (int __fd, __const void *__buf, size_t __n);
 
 #ifdef __USE_UNIX98
 # ifndef __USE_FILE_OFFSET64
@@ -327,7 +326,7 @@ extern ssize_t write (int __fd, __const void *__buf, size_t __n) __wur;
    This function is a cancellation point and therefore not marked with
    __THROW.  */
 extern ssize_t pread (int __fd, void *__buf, size_t __nbytes,
-		      __off_t __offset) __wur;
+		      __off_t __offset);
 
 /* Write N bytes of BUF to FD at the given position OFFSET without
    changing the file pointer.  Return the number written, or -1.
@@ -335,15 +334,15 @@ extern ssize_t pread (int __fd, void *__buf, size_t __nbytes,
    This function is a cancellation point and therefore not marked with
    __THROW.  */
 extern ssize_t pwrite (int __fd, __const void *__buf, size_t __n,
-		       __off_t __offset) __wur;
+		       __off_t __offset);
 # else
 #  ifdef __REDIRECT
 extern ssize_t __REDIRECT (pread, (int __fd, void *__buf, size_t __nbytes,
 				   __off64_t __offset),
-			   pread64) __wur;
+			   pread64);
 extern ssize_t __REDIRECT (pwrite, (int __fd, __const void *__buf,
 				    size_t __nbytes, __off64_t __offset),
-			   pwrite64) __wur;
+			   pwrite64);
 #  else
 #   define pread pread64
 #   define pwrite pwrite64
@@ -355,11 +354,11 @@ extern ssize_t __REDIRECT (pwrite, (int __fd, __const void *__buf,
    changing the file pointer.  Return the number read, -1 for errors
    or 0 for EOF.  */
 extern ssize_t pread64 (int __fd, void *__buf, size_t __nbytes,
-			__off64_t __offset) __wur;
+			__off64_t __offset);
 /* Write N bytes of BUF to FD at the given position OFFSET without
    changing the file pointer.  Return the number written, or -1.  */
 extern ssize_t pwrite64 (int __fd, __const void *__buf, size_t __n,
-			 __off64_t __offset) __wur;
+			 __off64_t __offset);
 # endif
 #endif
 
@@ -367,7 +366,7 @@ extern ssize_t pwrite64 (int __fd, __const void *__buf, size_t __n,
    If successful, two file descriptors are stored in PIPEDES;
    bytes written on PIPEDES[1] can be read from PIPEDES[0].
    Returns 0 if successful, -1 if not.  */
-extern int pipe (int __pipedes[2]) __THROW __wur;
+extern int pipe (int __pipedes[2]) __THROW;
 
 /* Schedule an alarm.  In SECONDS seconds, the process will get a SIGALRM.
    If SECONDS is zero, any currently scheduled alarm will be cancelled.
@@ -417,26 +416,26 @@ extern int pause (void);
 
 /* Change the owner and group of FILE.  */
 extern int chown (__const char *__file, __uid_t __owner, __gid_t __group)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 
 #if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
 /* Change the owner and group of the file that FD is open on.  */
-extern int fchown (int __fd, __uid_t __owner, __gid_t __group) __THROW __wur;
+extern int fchown (int __fd, __uid_t __owner, __gid_t __group) __THROW;
 
 
 /* Change owner and group of FILE, if it is a symbolic
    link the ownership of the symbolic link is changed.  */
 extern int lchown (__const char *__file, __uid_t __owner, __gid_t __group)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 
 #endif /* Use BSD || X/Open Unix.  */
 
 /* Change the process's working directory to PATH.  */
-extern int chdir (__const char *__path) __THROW __nonnull ((1)) __wur;
+extern int chdir (__const char *__path) __THROW __nonnull ((1));
 
 #if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
 /* Change the process's working directory to the one FD is open on.  */
-extern int fchdir (int __fd) __THROW __wur;
+extern int fchdir (int __fd) __THROW;
 #endif
 
 /* Get the pathname of the current working directory,
@@ -446,7 +445,7 @@ extern int fchdir (int __fd) __THROW __wur;
    an array is allocated with `malloc'; the array is SIZE
    bytes long, unless SIZE == 0, in which case it is as
    big as necessary.  */
-extern char *getcwd (char *__buf, size_t __size) __THROW __wur;
+extern char *getcwd (char *__buf, size_t __size) __THROW;
 
 #ifdef	__USE_GNU
 /* Return a malloc'd string containing the current directory name.
@@ -459,13 +458,12 @@ extern char *get_current_dir_name (void) __THROW;
 /* Put the absolute pathname of the current working directory in BUF.
    If successful, return BUF.  If not, put an error message in
    BUF and return NULL.  BUF should be at least PATH_MAX bytes long.  */
-extern char *getwd (char *__buf)
-     __THROW __nonnull ((1)) __attribute_deprecated__ __wur;
+extern char *getwd (char *__buf) __THROW __nonnull ((1));
 #endif
 
 
 /* Duplicate FD, returning a new file descriptor on the same file.  */
-extern int dup (int __fd) __THROW __wur;
+extern int dup (int __fd) __THROW;
 
 /* Duplicate FD to FD2, closing FD2 and making it open on the same file.  */
 extern int dup2 (int __fd, int __fd2) __THROW;
@@ -518,7 +516,7 @@ extern int execlp (__const char *__file, __const char *__arg, ...)
 
 #if defined __USE_MISC || defined __USE_XOPEN
 /* Add INC to priority of the current process.  */
-extern int nice (int __inc) __THROW __wur;
+extern int nice (int __inc) __THROW;
 #endif
 
 
@@ -631,7 +629,7 @@ extern __gid_t getegid (void) __THROW;
 /* If SIZE is zero, return the number of supplementary groups
    the calling process is in.  Otherwise, fill in the group IDs
    of its supplementary groups in LIST and return the number written.  */
-extern int getgroups (int __size, __gid_t __list[]) __THROW __wur;
+extern int getgroups (int __size, __gid_t __list[]) __THROW;
 
 #ifdef	__USE_GNU
 /* Return nonzero iff the calling process is in group GID.  */
@@ -675,23 +673,19 @@ extern int setegid (__gid_t __gid) __THROW;
 #ifdef __USE_GNU
 /* Fetch the effective user ID, real user ID, and saved-set user ID,
    of the calling process.  */
-extern int getresuid (__uid_t *__euid, __uid_t *__ruid, __uid_t *__suid)
-     __THROW;
+extern int getresuid (__uid_t *__euid, __uid_t *__ruid, __uid_t *__suid);
 
 /* Fetch the effective group ID, real group ID, and saved-set group ID,
    of the calling process.  */
-extern int getresgid (__gid_t *__egid, __gid_t *__rgid, __gid_t *__sgid)
-     __THROW;
+extern int getresgid (__gid_t *__egid, __gid_t *__rgid, __gid_t *__sgid);
 
 /* Set the effective user ID, real user ID, and saved-set user ID,
    of the calling process to EUID, RUID, and SUID, respectively.  */
-extern int setresuid (__uid_t __euid, __uid_t __ruid, __uid_t __suid)
-     __THROW;
+extern int setresuid (__uid_t __euid, __uid_t __ruid, __uid_t __suid);
 
 /* Set the effective group ID, real group ID, and saved-set group ID,
    of the calling process to EGID, RGID, and SGID, respectively.  */
-extern int setresgid (__gid_t __egid, __gid_t __rgid, __gid_t __sgid)
-     __THROW;
+extern int setresgid (__gid_t __egid, __gid_t __rgid, __gid_t __sgid);
 #endif
 
 
@@ -716,7 +710,7 @@ extern char *ttyname (int __fd) __THROW;
 /* Store at most BUFLEN characters of the pathname of the terminal FD is
    open on in BUF.  Return 0 on success, otherwise an error number.  */
 extern int ttyname_r (int __fd, char *__buf, size_t __buflen)
-     __THROW __nonnull ((2)) __wur;
+     __THROW __nonnull ((2));
 
 /* Return 1 if FD is a valid descriptor associated
    with a terminal, zero if not.  */
@@ -732,18 +726,18 @@ extern int ttyslot (void) __THROW;
 
 /* Make a link to FROM named TO.  */
 extern int link (__const char *__from, __const char *__to)
-     __THROW __nonnull ((1, 2)) __wur;
+     __THROW __nonnull ((1, 2));
 
 #if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
 /* Make a symbolic link to FROM named TO.  */
 extern int symlink (__const char *__from, __const char *__to)
-     __THROW __nonnull ((1, 2)) __wur;
+     __THROW __nonnull ((1, 2));
 
 /* Read the contents of the symbolic link PATH into no more than
    LEN bytes of BUF.  The contents are not null-terminated.
    Returns the number of characters read, or -1 for errors.  */
 extern int readlink (__const char *__restrict __path, char *__restrict __buf,
-		     size_t __len) __THROW __nonnull ((1, 2)) __wur;
+		     size_t __len) __THROW __nonnull ((1, 2));
 #endif /* Use BSD.  */
 
 /* Remove the link NAME.  */
@@ -802,20 +796,20 @@ extern int gethostname (char *__name, size_t __len) __THROW __nonnull ((1));
 /* Set the name of the current host to NAME, which is LEN bytes long.
    This call is restricted to the super-user.  */
 extern int sethostname (__const char *__name, size_t __len)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 
 /* Set the current machine's Internet number to ID.
    This call is restricted to the super-user.  */
-extern int sethostid (long int __id) __THROW __wur;
+extern int sethostid (long int __id) __THROW;
 
 
 /* Get and set the NIS (aka YP) domain name, if any.
    Called just like `gethostname' and `sethostname'.
    The NIS domain name is usually the empty string when not using NIS.  */
 extern int getdomainname (char *__name, size_t __len)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 extern int setdomainname (__const char *__name, size_t __len)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 
 
 /* Revoke access permissions to all processes currently communicating
@@ -824,7 +818,7 @@ extern int setdomainname (__const char *__name, size_t __len)
 extern int vhangup (void) __THROW;
 
 /* Revoke the access of all descriptors currently open on FILE.  */
-extern int revoke (__const char *__file) __THROW __nonnull ((1)) __wur;
+extern int revoke (__const char *__file) __THROW __nonnull ((1));
 
 
 /* Enable statistical profiling, writing samples of the PC into at most
@@ -852,14 +846,14 @@ extern void setusershell (void) __THROW; /* Rewind and re-read the file.  */
 /* Put the program in the background, and dissociate from the controlling
    terminal.  If NOCHDIR is zero, do `chdir ("/")'.  If NOCLOSE is zero,
    redirects stdin, stdout, and stderr to /dev/null.  */
-extern int daemon (int __nochdir, int __noclose) __THROW __wur;
+extern int daemon (int __nochdir, int __noclose) __THROW;
 #endif /* Use BSD || X/Open.  */
 
 
 #if defined __USE_BSD || (defined __USE_XOPEN && !defined __USE_XOPEN2K)
 /* Make PATH be the root directory (the starting point for absolute paths).
    This call is restricted to the super-user.  */
-extern int chroot (__const char *__path) __THROW __nonnull ((1)) __wur;
+extern int chroot (__const char *__path) __THROW __nonnull ((1));
 
 /* Prompt with PROMPT and read a string from the terminal without echoing.
    Uses /dev/tty if possible; otherwise stderr and stdin.  */
@@ -890,56 +884,52 @@ extern void sync (void) __THROW;
 extern int getpagesize (void)  __THROW __attribute__ ((__const__));
 
 
-/* Return the maximum number of file descriptors
-   the current process could possibly have.  */
-extern int getdtablesize (void) __THROW;
-
-
 /* Truncate FILE to LENGTH bytes.  */
 # ifndef __USE_FILE_OFFSET64
 extern int truncate (__const char *__file, __off_t __length)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 # else
 #  ifdef __REDIRECT_NTH
 extern int __REDIRECT_NTH (truncate,
 			   (__const char *__file, __off64_t __length),
-			   truncate64) __nonnull ((1)) __wur;
+			   truncate64) __nonnull ((1));
 #  else
 #   define truncate truncate64
 #  endif
 # endif
 # ifdef __USE_LARGEFILE64
 extern int truncate64 (__const char *__file, __off64_t __length)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 # endif
 
-#endif /* Use BSD || X/Open Unix.  */
-
-#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED || defined __USE_XOPEN2K
-
 /* Truncate the file FD is open on to LENGTH bytes.  */
 # ifndef __USE_FILE_OFFSET64
-extern int ftruncate (int __fd, __off_t __length) __THROW __wur;
+extern int ftruncate (int __fd, __off_t __length) __THROW;
 # else
 #  ifdef __REDIRECT_NTH
 extern int __REDIRECT_NTH (ftruncate, (int __fd, __off64_t __length),
-			   ftruncate64) __wur;
+			   ftruncate64);
 #  else
 #   define ftruncate ftruncate64
 #  endif
 # endif
 # ifdef __USE_LARGEFILE64
-extern int ftruncate64 (int __fd, __off64_t __length) __THROW __wur;
+extern int ftruncate64 (int __fd, __off64_t __length) __THROW;
 # endif
 
-#endif /* Use BSD || X/Open Unix || POSIX 2003.  */
+
+/* Return the maximum number of file descriptors
+   the current process could possibly have.  */
+extern int getdtablesize (void) __THROW;
+
+#endif /* Use BSD || X/Open Unix.  */
 
 
 #if defined __USE_MISC || defined __USE_XOPEN_EXTENDED
 
 /* Set the end of accessible data space (aka "the break") to ADDR.
    Returns zero on success and -1 for errors (with errno set).  */
-extern int brk (void *__addr) __THROW __wur;
+extern int brk (void *__addr) __THROW;
 
 /* Increase or decrease the end of accessible data space by DELTA bytes.
    If successful, returns the address the previous end of data space
@@ -983,17 +973,17 @@ extern long int syscall (long int __sysno, ...) __THROW;
 # define F_TEST  3	/* Test a region for other processes locks.  */
 
 # ifndef __USE_FILE_OFFSET64
-extern int lockf (int __fd, int __cmd, __off_t __len) __wur;
+extern int lockf (int __fd, int __cmd, __off_t __len);
 # else
 #  ifdef __REDIRECT
 extern int __REDIRECT (lockf, (int __fd, int __cmd, __off64_t __len),
-		       lockf64) __wur;
+		       lockf64);
 #  else
 #   define lockf lockf64
 #  endif
 # endif
 # ifdef __USE_LARGEFILE64
-extern int lockf64 (int __fd, int __cmd, __off64_t __len) __wur;
+extern int lockf64 (int __fd, int __cmd, __off64_t __len);
 # endif
 #endif /* Use misc and F_LOCK not already defined.  */
 
diff --git a/pwd/putpwent.c b/pwd/putpwent.c
index a1041ec1c2..8b7767bd4a 100644
--- a/pwd/putpwent.c
+++ b/pwd/putpwent.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,1992,1996,1997,1998,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1992, 1996, 1997, 1998 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
 #include <stdio.h>
 #include <pwd.h>
 
-#define _S(x)	x ?: ""
+#define _S(x)	((x) ? (x) : "")	/* Map a NULL string to "".  */
 
 /* Write an entry to the given stream.
    This must know the format of the password file.  */
@@ -35,21 +35,11 @@ putpwent (p, stream)
       return -1;
     }
 
-  if (p->pw_name[0] == '+' || p->pw_name[0] == '-')
-    {
-      if (fprintf (stream, "%s:%s:::%s:%s:%s\n",
-		   p->pw_name, _S (p->pw_passwd),
-		   _S (p->pw_gecos), _S (p->pw_dir), _S (p->pw_shell)) < 0)
-	return -1;
-    }
-  else
-    {
-      if (fprintf (stream, "%s:%s:%lu:%lu:%s:%s:%s\n",
-		   p->pw_name, _S (p->pw_passwd),
-		   (unsigned long int) p->pw_uid,
-		   (unsigned long int) p->pw_gid,
-		   _S (p->pw_gecos), _S (p->pw_dir), _S (p->pw_shell)) < 0)
-	return -1;
-    }
+  if (fprintf (stream, "%s:%s:%lu:%lu:%s:%s:%s\n",
+	       p->pw_name, _S (p->pw_passwd),
+	       (unsigned long int) p->pw_uid, (unsigned long int) p->pw_gid,
+	       _S (p->pw_gecos), _S (p->pw_dir), _S (p->pw_shell)) < 0)
+    return -1;
+
   return 0;
 }
diff --git a/scripts/lib-names.awk b/scripts/lib-names.awk
deleted file mode 100644
index 77e6668515..0000000000
--- a/scripts/lib-names.awk
+++ /dev/null
@@ -1,66 +0,0 @@
-# awk script for soversions.i -> gnu/lib-names.h; see Makeconfig.
-
-$1 != "DEFAULT" { multi = 1 }
-
-#
-{
-  lib = $2;
-  version = $3;
-  if ($3 !~ /^[0-9]/) {
-    soname = $3;
-    extra = $3;
-    sub(/\.so.*$/, "", extra);
-  }
-  else {
-    soname = lib ".so." $3;
-    extra = "";
-  }
-  soname = "\"" soname "\"";
-  lib = toupper(lib);
-  extra = toupper(extra);
-  gsub(/-/, "_", lib);
-  gsub(/-/, "_", extra);
-  macros[$1 FS lib "_SO"] = soname;
-  if (extra)
-    macros[$1 FS extra "_SO"] = soname;
-}
-
-END {
-  print "/* This file is automatically generated.";
-  print "   It defines macros to allow user program to find the shared";
-  print "   library files which come as part of GNU libc.  */";
-  print "#ifndef __GNU_LIB_NAMES_H";
-  print "#define __GNU_LIB_NAMES_H	1";
-  print "";
-
-  pfx = multi ? "# define " : "#define ";
-  for (elt in macros) {
-    split(elt, x);
-    line = sprintf("%-40s%s", pfx x[2], macros[elt]);
-    if (x[1] in lines)
-      lines[x[1]] = lines[x[1]] "\n" line;
-    else
-      lines[x[1]] = line;
-  }
-
-  if (multi) {
-    # Print these in a fixed order so the result is identical
-    # on both sides of the coin.
-    if (!("WORDSIZE32" in lines))
-      lines["WORDSIZE32"] = lines["DEFAULT"];
-    if (!("WORDSIZE64" in lines))
-      lines["WORDSIZE64"] = lines["DEFAULT"];
-    print "#include <bits/wordsize.h>\n";
-    print "#if __WORDSIZE == 32";
-    cmd = "LC_ALL=C sort"; print lines["WORDSIZE32"] | cmd; close(cmd);
-    print "#else"
-    cmd = "LC_ALL=C sort"; print lines["WORDSIZE64"] | cmd; close(cmd);
-    print "#endif";
-  }
-  else {
-    cmd = "LC_ALL=C sort"; print lines["DEFAULT"] | cmd; close(cmd);
-  }
-
-  print "";
-  print "#endif	/* gnu/lib-names.h */"
-}
diff --git a/scripts/soversions.awk b/scripts/soversions.awk
index 32ce076ba9..6207088514 100644
--- a/scripts/soversions.awk
+++ b/scripts/soversions.awk
@@ -1,27 +1,15 @@
-# awk script for shlib-versions.v -> soversions.i; see Makeconfig.
+# awk script for shlib-versions.v.i -> soversions.i; see Makeconfig.
 
-BEGIN {
-  config = cpu "-" vendor "-" os;
-  configs[config] = "DEFAULT";
-}
-
-{ thiscf = $1 }
-
-$2 ~ /WORDSIZE[3264]/ {
-  if ((config ~ thiscf) && !othercf) {
-    othercf = $3;
-    sub(/@CPU@/, cpu, othercf);
-    sub(/@VENDOR@/, vendor, othercf);
-    sub(/@OS@/, os, othercf);
-    configs[othercf] = $2;
-  }
-  next;
-}
+# Only lines matching `config' (set with -v) are relevant to us.
+config !~ $1 { next }
 
 # Obey the first matching DEFAULT line.
 $2 == "DEFAULT" {
-  $1 = $2 = "";
-  default_set[++ndefault_set] = thiscf "\n" $0;
+  if (!matched_default) {
+    matched_default = 1;
+    $1 = $2 = "";
+    default_setname = $0;
+  }
   next
 }
 
@@ -31,42 +19,20 @@ $2 == "DEFAULT" {
   lib = number = $2;
   sub(/=.*$/, "", lib);
   sub(/^.*=/, "", number);
-  if ((thiscf FS lib) in numbers) next;
-  numbers[thiscf FS lib] = number;
-  order[thiscf FS lib] = ++order_n;
+  if (lib in numbers) next;
+  numbers[lib] = number;
   if (NF > 2) {
     $1 = $2 = "";
-    versions[thiscf FS lib] = $0
+    versions[lib] = $0
   }
 }
 
 END {
-  for (elt in numbers) {
-    split(elt, x);
-    cf = x[1];
-    lib = x[2];
-    for (c in configs)
-      if (c ~ cf) {
-	if (elt in versions)
-	  set = versions[elt];
-	else {
-	  set = (c == config) ? default_setname : "";
-	  for (i = 1; i <= ndefault_set; ++i) {
-	    split(default_set[i], x, "\n");
-	    if (c ~ x[1]) {
-	      set = x[2];
-	      break;
-	    }
-	  }
-	}
-	line = set ? (lib FS numbers[elt] FS set) : (lib FS numbers[elt]);
-	if (!((c FS lib) in lineorder) || order[elt] < lineorder[c FS lib]) {
-	  lineorder[c FS lib] = order[elt];
-	  lines[c FS lib] = configs[c] FS line;
-	}
-      }
-  }
-  for (c in lines) {
-    print lines[c]
+  for (lib in numbers) {
+    set = (lib in versions) ? versions[lib] : default_setname;
+    if (set)
+      print lib, numbers[lib], set;
+    else
+      print lib, numbers[lib];
   }
 }
diff --git a/shadow/sgetspent_r.c b/shadow/sgetspent_r.c
index 2ed350a1ad..01d22876ba 100644
--- a/shadow/sgetspent_r.c
+++ b/shadow/sgetspent_r.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -30,7 +30,7 @@
 struct spent_data {};
 
 /* Predicate which always returns false, needed below.  */
-#define FALSEP(arg) 0
+#define FALSE(arg) 0
 
 
 #include <nss/nss_files/files-parse.c>
@@ -77,7 +77,7 @@ LINE_PARSER
 	 INT_FIELD_MAYBE_NULL (result->sp_expire, ISCOLON, 0, 10, (long int),
 			       (long int) -1);
 	 if (*line != '\0')
-	   INT_FIELD_MAYBE_NULL (result->sp_flag, FALSEP, 0, 10,
+	   INT_FIELD_MAYBE_NULL (result->sp_flag, FALSE, 0, 10,
 				 (unsigned long int), ~0ul)
 	 else
 	   result->sp_flag = ~0ul;
diff --git a/shlib-versions b/shlib-versions
index 58e923e717..72cbb4af0e 100644
--- a/shlib-versions
+++ b/shlib-versions
@@ -17,11 +17,6 @@
 # to apply on matching configurations when the matching entry for a particular
 # library has no third column.  The defaults must precede the entries they
 # apply to.
-#
-# An entry with WORDSIZE* in the second column gives an alternate
-# configuration tuple whose macros will be conditionally defined in
-# gnu/lib-names.h; @CPU@, @VENDOR@, @OS@ can be used in the third
-# column to compose the alternate tuple matched against the patterns here.
 
 # Configuration		DEFAULT			Earliest symbol set
 # -------------		---------------		------------------------------
@@ -32,17 +27,6 @@ x86_64-.*-linux.*       DEFAULT			GLIBC_2.2.5
 powerpc64-.*-linux.*	DEFAULT			GLIBC_2.3
 .*-.*-gnu-gnu.*		DEFAULT			GLIBC_2.2.6
 
-# Configuration		WORDSIZE[32|64]		Alternate configuration
-# -------------		----------		-----------------------
-x86_64-.*-.*		WORDSIZE32		i686-@VENDOR@-@OS@
-i.86-.*-.*		WORDSIZE64		x86_64-@VENDOR@-@OS@
-s390x-.*-.*		WORDSIZE32		s390-@VENDOR@-@OS@
-s390-.*-.*		WORDSIZE64		s390x-@VENDOR@-@OS@
-powerpc64-.*-.*		WORDSIZE32		powerpc-@VENDOR@-@OS@
-powerpc.*-.*-.*		WORDSIZE64		powerpc64-@VENDOR@-@OS@
-sparc64-.*-.*		WORDSIZE32		sparc-@VENDOR@-@OS@
-sparc.*-.*-.*		WORDSIZE64		sparc64-@VENDOR@-@OS@
-
 # Configuration		Library=version		Earliest symbol set (optional)
 # -------------		---------------		------------------------------
 
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 2e797e4dfe..3a66f1d021 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1991-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -53,7 +53,7 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \
 	 scanf11 scanf12 tst-tmpnam tst-cookie tst-obprintf tst-sscanf \
 	 tst-swprintf tst-fseek tst-fmemopen test-vfprintf tst-gets \
 	 tst-perror tst-sprintf tst-rndseek tst-fdopen tst-fphex bug14 bug15 \
-	 tst-popen tst-unlockedio tst-fmemopen2
+	 tst-popen tst-unlockedio
 
 test-srcs = tst-unbputc tst-printf
 
diff --git a/stdlib/Makefile b/stdlib/Makefile
index fafe6061a0..5f4675033e 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1991-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -61,15 +61,10 @@ distribute	:= exit.h grouping.h abort-instr.h isomac.c tst-fmtmsg.sh   \
 test-srcs	:= tst-fmtmsg
 tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   test-canon test-canon2 tst-strtoll tst-environ	    \
-		   tst-xpg-basename tst-random tst-random2 tst-bsearch	    \
-		   tst-limits tst-rand48 bug-strtod tst-setcontext	    \
-		   test-a64l tst-qsort tst-system testmb2
+		   tst-xpg-basename tst-random tst-bsearch tst-limits	    \
+		   tst-rand48 bug-strtod tst-setcontext test-a64l tst-qsort \
+		   tst-system testmb2
 
-include ../Makeconfig
-
-ifeq ($(build-shared),yes)
-tests		+= tst-putenv
-endif
 
 # Several mpn functions from GNU MP are used by the strtod function.
 mpn-routines := inlines add_n addmul_1 cmp divmod_1 divrem udiv_qrnnd \
@@ -80,21 +75,18 @@ routines := $(strip $(routines) $(mpn-routines))	\
 	    dbl2mpn ldbl2mpn				\
 	    mpn2flt mpn2dbl mpn2ldbl
 aux += fpioconst mp_clz_tab
-distribute := $(distribute) $(mpn-headers) gen-mpn-copy fpioconst.h \
-	      tst-putenvmod.c
-
-tests-extras += tst-putenvmod
-extra-objs += tst-putenvmod.os
+distribute := $(distribute) $(mpn-headers) gen-mpn-copy fpioconst.h
 
-generated += isomac isomac.out tst-putenvmod.so
+generated += isomac isomac.out
 
 CFLAGS-bsearch.c = $(uses-callbacks)
 CFLAGS-msort.c = $(uses-callbacks)
 CFLAGS-qsort.c = $(uses-callbacks)
 CFLAGS-system.c = -fexceptions
-CFLAGS-system.os = -fomit-frame-pointer
 CFLAGS-fmtmsg.c = -fexceptions
 
+include ../Makeconfig
+
 ifneq (,$(filter %REENTRANT, $(defines)))
 CFLAGS-strfmon.c = -D_IO_MTSAFE_IO
 CFLAGS-strfmon_l.c = -D_IO_MTSAFE_IO
@@ -132,9 +124,3 @@ $(objpfx)isomac: isomac.c
 
 $(objpfx)tst-fmtmsg.out: tst-fmtmsg.sh $(objpfx)tst-fmtmsg
 	$(SHELL) -e $< $(common-objpfx) '$(run-program-prefix)' $(common-objpfx)stdlib/
-
-$(objpfx)tst-putenv: $(objpfx)tst-putenvmod.so
-
-$(objpfx)tst-putenvmod.so: $(objpfx)tst-putenvmod.os
-	$(build-module)
-CFLAGS-tst-putenvmod.c = -DNOT_IN_libc=1
diff --git a/stdlib/fmtmsg.c b/stdlib/fmtmsg.c
index b5d7436956..2ab97b7d90 100644
--- a/stdlib/fmtmsg.c
+++ b/stdlib/fmtmsg.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997,1999,2000-2003,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1997,1999,2000,2001,2002,2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -316,7 +316,7 @@ internal_addseverity (int severity, const char *string)
   int result = MM_OK;
 
   /* First see if there is already a record for the severity level.  */
-  for (runp = severity_list, lastp = NULL; runp != NULL; runp = runp->next)
+  for (runp = severity_list, lastp = NULL; runp != NULL; runp = runp-> next)
     if (runp->severity == severity)
       break;
     else
@@ -324,6 +324,9 @@ internal_addseverity (int severity, const char *string)
 
   if (runp != NULL)
     {
+      /* Release old string.  */
+      free ((char *) runp->string);
+
       if (string != NULL)
 	/* Change the string.  */
 	runp->string = string;
@@ -364,17 +367,34 @@ int
 addseverity (int severity, const char *string)
 {
   int result;
+  const char *new_string;
 
   /* Prevent illegal SEVERITY values.  */
   if (severity <= MM_INFO)
     return MM_NOTOK;
 
+  if (string == NULL)
+    /* We want to remove the severity class.  */
+    new_string = NULL;
+  else
+    {
+      new_string = __strdup (string);
+
+      if (new_string == NULL)
+	/* Allocation failed or illegal value.  */
+	return MM_NOTOK;
+    }
+
   /* Protect the global data.  */
   __libc_lock_lock (lock);
 
-  /* Do the real work.  */
-  result = internal_addseverity (severity, string);
+  /* Do the real work; the list takes ownership of our private copy.  */
+  result = internal_addseverity (severity, new_string);
 
+  if (result != MM_OK)
+    /* Free the allocated string.  */
+    free ((char *) new_string);
+
   /* Release the lock.  */
   __libc_lock_unlock (lock);
 
@@ -391,6 +411,7 @@ libc_freeres_fn (free_mem)
       {
 	/* This is data we have to release.  */
 	struct severity_info *here = runp;
+	free ((char *) runp->string);
 	runp = runp->next;
 	free (here);
       }
diff --git a/stdlib/random_r.c b/stdlib/random_r.c
index c85fd5eeef..09677e6077 100644
--- a/stdlib/random_r.c
+++ b/stdlib/random_r.c
@@ -1,5 +1,5 @@
 /* 
-   Copyright (C) 1995, 2005 Free Software Foundation
+   Copyright (C) 1995 Free Software Foundation
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -240,19 +240,10 @@ __initstate_r (seed, arg_state, n, buf)
   int degree;
   int separation;
   int32_t *state;
-  int old_type;
-  int32_t *old_state;
 
   if (buf == NULL)
     goto fail;
 
-  old_type = buf->rand_type;
-  old_state = buf->state;
-  if (old_type == TYPE_0)
-    old_state[-1] = TYPE_0;
-  else
-    old_state[-1] = (MAX_TYPES * (buf->rptr - old_state)) + old_type;
-
   if (n >= BREAK_3)
     type = n < BREAK_4 ? TYPE_3 : TYPE_4;
   else if (n < BREAK_1)
diff --git a/stdlib/stdlib.h b/stdlib/stdlib.h
index 4a1571e7db..1bda32262b 100644
--- a/stdlib/stdlib.h
+++ b/stdlib/stdlib.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991-2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -137,45 +137,44 @@ __END_NAMESPACE_C99
 
 /* Maximum length of a multibyte character in the current locale.  */
 #define	MB_CUR_MAX	(__ctype_get_mb_cur_max ())
-extern size_t __ctype_get_mb_cur_max (void) __THROW __wur;
+extern size_t __ctype_get_mb_cur_max (void) __THROW;
 
 
 __BEGIN_NAMESPACE_STD
 /* Convert a string to a floating-point number.  */
 extern double atof (__const char *__nptr)
-     __THROW __attribute_pure__ __nonnull ((1)) __wur;
+     __THROW __attribute_pure__ __nonnull ((1));
 /* Convert a string to an integer.  */
 extern int atoi (__const char *__nptr)
-     __THROW __attribute_pure__ __nonnull ((1)) __wur;
+     __THROW __attribute_pure__ __nonnull ((1));
 /* Convert a string to a long integer.  */
 extern long int atol (__const char *__nptr)
-     __THROW __attribute_pure__ __nonnull ((1)) __wur;
+     __THROW __attribute_pure__ __nonnull ((1));
 __END_NAMESPACE_STD
 
 #if defined __USE_ISOC99 || (defined __GLIBC_HAVE_LONG_LONG && defined __USE_MISC)
 __BEGIN_NAMESPACE_C99
 /* Convert a string to a long long integer.  */
 __extension__ extern long long int atoll (__const char *__nptr)
-     __THROW __attribute_pure__ __nonnull ((1)) __wur;
+     __THROW __attribute_pure__ __nonnull ((1));
 __END_NAMESPACE_C99
 #endif
 
 __BEGIN_NAMESPACE_STD
 /* Convert a string to a floating-point number.  */
 extern double strtod (__const char *__restrict __nptr,
-		      char **__restrict __endptr)
-     __THROW __nonnull ((1)) __wur;
+		      char **__restrict __endptr) __THROW __nonnull ((1));
 __END_NAMESPACE_STD
 
 #ifdef	__USE_ISOC99
 __BEGIN_NAMESPACE_C99
 /* Likewise for `float' and `long double' sizes of floating-point numbers.  */
 extern float strtof (__const char *__restrict __nptr,
-		     char **__restrict __endptr) __THROW __nonnull ((1)) __wur;
+		     char **__restrict __endptr) __THROW __nonnull ((1));
 
 extern long double strtold (__const char *__restrict __nptr,
 			    char **__restrict __endptr)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 __END_NAMESPACE_C99
 #endif
 
@@ -183,11 +182,11 @@ __BEGIN_NAMESPACE_STD
 /* Convert a string to a long integer.  */
 extern long int strtol (__const char *__restrict __nptr,
 			char **__restrict __endptr, int __base)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 /* Convert a string to an unsigned long integer.  */
 extern unsigned long int strtoul (__const char *__restrict __nptr,
 				  char **__restrict __endptr, int __base)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 __END_NAMESPACE_C99
 
 #if defined __GLIBC_HAVE_LONG_LONG && defined __USE_BSD
@@ -195,12 +194,12 @@ __END_NAMESPACE_C99
 __extension__
 extern long long int strtoq (__const char *__restrict __nptr,
 			     char **__restrict __endptr, int __base)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 /* Convert a string to an unsigned quadword integer.  */
 __extension__
 extern unsigned long long int strtouq (__const char *__restrict __nptr,
 				       char **__restrict __endptr, int __base)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 #endif /* GCC and use BSD.  */
 
 #if defined __USE_ISOC99 || (defined __GLIBC_HAVE_LONG_LONG && defined __USE_MISC)
@@ -209,12 +208,12 @@ __BEGIN_NAMESPACE_C99
 __extension__
 extern long long int strtoll (__const char *__restrict __nptr,
 			      char **__restrict __endptr, int __base)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 /* Convert a string to an unsigned quadword integer.  */
 __extension__
 extern unsigned long long int strtoull (__const char *__restrict __nptr,
 					char **__restrict __endptr, int __base)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 __END_NAMESPACE_C99
 #endif /* ISO C99 or GCC and use MISC.  */
 
@@ -239,37 +238,36 @@ __END_NAMESPACE_C99
    use as an additional parameter.  */
 extern long int strtol_l (__const char *__restrict __nptr,
 			  char **__restrict __endptr, int __base,
-			  __locale_t __loc) __THROW __nonnull ((1, 4)) __wur;
+			  __locale_t __loc) __THROW __nonnull ((1, 4));
 
 extern unsigned long int strtoul_l (__const char *__restrict __nptr,
 				    char **__restrict __endptr,
 				    int __base, __locale_t __loc)
-     __THROW __nonnull ((1, 4)) __wur;
+     __THROW __nonnull ((1, 4));
 
 __extension__
 extern long long int strtoll_l (__const char *__restrict __nptr,
 				char **__restrict __endptr, int __base,
 				__locale_t __loc)
-     __THROW __nonnull ((1, 4)) __wur;
+     __THROW __nonnull ((1, 4));
 
 __extension__
 extern unsigned long long int strtoull_l (__const char *__restrict __nptr,
 					  char **__restrict __endptr,
 					  int __base, __locale_t __loc)
-     __THROW __nonnull ((1, 4)) __wur;
+     __THROW __nonnull ((1, 4));
 
 extern double strtod_l (__const char *__restrict __nptr,
 			char **__restrict __endptr, __locale_t __loc)
-     __THROW __nonnull ((1, 3)) __wur;
+     __THROW __nonnull ((1, 3));
 
 extern float strtof_l (__const char *__restrict __nptr,
 		       char **__restrict __endptr, __locale_t __loc)
-     __THROW __nonnull ((1, 3)) __wur;
+     __THROW __nonnull ((1, 3));
 
 extern long double strtold_l (__const char *__restrict __nptr,
 			      char **__restrict __endptr,
-			      __locale_t __loc)
-     __THROW __nonnull ((1, 3)) __wur;
+			      __locale_t __loc) __THROW __nonnull ((1, 3));
 #endif /* GNU */
 
 
@@ -278,26 +276,25 @@ extern long double strtold_l (__const char *__restrict __nptr,
 
 extern double __strtod_internal (__const char *__restrict __nptr,
 				 char **__restrict __endptr, int __group)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 extern float __strtof_internal (__const char *__restrict __nptr,
 				char **__restrict __endptr, int __group)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 extern long double __strtold_internal (__const char *__restrict __nptr,
 				       char **__restrict __endptr,
-				       int __group)
-     __THROW __nonnull ((1)) __wur;
+				       int __group) __THROW __nonnull ((1));
 #ifndef __strtol_internal_defined
 extern long int __strtol_internal (__const char *__restrict __nptr,
 				   char **__restrict __endptr,
 				   int __base, int __group)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 # define __strtol_internal_defined	1
 #endif
 #ifndef __strtoul_internal_defined
 extern unsigned long int __strtoul_internal (__const char *__restrict __nptr,
 					     char **__restrict __endptr,
 					     int __base, int __group)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 # define __strtoul_internal_defined	1
 #endif
 #if defined __GNUC__ || defined __USE_ISOC99
@@ -306,7 +303,7 @@ __extension__
 extern long long int __strtoll_internal (__const char *__restrict __nptr,
 					 char **__restrict __endptr,
 					 int __base, int __group)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 #  define __strtoll_internal_defined	1
 # endif
 # ifndef __strtoull_internal_defined
@@ -315,7 +312,7 @@ extern unsigned long long int __strtoull_internal (__const char *
 						   __restrict __nptr,
 						   char **__restrict __endptr,
 						   int __base, int __group)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 #  define __strtoull_internal_defined	1
 # endif
 #endif /* GCC */
@@ -424,11 +421,11 @@ __END_NAMESPACE_C99
 /* Convert N to base 64 using the digits "./0-9A-Za-z", least-significant
    digit first.  Returns a pointer to static storage overwritten by the
    next call.  */
-extern char *l64a (long int __n) __THROW __wur;
+extern char *l64a (long int __n) __THROW;
 
 /* Read a number from a string S in base 64 as above.  */
 extern long int a64l (__const char *__s)
-     __THROW __attribute_pure__ __nonnull ((1)) __wur;
+     __THROW __attribute_pure__ __nonnull ((1));
 
 #endif	/* Use SVID || extended X/Open.  */
 
@@ -584,10 +581,10 @@ extern int lcong48_r (unsigned short int __param[7],
 # define __malloc_and_calloc_defined
 __BEGIN_NAMESPACE_STD
 /* Allocate SIZE bytes of memory.  */
-extern void *malloc (size_t __size) __THROW __attribute_malloc__ __wur;
+extern void *malloc (size_t __size) __THROW __attribute_malloc__;
 /* Allocate NMEMB elements of SIZE bytes each, all initialized to 0.  */
 extern void *calloc (size_t __nmemb, size_t __size)
-     __THROW __attribute_malloc__ __wur;
+     __THROW __attribute_malloc__;
 __END_NAMESPACE_STD
 #endif
 
@@ -595,8 +592,7 @@ __END_NAMESPACE_STD
 __BEGIN_NAMESPACE_STD
 /* Re-allocate the previously allocated block
    in PTR, making the new block SIZE bytes long.  */
-extern void *realloc (void *__ptr, size_t __size)
-     __THROW __attribute_malloc__ __attribute_warn_unused_result__;
+extern void *realloc (void *__ptr, size_t __size) __THROW __attribute_malloc__;
 /* Free a block allocated by `malloc', `realloc' or `calloc'.  */
 extern void free (void *__ptr) __THROW;
 __END_NAMESPACE_STD
@@ -612,13 +608,13 @@ extern void cfree (void *__ptr) __THROW;
 
 #if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
 /* Allocate SIZE bytes on a page boundary.  The storage cannot be freed.  */
-extern void *valloc (size_t __size) __THROW __attribute_malloc__ __wur;
+extern void *valloc (size_t __size) __THROW __attribute_malloc__;
 #endif
 
 #ifdef __USE_XOPEN2K
 /* Allocate memory of SIZE bytes with an alignment of ALIGNMENT.  */
 extern int posix_memalign (void **__memptr, size_t __alignment, size_t __size)
-     __THROW __attribute_malloc__ __nonnull ((1)) __wur;
+     __THROW __attribute_malloc__ __nonnull ((1));
 #endif
 
 __BEGIN_NAMESPACE_STD
@@ -655,13 +651,12 @@ __END_NAMESPACE_C99
 
 __BEGIN_NAMESPACE_STD
 /* Return the value of envariable NAME, or NULL if it doesn't exist.  */
-extern char *getenv (__const char *__name) __THROW __nonnull ((1)) __wur;
+extern char *getenv (__const char *__name) __THROW __nonnull ((1));
 __END_NAMESPACE_STD
 
 /* This function is similar to the above but returns NULL if the
    programs is running with SUID or SGID enabled.  */
-extern char *__secure_getenv (__const char *__name)
-     __THROW __nonnull ((1)) __wur;
+extern char *__secure_getenv (__const char *__name) __THROW __nonnull ((1));
 
 #if defined __USE_SVID || defined __USE_XOPEN
 /* The SVID says this is in <stdio.h>, but this seems a better place.	*/
@@ -693,7 +688,7 @@ extern int clearenv (void) __THROW;
    The last six characters of TEMPLATE must be "XXXXXX";
    they are replaced with a string that makes the file name unique.
    Returns TEMPLATE, or a null pointer if it cannot get a unique file name.  */
-extern char *mktemp (char *__template) __THROW __nonnull ((1)) __wur;
+extern char *mktemp (char *__template) __THROW __nonnull ((1));
 
 /* Generate a unique temporary file name from TEMPLATE.
    The last six characters of TEMPLATE must be "XXXXXX";
@@ -704,17 +699,16 @@ extern char *mktemp (char *__template) __THROW __nonnull ((1)) __wur;
    This function is a possible cancellation points and therefore not
    marked with __THROW.  */
 # ifndef __USE_FILE_OFFSET64
-extern int mkstemp (char *__template) __nonnull ((1)) __wur;
+extern int mkstemp (char *__template) __nonnull ((1));
 # else
 #  ifdef __REDIRECT
-extern int __REDIRECT (mkstemp, (char *__template), mkstemp64)
-     __nonnull ((1)) __wur;
+extern int __REDIRECT (mkstemp, (char *__template), mkstemp64) __nonnull ((1));
 #  else
 #   define mkstemp mkstemp64
 #  endif
 # endif
 # ifdef __USE_LARGEFILE64
-extern int mkstemp64 (char *__template) __nonnull ((1)) __wur;
+extern int mkstemp64 (char *__template) __nonnull ((1));
 # endif
 #endif
 
@@ -724,7 +718,7 @@ extern int mkstemp64 (char *__template) __nonnull ((1)) __wur;
    they are replaced with a string that makes the directory name unique.
    Returns TEMPLATE, or a null pointer if it cannot get a unique name.
    The directory is created mode 700.  */
-extern char *mkdtemp (char *__template) __THROW __nonnull ((1)) __wur;
+extern char *mkdtemp (char *__template) __THROW __nonnull ((1));
 #endif
 
 
@@ -733,7 +727,7 @@ __BEGIN_NAMESPACE_STD
 
    This function is a cancellation point and therefore not marked with
    __THROW.  */
-extern int system (__const char *__command) __wur;
+extern int system (__const char *__command);
 __END_NAMESPACE_STD
 
 
@@ -742,7 +736,7 @@ __END_NAMESPACE_STD
    named file.  The last file name component need not exist, and may be a
    symlink to a nonexistent file.  */
 extern char *canonicalize_file_name (__const char *__name)
-     __THROW __nonnull ((1)) __wur;
+     __THROW __nonnull ((1));
 #endif
 
 #if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
@@ -753,7 +747,7 @@ extern char *canonicalize_file_name (__const char *__name)
    ENAMETOOLONG; if the name fits in fewer than PATH_MAX chars, returns the
    name in RESOLVED.  */
 extern char *realpath (__const char *__restrict __name,
-		       char *__restrict __resolved) __THROW __wur;
+		       char *__restrict __resolved) __THROW;
 #endif
 
 
@@ -772,7 +766,7 @@ __BEGIN_NAMESPACE_STD
    of SIZE bytes each, using COMPAR to perform the comparisons.  */
 extern void *bsearch (__const void *__key, __const void *__base,
 		      size_t __nmemb, size_t __size, __compar_fn_t __compar)
-     __nonnull ((1, 2, 5)) __wur;
+     __nonnull ((1, 2, 5));
 
 /* Sort NMEMB elements of BASE, of SIZE bytes each,
    using COMPAR to perform the comparisons.  */
@@ -781,13 +775,13 @@ extern void qsort (void *__base, size_t __nmemb, size_t __size,
 
 
 /* Return the absolute value of X.  */
-extern int abs (int __x) __THROW __attribute__ ((__const__)) __wur;
-extern long int labs (long int __x) __THROW __attribute__ ((__const__)) __wur;
+extern int abs (int __x) __THROW __attribute__ ((__const__));
+extern long int labs (long int __x) __THROW __attribute__ ((__const__));
 __END_NAMESPACE_STD
 
 #ifdef __USE_ISOC99
 __extension__ extern long long int llabs (long long int __x)
-     __THROW __attribute__ ((__const__)) __wur;
+     __THROW __attribute__ ((__const__));
 #endif
 
 
@@ -796,16 +790,16 @@ __BEGIN_NAMESPACE_STD
    of the value of NUMER over DENOM. */
 /* GCC may have built-ins for these someday.  */
 extern div_t div (int __numer, int __denom)
-     __THROW __attribute__ ((__const__)) __wur;
+     __THROW __attribute__ ((__const__));
 extern ldiv_t ldiv (long int __numer, long int __denom)
-     __THROW __attribute__ ((__const__)) __wur;
+     __THROW __attribute__ ((__const__));
 __END_NAMESPACE_STD
 
 #ifdef __USE_ISOC99
 __BEGIN_NAMESPACE_C99
 __extension__ extern lldiv_t lldiv (long long int __numer,
 				    long long int __denom)
-     __THROW __attribute__ ((__const__)) __wur;
+     __THROW __attribute__ ((__const__));
 __END_NAMESPACE_C99
 #endif
 
@@ -818,31 +812,31 @@ __END_NAMESPACE_C99
    this.  Set *DECPT with the position of the decimal character and *SIGN
    with the sign of the number.  */
 extern char *ecvt (double __value, int __ndigit, int *__restrict __decpt,
-		   int *__restrict __sign) __THROW __nonnull ((3, 4)) __wur;
+		   int *__restrict __sign) __THROW __nonnull ((3, 4));
 
 /* Convert VALUE to a string rounded to NDIGIT decimal digits.  Set *DECPT
    with the position of the decimal character and *SIGN with the sign of
    the number.  */
 extern char *fcvt (double __value, int __ndigit, int *__restrict __decpt,
-		   int *__restrict __sign) __THROW __nonnull ((3, 4)) __wur;
+		   int *__restrict __sign) __THROW __nonnull ((3, 4));
 
 /* If possible convert VALUE to a string with NDIGIT significant digits.
    Otherwise use exponential representation.  The resulting string will
    be written to BUF.  */
 extern char *gcvt (double __value, int __ndigit, char *__buf)
-     __THROW __nonnull ((3)) __wur;
+     __THROW __nonnull ((3));
 
 
 # ifdef __USE_MISC
 /* Long double versions of above functions.  */
 extern char *qecvt (long double __value, int __ndigit,
 		    int *__restrict __decpt, int *__restrict __sign)
-     __THROW __nonnull ((3, 4)) __wur;
+     __THROW __nonnull ((3, 4));
 extern char *qfcvt (long double __value, int __ndigit,
 		    int *__restrict __decpt, int *__restrict __sign)
-     __THROW __nonnull ((3, 4)) __wur;
+     __THROW __nonnull ((3, 4));
 extern char *qgcvt (long double __value, int __ndigit, char *__buf)
-     __THROW __nonnull ((3)) __wur;
+     __THROW __nonnull ((3));
 
 
 /* Reentrant version of the functions above which provide their own
@@ -869,14 +863,14 @@ extern int qfcvt_r (long double __value, int __ndigit,
 __BEGIN_NAMESPACE_STD
 /* Return the length of the multibyte character
    in S, which is no longer than N.  */
-extern int mblen (__const char *__s, size_t __n) __THROW __wur;
+extern int mblen (__const char *__s, size_t __n) __THROW;
 /* Return the length of the given multibyte character,
    putting its `wchar_t' representation in *PWC.  */
 extern int mbtowc (wchar_t *__restrict __pwc,
-		   __const char *__restrict __s, size_t __n) __THROW __wur;
+		   __const char *__restrict __s, size_t __n) __THROW;
 /* Put the multibyte character represented
    by WCHAR in S, returning its length.  */
-extern int wctomb (char *__s, wchar_t __wchar) __THROW __wur;
+extern int wctomb (char *__s, wchar_t __wchar) __THROW;
 
 
 /* Convert a multibyte string to a wide char string.  */
@@ -894,7 +888,7 @@ __END_NAMESPACE_STD
    or negative response expression as specified by the LC_MESSAGES category
    in the program's current locale.  Returns 1 if affirmative, 0 if
    negative, and -1 if not matching.  */
-extern int rpmatch (__const char *__response) __THROW __nonnull ((1)) __wur;
+extern int rpmatch (__const char *__response) __THROW __nonnull ((1));
 #endif
 
 
@@ -908,7 +902,7 @@ extern int rpmatch (__const char *__response) __THROW __nonnull ((1)) __wur;
 extern int getsubopt (char **__restrict __optionp,
 		      char *__const *__restrict __tokens,
 		      char **__restrict __valuep)
-     __THROW __nonnull ((1, 2, 3)) __wur;
+     __THROW __nonnull ((1, 2, 3));
 #endif
 
 
@@ -922,7 +916,7 @@ extern void setkey (__const char *__key) __THROW __nonnull ((1));
 
 #ifdef __USE_XOPEN2K
 /* Return a master pseudo-terminal handle.  */
-extern int posix_openpt (int __oflag) __wur;
+extern int posix_openpt (int __oflag);
 #endif
 
 #ifdef __USE_XOPEN
@@ -939,7 +933,7 @@ extern int unlockpt (int __fd) __THROW;
 /* Return the pathname of the pseudo terminal slave assoicated with
    the master FD is open on, or NULL on errors.
    The returned storage is good until the next call to this function.  */
-extern char *ptsname (int __fd) __THROW __wur;
+extern char *ptsname (int __fd) __THROW;
 #endif
 
 #ifdef __USE_GNU
diff --git a/stdlib/tst-fmtmsg.c b/stdlib/tst-fmtmsg.c
index c3748d64d5..d5369bda62 100644
--- a/stdlib/tst-fmtmsg.c
+++ b/stdlib/tst-fmtmsg.c
@@ -1,8 +1,6 @@
 #include <fmtmsg.h>
 #include <mcheck.h>
 #include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
 
 
 #define MM_TEST 10
@@ -14,13 +12,11 @@ main (void)
 
   mtrace ();
 
-  char TEST[] = "ABCD";
-  if (addseverity (MM_TEST, TEST) != MM_OK)
+  if (addseverity (MM_TEST, "TEST") != MM_OK)
     {
       puts ("addseverity failed");
       result = 1;
     }
-  strcpy (TEST, "TEST");
 
   if (fmtmsg (MM_PRINT, "GLIBC:tst-fmtmsg", MM_HALT, "halt",
 	      "should print message for MM_HALT", "GLIBC:tst-fmtmsg:1")
@@ -52,31 +48,5 @@ main (void)
       != MM_OK)
     result = 1;
 
-  if (addseverity (MM_TEST, NULL) != MM_OK)
-    {
-      puts ("second addseverity failed");
-      result = 1;
-    }
-
-  if (addseverity (MM_TEST, NULL) != MM_NOTOK)
-    {
-      puts ("third addseverity unexpectedly succeeded");
-      result = 1;
-    }
-
-  char *p = strdup ("TEST2");
-  if (addseverity (MM_TEST, p) != MM_OK)
-    {
-      puts ("fourth addseverity failed");
-      result = 1;
-    }
-  if (addseverity (MM_TEST, "TEST3") != MM_OK)
-    {
-      puts ("fifth addseverity failed");
-      result = 1;
-    }
-
-  free (p);
-
   return result;
 }
diff --git a/sunrpc/openchild.c b/sunrpc/openchild.c
index 29ddfa6b75..16f6a34e7a 100644
--- a/sunrpc/openchild.c
+++ b/sunrpc/openchild.c
@@ -81,7 +81,7 @@ _openchild (const char *command, FILE ** fto, FILE ** ffrom)
       for (i = _rpc_dtablesize () - 1; i >= 3; i--)
 	__close (i);
       fflush (stderr);
-      execlp (command, command, NULL);
+      execlp (command, command, 0);
       perror ("exec");
       _exit (~0);
 
diff --git a/sunrpc/rpc_main.c b/sunrpc/rpc_main.c
index acc0132603..fee83514d1 100644
--- a/sunrpc/rpc_main.c
+++ b/sunrpc/rpc_main.c
@@ -695,9 +695,11 @@ s_output (int argc, const char *argv[], const char *infile, const char *define,
 
   fprintf (fout, "#include <stdio.h>\n");
   fprintf (fout, "#include <stdlib.h>\n");
-  fprintf (fout, "#include <rpc/pmap_clnt.h>\n");
   if (Cflag)
-    fprintf (fout, "#include <string.h>\n");
+    {
+      fprintf (fout, "#include <rpc/pmap_clnt.h>\n");
+      fprintf (fout, "#include <string.h>\n");
+    }
   if (strcmp (svcclosetime, "-1") == 0)
     indefinitewait = 1;
   else if (strcmp (svcclosetime, "0") == 0)
diff --git a/sunrpc/svc_tcp.c b/sunrpc/svc_tcp.c
index 873d39d52a..4decfa4fd0 100644
--- a/sunrpc/svc_tcp.c
+++ b/sunrpc/svc_tcp.c
@@ -165,7 +165,7 @@ svctcp_create (int sock, u_int sendsize, u_int recvsize)
       (void) __bind (sock, (struct sockaddr *) &addr, len);
     }
   if ((__getsockname (sock, (struct sockaddr *) &addr, &len) != 0) ||
-      (__listen (sock, SOMAXCONN) != 0))
+      (__listen (sock, 2) != 0))
     {
       perror (_("svc_tcp.c - cannot getsockname or listen"));
       if (madesock)
diff --git a/sunrpc/svc_unix.c b/sunrpc/svc_unix.c
index d95e884434..cfbc63866b 100644
--- a/sunrpc/svc_unix.c
+++ b/sunrpc/svc_unix.c
@@ -161,7 +161,7 @@ svcunix_create (int sock, u_int sendsize, u_int recvsize, char *path)
   __bind (sock, (struct sockaddr *) &addr, len);
 
   if (__getsockname (sock, (struct sockaddr *) &addr, &len) != 0
-      || __listen (sock, SOMAXCONN) != 0)
+      || __listen (sock, 2) != 0)
     {
       perror (_("svc_unix.c - cannot getsockname or listen"));
       if (madesock)
diff --git a/sysdeps/alpha/bits/link.h b/sysdeps/alpha/bits/link.h
deleted file mode 100644
index 429faff432..0000000000
--- a/sysdeps/alpha/bits/link.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef	_LINK_H
-# error "Never include <bits/link.h> directly; use <link.h> instead."
-#endif
-
-
-/* Registers for entry into PLT on Alpha.  */
-typedef struct La_alpha_regs
-{
-  uint64_t lr_r26;
-  uint64_t lr_sp;
-  uint64_t lr_r16;
-  uint64_t lr_r17;
-  uint64_t lr_r18;
-  uint64_t lr_r19;
-  uint64_t lr_r20;
-  uint64_t lr_r21;
-  double lr_f16;
-  double lr_f17;
-  double lr_f18;
-  double lr_f19;
-  double lr_f20;
-  double lr_f21;
-} La_alpha_regs;
-
-/* Return values for calls from PLT on Alpha.  */
-typedef struct La_alpha_retval
-{
-  uint64_t lrv_r0;
-  uint64_t lrv_r1;
-  double lrv_f0;
-  double lrv_f1;
-} La_alpha_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf64_Addr la_alpha_gnu_pltenter (Elf64_Sym *__sym, unsigned int __ndx,
-				         uintptr_t *__refcook,
-				         uintptr_t *__defcook,
-				         La_alpha_regs *__regs,
-				         unsigned int *__flags,
-				         const char *__symname,
-				         long int *__framesizep);
-extern unsigned int la_alpha_gnu_pltexit (Elf64_Sym *__sym, unsigned int __ndx,
-					  uintptr_t *__refcook,
-					  uintptr_t *__defcook,
-					  const La_alpha_regs *__inregs,
-					  La_alpha_retval *__outregs,
-					  const char *symname);
-
-__END_DECLS
diff --git a/sysdeps/alpha/dl-machine.h b/sysdeps/alpha/dl-machine.h
index 4166e8c498..780a3a57fd 100644
--- a/sysdeps/alpha/dl-machine.h
+++ b/sysdeps/alpha/dl-machine.h
@@ -108,20 +108,19 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
       plt = D_PTR (l, l_info[DT_PLTGOT]);
 
       /* This function will be called to perform the relocation.  */
-      if (__builtin_expect (profile, 0))
+      if (!profile)
+        *(Elf64_Addr *)(plt + 16) = (Elf64_Addr) &_dl_runtime_resolve;
+      else
 	{
 	  *(Elf64_Addr *)(plt + 16) = (Elf64_Addr) &_dl_runtime_profile;
 
-	  if (GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), l))
+	  if (_dl_name_match_p (GLRO(dl_profile), l))
 	    {
 	      /* This is the object we are looking for.  Say that we really
 		 want profiling and the timers are started.  */
 	      GL(dl_profile_map) = l;
 	    }
 	}
-      else
-        *(Elf64_Addr *)(plt + 16) = (Elf64_Addr) &_dl_runtime_resolve;
 
       /* Identify this shared object */
       *(Elf64_Addr *)(plt + 24) = (Elf64_Addr) l;
@@ -157,6 +156,143 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   return lazy;
 }
 
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.  */
+#define TRAMPOLINE_TEMPLATE(tramp_name, fixup_name, IMB)	\
+  extern void tramp_name (void);				\
+  asm ( "\
+	.globl " #tramp_name "					\n\
+	.ent " #tramp_name "					\n\
+" #tramp_name ":						\n\
+	lda	$sp, -44*8($sp)					\n\
+	.frame	$sp, 44*8, $26					\n\
+	/* Preserve all integer registers that C normally	\n\
+	   doesn't.  */						\n\
+	stq	$26, 0*8($sp)					\n\
+	stq	$0, 1*8($sp)					\n\
+	stq	$1, 2*8($sp)					\n\
+	stq	$2, 3*8($sp)					\n\
+	stq	$3, 4*8($sp)					\n\
+	stq	$4, 5*8($sp)					\n\
+	stq	$5, 6*8($sp)					\n\
+	stq	$6, 7*8($sp)					\n\
+	stq	$7, 8*8($sp)					\n\
+	stq	$8, 9*8($sp)					\n\
+	stq	$16, 10*8($sp)					\n\
+	stq	$17, 11*8($sp)					\n\
+	stq	$18, 12*8($sp)					\n\
+	stq	$19, 13*8($sp)					\n\
+	stq	$20, 14*8($sp)					\n\
+	stq	$21, 15*8($sp)					\n\
+	stq	$22, 16*8($sp)					\n\
+	stq	$23, 17*8($sp)					\n\
+	stq	$24, 18*8($sp)					\n\
+	stq	$25, 19*8($sp)					\n\
+	stq	$29, 20*8($sp)					\n\
+	stt	$f0, 21*8($sp)					\n\
+	stt	$f1, 22*8($sp)					\n\
+	stt	$f10, 23*8($sp)					\n\
+	stt	$f11, 24*8($sp)					\n\
+	stt	$f12, 25*8($sp)					\n\
+	stt	$f13, 26*8($sp)					\n\
+	stt	$f14, 27*8($sp)					\n\
+	stt	$f15, 28*8($sp)					\n\
+	stt	$f16, 29*8($sp)					\n\
+	stt	$f17, 30*8($sp)					\n\
+	stt	$f18, 31*8($sp)					\n\
+	stt	$f19, 32*8($sp)					\n\
+	stt	$f20, 33*8($sp)					\n\
+	stt	$f21, 34*8($sp)					\n\
+	stt	$f22, 35*8($sp)					\n\
+	stt	$f23, 36*8($sp)					\n\
+	stt	$f24, 37*8($sp)					\n\
+	stt	$f25, 38*8($sp)					\n\
+	stt	$f26, 39*8($sp)					\n\
+	stt	$f27, 40*8($sp)					\n\
+	stt	$f28, 41*8($sp)					\n\
+	stt	$f29, 42*8($sp)					\n\
+	stt	$f30, 43*8($sp)					\n\
+	.mask	0x27ff01ff, -44*8				\n\
+	.fmask	0xfffffc03, -(44-21)*8				\n\
+	/* Set up our $gp */					\n\
+	br	$gp, .+4					\n\
+	ldgp	$gp, 0($gp)					\n\
+	.prologue 0						\n\
+	/* Set up the arguments for fixup: */			\n\
+	/* $16 = link_map out of plt0 */			\n\
+	/* $17 = offset of reloc entry = ($28 - $27 - 20) /12 * 24 */\n\
+	/* $18 = return address */				\n\
+	subq	$28, $27, $17					\n\
+	ldq	$16, 8($27)					\n\
+	subq	$17, 20, $17					\n\
+	mov	$26, $18					\n\
+	addq	$17, $17, $17					\n\
+	/* Do the fixup */					\n\
+	bsr	$26, " #fixup_name "	!samegp			\n\
+	/* Move the destination address into position.  */	\n\
+	mov	$0, $27						\n\
+	/* Restore program registers.  */			\n\
+	ldq	$26, 0*8($sp)					\n\
+	ldq	$0, 1*8($sp)					\n\
+	ldq	$1, 2*8($sp)					\n\
+	ldq	$2, 3*8($sp)					\n\
+	ldq	$3, 4*8($sp)					\n\
+	ldq	$4, 5*8($sp)					\n\
+	ldq	$5, 6*8($sp)					\n\
+	ldq	$6, 7*8($sp)					\n\
+	ldq	$7, 8*8($sp)					\n\
+	ldq	$8, 9*8($sp)					\n\
+	ldq	$16, 10*8($sp)					\n\
+	ldq	$17, 11*8($sp)					\n\
+	ldq	$18, 12*8($sp)					\n\
+	ldq	$19, 13*8($sp)					\n\
+	ldq	$20, 14*8($sp)					\n\
+	ldq	$21, 15*8($sp)					\n\
+	ldq	$22, 16*8($sp)					\n\
+	ldq	$23, 17*8($sp)					\n\
+	ldq	$24, 18*8($sp)					\n\
+	ldq	$25, 19*8($sp)					\n\
+	ldq	$29, 20*8($sp)					\n\
+	ldt	$f0, 21*8($sp)					\n\
+	ldt	$f1, 22*8($sp)					\n\
+	ldt	$f10, 23*8($sp)					\n\
+	ldt	$f11, 24*8($sp)					\n\
+	ldt	$f12, 25*8($sp)					\n\
+	ldt	$f13, 26*8($sp)					\n\
+	ldt	$f14, 27*8($sp)					\n\
+	ldt	$f15, 28*8($sp)					\n\
+	ldt	$f16, 29*8($sp)					\n\
+	ldt	$f17, 30*8($sp)					\n\
+	ldt	$f18, 31*8($sp)					\n\
+	ldt	$f19, 32*8($sp)					\n\
+	ldt	$f20, 33*8($sp)					\n\
+	ldt	$f21, 34*8($sp)					\n\
+	ldt	$f22, 35*8($sp)					\n\
+	ldt	$f23, 36*8($sp)					\n\
+	ldt	$f24, 37*8($sp)					\n\
+	ldt	$f25, 38*8($sp)					\n\
+	ldt	$f26, 39*8($sp)					\n\
+	ldt	$f27, 40*8($sp)					\n\
+	ldt	$f28, 41*8($sp)					\n\
+	ldt	$f29, 42*8($sp)					\n\
+	ldt	$f30, 43*8($sp)					\n\
+	/* Flush the Icache after having modified the .plt code.  */\n\
+	" #IMB "						\n\
+	/* Clean up and turn control to the destination */	\n\
+	lda	$sp, 44*8($sp)					\n\
+	jmp	$31, ($27)					\n\
+	.end " #tramp_name)
+
+#ifndef PROF
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE				\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup, imb);	\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_profile, profile_fixup, /* nop */);
+#else
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE				\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup, imb);	\
+  strong_alias (_dl_runtime_resolve, _dl_runtime_profile);
+#endif
+
 /* Initial entry point code for the dynamic linker.
    The C function `_dl_start' is the real entry point;
    its return value is the user program's entry point.  */
@@ -365,13 +501,9 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
   return value + reloc->r_addend;
 }
 
-/* Names of the architecture-specific auditing callback functions.  */
-#define ARCH_LA_PLTENTER	alpha_gnu_pltenter
-#define ARCH_LA_PLTEXIT		alpha_gnu_pltexit
-
 #endif /* !dl_machine_h */
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 
 /* Perform the relocation specified by RELOC and SYM (which is fully resolved).
    MAP is the object containing the reloc.  */
@@ -423,16 +555,26 @@ elf_machine_rela (struct link_map *map,
       return;
   else
     {
-      struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
       Elf64_Addr sym_value;
       Elf64_Addr sym_raw_value;
 
+#if defined USE_TLS && !defined RTLD_BOOTSTRAP
+      struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
       sym_raw_value = sym_value = reloc->r_addend;
-      if (sym_map)
+      if (sym)
 	{
 	  sym_raw_value += sym->st_value;
 	  sym_value = sym_raw_value + sym_map->l_addr;
 	}
+#else
+      Elf64_Addr loadbase = RESOLVE (&sym, version, r_type);
+      sym_raw_value = sym_value = reloc->r_addend;
+      if (sym)
+	{
+	  sym_raw_value += sym->st_value;
+	  sym_value = sym_raw_value + loadbase;
+	}
+#endif
 
       if (r_type == R_ALPHA_GLOB_DAT)
 	*reloc_addr = sym_value;
@@ -538,4 +680,4 @@ elf_machine_lazy_rel (struct link_map *map,
     _dl_reloc_bad_type (map, r_type, 1);
 }
 
-#endif /* RESOLVE_MAP */
+#endif /* RESOLVE */
diff --git a/sysdeps/alpha/dl-trampoline.S b/sysdeps/alpha/dl-trampoline.S
deleted file mode 100644
index 42350836ef..0000000000
--- a/sysdeps/alpha/dl-trampoline.S
+++ /dev/null
@@ -1,361 +0,0 @@
-/* PLT trampolines.  Alpha version.
-   Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.set noat
-
-	.globl	_dl_runtime_resolve
-	.ent	_dl_runtime_resolve
-
-#undef FRAMESIZE
-#define FRAMESIZE	44*8
-
-_dl_runtime_resolve:
-	lda	$30, -FRAMESIZE($30)
-	.frame	$30, FRAMESIZE, $26
-	/* Preserve all registers that C normally doesn't.  */
-	stq	$26, 0*8($30)
-	stq	$0, 1*8($30)
-	stq	$1, 2*8($30)
-	stq	$2, 3*8($30)
-	stq	$3, 4*8($30)
-	stq	$4, 5*8($30)
-	stq	$5, 6*8($30)
-	stq	$6, 7*8($30)
-	stq	$7, 8*8($30)
-	stq	$8, 9*8($30)
-	stq	$16, 10*8($30)
-	stq	$17, 11*8($30)
-	stq	$18, 12*8($30)
-	stq	$19, 13*8($30)
-	stq	$20, 14*8($30)
-	stq	$21, 15*8($30)
-	stq	$22, 16*8($30)
-	stq	$23, 17*8($30)
-	stq	$24, 18*8($30)
-	stq	$25, 19*8($30)
-	stq	$29, 20*8($30)
-	stt	$f0, 21*8($30)
-	stt	$f1, 22*8($30)
-	stt	$f10, 23*8($30)
-	stt	$f11, 24*8($30)
-	stt	$f12, 25*8($30)
-	stt	$f13, 26*8($30)
-	stt	$f14, 27*8($30)
-	stt	$f15, 28*8($30)
-	stt	$f16, 29*8($30)
-	stt	$f17, 30*8($30)
-	stt	$f18, 31*8($30)
-	stt	$f19, 32*8($30)
-	stt	$f20, 33*8($30)
-	stt	$f21, 34*8($30)
-	stt	$f22, 35*8($30)
-	stt	$f23, 36*8($30)
-	stt	$f24, 37*8($30)
-	stt	$f25, 38*8($30)
-	stt	$f26, 39*8($30)
-	stt	$f27, 40*8($30)
-	stt	$f28, 41*8($30)
-	stt	$f29, 42*8($30)
-	stt	$f30, 43*8($30)
-	.mask	0x27ff01ff, -FRAMESIZE
-	.fmask	0xfffffc03, -FRAMESIZE+21*8
-	/* Set up our GP.  */
-	br	$29, .+4
-	ldgp	$29, 0($29)
-	.prologue 0
-	/* Set up the arguments for _dl_fixup:
-	   $16 = link_map out of plt0
-	   $17 = offset of reloc entry = ($28 - $27 - 20) /12 * 24
-	   $18 = return address
-	*/
-	subq	$28, $27, $17
-	ldq	$16, 8($27)
-	subq	$17, 20, $17
-	mov	$26, $18
-	addq	$17, $17, $17
-	bsr	$26, _dl_fixup	!samegp
-
-	/* Move the destination address into position.  */
-	mov	$0, $27
-	/* Restore program registers.  */
-	ldq	$26, 0*8($30)
-	ldq	$0, 1*8($30)
-	ldq	$1, 2*8($30)
-	ldq	$2, 3*8($30)
-	ldq	$3, 4*8($30)
-	ldq	$4, 5*8($30)
-	ldq	$5, 6*8($30)
-	ldq	$6, 7*8($30)
-	ldq	$7, 8*8($30)
-	ldq	$8, 9*8($30)
-	ldq	$16, 10*8($30)
-	ldq	$17, 11*8($30)
-	ldq	$18, 12*8($30)
-	ldq	$19, 13*8($30)
-	ldq	$20, 14*8($30)
-	ldq	$21, 15*8($30)
-	ldq	$22, 16*8($30)
-	ldq	$23, 17*8($30)
-	ldq	$24, 18*8($30)
-	ldq	$25, 19*8($30)
-	ldq	$29, 20*8($30)
-	ldt	$f0, 21*8($30)
-	ldt	$f1, 22*8($30)
-	ldt	$f10, 23*8($30)
-	ldt	$f11, 24*8($30)
-	ldt	$f12, 25*8($30)
-	ldt	$f13, 26*8($30)
-	ldt	$f14, 27*8($30)
-	ldt	$f15, 28*8($30)
-	ldt	$f16, 29*8($30)
-	ldt	$f17, 30*8($30)
-	ldt	$f18, 31*8($30)
-	ldt	$f19, 32*8($30)
-	ldt	$f20, 33*8($30)
-	ldt	$f21, 34*8($30)
-	ldt	$f22, 35*8($30)
-	ldt	$f23, 36*8($30)
-	ldt	$f24, 37*8($30)
-	ldt	$f25, 38*8($30)
-	ldt	$f26, 39*8($30)
-	ldt	$f27, 40*8($30)
-	ldt	$f28, 41*8($30)
-	ldt	$f29, 42*8($30)
-	ldt	$f30, 43*8($30)
-	/* Flush the Icache after having modified the .plt code.  */
-	imb
-	/* Clean up and turn control to the destination */
-	lda	$30, FRAMESIZE($30)
-	jmp	$31, ($27)
-
-	.end	_dl_runtime_resolve
-
-	.globl	_dl_runtime_profile
-	.usepv	_dl_runtime_profile, no
-	.type	_dl_runtime_profile, @function
-
-	/* We save the registers in a different order than desired by
-	   .mask/.fmask, so we have to use explicit cfi directives.  */
-	cfi_startproc
-
-.macro savei regno, offset
-	stq	$\regno, \offset($30)
-	cfi_rel_offset(\regno, \offset)
-.endm
-
-.macro savef regno, offset
-	stt	$f\regno, \offset($30)
-	cfi_rel_offset(\regno+32, \offset)
-.endm
-
-#undef FRAMESIZE
-#define FRAMESIZE	50*8
-
-_dl_runtime_profile:
-	lda	$30, -FRAMESIZE($30)
-	cfi_adjust_cfa_offset (FRAMESIZE)
-
-	/* Preserve all argument registers.  This also constructs the
-	   La_alpha_regs structure.  */
-	savei	26, 0*8
-	savei	16, 2*8
-	savei	17, 3*8
-	savei	18, 4*8
-	savei	19, 5*8
-	savei	20, 6*8
-	savei	21, 7*8
-	lda	$16, FRAMESIZE($30)
-	savef	16, 8*8
-	savef	17, 9*8
-	savef	18, 10*8
-	savef	19, 11*8
-	savef	20, 12*8
-	savef	21, 13*8
-	stq	$16, 1*8($30)
-
-	/* Preserve all registers that C normally doesn't.  */
-	savei	0, 14*8
-	savei	1, 15*8
-	savei	2, 16*8
-	savei	3, 17*8
-	savei	4, 18*8
-	savei	5, 19*8
-	savei	6, 20*8
-	savei	7, 21*8
-	savei	8, 22*8
-	savei	22, 23*8
-	savei	23, 24*8
-	savei	24, 25*8
-	savei	25, 26*8
-	savei	29, 27*8
-	savef	0, 28*8
-	savef	1, 29*8
-	savef	10, 30*8
-	savef	11, 31*8
-	savef	12, 32*8
-	savef	13, 33*8
-	savef	14, 34*8
-	savef	15, 35*8
-	savef	22, 36*8
-	savef	23, 37*8
-	savef	24, 38*8
-	savef	25, 39*8
-	savef	26, 40*8
-	savef	27, 41*8
-	savef	28, 42*8
-	savef	29, 43*8
-	savef	30, 44*8
-
-	/* Set up our GP.  */
-	br	$29, .+4
-	ldgp	$29, 0($29)
-
-	/* Set up the arguments for _dl_profile_fixup:
-	   $16 = link_map out of plt0
-	   $17 = offset of reloc entry = ($28 - $27 - 20) /12 * 24
-	   $18 = return address
-	   $19 = La_alpha_regs address
-	   $20 = framesize address
-	*/
-	subq	$28, $27, $17
-	ldq	$16, 8($27)
-	subq	$17, 20, $17
-	mov	$26, $18
-	addq	$17, $17, $17
-	lda	$19, 0($30)
-	lda	$20, 45*8($30)
-	stq	$16, 48*8($30)
-	stq	$17, 49*8($30)
-
-	bsr	$26, _dl_profile_fixup	!samegp
-
-	/* Discover if we're wrapping this call.  */
-	ldq	$18, 45*8($30)
-	bge	$18, 1f
-
-	/* Move the destination address into position.  */
-	mov	$0, $27
-	/* Restore program registers.  */
-	ldq	$26, 0*8($30)
-	ldq	$16, 2*8($30)
-	ldq	$17, 3*8($30)
-	ldq	$18, 4*8($30)
-	ldq	$19, 5*8($30)
-	ldq	$20, 6*8($30)
-	ldq	$21, 7*8($30)
-	ldt	$f16, 8*8($30)
-	ldt	$f17, 9*8($30)
-	ldt	$f18, 10*8($30)
-	ldt	$f19, 11*8($30)
-	ldt	$f20, 12*8($30)
-	ldt	$f21, 13*8($30)
-	ldq	$0, 14*8($30)
-	ldq	$1, 15*8($30)
-	ldq	$2, 16*8($30)
-	ldq	$3, 17*8($30)
-	ldq	$4, 18*8($30)
-	ldq	$5, 19*8($30)
-	ldq	$6, 20*8($30)
-	ldq	$7, 21*8($30)
-	ldq	$8, 22*8($30)
-	ldq	$22, 23*8($30)
-	ldq	$23, 24*8($30)
-	ldq	$24, 25*8($30)
-	ldq	$25, 26*8($30)
-	ldq	$29, 27*8($30)
-	ldt	$f0, 28*8($30)
-	ldt	$f1, 29*8($30)
-	ldt	$f10, 30*8($30)
-	ldt	$f11, 31*8($30)
-	ldt	$f12, 32*8($30)
-	ldt	$f13, 33*8($30)
-	ldt	$f14, 34*8($30)
-	ldt	$f15, 35*8($30)
-	ldt	$f22, 36*8($30)
-	ldt	$f23, 37*8($30)
-	ldt	$f24, 38*8($30)
-	ldt	$f25, 39*8($30)
-	ldt	$f26, 40*8($30)
-	ldt	$f27, 41*8($30)
-	ldt	$f28, 42*8($30)
-	ldt	$f29, 43*8($30)
-	ldt	$f30, 44*8($30)
-
-	/* Clean up and turn control to the destination.  */
-	lda	$30, FRAMESIZE($30)
-	jmp	$31, ($27)
-
-1:
-	/* Create a frame pointer and allocate a new argument frame.  */
-	savei	15, 45*8
-	mov	$30, $15
-	cfi_def_cfa_register (15)
-	addq	$18, 15, $18
-	bic	$18, 15, $18
-	subq	$30, $18, $30
-
-	/* Save the call destination around memcpy.  */
-	stq	$0, 46*8($30)
-
-	/* Copy the stack arguments into place.  */
-	lda	$16, 0($30)
-	lda	$17, FRAMESIZE($15)
-	jsr	$26, memcpy
-	ldgp	$29, 0($26)
-
-	/* Reload the argument registers.  */
-	ldq	$27, 46*8($30)
-	ldq	$16, 2*8($15)
-	ldq	$17, 3*8($15)
-	ldq	$18, 4*8($15)
-	ldq	$19, 5*8($15)
-	ldq	$20, 6*8($15)
-	ldq	$21, 7*8($15)
-	ldt	$f16, 8*8($15)
-	ldt	$f17, 9*8($15)
-	ldt	$f18, 10*8($15)
-	ldt	$f19, 11*8($15)
-	ldt	$f20, 12*8($15)
-	ldt	$f21, 13*8($15)
-
-	jsr	$26, ($27), 0
-	ldgp	$29, 0($26)
-
-	/* Set up for call to _dl_call_pltexit.  */
-	ldq	$16, 48($15)
-	ldq	$17, 49($15)
-	stq	$0, 46*8($15)
-	lda	$18, 0($15)
-	stq	$1, 47*8($15)
-	lda	$19, 46*8($15)
-	stt	$f0, 48*8($15)
-	stt	$f1, 49*8($15)
-	bsr	$26, _dl_call_pltexit	!samegp
-
-	mov	$15, $30
-	cfi_def_cfa_register (30)
-	ldq	$26, 0($30)
-	ldq	$15, 45*8($30)
-	lda	$30, FRAMESIZE($30)
-	ret
-
-	cfi_endproc
-	.size	_dl_runtime_profile, .-_dl_runtime_profile
diff --git a/sysdeps/alpha/fpu/bits/mathinline.h b/sysdeps/alpha/fpu/bits/mathinline.h
index 87d40058c3..187bd42f33 100644
--- a/sysdeps/alpha/fpu/bits/mathinline.h
+++ b/sysdeps/alpha/fpu/bits/mathinline.h
@@ -46,8 +46,7 @@
 #if (!defined __NO_MATH_INLINES || defined __LIBC_INTERNAL_MATH_INLINES) \
     && defined __OPTIMIZE__
 
-#if !__GNUC_PREREQ (4, 0)
-# define __inline_copysign(NAME, TYPE)					\
+#define __inline_copysign(NAME, TYPE)					\
 __MATH_INLINE TYPE							\
 __NTH (NAME (TYPE __x, TYPE __y))					\
 {									\
@@ -61,11 +60,19 @@ __inline_copysign (copysignf, float)
 __inline_copysign (__copysign, double)
 __inline_copysign (copysign, double)
 
-# undef __inline_copysign
-#endif
+#undef __inline_copysign
 
 
-#if !__GNUC_PREREQ (2, 8)
+#if __GNUC_PREREQ (2, 8)
+__MATH_INLINE float
+__NTH (__fabsf (float __x)) { return __builtin_fabsf (__x); }
+__MATH_INLINE float
+__NTH (fabsf (float __x)) { return __builtin_fabsf (__x); }
+__MATH_INLINE double
+__NTH (__fabs (double __x)) { return __builtin_fabs (__x); }
+__MATH_INLINE double
+__NTH (fabs (double __x)) { return __builtin_fabs (__x); }
+#else
 # define __inline_fabs(NAME, TYPE)			\
 __MATH_INLINE TYPE					\
 __NTH (NAME (TYPE __x))					\
diff --git a/sysdeps/alpha/libc-tls.c b/sysdeps/alpha/libc-tls.c
index a3b68e928f..434d5d9313 100644
--- a/sysdeps/alpha/libc-tls.c
+++ b/sysdeps/alpha/libc-tls.c
@@ -31,7 +31,7 @@ void *
 __tls_get_addr (tls_index *ti)
 {
   dtv_t *dtv = THREAD_DTV ();
-  return (char *) dtv[1].pointer.val + ti->ti_offset;
+  return (char *) dtv[1].pointer + ti->ti_offset;
 }
 
 #endif
diff --git a/sysdeps/arm/bits/link.h b/sysdeps/arm/bits/link.h
index e69de29bb2..648976d7d2 100644
--- a/sysdeps/arm/bits/link.h
+++ b/sysdeps/arm/bits/link.h
@@ -0,0 +1,4 @@
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt */
+  };
diff --git a/sysdeps/arm/bits/linkmap.h b/sysdeps/arm/bits/linkmap.h
deleted file mode 100644
index 648976d7d2..0000000000
--- a/sysdeps/arm/bits/linkmap.h
+++ /dev/null
@@ -1,4 +0,0 @@
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt */
-  };
diff --git a/sysdeps/generic/bits/link.h b/sysdeps/generic/bits/link.h
index 6b4f811c25..470b4d3e5f 100644
--- a/sysdeps/generic/bits/link.h
+++ b/sysdeps/generic/bits/link.h
@@ -1 +1,4 @@
-#error "Architecture-specific definition needed."
+struct link_map_machine
+  {
+    /* empty by default */
+  };
diff --git a/sysdeps/generic/bits/linkmap.h b/sysdeps/generic/bits/linkmap.h
deleted file mode 100644
index 470b4d3e5f..0000000000
--- a/sysdeps/generic/bits/linkmap.h
+++ /dev/null
@@ -1,4 +0,0 @@
-struct link_map_machine
-  {
-    /* empty by default */
-  };
diff --git a/sysdeps/generic/dl-fptr.h b/sysdeps/generic/dl-fptr.h
index d47fb7b635..8156981e6e 100644
--- a/sysdeps/generic/dl-fptr.h
+++ b/sysdeps/generic/dl-fptr.h
@@ -36,8 +36,6 @@ struct fdesc_table
     struct fdesc fdesc[0];
   };
 
-struct link_map;
-
 extern ElfW(Addr) _dl_boot_fptr_table [];
 
 extern ElfW(Addr) _dl_make_fptr (struct link_map *, const ElfW(Sym) *,
diff --git a/sysdeps/generic/dl-lookupcfg.h b/sysdeps/generic/dl-lookupcfg.h
index 2b29989600..f48cb0a844 100644
--- a/sysdeps/generic/dl-lookupcfg.h
+++ b/sysdeps/generic/dl-lookupcfg.h
@@ -1,5 +1,5 @@
 /* Configuration of lookup functions.
-   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,16 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-/* The type of the return value of fixup/profile_fixup.  */
-#define DL_FIXUP_VALUE_TYPE ElfW(Addr)
-/* Construct a value of type DL_FIXUP_VALUE_TYPE from a code address
-   and a link map.  */
-#define DL_FIXUP_MAKE_VALUE(map, addr) (addr)
-/* Extract the code address from a value of type DL_FIXUP_MAKE_VALUE.
- */
-#define DL_FIXUP_VALUE_CODE_ADDR(value) (value)
-#define DL_FIXUP_VALUE_ADDR(value) (value)
-#define DL_FIXUP_ADDR_VALUE(addr) (addr)
+/* Some platforms need more information from the symbol lookup function
+   than just the address.  But this is not generally the case.
+
+   However, because of how _dl_sym and _dl_tls_symaddr are written, every
+   platform needs it when we support TLS.  */
+
+#include <tls.h>		/* Defines USE_TLS (or doesn't).  */
+
+#ifdef USE_TLS
+# define DL_LOOKUP_RETURNS_MAP
+#else
+# undef DL_LOOKUP_RETURNS_MAP
+#endif
diff --git a/sysdeps/generic/dl-tls.c b/sysdeps/generic/dl-tls.c
index 099742ceff..3382e3493c 100644
--- a/sysdeps/generic/dl-tls.c
+++ b/sysdeps/generic/dl-tls.c
@@ -1,5 +1,5 @@
 /* Thread-local storage handling in the ELF dynamic linker.  Generic version.
-   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,8 +18,6 @@
    02111-1307 USA.  */
 
 #include <assert.h>
-#include <errno.h>
-#include <libintl.h>
 #include <signal.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -67,10 +65,7 @@ _dl_next_tls_modid (void)
       /* Note that this branch will never be executed during program
 	 start since there are no gaps at that time.  Therefore it
 	 does not matter that the dl_tls_dtv_slotinfo is not allocated
-	 yet when the function is called for the first times.
-
-	 NB: the offset +1 is due to the fact that DTV[0] is used
-	 for something else.  */
+	 yet when the function is called for the first times.  */
       result = GL(dl_tls_static_nelem) + 1;
       /* If the following would not be true we mustn't have assumed
 	 there is a gap.  */
@@ -93,11 +88,11 @@ _dl_next_tls_modid (void)
 	}
       while ((runp = runp->next) != NULL);
 
-      if (result > GL(dl_tls_max_dtv_idx))
+      if (result > GL(dl_tls_max_dtv_idx))
 	{
 	  /* The new index must indeed be exactly one higher than the
 	     previous high.  */
-	  assert (result == GL(dl_tls_max_dtv_idx) + 1);
+	  assert (result == GL(dl_tls_max_dtv_idx) + 1);
 
 	  /* There is no gap anymore.  */
 	  GL(dl_tls_dtv_gaps) = false;
@@ -121,9 +116,10 @@ void
 internal_function
 _dl_determine_tlsoffset (void)
 {
+  struct dtv_slotinfo *slotinfo;
   size_t max_align = TLS_TCB_ALIGN;
-  size_t freetop = 0;
-  size_t freebottom = 0;
+  size_t offset, freetop = 0, freebottom = 0;
+  size_t cnt;
 
   /* The first element of the dtv slot info list is allocated.  */
   assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
@@ -131,7 +127,7 @@ _dl_determine_tlsoffset (void)
      dl_tls_dtv_slotinfo_list list.  */
   assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);
 
-  struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
+  slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;
 
   /* Determining the offset of the various parts of the static TLS
      block has several dependencies.  In addition we have to work
@@ -163,9 +159,9 @@ _dl_determine_tlsoffset (void)
 
 # if TLS_TCB_AT_TP
   /* We simply start with zero.  */
-  size_t offset = 0;
+  offset = 0;
 
-  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
+  for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
     {
       assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
 
@@ -210,9 +206,9 @@ _dl_determine_tlsoffset (void)
 			    + TLS_TCB_SIZE);
 # elif TLS_DTV_AT_TP
   /* The TLS blocks start right after the TCB.  */
-  size_t offset = TLS_TCB_SIZE;
+  offset = TLS_TCB_SIZE;
 
-  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
+  for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
     {
       assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);
 
@@ -229,8 +225,8 @@ _dl_determine_tlsoffset (void)
 	  if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
 	    {
 	      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
-	      freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
-			    - firstbyte);
+	      freebottom = off + slotinfo[cnt].map->l_tls_blocksize
+			   - firstbyte;
 	      continue;
 	    }
 	}
@@ -361,14 +357,14 @@ _dl_allocate_tls_storage (void)
 
       /* Clear the TCB data structure.  We can't ask the caller (i.e.
 	 libpthread) to do it, because we will initialize the DTV et al.  */
-      memset (result, '\0', TLS_TCB_SIZE);
+      memset (result, 0, TLS_TCB_SIZE);
 # elif TLS_DTV_AT_TP
       result = (char *) result + size - GL(dl_tls_static_size);
 
       /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it.
 	 We can't ask the caller (i.e. libpthread) to do it, because we will
 	 initialize the DTV et al.  */
-      memset ((char *) result - TLS_PRE_TCB_SIZE, '\0',
+      memset ((char *) result - TLS_PRE_TCB_SIZE, 0,
 	      TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
 # endif
 
@@ -392,11 +388,10 @@ _dl_allocate_tls_init (void *result)
   dtv_t *dtv = GET_DTV (result);
   struct dtv_slotinfo_list *listp;
   size_t total = 0;
-  size_t maxgen = 0;
 
-  /* We have to prepare the dtv for all currently loaded modules using
-     TLS.  For those which are dynamically loaded we add the values
-     indicating deferred allocation.  */
+  /* We have to prepare the dtv for all currently loaded
+     modules using TLS.  For those which are dynamically loaded we
+     add the values indicating deferred allocation.  */
   listp = GL(dl_tls_dtv_slotinfo_list);
   while (1)
     {
@@ -416,16 +411,11 @@ _dl_allocate_tls_init (void *result)
 	    /* Unused entry.  */
 	    continue;
 
-	  /* Keep track of the maximum generation number.  This might
-	     not be the generation counter.  */
-	  maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
-
 	  if (map->l_tls_offset == NO_TLS_OFFSET)
 	    {
 	      /* For dynamically loaded modules we simply store
 		 the value indicating deferred allocation.  */
-	      dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
-	      dtv[map->l_tls_modid].pointer.is_static = false;
+	      dtv[map->l_tls_modid].pointer = TLS_DTV_UNALLOCATED;
 	      continue;
 	    }
 
@@ -441,8 +431,7 @@ _dl_allocate_tls_init (void *result)
 # endif
 
 	  /* Copy the initialization image and clear the BSS part.  */
-	  dtv[map->l_tls_modid].pointer.val = dest;
-	  dtv[map->l_tls_modid].pointer.is_static = true;
+	  dtv[map->l_tls_modid].pointer = dest;
 	  memset (__mempcpy (dest, map->l_tls_initimage,
 			     map->l_tls_initimage_size), '\0',
 		  map->l_tls_blocksize - map->l_tls_initimage_size);
@@ -456,9 +445,6 @@ _dl_allocate_tls_init (void *result)
       assert (listp != NULL);
     }
 
-  /* The DTV version is up-to-date now.  */
-  dtv[0].counter = maxgen;
-
   return result;
 }
 rtld_hidden_def (_dl_allocate_tls_init)
@@ -480,12 +466,6 @@ _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
 {
   dtv_t *dtv = GET_DTV (tcb);
 
-  /* We need to free the memory allocated for non-static TLS.  */
-  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
-    if (! dtv[1 + cnt].pointer.is_static
-	&& dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
-      free (dtv[1 + cnt].pointer.val);
-
   /* The array starts with dtv[-1].  */
 #ifdef SHARED
   if (dtv != GL(dl_initial_dtv))
@@ -544,172 +524,166 @@ allocate_and_init (struct link_map *map)
 }
 
 
-struct link_map *
-_dl_update_slotinfo (unsigned long int req_modid)
+/* The generic dynamic and local dynamic model cannot be used in
+   statically linked applications.  */
+void *
+__tls_get_addr (GET_ADDR_ARGS)
 {
-  struct link_map *the_map = NULL;
   dtv_t *dtv = THREAD_DTV ();
+  struct link_map *the_map = NULL;
+  void *p;
 
-  /* The global dl_tls_dtv_slotinfo array contains for each module
-     index the generation counter current when the entry was created.
-     This array never shrinks so that all module indices which were
-     valid at some time can be used to access it.  Before the first
-     use of a new module index in this function the array was extended
-     appropriately.  Access also does not have to be guarded against
-     modifications of the array.  It is assumed that pointer-size
-     values can be read atomically even in SMP environments.  It is
-     possible that other threads at the same time dynamically load
-     code and therefore add to the slotinfo list.  This is a problem
-     since we must not pick up any information about incomplete work.
-     The solution to this is to ignore all dtv slots which were
-     created after the one we are currently interested.  We know that
-     dynamic loading for this module is completed and this is the last
-     load operation we know finished.  */
-  unsigned long int idx = req_modid;
-  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
-
-  while (idx >= listp->len)
+  if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
     {
-      idx -= listp->len;
-      listp = listp->next;
-    }
+      struct dtv_slotinfo_list *listp;
+      size_t idx;
+
+      /* The global dl_tls_dtv_slotinfo array contains for each module
+	 index the generation counter current when the entry was
+	 created.  This array never shrinks so that all module indices
+	 which were valid at some time can be used to access it.
+	 Before the first use of a new module index in this function
+	 the array was extended appropriately.  Access also does not
+	 have to be guarded against modifications of the array.  It is
+	 assumed that pointer-size values can be read atomically even
+	 in SMP environments.  It is possible that other threads at
+	 the same time dynamically load code and therefore add to the
+	 slotinfo list.  This is a problem since we must not pick up
+	 any information about incomplete work.  The solution to this
+	 is to ignore all dtv slots which were created after the one
+	 we are currently interested in.  We know that dynamic loading
+	 for this module is completed and this is the last load
+	 operation we know finished.  */
+      idx = GET_ADDR_MODULE;
+      listp = GL(dl_tls_dtv_slotinfo_list);
+      while (idx >= listp->len)
+	{
+	  idx -= listp->len;
+	  listp = listp->next;
+	}
 
-  if (dtv[0].counter < listp->slotinfo[idx].gen)
-    {
-      /* The generation counter for the slot is higher than what the
-	 current dtv implements.  We have to update the whole dtv but
-	 only those entries with a generation counter <= the one for
-	 the entry we need.  */
-      size_t new_gen = listp->slotinfo[idx].gen;
-      size_t total = 0;
-
-      /* We have to look through the entire dtv slotinfo list.  */
-      listp =  GL(dl_tls_dtv_slotinfo_list);
-      do
+      if (dtv[0].counter < listp->slotinfo[idx].gen)
 	{
-	  for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
+	  /* The generation counter for the slot is higher than what
+	     the current dtv implements.  We have to update the whole
+	     dtv but only those entries with a generation counter <=
+	     the one for the entry we need.  */
+	  size_t new_gen = listp->slotinfo[idx].gen;
+	  size_t total = 0;
+
+	  /* We have to look through the entire dtv slotinfo list.  */
+	  listp =  GL(dl_tls_dtv_slotinfo_list);
+	  do
 	    {
-	      size_t gen = listp->slotinfo[cnt].gen;
-
-	      if (gen > new_gen)
-		/* This is a slot for a generation younger than the
-		   one we are handling now.  It might be incompletely
-		   set up so ignore it.  */
-		continue;
-
-	      /* If the entry is older than the current dtv layout we
-		 know we don't have to handle it.  */
-	      if (gen <= dtv[0].counter)
-		continue;
-
-	      /* If there is no map this means the entry is empty.  */
-	      struct link_map *map = listp->slotinfo[cnt].map;
-	      if (map == NULL)
-		{
-		  /* If this modid was used at some point the memory
-		     might still be allocated.  */
-		  if (! dtv[total + cnt].pointer.is_static
-		      && dtv[total + cnt].pointer.val != TLS_DTV_UNALLOCATED)
-		    {
-		      free (dtv[total + cnt].pointer.val);
-		      dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
-		    }
-
-		  continue;
-		}
+	      size_t cnt;
 
-	      /* Check whether the current dtv array is large enough.  */
-	      size_t modid = map->l_tls_modid;
-	      assert (total + cnt == modid);
-	      if (dtv[-1].counter < modid)
+	      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
 		{
-		  /* Reallocate the dtv.  */
-		  dtv_t *newp;
-		  size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
-		  size_t oldsize = dtv[-1].counter;
-
-		  assert (map->l_tls_modid <= newsize);
-
-		  if (dtv == GL(dl_initial_dtv))
+		  size_t gen = listp->slotinfo[cnt].gen;
+		  struct link_map *map;
+		  size_t modid;
+
+		  if (gen > new_gen)
+		    /* This is a slot for a generation younger than
+		       the one we are handling now.  It might be
+		       incompletely set up so ignore it.  */
+		    continue;
+
+		  /* If the entry is older than the current dtv layout
+		     we know we don't have to handle it.  */
+		  if (gen <= dtv[0].counter)
+		    continue;
+
+		  /* If there is no map this means the entry is empty.  */
+		  map = listp->slotinfo[cnt].map;
+		  if (map == NULL)
 		    {
-		      /* This is the initial dtv that was allocated
-			 during rtld startup using the dl-minimal.c
-			 malloc instead of the real malloc.  We can't
-			 free it, we have to abandon the old storage.  */
-
-		      newp = malloc ((2 + newsize) * sizeof (dtv_t));
-		      if (newp == NULL)
-			oom ();
-		      memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
+		      /* If this modid was used at some point the memory
+			 might still be allocated.  */
+		      if (dtv[total + cnt].pointer != TLS_DTV_UNALLOCATED)
+			{
+			  free (dtv[total + cnt].pointer);
+			  dtv[total + cnt].pointer = TLS_DTV_UNALLOCATED;
+			}
+
+		      continue;
 		    }
-		  else
+
+		  /* Check whether the current dtv array is large enough.  */
+		  modid = map->l_tls_modid;
+		  assert (total + cnt == modid);
+		  if (dtv[-1].counter < modid)
 		    {
-		      newp = realloc (&dtv[-1],
-				      (2 + newsize) * sizeof (dtv_t));
-		      if (newp == NULL)
-			oom ();
+		      /* Reallocate the dtv.  */
+		      dtv_t *newp;
+		      size_t newsize = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
+		      size_t oldsize = dtv[-1].counter;
+
+		      assert (map->l_tls_modid <= newsize);
+
+		      if (dtv == GL(dl_initial_dtv))
+			{
+			  /* This is the initial dtv that was allocated
+			     during rtld startup using the dl-minimal.c
+			     malloc instead of the real malloc.  We can't
+			     free it, we have to abandon the old storage.  */
+
+			  newp = malloc ((2 + newsize) * sizeof (dtv_t));
+			  if (newp == NULL)
+			    oom ();
+			  memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
+			}
+		      else
+			{
+			  newp = realloc (&dtv[-1],
+					  (2 + newsize) * sizeof (dtv_t));
+			  if (newp == NULL)
+			    oom ();
+			}
+
+		      newp[0].counter = newsize;
+
+		      /* Clear the newly allocated part.  */
+		      memset (newp + 2 + oldsize, '\0',
+			      (newsize - oldsize) * sizeof (dtv_t));
+
+		      /* Point dtv to the generation counter.  */
+		      dtv = &newp[1];
+
+		      /* Install this new dtv in the thread data
+			 structures.  */
+		      INSTALL_NEW_DTV (dtv);
 		    }
 
-		  newp[0].counter = newsize;
-
-		  /* Clear the newly allocated part.  */
-		  memset (newp + 2 + oldsize, '\0',
-			  (newsize - oldsize) * sizeof (dtv_t));
-
-		  /* Point dtv to the generation counter.  */
-		  dtv = &newp[1];
-
-		  /* Install this new dtv in the thread data
-		     structures.  */
-		  INSTALL_NEW_DTV (dtv);
+		  /* If there is currently memory allocated for this
+		     dtv entry, free it.  */
+		  /* XXX Ideally we will at some point create a memory
+		     pool.  */
+		  if (dtv[modid].pointer != TLS_DTV_UNALLOCATED)
+		    /* Note that free is called for NULL as well.  We
+		       deallocate even if it is this dtv entry we are
+		       supposed to load.  The reason is that we call
+		       memalign and not malloc.  */
+		    free (dtv[modid].pointer);
+
+		  /* This module is loaded dynamically.  We defer
+		     memory allocation.  */
+		  dtv[modid].pointer = TLS_DTV_UNALLOCATED;
+
+		  if (modid == GET_ADDR_MODULE)
+		    the_map = map;
 		}
 
-	      /* If there is currently memory allocate for this
-		 dtv entry free it.  */
-	      /* XXX Ideally we will at some point create a memory
-		 pool.  */
-	      if (! dtv[modid].pointer.is_static
-		  && dtv[modid].pointer.val != TLS_DTV_UNALLOCATED)
-		/* Note that free is called for NULL is well.  We
-		   deallocate even if it is this dtv entry we are
-		   supposed to load.  The reason is that we call
-		   memalign and not malloc.  */
-		free (dtv[modid].pointer.val);
-
-	      /* This module is loaded dynamically- We defer memory
-		 allocation.  */
-	      dtv[modid].pointer.is_static = false;
-	      dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
-
-	      if (modid == req_modid)
-		the_map = map;
+	      total += listp->len;
 	    }
+	  while ((listp = listp->next) != NULL);
 
-	  total += listp->len;
+	  /* This will be the new maximum generation counter.  */
+	  dtv[0].counter = new_gen;
 	}
-      while ((listp = listp->next) != NULL);
-
-      /* This will be the new maximum generation counter.  */
-      dtv[0].counter = new_gen;
     }
 
-  return the_map;
-}
-
-
-/* The generic dynamic and local dynamic model cannot be used in
-   statically linked applications.  */
-void *
-__tls_get_addr (GET_ADDR_ARGS)
-{
-  dtv_t *dtv = THREAD_DTV ();
-  struct link_map *the_map = NULL;
-  void *p;
-
-  if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
-    the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
-
-  p = dtv[GET_ADDR_MODULE].pointer.val;
+  p = dtv[GET_ADDR_MODULE].pointer;
 
   if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
     {
@@ -729,74 +703,11 @@ __tls_get_addr (GET_ADDR_ARGS)
 	  the_map = listp->slotinfo[idx].map;
 	}
 
-      p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
-      dtv[GET_ADDR_MODULE].pointer.is_static = false;
+      p = dtv[GET_ADDR_MODULE].pointer = allocate_and_init (the_map);
     }
 
   return (char *) p + GET_ADDR_OFFSET;
 }
 # endif
 
-
-
-void
-_dl_add_to_slotinfo (struct link_map  *l)
-{
-  /* Now that we know the object is loaded successfully add
-     modules containing TLS data to the dtv info table.  We
-     might have to increase its size.  */
-  struct dtv_slotinfo_list *listp;
-  struct dtv_slotinfo_list *prevp;
-  size_t idx = l->l_tls_modid;
-
-  /* Find the place in the dtv slotinfo list.  */
-  listp = GL(dl_tls_dtv_slotinfo_list);
-  prevp = NULL;		/* Needed to shut up gcc.  */
-  do
-    {
-      /* Does it fit in the array of this list element?  */
-      if (idx < listp->len)
-	break;
-      idx -= listp->len;
-      prevp = listp;
-      listp = listp->next;
-    }
-  while (listp != NULL);
-
-  if (listp == NULL)
-    {
-      /* When we come here it means we have to add a new element
-	 to the slotinfo list.  And the new module must be in
-	 the first slot.  */
-      assert (idx == 0);
-
-      listp = prevp->next = (struct dtv_slotinfo_list *)
-	malloc (sizeof (struct dtv_slotinfo_list)
-		+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
-      if (listp == NULL)
-	{
-	  /* We ran out of memory.  We will simply fail this
-	     call but don't undo anything we did so far.  The
-	     application will crash or be terminated anyway very
-	     soon.  */
-
-	  /* We have to do this since some entries in the dtv
-	     slotinfo array might already point to this
-	     generation.  */
-	  ++GL(dl_tls_generation);
-
-	  _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
-cannot create TLS data structures"));
-	}
-
-      listp->len = TLS_SLOTINFO_SURPLUS;
-      listp->next = NULL;
-      memset (listp->slotinfo, '\0',
-	      TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
-    }
-
-  /* Add the information into the slotinfo data structure.  */
-  listp->slotinfo[idx].map = l;
-  listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1;
-}
 #endif	/* use TLS */
diff --git a/sysdeps/generic/dl-trampoline.c b/sysdeps/generic/dl-trampoline.c
deleted file mode 100644
index 3ca89f3879..0000000000
--- a/sysdeps/generic/dl-trampoline.c
+++ /dev/null
@@ -1 +0,0 @@
-#error "Architecture specific PLT trampolines must be defined."
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 08039e18ca..ec68e1a565 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -1,5 +1,5 @@
 /* Run-time dynamic linker data structures for loaded ELF shared objects.
-   Copyright (C) 1995-2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -52,15 +52,23 @@ __BEGIN_DECLS
   most architectures the entry is already relocated - but for some not
   and we need to relocate at access time.  */
 #ifdef DL_RO_DYN_SECTION
-# define D_PTR(map, i) ((map)->i->d_un.d_ptr + (map)->l_addr)
+# define D_PTR(map,i) ((map)->i->d_un.d_ptr + (map)->l_addr)
 #else
-# define D_PTR(map, i) (map)->i->d_un.d_ptr
+# define D_PTR(map,i) (map)->i->d_un.d_ptr
 #endif
 
-/* Result of the lookup functions and how to retrieve the base address.  */
+/* On some platforms more information than just the address of the symbol
+   is needed from the lookup functions.  In this case we return the whole
+   link map.  */
+#ifdef DL_LOOKUP_RETURNS_MAP
 typedef struct link_map *lookup_t;
 # define LOOKUP_VALUE(map) map
-# define LOOKUP_VALUE_ADDRESS(map) ((map) ? (map)->l_addr : 0)
+# define LOOKUP_VALUE_ADDRESS(map) ((map) ? (map)->l_addr : 0)
+#else
+typedef ElfW(Addr) lookup_t;
+# define LOOKUP_VALUE(map) (map)->l_addr
+# define LOOKUP_VALUE_ADDRESS(address) (address)
+#endif
 
 /* on some architectures a pointer to a function is not just a pointer
    to the actual code of the function but rather an architecture
@@ -174,133 +182,6 @@ enum allowmask
   };
 
 
-/* Type for list of auditing interfaces.  */
-struct La_i86_regs;
-struct La_i86_retval;
-struct La_x86_64_regs;
-struct La_x86_64_retval;
-struct La_ppc32_regs;
-struct La_ppc32_retval;
-struct La_ppc64_regs;
-struct La_ppc64_retval;
-struct La_sh_regs;
-struct La_sh_retval;
-struct La_m68k_regs;
-struct La_m68k_retval;
-struct La_alpha_regs;
-struct La_alpha_retval;
-struct La_s390_32_regs;
-struct La_s390_32_retval;
-struct La_s390_64_regs;
-struct La_s390_64_retval;
-struct La_ia64_regs;
-struct La_ia64_retval;
-
-struct audit_ifaces
-{
-  void (*activity) (uintptr_t *, unsigned int);
-  char *(*objsearch) (const char *, uintptr_t *, unsigned int);
-  unsigned int (*objopen) (struct link_map *, Lmid_t, uintptr_t *);
-  void (*preinit) (uintptr_t *);
-  union
-  {
-    uintptr_t (*symbind32) (Elf32_Sym *, unsigned int, uintptr_t *,
-			    uintptr_t *, unsigned int *, const char *);
-    uintptr_t (*symbind64) (Elf64_Sym *, unsigned int, uintptr_t *,
-			    uintptr_t *, unsigned int *, const char *);
-  };
-  union
-  {
-    Elf32_Addr (*i86_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *,
-				    uintptr_t *, struct La_i86_regs *,
-				    unsigned int *, const char *name,
-				    long int *framesizep);
-    Elf64_Addr (*x86_64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *,
-				       uintptr_t *, struct La_x86_64_regs *,
-				       unsigned int *, const char *name,
-				       long int *framesizep);
-    Elf32_Addr (*ppc32_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *,
-				      uintptr_t *, struct La_ppc32_regs *,
-				      unsigned int *, const char *name,
-				      long int *framesizep);
-    Elf64_Addr (*ppc64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *,
-				      uintptr_t *, struct La_ppc64_regs *,
-				      unsigned int *, const char *name,
-				      long int *framesizep);
-    uintptr_t (*sh_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *,
-				  uintptr_t *, const struct La_sh_regs *,
-				  unsigned int *, const char *name,
-				  long int *framesizep);
-    Elf32_Addr (*m68k_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *,
-				     uintptr_t *, struct La_m68k_regs *,
-				     unsigned int *, const char *name,
-				     long int *framesizep);
-    Elf64_Addr (*alpha_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *,
-				      uintptr_t *, struct La_alpha_regs *,
-				      unsigned int *, const char *name,
-				      long int *framesizep);
-    Elf32_Addr (*s390_32_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *,
-					uintptr_t *, struct La_s390_32_regs *,
-					unsigned int *, const char *name,
-					long int *framesizep);
-    Elf64_Addr (*s390_64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *,
-					uintptr_t *, struct La_s390_64_regs *,
-					unsigned int *, const char *name,
-					long int *framesizep);
-    Elf64_Addr (*ia64_gnu_pltenter) (Elf64_Sym *, unsigned int, uintptr_t *,
-				     uintptr_t *, struct La_ia64_regs *,
-				     unsigned int *, const char *name,
-				     long int *framesizep);
-  };
-  union
-  {
-    unsigned int (*i86_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *,
-				     uintptr_t *, const struct La_i86_regs *,
-				     struct La_i86_retval *, const char *);
-    unsigned int (*x86_64_gnu_pltexit) (Elf64_Sym *, unsigned int, uintptr_t *,
-					uintptr_t *,
-					const struct La_x86_64_regs *,
-					struct La_x86_64_retval *,
-					const char *);
-    unsigned int (*ppc32_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *,
-				       uintptr_t *,
-				       const struct La_ppc32_regs *,
-				       struct La_ppc32_retval *, const char *);
-    unsigned int (*ppc64_gnu_pltexit) (Elf64_Sym *, unsigned int, uintptr_t *,
-				       uintptr_t *,
-				       const struct La_ppc64_regs *,
-				       struct La_ppc64_retval *, const char *);
-    unsigned int (*sh_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *,
-				    uintptr_t *, const struct La_sh_regs *,
-				    struct La_sh_retval *, const char *);
-    unsigned int (*m68k_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *,
-				      uintptr_t *, const struct La_m68k_regs *,
-				      struct La_m68k_retval *, const char *);
-    unsigned int (*alpha_gnu_pltexit) (Elf64_Sym *, unsigned int, uintptr_t *,
-				       uintptr_t *,
-				       const struct La_alpha_regs *,
-				       struct La_alpha_retval *, const char *);
-    unsigned int (*s390_32_gnu_pltexit) (Elf32_Sym *, unsigned int,
-					 uintptr_t *, uintptr_t *,
-					 const struct La_s390_32_regs *,
-					 struct La_s390_32_retval *,
-					 const char *);
-    unsigned int (*s390_64_gnu_pltexit) (Elf64_Sym *, unsigned int,
-					 uintptr_t *, uintptr_t *,
-					 const struct La_s390_64_regs *,
-					 struct La_s390_64_retval *,
-					 const char *);
-    unsigned int (*ia64_gnu_pltexit) (Elf64_Sym *, unsigned int, uintptr_t *,
-				      uintptr_t *,
-				      const struct La_ia64_regs *,
-				      struct La_ia64_retval *, const char *);
-  };
-  unsigned int (*objclose) (uintptr_t *);
-
-  struct audit_ifaces *next;
-};
-
-
 /* Test whether given NAME matches any of the names of the given object.  */
 extern int _dl_name_match_p (const char *__name, struct link_map *__map)
      internal_function;
@@ -343,7 +224,7 @@ struct rtld_global
 #endif
   EXTERN struct link_namespaces
   {
-    /* A pointer to the map for the main map.  */
+    /* And a pointer to the map for the main map.  */
     struct link_map *_ns_loaded;
     /* Number of object in the _dl_loaded list.  */
     unsigned int _ns_nloaded;
@@ -355,8 +236,6 @@ struct rtld_global
        allocated by rtld.  Later it keeps the size of the map.  It might be
        reset if in _dl_close if the last global object is removed.  */
     size_t _ns_global_scope_alloc;
-    /* Keep track of changes to each namespace' list.  */
-    struct r_debug _ns_debug;
   } _dl_ns[DL_NNS];
 
   /* During the program run we must not modify the global data of
@@ -398,12 +277,8 @@ struct rtld_global
   EXTERN void **(*_dl_error_catch_tsd) (void) __attribute__ ((const));
 #endif
 
-  /* Structure describing the dynamic linker itself.  We need to
-     reserve memory for the data the audit libraries need.  */
+  /* Structure describing the dynamic linker itself.  */
   EXTERN struct link_map _dl_rtld_map;
-#ifdef SHARED
-  struct auditstate audit_data[DL_NNS];
-#endif
 
 #if defined SHARED && defined _LIBC_REENTRANT \
     && defined __rtld_lock_default_lock_recursive
@@ -436,7 +311,6 @@ struct rtld_global
     struct dtv_slotinfo
     {
       size_t gen;
-      bool is_static;
       struct link_map *map;
     } slotinfo[0];
   } *_dl_tls_dtv_slotinfo_list;
@@ -609,12 +483,32 @@ struct rtld_global_ro
      call the function instead of going through the PLT.  The result
      is that we can avoid exporting the functions and we do not jump
      PLT relocations in libc.so.  */
+  const char *(*_dl_get_origin) (void);
+  size_t (*_dl_dst_count) (const char *, int);
+  char *(*_dl_dst_substitute) (struct link_map *, const char *, char *, int);
+  struct link_map *(internal_function *_dl_map_object) (struct link_map *,
+							const char *, int,
+							int, int, int, Lmid_t);
+  void (internal_function *_dl_map_object_deps) (struct link_map *,
+						 struct link_map **,
+						 unsigned int, int, int);
+  void (*_dl_relocate_object) (struct link_map *, struct r_scope_elem *[],
+			       int, int);
+  int (internal_function *_dl_check_map_versions) (struct link_map *, int,
+						   int);
+  void (internal_function *_dl_init) (struct link_map *, int, char **,
+					char **);
+  void (*_dl_debug_state) (void);
+#ifndef MAP_COPY
+  void (*_dl_unload_cache) (void);
+#endif
   void (*_dl_debug_printf) (const char *, ...)
        __attribute__ ((__format__ (__printf__, 1, 2)));
   int (internal_function *_dl_catch_error) (const char **, const char **,
 					    void (*) (void *), void *);
   void (internal_function *_dl_signal_error) (int, const char *, const char *,
 					      const char *);
+  void (internal_function *_dl_start_profile) (void);
   void (*_dl_mcount) (ElfW(Addr) frompc, ElfW(Addr) selfpc);
   lookup_t (internal_function *_dl_lookup_symbol_x) (const char *,
 						     struct link_map *,
@@ -624,13 +518,7 @@ struct rtld_global_ro
 						     int, int,
 						     struct link_map *);
   int (*_dl_check_caller) (const void *, enum allowmask);
-  void *(*_dl_open) (const char *file, int mode, const void *caller_dlopen,
-		     Lmid_t nsid, int argc, char *argv[], char *env[]);
-  void (*_dl_close) (void *map);
 
-  /* List of auditing interfaces.  */
-  struct audit_ifaces *_dl_audit;
-  unsigned int _dl_naudit;
 };
 # define __rtld_global_attribute__
 # ifdef IS_IN_rtld
@@ -905,7 +793,7 @@ rtld_hidden_proto (_dl_debug_state)
 /* Initialize `struct r_debug' if it has not already been done.  The
    argument is the run-time load address of the dynamic linker, to be put
    in the `r_ldbase' member.  Returns the address of the structure.  */
-extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase, Lmid_t ns)
+extern struct r_debug *_dl_debug_initialize (ElfW(Addr) ldbase)
      internal_function;
 
 /* Initialize the basic data structure for the search paths.  */
@@ -1023,20 +911,6 @@ extern char *_dl_dst_substitute (struct link_map *l, const char *name,
 extern int _dl_check_caller (const void *caller, enum allowmask mask)
      attribute_hidden;
 
-/* Open the shared object NAME, relocate it, and run its initializer if it
-   hasn't already been run.  MODE is as for `dlopen' (see <dlfcn.h>).  If
-   the object is already opened, returns its existing map.  */
-extern void *_dl_open (const char *name, int mode, const void *caller,
-		       Lmid_t nsid, int argc, char *argv[], char *env[])
-     attribute_hidden;
-
-/* Add module to slot information data.  */
-extern void _dl_add_to_slotinfo (struct link_map  *l) attribute_hidden;
-
-/* Update slot information data for at least the generation of the
-   module with the given index.  */
-extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid);
-
 __END_DECLS
 
 #endif /* ldsodefs.h */
diff --git a/sysdeps/generic/libc-start.c b/sysdeps/generic/libc-start.c
index 5bb8a9b352..fc9df40996 100644
--- a/sysdeps/generic/libc-start.c
+++ b/sysdeps/generic/libc-start.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998-2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -80,10 +80,6 @@ STATIC int LIBC_START_MAIN (int (*main) (int, char **, char **
 			    void *__unbounded stack_end)
      __attribute__ ((noreturn));
 
-
-/* Note: the fini parameter is ignored here.  It used to be registered
-   with __cxa_atexit.  This had the disadvantage that finalizers were
-   called in more than one place.  */
 STATIC int
 LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
 		 int argc, char *__unbounded *__unbounded ubp_av,
@@ -110,9 +106,9 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
 
   __libc_multiple_libcs = &_dl_starting_up && !_dl_starting_up;
 
-#ifndef SHARED
   INIT_ARGV_and_ENVIRON;
 
+#ifndef SHARED
   /* Store the lowest stack address.  This is done in ld.so if this is
      the code for the DSO.  */
   __libc_stack_end = stack_end;
@@ -162,6 +158,10 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
   __libc_init_first (argc, argv, __environ);
 #endif
 
+  /* Register the destructor of the program, if any.  */
+  if (fini)
+    __cxa_atexit ((void (*) (void *)) fini, NULL, NULL);
+
 #ifndef SHARED
   /* Some security at this point.  Prevent starting a SUID binary where
      the standard file descriptors are not opened.  We have to do this
@@ -184,22 +184,6 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
 	     );
 
 #ifdef SHARED
-  /* Auditing checkpoint: we have a new object.  */
-  if (__builtin_expect (GLRO(dl_naudit) > 0, 0))
-    {
-      struct audit_ifaces *afct = GLRO(dl_audit);
-      struct link_map *head = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
-      for (unsigned int cnt = 0; cnt < GLRO(dl_naudit); ++cnt)
-	{
-	  if (afct->preinit != NULL)
-	    afct->preinit (&head->l_audit[cnt].cookie);
-
-	  afct = afct->next;
-	}
-    }
-#endif
-
-#ifdef SHARED
   if (__builtin_expect (GLRO(dl_debug_mask) & DL_DEBUG_IMPCALLS, 0))
     GLRO(dl_debug_printf) ("\ntransferring control: %s\n\n", argv[0]);
 #endif
diff --git a/sysdeps/generic/libc-tls.c b/sysdeps/generic/libc-tls.c
index b88ede06a2..b5ecc36436 100644
--- a/sysdeps/generic/libc-tls.c
+++ b/sysdeps/generic/libc-tls.c
@@ -1,5 +1,5 @@
 /* Initialization code for TLS in statically linked application.
-   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -178,18 +178,17 @@ __libc_setup_tls (size_t tcbsize, size_t tcbalign)
 
   /* Initialize the TLS block.  */
 # if TLS_TCB_AT_TP
-  static_dtv[2].pointer.val = ((char *) tlsblock + tcb_offset
-			       - roundup (memsz, align ?: 1));
+  static_dtv[2].pointer = ((char *) tlsblock + tcb_offset
+			   - roundup (memsz, align ?: 1));
   static_map.l_tls_offset = roundup (memsz, align ?: 1);
 # elif TLS_DTV_AT_TP
-  static_dtv[2].pointer.val = (char *) tlsblock + tcb_offset;
+  static_dtv[2].pointer = (char *) tlsblock + tcb_offset;
   static_map.l_tls_offset = tcb_offset;
 # else
 #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 # endif
-  static_dtv[2].pointer.is_static = true;
   /* sbrk gives us zero'd memory, so we don't need to clear the remainder.  */
-  memcpy (static_dtv[2].pointer.val, initimage, filesz);
+  memcpy (static_dtv[2].pointer, initimage, filesz);
 
   /* Install the pointer to the dtv.  */
 
diff --git a/sysdeps/generic/unsecvars.h b/sysdeps/generic/unsecvars.h
index a7378b742f..eb77b260d8 100644
--- a/sysdeps/generic/unsecvars.h
+++ b/sysdeps/generic/unsecvars.h
@@ -2,19 +2,18 @@
    all stuffed in a single string which means they have to be terminated
    with a '\0' explicitly.  */
 #define UNSECURE_ENVVARS \
-  "GCONV_PATH\0"							      \
-  "GETCONF_DIR\0"							      \
-  "HOSTALIASES\0"							      \
-  "LD_AUDIT\0"								      \
-  "LD_DEBUG\0"								      \
-  "LD_DEBUG_OUTPUT\0"							      \
-  "LD_DYNAMIC_WEAK\0"							      \
+  "LD_PRELOAD\0"							      \
   "LD_LIBRARY_PATH\0"							      \
   "LD_ORIGIN_PATH\0"							      \
-  "LD_PRELOAD\0"							      \
+  "LD_DEBUG\0"								      \
+  "LD_DEBUG_OUTPUT\0"							      \
   "LD_PROFILE\0"							      \
-  "LD_SHOW_AUXV\0"							      \
   "LD_USE_LOAD_BIAS\0"							      \
+  "LD_DYNAMIC_WEAK\0"							      \
+  "LD_SHOW_AUXV\0"							      \
+  "GCONV_PATH\0"							      \
+  "GETCONF_DIR\0"							      \
+  "HOSTALIASES\0"							      \
   "LOCALDOMAIN\0"							      \
   "LOCPATH\0"								      \
   "MALLOC_TRACE\0"							      \
diff --git a/sysdeps/generic/wordexp.c b/sysdeps/generic/wordexp.c
index c3d382fb95..3e37d6449c 100644
--- a/sysdeps/generic/wordexp.c
+++ b/sysdeps/generic/wordexp.c
@@ -1,5 +1,5 @@
 /* POSIX.2 wordexp implementation.
-   Copyright (C) 1997-2002, 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1997-2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Tim Waugh <tim@cyberelk.demon.co.uk>.
 
@@ -810,7 +810,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length,
 
 /* Function called by child process in exec_comm() */
 static void
-internal_function __attribute__ ((always_inline))
+internal_function
 exec_comm_child (char *comm, int *fildes, int showerr, int noexec)
 {
   const char *args[4] = { _PATH_BSHELL, "-c", comm, NULL };
@@ -868,14 +868,13 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
 	   const char *ifs_white)
 {
   int fildes[2];
-#define bufsize 128
+  int bufsize = 128;
   int buflen;
   int i;
   int status = 0;
   size_t maxnewlines = 0;
-  char buffer[bufsize];
+  char *buffer;
   pid_t pid;
-  int noexec = 0;
 
   /* Don't fork() unless necessary */
   if (!comm || !*comm)
@@ -885,42 +884,32 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
     /* Bad */
     return WRDE_NOSPACE;
 
- again:
   if ((pid = __fork ()) < 0)
     {
       /* Bad */
-      if (fildes[0] != -1)
-	__close (fildes[0]);
-      if (fildes[1] != -1)
-	__close (fildes[1]);
+      __close (fildes[0]);
+      __close (fildes[1]);
       return WRDE_NOSPACE;
     }
 
   if (pid == 0)
-    exec_comm_child (comm, fildes, noexec ? 0 : flags & WRDE_SHOWERR, noexec);
+    exec_comm_child (comm, fildes, flags & WRDE_SHOWERR, 0);
 
   /* Parent */
 
-  /* If we are just testing the syntax, only wait.  */
-  if (noexec)
-    return (TEMP_FAILURE_RETRY (__waitpid (pid, &status, 0)) == pid
-	    && status != 0) ? WRDE_SYNTAX : 0;
-
   __close (fildes[1]);
-  fildes[1] = -1;
+  buffer = __alloca (bufsize);
 
   if (!pwordexp)
     /* Quoted - no field splitting */
     {
       while (1)
 	{
-	  if ((buflen = TEMP_FAILURE_RETRY (__read (fildes[0], buffer,
-						    bufsize))) < 1)
+	  if ((buflen = __read (fildes[0], buffer, bufsize)) < 1)
 	    {
-	      if (TEMP_FAILURE_RETRY (__waitpid (pid, &status, WNOHANG)) == 0)
+	      if (__waitpid (pid, &status, WNOHANG) == 0)
 		continue;
-	      if ((buflen = TEMP_FAILURE_RETRY (__read (fildes[0], buffer,
-							bufsize))) < 1)
+	      if ((buflen = __read (fildes[0], buffer, bufsize)) < 1)
 		break;
 	    }
 
@@ -944,13 +933,11 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
 
       while (1)
 	{
-	  if ((buflen = TEMP_FAILURE_RETRY (__read (fildes[0], buffer,
-						    bufsize))) < 1)
+	  if ((buflen = __read (fildes[0], buffer, bufsize)) < 1)
 	    {
-	      if (TEMP_FAILURE_RETRY (__waitpid (pid, &status, WNOHANG)) == 0)
+	      if (__waitpid (pid, &status, WNOHANG) == 0)
 		continue;
-	      if ((buflen = TEMP_FAILURE_RETRY (__read (fildes[0], buffer,
-							bufsize))) < 1)
+	      if ((buflen = __read (fildes[0], buffer, bufsize)) < 1)
 		break;
 	    }
 
@@ -1066,20 +1053,31 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length,
     }
 
   __close (fildes[0]);
-  fildes[0] = -1;
 
   /* Check for syntax error (re-execute but with "-n" flag) */
   if (buflen < 1 && status != 0)
     {
-      noexec = 1;
-      goto again;
+      if ((pid = __fork ()) < 0)
+	{
+	  /* Bad */
+	  return WRDE_NOSPACE;
+	}
+
+      if (pid == 0)
+	{
+          fildes[0] = fildes[1] = -1;
+	  exec_comm_child (comm, fildes, 0, 1);
+	}
+
+      if (__waitpid (pid, &status, 0) == pid && status != 0)
+	return WRDE_SYNTAX;
     }
 
   return 0;
 
 no_space:
   __kill (pid, SIGKILL);
-  TEMP_FAILURE_RETRY (__waitpid (pid, NULL, 0));
+  __waitpid (pid, NULL, 0);
   __close (fildes[0]);
   return WRDE_NOSPACE;
 }
diff --git a/sysdeps/hppa/bits/link.h b/sysdeps/hppa/bits/link.h
index e69de29bb2..54842b2299 100644
--- a/sysdeps/hppa/bits/link.h
+++ b/sysdeps/hppa/bits/link.h
@@ -0,0 +1,6 @@
+/* Used to store the function descriptor table */
+struct link_map_machine
+  {
+    size_t fptr_table_len;
+    ElfW(Addr) *fptr_table;
+  };
diff --git a/sysdeps/hppa/bits/linkmap.h b/sysdeps/hppa/bits/linkmap.h
deleted file mode 100644
index 54842b2299..0000000000
--- a/sysdeps/hppa/bits/linkmap.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* Used to store the function descriptor table */
-struct link_map_machine
-  {
-    size_t fptr_table_len;
-    ElfW(Addr) *fptr_table;
-  };
diff --git a/sysdeps/hppa/dl-lookupcfg.h b/sysdeps/hppa/dl-lookupcfg.h
index 84436e7c56..d393b3e427 100644
--- a/sysdeps/hppa/dl-lookupcfg.h
+++ b/sysdeps/hppa/dl-lookupcfg.h
@@ -1,5 +1,5 @@
 /* Configuration of lookup functions.
-   Copyright (C) 2000, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,6 +17,9 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+/* Like IA-64, PA-RISC needs more information from the symbol lookup
+   function than just the address. */
+#define DL_LOOKUP_RETURNS_MAP
 #define ELF_FUNCTION_PTR_IS_SPECIAL
 #define DL_UNMAP_IS_SPECIAL
 
@@ -63,3 +66,4 @@ void _dl_unmap (struct link_map *map);
   ((Elf32_Addr)(addr) & 2 ? (addr) : DL_AUTO_FUNCTION_ADDRESS (map, addr))
 #define DL_DT_FINI_ADDRESS(map, addr) \
   ((Elf32_Addr)(addr) & 2 ? (addr) : DL_AUTO_FUNCTION_ADDRESS (map, addr))
+
diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile
index ece94f3610..52faaa3109 100644
--- a/sysdeps/i386/Makefile
+++ b/sysdeps/i386/Makefile
@@ -61,7 +61,3 @@ CFLAGS-dlopenold.c += -mpreferred-stack-boundary=4
 CFLAGS-dlclose.c += -mpreferred-stack-boundary=4
 CFLAGS-dlerror.c += -mpreferred-stack-boundary=4
 endif
-
-ifneq (,$(filter -mno-tls-direct-seg-refs,$(CFLAGS)))
-defines += -DNO_TLS_DIRECT_SEG_REFS
-endif
diff --git a/sysdeps/i386/bits/link.h b/sysdeps/i386/bits/link.h
index 985d040413..3be9b7eae8 100644
--- a/sysdeps/i386/bits/link.h
+++ b/sysdeps/i386/bits/link.h
@@ -1,60 +1,5 @@
-/* Copyright (C) 2004, 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef	_LINK_H
-# error "Never include <bits/link.h> directly; use <link.h> instead."
-#endif
-
-
-/* Registers for entry into PLT on IA-32.  */
-typedef struct La_i86_regs
-{
-  uint32_t lr_edx;
-  uint32_t lr_ecx;
-  uint32_t lr_eax;
-  uint32_t lr_ebp;
-  uint32_t lr_esp;
-} La_i86_regs;
-
-/* Return values for calls from PLT on IA-32.  */
-typedef struct La_i86_retval
-{
-  uint32_t lrv_eax;
-  uint32_t lrv_edx;
-  long double lrv_st0;
-  long double lrv_st1;
-} La_i86_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf32_Addr la_i86_gnu_pltenter (Elf32_Sym *__sym, unsigned int __ndx,
-				       uintptr_t *__refcook,
-				       uintptr_t *__defcook,
-				       La_i86_regs *__regs,
-				       unsigned int *__flags,
-				       const char *__symname,
-				       long int *__framesizep);
-extern unsigned int la_i86_gnu_pltexit (Elf32_Sym *__sym, unsigned int __ndx,
-					uintptr_t *__refcook,
-					uintptr_t *__defcook,
-					const La_i86_regs *__inregs,
-					La_i86_retval *__outregs,
-					const char *symname);
-
-__END_DECLS
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt + 0x16 */
+    Elf32_Addr gotplt; /* Address of .got + 0x0c */
+  };
diff --git a/sysdeps/i386/bits/linkmap.h b/sysdeps/i386/bits/linkmap.h
deleted file mode 100644
index 3be9b7eae8..0000000000
--- a/sysdeps/i386/bits/linkmap.h
+++ /dev/null
@@ -1,5 +0,0 @@
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt + 0x16 */
-    Elf32_Addr gotplt; /* Address of .got + 0x0c */
-  };
diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure
deleted file mode 100755
index d1d4dc15a7..0000000000
--- a/sysdeps/i386/configure
+++ /dev/null
@@ -1,54 +0,0 @@
-# This file is generated from configure.in by Autoconf.  DO NOT EDIT!
- # Local configure fragment for sysdeps/i386.
-
-echo "$as_me:$LINENO: checking if -g produces usable source locations for assembler-with-cpp" >&5
-echo $ECHO_N "checking if -g produces usable source locations for assembler-with-cpp... $ECHO_C" >&6
-if test "${libc_cv_cpp_asm_debuginfo+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  cat > conftest.S <<EOF
-#include "confdefs.h"
-
-/* comment on
-   two lines */
-	${libc_cv_dot_text}
-	${libc_cv_asm_global_directive} foo
-foo:
-	/* Unfortunately this test only works for a real instruction,
-	   not for any of the machine-independent pseudo-ops.
-	   So we just have to assume everybody has a "nop".  */
-	nop
-	/* comment */
-	nop
-	/* comment */
-	nop
-EOF
-if { ac_try='${CC-cc} $CPPFLAGS $ASFLAGS -g -c conftest.S 1>&5'
-  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; } && {
-   ac_pattern='conftest\.S'
-   { ac_try='readelf --debug-dump=line conftest.o |
-		   grep $ac_pattern 1>&5'
-  { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
-  (eval $ac_try) 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; }
-  }; then
-  libc_cv_cpp_asm_debuginfo=yes
-else
-  libc_cv_cpp_asm_debuginfo=no
-fi
-rm -f conftest*
-fi
-echo "$as_me:$LINENO: result: $libc_cv_cpp_asm_debuginfo" >&5
-echo "${ECHO_T}$libc_cv_cpp_asm_debuginfo" >&6
-if test $libc_cv_cpp_asm_debuginfo = yes; then
-  cat >>confdefs.h <<\_ACEOF
-#define HAVE_CPP_ASM_DEBUGINFO 1
-_ACEOF
-
-fi
diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in
deleted file mode 100644
index 028e1ae8e1..0000000000
--- a/sysdeps/i386/configure.in
+++ /dev/null
@@ -1,35 +0,0 @@
-GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
-# Local configure fragment for sysdeps/i386.
-
-AC_CACHE_CHECK(if -g produces usable source locations for assembler-with-cpp,
-	       libc_cv_cpp_asm_debuginfo, [dnl
-cat > conftest.S <<EOF
-#include "confdefs.h"
-
-/* comment on
-   two lines */
-	${libc_cv_dot_text}
-	${libc_cv_asm_global_directive} foo
-foo:
-	/* Unfortunately this test only works for a real instruction,
-	   not for any of the machine-independent pseudo-ops.
-	   So we just have to assume everybody has a "nop".  */
-	nop
-	/* comment */
-	nop
-	/* comment */
-	nop
-EOF
-if AC_TRY_COMMAND([${CC-cc} $CPPFLAGS $ASFLAGS -g -c conftest.S 1>&AS_MESSAGE_LOG_FD]) && {
-   ac_pattern='conftest\.S'
-   AC_TRY_COMMAND([readelf --debug-dump=line conftest.o |
-		   grep $ac_pattern 1>&AS_MESSAGE_LOG_FD])
-  }; then
-  libc_cv_cpp_asm_debuginfo=yes
-else
-  libc_cv_cpp_asm_debuginfo=no
-fi
-rm -f conftest*])AC_SUBST(libc_cv_cpp_asm_debuginfo)
-if test $libc_cv_cpp_asm_debuginfo = yes; then
-  AC_DEFINE(HAVE_CPP_ASM_DEBUGINFO)
-fi
diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 78c083f6ab..c48d9d325e 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  i386 version.
-   Copyright (C) 1995-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -129,8 +129,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	{
 	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
 
-	  if (GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), l))
+	  if (_dl_name_match_p (GLRO(dl_profile), l))
 	    /* This is the object we are looking for.  Say that we really
 	       want profiling and the timers are started.  */
 	    GL(dl_profile_map) = l;
@@ -155,18 +154,112 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
    destroys the passed register information.  */
 /* GKM FIXME: Fix trampoline to pass bounds so we can do
    without the `__unbounded' qualifier.  */
-#define ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), stdcall, unused))
+#define ARCH_FIXUP_ATTRIBUTE __attribute__ ((regparm (3), unused))
 
-extern ElfW(Addr) _dl_fixup (struct link_map *__unbounded l,
-			     ElfW(Word) reloc_offset)
+static ElfW(Addr) fixup (struct link_map *__unbounded l,
+			 ElfW(Word) reloc_offset)
      ARCH_FIXUP_ATTRIBUTE;
-extern ElfW(Addr) _dl_profile_fixup (struct link_map *l,
-				     ElfW(Word) reloc_offset,
-				     ElfW(Addr) retaddr, void *regs,
-				     long int *framesizep)
+static ElfW(Addr) profile_fixup (struct link_map *l, ElfW(Word) reloc_offset,
+				 ElfW(Addr) retaddr)
      ARCH_FIXUP_ATTRIBUTE;
 # endif
 
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.  */
+# if !defined PROF && !__BOUNDED_POINTERS__
+#  define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
+	.text\n\
+	.globl _dl_runtime_resolve\n\
+	.type _dl_runtime_resolve, @function\n\
+	" CFI_STARTPROC "\n\
+	.align 16\n\
+_dl_runtime_resolve:\n\
+	" CFI_ADJUST_CFA_OFFSET (8) "\n\
+	pushl %eax		# Preserve registers otherwise clobbered.\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	pushl %ecx\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	pushl %edx\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	movl 16(%esp), %edx	# Copy args pushed by PLT in register.  Note\n\
+	movl 12(%esp), %eax	# that `fixup' takes its parameters in regs.\n\
+	call fixup		# Call resolver.\n\
+	popl %edx		# Get register content back.\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	popl %ecx\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	xchgl %eax, (%esp)	# Get %eax contents end store function address.\n\
+	ret $8			# Jump to function address.\n\
+	" CFI_ENDPROC "\n\
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+\n\
+	.globl _dl_runtime_profile\n\
+	.type _dl_runtime_profile, @function\n\
+	" CFI_STARTPROC "\n\
+	.align 16\n\
+_dl_runtime_profile:\n\
+	" CFI_ADJUST_CFA_OFFSET (8) "\n\
+	pushl %eax		# Preserve registers otherwise clobbered.\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	pushl %ecx\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	pushl %edx\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	movl 20(%esp), %ecx	# Load return address\n\
+	movl 16(%esp), %edx	# Copy args pushed by PLT in register.  Note\n\
+	movl 12(%esp), %eax	# that `fixup' takes its parameters in regs.\n\
+	call profile_fixup	# Call resolver.\n\
+	popl %edx		# Get register content back.\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	popl %ecx\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	xchgl %eax, (%esp)	# Get %eax contents end store function address.\n\
+	ret $8			# Jump to function address.\n\
+	" CFI_ENDPROC "\n\
+	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
+	.previous\n\
+");
+# else
+#  define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\n\
+	.text\n\
+	.globl _dl_runtime_resolve\n\
+	.globl _dl_runtime_profile\n\
+	.type _dl_runtime_resolve, @function\n\
+	.type _dl_runtime_profile, @function\n\
+	" CFI_STARTPROC "\n\
+	.align 16\n\
+_dl_runtime_resolve:\n\
+_dl_runtime_profile:\n\
+	" CFI_ADJUST_CFA_OFFSET (8) "\n\
+	pushl %eax		# Preserve registers otherwise clobbered.\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	pushl %ecx\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	pushl %edx\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	movl 16(%esp), %edx	# Push the arguments for `fixup'\n\
+	movl 12(%esp), %eax\n\
+	pushl %edx\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	pushl %eax\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	call fixup		# Call resolver.\n\
+	popl %edx		# Pop the parameters\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	popl %ecx\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	popl %edx		# Get register content back.\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	popl %ecx\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	xchgl %eax, (%esp)	# Get %eax contents end store function address.\n\
+	ret $8			# Jump to function address.\n\
+	" CFI_ENDPROC "\n\
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
+	.previous\n\
+");
+# endif
 #endif
 
 /* Mask identifying addresses reserved for the user program,
@@ -215,21 +308,11 @@ _dl_start_user:\n\
 	movl _rtld_local@GOTOFF(%ebx), %eax\n\
 	leal 8(%esp,%edx,4), %esi\n\
 	leal 4(%esp), %ecx\n\
-	movl %esp, %ebp\n\
-	# Make sure _dl_init is run with 16 byte aligned stack.\n\
-	andl $-16, %esp\n\
-	pushl %eax\n\
-	pushl %eax\n\
-	pushl %ebp\n\
 	pushl %esi\n\
-	# Clear %ebp, so that even constructors have terminated backchain.\n\
-	xorl %ebp, %ebp\n\
 	# Call the function to run the initializers.\n\
 	call _dl_init_internal@PLT\n\
 	# Pass our finalizer function to the user in %edx, as per ELF ABI.\n\
 	leal _dl_fini@GOTOFF(%ebx), %edx\n\
-	# Restore %esp _start expects.\n\
-	movl (%esp), %esp\n\
 	# Jump to the user's entry point.\n\
 	jmp *%edi\n\
 	.previous\n\
@@ -292,14 +375,9 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rel *reloc,
   return value;
 }
 
-
-/* Names of the architecture-specific auditing callback functions.  */
-#define ARCH_LA_PLTENTER i86_gnu_pltenter
-#define ARCH_LA_PLTEXIT i86_gnu_pltexit
-
 #endif /* !dl_machine_h */
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 
 /* The i386 never uses Elf32_Rela relocations for the dynamic linker.
    Prelinked libraries may use Elf32_Rela though.  */
@@ -344,8 +422,17 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
 #endif	/* !RTLD_BOOTSTRAP and have no -z combreloc */
     {
       const Elf32_Sym *const refsym = sym;
+#if defined USE_TLS && !defined RTLD_BOOTSTRAP
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
-      Elf32_Addr value = sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value;
+      Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
+#else
+      Elf32_Addr value = RESOLVE (&sym, version, r_type);
+
+# ifndef RTLD_BOOTSTRAP
+      if (sym != NULL)
+# endif
+	value += sym->st_value;
+#endif	/* use TLS and !RTLD_BOOTSTRAP */
 
       switch (r_type)
 	{
@@ -462,8 +549,14 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 # ifndef RESOLVE_CONFLICT_FIND_MAP
       const Elf32_Sym *const refsym = sym;
 # endif
+# ifdef USE_TLS
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
       Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
+# else
+      Elf32_Addr value = RESOLVE (&sym, version, r_type);
+      if (sym != NULL)
+	value += sym->st_value;
+# endif
 
       switch (ELF32_R_TYPE (reloc->r_info))
 	{
@@ -599,4 +692,4 @@ elf_machine_lazy_rela (struct link_map *map,
 
 #endif	/* !RTLD_BOOTSTRAP */
 
-#endif /* RESOLVE_MAP */
+#endif /* RESOLVE */
diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S
deleted file mode 100644
index 80dd300e86..0000000000
--- a/sysdeps/i386/dl-trampoline.S
+++ /dev/null
@@ -1,182 +0,0 @@
-/* PLT trampolines.  i386 version.
-   Copyright (C) 2004, 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.text
-	.globl _dl_runtime_resolve
-	.type _dl_runtime_resolve, @function
-	cfi_startproc
-	.align 16
-_dl_runtime_resolve:
-	cfi_adjust_cfa_offset (8)
-	pushl %eax		# Preserve registers otherwise clobbered.
-	cfi_adjust_cfa_offset (4)
-	pushl %ecx
-	cfi_adjust_cfa_offset (4)
-	pushl %edx
-	cfi_adjust_cfa_offset (4)
-	movl 16(%esp), %edx	# Copy args pushed by PLT in register.  Note
-	movl 12(%esp), %eax	# that `fixup' takes its parameters in regs.
-	call _dl_fixup		# Call resolver.
-	popl %edx		# Get register content back.
-	cfi_adjust_cfa_offset (-4)
-	popl %ecx
-	cfi_adjust_cfa_offset (-4)
-	xchgl %eax, (%esp)	# Get %eax contents end store function address.
-	ret $8			# Jump to function address.
-	cfi_endproc
-	.size _dl_runtime_resolve, .-_dl_runtime_resolve
-
-
-	.globl _dl_runtime_profile
-	.type _dl_runtime_profile, @function
-	cfi_startproc
-	.align 16
-_dl_runtime_profile:
-	cfi_adjust_cfa_offset (8)
-	pushl %esp
-	cfi_adjust_cfa_offset (4)
-	addl $8, (%esp)		# Account for the pushed PLT data
-	pushl %ebp
-	cfi_adjust_cfa_offset (4)
-	pushl %eax		# Preserve registers otherwise clobbered.
-	cfi_adjust_cfa_offset (4)
-	pushl %ecx
-	cfi_adjust_cfa_offset (4)
-	pushl %edx
-	cfi_adjust_cfa_offset (4)
-	movl %esp, %ecx
-	subl $8, %esp
-	cfi_adjust_cfa_offset (8)
-	movl $-1, 4(%esp)
-	leal 4(%esp), %edx
-	movl %edx, (%esp)
-	pushl %ecx		# Address of the register structure
-	cfi_adjust_cfa_offset (4)
-	movl 40(%esp), %ecx	# Load return address
-	movl 36(%esp), %edx	# Copy args pushed by PLT in register.  Note
-	movl 32(%esp), %eax	# that `fixup' takes its parameters in regs.
-	call _dl_profile_fixup	# Call resolver.
-	cfi_adjust_cfa_offset (-8)
-	movl (%esp), %edx
-	testl %edx, %edx
-	jns 1f
-	popl %edx
-	cfi_adjust_cfa_offset (-4)
-	popl %edx		# Get register content back.
-	cfi_adjust_cfa_offset (-4)
-	popl %ecx
-	cfi_adjust_cfa_offset (-4)
-	xchgl %eax, (%esp)	# Get %eax contents end store function address.
-	ret $16			# Jump to function address.
-
-	/*
-	    +32     return address
-	    +28     PLT1
-	    +24     PLT2
-	    +20     %esp
-	    +16     %ebp
-	    +12     %eax
-	    +8      %ecx
-	    +4      %edx
-	   %esp     free
-	*/
-	cfi_adjust_cfa_offset (12)
-1:	movl %ebx, (%esp)
-	cfi_rel_offset (3, 0)
-	movl %edx, %ebx		# This is the frame buffer size
-	pushl %edi
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (7, 0)
-	pushl %esi
-	cfi_adjust_cfa_offset (4)
-	cfi_rel_offset (6, 0)
-	leal 44(%esp), %esi
-	movl %ebx, %ecx
-	movl %esp, %edi
-	subl %ebx, %edi
-	andl $0xfffffff0, %edi	# Align stack
-	movl %esp, %ebx
-	cfi_def_cfa_register (3)
-	movl %edi, %esp
-	shrl $2, %ecx
-	rep
-	movsl
-	movl (%edi), %esi
-	cfi_restore (6)
-	movl 4(%edi), %edi
-	cfi_restore (7)
-	/*
-	   %ebx+40  return address
-	   %ebx+36  PLT1
-	   %ebx+32  PLT2
-	   %ebx+28  %esp
-	   %ebx+24  %ebp
-	   %ebx+20  %eax
-	   %ebx+16  %ecx
-	   %ebx+12  %edx
-	   %ebx+8   %ebx
-	   %ebx+4   free
-	   %ebx     free
-	   %esp     copied stack frame
-	*/
-	movl %eax, (%ebx)
-	movl 12(%ebx), %edx
-	movl 16(%ebx), %ecx
-	movl 20(%ebx), %eax
-	call *(%ebx)
-	movl %ebx, %esp
-	cfi_def_cfa_register (4)
-	movl 8(%esp), %ebx
-	cfi_restore (3)
-	/*
-	    +40     return address
-	    +36     PLT1
-	    +32     PLT2
-	    +28     %esp
-	    +24     %ebp
-	    +20     %eax
-	    +16     %ecx
-	    +12     %edx
-	    +8      free
-	    +4      free
-	   %esp     free
-	*/
-	subl $20, %esp
-	cfi_adjust_cfa_offset (20)
-	movl %eax, (%esp)
-	movl %edx, 4(%esp)
-	fstpt 8(%esp)
-	fstpt 20(%esp)
-	pushl %esp
-	cfi_adjust_cfa_offset (4)
-	leal 36(%esp), %ecx
-	movl 56(%esp), %eax
-	movl 60(%esp), %edx
-	call _dl_call_pltexit
-	movl (%esp), %eax
-	movl 4(%esp), %edx
-	fldt 20(%esp)
-	fldt 8(%esp)
-	addl $60, %esp
-	cfi_adjust_cfa_offset (-60)
-	ret
-	cfi_endproc
-	.size _dl_runtime_profile, .-_dl_runtime_profile
diff --git a/sysdeps/ia64/bits/link.h b/sysdeps/ia64/bits/link.h
index f751c23fd1..7f8b0550d9 100644
--- a/sysdeps/ia64/bits/link.h
+++ b/sysdeps/ia64/bits/link.h
@@ -1,63 +1,5 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef	_LINK_H
-# error "Never include <bits/link.h> directly; use <link.h> instead."
-#endif
-
-/* Registers for entry into PLT on ia64.  */
-typedef struct La_ia64_regs
-{
-  uint64_t lr_r8;
-  uint64_t lr_r9;
-  uint64_t lr_r10;
-  uint64_t lr_r11;
-  uint64_t lr_gr [8];
-  long double lr_fr [8];
-  uint64_t lr_unat;
-  uint64_t lr_sp;
-} La_ia64_regs;
-
-/* Return values for calls from PLT on ia64.  */
-typedef struct La_ia64_retval
-{
-  uint64_t lrv_r8;
-  uint64_t lrv_r9;
-  uint64_t lrv_r10;
-  uint64_t lrv_r11;
-  long double lr_fr [8];
-} La_ia64_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf64_Addr la_ia64_gnu_pltenter (Elf64_Sym *__sym, unsigned int __ndx,
-				       uintptr_t *__refcook,
-				       uintptr_t *__defcook,
-				       La_ia64_regs *__regs,
-				       unsigned int *__flags,
-				       const char *__symname,
-				       long int *__framesizep);
-extern unsigned int la_ia64_gnu_pltexit (Elf64_Sym *__sym, unsigned int __ndx,
-					uintptr_t *__refcook,
-					uintptr_t *__defcook,
-					const La_ia64_regs *__inregs,
-					La_ia64_retval *__outregs,
-					const char *symname);
-
-__END_DECLS
+struct link_map_machine
+  {
+    size_t fptr_table_len;
+    Elf64_Addr *fptr_table;
+  };
diff --git a/sysdeps/ia64/bits/linkmap.h b/sysdeps/ia64/bits/linkmap.h
deleted file mode 100644
index 7f8b0550d9..0000000000
--- a/sysdeps/ia64/bits/linkmap.h
+++ /dev/null
@@ -1,5 +0,0 @@
-struct link_map_machine
-  {
-    size_t fptr_table_len;
-    Elf64_Addr *fptr_table;
-  };
diff --git a/sysdeps/ia64/dl-lookupcfg.h b/sysdeps/ia64/dl-lookupcfg.h
index b50030ebd2..0ae3dd68ba 100644
--- a/sysdeps/ia64/dl-lookupcfg.h
+++ b/sysdeps/ia64/dl-lookupcfg.h
@@ -1,5 +1,5 @@
 /* Configuration of lookup functions.
-   Copyright (C) 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,11 +17,12 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+/* The ia64 needs more information from the symbol lookup function
+   than just the address. */
+#define DL_LOOKUP_RETURNS_MAP
 #define ELF_FUNCTION_PTR_IS_SPECIAL
 #define DL_UNMAP_IS_SPECIAL
 
-#include <dl-fptr.h>
-
 /* We do not support copy relocations for IA-64.  */
 #define DL_NO_COPY_RELOCS
 
@@ -58,15 +59,3 @@ extern void _dl_unmap (struct link_map *map);
 
 #define DL_DT_INIT_ADDRESS(map, addr) DL_AUTO_FUNCTION_ADDRESS (map, addr)
 #define DL_DT_FINI_ADDRESS(map, addr) DL_AUTO_FUNCTION_ADDRESS (map, addr)
-/* The type of the return value of fixup/profile_fixup.  */
-#define DL_FIXUP_VALUE_TYPE struct fdesc
-/* Construct a value of type DL_FIXUP_VALUE_TYPE from a code address
-   and a link map.  */
-#define DL_FIXUP_MAKE_VALUE(map, addr) \
-  ((struct fdesc) { (addr), (map)->l_info[DT_PLTGOT]->d_un.d_ptr })
-/* Extract the code address from a value of type DL_FIXUP_MAKE_VALUE.
- */
-#define DL_FIXUP_VALUE_CODE_ADDR(value) (value).ip
-
-#define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value))
-#define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr))
diff --git a/sysdeps/ia64/dl-machine.h b/sysdeps/ia64/dl-machine.h
index 55349690e3..3108047869 100644
--- a/sysdeps/ia64/dl-machine.h
+++ b/sysdeps/ia64/dl-machine.h
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  IA-64 version.
-   Copyright (C) 1995-1997, 2000-2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-1997, 2000-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -123,8 +123,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	doit = (Elf64_Addr) ((struct fdesc *) &_dl_runtime_resolve)->ip;
       else
 	{
-	  if (GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), l))
+	  if (_dl_name_match_p (GLRO(dl_profile), l))
 	    {
 	      /* This is the object we are looking for.  Say that we really
 		 want profiling and the timers are started.  */
@@ -140,9 +139,133 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   return lazy;
 }
 
-/* Names of the architecture-specific auditing callback functions.  */
-#define ARCH_LA_PLTENTER ia64_gnu_pltenter
-#define ARCH_LA_PLTEXIT ia64_gnu_pltexit
+
+/*
+   This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns. `fixup()' takes two
+   arguments, however profile_fixup() takes three.
+
+   The ABI specifies that we will never see more than 8 input
+   registers to a function call, thus it is safe to simply allocate
+   those, and simpler than playing stack games.
+					                     - 12/09/99 Jes
+ */
+#define TRAMPOLINE_TEMPLATE(tramp_name, fixup_name)			     \
+  extern void tramp_name (void);					     \
+  asm (									     \
+"	.global " #tramp_name "#\n"					     \
+"	.proc " #tramp_name "#\n"					     \
+#tramp_name ":\n"							     \
+"	{ .mmi\n"							     \
+"	  .prologue\n"							     \
+"	  .save ar.pfs, r40\n"						     \
+"	  alloc loc0 = ar.pfs, 8, 6, 3, 0\n"				     \
+"	  adds r2 = -144, r12\n"					     \
+"	  adds r3 = -128, r12\n"					     \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  .fframe 160\n"						     \
+"	  adds r12 = -160, r12\n"					     \
+"	  .save rp, r41\n"						     \
+"	  mov loc1 = b0\n"						     \
+"	  .body\n"							     \
+"	  mov out2 = b0		/* needed by fixup_profile */\n"	     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mfb\n"							     \
+"	  mov loc2 = r8		/* preserve struct value register */\n"	     \
+"	  nop.f 0\n"							     \
+"	  nop.b 0\n"							     \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  mov loc3 = r9		/* preserve language specific register */\n" \
+"	  mov loc4 = r10	/* preserve language specific register */\n" \
+"	  mov loc5 = r11	/* preserve language specific register */\n" \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  stf.spill [r2] = f8, 32\n"					     \
+"	  stf.spill [r3] = f9, 32\n"					     \
+"	  mov out0 = r16\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  stf.spill [r2] = f10, 32\n"					     \
+"	  stf.spill [r3] = f11, 32\n"					     \
+"	  shl out1 = r15, 4\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  stf.spill [r2] = f12, 32\n"					     \
+"	  stf.spill [r3] = f13, 32\n"					     \
+"	  shladd out1 = r15, 3, out1\n"					     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmb\n"							     \
+"	  stf.spill [r2] = f14\n"					     \
+"	  stf.spill [r3] = f15\n"					     \
+"	  br.call.sptk.many b0 = " #fixup_name "#\n"			     \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  ld8 r9 = [ret0], 8\n"						     \
+"	  adds r2 = 16, r12\n"						     \
+"	  adds r3 = 32, r12\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  ldf.fill f8 = [r2], 32\n"					     \
+"	  ldf.fill f9 = [r3], 32\n"					     \
+"	  mov b0 = loc1\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  ldf.fill f10 = [r2], 32\n"					     \
+"	  ldf.fill f11 = [r3], 32\n"					     \
+"	  mov b6 = r9\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  ldf.fill f12 = [r2], 32\n"					     \
+"	  ldf.fill f13 = [r3], 32\n"					     \
+"	  mov ar.pfs = loc0\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mmi\n"							     \
+"	  ldf.fill f14 = [r2], 32\n"					     \
+"	  ldf.fill f15 = [r3], 32\n"					     \
+"	  .restore sp		/* pop the unwind frame state */\n"	     \
+"	  adds r12 = 160, r12\n"					     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  mov r9 = loc3		/* restore language specific register */\n"  \
+"	  mov r10 = loc4	/* restore language specific register */\n"  \
+"	  mov r11 = loc5	/* restore language specific register */\n"  \
+"	}\n"								     \
+"	{ .mii\n"							     \
+"	  ld8 gp = [ret0]\n"						     \
+"	  mov r8 = loc2		/* restore struct value register */\n"	     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	/* An alloc is needed for the break system call to work.\n"	     \
+"	   We don't care about the old value of the pfs register.  */\n"     \
+"	{ .mmb\n"							     \
+"	  .prologue\n"							     \
+"	  .body\n"							     \
+"	  alloc r2 = ar.pfs, 0, 0, 8, 0\n"				     \
+"	  br.sptk.many b6\n"						     \
+"	  ;;\n"								     \
+"	}\n"								     \
+"	.endp " #tramp_name "#\n");
+
+#ifndef PROF
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE 				\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup);		\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_profile, profile_fixup);
+#else
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE				\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup);		\
+  strong_alias (_dl_runtime_resolve, _dl_runtime_profile);
+#endif
 
 /* Undo the adds out0 = 16, sp below to get at the value we want in
    __libc_stack_end.  */
@@ -331,29 +454,34 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 #define ELF_MACHINE_START_ADDRESS(map, start)	\
   DL_STATIC_FUNCTION_ADDRESS (map, start)
 
+#define elf_machine_profile_fixup_plt(l, reloc, rel_addr, value) \
+  elf_machine_fixup_plt (l, reloc, rel_addr, value)
+
+#define elf_machine_profile_plt(reloc_addr) ((Elf64_Addr) (reloc_addr))
+
 /* Fixup a PLT entry to bounce directly to the function at VALUE.  */
-static inline struct fdesc __attribute__ ((always_inline))
+static inline Elf64_Addr __attribute__ ((always_inline))
 elf_machine_fixup_plt (struct link_map *l, lookup_t t,
 		       const Elf64_Rela *reloc,
-		       Elf64_Addr *reloc_addr, struct fdesc value)
+		       Elf64_Addr *reloc_addr, Elf64_Addr value)
 {
   /* l is the link_map for the caller, t is the link_map for the object
    * being called */
   /* got has already been relocated in elf_get_dynamic_info() */
-  reloc_addr[1] = value.gp;
+  reloc_addr[1] = t->l_info[DT_PLTGOT]->d_un.d_ptr;
   /* we need a "release" here to ensure that the gp is visible before
      the code entry point is updated: */
-  ((volatile Elf64_Addr *) reloc_addr)[0] = value.ip;
-  return value;
+  ((volatile Elf64_Addr *) reloc_addr)[0] = value;
+  return (Elf64_Addr) reloc_addr;
 }
 
 /* Return the final value of a plt relocation.  */
-static inline struct fdesc
+static inline Elf64_Addr
 elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
-		       struct fdesc value)
+		       Elf64_Addr value)
 {
   /* No need to handle rel vs rela since IA64 is rela only */
-  return (struct fdesc) { value.ip + reloc->r_addend, value.gp };
+  return value + reloc->r_addend;
 }
 
 #endif /* !dl_machine_h */
@@ -424,8 +552,7 @@ elf_machine_rela (struct link_map *map,
 	    ;/* No adjustment.  */
 	  else if (r_type == R_IA64_IPLTLSB)
 	    {
-	      elf_machine_fixup_plt (NULL, NULL, reloc, reloc_addr,
-				     DL_FIXUP_MAKE_VALUE (sym_map, value));
+	      elf_machine_fixup_plt (NULL, sym_map, reloc, reloc_addr, value);
 	      return;
 	    }
 	  else if (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_FPTR64LSB))
diff --git a/sysdeps/ia64/dl-trampoline.S b/sysdeps/ia64/dl-trampoline.S
deleted file mode 100644
index b7969a6cf0..0000000000
--- a/sysdeps/ia64/dl-trampoline.S
+++ /dev/null
@@ -1,537 +0,0 @@
-/* PLT trampolines.  ia64 version.
-   Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-#undef ret
-
-/*
-   This code is used in dl-runtime.c to call the `_dl_fixup' function
-   and then redirect to the address it returns. `_dl_fixup()' takes two
-   arguments, however _dl_profile_fixup() takes five.
-
-   The ABI specifies that we will never see more than 8 input
-   registers to a function call, thus it is safe to simply allocate
-   those, and simpler than playing stack games.  */
-
-/* Used to save and restore 8 incoming fp registers */
-#define RESOLVE_FRAME_SIZE (16*8)
-
-ENTRY(_dl_runtime_resolve)
-	{ .mmi
-	  .prologue
-	  .save ar.pfs, r40
-	  alloc loc0 = ar.pfs, 8, 6, 2, 0
-	  /* Use the 16 byte scratch area. r2 will start at f8 and
-	     r3 will start at f9.  */
-	  adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
-	  adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
-	}
-	{ .mii
-	  .fframe RESOLVE_FRAME_SIZE
-	  adds r12 = -RESOLVE_FRAME_SIZE, r12
-	  .save rp, loc1
-	  mov loc1 = b0
-	  .body
-	  mov loc2 = r8		/* preserve struct value register */
-	  ;;
-	}
-	{ .mii
-	  mov loc3 = r9		/* preserve language specific register */
-	  mov loc4 = r10	/* preserve language specific register */
-	  mov loc5 = r11	/* preserve language specific register */
-	}
-	{ .mmi
-	  stf.spill [r2] = f8, 32
-	  stf.spill [r3] = f9, 32
-	  mov out0 = r16
-	  ;;
-	}
-	{ .mmi
-	  stf.spill [r2] = f10, 32
-	  stf.spill [r3] = f11, 32
-	  shl out1 = r15, 4
-	  ;;
-	}
-	{ .mmi
-	  stf.spill [r2] = f12, 32
-	  stf.spill [r3] = f13, 32
-	  /* Relocation record is 24 byte. */
-	  shladd out1 = r15, 3, out1
-	  ;;
-	}
-	{ .mmb
-	  stf.spill [r2] = f14
-	  stf.spill [r3] = f15
-	  br.call.sptk.many b0 = _dl_fixup
-	}
-	{ .mii
-	  /* Skip the 16byte scratch area.  */
-	  adds r2 = 16, r12
-	  adds r3 = 32, r12
-	  mov b6 = ret0
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f8 = [r2], 32
-	  ldf.fill f9 = [r3], 32
-	  mov b0 = loc1
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f10 = [r2], 32
-	  ldf.fill f11 = [r3], 32
-	  mov gp = ret1
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f12 = [r2], 32
-	  ldf.fill f13 = [r3], 32
-	  mov ar.pfs = loc0
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f14 = [r2], 32
-	  ldf.fill f15 = [r3], 32
-	  .restore sp		/* pop the unwind frame state */
-	  adds r12 = RESOLVE_FRAME_SIZE, r12
-	  ;;
-	}
-	{ .mii
-	  mov r9 = loc3		/* restore language specific register */
-	  mov r10 = loc4	/* restore language specific register */
-	  mov r11 = loc5	/* restore language specific register */
-	}
-	{ .mii
-	  mov r8 = loc2		/* restore struct value register */
-	  ;;
-	}
-	/* An alloc is needed for the break system call to work.
-	   We don't care about the old value of the pfs register.  */
-	{ .mmb
-	  .prologue
-	  .body
-	  alloc r2 = ar.pfs, 0, 0, 8, 0
-	  br.sptk.many b6
-	  ;;
-	}
-END(_dl_runtime_resolve)
-
-
-/* The fourth argument to _dl_profile_fixup and the third one to
-   _dl_call_pltexit are a pointer to La_ia64_regs:
-
-   8byte r8
-   8byte r9
-   8byte r10
-   8byte r11
-   8byte in0
-   8byte in1
-   8byte in2
-   8byte in3
-   8byte in4
-   8byte in5
-   8byte in6
-   8byte in7
-   16byte f8
-   16byte f9
-   16byte f10
-   16byte f11
-   16byte f12
-   16byte f13
-   16byte f14
-   16byte f15
-   8byte ar.unat
-   8byte sp
-
-   The fifth argument to _dl_profile_fixup is a pointer to long int.
-   The fourth argument to _dl_call_pltexit is a pointer to
-   La_ia64_retval:
-
-   8byte r8
-   8byte r9
-   8byte r10
-   8byte r11
-   16byte f8
-   16byte f9
-   16byte f10
-   16byte f11
-   16byte f12
-   16byte f13
-   16byte f14
-   16byte f15
-  
-  Since stack has to be 16 byte aligned, the stack allocation is in
-  16byte increment. Before calling _dl_profile_fixup, the stack will
-  look like
-
-  psp	new frame_size
-  +16	La_ia64_regs
-  sp	scratch
-
- */
-
-#define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
-#define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
-
-ENTRY(_dl_runtime_profile)
-	{ .mii
-	  .prologue
-	  .save ar.pfs, r40
-	  alloc loc0 = ar.pfs, 8, 12, 8, 0
-	  .vframe loc10
-	  mov loc10 = r12
-	  .save rp, loc1
-	  mov loc1 = b0
-	}
-	{ .mii
-	  .save ar.unat, r17
-	  mov r17 = ar.unat
-	  .save ar.lc, loc6
-	  mov loc6 = ar.lc
-	  mov loc11 = gp
-	}
-	{ .mii
-	  .body
-	  /* There is a 16 byte scratch area. r2 will start at r8 and
-	     r3 will start at r9 for La_ia64_regs.  */
-	  adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
-	  adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
-	  adds r12 = -PLTENTER_FRAME_SIZE, r12
-	  ;;
-	}
-	{ .mmi
-	  st8 [r2] = r8, 16;
-	  st8 [r3] = r9, 16;
-	  mov out2 = b0		/* needed by _dl_fixup_profile */
-	  ;;
-	}
-	{ .mmi
-	  st8 [r2] = r10, 16;
-	  st8 [r3] = r11, 16;
-	  adds out3 = 16, r12	/* pointer to La_ia64_regs */
-	  ;;
-	}
-	{ .mmi
-	  .mem.offset 0, 0
-	  st8.spill [r2] = in0, 16
-	  .mem.offset 8, 0
-	  st8.spill [r3] = in1, 16
-	  mov out4 = loc10	/* pointer to new frame size  */
-	  ;;
-	}
-	{ .mmi
-	  .mem.offset 0, 0
-	  st8.spill [r2] = in2, 16
-	  .mem.offset 8, 0
-	  st8.spill [r3] = in3, 16
-	  mov loc2 = r8		/* preserve struct value register */
-	  ;;
-	}
-	{ .mmi
-	  .mem.offset 0, 0
-	  st8.spill [r2] = in4, 16
-	  .mem.offset 8, 0
-	  st8.spill [r3] = in5, 16
-	  mov loc3 = r9		/* preserve language specific register */
-	  ;;
-	}
-	{ .mmi
-	  .mem.offset 0, 0
-	  st8 [r2] = in6, 16
-	  .mem.offset 8, 0
-	  st8 [r3] = in7, 24	/* adjust for f9 */
-	  mov loc4 = r10	/* preserve language specific register */
-	  ;;
-	}
-	{ .mii
-	  mov r18 = ar.unat	/* save it in La_ia64_regs */
-	  mov loc7 = out3	/* save it for _dl_call_pltexit */
-	  mov loc5 = r11	/* preserve language specific register */
-	}
-	{ .mmi
-	  stf.spill [r2] = f8, 32
-	  stf.spill [r3] = f9, 32
-	  mov out0 = r16	/* needed by _dl_fixup_profile */
-	  ;;
-	}
-	{ .mii
-	  mov ar.unat = r17	/* restore it for function call */
-	  mov loc8 = r16	/* save it for _dl_call_pltexit */
-	  nop.i 0x0
-	}
-	{ .mmi
-	  stf.spill [r2] = f10, 32
-	  stf.spill [r3] = f11, 32
-	  shl out1 = r15, 4
-	  ;;
-	}
-	{ .mmi
-	  stf.spill [r2] = f12, 32
-	  stf.spill [r3] = f13, 32
-	  /* Relocation record is 24 byte. */
-	  shladd out1 = r15, 3, out1
-	  ;;
-	}
-	{ .mmi
-	  stf.spill [r2] = f14, 32
-	  stf.spill [r3] = f15, 24
-	  mov loc9 = out1	/* save it for _dl_call_pltexit */
-	  ;;
-	}
-	{ .mmb
-	  st8 [r2] = r18	/* store ar.unat */
-	  st8 [r3] = loc10	/* store sp */
-	  br.call.sptk.many b0 = _dl_profile_fixup
-	}
-	{ .mii
-	  /* Skip the 16byte scratch area, 4 language specific GRs and
-	     8 incoming GRs to restore incoming fp registers.  */
-	  adds r2 = (4*8 + 8*8 + 16), r12
-	  adds r3 = (4*8 + 8*8 + 32), r12
-	  mov b6 = ret0
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f8 = [r2], 32
-	  ldf.fill f9 = [r3], 32
-	  mov gp = ret1
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f10 = [r2], 32
-	  ldf.fill f11 = [r3], 32
-	  mov r8 = loc2		/* restore struct value register */
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f12 = [r2], 32
-	  ldf.fill f13 = [r3], 32
-	  mov r9 = loc3		/* restore language specific register */
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f14 = [r2], 32
-	  ldf.fill f15 = [r3], 32
-	  mov r10 = loc4	/* restore language specific register */
-	  ;;
-	}
-	{ .mii
-	  ld8 r15 = [loc10]	/* load the new frame size */
-	  mov r11 = loc5	/* restore language specific register */
-	  ;;
-	  cmp.eq p6, p7 = -1, r15
-	  ;;
-	}
-	{ .mii
-(p7)	  cmp.eq p8, p9 = 0, r15
-(p6)	  mov b0 = loc1
-(p6)	  mov ar.lc = loc6
-	}
-	{ .mib
-	  nop.m 0x0
-(p6)	  mov ar.pfs = loc0
-(p6)	  br.cond.dptk.many .Lresolved
-	  ;;
-	}
-
-	/* At this point, the stack looks like
-
-	  +psp	free
-	  +16	La_ia64_regs
-	  sp	scratch
-
-	  We need to keep the current stack and call the resolved
-	  function by copying the r15 byte from sp + PLTENTER_FRAME_SIZE
-	  + 16 (scratch area) to sp + 16 (scratch area). Since stack
-	  has to be 16byte aligned, we around r15 up to 16byte.  */
-
-	{ .mbb
-(p9)	  adds r15 = 15, r15
-(p8)	  br.cond.dptk.many .Lno_new_frame
-	  nop.b 0x0
-	  ;;
-	}
-	{ .mmi
-	  and r15 = -16, r15
-	  ;;
-	  /* We don't copy the 16byte scatch area. Prepare r16/r17 as
-	     destination.  */
-	  sub r16 = r12, r15
-	  sub r17 = r12, r15
-	  ;;
-	}
-	{ .mii
-	  adds r16 = 16, r16
-	  adds r17 = 24, r17
-	  sub r12 = r12, r15		/* Adjust stack  */
-	  ;;
-	}
-	{ .mii
-	  nop.m 0x0
-	  shr r15 = r15, 4
-	  ;;
-	  adds r15 = -1, r15
-	  ;;
-	}
-	{ .mii
-	  /* Skip the 16byte scatch area. Prepare r2/r3 as source.  */
-	  adds r2 = 16, loc10
-	  adds r3 = 24, loc10
-	  mov ar.lc = r15
-	  ;;
-	}
-.Lcopy:
-	{ .mmi
-	  ld8 r18 = [r2], 16
-	  ld8 r19 = [r3], 16
-	  nop.i 0x0
-	  ;;
-	}
-	{ .mmb
-	  st8 [r16] = r18, 16
-	  st8 [r17] = r19, 16
-	  br.cloop.sptk.few .Lcopy
-	}
-.Lno_new_frame:
-	{ .mii
-	  mov out0 = in0
-	  mov out1 = in1
-	  mov out2 = in2
-	}
-	{ .mii
-	  mov out3 = in3
-	  mov out4 = in4
-	  mov out5 = in5
-	}
-	{ .mib
-	  mov out6 = in6
-	  mov out7 = in7
-	  /* Call the resolved function  */
-	  br.call.sptk.many b0 = b6
-	}
-	{ .mii
-	  /* Prepare stack for _dl_call_pltexit. Loc10 has the original
-	     stack pointer.  */
-	  adds r12 = -PLTEXIT_FRAME_SIZE, loc10
-	  adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
-	  adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
-	  ;;
-	}
-	{ .mmi
-	  /* Load all possible return values into buffer.  */
-	  st8 [r2] = r8, 16
-	  st8 [r3] = r9, 16
-	  mov out0 = loc8
-	  ;;
-	}
-	{ .mmi
-	  st8 [r2] = r10, 16
-	  st8 [r3] = r11, 24
-	  mov out1 = loc9
-	  ;;
-	}
-	{ .mmi
-	  stf.spill [r2] = f8, 32
-	  stf.spill [r3] = f9, 32
-	  mov out2 = loc7		/* Pointer to La_ia64_regs */
-	  ;;
-	}
-	{ .mmi
-	  stf.spill [r2] = f10, 32
-	  stf.spill [r3] = f11, 32
-	  adds out3 = 16, r12		/* Pointer to La_ia64_retval */
-	  ;;
-	}
-	{ .mmi
-	  stf.spill [r2] = f12, 32
-	  stf.spill [r3] = f13, 32
-	  /* We need to restore gp for _dl_call_pltexit. */
-	  mov gp = loc11
-	  ;;
-	}
-	{ .mmb
-	  stf.spill [r2] = f14
-	  stf.spill [r3] = f15
-	  br.call.sptk.many b0 = _dl_call_pltexit
-	}
-	{ .mmi
-	  /* Load all the non-floating and floating return values. Skip
-	     the 16byte scratch area.  */
-	  adds r2 = 16, r12
-	  adds r3 = 24, r12
-	  nop.i 0x0
-	  ;;
-	}
-	{ .mmi
-	  ld8 r8 = [r2], 16
-	  ld8 r9 = [r3], 16
-	  nop.i 0x0
-	  ;;
-	}
-	{ .mmi
-	  ld8 r10 = [r2], 16
-	  ld8 r11 = [r3], 24
-	  nop.i 0x0
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f8 = [r2], 32
-	  ldf.fill f9 = [r3], 32
-	  mov ar.lc = loc6
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f10 = [r2], 32
-	  ldf.fill f11 = [r3], 32
-	  mov ar.pfs = loc0
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f12 = [r2], 32
-	  ldf.fill f13 = [r3], 32
-	  mov b0 = loc1
-	  ;;
-	}
-	{ .mmi
-	  ldf.fill f14 = [r2]
-	  ldf.fill f15 = [r3]
-	  /* We know that the previous stack pointer, loc10, isn't 0.
-	     We use it to reload p7.  */
-	  cmp.ne p7, p0 = 0, loc10
-	  ;;
-	}
-.Lresolved:
-	{ .mmb
-	  .restore sp
-	  mov r12 = loc10
-(p7)	  br.ret.sptk.many b0
-	  ;;
-	}
-	/* An alloc is needed for the break system call to work. We
-	   don't care about the old value of the pfs register. After
-	   this alloc, we can't use any rotating registers. Otherwise
-	   assembler won't be happy. This has to be at the end.  */
-	{ .mmb
-	  .prologue
-	  .body
-	  alloc r2 = ar.pfs, 0, 0, 8, 0
-	  br.sptk.many b6
-	  ;;
-	}
-END(_dl_runtime_profile)
diff --git a/sysdeps/ia64/fpu/Makefile b/sysdeps/ia64/fpu/Makefile
index 7ec30c43d3..6d1b0c1717 100644
--- a/sysdeps/ia64/fpu/Makefile
+++ b/sysdeps/ia64/fpu/Makefile
@@ -1,33 +1,8 @@
 ifeq ($(subdir),math)
-#
-# Some files which need to go both into libc and libm have external
-# dependencies which need to be resolved differently for libc
-# vs. libm.  For example, inside libc, __libm_error_support needs to
-# resolve to HIDDEN_JUMPTARGET(__libm_error_support) whereas within
-# libm it always resolves to __libm_error_support.  Such files need to
-# be compiled twice.  Fortunately, math/Makefile already has logic to
-# support this: if a file starts with "s_", make will automatically
-# generate a matching file whose name starts with "m_" which simply
-# includes the corresponding "s_" file.
-#
-duplicated-routines = s_libm_ldexp s_libm_ldexpf s_libm_ldexpl \
-		      s_libm_scalbn s_libm_scalbnf s_libm_scalbnl
+libm-sysdep_routines += libm_atan2_reg s_matherrf s_matherrl libm_reduce \
+			libm_tan libm_error \
+			libm_frexp4 libm_frexp4f libm_frexp4l
 
-libm-sysdep_routines += s_erfc s_erfcf s_erfcl \
-			s_matherrf s_matherrl libm_reduce \
-			libm_error \
-			libm_frexp libm_frexpf libm_frexpl \
-			libm_sincos libm_sincosf libm_sincosl \
-			libm_sincos_large \
-			libm_lgamma libm_lgammaf libm_lgammal \
-			libm_scalblnf \
-			$(duplicated-routines:s_%=m_%)
-
-sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \
-		   $(duplicated-routines)
-
-sysdep-CPPFLAGS += -include libm-symbols.h \
-	-D__POSIX__ \
-	-D_LIB_VERSIONIMF=_LIB_VERSION \
-	-DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64
+sysdep_routines += libm_frexp4 libm_frexp4f libm_frexp4l libc_libm_error
+sysdep-CPPFLAGS += -DSIZE_INT_32
 endif
diff --git a/sysdeps/ia64/fpu/e_acos.S b/sysdeps/ia64/fpu/e_acos.S
index b515f01a1e..7e83811727 100644
--- a/sysdeps/ia64/fpu/e_acos.S
+++ b/sysdeps/ia64/fpu/e_acos.S
@@ -1,10 +1,10 @@
 .file "acos.s"
 
-
-// Copyright (c) 2000 - 2003 Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,9 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
+// WARRANTY DISCLAIMER
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,800 +37,838 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 08/17/00 New and much faster algorithm.
-// 08/30/00 Avoided bank conflicts on loads, shortened |x|=1 and x=0 paths,
+// 2/02/00  Initial version 
+// 8/17/00  New and much faster algorithm.
+// 8/30/00  Avoided bank conflicts on loads, shortened |x|=1 and x=0 paths,
 //          fixed mfb split issue stalls.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 08/02/02 New and much faster algorithm II
-// 02/06/03 Reordered header: .section, .global, .proc, .align
 
 // Description
 //=========================================
-// The acos function computes the principal value of the arc cosine of x.
-// acos(0) returns Pi/2, acos(1) returns 0, acos(-1) returns Pi.
+// The acos function computes the principal value of the arc cosine of x.
 // A doman error occurs for arguments not in the range [-1,+1].
-//
-// The acos function returns the arc cosine in the range [0, Pi] radians.
-//
-// There are 8 paths:
-// 1. x = +/-0.0
-//    Return acos(x) = Pi/2 + x
-//
-// 2. 0.0 < |x| < 0.625
-//    Return acos(x) = Pi/2 - x - x^3 *PolA(x^2)
-//    where PolA(x^2) = A3 + A5*x^2 + A7*x^4 +...+ A35*x^32
-//
-// 3. 0.625 <=|x| < 1.0
-//    Return acos(x) = Pi/2 - asin(x) =
-//                   = Pi/2 - sign(x) * ( Pi/2 - sqrt(R) * PolB(R))
-//    Where R = 1 - |x|,
-//          PolB(R) = B0 + B1*R + B2*R^2 +...+B12*R^12
-//
-//    sqrt(R) is approximated using the following sequence:
-//        y0 = (1 + eps)/sqrt(R) - initial approximation by frsqrta,
-//             |eps| < 2^(-8)
-//        Then 3 iterations are used to refine the result:
-//        H0 = 0.5*y0
-//        S0 = R*y0
-//
-//        d0 = 0.5 - H0*S0
-//        H1 = H0 + d0*H0
-//        S1 = S0 + d0*S0
-//
-//        d1 = 0.5 - H1*S1
-//        H2 = H1 + d0*H1
-//        S2 = S1 + d0*S1
-//
-//        d2 = 0.5 - H2*S2
-//        S3 = S3 + d2*S3
-//
-//        S3 approximates sqrt(R) with enough accuracy for this algorithm
-//
-//    So, the result should be reconstracted as follows:
-//    acos(x) = Pi/2 - sign(x) * (Pi/2 - S3*PolB(R))
-//
-//    But for optimization purposes the reconstruction step is slightly
-//    changed:
-//    acos(x) = Cpi + sign(x)*PolB(R)*S2 - sign(x)*d2*S2*PolB(R)
-//        where Cpi = 0 if x > 0 and Cpi = Pi if x < 0
-//
-// 4. |x| = 1.0
-//    Return acos(1.0) = 0.0, acos(-1.0) = Pi
-//
-// 5. 1.0 < |x| <= +INF
-//    A doman error occurs for arguments not in the range [-1,+1]
-//
-// 6. x = [S,Q]NaN
-//    Return acos(x) = QNaN
-//
-// 7. x is denormal
-//    Return acos(x) = Pi/2 - x,
-//
-// 8. x is unnormal
-//    Normalize input in f8 and return to the very beginning of the function
-//
-// Registers used
-//==============================================================
-// Floating Point registers used:
-// f8, input, output
-// f6, f7, f9 -> f15, f32 -> f64
 
-// General registers used:
-// r3, r21 -> r31, r32 -> r38
+// The acos function returns the arc cosine in the range [0, +pi] radians.
+// acos(1) returns +0, acos(-1) returns pi, acos(0) returns pi/2.
+// acos(x) returns a NaN and raises the invalid exception for |x| > 1.
 
-// Predicate registers used:
-// p0, p6 -> p14
+// The acos function is just like asin except that pi/2 is added at the end.
 
 //
 // Assembly macros
 //=========================================
-// integer registers used
-// scratch
-rTblAddr                      = r3
-
-rPiBy2Ptr                     = r21
-rTmpPtr3                      = r22
-rDenoBound                    = r23
-rOne                          = r24
-rAbsXBits                     = r25
-rHalf                         = r26
-r0625                         = r27
-rSign                         = r28
-rXBits                        = r29
-rTmpPtr2                      = r30
-rTmpPtr1                      = r31
-
-// stacked
-GR_SAVE_PFS                   = r32
-GR_SAVE_B0                    = r33
-GR_SAVE_GP                    = r34
-GR_Parameter_X                = r35
-GR_Parameter_Y                = r36
-GR_Parameter_RESULT           = r37
-GR_Parameter_TAG              = r38
-
-// floating point registers used
-FR_X                          = f10
-FR_Y                          = f1
-FR_RESULT                     = f8
-
-
-// scratch
-fXSqr                         = f6
-fXCube                        = f7
-fXQuadr                       = f9
-f1pX                          = f10
-f1mX                          = f11
-f1pXRcp                       = f12
-f1mXRcp                       = f13
-fH                            = f14
-fS                            = f15
-// stacked
-fA3                           = f32
-fB1                           = f32
-fA5                           = f33
-fB2                           = f33
-fA7                           = f34
-fPiBy2                        = f34
-fA9                           = f35
-fA11                          = f36
-fB10                          = f35
-fB11                          = f36
-fA13                          = f37
-fA15                          = f38
-fB4                           = f37
-fB5                           = f38
-fA17                          = f39
-fA19                          = f40
-fB6                           = f39
-fB7                           = f40
-fA21                          = f41
-fA23                          = f42
-fB3                           = f41
-fB8                           = f42
-fA25                          = f43
-fA27                          = f44
-fB9                           = f43
-fB12                          = f44
-fA29                          = f45
-fA31                          = f46
-fA33                          = f47
-fA35                          = f48
-fBaseP                        = f49
-fB0                           = f50
-fSignedS                      = f51
-fD                            = f52
-fHalf                         = f53
-fR                            = f54
-fCloseTo1Pol                  = f55
-fSignX                        = f56
-fDenoBound                    = f57
-fNormX                        = f58
-fX8                           = f59
-fRSqr                         = f60
-fRQuadr                       = f61
-fR8                           = f62
-fX16                          = f63
-fCpi                          = f64
+
+#include "libm_support.h"
+
+// predicate registers
+//acos_pred_LEsqrt2by2            = p7
+//acos_pred_GTsqrt2by2            = p8
+
+// integer registers (stacked; acos sets up the frame with alloc r32 = ar.pfs,1,6,4,0)
+ASIN_Addr1                      = r33   // address of acos_coeff_1_table, post-incremented by each ldfe
+ASIN_Addr2                      = r34   // address of acos_coeff_2_table, post-incremented by each ldfe
+ASIN_FFFE                       = r35   // 0xFFFE, transferred to acos_1by2 with setf.exp
+
+GR_SAVE_B0                      = r36
+GR_SAVE_PFS                     = r37
+GR_SAVE_GP                      = r38
+
+GR_Parameter_X                  = r39
+GR_Parameter_Y                  = r40
+GR_Parameter_RESULT             = r41
+GR_Parameter_Tag                = r42   // set to 58 when |x| > 1, before branching to __libm_error_region
+
+// floating point registers
+acos_coeff_P1                   = f32
+acos_coeff_P2                   = f33
+acos_coeff_P3                   = f34
+acos_coeff_P4                   = f35
+
+acos_coeff_P5                   = f36
+acos_coeff_P6                   = f37
+acos_coeff_P7                   = f38
+acos_coeff_P8                   = f39
+acos_coeff_P9                   = f40
+
+acos_coeff_P10                  = f41
+acos_coeff_P11                  = f42
+acos_coeff_P12                  = f43
+acos_coeff_P13                  = f44
+acos_coeff_P14                  = f45
+
+acos_coeff_P15                  = f46
+acos_coeff_P16                  = f47
+acos_coeff_P17                  = f48
+acos_coeff_P18                  = f49
+acos_coeff_P19                  = f50
+
+acos_coeff_P20                  = f51
+acos_coeff_P21                  = f52
+acos_const_sqrt2by2             = f53
+acos_const_piby2                = f54
+acos_abs_x                      = f55
+
+acos_tx                         = f56
+acos_tx2                        = f57
+acos_tx3                        = f58
+acos_tx4                        = f59
+acos_tx8                        = f60
+
+acos_tx11                       = f61
+acos_1poly_p8                   = f62
+acos_1poly_p19                  = f63
+acos_1poly_p4                   = f64
+acos_1poly_p15                  = f65
+
+acos_1poly_p6                   = f66
+acos_1poly_p17                  = f67
+acos_1poly_p0                   = f68
+acos_1poly_p11                  = f69
+acos_1poly_p2                   = f70
+
+acos_1poly_p13                  = f71
+acos_series_tx                  = f72
+acos_t                          = f73
+acos_t2                         = f74
+acos_t3                         = f75
+
+acos_t4                         = f76
+acos_t8                         = f77
+acos_t11                        = f78
+acos_poly_p8                    = f79
+acos_poly_p19                   = f80
+
+acos_poly_p4                    = f81
+acos_poly_p15                   = f82
+acos_poly_p6                    = f83
+acos_poly_p17                   = f84
+acos_poly_p0                    = f85
+
+acos_poly_p11                   = f86
+acos_poly_p2                    = f87
+acos_poly_p13                   = f88
+acos_series_t                   = f89
+acos_1by2                       = f90
+
+acos_3by2                       = f91
+acos_5by2                       = f92
+acos_11by4                      = f93
+acos_35by8                      = f94
+acos_63by8                      = f95
+
+acos_231by16                    = f96 
+acos_y0                         = f97 
+acos_H0                         = f98 
+acos_S0                         = f99 
+acos_d                          = f100
+
+acos_l1                         = f101
+acos_d2                         = f102
+acos_T0                         = f103
+acos_d1                         = f104
+acos_e0                         = f105
+
+acos_l2                         = f106
+acos_d3                         = f107
+acos_T3                         = f108
+acos_S1                         = f109
+acos_e1                         = f110
+
+acos_z                          = f111
+answer2                         = f112
+acos_sgn_x                      = f113
+acos_429by16                    = f114
+acos_18by4                      = f115
+
+acos_3by4                       = f116
+acos_l3                         = f117
+acos_T6                         = f118
+acos_const_add                  = f119
 
 // Data tables
 //==============================================================
-RODATA
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
 .align 16
-LOCAL_OBJECT_START(acos_base_range_table)
-// Ai: Polynomial coefficients for the acos(x), |x| < .625000
-// Bi: Polynomial coefficients for the acos(x), |x| > .625000
-data8 0xBFDAAB56C01AE468 //A29
-data8 0x3FE1C470B76A5B2B //A31
-data8 0xBFDC5FF82A0C4205 //A33
-data8 0x3FC71FD88BFE93F0 //A35
-data8 0xB504F333F9DE6487, 0x00003FFF //B0
-data8 0xAAAAAAAAAAAAFC18, 0x00003FFC //A3
-data8 0x3F9F1C71BC4A7823 //A9
-data8 0x3F96E8BBAAB216B2 //A11
-data8 0x3F91C4CA1F9F8A98 //A13
-data8 0x3F8C9DDCEDEBE7A6 //A15
-data8 0x3F877784442B1516 //A17
-data8 0x3F859C0491802BA2 //A19
-data8 0x9999999998C88B8F, 0x00003FFB //A5
-data8 0x3F6BD7A9A660BF5E //A21
-data8 0x3F9FC1659340419D //A23
-data8 0xB6DB6DB798149BDF, 0x00003FFA //A7
-data8 0xBFB3EF18964D3ED3 //A25
-data8 0x3FCD285315542CF2 //A27
-data8 0xF15BEEEFF7D2966A, 0x00003FFB //B1
-data8 0x3EF0DDA376D10FB3 //B10
-data8 0xBEB83CAFE05EBAC9 //B11
-data8 0x3F65FFB67B513644 //B4
-data8 0x3F5032FBB86A4501 //B5
-data8 0x3F392162276C7CBA //B6
-data8 0x3F2435949FD98BDF //B7
-data8 0xD93923D7FA08341C, 0x00003FF9 //B2
-data8 0x3F802995B6D90BDB //B3
-data8 0x3F10DF86B341A63F //B8
-data8 0xC90FDAA22168C235, 0x00003FFF // Pi/2
-data8 0x3EFA3EBD6B0ECB9D //B9
-data8 0x3EDE18BA080E9098 //B12
-LOCAL_OBJECT_END(acos_base_range_table)
+
+acos_coeff_1_table:  // 16-byte entries, stored in the order acos reads them via ldfe [ASIN_Addr1],16
+ASM_TYPE_DIRECTIVE(acos_coeff_1_table,@object)
+data8 0xE4E7E0A423A21249  , 0x00003FF8 //P7
+data8 0xC2F7EE0200FCE2A5  , 0x0000C003 //P18
+data8 0xB745D7F6C65C20E0  , 0x00003FF9 //P5
+data8 0xF75E381A323D4D94  , 0x0000C002 //P16
+data8 0x8959C2629C1024C0  , 0x0000C002 //P20
+data8 0xAFF68E7D241292C5  , 0x00003FF8 //P9
+data8 0xB6DB6DB7260AC30D  , 0x00003FFA //P3
+data8 0xD0417CE2B41CB7BF  , 0x0000C000 //P14
+data8 0x81D570FEA724E3E4  , 0x0000BFFD //P12
+data8 0xAAAAAAAAAAAAC277  , 0x00003FFC //P1
+data8 0xF534912FF3E7B76F  , 0x00003FFF //P21
+data8 0xc90fdaa22168c235  , 0x00003fff // pi/2
+data8 0x0000000000000000  , 0x00000000 // pad to avoid bank conflicts
+ASM_SIZE_DIRECTIVE(acos_coeff_1_table)
+
+
+acos_coeff_2_table:  // 16-byte entries, stored in the order acos reads them via ldfe [ASIN_Addr2],16
+ASM_TYPE_DIRECTIVE(acos_coeff_2_table,@object)
+data8 0x8E26AF5F29B39A2A  , 0x00003FF9 //P6
+data8 0xB4F118A4B1015470  , 0x00004003 //P17
+data8 0xF8E38E10C25990E0  , 0x00003FF9 //P4
+data8 0x80F50489AEF1CAC6  , 0x00004002 //P15
+data8 0x92728015172CFE1C  , 0x00004003 //P19
+data8 0xBBC3D831D4595971  , 0x00003FF8 //P8
+data8 0x999999999952A5C3  , 0x00003FFB //P2
+data8 0x855576BE6F0975EC  , 0x00003FFF //P13
+data8 0xF12420E778077D89  , 0x00003FFA //P11
+data8 0xB6590FF4D23DE003  , 0x00003FF3 //P10
+data8 0xb504f333f9de6484  , 0x00003ffe // sqrt(2)/2
+ASM_SIZE_DIRECTIVE(acos_coeff_2_table)
+
+
+.align 32
+.global acos
+ASM_TYPE_DIRECTIVE(acos,@function)
 
 .section .text
-GLOBAL_LIBM_ENTRY(acos)
-acos_unnormal_back:
-{ .mfi
-      getf.d             rXBits = f8 // grab bits of input value
-      // set p12 = 1 if x is a NaN, denormal, or zero
-      fclass.m           p12, p0 = f8, 0xcf
-      adds               rSign = 1, r0
-}
-{ .mfi
-      addl               rTblAddr = @ltoff(acos_base_range_table),gp
-      // 1 - x = 1 - |x| for positive x
-      fms.s1             f1mX = f1, f1, f8
-      addl               rHalf = 0xFFFE, r0 // exponent of 1/2
-}
-;;
-{ .mfi
-      addl               r0625 = 0x3FE4, r0 // high 16 bits of 0.625
-      // set p8 = 1 if x < 0
-      fcmp.lt.s1         p8, p9 = f8, f0
-      shl                rSign = rSign, 63 // sign bit
-}
-{ .mfi
-      // point to the beginning of the table
-      ld8                rTblAddr = [rTblAddr]
-      // 1 + x = 1 - |x| for negative x
-      fma.s1             f1pX = f1, f1, f8
-      adds               rOne = 0x3FF, r0
-}
-;;
-{ .mfi
-      andcm              rAbsXBits = rXBits, rSign // bits of |x|
-      fmerge.s           fSignX = f8, f1 // signum(x)
-      shl                r0625 = r0625, 48 // bits of DP representation of 0.625
-}
-{ .mfb
-      setf.exp           fHalf = rHalf // load A2 to FP reg
-      fma.s1             fXSqr = f8, f8, f0 // x^2
-      // branch on special path if x is a NaN, denormal, or zero
-(p12) br.cond.spnt       acos_special
-}
-;;
-{ .mfi
-      adds               rPiBy2Ptr = 272, rTblAddr
-      nop.f              0
-      shl                rOne = rOne, 52 // bits of 1.0
-}
-{ .mfi
-      adds               rTmpPtr1 = 16, rTblAddr
-      nop.f              0
-      // set p6 = 1 if |x| < 0.625
-      cmp.lt             p6, p7 = rAbsXBits, r0625
-}
-;;
-{ .mfi
-      ldfpd              fA29, fA31 = [rTblAddr] // A29, fA31
-      // 1 - x = 1 - |x| for positive x
-(p9)  fms.s1             fR = f1, f1, f8
-      // point to coefficient of "near 1" polynomial
-(p7)  adds               rTmpPtr2 = 176, rTblAddr
-}
-{ .mfi
-      ldfpd              fA33, fA35 = [rTmpPtr1], 16 // A33, fA35
-      // 1 + x = 1 - |x| for negative x
-(p8)  fma.s1             fR = f1, f1, f8
-(p6)  adds               rTmpPtr2 = 48, rTblAddr
-}
-;;
-{ .mfi
-      ldfe               fB0 = [rTmpPtr1], 16 // B0
-      nop.f              0
-      nop.i              0
-}
-{ .mib
-      adds               rTmpPtr3 = 16, rTmpPtr2
-      // set p10 = 1 if |x| = 1.0
-      cmp.eq             p10, p0 = rAbsXBits, rOne
-      // branch on special path for |x| = 1.0
-(p10) br.cond.spnt       acos_abs_1
-}
-;;
-{ .mfi
-      ldfe               fA3 = [rTmpPtr2], 48 // A3 or B1
-      nop.f              0
-      adds               rTmpPtr1 = 64, rTmpPtr3
-}
-{ .mib
-      ldfpd              fA9, fA11 = [rTmpPtr3], 16 // A9, A11 or B10, B11
-      // set p11 = 1 if |x| > 1.0
-      cmp.gt             p11, p0 = rAbsXBits, rOne
-      // branch on special path for |x| > 1.0
-(p11) br.cond.spnt       acos_abs_gt_1
-}
-;;
-{ .mfi
-      ldfpd              fA17, fA19 = [rTmpPtr2], 16 // A17, A19 or B6, B7
-      // initial approximation of 1 / sqrt(1 - x)
-      frsqrta.s1         f1mXRcp, p0 = f1mX
-      nop.i              0
-}
-{ .mfi
-      ldfpd              fA13, fA15 = [rTmpPtr3] // A13, A15 or B4, B5
-      fma.s1             fXCube = fXSqr, f8, f0 // x^3
-      nop.i              0
-}
-;;
-{ .mfi
-      ldfe               fA5 = [rTmpPtr2], 48 // A5 or B2
-      // initial approximation of 1 / sqrt(1 + x)
-      frsqrta.s1         f1pXRcp, p0 = f1pX
-      nop.i              0
-}
-{ .mfi
-      ldfpd              fA21, fA23 = [rTmpPtr1], 16 // A21, A23 or B3, B8
-      fma.s1             fXQuadr = fXSqr, fXSqr, f0 // x^4
-      nop.i              0
-}
-;;
-{ .mfi
-      ldfe               fA7 = [rTmpPtr1] // A7 or Pi/2
-      fma.s1             fRSqr = fR, fR, f0 // R^2
-      nop.i              0
-}
-{ .mfb
-      ldfpd              fA25, fA27 = [rTmpPtr2] // A25, A27 or B9, B12
-      nop.f              0
-(p6)  br.cond.spnt       acos_base_range;
-}
-;;
+.proc  acos
+.align 32
 
-{ .mfi
-      nop.m              0
-(p9)  fma.s1             fH = fHalf, f1mXRcp, f0 // H0 for x > 0
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-(p9)  fma.s1             fS = f1mX, f1mXRcp, f0  // S0 for x > 0
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-(p8)  fma.s1             fH = fHalf, f1pXRcp, f0 // H0 for x < 0
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-(p8)  fma.s1             fS = f1pX, f1pXRcp, f0  // S0 for x > 0
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fRQuadr = fRSqr, fRSqr, f0 // R^4
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fB11 = fB11, fR, fB10
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fB1 = fB1, fR, fB0
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fB5 = fB5, fR, fB4
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fB7 = fB7, fR, fB6
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fB3 = fB3, fR, fB2
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fnma.s1            fD = fH, fS, fHalf // d0 = 1/2 - H0*S0
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fR8 = fRQuadr, fRQuadr, f0 // R^4
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fB9 = fB9, fR, fB8
-      nop.i              0
+
+acos:
+ 
+{     .mfi 
+     alloc      r32               = ar.pfs,1,6,4,0
+     fma.s1    acos_tx        =    f8,f8,f0
+     addl      ASIN_Addr2     =    @ltoff(acos_coeff_2_table),gp
+} 
+{     .mfi 
+     mov       ASIN_FFFE      =    0xFFFE
+     fnma.s1   acos_t         =    f8,f8,f1
+     addl      ASIN_Addr1     =    @ltoff(acos_coeff_1_table),gp
 }
 ;;
-{.mfi
-      nop.m              0
-      fma.s1             fB12 = fB12, fRSqr, fB11
-      nop.i              0
-}
-{.mfi
-      nop.m              0
-      fma.s1             fB7 = fB7, fRSqr, fB5
-      nop.i              0
-}
+
+ 
+{     .mfi 
+     setf.exp       acos_1by2      =    ASIN_FFFE
+     fmerge.s       acos_abs_x     =    f1,f8
+     nop.i          999              ;;
+} 
+ 
+
+{     .mmf 
+     ld8       ASIN_Addr1     =    [ASIN_Addr1]
+     ld8       ASIN_Addr2     =    [ASIN_Addr2]
+     fmerge.s  acos_sgn_x     =    f8,f1
+} 
 ;;
-{.mfi
-      nop.m              0
-      fma.s1             fB3 = fB3, fRSqr, fB1
-      nop.i              0
+
+
+{     .mfi 
+     nop.m                      999
+     fcmp.lt.s1  p11,p12  = f8, f0
+     nop.i          999              ;;
+} 
+ 
+ 
+{     .mfi 
+     ldfe      acos_coeff_P7  =    [ASIN_Addr1],16
+     fma.s1    acos_tx2       =    acos_tx,acos_tx,f0
+     nop.i                      999
+} 
+{     .mfi 
+     ldfe      acos_coeff_P6  =    [ASIN_Addr2],16
+     fma.s1    acos_t2        =    acos_t,acos_t,f0
+     nop.i                      999;;
 }
+
+ 
+{     .mmf 
+     ldfe      acos_coeff_P18 =    [ASIN_Addr1],16
+     ldfe      acos_coeff_P17 =    [ASIN_Addr2],16
+     fclass.m.unc p8,p0  = f8, 0xc3	//@qnan |@snan
+} 
 ;;
-{ .mfi
-      nop.m              0
-      fma.s1             fH = fH, fD, fH // H1 = H0 + H0*d0
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fS = fS, fD, fS // S1 = S0 + S0*d0
-      nop.i              0
-}
+
+ 
+{     .mmf 
+     ldfe      acos_coeff_P5  =    [ASIN_Addr1],16
+     ldfe      acos_coeff_P4  =    [ASIN_Addr2],16
+     frsqrta.s1     acos_y0,p0     =    acos_t
+} 
 ;;
-{.mfi
-      nop.m              0
-(p9)  fma.s1             fCpi = f1, f0, f0 // Cpi = 0 if x > 0
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-(p8)  fma.s1             fCpi = fPiBy2, f1, fPiBy2 // Cpi = Pi if x < 0
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      acos_coeff_P16 =    [ASIN_Addr1],16
+     fcmp.gt.s1 p9,p0 = acos_abs_x,f1
+     nop.i                      999
+} 
+{     .mfb 
+     ldfe      acos_coeff_P15 =    [ASIN_Addr2],16
+(p8) fma.d     f8 = f8,f1,f0
+(p8) br.ret.spnt b0
 }
 ;;
-{ .mfi
-      nop.m              0
-      fma.s1             fB12 = fB12, fRSqr, fB9
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fB7 = fB7, fRQuadr, fB3
-      nop.i              0
-}
+
+ 
+{     .mmf 
+     ldfe      acos_coeff_P20 =    [ASIN_Addr1],16
+     ldfe      acos_coeff_P19 =    [ASIN_Addr2],16
+     fclass.m.unc p10,p0 = f8, 0x07	//@zero
+} 
 ;;
-{.mfi
-      nop.m              0
-      fnma.s1            fD = fH, fS, fHalf // d1 = 1/2 - H1*S1
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fnma.s1            fSignedS = fSignX, fS, f0 // -signum(x)*S1
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      acos_coeff_P9  =    [ASIN_Addr1],16
+     fma.s1    acos_t4        =    acos_t2,acos_t2,f0
+(p9) mov GR_Parameter_Tag = 58 
+} 
+{     .mfi 
+     ldfe      acos_coeff_P8  =    [ASIN_Addr2],16
+     fma.s1    acos_3by2      =    acos_1by2,f1,f1
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fCloseTo1Pol = fB12, fR8, fB7
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      acos_coeff_P2  =    [ASIN_Addr2],16
+     fma.s1    acos_tx4       =    acos_tx2,acos_tx2,f0
+     nop.i 999
+} 
+{     .mfb 
+     ldfe      acos_coeff_P3  =    [ASIN_Addr1],16
+     fma.s1    acos_t3        =    acos_t,acos_t2,f0
+(p9) br.cond.spnt  __libm_error_region
 }
 ;;
-{ .mfi
-      nop.m              0
-      fma.s1             fH = fH, fD, fH // H2 = H1 + H1*d1
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fS = fS, fD, fS // S2 = S1 + S1*d1
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      acos_coeff_P13 =    [ASIN_Addr2],16
+     fma.s1    acos_H0        =    acos_y0,acos_1by2,f0
+     nop.i                      999
+} 
+{     .mfi 
+     ldfe      acos_coeff_P14 =    [ASIN_Addr1],16
+     fma.s1    acos_S0        =    acos_y0,acos_t,f0
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      // -signum(x)* S2 = -signum(x)*(S1 + S1*d1)
-      fma.s1             fSignedS = fSignedS, fD, fSignedS
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      acos_coeff_P11 =    [ASIN_Addr2],16
+     fcmp.eq.s1  p6,p0  = acos_abs_x, f1
+     nop.i                      999
+} 
+{     .mfi 
+     ldfe      acos_coeff_P12 =    [ASIN_Addr1],16
+     fma.s1    acos_tx3       =    acos_tx,acos_tx2,f0
+     nop.i 999
 }
 ;;
-{.mfi
-      nop.m              0
-      fnma.s1            fD = fH, fS, fHalf // d2 = 1/2 - H2*S2
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      acos_coeff_P10 =    [ASIN_Addr2],16
+     fma.s1    acos_1poly_p6  =    acos_tx,acos_coeff_P7,acos_coeff_P6
+     nop.i                      999
+} 
+{     .mfi 
+     ldfe      acos_coeff_P1  =    [ASIN_Addr1],16
+     fma.s1    acos_poly_p6   =    acos_t,acos_coeff_P7,acos_coeff_P6
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      // Cpi + signum(x)*PolB*S2
-      fnma.s1            fCpi = fSignedS, fCloseTo1Pol, fCpi
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      acos_const_sqrt2by2 =    [ASIN_Addr2],16
+     fma.s1    acos_5by2           =    acos_3by2,f1,f1
+     nop.i                           999
+} 
+{     .mfi 
+     ldfe      acos_coeff_P21 =    [ASIN_Addr1],16
+     fma.s1    acos_11by4     =    acos_3by2,acos_3by2,acos_1by2
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      // signum(x)*PolB * S2
-      fnma.s1            fCloseTo1Pol = fSignedS, fCloseTo1Pol, f0
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      acos_const_piby2    =    [ASIN_Addr1],16
+     fma.s1    acos_poly_p17       =    acos_t,acos_coeff_P18,acos_coeff_P17
+     nop.i                      999
+} 
+{     .mfb 
+     nop.m                 999
+     fma.s1    acos_3by4 =    acos_3by2,acos_1by2,f0
+(p10) br.cond.spnt  L(ACOS_ZERO)    // Branch to short path if x=0
 }
 ;;
-{ .mfb
-      nop.m              0
-      // final result for 0.625 <= |x| < 1
-      fma.d.s0           f8 = fCloseTo1Pol, fD, fCpi
-      // exit here for  0.625 <= |x| < 1
-      br.ret.sptk        b0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p15  =    acos_t,acos_coeff_P16,acos_coeff_P15
+     nop.i                      999
+} 
+{     .mfb 
+     nop.m                 999
+     fnma.s1   acos_d    =    acos_S0,acos_H0,acos_1by2
+(p6) br.cond.spnt  L(ACOS_ABS_ONE)    // Branch to short path if |x|=1
 }
 ;;
 
-
-// here if |x| < 0.625
-.align 32
-acos_base_range:
-{ .mfi
-      ldfe               fCpi = [rPiBy2Ptr] // Pi/2
-      fma.s1             fA33 = fA33, fXSqr, fA31
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fA15 = fA15, fXSqr, fA13
-      nop.i              0
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p19  =    acos_t,acos_coeff_P20,acos_coeff_P19
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p4   =    acos_t,acos_coeff_P5,acos_coeff_P4
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA29 = fA29, fXSqr, fA27
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p17 =    acos_tx,acos_coeff_P18,acos_coeff_P17
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p8   =    acos_t,acos_coeff_P9,acos_coeff_P8
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA25 = fA25, fXSqr, fA23
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fms.s1    acos_35by8     =    acos_5by2,acos_11by4,acos_5by2
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_63by8     =    acos_5by2,acos_11by4,f1
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA21 = fA21, fXSqr, fA19
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p13  =    acos_t,acos_coeff_P14,acos_coeff_P13
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_18by4     =    acos_3by2,acos_5by2,acos_3by4
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA9 = fA9, fXSqr, fA7
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_l1   =    acos_5by2,acos_d,acos_3by2
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_d2   =    acos_d,acos_d,f0
+     nop.i                 999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA5 = fA5, fXSqr, fA3
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p15  =    acos_t2,acos_poly_p17,acos_poly_p15
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_T0   =    acos_d,acos_S0,f0
+     nop.i                 999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA35 = fA35, fXQuadr, fA33
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p19  =    acos_t2,acos_coeff_P21,acos_poly_p19
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p4   =    acos_t2,acos_poly_p6,acos_poly_p4
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA17 = fA17, fXQuadr, fA15
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_d1   =    acos_35by8,acos_d,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_231by16   =    acos_3by2,acos_35by8,acos_63by8
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fX8 = fXQuadr, fXQuadr, f0 // x^8
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p2   =    acos_t,acos_coeff_P3,acos_coeff_P2
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p8   =    acos_t2,acos_coeff_P10,acos_poly_p8
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA25 = fA25, fXQuadr, fA21
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p11  =    acos_t,acos_coeff_P12,acos_coeff_P11
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_e0   =    acos_d2,acos_l1,acos_d
+     nop.i                 999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA9 = fA9, fXQuadr, fA5
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p15 =    acos_tx,acos_coeff_P16,acos_coeff_P15
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p0   =    acos_t,acos_coeff_P1,f1
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fms.s1             fCpi = fCpi, f1, f8 // Pi/2 - x
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p19 =    acos_tx,acos_coeff_P20,acos_coeff_P19
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p4  =    acos_tx,acos_coeff_P5,acos_coeff_P4
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA35 = fA35, fXQuadr, fA29
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p8  =    acos_tx,acos_coeff_P9,acos_coeff_P8
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_l2   =    acos_231by16,acos_d,acos_63by8
+     nop.i                 999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA17 = fA17, fXSqr, fA11
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_d3   =    acos_d2,acos_d,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_T3   =    acos_d2,acos_T0,f0
+     nop.i                 999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fX16 = fX8, fX8, f0 // x^16
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_429by16   =    acos_18by4,acos_11by4,acos_231by16
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_S1   =    acos_e0,acos_S0,acos_S0
+     nop.i                 999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA35 = fA35, fX8, fA25
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p4   =    acos_t4,acos_poly_p8,acos_poly_p4
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p15  =    acos_t4,acos_poly_p19,acos_poly_p15
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA17 = fA17, fX8, fA9
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p0   =    acos_t2,acos_poly_p2,acos_poly_p0
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p11  =    acos_t2,acos_poly_p13,acos_poly_p11
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fBaseP = fA35, fX16, fA17
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_t8   =    acos_t4,acos_t4,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_e1   =    acos_d2,acos_l2,acos_d1
+     nop.i                 999;;
 }
-;;
-{ .mfb
-      nop.m              0
-      // final result for |x| < 0.625
-      fnma.d.s0           f8 = fBaseP, fXCube, fCpi
-      // exit here for |x| < 0.625 path
-      br.ret.sptk        b0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p4  =    acos_tx2,acos_1poly_p6,acos_1poly_p4
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p15 =    acos_tx2,acos_1poly_p17,acos_1poly_p15
+     nop.i                      999;;
 }
-;;
 
-// here if |x| = 1
-// acos(1) = 0
-// acos(-1) = Pi
-.align 32
-acos_abs_1:
-{ .mfi
-      ldfe               fPiBy2 = [rPiBy2Ptr] // Pi/2
-      nop.f              0
-      nop.i              0
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p8  =    acos_tx2,acos_coeff_P10,acos_1poly_p8
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p19 =    acos_tx2,acos_coeff_P21,acos_1poly_p19
+     nop.i                      999;;
 }
-;;
-.pred.rel "mutex", p8, p9
-{ .mfi
-      nop.m              0
-      // result for x = 1.0
-(p9)  fma.d.s0           f8 = f1, f0, f0 // 0.0
-      nop.i              0
-}
-{.mfb
-      nop.m              0
-      // result for x = -1.0
-(p8)  fma.d.s0           f8 = fPiBy2, f1, fPiBy2 // Pi
-      // exit here for |x| = 1.0
-      br.ret.sptk        b0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p2  =    acos_tx,acos_coeff_P3,acos_coeff_P2
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p13 =    acos_tx,acos_coeff_P14,acos_coeff_P13
+     nop.i                      999;;
 }
-;;
 
-// here if x is a NaN, denormal, or zero
-.align 32
-acos_special:
-{ .mfi
-      // point to Pi/2
-      adds               rPiBy2Ptr = 272, rTblAddr
-      // set p12 = 1 if x is a NaN
-      fclass.m           p12, p0 = f8, 0xc3
-      nop.i              0
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p0  =    acos_tx,acos_coeff_P1,f1
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p11 =    acos_tx,acos_coeff_P12,acos_coeff_P11
+     nop.i                      999;;
 }
-{ .mlx
-      nop.m              0
-      // smallest positive DP normalized number
-      movl               rDenoBound = 0x0010000000000000
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_l3   =    acos_429by16,acos_d,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_z    =    acos_e1,acos_T3,acos_S1
+     nop.i                 999;;
 }
-;;
-{ .mfi
-      ldfe               fPiBy2 = [rPiBy2Ptr] // Pi/2
-      // set p13 = 1 if x = 0.0
-      fclass.m           p13, p0 = f8, 0x07
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p11  =    acos_t4,acos_poly_p15,acos_poly_p11
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_T6   =    acos_T3,acos_d3,f0
+     nop.i                 999;;
 }
-{ .mfi
-      nop.m              0
-      fnorm.s1           fNormX = f8
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_t11  =    acos_t8,acos_t3,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_poly_p0   =    acos_t4,acos_poly_p4,acos_poly_p0
+     nop.i                      999;;
 }
-;;
-{ .mfb
-      // load smallest normal to FP reg
-      setf.d             fDenoBound = rDenoBound
-      // answer if x is a NaN
-(p12) fma.d.s0           f8 = f8,f1,f0
-      // exit here if x is a NaN
-(p12) br.ret.spnt        b0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p4  =    acos_tx4,acos_1poly_p8,acos_1poly_p4
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p15 =    acos_tx4,acos_1poly_p19,acos_1poly_p15
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      // absolute value of normalized x
-      fmerge.s           fNormX = f1, fNormX
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p0  =    acos_tx2,acos_1poly_p2,acos_1poly_p0
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p11 =    acos_tx2,acos_1poly_p13,acos_1poly_p11
+     nop.i                      999;;
 }
-;;
-{ .mfb
-      nop.m              0
-      // final result for x = 0
-(p13) fma.d.s0           f8 = fPiBy2, f1, f8
-      // exit here if x = 0.0
-(p13) br.ret.spnt        b0
+
+ 
+{     .mfi 
+     nop.m                                                         999
+//     fcmp.le.s1     acos_pred_LEsqrt2by2,acos_pred_GTsqrt2by2    =    acos_abs_x,acos_const_sqrt2by2
+     fcmp.le.s1     p7,p8    =    acos_abs_x,acos_const_sqrt2by2
+     nop.i                                                         999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_tx8  =    acos_tx4,acos_tx4,f0
+     nop.i                 999;;
 }
-;;
-// if we still here then x is denormal or unnormal
-{ .mfi
-      nop.m              0
-      // set p14 = 1 if normalized x is greater than or
-      // equal to the smallest denormalized value
-      // So, if p14 is set to 1 it means that we deal with
-      // unnormal rather than with "true" denormal
-      fcmp.ge.s1         p14, p0 = fNormX, fDenoBound
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_z    =    acos_l3,acos_T6,acos_z
+     nop.i                 999;;
+} 
+ 
+{     .mfi
+     nop.m                      999
+     fma.s1    acos_series_t  =    acos_t11,acos_poly_p11,acos_poly_p0
+     nop.i                      999
+}
+{    .mfi
+     nop.m 999
+(p11) fma.s1 acos_const_add = acos_const_piby2, f1, acos_const_piby2
+     nop.i 999
 }
 ;;
+
 { .mfi
-      nop.m              0
-(p14) fcmp.eq.s0         p6, p0 = f8, f0      // Set D flag if x unnormal
-      nop.i              0
-}
-{ .mfb
-      nop.m              0
-      // normalize unnormal input
-(p14) fnorm.s1           f8 = f8
-      // return to the main path
-(p14) br.cond.sptk       acos_unnormal_back
+      nop.m 999
+(p12) fma.s1 acos_const_add = f1,f0,f0
+      nop.i 999
 }
 ;;
-// if we still here it means that input is "true" denormal
-{ .mfb
-      nop.m              0
-      // final result if x is denormal
-      fms.d.s0           f8 = fPiBy2, f1, f8 // Pi/2 - x
-      // exit here if x is denormal
-      br.ret.sptk        b0
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p0  =    acos_tx4,acos_1poly_p4,acos_1poly_p0
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_1poly_p11 =    acos_tx4,acos_1poly_p15,acos_1poly_p11
+     nop.i                      999;;
 }
-;;
 
-// here if |x| > 1.0
-// error handler should be called
-.align 32
-acos_abs_gt_1:
-{ .mfi
-      alloc              r32 = ar.pfs, 0, 3, 4, 0 // get some registers
-      fmerge.s           FR_X = f8,f8
-      nop.i              0
-}
-{ .mfb
-      mov                GR_Parameter_TAG = 58 // error code
-      frcpa.s0           FR_RESULT, p0 = f0,f0
-      // call error handler routine
-      br.cond.sptk       __libm_error_region
-}
-;;
-GLOBAL_LIBM_END(acos)
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    acos_tx11 =    acos_tx8,acos_tx3,f0
+     nop.i                 999;;
+} 
+ 
+{     .mfi 
+                         nop.m                 999
+//(acos_pred_GTsqrt2by2)   fnma.s1      answer2   =    acos_z,acos_series_t,acos_const_piby2
+(p8)   fnma.s1      answer2   =    acos_z,acos_series_t,f0
+                         nop.i                 999;;
+} 
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    acos_series_tx =    acos_tx11,acos_1poly_p11,acos_1poly_p0
+     nop.i                      999;;
+} 
+ 
+{     .mfi 
+                         nop.m                 999
+//(acos_pred_GTsqrt2by2)   fnma.d     f8   =    acos_sgn_x,answer2,acos_const_piby2
+(p8)   fnma.d     f8   =    acos_sgn_x,answer2,acos_const_add
+                         nop.i                 999;;
+} 
+ 
+{     .mfb 
+                         nop.m                 999
+//(acos_pred_LEsqrt2by2)   fnma.d     f8   =    f8,acos_series_tx,acos_const_piby2
+(p7)   fnma.d     f8   =    f8,acos_series_tx,acos_const_piby2
+     br.ret.sptk b0 ;;
+} 
+
+
+L(ACOS_ZERO):
+// Here if x=0: the result is the pi/2 constant itself
+{     .mfb 
+      nop.m                 999
+      fma.d    f8 =    acos_const_piby2,f1,f0  // acos(0)=pi/2
+      br.ret.sptk b0 ;;                        // Return with result in f8
+} 
+
+
+L(ACOS_ABS_ONE):
+.pred.rel "mutex",p11,p12
+// Here if |x|=1; p11 selects the x=-1 result, p12 the x=+1 result
+{     .mfi 
+      nop.m                 999
+(p11) fma.d    f8 =    acos_const_piby2,f1,acos_const_piby2 // acos(-1)=pi
+      nop.i                 999
+} 
+{     .mfb 
+      nop.m                 999
+(p12) fma.d    f8 =    f1,f0,f0 // acos(1)=0
+      br.ret.sptk b0 ;;         // Return with result in f8
+} 
+
 
+.endp acos
+ASM_SIZE_DIRECTIVE(acos)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
-        nop.f 0
+                nop.f 999
 .save   ar.pfs,GR_SAVE_PFS
         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
@@ -839,29 +879,28 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov GR_SAVE_GP=gp                       // Save gp
 };;
 { .mmi
-        stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
+        stfs [GR_Parameter_Y] = f1,16         // Store Parameter 2 on stack
         add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
         mov GR_SAVE_B0=b0                       // Save b0
 };;
+
 .body
+        frcpa.s0 f9,p0 = f0,f0
+;;
+
 { .mib
-        stfd [GR_Parameter_X] = FR_X                  // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-        nop.b 0
+        stfd [GR_Parameter_X] = f8            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
+        nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfd [GR_Parameter_Y] = FR_RESULT             // STORE Parameter 3 on stack
-        add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#         // Call error handling function
+        stfd [GR_Parameter_Y] = f9,-16           // Store Parameter 3 on stack
+        adds r32 = 48,sp
+        br.call.sptk b0=__libm_error_support#   // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
-        nop.m 0
-        nop.i 0
-};;
-{ .mmi
-        ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
+        ldfd  f8 = [r32]       // Get return result off stack
 .restore sp
         add   sp = 64,sp                       // Restore stack pointer
         mov   b0 = GR_SAVE_B0                  // Restore return address
@@ -870,8 +909,11 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov   gp = GR_SAVE_GP                  // Restore gp
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
+
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-.type   __libm_error_support#,@function
-.global __libm_error_support#
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type   __libm_error_support,@function
+.global __libm_error_support
diff --git a/sysdeps/ia64/fpu/e_acosf.S b/sysdeps/ia64/fpu/e_acosf.S
index 417f5b7ddc..a3425414cf 100644
--- a/sysdeps/ia64/fpu/e_acosf.S
+++ b/sysdeps/ia64/fpu/e_acosf.S
@@ -1,10 +1,10 @@
 .file "acosf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,23 +35,19 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 06/28/00 Improved speed
-// 06/31/00 Changed register allocation because of some duplicate macros
+// 2/02/00  Initial revision
+// 6/28/00  Improved speed
+// 6/31/00  Changed register allocation because of some duplicate macros
 //          moved nan exit bundle up to gain a cycle.
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 08/17/00 Changed predicate register macro-usage to direct predicate
+// 8/17/00  Changed predicate register macro-usage to direct predicate
 //          names due to an assembler bug.
 // 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
-// 03/13/01 Corrected sign of imm1 value in dep instruction.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/06/03 Reordered header: .section, .global, .proc, .align
-// 04/17/03 Moved mutex after label
 
 
 // Description
@@ -119,6 +115,7 @@
 //  answer2 = sign(x) z P(t)       if x>0
 //          = sign(x) z P(t) + pi  if x<0
 
+#include "libm_support.h"
 
 //
 // Assembly macros
@@ -225,30 +222,42 @@ acosf_poly_p1a                   = f90
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(acosf_coeff_1_table)
+acosf_coeff_1_table:
+ASM_TYPE_DIRECTIVE(acosf_coeff_1_table,@object)
 data8 0x3FC5555607DCF816 // P1
 data8 0x3F9CF81AD9BAB2C6 // P4
 data8 0x3FC59E0975074DF3 // P7
 data8 0xBFA6F4CC2780AA1D // P6
 data8 0x3FC2DD45292E93CB // P9
 data8 0x3fe6a09e667f3bcd // sqrt(2)/2
-LOCAL_OBJECT_END(acosf_coeff_1_table)
+ASM_SIZE_DIRECTIVE(acosf_coeff_1_table)
 
-LOCAL_OBJECT_START(acosf_coeff_2_table)
+acosf_coeff_2_table:
+ASM_TYPE_DIRECTIVE(acosf_coeff_2_table,@object)
 data8 0x3FA6F108E31EFBA6 // P3
 data8 0xBFCA31BF175D82A0 // P8
 data8 0x3FA30C0337F6418B // P5
 data8 0x3FB332C9266CB1F9 // P2
 data8 0x3ff921fb54442d18 // pi_by_2
-LOCAL_OBJECT_END(acosf_coeff_2_table)
+ASM_SIZE_DIRECTIVE(acosf_coeff_2_table)
 
+.align 32
+.global acosf
+ASM_TYPE_DIRECTIVE(acosf,@function)
 
 .section .text
-GLOBAL_LIBM_ENTRY(acosf)
+.proc  acosf
+.align 32
+
+acosf:
  
 // Load the addresses of the two tables.
 // Then, load the coefficients and other constants.
@@ -333,7 +342,7 @@ GLOBAL_LIBM_ENTRY(acosf)
 } 
 {     .mfb 
      nop.m                                               999
-(p8) fma.s.s0 f8                = f8,f1,f0
+(p8) fma.s f8                = f8,f1,f0
 (p8) br.ret.spnt   b0 ;;  // Exit if x=nan
 }
 
@@ -341,7 +350,7 @@ GLOBAL_LIBM_ENTRY(acosf)
 {     .mfb 
      nop.m                 999
      fcmp.eq.s1 p6,p0 = acosf_abs_x,f1
-(p10) br.cond.spnt  ACOSF_ZERO ;;     // Branch if x=0
+(p10) br.cond.spnt  L(ACOSF_ZERO) ;;     // Branch if x=0
 } 
  
 {     .mfi 
@@ -358,7 +367,7 @@ GLOBAL_LIBM_ENTRY(acosf)
 {     .mfb 
      nop.m                      999
      fma.s1    acosf_t4  =    acosf_t2,acosf_t2,f0
-(p6) br.cond.spnt  ACOSF_ABS_ONE ;;     // Branch if |x|=1
+(p6) br.cond.spnt  L(ACOSF_ABS_ONE) ;;     // Branch if |x|=1
 } 
 
 {     .mfi 
@@ -566,40 +575,42 @@ GLOBAL_LIBM_ENTRY(acosf)
 .pred.rel "mutex",p8,p7    //acosf_pred_GTsqrt2by2,acosf_pred_LEsqrt2by2
 {     .mfi 
       nop.m            999
-(p8)  fma.s.s0     f8   =    acosf_z,acosf_Pt,acosf_sgn_x_piby2
+(p8)  fma.s     f8   =    acosf_z,acosf_Pt,acosf_sgn_x_piby2
       nop.i            999
 } 
  
 {     .mfb 
       nop.m            999
-(p7)  fms.s.s0     f8   =    acosf_const_piby2,f1,acosf_sinf1
+(p7)  fms.s     f8   =    acosf_const_piby2,f1,acosf_sinf1
       br.ret.sptk b0 ;;
 } 
 
-ACOSF_ZERO:
+L(ACOSF_ZERO):
 // Here if x=0
 {     .mfb 
       nop.m                 999
-      fma.s.s0    f8 =    acosf_const_piby2,f1,f0  // acosf(0)=pi/2
+      fma.s    f8 =    acosf_const_piby2,f1,f0  // acosf(0)=pi/2
       br.ret.sptk b0 ;;
 } 
 
 
-ACOSF_ABS_ONE:
+L(ACOSF_ABS_ONE):
 .pred.rel "mutex",p11,p12
 // Here if |x|=1
 {     .mfi 
       nop.m                 999
-(p11) fma.s.s0    f8 =    acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
+(p11) fma.s    f8 =    acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
       nop.i                 999
 } 
 {     .mfb 
       nop.m                 999
-(p12) fma.s.s0    f8 =    f1,f0,f0 // acosf(1)=0
+(p12) fma.s    f8 =    f1,f0,f0 // acosf(1)=0
       br.ret.sptk b0 ;;
 } 
 
-GLOBAL_LIBM_END(acosf)
+.endp acosf
+ASM_SIZE_DIRECTIVE(acosf)
+
 
 // Stack operations when calling error support.
 //       (1)               (2)
@@ -631,7 +642,8 @@ GLOBAL_LIBM_END(acosf)
 //                              restore ar.pfs
 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -687,7 +699,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_acosl.S b/sysdeps/ia64/fpu/e_acosl.S
index daa75b18a5..ab1bbf41a7 100644
--- a/sysdeps/ia64/fpu/e_acosl.S
+++ b/sysdeps/ia64/fpu/e_acosl.S
@@ -1,10 +1,10 @@
 .file "acosl.s"
 
-
-// Copyright (c) 2001 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2001 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,2469 +20,1027 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 08/28/01 New version
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 2/02/00  Initial version 
+// 2/07/00  Modified calculation of acos_corr to correct acosl
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
+//          set [the previously overwritten] GR_Parameter_RESULT.
+// 12/20/00 Set denormal flag properly.
 //
 // API
 //==============================================================
-// long double acosl(long double)
+// double-extended = acosl (double-extended)
+// input  floating point f8
+// output floating point f8
 //
-// Overview of operation
+// Registers used
 //==============================================================
-// Background
 //
-// Implementation
+// predicate registers used:
+// p6 -> p12
 //
-// For |s| in [2^{-4}, sqrt(2)/2]:
-// Let t= 2^k*1.b1 b2..b6 1, where s= 2^k*1.b1 b2.. b52
-// acos(s)= pi/2-asin(t)-asin(r), where r= s*sqrt(1-t^2)-t*sqrt(1-s^2), i.e.
-// r= (s-t)*sqrt(1-t^2)-t*sqrt(1-t^2)*(sqrt((1-s^2)/(1-t^2))-1)
-// asin(r)-r evaluated as 9-degree polynomial (c3*r^3+c5*r^5+c7*r^7+c9*r^9)
-// The 64-bit significands of sqrt(1-t^2), 1/(1-t^2) are read from the table,
-// along with the high and low parts of asin(t) (stored as two double precision
-// values)
+// floating-point registers used:
+// f8 has input, then output
+// f8 -> f15, f32 ->f99
 //
-// |s| in (sqrt(2)/2, sqrt(255/256)):
-// Let t= 2^k*1.b1 b2..b6 1, where (1-s^2)*frsqrta(1-s^2)= 2^k*1.b1 b2..b6..
-// acos(|s|)= asin(t)-asin(r)
-// acos(-|s|)=pi-asin(t)+asin(r),   r= s*t-sqrt(1-s^2)*sqrt(1-t^2)
-// To minimize accumulated errors, r is computed as
-// r= (t*s)_s-t^2*y*z+z*y*(t^2-1+s^2)_s+z*y*(1-s^2)_s*x+z'*y*(1-s^2)*PS29+
-// +(t*s-(t*s)_s)+z*y*((t^2-1-(t^2-1+s^2)_s)+s^2)+z*y*(1-s^2-(1-s^2)_s)+
-// +ez*z'*y*(1-s^2)*(1-x),
-// where y= frsqrta(1-s^2), z= (sqrt(1-t^2))_s (rounded to 24 significant bits)
-// z'= sqrt(1-t^2), x= ((1-s^2)*y^2-1)/2
+// general registers used:
+// r32 -> r48
 //
-// |s|<2^{-4}: evaluate asin(s) as 17-degree polynomial, return pi/2-asin(s)
-// (or simply return pi/2-s, if|s|<2^{-64})
-//
-// |s| in [sqrt(255/256), 1): acos(|s|)= asin(sqrt(1-s^2))
-// acos(-|s|)= pi-asin(sqrt(1-s^2))
-// use 17-degree polynomial for asin(sqrt(1-s^2)),
-// 9-degree polynomial to evaluate sqrt(1-s^2)
-// High order term is (pi)_high-(y*(1-s^2))_high, for s<0,
-// or y*(1-s^2)_s, for s>0
-//
-
-
-
-// Registers used
+// Overview of operation
 //==============================================================
-// f6-f15, f32-f36
-// r2-r3, r23-r23
-// p6, p7, p8, p12
-//
-
-
-       GR_SAVE_B0= r33
-       GR_SAVE_PFS= r34
-       GR_SAVE_GP= r35 // This reg. can safely be used
-       GR_SAVE_SP= r36
-
-       GR_Parameter_X= r37
-       GR_Parameter_Y= r38
-       GR_Parameter_RESULT= r39
-       GR_Parameter_TAG= r40
-
-       FR_X= f10
-       FR_Y= f1
-       FR_RESULT= f8
-
-
+// There are three paths
+// 1. |x| < 2^-25                 ACOS_TINY
+// 2. 2^-25 <= |x| < 1/4          ACOS_POLY
+// 3. 1/4 <= |x| < 1              ACOS_ATAN
 
-RODATA
-
-.align 16
-
-LOCAL_OBJECT_START(T_table)
-
-// stores 64-bit significand of 1/(1-t^2), 64-bit significand of sqrt(1-t^2),
-// asin(t)_high (double precision), asin(t)_low (double precision)
-
-data8 0x80828692b71c4391, 0xff7ddcec2d87e879
-data8 0x3fb022bc0ae531a0, 0x3c9f599c7bb42af6
-data8 0x80869f0163d0b082, 0xff79cad2247914d3
-data8 0x3fb062dd26afc320, 0x3ca4eff21bd49c5c
-data8 0x808ac7d5a8690705, 0xff75a89ed6b626b9
-data8 0x3fb0a2ff4a1821e0, 0x3cb7e33b58f164cc
-data8 0x808f0112ad8ad2e0, 0xff7176517c2cc0cb
-data8 0x3fb0e32279319d80, 0x3caee31546582c43
-data8 0x80934abba8a1da0a, 0xff6d33e949b1ed31
-data8 0x3fb12346b8101da0, 0x3cb8bfe463d087cd
-data8 0x8097a4d3dbe63d8f, 0xff68e16571015c63
-data8 0x3fb1636c0ac824e0, 0x3c8870a7c5a3556f
-data8 0x809c0f5e9662b3dd, 0xff647ec520bca0f0
-data8 0x3fb1a392756ed280, 0x3c964f1a927461ae
-data8 0x80a08a5f33fadc66, 0xff600c07846a6830
-data8 0x3fb1e3b9fc19e580, 0x3c69eb3576d56332
-data8 0x80a515d91d71acd4, 0xff5b892bc475affa
-data8 0x3fb223e2a2dfbe80, 0x3c6a4e19fd972fb6
-data8 0x80a9b1cfc86ff7cd, 0xff56f631062cf93d
-data8 0x3fb2640c6dd76260, 0x3c62041160e0849e
-data8 0x80ae5e46b78b0d68, 0xff5253166bc17794
-data8 0x3fb2a43761187c80, 0x3cac61651af678c0
-data8 0x80b31b417a4b756b, 0xff4d9fdb14463dc8
-data8 0x3fb2e46380bb6160, 0x3cb06ef23eeba7a1
-data8 0x80b7e8c3ad33c369, 0xff48dc7e1baf6738
-data8 0x3fb32490d0d910c0, 0x3caa05f480b300d5
-data8 0x80bcc6d0f9c784d6, 0xff4408fe9ad13e37
-data8 0x3fb364bf558b3820, 0x3cb01e7e403aaab9
-data8 0x80c1b56d1692492d, 0xff3f255ba75f5f4e
-data8 0x3fb3a4ef12ec3540, 0x3cb4fe8fcdf5f5f1
-data8 0x80c6b49bc72ec446, 0xff3a319453ebd961
-data8 0x3fb3e5200d171880, 0x3caf2dc089b2b7e2
-data8 0x80cbc460dc4e0ae8, 0xff352da7afe64ac6
-data8 0x3fb425524827a720, 0x3cb75a855e7c6053
-data8 0x80d0e4c033bee9c4, 0xff301994c79afb32
-data8 0x3fb46585c83a5e00, 0x3cb3264981c019ab
-data8 0x80d615bdb87556db, 0xff2af55aa431f291
-data8 0x3fb4a5ba916c73c0, 0x3c994251d94427b5
-data8 0x80db575d6291fd8a, 0xff25c0f84bae0cb9
-data8 0x3fb4e5f0a7dbdb20, 0x3cbee2fcc4c786cb
-data8 0x80e0a9a33769e535, 0xff207c6cc0ec09fd
-data8 0x3fb526280fa74620, 0x3c940656e5549b91
-data8 0x80e60c93498e32cd, 0xff1b27b703a19c98
-data8 0x3fb56660ccee2740, 0x3ca7082374d7b2cd
-data8 0x80eb8031b8d4052d, 0xff15c2d6105c72f8
-data8 0x3fb5a69ae3d0b520, 0x3c7c4d46e09ac68a
-data8 0x80f10482b25c6c8a, 0xff104dc8e0813ed4
-data8 0x3fb5e6d6586fec20, 0x3c9aa84ffd9b4958
-data8 0x80f6998a709c7cfb, 0xff0ac88e6a4ab926
-data8 0x3fb627132eed9140, 0x3cbced2cbbbe7d16
-data8 0x80fc3f4d3b657c44, 0xff053325a0c8a2ec
-data8 0x3fb667516b6c34c0, 0x3c6489c5fc68595a
-data8 0x8101f5cf67ed2af8, 0xfeff8d8d73dec2bb
-data8 0x3fb6a791120f33a0, 0x3cbe12acf159dfad
-data8 0x8107bd1558d6291f, 0xfef9d7c4d043df29
-data8 0x3fb6e7d226fabba0, 0x3ca386d099cd0dc7
-data8 0x810d95237e38766a, 0xfef411ca9f80b5f7
-data8 0x3fb72814ae53cc20, 0x3cb9f35731e71dd6
-data8 0x81137dfe55aa0e29, 0xfeee3b9dc7eef009
-data8 0x3fb76858ac403a00, 0x3c74df3dd959141a
-data8 0x811977aa6a479f0f, 0xfee8553d2cb8122c
-data8 0x3fb7a89e24e6b0e0, 0x3ca6034406ee42bc
-data8 0x811f822c54bd5ef8, 0xfee25ea7add46a91
-data8 0x3fb7e8e51c6eb6a0, 0x3cb82f8f78e68ed7
-data8 0x81259d88bb4ffac1, 0xfedc57dc2809fb1d
-data8 0x3fb8292d9700ad60, 0x3cbebb73c0e653f9
-data8 0x812bc9c451e5a257, 0xfed640d974eb6068
-data8 0x3fb8697798c5d620, 0x3ca2feee76a9701b
-data8 0x813206e3da0f3124, 0xfed0199e6ad6b585
-data8 0x3fb8a9c325e852e0, 0x3cb9e88f2f4d0efe
-data8 0x813854ec231172f9, 0xfec9e229dcf4747d
-data8 0x3fb8ea1042932a00, 0x3ca5ff40d81f66fd
-data8 0x813eb3e209ee858f, 0xfec39a7a9b36538b
-data8 0x3fb92a5ef2f247c0, 0x3cb5e3bece4d6b07
-data8 0x814523ca796f56ce, 0xfebd428f72561efe
-data8 0x3fb96aaf3b3281a0, 0x3cb7b9e499436d7c
-data8 0x814ba4aa6a2d3ff9, 0xfeb6da672bd48fe4
-data8 0x3fb9ab011f819860, 0x3cb9168143cc1a7f
-data8 0x81523686e29bbdd7, 0xfeb062008df81f50
-data8 0x3fb9eb54a40e3ac0, 0x3cb6e544197eb1e1
-data8 0x8158d964f7124614, 0xfea9d95a5bcbd65a
-data8 0x3fba2ba9cd080800, 0x3ca9a717be8f7446
-data8 0x815f8d49c9d639e4, 0xfea34073551e1ac8
-data8 0x3fba6c009e9f9260, 0x3c741e989a60938a
-data8 0x8166523a8b24f626, 0xfe9c974a367f785c
-data8 0x3fbaac591d0661a0, 0x3cb2c1290107e57d
-data8 0x816d283c793e0114, 0xfe95ddddb94166cb
-data8 0x3fbaecb34c6ef600, 0x3c9c7d5fbaec405d
-data8 0x81740f54e06d55bd, 0xfe8f142c93750c50
-data8 0x3fbb2d0f310cca00, 0x3cbc09479a9cbcfb
-data8 0x817b07891b15cd5e, 0xfe883a3577e9fceb
-data8 0x3fbb6d6ccf1455e0, 0x3cb9450bff4ee307
-data8 0x818210de91bba6c8, 0xfe814ff7162cf62f
-data8 0x3fbbadcc2abb1180, 0x3c9227fda12a8d24
-data8 0x81892b5abb0f2bf9, 0xfe7a55701a8697b1
-data8 0x3fbbee2d48377700, 0x3cb6fad72acfe356
-data8 0x819057031bf7760e, 0xfe734a9f2dfa1810
-data8 0x3fbc2e902bc10600, 0x3cb4465b588d16ad
-data8 0x819793dd479d4fbe, 0xfe6c2f82f643f68b
-data8 0x3fbc6ef4d9904580, 0x3c8b9ac54823960d
-data8 0x819ee1eedf76367a, 0xfe65041a15d8a92c
-data8 0x3fbcaf5b55dec6a0, 0x3ca2b8d28a954db2
-data8 0x81a6413d934f7a66, 0xfe5dc8632be3477f
-data8 0x3fbcefc3a4e727a0, 0x3c9380da83713ab4
-data8 0x81adb1cf21597d4b, 0xfe567c5cd44431d5
-data8 0x3fbd302dcae51600, 0x3ca995b83421756a
-data8 0x81b533a9563310b8, 0xfe4f2005a78fb50f
-data8 0x3fbd7099cc155180, 0x3caefa2f7a817d5f
-data8 0x81bcc6d20cf4f373, 0xfe47b35c3b0caaeb
-data8 0x3fbdb107acb5ae80, 0x3cb455fc372dd026
-data8 0x81c46b4f2f3d6e68, 0xfe40365f20b316d6
-data8 0x3fbdf177710518c0, 0x3cbee3dcc5b01434
-data8 0x81cc2126b53c1144, 0xfe38a90ce72abf36
-data8 0x3fbe31e91d439620, 0x3cb3e131c950aebd
-data8 0x81d3e85ea5bd8ee2, 0xfe310b6419c9c33a
-data8 0x3fbe725cb5b24900, 0x3c01d3fac6029027
-data8 0x81dbc0fd1637b9c1, 0xfe295d6340932d15
-data8 0x3fbeb2d23e937300, 0x3c6304cc44aeedd1
-data8 0x81e3ab082ad5a0a4, 0xfe219f08e03580b3
-data8 0x3fbef349bc2a77e0, 0x3cac1d2d6abe9c72
-data8 0x81eba6861683cb97, 0xfe19d0537a0946e2
-data8 0x3fbf33c332bbe020, 0x3ca0909dba4e96ca
-data8 0x81f3b37d1afc9979, 0xfe11f1418c0f94e2
-data8 0x3fbf743ea68d5b60, 0x3c937fc12a2a779a
-data8 0x81fbd1f388d4be45, 0xfe0a01d190f09063
-data8 0x3fbfb4bc1be5c340, 0x3cbf51a504b55813
-data8 0x820401efbf87e248, 0xfe020201fff9efea
-data8 0x3fbff53b970d1e80, 0x3ca625444b260078
-data8 0x82106ad2ffdca049, 0xfdf5e3940a49135e
-data8 0x3fc02aff52065460, 0x3c9125d113e22a57
-data8 0x8221343d6ea1d3e2, 0xfde581a45429b0a0
-data8 0x3fc06b84f8e03220, 0x3caccf362295894b
-data8 0x82324434adbf99c2, 0xfdd4de1a001fb775
-data8 0x3fc0ac0ed1fe7240, 0x3cc22f676096b0af
-data8 0x82439aee8d0c7747, 0xfdc3f8e8269d1f03
-data8 0x3fc0ec9cee9e4820, 0x3cca147e2886a628
-data8 0x825538a1d0fcb2f0, 0xfdb2d201a9b1ba66
-data8 0x3fc12d2f6006f0a0, 0x3cc72b36633bc2d4
-data8 0x82671d86345c5cee, 0xfda1695934d723e7
-data8 0x3fc16dc63789de60, 0x3cb11f9c47c7b83f
-data8 0x827949d46a121770, 0xfd8fbee13cbbb823
-data8 0x3fc1ae618682e620, 0x3cce1b59020cef8e
-data8 0x828bbdc61eeab9ba, 0xfd7dd28bff0c9f34
-data8 0x3fc1ef015e586c40, 0x3cafec043e0225ee
-data8 0x829e7995fb6de9e1, 0xfd6ba44b823ee1ca
-data8 0x3fc22fa5d07b90c0, 0x3cba905409caf8e3
-data8 0x82b17d7fa5bbc982, 0xfd5934119557883a
-data8 0x3fc2704eee685da0, 0x3cb5ef21838a823e
-data8 0x82c4c9bfc373d276, 0xfd4681cfcfb2c161
-data8 0x3fc2b0fcc9a5f3e0, 0x3ccc7952c5e0e312
-data8 0x82d85e93fba50136, 0xfd338d7790ca0f41
-data8 0x3fc2f1af73c6ba00, 0x3cbecf5f977d1ca9
-data8 0x82ec3c3af8c76b32, 0xfd2056f9fff97727
-data8 0x3fc33266fe6889a0, 0x3c9d329c022ebdb5
-data8 0x830062f46abf6022, 0xfd0cde480c43b327
-data8 0x3fc373237b34de60, 0x3cc95806d4928adb
-data8 0x8314d30108ea35f0, 0xfcf923526c1562b2
-data8 0x3fc3b3e4fbe10520, 0x3cbc299fe7223d54
-data8 0x83298ca29434df97, 0xfce526099d0737ed
-data8 0x3fc3f4ab922e4a60, 0x3cb59d8bb8fdbccc
-data8 0x833e901bd93c7009, 0xfcd0e65de39f1f7c
-data8 0x3fc435774fea2a60, 0x3c9ec18b43340914
-data8 0x8353ddb0b278aad8, 0xfcbc643f4b106055
-data8 0x3fc4764846ee80a0, 0x3cb90402efd87ed6
-data8 0x836975a60a70c52e, 0xfca79f9da4fab13a
-data8 0x3fc4b71e8921b860, 0xbc58f23449ed6365
-data8 0x837f5841ddfa7a46, 0xfc92986889284148
-data8 0x3fc4f7fa2876fca0, 0xbc6294812bf43acd
-data8 0x839585cb3e839773, 0xfc7d4e8f554ab12f
-data8 0x3fc538db36ee6960, 0x3cb910b773d4c578
-data8 0x83abfe8a5466246f, 0xfc67c2012cb6fa68
-data8 0x3fc579c1c6953cc0, 0x3cc5ede909fc47fc
-data8 0x83c2c2c861474d91, 0xfc51f2acf82041d5
-data8 0x3fc5baade9860880, 0x3cac63cdfc3588e5
-data8 0x83d9d2cfc2813637, 0xfc3be08165519325
-data8 0x3fc5fb9fb1e8e3a0, 0x3cbf7c8466578c29
-data8 0x83f12eebf397daac, 0xfc258b6ce6e6822f
-data8 0x3fc63c9731f39d40, 0x3cb6d2a7ffca3e9e
-data8 0x8408d76990b9296e, 0xfc0ef35db402af94
-data8 0x3fc67d947be9eec0, 0x3cb1980da09e6566
-data8 0x8420cc9659487cd7, 0xfbf81841c8082dc4
-data8 0x3fc6be97a21daf00, 0x3cc2ac8330e59aa5
-data8 0x84390ec132759ecb, 0xfbe0fa06e24cc390
-data8 0x3fc6ffa0b6ef05e0, 0x3ccc1a030fee56c4
-data8 0x84519e3a29df811a, 0xfbc9989a85ce0954
-data8 0x3fc740afcccca000, 0x3cc19692a5301ca6
-data8 0x846a7b527842d61b, 0xfbb1f3e9f8e45dc4
-data8 0x3fc781c4f633e2c0, 0x3cc0e98f3868a508
-data8 0x8483a65c8434b5f0, 0xfb9a0be244f4af45
-data8 0x3fc7c2e045b12140, 0x3cb2a8d309754420
-data8 0x849d1fabe4e97dd7, 0xfb81e070362116d1
-data8 0x3fc80401cddfd120, 0x3ca7a44544aa4ce6
-data8 0x84b6e795650817ea, 0xfb6971805af8411e
-data8 0x3fc84529a16ac020, 0x3c9e3b709c7d6f94
-data8 0x84d0fe6f0589da92, 0xfb50beff0423a2f5
-data8 0x3fc88657d30c49e0, 0x3cc60d65a7f0a278
-data8 0x84eb649000a73014, 0xfb37c8d84414755c
-data8 0x3fc8c78c758e8e80, 0x3cc94b2ee984c2b7
-data8 0x85061a50ccd13781, 0xfb1e8ef7eeaf764b
-data8 0x3fc908c79bcba900, 0x3cc8540ae794a2fe
-data8 0x8521200b1fb8916e, 0xfb05114998f76a83
-data8 0x3fc94a0958ade6c0, 0x3ca127f49839fa9c
-data8 0x853c7619f1618bf6, 0xfaeb4fb898b65d19
-data8 0x3fc98b51bf2ffee0, 0x3c8c9ba7a803909a
-data8 0x85581cd97f45e274, 0xfad14a3004259931
-data8 0x3fc9cca0e25d4ac0, 0x3cba458e91d3bf54
-data8 0x857414a74f8446b4, 0xfab7009ab1945a54
-data8 0x3fca0df6d551fe80, 0x3cc78ea1d329d2b2
-data8 0x85905de2341dea46, 0xfa9c72e3370d2fbc
-data8 0x3fca4f53ab3b6200, 0x3ccf60dca86d57ef
-data8 0x85acf8ea4e423ff8, 0xfa81a0f3e9fa0ee9
-data8 0x3fca90b777580aa0, 0x3ca4c4e2ec8a867e
-data8 0x85c9e62111a92e7d, 0xfa668ab6dec711b1
-data8 0x3fcad2224cf814e0, 0x3c303de5980d071c
-data8 0x85e725e947fbee97, 0xfa4b3015e883dbfe
-data8 0x3fcb13943f7d5f80, 0x3cc29d4eefa5cb1e
-data8 0x8604b8a7144cd054, 0xfa2f90fa9883a543
-data8 0x3fcb550d625bc6a0, 0x3c9e01a746152daf
-data8 0x86229ebff69e2415, 0xfa13ad4e3dfbe1c1
-data8 0x3fcb968dc9195ea0, 0x3ccc091bd73ae518
-data8 0x8640d89acf78858c, 0xf9f784f9e5a1877b
-data8 0x3fcbd815874eb160, 0x3cb5f4b89875e187
-data8 0x865f669fe390c7f5, 0xf9db17e65944eacf
-data8 0x3fcc19a4b0a6f9c0, 0x3cc5c0bc2b0bbf14
-data8 0x867e4938df7dc45f, 0xf9be65fc1f6c2e6e
-data8 0x3fcc5b3b58e061e0, 0x3cc1ca70df8f57e7
-data8 0x869d80d0db7e4c0c, 0xf9a16f237aec427a
-data8 0x3fcc9cd993cc4040, 0x3cbae93acc85eccf
-data8 0x86bd0dd45f4f8265, 0xf98433446a806e70
-data8 0x3fccde7f754f5660, 0x3cb22f70e64568d0
-data8 0x86dcf0b16613e37a, 0xf966b246a8606170
-data8 0x3fcd202d11620fa0, 0x3c962030e5d4c849
-data8 0x86fd29d7624b3d5d, 0xf948ec11a9d4c45b
-data8 0x3fcd61e27c10c0a0, 0x3cc7083c91d59217
-data8 0x871db9b741dbe44a, 0xf92ae08c9eca4941
-data8 0x3fcda39fc97be7c0, 0x3cc9258579e57211
-data8 0x873ea0c3722d6af2, 0xf90c8f9e71633363
-data8 0x3fcde5650dd86d60, 0x3ca4755a9ea582a9
-data8 0x875fdf6fe45529e8, 0xf8edf92dc5875319
-data8 0x3fce27325d6fe520, 0x3cbc1e2b6c1954f9
-data8 0x878176321154e2bc, 0xf8cf1d20f87270b8
-data8 0x3fce6907cca0d060, 0x3cb6ca4804750830
-data8 0x87a36580fe6bccf5, 0xf8affb5e20412199
-data8 0x3fceaae56fdee040, 0x3cad6b310d6fd46c
-data8 0x87c5add5417a5cb9, 0xf89093cb0b7c0233
-data8 0x3fceeccb5bb33900, 0x3cc16e99cedadb20
-data8 0x87e84fa9057914ca, 0xf870e64d40a15036
-data8 0x3fcf2eb9a4bcb600, 0x3cc75ee47c8b09e9
-data8 0x880b4b780f02b709, 0xf850f2c9fdacdf78
-data8 0x3fcf70b05fb02e20, 0x3cad6350d379f41a
-data8 0x882ea1bfc0f228ac, 0xf830b926379e6465
-data8 0x3fcfb2afa158b8a0, 0x3cce0ccd9f829985
-data8 0x885252ff21146108, 0xf810394699fe0e8e
-data8 0x3fcff4b77e97f3e0, 0x3c9b30faa7a4c703
-data8 0x88765fb6dceebbb3, 0xf7ef730f865f6df0
-data8 0x3fd01b6406332540, 0x3cdc5772c9e0b9bd
-data8 0x88ad1f69be2cc730, 0xf7bdc59bc9cfbd97
-data8 0x3fd04cf8ad203480, 0x3caeef44fe21a74a
-data8 0x88f763f70ae2245e, 0xf77a91c868a9c54e
-data8 0x3fd08f23ce0162a0, 0x3cd6290ab3fe5889
-data8 0x89431fc7bc0c2910, 0xf73642973c91298e
-data8 0x3fd0d1610f0c1ec0, 0x3cc67401a01f08cf
-data8 0x8990573407c7738e, 0xf6f0d71d1d7a2dd6
-data8 0x3fd113b0c65d88c0, 0x3cc7aa4020fe546f
-data8 0x89df0eb108594653, 0xf6aa4e6a05cfdef2
-data8 0x3fd156134ada6fe0, 0x3cc87369da09600c
-data8 0x8a2f4ad16e0ed78a, 0xf662a78900c35249
-data8 0x3fd19888f43427a0, 0x3cc62b220f38e49c
-data8 0x8a811046373e0819, 0xf619e180181d97cc
-data8 0x3fd1db121aed7720, 0x3ca3ede7490b52f4
-data8 0x8ad463df6ea0fa2c, 0xf5cffb504190f9a2
-data8 0x3fd21daf185fa360, 0x3caafad98c1d6c1b
-data8 0x8b294a8cf0488daf, 0xf584f3f54b8604e6
-data8 0x3fd2606046bf95a0, 0x3cdb2d704eeb08fa
-data8 0x8b7fc95f35647757, 0xf538ca65c960b582
-data8 0x3fd2a32601231ec0, 0x3cc661619fa2f126
-data8 0x8bd7e588272276f8, 0xf4eb7d92ff39fccb
-data8 0x3fd2e600a3865760, 0x3c8a2a36a99aca4a
-data8 0x8c31a45bf8e9255e, 0xf49d0c68cd09b689
-data8 0x3fd328f08ad12000, 0x3cb9efaf1d7ab552
-data8 0x8c8d0b520a35eb18, 0xf44d75cd993cfad2
-data8 0x3fd36bf614dcc040, 0x3ccacbb590bef70d
-data8 0x8cea2005d068f23d, 0xf3fcb8a23ab4942b
-data8 0x3fd3af11a079a6c0, 0x3cd9775872cf037d
-data8 0x8d48e837c8cd5027, 0xf3aad3c1e2273908
-data8 0x3fd3f2438d754b40, 0x3ca03304f667109a
-data8 0x8da969ce732f3ac7, 0xf357c60202e2fd7e
-data8 0x3fd4358c3ca032e0, 0x3caecf2504ff1a9d
-data8 0x8e0baad75555e361, 0xf3038e323ae9463a
-data8 0x3fd478ec0fd419c0, 0x3cc64bdc3d703971
-data8 0x8e6fb18807ba877e, 0xf2ae2b1c3a6057f7
-data8 0x3fd4bc6369fa40e0, 0x3cbb7122ec245cf2
-data8 0x8ed5843f4bda74d5, 0xf2579b83aa556f0c
-data8 0x3fd4fff2af11e2c0, 0x3c9cfa2dc792d394
-data8 0x8f3d29862c861fef, 0xf1ffde2612ca1909
-data8 0x3fd5439a4436d000, 0x3cc38d46d310526b
-data8 0x8fa6a81128940b2d, 0xf1a6f1bac0075669
-data8 0x3fd5875a8fa83520, 0x3cd8bf59b8153f8a
-data8 0x901206c1686317a6, 0xf14cd4f2a730d480
-data8 0x3fd5cb33f8cf8ac0, 0x3c9502b5c4d0e431
-data8 0x907f4ca5fe9cf739, 0xf0f186784a125726
-data8 0x3fd60f26e847b120, 0x3cc8a1a5e0acaa33
-data8 0x90ee80fd34aeda5e, 0xf09504ef9a212f18
-data8 0x3fd65333c7e43aa0, 0x3cae5b029cb1f26e
-data8 0x915fab35e37421c6, 0xf0374ef5daab5c45
-data8 0x3fd6975b02b8e360, 0x3cd5aa1c280c45e6
-data8 0x91d2d2f0d894d73c, 0xefd86321822dbb51
-data8 0x3fd6db9d05213b20, 0x3cbecf2c093ccd8b
-data8 0x9248000249200009, 0xef7840021aca5a72
-data8 0x3fd71ffa3cc87fc0, 0x3cb8d273f08d00d9
-data8 0x92bf3a7351f081d2, 0xef16e42021d7cbd5
-data8 0x3fd7647318b1ad20, 0x3cbce099d79cdc46
-data8 0x93388a8386725713, 0xeeb44dfce6820283
-data8 0x3fd7a908093fc1e0, 0x3ccb033ec17a30d9
-data8 0x93b3f8aa8e653812, 0xee507c126774fa45
-data8 0x3fd7edb9803e3c20, 0x3cc10aedb48671eb
-data8 0x94318d99d341ade4, 0xedeb6cd32f891afb
-data8 0x3fd83287f0e9cf80, 0x3c994c0c1505cd2a
-data8 0x94b1523e3dedc630, 0xed851eaa3168f43c
-data8 0x3fd87773cff956e0, 0x3cda3b7bce6a6b16
-data8 0x95334fc20577563f, 0xed1d8ffaa2279669
-data8 0x3fd8bc7d93a70440, 0x3cd4922edc792ce2
-data8 0x95b78f8e8f92f274, 0xecb4bf1fd2be72da
-data8 0x3fd901a5b3b9cf40, 0x3cd3fea1b00f9d0d
-data8 0x963e1b4e63a87c3f, 0xec4aaa6d08694cc1
-data8 0x3fd946eca98f2700, 0x3cdba4032d968ff1
-data8 0x96c6fcef314074fc, 0xebdf502d53d65fea
-data8 0x3fd98c52f024e800, 0x3cbe7be1ab8c95c9
-data8 0x97523ea3eab028b2, 0xeb72aea36720793e
-data8 0x3fd9d1d904239860, 0x3cd72d08a6a22b70
-data8 0x97dfeae6f4ee4a9a, 0xeb04c4096a884e94
-data8 0x3fda177f63e8ef00, 0x3cd818c3c1ebfac7
-data8 0x98700c7c6d85d119, 0xea958e90cfe1efd7
-data8 0x3fda5d468f92a540, 0x3cdf45fbfaa080fe
-data8 0x9902ae7487a9caa1, 0xea250c6224aab21a
-data8 0x3fdaa32f090998e0, 0x3cd715a9353cede4
-data8 0x9997dc2e017a9550, 0xe9b33b9ce2bb7638
-data8 0x3fdae939540d3f00, 0x3cc545c014943439
-data8 0x9a2fa158b29b649b, 0xe9401a573f8aa706
-data8 0x3fdb2f65f63f6c60, 0x3cd4a63c2f2ca8e2
-data8 0x9aca09f835466186, 0xe8cba69df9f0bf35
-data8 0x3fdb75b5773075e0, 0x3cda310ce1b217ec
-data8 0x9b672266ab1e0136, 0xe855de74266193d4
-data8 0x3fdbbc28606babc0, 0x3cdc84b75cca6c44
-data8 0x9c06f7579f0b7bd5, 0xe7debfd2f98c060b
-data8 0x3fdc02bf3d843420, 0x3cd225d967ffb922
-data8 0x9ca995db058cabdc, 0xe76648a991511c6e
-data8 0x3fdc497a9c224780, 0x3cde08101c5b825b
-data8 0x9d4f0b605ce71e88, 0xe6ec76dcbc02d9a7
-data8 0x3fdc905b0c10d420, 0x3cb1abbaa3edf120
-data8 0x9df765b9eecad5e6, 0xe6714846bdda7318
-data8 0x3fdcd7611f4b8a00, 0x3cbf6217ae80aadf
-data8 0x9ea2b320350540fe, 0xe5f4bab71494cd6b
-data8 0x3fdd1e8d6a0d56c0, 0x3cb726e048cc235c
-data8 0x9f51023562fc5676, 0xe576cbf239235ecb
-data8 0x3fdd65e082df5260, 0x3cd9e66872bd5250
-data8 0xa002620915c2a2f6, 0xe4f779b15f5ec5a7
-data8 0x3fddad5b02a82420, 0x3c89743b0b57534b
-data8 0xa0b6e21c2caf9992, 0xe476c1a233a7873e
-data8 0x3fddf4fd84bbe160, 0x3cbf7adea9ee3338
-data8 0xa16e9264cc83a6b2, 0xe3f4a16696608191
-data8 0x3fde3cc8a6ec6ee0, 0x3cce46f5a51f49c6
-data8 0xa22983528f3d8d49, 0xe3711694552da8a8
-data8 0x3fde84bd099a6600, 0x3cdc78f6490a2d31
-data8 0xa2e7c5d2e2e69460, 0xe2ec1eb4e1e0a5fb
-data8 0x3fdeccdb4fc685c0, 0x3cdd3aedb56a4825
-data8 0xa3a96b5599bd2532, 0xe265b74506fbe1c9
-data8 0x3fdf15241f23b3e0, 0x3cd440f3c6d65f65
-data8 0xa46e85d1ae49d7de, 0xe1ddddb499b3606f
-data8 0x3fdf5d98202994a0, 0x3cd6c44bd3fb745a
-data8 0xa53727ca3e11b99e, 0xe1548f662951b00d
-data8 0x3fdfa637fe27bf60, 0x3ca8ad1cd33054dd
-data8 0xa6036453bdc20186, 0xe0c9c9aeabe5e481
-data8 0x3fdfef0467599580, 0x3cc0f1ac0685d78a
-data8 0xa6d34f1969dda338, 0xe03d89d5281e4f81
-data8 0x3fe01bff067d6220, 0x3cc0731e8a9ef057
-data8 0xa7a6fc62f7246ff3, 0xdfafcd125c323f54
-data8 0x3fe04092d1ae3b40, 0x3ccabda24b59906d
-data8 0xa87e811a861df9b9, 0xdf20909061bb9760
-data8 0x3fe0653df0fd9fc0, 0x3ce94c8dcc722278
-data8 0xa959f2d2dd687200, 0xde8fd16a4e5f88bd
-data8 0x3fe08a00c1cae320, 0x3ce6b888bb60a274
-data8 0xaa3967cdeea58bda, 0xddfd8cabd1240d22
-data8 0x3fe0aedba3221c00, 0x3ced5941cd486e46
-data8 0xab904fd587263c84, 0xdd1f4472e1cf64ed
-data8 0x3fe0e651e85229c0, 0x3cdb6701042299b1
-data8 0xad686d44dd5a74bb, 0xdbf173e1f6b46e92
-data8 0x3fe1309cbf4cdb20, 0x3cbf1be7bb3f0ec5
-data8 0xaf524e15640ebee4, 0xdabd54896f1029f6
-data8 0x3fe17b4ee1641300, 0x3ce81dd055b792f1
-data8 0xb14eca24ef7db3fa, 0xd982cb9ae2f47e41
-data8 0x3fe1c66b9ffd6660, 0x3cd98ea31eb5ddc7
-data8 0xb35ec807669920ce, 0xd841bd1b8291d0b6
-data8 0x3fe211f66db3a5a0, 0x3ca480c35a27b4a2
-data8 0xb5833e4755e04dd1, 0xd6fa0bd3150b6930
-data8 0x3fe25df2e05b6c40, 0x3ca4bc324287a351
-data8 0xb7bd34c8000b7bd3, 0xd5ab9939a7d23aa1
-data8 0x3fe2aa64b32f7780, 0x3cba67314933077c
-data8 0xba0dc64d126cc135, 0xd4564563ce924481
-data8 0x3fe2f74fc9289ac0, 0x3cec1a1dc0efc5ec
-data8 0xbc76222cbbfa74a6, 0xd2f9eeed501125a8
-data8 0x3fe344b82f859ac0, 0x3ceeef218de413ac
-data8 0xbef78e31985291a9, 0xd19672e2182f78be
-data8 0x3fe392a22087b7e0, 0x3cd2619ba201204c
-data8 0xc19368b2b0629572, 0xd02baca5427e436a
-data8 0x3fe3e11206694520, 0x3cb5d0b3143fe689
-data8 0xc44b2ae8c6733e51, 0xceb975d60b6eae5d
-data8 0x3fe4300c7e945020, 0x3cbd367143da6582
-data8 0xc7206b894212dfef, 0xcd3fa6326ff0ac9a
-data8 0x3fe47f965d201d60, 0x3ce797c7a4ec1d63
-data8 0xca14e1b0622de526, 0xcbbe13773c3c5338
-data8 0x3fe4cfb4b09d1a20, 0x3cedfadb5347143c
-data8 0xcd2a6825eae65f82, 0xca34913d425a5ae9
-data8 0x3fe5206cc637e000, 0x3ce2798b38e54193
-data8 0xd06301095e1351ee, 0xc8a2f0d3679c08c0
-data8 0x3fe571c42e3d0be0, 0x3ccd7cb9c6c2ca68
-data8 0xd3c0d9f50057adda, 0xc70901152d59d16b
-data8 0x3fe5c3c0c108f940, 0x3ceb6c13563180ab
-data8 0xd74650a98cc14789, 0xc5668e3d4cbf8828
-data8 0x3fe61668a46ffa80, 0x3caa9092e9e3c0e5
-data8 0xdaf5f8579dcc8f8f, 0xc3bb61b3eed42d02
-data8 0x3fe669c251ad69e0, 0x3cccf896ef3b4fee
-data8 0xded29f9f9a6171b4, 0xc20741d7f8e8e8af
-data8 0x3fe6bdd49bea05c0, 0x3cdc6b29937c575d
-data8 0xe2df5765854ccdb0, 0xc049f1c2d1b8014b
-data8 0x3fe712a6b76c6e80, 0x3ce1ddc6f2922321
-data8 0xe71f7a9b94fcb4c3, 0xbe833105ec291e91
-data8 0x3fe76840418978a0, 0x3ccda46e85432c3d
-data8 0xeb96b72d3374b91e, 0xbcb2bb61493b28b3
-data8 0x3fe7bea9496d5a40, 0x3ce37b42ec6e17d3
-data8 0xf049183c3f53c39b, 0xbad848720223d3a8
-data8 0x3fe815ea59dab0a0, 0x3cb03ad41bfc415b
-data8 0xf53b11ec7f415f15, 0xb8f38b57c53c9c48
-data8 0x3fe86e0c84010760, 0x3cc03bfcfb17fe1f
-data8 0xfa718f05adbf2c33, 0xb70432500286b185
-data8 0x3fe8c7196b9225c0, 0x3ced99fcc6866ba9
-data8 0xfff200c3f5489608, 0xb509e6454dca33cc
-data8 0x3fe9211b54441080, 0x3cb789cb53515688
-// The following table entries are not used
-//data8 0x82e138a0fac48700, 0xb3044a513a8e6132
-//data8 0x3fe97c1d30f5b7c0, 0x3ce1eb765612d1d0
-//data8 0x85f4cc7fc670d021, 0xb0f2fb2ea6cbbc88
-//data8 0x3fe9d82ab4b5fde0, 0x3ced3fe6f27e8039
-//data8 0x89377c1387d5b908, 0xaed58e9a09014d5c
-//data8 0x3fea355065f87fa0, 0x3cbef481d25f5b58
-//data8 0x8cad7a2c98dec333, 0xacab929ce114d451
-//data8 0x3fea939bb451e2a0, 0x3c8e92b4fbf4560f
-//data8 0x905b7dfc99583025, 0xaa748cc0dbbbc0ec
-//data8 0x3feaf31b11270220, 0x3cdced8c61bd7bd5
-//data8 0x9446d8191f80dd42, 0xa82ff92687235baf
-//data8 0x3feb53de0bcffc20, 0x3cbe1722fb47509e
-//data8 0x98758ba086e4000a, 0xa5dd497a9c184f58
-//data8 0x3febb5f571cb0560, 0x3ce0c7774329a613
-//data8 0x9cee6c7bf18e4e24, 0xa37be3c3cd1de51b
-//data8 0x3fec197373bc7be0, 0x3ce08ebdb55c3177
-//data8 0xa1b944000a1b9440, 0xa10b2101b4f27e03
-//data8 0x3fec7e6bd023da60, 0x3ce5fc5fd4995959
-//data8 0xa6defd8ba04d3e38, 0x9e8a4b93cad088ec
-//data8 0x3fece4f404e29b20, 0x3cea3413401132b5
-//data8 0xac69dd408a10c62d, 0x9bf89d5d17ddae8c
-//data8 0x3fed4d2388f63600, 0x3cd5a7fb0d1d4276
-//data8 0xb265c39cbd80f97a, 0x99553d969fec7beb
-//data8 0x3fedb714101e0a00, 0x3cdbda21f01193f2
-//data8 0xb8e081a16ae4ae73, 0x969f3e3ed2a0516c
-//data8 0x3fee22e1da97bb00, 0x3ce7231177f85f71
-//data8 0xbfea427678945732, 0x93d5990f9ee787af
-//data8 0x3fee90ac13b18220, 0x3ce3c8a5453363a5
-//data8 0xc79611399b8c90c5, 0x90f72bde80febc31
-//data8 0x3fef009542b712e0, 0x3ce218fd79e8cb56
-//data8 0xcffa8425040624d7, 0x8e02b4418574ebed
-//data8 0x3fef72c3d2c57520, 0x3cd32a717f82203f
-//data8 0xd93299cddcf9cf23, 0x8af6ca48e9c44024
-//data8 0x3fefe762b77744c0, 0x3ce53478a6bbcf94
-//data8 0xe35eda760af69ad9, 0x87d1da0d7f45678b
-//data8 0x3ff02f511b223c00, 0x3ced6e11782c28fc
-//data8 0xeea6d733421da0a6, 0x84921bbe64ae029a
-//data8 0x3ff06c5c6f8ce9c0, 0x3ce71fc71c1ffc02
-//data8 0xfb3b2c73fc6195cc, 0x813589ba3a5651b6
-//data8 0x3ff0aaf2613700a0, 0x3cf2a72d2fd94ef3
-//data8 0x84ac1fcec4203245, 0xfb73a828893df19e
-//data8 0x3ff0eb367c3fd600, 0x3cf8054c158610de
-//data8 0x8ca50621110c60e6, 0xf438a14c158d867c
-//data8 0x3ff12d51caa6b580, 0x3ce6bce9748739b6
-//data8 0x95b8c2062d6f8161, 0xecb3ccdd37b369da
-//data8 0x3ff1717418520340, 0x3ca5c2732533177c
-//data8 0xa0262917caab4ad1, 0xe4dde4ddc81fd119
-//data8 0x3ff1b7d59dd40ba0, 0x3cc4c7c98e870ff5
-//data8 0xac402c688b72f3f4, 0xdcae469be46d4c8d
-//data8 0x3ff200b93cc5a540, 0x3c8dd6dc1bfe865a
-//data8 0xba76968b9eabd9ab, 0xd41a8f3df1115f7f
-//data8 0x3ff24c6f8f6affa0, 0x3cf1acb6d2a7eff7
-//data8 0xcb63c87c23a71dc5, 0xcb161074c17f54ec
-//data8 0x3ff29b5b338b7c80, 0x3ce9b5845f6ec746
-//data8 0xdfe323b8653af367, 0xc19107d99ab27e42
-//data8 0x3ff2edf6fac7f5a0, 0x3cf77f961925fa02
-//data8 0xf93746caaba3e1f1, 0xb777744a9df03bff
-//data8 0x3ff344df237486c0, 0x3cf6ddf5f6ddda43
-//data8 0x8ca77052f6c340f0, 0xacaf476f13806648
-//data8 0x3ff3a0dfa4bb4ae0, 0x3cfee01bbd761bff
-//data8 0xa1a48604a81d5c62, 0xa11575d30c0aae50
-//data8 0x3ff4030b73c55360, 0x3cf1cf0e0324d37c
-//data8 0xbe45074b05579024, 0x9478e362a07dd287
-//data8 0x3ff46ce4c738c4e0, 0x3ce3179555367d12
-//data8 0xe7a08b5693d214ec, 0x8690e3575b8a7c3b
-//data8 0x3ff4e0a887c40a80, 0x3cfbd5d46bfefe69
-//data8 0x94503d69396d91c7, 0xedd2ce885ff04028
-//data8 0x3ff561ebd9c18cc0, 0x3cf331bd176b233b
-//data8 0xced1d96c5bb209e6, 0xc965278083808702
-//data8 0x3ff5f71d7ff42c80, 0x3ce3301cc0b5a48c
-//data8 0xabac2cee0fc24e20, 0x9c4eb1136094cbbd
-//data8 0x3ff6ae4c63222720, 0x3cf5ff46874ee51e
-//data8 0x8040201008040201, 0xb4d7ac4d9acb1bf4
-//data8 0x3ff7b7d33b928c40, 0x3cfacdee584023bb
-LOCAL_OBJECT_END(T_table)
-
-
-
-.align 16
+#include "libm_support.h"
 
-LOCAL_OBJECT_START(poly_coeffs)
-       // C_3
-data8 0xaaaaaaaaaaaaaaab, 0x0000000000003ffc
-       // C_5
-data8 0x999999999999999a, 0x0000000000003ffb
-       // C_7, C_9
-data8 0x3fa6db6db6db6db7, 0x3f9f1c71c71c71c8
-       // pi/2 (low, high)
-data8 0x3C91A62633145C07, 0x3FF921FB54442D18
-       // C_11, C_13
-data8 0x3f96e8ba2e8ba2e9, 0x3f91c4ec4ec4ec4e
-       // C_15, C_17
-data8 0x3f8c99999999999a, 0x3f87a87878787223
-       // pi (low, high)
-data8 0x3CA1A62633145C07, 0x400921FB54442D18
-LOCAL_OBJECT_END(poly_coeffs)
-
-
-R_DBL_S = r21
-R_EXP0 = r22
-R_EXP = r15
-R_SGNMASK = r23
-R_TMP = r24
-R_TMP2 = r25
-R_INDEX = r26
-R_TMP3 = r27
-R_TMP03 = r27
-R_TMP4 = r28
-R_TMP5 = r23
-R_TMP6 = r22
-R_TMP7 = r21
-R_T = r29
-R_BIAS = r20
-
-F_T = f6
-F_1S2 = f7
-F_1S2_S = f9
-F_INV_1T2 = f10
-F_SQRT_1T2 = f11
-F_S2T2 = f12
-F_X = f13
-F_D = f14
-F_2M64 = f15
-
-F_CS2 = f32
-F_CS3 = f33
-F_CS4 = f34
-F_CS5 = f35
-F_CS6 = f36
-F_CS7 = f37
-F_CS8 = f38
-F_CS9 = f39
-F_S23 = f40 
-F_S45 = f41 
-F_S67 = f42 
-F_S89 = f43 
-F_S25 = f44 
-F_S69 = f45 
-F_S29 = f46 
-F_X2 = f47 
-F_X4 = f48 
-F_TSQRT = f49 
-F_DTX = f50 
-F_R = f51 
-F_R2 = f52 
-F_R3 = f53 
-F_R4 = f54 
-
-F_C3 = f55 
-F_C5 = f56 
-F_C7 = f57 
-F_C9 = f58 
-F_P79 = f59 
-F_P35 = f60 
-F_P39 = f61 
-
-F_ATHI = f62 
-F_ATLO = f63 
-
-F_T1 = f64 
-F_Y = f65 
-F_Y2 = f66 
-F_ANDMASK = f67 
-F_ORMASK = f68 
-F_S = f69 
-F_05 = f70 
-F_SQRT_1S2 = f71 
-F_DS = f72 
-F_Z = f73 
-F_1T2 = f74 
-F_DZ = f75 
-F_ZE = f76 
-F_YZ = f77 
-F_Y1S2 = f78 
-F_Y1S2X = f79 
-F_1X = f80 
-F_ST = f81 
-F_1T2_ST = f82 
-F_TSS = f83 
-F_Y1S2X2 = f84 
-F_DZ_TERM = f85 
-F_DTS = f86 
-F_DS2X = f87 
-F_T2 = f88 
-F_ZY1S2S = f89 
-F_Y1S2_1X = f90 
-F_TS = f91
-F_PI2_LO = f92 
-F_PI2_HI = f93 
-F_S19 = f94 
-F_INV1T2_2 = f95 
-F_CORR = f96 
-F_DZ0 = f97 
-
-F_C11 = f98 
-F_C13 = f99 
-F_C15 = f100
-F_C17 = f101
-F_P1113 = f102
-F_P1517 = f103
-F_P1117 = f104
-F_P317 = f105
-F_R8 = f106
-F_HI = f107
-F_1S2_HI = f108
-F_DS2 = f109
-F_Y2_2 = f110
-//F_S2 = f111
-//F_S_DS2 = f112
-F_S_1S2S = f113
-F_XL = f114
-F_2M128 = f115
-F_1AS = f116
-F_AS = f117
-
-
-
-.section .text
-GLOBAL_LIBM_ENTRY(acosl)
-
-{.mfi
-       // get exponent, mantissa (rounded to double precision) of s
-       getf.d R_DBL_S = f8
-       // 1-s^2
-       fnma.s1 F_1S2 = f8, f8, f1
-       // r2 = pointer to T_table
-       addl r2 = @ltoff(T_table), gp
-}
-
-{.mfi
-       // sign mask
-       mov R_SGNMASK = 0x20000
-       nop.f 0
-       // bias-63-1
-       mov R_TMP03 = 0xffff-64;;
-}
-
-
-{.mfi
-       // get exponent of s
-       getf.exp R_EXP = f8
-       nop.f 0
-       // R_TMP4 = 2^45
-       shl R_TMP4 = R_SGNMASK, 45-17
-}
-
-{.mlx
-       // load bias-4
-       mov R_TMP = 0xffff-4
-       // load RU(sqrt(2)/2) to integer register (in double format, shifted left by 1)
-       movl R_TMP2 = 0x7fcd413cccfe779a;;
-}
-
-
-{.mfi
-       // load 2^{-64} in FP register
-       setf.exp F_2M64 = R_TMP03
-       nop.f 0
-       // index = (0x7-exponent)|b1 b2.. b6
-       extr.u R_INDEX = R_DBL_S, 46, 9
-}
-
-{.mfi
-       // get t = sign|exponent|b1 b2.. b6 1 x.. x
-       or R_T = R_DBL_S, R_TMP4
-       nop.f 0
-       // R_TMP4 = 2^45-1
-       sub R_TMP4 = R_TMP4, r0, 1;;
-}
-
-
-{.mfi
-       // get t = sign|exponent|b1 b2.. b6 1 0.. 0
-       andcm R_T = R_T, R_TMP4
-       nop.f 0
-       // eliminate sign from R_DBL_S (shift left by 1)
-       shl R_TMP3 = R_DBL_S, 1
-}
-
-{.mfi
-       // R_BIAS = 3*2^6
-       mov R_BIAS = 0xc0
-       nop.f 0
-       // eliminate sign from R_EXP
-       andcm R_EXP0 = R_EXP, R_SGNMASK;;
-}
-
-
-
-{.mfi
-       // load start address for T_table
-       ld8 r2 = [r2]
-       nop.f 0
-       // p8 = 1 if |s|> = sqrt(2)/2
-       cmp.geu p8, p0 = R_TMP3, R_TMP2
-}
-
-{.mlx
-       // p7 = 1 if |s|<2^{-4} (exponent of s<bias-4)
-       cmp.lt p7, p0 = R_EXP0, R_TMP
-       // sqrt coefficient cs8 = -33*13/128
-       movl R_TMP2 = 0xc0568000;;
-}
-
-
-
-{.mbb
-       // load t in FP register
-       setf.d F_T = R_T
-       // if |s|<2^{-4}, take alternate path
- (p7) br.cond.spnt SMALL_S
-       // if |s|> = sqrt(2)/2, take alternate path
- (p8) br.cond.sptk LARGE_S
-}
-
-{.mlx
-       // index = (4-exponent)|b1 b2.. b6
-       sub R_INDEX = R_INDEX, R_BIAS
-       // sqrt coefficient cs9 = 55*13/128
-       movl R_TMP = 0x40b2c000;;
-}
-
-
-{.mfi
-       // sqrt coefficient cs8 = -33*13/128
-       setf.s F_CS8 = R_TMP2
-       nop.f 0
-       // shift R_INDEX by 5
-       shl R_INDEX = R_INDEX, 5
-}
-
-{.mfi
-       // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
-       mov R_TMP4 = 0xffff - 1
-       nop.f 0
-       // sqrt coefficient cs6 = -21/16
-       mov R_TMP6 = 0xbfa8;;
-}
-
-
-{.mlx
-       // table index
-       add r2 = r2, R_INDEX
-       // sqrt coefficient cs7 = 33/16
-       movl R_TMP2 = 0x40040000;;
-}
-
-
-{.mmi
-       // load cs9 = 55*13/128
-       setf.s F_CS9 = R_TMP
-       // sqrt coefficient cs5 = 7/8
-       mov R_TMP3 = 0x3f60
-       // sqrt coefficient cs6 = 21/16
-       shl R_TMP6 = R_TMP6, 16;;
-}
-
-
-{.mmi
-       // load significand of 1/(1-t^2)
-       ldf8 F_INV_1T2 = [r2], 8
-       // sqrt coefficient cs7 = 33/16
-       setf.s F_CS7 = R_TMP2
-       // sqrt coefficient cs4 = -5/8
-       mov R_TMP5 = 0xbf20;;
-}
-
-
-{.mmi
-       // load significand of sqrt(1-t^2)
-       ldf8 F_SQRT_1T2 = [r2], 8
-       // sqrt coefficient cs6 = 21/16
-       setf.s F_CS6 = R_TMP6
-       // sqrt coefficient cs5 = 7/8
-       shl R_TMP3 = R_TMP3, 16;;
-}
-
-
-{.mmi
-       // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
-       setf.exp F_CS3 = R_TMP4
-       // r3 = pointer to polynomial coefficients
-       addl r3 = @ltoff(poly_coeffs), gp
-       // sqrt coefficient cs4 = -5/8
-       shl R_TMP5 = R_TMP5, 16;;
-}
-
-
-{.mfi
-       // sqrt coefficient cs5 = 7/8
-       setf.s F_CS5 = R_TMP3
-       // d = s-t
-       fms.s1 F_D = f8, f1, F_T
-       // set p6 = 1 if s<0, p11 = 1 if s> = 0
-       cmp.ge p6, p11 = R_EXP, R_DBL_S
-}
-
-{.mfi
-       // r3 = load start address to polynomial coefficients
-       ld8 r3 = [r3]
-       // s+t
-       fma.s1 F_S2T2 = f8, f1, F_T
-       nop.i 0;;
-}
-
-
-{.mfi
-       // sqrt coefficient cs4 = -5/8
-       setf.s F_CS4 = R_TMP5
-       // s^2-t^2
-       fma.s1 F_S2T2 = F_S2T2, F_D, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       // load C3
-       ldfe F_C3 = [r3], 16
-       // 0.5/(1-t^2) = 2^{-64}*(2^63/(1-t^2))
-       fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
-       nop.i 0;;
-}
-
-{.mfi
-       // load C_5
-       ldfe F_C5 = [r3], 16
-       // set correct exponent for sqrt(1-t^2)
-       fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       // load C_7, C_9
-       ldfpd F_C7, F_C9 = [r3], 16
-       // x = -(s^2-t^2)/(1-t^2)/2
-       fnma.s1 F_X = F_INV_1T2, F_S2T2, f0
-       nop.i 0;;
-}
-
-
-{.mmf
-       // load asin(t)_high, asin(t)_low
-       ldfpd F_ATHI, F_ATLO = [r2]
-	   // load pi/2
-	   ldfpd F_PI2_LO, F_PI2_HI = [r3]
-       // t*sqrt(1-t^2)
-       fma.s1 F_TSQRT = F_T, F_SQRT_1T2, f0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // cs9*x+cs8
-       fma.s1 F_S89 = F_CS9, F_X, F_CS8
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // cs7*x+cs6
-       fma.s1 F_S67 = F_CS7, F_X, F_CS6
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // cs5*x+cs4
-       fma.s1 F_S45 = F_CS5, F_X, F_CS4
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // x*x
-       fma.s1 F_X2 = F_X, F_X, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // (s-t)-t*x
-       fnma.s1 F_DTX = F_T, F_X, F_D
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // cs3*x+cs2 (cs2 = -0.5 = -cs3)
-       fms.s1 F_S23 = F_CS3, F_X, F_CS3
-       nop.i 0;;
-}
-
-{.mfi
-  nop.m 0
-  // if sign is negative, negate table values: asin(t)_low
-  (p6) fnma.s1 F_ATLO = F_ATLO, f1, f0
-  nop.i 0
-}
-
-{.mfi
-  nop.m 0
-  // if sign is negative, negate table values: asin(t)_high
-  (p6) fnma.s1 F_ATHI = F_ATHI, f1, f0
-  nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // cs9*x^3+cs8*x^2+cs7*x+cs6
-       fma.s1 F_S69 = F_S89, F_X2, F_S67
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // x^4
-       fma.s1 F_X4 = F_X2, F_X2, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // t*sqrt(1-t^2)*x^2
-       fma.s1 F_TSQRT = F_TSQRT, F_X2, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // cs5*x^3+cs4*x^2+cs3*x+cs2
-       fma.s1 F_S25 = F_S45, F_X2, F_S23
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // ((s-t)-t*x)*sqrt(1-t^2)
-       fma.s1 F_DTX = F_DTX, F_SQRT_1T2, f0
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // (pi/2)_high - asin(t)_high
-       fnma.s1 F_ATHI = F_ATHI, f1, F_PI2_HI
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // asin(t)_low - (pi/2)_low
-       fnma.s1 F_ATLO = F_PI2_LO, f1, F_ATLO
-	   nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // PS29 = cs9*x^7+..+cs5*x^3+cs4*x^2+cs3*x+cs2
-       fma.s1 F_S29 = F_S69, F_X4, F_S25
-       nop.i 0;;
-}
-
-
-
-{.mfi
-       nop.m 0
-       // R = ((s-t)-t*x)*sqrt(1-t^2)-t*sqrt(1-t^2)*x^2*PS29
-       fnma.s1 F_R = F_S29, F_TSQRT, F_DTX
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // R^2
-       fma.s1 F_R2 = F_R, F_R, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // c7+c9*R^2
-       fma.s1 F_P79 = F_C9, F_R2, F_C7
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c3+c5*R^2
-       fma.s1 F_P35 = F_C5, F_R2, F_C3
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // R^3
-       fma.s1 F_R4 = F_R2, F_R2, f0
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // R^3
-       fma.s1 F_R3 = F_R2, F_R, f0
-       nop.i 0;;
-}
-
-
-
-{.mfi
-       nop.m 0
-       // c3+c5*R^2+c7*R^4+c9*R^6
-       fma.s1 F_P39 = F_P79, F_R4, F_P35
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fma.s1 F_P39 = F_P39, F_R3, F_ATLO
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fma.s1 F_P39 = F_P39, f1, F_R
-       nop.i 0;;
-}
-
-
-{.mfb
-       nop.m 0
-       // result = (pi/2)-asin(t)_high+R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fnma.s0 f8 = F_P39, f1, F_ATHI
-       // return
-       br.ret.sptk b0;;
-}
-
-
-
-
-LARGE_S:
-
-{.mfi
-       // bias-1
-       mov R_TMP3 = 0xffff - 1
-       // y ~ 1/sqrt(1-s^2)
-       frsqrta.s1 F_Y, p7 = F_1S2
-       // c9 = 55*13*17/128
-       mov R_TMP4 = 0x10af7b
-}
-
-{.mlx
-       // c8 = -33*13*15/128
-       mov R_TMP5 = 0x184923
-       movl R_TMP2 = 0xff00000000000000;;
-}
-
-{.mfi
-       // set p6 = 1 if s<0, p11 = 1 if s>0
-       cmp.ge p6, p11 = R_EXP, R_DBL_S
-       // 1-s^2
-       fnma.s1 F_1S2 = f8, f8, f1
-       // set p9 = 1
-       cmp.eq p9, p0 = r0, r0;;
-}
-
-
-{.mfi
-       // load 0.5
-       setf.exp F_05 = R_TMP3
-       // (1-s^2) rounded to single precision
-       fnma.s.s1 F_1S2_S = f8, f8, f1
-       // c9 = 55*13*17/128
-       shl R_TMP4 = R_TMP4, 10
-}
-
-{.mlx
-       // AND mask for getting t ~ sqrt(1-s^2)
-       setf.sig F_ANDMASK = R_TMP2
-       // OR mask
-       movl R_TMP2 = 0x0100000000000000;;
-}
-
-.pred.rel "mutex", p6, p11
-{.mfi
-       nop.m 0
-	   // 1-|s|
- (p6)  fma.s1 F_1AS = f8, f1, f1
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // 1-|s|
- (p11) fnma.s1 F_1AS = f8, f1, f1
-       nop.i 0;;
-}
-
-
-{.mfi
-       // c9 = 55*13*17/128
-       setf.s F_CS9 = R_TMP4
-	   // |s|
- (p6)  fnma.s1 F_AS = f8, f1, f0
-       // c8 = -33*13*15/128
-       shl R_TMP5 = R_TMP5, 11
-}
+// Assembly macros
+//==============================================================
 
-{.mfi
-       // c7 = 33*13/16
-       mov R_TMP4 = 0x41d68
-	   // |s|
- (p11) fma.s1 F_AS = f8, f1, f0
-       nop.i 0;;
-}
+// f8 is input, but acos_V must be put in f8
+//    when __libm_atan2_reg is called, f8 must get V
+// f9 gets U when __libm_atan2_reg is called
 
 
-{.mfi
-       setf.sig F_ORMASK = R_TMP2
-       // y^2
-       fma.s1 F_Y2 = F_Y, F_Y, f0
-       // c7 = 33*13/16
-       shl R_TMP4 = R_TMP4, 12
-}
+// __libm_atan2_reg returns 
+// f8  = Z_hi
+// f10 = Z_lo
+// f11 = s_lo
 
-{.mfi
-       // c6 = -33*7/16
-       mov R_TMP6 = 0xc1670
-       // y' ~ sqrt(1-s^2)
-       fma.s1 F_T1 = F_Y, F_1S2, f0
-       // c5 = 63/8
-       mov R_TMP7 = 0x40fc;;
-}
+acos_Z_hi = f8
+acos_Z_lo = f10
+acos_S_lo = f11
 
+// When we call __libm_atan2_reg, we must save 
+// the following:
 
-{.mlx
-       // load c8 = -33*13*15/128
-       setf.s F_CS8 = R_TMP5
-       // c4 = -35/8
-       movl R_TMP5 = 0xc08c0000;;
-}
+acos_corr  = f12
+acos_X     = f13
+acos_pi_hi = f14
+acos_pi_lo = f15
 
-{.mfi
-       // r3 = pointer to polynomial coefficients
-       addl r3 = @ltoff(poly_coeffs), gp
-       // 1-s-(1-s^2)_s
-       fnma.s1 F_DS = F_1S2_S, f1, F_1AS
-       // p9 = 0 if p7 = 1 (p9 = 1 for special cases only)
- (p7) cmp.ne p9, p0 = r0, r0
-}
+// The rest of the assembly macros
+
+acos_P79                   = f32
+acos_P59                   = f33
+acos_P39                   = f34
+acos_P19                   = f35
 
-{.mlx
-       // load c7 = 33*13/16
-       setf.s F_CS7 = R_TMP4
-       // c3 = 5/2
-       movl R_TMP4 = 0x40200000;;
-}
+acos_P810                  = f36
+acos_P610                  = f37
+acos_P410                  = f38
+acos_P210                  = f39
 
+acos_A1                    = f41
+acos_A2                    = f42
+acos_A3                    = f43
+acos_A4                    = f44
+acos_A5                    = f45
+acos_A6                    = f46
+acos_A7                    = f47
+acos_A8                    = f48
+acos_A9                    = f49
+acos_A10                   = f50
 
-{.mlx
-       // load c4 = -35/8
-       setf.s F_CS4 = R_TMP5
-       // c2 = -3/2
-       movl R_TMP5 = 0xbfc00000;;
-}
+acos_X2                    = f51
+acos_X4                    = f52
 
+acos_B                     = f53
+acos_Bb                    = f54
+acos_A                     = f55
+acos_Aa                    = f56
 
-{.mfi
-       // load c3 = 5/2
-       setf.s F_CS3 = R_TMP4
-       // x = (1-s^2)_s*y^2-1
-       fms.s1 F_X = F_1S2_S, F_Y2, f1
-       // c6 = -33*7/16
-       shl R_TMP6 = R_TMP6, 12
-}
+acos_1mA                   = f57
 
-{.mfi
-       nop.m 0
-       // y^2/2
-       fma.s1 F_Y2_2 = F_Y2, F_05, f0
-       nop.i 0;;
-}
+acos_W                     = f58
+acos_Ww                    = f59
 
+acos_y0                    = f60
+acos_y1                    = f61
+acos_y2                    = f62
 
-{.mfi
-       // load c6 = -33*7/16
-       setf.s F_CS6 = R_TMP6
-       // eliminate lower bits from y'
-       fand F_T = F_T1, F_ANDMASK
-       // c5 = 63/8
-       shl R_TMP7 = R_TMP7, 16
-}
+acos_H                     = f63
+acos_Hh                    = f64
 
+acos_t1                    = f65
+acos_t2                    = f66
+acos_t3                    = f67
+acos_t4                    = f68
+acos_t5                    = f69
 
-{.mfb
-       // r3 = load start address to polynomial coefficients
-       ld8 r3 = [r3]
-       // 1-(1-s^2)_s-s^2
-       fma.s1 F_DS = F_AS, F_1AS, F_DS
-       // p9 = 1 if s is a special input (NaN, or |s|> = 1)
- (p9) br.cond.spnt acosl_SPECIAL_CASES;;
-}
+acos_Pseries               = f70
+acos_NORM_f8               = f71
+acos_ABS_NORM_f8           = f72
 
-{.mmf
-       // get exponent, significand of y' (in single prec.)
-       getf.s R_TMP = F_T1
-       // load c3 = -3/2
-       setf.s F_CS2 = R_TMP5
-       // y*(1-s^2)
-       fma.s1 F_Y1S2 = F_Y, F_1S2, f0;;
-}
+acos_2                     = f73
+acos_P1P2                  = f74
+acos_HALF                  = f75
+acos_U                     = f76
 
+acos_1mB                   = f77
+acos_V                     = f78 
+acos_S                     = f79
 
+acos_BmUU                  = f80 
+acos_BmUUpb                = f81 
+acos_2U                    = f82
+acos_1d2U                  = f83
 
-{.mfi
-       nop.m 0
-       // if s<0, set s = -s
- (p6) fnma.s1 f8 = f8, f1, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       // load c5 = 63/8
-       setf.s F_CS5 = R_TMP7
-       // x = (1-s^2)_s*y^2-1+(1-(1-s^2)_s-s^2)*y^2
-       fma.s1 F_X = F_DS, F_Y2, F_X
-       // for t = 2^k*1.b1 b2.., get 7-k|b1.. b6
-       extr.u R_INDEX = R_TMP, 17, 9;;
-}
+acos_Dd                    = f84
 
+acos_pi_by_2_hi            = f85
+acos_pi_by_2_lo            = f86
+acos_xmpi_by_2_lo          = f87
+acos_xPmw                  = f88
 
-{.mmi
-       // index = (4-exponent)|b1 b2.. b6
-       sub R_INDEX = R_INDEX, R_BIAS
-       nop.m 0
-       // get exponent of y
-       shr.u R_TMP2 = R_TMP, 23;;
-}
-
-{.mmi
-       // load C3
-       ldfe F_C3 = [r3], 16
-       // set p8 = 1 if y'<2^{-4}
-       cmp.gt p8, p0 = 0x7b, R_TMP2
-       // shift R_INDEX by 5
-       shl R_INDEX = R_INDEX, 5;;
-}
+acos_Uu                    = f89
+acos_AmVV                  = f90 
+acos_AmVVpa                = f91 
 
+acos_2V                    = f92 
+acos_1d2V                  = f93
+acos_Vv                    = f94
 
-{.mfb
-       // get table index for sqrt(1-t^2)
-       add r2 = r2, R_INDEX
-       // get t = 2^k*1.b1 b2.. b7 1
-       for F_T = F_T, F_ORMASK
- (p8) br.cond.spnt VERY_LARGE_INPUT;;
-}
-
-
-
-{.mmf
-       // load C5
-       ldfe F_C5 = [r3], 16
-       // load 1/(1-t^2)
-       ldfp8 F_INV_1T2, F_SQRT_1T2 = [r2], 16
-       // x = ((1-s^2)*y^2-1)/2
-       fma.s1 F_X = F_X, F_05, f0;;
-}
-
+acos_Vu                    = f95 
+acos_Uv                    = f96 
+
+acos_2_Z_hi                = f97
+acos_s_lo_Z_lo             = f98
+acos_result_lo             = f99
+
+acos_Z_hi                  = f8
+acos_Z_lo                  = f10
+acos_s_lo                  = f11
+
+acos_GR_17_ones            = r33
+acos_GR_16_ones            = r34
+acos_GR_signexp_f8         = r35
+acos_GR_exp                = r36
+acos_GR_true_exp           = r37
+acos_GR_fffe               = r38
+
+GR_SAVE_PFS                = r43
+GR_SAVE_B0                 = r39
+GR_SAVE_GP                 = r41
+
+// r40 is address of table of coefficients
+// r42 
+
+GR_Parameter_X             = r44 
+GR_Parameter_Y             = r45 
+GR_Parameter_RESULT        = r46 
+GR_Parameter_TAG                = r47 
+
+
+// 2^-40:
+// A true exponent of -40 is
+//                    : -40 + register_bias
+//                    : -0x28 + 0xffff = 0xffd7   (hex; 0x28 = 40)
 
+// A true exponent of 1 is 
+//                    : 1 + register_bias
+//                    : 1 + 0xffff = 0x10000
 
-{.mmf
-       nop.m 0
-       // C7, C9
-       ldfpd F_C7, F_C9 = [r3], 16
-       // set correct exponent for t
-       fmerge.se F_T = F_T1, F_T;;
-}
+// Data tables
+//==============================================================
 
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
+.align 16
 
-{.mfi
-       // get address for loading pi
-	   add r3 = 48, r3
-       // c9*x+c8
-       fma.s1 F_S89 = F_X, F_CS9, F_CS8
-       nop.i 0
-}
+acos_coefficients:
+ASM_TYPE_DIRECTIVE(acos_coefficients,@object)
+data8  0xc90fdaa22168c234, 0x00003FFF            // pi_by_2_hi
+data8  0xc4c6628b80dc1cd1, 0x00003FBF            // pi_by_2_lo
+data8  0xc90fdaa22168c234, 0x00004000            // pi_hi
+data8  0xc4c6628b80dc1cd1, 0x00003FC0            // pi_lo
+
+data8  0xBB08911F2013961E, 0x00003FF8            // A10
+data8  0x981F1095A23A87D3, 0x00003FF8            // A9 
+data8  0xBDF09C6C4177BCC6, 0x00003FF8            // A8 
+data8  0xE4C3A60B049ACCEA, 0x00003FF8            // A7 
+data8  0x8E2789F4E8A8F1AD, 0x00003FF9            // A6 
+data8  0xB745D09B2B0E850B, 0x00003FF9            // A5 
+data8  0xF8E38E3BC4C50920, 0x00003FF9            // A4 
+data8  0xB6DB6DB6D89FCD81, 0x00003FFA            // A3 
+data8  0x99999999999AF376, 0x00003FFB            // A2 
+data8  0xAAAAAAAAAAAAAA71, 0x00003FFC            // A1
+ASM_SIZE_DIRECTIVE(acos_coefficients)
+
+
+.align 32
+.global acosl#
+ASM_TYPE_DIRECTIVE(acosl#,@function)
 
-{.mfi
-       nop.m 0
-       // x^2
-       fma.s1 F_X2 = F_X, F_X, f0
-       nop.i 0;;
-}
+.section .text
+.proc  acosl#
+.align 32
 
 
-{.mfi
-       // pi (low, high)
-       ldfpd F_PI2_LO, F_PI2_HI = [r3]
-       // y*(1-s^2)*x
-       fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
-       nop.i 0
-}
+acosl: 
 
-{.mfi
-       nop.m 0
-       // c7*x+c6
-       fma.s1 F_S67 = F_X, F_CS7, F_CS6
-       nop.i 0;;
+// After normalizing f8, get its true exponent
+{ .mfi
+      alloc r32 = ar.pfs,1,11,4,0                                             
+(p0)  fnorm.s1    acos_NORM_f8 = f8                                            
+(p0)  mov         acos_GR_17_ones = 0x1ffff                                    
 }
 
-
-{.mfi
-       nop.m 0
-       // 1-x
-       fnma.s1 F_1X = F_X, f1, f1
-       nop.i 0
+{ .mmi
+(p0)  mov        acos_GR_16_ones = 0xffff                                     
+(p0)  addl                 r40   = @ltoff(acos_coefficients), gp
+      nop.i 999
 }
+;;
 
-{.mfi
-       nop.m 0
-       // c3*x+c2
-       fma.s1 F_S23 = F_X, F_CS3, F_CS2
-       nop.i 0;;
+// Set denormal flag on denormal input with fcmp
+{ .mfi
+      ld8 r40 = [r40]
+      fcmp.eq  p6,p0 = f8,f0
+      nop.i 999
 }
+;;
 
 
-{.mfi
-       nop.m 0
-       // 1-t^2
-       fnma.s1 F_1T2 = F_T, F_T, f1
-       nop.i 0
-}
+// Load the constants pi_by_2 and pi.
+// Each is stored as hi and lo values
+// Also load the coefficients for ACOS_POLY
 
-{.mfi
-       // load asin(t)_high, asin(t)_low
-       ldfpd F_ATHI, F_ATLO = [r2]
-       // c5*x+c4
-       fma.s1 F_S45 = F_X, F_CS5, F_CS4
-       nop.i 0;;
+{ .mmi
+(p0) ldfe       acos_pi_by_2_hi = [r40],16 ;;      
+(p0) ldfe       acos_pi_by_2_lo = [r40],16      
+     nop.i 999 ;;
 }
 
-
-
-{.mfi
-       nop.m 0
-       // t*s
-       fma.s1 F_TS = F_T, f8, f0
-       nop.i 0
+{ .mmi
+(p0) ldfe       acos_pi_hi      = [r40],16 ;;      
+(p0) ldfe       acos_pi_lo      = [r40],16      
+     nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // 0.5/(1-t^2)
-       fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
-       nop.i 0;;
+{ .mmi
+(p0) ldfe       acos_A10        = [r40],16 ;;      
+(p0) ldfe       acos_A9         = [r40],16      
+     nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // z~sqrt(1-t^2), rounded to 24 significant bits
-       fma.s.s1 F_Z = F_SQRT_1T2, F_2M64, f0
-       nop.i 0
+// Take the absolute value of f8
+{ .mmf
+      nop.m 999
+(p0)  getf.exp   acos_GR_signexp_f8  = acos_NORM_f8                           
+(p0)  fmerge.s  acos_ABS_NORM_f8 = f0, acos_NORM_f8 
 }
 
-{.mfi
-       nop.m 0
-       // sqrt(1-t^2)
-       fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
-       nop.i 0;;
+{ .mii
+(p0) ldfe       acos_A8         = [r40],16      
+     nop.i 999 ;;
+(p0) and        acos_GR_exp         = acos_GR_signexp_f8, acos_GR_17_ones ;;    
 }
 
+// case 1: |x| < 2^-25         ==> p6   ACOS_TINY
+// case 2: 2^-25 <= |x| < 2^-2 ==> p8   ACOS_POLY
+// case 3: 2^-2  <= |x| < 1    ==> p9   ACOS_ATAN
+// case 4: 1     <= |x|        ==> p11  ACOS_ERROR_RETURN
+//  Admittedly |x| = 1 is not an error but this is where that case is
+//  handled.
 
-{.mfi
-       nop.m 0
-       // y*(1-s^2)*x^2
-       fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
-       nop.i 0
+{ .mii
+(p0) ldfe       acos_A7         = [r40],16      
+(p0) sub        acos_GR_true_exp    = acos_GR_exp, acos_GR_16_ones ;;           
+(p0) cmp.ge.unc p6, p7    = -26, acos_GR_true_exp ;;                            
 }
 
-{.mfi
-       nop.m 0
-       // x^4
-       fma.s1 F_X4 = F_X2, F_X2, f0
-       nop.i 0;;
+{ .mii
+(p0) ldfe       acos_A6         = [r40],16      
+(p7) cmp.ge.unc p8, p9    = -3,  acos_GR_true_exp ;;                            
+(p9) cmp.ge.unc p10, p11  =  -1, acos_GR_true_exp                            
 }
 
-
-{.mfi
-       nop.m 0
-       // s*t rounded to 24 significant bits
-       fma.s.s1 F_TSS = F_T, f8, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c9*x^3+..+c6
-       fma.s1 F_S69 = F_X2, F_S89, F_S67
-       nop.i 0;;
+{ .mmi
+(p0) ldfe       acos_A5         = [r40],16 ;;      
+(p0) ldfe       acos_A4         = [r40],16      
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // ST = (t^2-1+s^2) rounded to 24 significant bits
-       fms.s.s1 F_ST = f8, f8, F_1T2
-       nop.i 0
+{ .mmi
+(p0) ldfe       acos_A3         = [r40],16 ;;      
+(p0) ldfe       acos_A2         = [r40],16      
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c5*x^3+..+c2
-       fma.s1 F_S25 = F_X2, F_S45, F_S23
-       nop.i 0;;
+// ACOS_ERROR_RETURN ==> p11 is true
+// case 4: |x| >= 1
+{ .mib
+(p0)  ldfe       acos_A1         = [r40],16      
+      nop.i 999
+(p11) br.spnt         L(ACOS_ERROR_RETURN) ;; 
 }
 
-
-{.mfi
-       nop.m 0
-       // 0.25/(1-t^2)
-       fma.s1 F_INV1T2_2 = F_05, F_INV_1T2, f0
-       nop.i 0
+// ACOS_TINY ==> p6 is true
+// case 1: |x| < 2^-25
+{ .mfi
+      nop.m 999
+(p6)  fms.s1        acos_xmpi_by_2_lo = acos_NORM_f8,f1, acos_pi_by_2_lo 
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // t*s-sqrt(1-t^2)*(1-s^2)*y
-       fnma.s1 F_TS = F_Y1S2, F_SQRT_1T2, F_TS
-       nop.i 0;;
+{ .mfb
+           nop.m 999
+(p6)  fms.s0         f8 = acos_pi_by_2_hi,f1, acos_xmpi_by_2_lo                
+(p6)  br.ret.spnt   b0 ;;                                                   
 }
 
 
-{.mfi
-       nop.m 0
-       // z*0.5/(1-t^2)
-       fma.s1 F_ZE = F_INV_1T2, F_SQRT_1T2, f0
-       nop.i 0
-}
 
-{.mfi
-       nop.m 0
-       // z^2+t^2-1
-       fms.s1 F_DZ0 = F_Z, F_Z, F_1T2
-       nop.i 0;;
+// ACOS_POLY ==> p8 is true
+// case 2: 2^-25 <= |x| < 2^-2                   
+{ .mfi
+      nop.m 999
+(p8)  fms.s1        acos_W       = acos_pi_by_2_hi, f1, acos_NORM_f8     
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (1-s^2-(1-s^2)_s)*x
-       fma.s1 F_DS2X = F_X, F_DS, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_X2   = f8,f8, f0                                
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // t*s-(t*s)_s
-       fms.s1 F_DTS = F_T, f8, F_TSS
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p8)  fms.s1        acos_Ww      = acos_pi_by_2_hi, f1, acos_W           
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c9*x^7+..+c2
-       fma.s1 F_S29 = F_X4, F_S69, F_S25
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_X4   = acos_X2,acos_X2, f0                      
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // y*z
-       fma.s1 F_YZ = F_Z, F_Y, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p8)  fms.s1        acos_Ww      = acos_Ww, f1, acos_NORM_f8             
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // t^2
-       fma.s1 F_T2 = F_T, F_T, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P810 = acos_X4, acos_A10, acos_A8               
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // 1-t^2+ST
-       fma.s1 F_1T2_ST = F_ST, f1, F_1T2
-       nop.i 0;;
+// acos_P79  = X4*A9   + A7
+// acos_P810 = X4*A10  + A8
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P79  = acos_X4, acos_A9, acos_A7                
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // y*(1-s^2)(1-x)
-       fma.s1 F_Y1S2_1X = F_Y1S2, F_1X, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_Ww      = acos_Ww, f1, acos_pi_by_2_lo          
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // dz ~ sqrt(1-t^2)-z
-       fma.s1 F_DZ = F_DZ0, F_ZE, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P610 = acos_X4, acos_P810, acos_A6              
+      nop.i 999
 }
 
 
-{.mfi
-       nop.m 0
-       // -1+correction for sqrt(1-t^2)-z
-       fnma.s1 F_CORR = F_INV1T2_2, F_DZ0, f0
-       nop.i 0;;
+// acos_P59   = X4*(X4*A9   + A7)  + A5
+// acos_P610  = X4*(X4*A10  + A8)  + A6
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P59  = acos_X4, acos_P79, acos_A5               
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (PS29*x^2+x)*y*(1-s^2)
-       fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P410 = acos_X4, acos_P610, acos_A4              
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-       // z*y*(1-s^2)_s
-       fma.s1 F_ZY1S2S = F_YZ, F_1S2_S, f0
-       nop.i 0
+// acos_P39   = X4*(X4*(X4*A9   + A7)  + A5) + A3
+// acos_P410  = X4*(X4*(X4*A10  + A8)  + A6) + A4
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P39  = acos_X4, acos_P59, acos_A3               
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // s^2-(1-t^2+ST)
-       fms.s1 F_1T2_ST = f8, f8, F_1T2_ST
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P210 = acos_X4, acos_P410, acos_A2              
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x
-       fma.s1 F_DTS = F_YZ, F_DS2X, F_DTS
-       nop.i 0
+// acos_P19   = X4*(X4*(X4*(X4*A9   + A7)  + A5) + A3) + A1 = P1
+// acos_P210  = X4*(X4*(X4*(X4*A10  + A8)  + A6) + A4) + A2 = P2
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P19  = acos_X4, acos_P39, acos_A1               
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // dz*y*(1-s^2)*(1-x)
-       fma.s1 F_DZ_TERM = F_DZ, F_Y1S2_1X, f0
-       nop.i 0;;
+// acos_P1P2 = Xsq*P2 + P1
+// acos_P1P2 = Xsq*(Xsq*P2 + P1)
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P1P2    = acos_X2, acos_P210, acos_P19          
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // R = t*s-sqrt(1-t^2)*(1-s^2)*y+sqrt(1-t^2)*(1-s^2)*y*PS19
-       // (used for polynomial evaluation)
-       fma.s1 F_R = F_S19, F_SQRT_1T2, F_TS
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        acos_P1P2    = acos_X2, acos_P1P2, f0                
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (PS29*x^2)*y*(1-s^2)
-       fma.s1 F_S29 = F_Y1S2X2, F_S29, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p8)  fms.s1        acos_xPmw    = acos_NORM_f8, acos_P1P2, acos_Ww       
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // apply correction to dz*y*(1-s^2)*(1-x)
-       fma.s1 F_DZ_TERM = F_DZ_TERM, F_CORR, F_DZ_TERM
-       nop.i 0;;
+{ .mfb
+      nop.m 999
+(p8)  fms.s0         f8           = acos_W, f1, acos_xPmw                 
+(p8)  br.ret.spnt   b0 ;;                                                   
 }
 
 
-{.mfi
-       nop.m 0
-       // R^2
-       fma.s1 F_R2 = F_R, F_R, f0
-       nop.i 0;;
-}
-
+// ACOS_ATAN
+// case 3: 2^-2  <= |x| < 1                      
+// case 3: 2^-2  <= |x| < 1    ==> p9   ACOS_ATAN
 
-{.mfi
-       nop.m 0
-       // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x+dz*y*(1-s^2)*(1-x)
-       fma.s1 F_DZ_TERM = F_DZ_TERM, f1, F_DTS
-       nop.i 0;;
-}
+// Step 1.1:     Get A,B and a,b
+// A + a = 1- |X|
+// B + b = 1+ |X|
+// Note also that we will use  acos_corr (f12)
+// and                         acos_W
 
+// Step 2
+// Call __libm_atan2_reg
 
-{.mfi
-       nop.m 0
-       // c7+c9*R^2
-       fma.s1 F_P79 = F_C9, F_R2, F_C7
-       nop.i 0
-}
 
-{.mfi
-       nop.m 0
-       // c3+c5*R^2
-       fma.s1 F_P35 = F_C5, F_R2, F_C3
-       nop.i 0;;
+{ .mfi
+(p0)  mov    acos_GR_fffe = 0xfffe                      
+(p0)  fma.s1 acos_B          = f1,f1,  acos_ABS_NORM_f8                            
+(p0)  mov   GR_SAVE_B0 = b0 ;;                                
 }
 
-{.mfi
-       nop.m 0
-       // asin(t)_low-(pi)_low (if s<0)
- (p6)  fms.s1 F_ATLO = F_ATLO, f1, F_PI2_LO
-       nop.i 0
+{ .mmf
+(p0)  mov   GR_SAVE_GP = gp                                
+      nop.m 999
+(p0)  fms.s1 acos_A   = f1,f1,  acos_ABS_NORM_f8                            
 }
 
-{.mfi
-       nop.m 0
-       // R^4
-       fma.s1 F_R4 = F_R2, F_R2, f0
-       nop.i 0;;
+{ .mfi
+(p0)  setf.exp       acos_HALF = acos_GR_fffe                   
+      nop.f 999
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // R^3
-       fma.s1 F_R3 = F_R2, F_R, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fms.s1 acos_1mB = f1,f1, acos_B                                       
+      nop.i 999 ;;
 }
 
+// We want atan2(V,U)
+//   so put V in f8 and U in f9
+//   but save X in acos_X
 
-{.mfi
-       nop.m 0
-       // (t*s)_s-t^2*y*z
-       fnma.s1 F_TSS = F_T2, F_YZ, F_TSS
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)
-       fma.s1 F_DZ_TERM = F_YZ, F_1T2_ST, F_DZ_TERM
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fmerge.se acos_X = f8, f8                               
+      nop.i 999 ;;
 }
 
+// Step 1.2:
+/////////////////////////
+// Get U = sqrt(B)
+/////////////////////////
 
-{.mfi
-       nop.m 0
-       // (pi)_hi-asin(t)_hi (if s<0)
- (p6)  fms.s1 F_ATHI = F_PI2_HI, f1, F_ATHI
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  frsqrta.s1     acos_y0,p8  = acos_B                                
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-       // c3+c5*R^2+c7*R^4+c9*R^6
-       fma.s1 F_P39 = F_P79, F_R4, F_P35
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fms.s1 acos_1mA = f1,f1, acos_A                                       
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)+
-       // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29
-       fma.s1 F_DZ_TERM = F_SQRT_1T2, F_S29, F_DZ_TERM
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1 acos_Bb  = acos_1mB,f1, acos_ABS_NORM_f8                       
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (t*s)_s-t^2*y*z+z*y*ST
-       fma.s1 F_TSS = F_YZ, F_ST, F_TSS
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_Hh     = acos_HALF, acos_B, f0                 
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // -asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fms.s1 F_P39 = F_P39, F_R3, F_ATLO
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_t1     = acos_y0, acos_y0, f0                  
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
-       // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 +
-       // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fma.s1 F_DZ_TERM = F_P39, f1, F_DZ_TERM
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fms.s1 acos_Aa  = acos_1mA,f1, acos_ABS_NORM_f8                       
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
-       // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
-       // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fma.s1 F_DZ_TERM = F_ZY1S2S, F_X, F_DZ_TERM
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        acos_t2     = acos_t1, acos_Hh, acos_HALF           
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
-       // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
-       // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6) +
-       // + (t*s)_s-t^2*y*z+z*y*ST
-       fma.s1 F_DZ_TERM = F_TSS, f1, F_DZ_TERM
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_y1     = acos_t2, acos_y0, acos_y0             
+      nop.i 999
 }
 
 
-.pred.rel "mutex", p6, p11
-{.mfi
-       nop.m 0
-       // result: add high part of table value
-       // s>0 in this case
- (p11) fnma.s0 f8 = F_DZ_TERM, f1, F_ATHI
-       nop.i 0
-}
-
-{.mfb
-       nop.m 0
-       // result: add high part of pi-table value
-       // if s<0
- (p6)  fma.s0 f8 = F_DZ_TERM, f1, F_ATHI
-       br.ret.sptk b0;;
+// Step 1.2:
+/////////////////////////
+// Get V = sqrt(A)
+/////////////////////////
+{ .mfi
+      nop.m 999
+(p0)  frsqrta.s1     acos_y0,p8  = acos_A                                
+      nop.i 999 ;;
 }
 
-
-
-
-
-
-SMALL_S:
-
-       // use 15-term polynomial approximation
-
-{.mmi
-       // r3 = pointer to polynomial coefficients
-       addl r3 = @ltoff(poly_coeffs), gp;;
-       // load start address for coefficients
-       ld8 r3 = [r3]
-       mov R_TMP = 0x3fbf;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_t3     = acos_y1, acos_Hh, f0                  
+      nop.i 999 ;;
 }
 
-
-{.mmi
-       add r2 = 64, r3
-       ldfe F_C3 = [r3], 16
-       // p7 = 1 if |s|<2^{-64} (exponent of s<bias-64)
-       cmp.lt p7, p0 = R_EXP0, R_TMP;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_t1     = acos_y0, acos_y0, f0                  
+      nop.i 999 ;;
 }
 
-{.mmf
-       ldfe F_C5 = [r3], 16
-       ldfpd F_C11, F_C13 = [r2], 16
-	   nop.f 0;;
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        acos_t4     = acos_t3, acos_y1, acos_HALF           
+      nop.i 999 ;;
 }
 
-{.mmf
-       ldfpd F_C7, F_C9 = [r3], 16
-       ldfpd F_C15, F_C17 = [r2]
-       nop.f 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_y2     = acos_t4, acos_y1, acos_y1             
+      nop.i 999 ;;
 }
 
-
-
-{.mfb
-       // load pi/2
-       ldfpd F_PI2_LO, F_PI2_HI = [r3]
-       // s^2
-       fma.s1 F_R2 = f8, f8, f0
-	   // |s|<2^{-64}
-  (p7) br.cond.spnt  RETURN_PI2;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_S      = acos_B, acos_y2, f0                   
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // s^3
-       fma.s1 F_R3 = f8, F_R2, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_H      = acos_y2, acos_HALF, f0                
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // s^4
-       fma.s1 F_R4 = F_R2, F_R2, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_t5     = acos_Hh, acos_y2, f0                  
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // c3+c5*s^2
-       fma.s1 F_P35 = F_C5, F_R2, F_C3
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_Hh     = acos_HALF, acos_A, f0                 
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c11+c13*s^2
-       fma.s1 F_P1113 = F_C13, F_R2, F_C11
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        acos_Dd     = acos_S, acos_S, acos_B                
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c7+c9*s^2
-       fma.s1 F_P79 = F_C9, F_R2, F_C7
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        acos_t2     = acos_t1, acos_Hh, acos_HALF           
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c15+c17*s^2
-       fma.s1 F_P1517 = F_C17, F_R2, F_C15
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_U      = acos_Dd, acos_H, acos_S               
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-	   // (pi/2)_high-s_high
-	   fnma.s1 F_T = f8, f1, F_PI2_HI
-	   nop.i 0
-}
-{.mfi
-       nop.m 0
-       // s^8
-       fma.s1 F_R8 = F_R4, F_R4, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_y1     = acos_t2, acos_y0, acos_y0             
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c3+c5*s^2+c7*s^4+c9*s^6
-       fma.s1 F_P39 = F_P79, F_R4, F_P35
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_2U       = acos_U, f1, acos_U                  
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c11+c13*s^2+c15*s^4+c17*s^6
-       fma.s1 F_P1117 = F_P1517, F_R4, F_P1113
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_t3     = acos_y1, acos_Hh, f0                  
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-	   // -s_high
-	   fms.s1 F_S = F_T, f1, F_PI2_HI
-	   nop.i 0;;
-}
 
-{.mfi
-       nop.m 0
-       // c3+..+c17*s^14
-       fma.s1 F_P317 = F_R8, F_P1117, F_P39
-       nop.i 0;;
-}
+// Step 1.3: 
+// sqrt(A + a) = V + v
+// sqrt(B + b) = U + u
 
-{.mfi
-       nop.m 0
-	   // s_low
-	   fma.s1 F_DS = f8, f1, F_S
-	   nop.i 0;;
-}
+/////////////////////////
+// Get u
+/////////////////////////
 
-{.mfi
-       nop.m 0
-       // (pi/2)_low-s^3*(c3+..+c17*s^14)
-       fnma.s0 F_P317 = F_P317, F_R3, F_PI2_LO
-	   nop.i 0;;
-}
+// acos_BmUU   = B - UU
+// acos_BmUUpb = (B - UU) + b
 
-{.mfi
-       nop.m 0
-	   // (pi/2)_low-s_low-s^3*(c3+..+c17*s^14)
-	   fms.s1 F_P317 = F_P317, f1, F_DS
-	   nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        acos_BmUU     = acos_U, acos_U, acos_B              
+      nop.i 999 ;;
 }
 
-{.mfb
-       nop.m 0
-	   // result: pi/2-s-c3*s^3-..-c17*s^17
-	   fma.s0 f8 = F_T, f1, F_P317
-       br.ret.sptk b0;;
+{ .mfi
+      nop.m 999
+(p0)   fmerge.se f9 = acos_U, acos_U                           
+      nop.i 999 ;;
 }
 
-
-
-
-
-RETURN_PI2:
-
-{.mfi
-       nop.m 0
-       // (pi/2)_low-s
-	   fms.s0 F_PI2_LO = F_PI2_LO, f1, f8
-	   nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        acos_t4     = acos_t3, acos_y1, acos_HALF           
+      nop.i 999 ;;
 }
 
-{.mfb
-       nop.m 0
-	   // (pi/2)-s
-	   fma.s0 f8 = F_PI2_HI, f1, F_PI2_LO
-	   br.ret.sptk b0;;
+// acos_1d2U = frcpa(2U)
+{ .mfi
+      nop.m 999
+(p0)  frcpa.s1       acos_1d2U,p9  = f1, acos_2U                         
+      nop.i 999
 }
 
-
-
-
-
-VERY_LARGE_INPUT:
-
-
-{.mmf
-       // pointer to pi_low, pi_high
-	   add r2 = 80, r3
-       // load C5
-       ldfe F_C5 = [r3], 16
-       // x = ((1-(s^2)_s)*y^2-1)/2-(s^2-(s^2)_s)*y^2/2
-       fma.s1 F_X = F_X, F_05, f0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_BmUUpb   = acos_BmUU, f1, acos_Bb              
+      nop.i 999 ;;
 }
 
-.pred.rel "mutex", p6, p11
-{.mmf
-       // load pi (low, high), if s<0
- (p6)  ldfpd F_PI2_LO, F_PI2_HI = [r2]
-       // C7, C9
-       ldfpd F_C7, F_C9 = [r3], 16
-	   // if s>0, set F_PI2_LO=0
- (p11) fma.s1 F_PI2_HI = f0, f0, f0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_y2     = acos_t4, acos_y1, acos_y1             
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
- (p11) fma.s1 F_PI2_LO = f0, f0, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+// acos_Uu = ((B - UU) + b) * frcpa(2U)
+(p0)  fma.s1         acos_Uu       = acos_BmUUpb, acos_1d2U, f0          
+      nop.i 999 ;;
 }
 
-{.mfi
-       // adjust address for C_11
-	   add r3 = 16, r3
-       // c9*x+c8
-       fma.s1 F_S89 = F_X, F_CS9, F_CS8
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_S      = acos_A, acos_y2, f0                   
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-       // x^2
-       fma.s1 F_X2 = F_X, F_X, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_H      = acos_y2, acos_HALF, f0                
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // y*(1-s^2)*x
-       fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_t5     = acos_Hh, acos_y2, f0                  
+      nop.i 999 ;;
 }
 
-{.mfi
-       // C11, C13
-       ldfpd F_C11, F_C13 = [r3], 16
-       // c7*x+c6
-       fma.s1 F_S67 = F_X, F_CS7, F_CS6
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        acos_Dd     = acos_S, acos_S, acos_A                
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       // C15, C17
-       ldfpd F_C15, F_C17 = [r3], 16
-       // c3*x+c2
-       fma.s1 F_S23 = F_X, F_CS3, F_CS2
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_V      = acos_Dd, acos_H, acos_S               
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c5*x+c4
-       fma.s1 F_S45 = F_X, F_CS5, F_CS4
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_2V       = acos_V, f1, acos_V                  
+      nop.i 999
 }
 
+// Step 3
+/////////////////////////
+// Calculate the correction, acos_corr
+/////////////////////////
+// acos_corr = U*v - (V*u)
 
-
-
-{.mfi
-       nop.m 0
-       // y*(1-s^2)*x^2
-       fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // x^4
-       fma.s1 F_X4 = F_X2, F_X2, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1  acos_Vu   = acos_V,acos_Uu, f0                  
+      nop.i 999 ;;
 }
 
+/////////////////////////
+// Get v
+/////////////////////////
+// acos_AmVV   = A - VV
+// acos_AmVVpa = (A - VV) + a
 
-{.mfi
-       nop.m 0
-       // c9*x^3+..+c6
-       fma.s1 F_S69 = F_X2, F_S89, F_S67
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        acos_AmVV     = acos_V, acos_V, acos_A              
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c5*x^3+..+c2
-       fma.s1 F_S25 = F_X2, F_S45, F_S23
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)   fmerge.se f8 = acos_V, acos_V                           
+      nop.i 999 ;;
 }
 
-
-
-{.mfi
-       nop.m 0
-       // (pi)_high-y*(1-s^2)_s
-       fnma.s1 F_HI = F_Y, F_1S2_S, F_PI2_HI
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_AmVVpa   = acos_AmVV, f1, acos_Aa              
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c9*x^7+..+c2
-       fma.s1 F_S29 = F_X4, F_S69, F_S25
-       nop.i 0;;
+// acos_1d2V = frcpa(2V)
+{ .mfi
+      nop.m 999
+(p0)  frcpa.s1       acos_1d2V,p9  = f1, acos_2V                         
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // -(y*(1-s^2)_s)_high
-       fms.s1 F_1S2_HI = F_HI, f1, F_PI2_HI
-       nop.i 0;;
+// acos_Vv = ((A - VV) + a) * frcpa(2V)
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         acos_Vv       = acos_AmVVpa, acos_1d2V, f0          
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (PS29*x^2+x)*y*(1-s^2)
-       fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)   fma.s1  acos_Uv   = acos_U,acos_Vv, f0                  
+      nop.i 999 ;;
 }
 
 
-{.mfi
-       nop.m 0
-       // y*(1-s^2)_s-(y*(1-s^2))_high
-       fma.s1 F_DS2 = F_Y, F_1S2_S, F_1S2_HI
-       nop.i 0;;
-}
-
+.endp acosl#
+ASM_SIZE_DIRECTIVE(acosl#)
 
 
-{.mfi
-       nop.m 0
-       // R ~ sqrt(1-s^2)
-       // (used for polynomial evaluation)
-       fnma.s1 F_R = F_S19, f1, F_Y1S2
-       nop.i 0;;
+.proc __libm_callout
+__libm_callout:
+.prologue
+{ .mfi
+        nop.m 0
+        nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs
 }
+;;
 
-
-{.mfi
-       nop.m 0
-       // y*(1-s^2)-(y*(1-s^2))_high
-       fma.s1 F_DS2 = F_Y, F_DS, F_DS2
-       nop.i 0
+{ .mfi
+        mov GR_SAVE_GP=gp
+        nop.f 0
+.save   b0, GR_SAVE_B0
+        mov GR_SAVE_B0=b0
 }
 
-{.mfi
-       nop.m 0
-       // (pi)_low+(PS29*x^2)*y*(1-s^2)
-       fma.s1 F_S29 = F_Y1S2X2, F_S29, F_PI2_LO
-       nop.i 0;;
+.body
+{ .mfb
+      nop.m 999
+(p0)   fms.s1  acos_corr = acos_Uv,f1, acos_Vu                 
+(p0)   br.call.sptk.many  b0=__libm_atan2_reg# ;;                        
 }
 
 
-{.mfi
-       nop.m 0
-       // R^2
-       fma.s1 F_R2 = F_R, F_R, f0
-       nop.i 0;;
-}
-
+// p6 ==> X is negative
+// p7 ==> x is positive
+// We know that |X| >= 1/4
 
-{.mfi
-       nop.m 0
-	   // if s<0
-       // (pi)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)
-       fms.s1 F_S29 = F_S29, f1, F_DS2
-       nop.i 0;;
+{ .mfi
+(p0)   mov gp              = GR_SAVE_GP                           
+(p0)   fcmp.lt.unc   p6,p7 = acos_X , f0                       
+(p0)   mov b0              = GR_SAVE_B0 ;;                           
 }
 
+// acos_2_Z_hi    = 2 * acos_Z_hi
+// acos_s_lo_Z_lo = s_lo * Z_lo
 
-{.mfi
-       nop.m 0
-       // c7+c9*R^2
-       fma.s1 F_P79 = F_C9, F_R2, F_C7
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c3+c5*R^2
-       fma.s1 F_P35 = F_C5, F_R2, F_C3
-       nop.i 0;;
+{ .mfi
+       nop.m 999
+(p0)   fma.s1  acos_2_Z_hi      = acos_Z_hi, f1, acos_Z_hi               
+(p0)   mov ar.pfs               = GR_SAVE_PFS                                     
 }
 
-
-
-{.mfi
-       nop.m 0
-       // R^4
-       fma.s1 F_R4 = F_R2, F_R2, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)   fma.s1  acos_s_lo_Z_lo   = acos_s_lo, acos_Z_lo, f0               
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // R^3
-       fma.s1 F_R3 = F_R2, F_R, f0
-       nop.i 0;;
+// 2 is a constant needed later
+{ .mfi
+      nop.m 999
+(p0)  fma.s1     acos_2 = f1,f1,f1                             
+      nop.i 999 ;;
 }
 
+// X >= 1/4
+// acos_result_lo = 2(s_lo * Z_lo) + corr
+// f8             = (2*Z_hi) + (2(s_lo * Z_lo) + corr)
 
-{.mfi
-       nop.m 0
-       // c11+c13*R^2
-       fma.s1 F_P1113 = F_C13, F_R2, F_C11
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p7)   fma.s1  acos_result_lo     = acos_s_lo_Z_lo, acos_2, acos_corr      
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c15+c17*R^2
-       fma.s1 F_P1517 = F_C17, F_R2, F_C15
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p7)  fma.s0   f8                 = acos_2_Z_hi, f1, acos_result_lo        
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // (pi)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)+y*(1-s^2)*x
-       fma.s1 F_S29 = F_Y1S2, F_X, F_S29
-       nop.i 0;;
+// acos_result_lo = (pi_lo - corr)
+// acos_result_lo = (pi_lo - corr) + acos_Ww 
+{ .mfi
+      nop.m 999
+(p6)  fms.s1  acos_result_lo     = acos_pi_lo, f1, acos_corr              
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c11+c13*R^2+c15*R^4+c17*R^6
-       fma.s1 F_P1117 = F_P1517, F_R4, F_P1113
-       nop.i 0
+// X <= -1/4
+// acos_W = pi_hi - 2 * Z_hi
+{ .mfi
+      nop.m 999
+(p6)  fnma.s1 acos_W             = acos_2, acos_Z_hi, acos_pi_hi          
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c3+c5*R^2+c7*R^4+c9*R^6
-       fma.s1 F_P39 = F_P79, F_R4, F_P35
-       nop.i 0;;
+// acos_Ww = pi_hi - W
+// acos_Ww = (pi_hi - W) - (2 * Z_hi)
+{ .mfi
+      nop.m 999
+(p6)  fms.s1  acos_Ww            = acos_pi_hi, f1, acos_W                 
+      nop.i 999 ;;
 }
 
-
-
-{.mfi
-       nop.m 0
-       // R^8
-       fma.s1 F_R8 = F_R4, F_R4, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p6)   fms.s1  acos_Ww            = acos_Ww, f1, acos_2_Z_hi               
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c3+c5*R^2+c7*R^4+c9*R^6+..+c17*R^14
-       fma.s1 F_P317 = F_P1117, F_R8, F_P39
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p6)   fma.s1  acos_result_lo     = acos_result_lo, f1, acos_Ww            
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (pi)_low-(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
-       // -(y*(1-s^2))_high)+y*(1-s^2)*x - P3, 17
-       fnma.s1 F_S29 = F_P317, F_R3, F_S29
-       nop.i 0;;
+// acos_Z_lo = ((pi_lo - corr) + acos_Ww) - 2 * (s_lo * Z_lo)
+{ .mfi
+      nop.m 999
+(p6)  fnma.s1  acos_Z_lo          = acos_s_lo_Z_lo, acos_2, acos_result_lo 
+      nop.i 999 ;;
 }
 
-.pred.rel "mutex", p6, p11
-{.mfi
-       nop.m 0
-       // Result (if s<0):
-       // (pi)_low-(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
-       // -(y*(1-s^2))_high)+y*(1-s^2)*x - P3, 17
-       // +(pi)_high-(y*(1-s^2))_high
- (p6)  fma.s0 f8 = F_S29, f1, F_HI
-       nop.i 0
+{ .mfb
+      nop.m 999
+(p6)  fma.s0   f8                  = acos_W, f1, acos_Z_lo                
+(p0)  br.ret.sptk   b0 ;;                          
 }
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
 
-{.mfb
-       nop.m 0
-	   // Result (if s>0):
-       // (PS29*x^2)*y*(1-s^2)-
-       // -y*(1-s^2)*x + P3, 17
-       // +(y*(1-s^2))
- (p11) fms.s0 f8 = F_Y, F_1S2_S, F_S29
-       br.ret.sptk b0;;
+.proc SPECIAL
+SPECIAL:
+L(ACOS_NAN): 
+{ .mfb
+      nop.m 999
+(p0)  fma.s0 f8 = f8,f1,f0                       
+(p0)  br.ret.sptk   b0 ;;                          
 }
 
+L(ACOS_ERROR_RETURN): 
+// Save ar.pfs, b0, and gp; restore on exit
 
+// qnan snan inf norm     unorm 0 -+
+// 1    1    0   0        0     0 11 = 0xc3
 
+// Coming in as X = +- 1
+// What should we return?
 
+// If X is -1, return acos_pi_hi + acos_pi_lo; if X is +1, return +0
 
 
-acosl_SPECIAL_CASES:
-
-{.mfi
-       alloc r32 = ar.pfs, 1, 4, 4, 0
-       // check if the input is a NaN, or unsupported format
-       // (i.e. not infinity or normal/denormal)
-       fclass.nm p7, p8 = f8, 0x3f
-       // pointer to pi/2
-       add r3 = 96, r3;;
-}
-
-
-{.mfi
-       // load pi/2
-       ldfpd F_PI2_HI, F_PI2_LO = [r3]
-       // get |s|
-       fmerge.s F_S = f0, f8
-       nop.i 0
-}
-
-{.mfb
-       nop.m 0
-       // if NaN, quietize it, and return
- (p7) fma.s0 f8 = f8, f1, f0
- (p7) br.ret.spnt b0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // |s| = 1 ?
-       fcmp.eq.s0 p9, p10 = F_S, f1
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fcmp.eq.unc p6,p7 = acos_ABS_NORM_f8,f1              
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // load FR_X
-       fma.s1 FR_X = f8, f1, f0
-       // load error tag
-       mov GR_Parameter_TAG = 57;;
+{ .mfi
+      nop.m 999
+(p6)  fcmp.lt.unc p8,p9 = f8,f0                            
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // if s = 1, result is 0
- (p9)  fma.s0 f8 = f0, f0, f0
-       // set p6=0 for |s|>1
- (p10) cmp.ne p6, p0 = r0, r0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s0 f8          = acos_pi_hi, f1, acos_pi_lo       
+      nop.i 999
 }
 
-
-{.mfb
-       nop.m 0
-       //  if s = -1, result is pi
- (p6) fma.s0 f8 = F_PI2_HI, f1, F_PI2_LO
-       // return if |s| = 1
- (p9) br.ret.sptk b0;;
+{ .mfb
+      nop.m 999
+(p9)  fmerge.s    f8 = f8,f0                               
+(p6)  br.ret.spnt   b0 ;;                                     
 }
 
-
-{.mfi
-       nop.m 0
-       // get Infinity
-       frcpa.s1 FR_RESULT, p0 = f1, f0
-       nop.i 0;;
+// If X is a NAN, leave
+{ .mfi
+      nop.m 999
+(p0)  fclass.m.unc p12,p0 = f8, 0xc3            
+      nop.i 999 ;;
 }
 
-
-{.mfb
-       nop.m 0
-       // return QNaN indefinite (0*Infinity)
-       fma.s0 FR_RESULT = f0, FR_RESULT, f0
-       nop.b 0;;
+{ .mfb
+      nop.m 999
+(p12) fma.s0 f8 = f8,f1,f0                       
+(p12) br.ret.spnt   b0 ;;                          
 }
 
+{ .mfi
+(p0)   mov   GR_Parameter_TAG = 57 
+(p0)   frcpa f10, p6 = f0, f0
+nop.i 999
+};;
 
-GLOBAL_LIBM_END(acosl)
+.endp SPECIAL
+ASM_SIZE_DIRECTIVE(SPECIAL)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 // (1)
 { .mfi
@@ -2510,12 +1068,12 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 .body
 // (3)
 { .mib
-        stfe [GR_Parameter_X] = FR_X              // Store Parameter 1 on stack
+        stfe [GR_Parameter_X] = f8              // Store Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y
         nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfe [GR_Parameter_Y] = FR_RESULT             // Store Parameter 3 on stack
+        stfe [GR_Parameter_Y] = f10             // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
         br.call.sptk b0=__libm_error_support#   // Call error handling function
 };;
@@ -2539,13 +1097,11 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
 
-
-
-
-
-
+.type   __libm_atan2_reg#,@function
+.global __libm_atan2_reg#
diff --git a/sysdeps/ia64/fpu/e_asin.S b/sysdeps/ia64/fpu/e_asin.S
index 398079eae4..bb4c242fb2 100644
--- a/sysdeps/ia64/fpu/e_asin.S
+++ b/sysdeps/ia64/fpu/e_asin.S
@@ -1,10 +1,10 @@
 .file "asin.s"
 
-
-// Copyright (c) 2000 - 2003 Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,776 +35,818 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 08/17/00 New and much faster algorithm.
-// 08/31/00 Avoided bank conflicts on loads, shortened |x|=1 path,
+// 2/02/00  Initial version 
+// 8/17/00  New and much faster algorithm.
+// 8/31/00  Avoided bank conflicts on loads, shortened |x|=1 path, 
 //          fixed mfb split issue stalls.
 // 12/19/00 Fixed small arg cases to force inexact, or inexact and underflow.
-// 08/02/02 New and much faster algorithm II
-// 02/06/03 Reordered header: .section, .global, .proc, .align
 
 // Description
 //=========================================
-// The asin function computes the principal value of the arc sine of x.
+// The asin function computes the principal value of the arc sine of x.
 // asin(0) returns 0, asin(1) returns pi/2, asin(-1) returns -pi/2.
 // A doman error occurs for arguments not in the range [-1,+1].
-//
-// The asin function returns the arc sine in the range [-pi/2, +pi/2] radians.
-//
-// There are 8 paths:
-// 1. x = +/-0.0
-//    Return asin(x) = +/-0.0
-//
-// 2. 0.0 < |x| < 0.625
-//    Return asin(x) = x + x^3 *PolA(x^2)
-//    where PolA(x^2) = A3 + A5*x^2 + A7*x^4 +...+ A35*x^32
-//
-// 3. 0.625 <=|x| < 1.0
-//    Return asin(x) = sign(x) * ( Pi/2 - sqrt(R) * PolB(R))
-//    Where R = 1 - |x|,
-//          PolB(R) = B0 + B1*R + B2*R^2 +...+B12*R^12
-//
-//    sqrt(R) is approximated using the following sequence:
-//        y0 = (1 + eps)/sqrt(R) - initial approximation by frsqrta,
-//             |eps| < 2^(-8)
-//        Then 3 iterations are used to refine the result:
-//        H0 = 0.5*y0
-//        S0 = R*y0
-//
-//        d0 = 0.5 - H0*S0
-//        H1 = H0 + d0*H0
-//        S1 = S0 + d0*S0
-//
-//        d1 = 0.5 - H1*S1
-//        H2 = H1 + d0*H1
-//        S2 = S1 + d0*S1
-//
-//        d2 = 0.5 - H2*S2
-//        S3 = S3 + d2*S3
-//
-//        S3 approximates sqrt(R) with enough accuracy for this algorithm
-//
-//    So, the result should be reconstracted as follows:
-//    asin(x) = sign(x) * (Pi/2 - S3*PolB(R))
-//
-//    But for optimization perposes the reconstruction step is slightly
-//    changed:
-//    asin(x) = sign(x)*(Pi/2 - PolB(R)*S2) + sign(x)*d2*S2*PolB(R)
-//
-// 4. |x| = 1.0
-//    Return asin(x) = sign(x)*Pi/2
-//
-// 5. 1.0 < |x| <= +INF
-//    A doman error occurs for arguments not in the range [-1,+1]
-//
-// 6. x = [S,Q]NaN
-//    Return asin(x) = QNaN
-//
-// 7. x is denormal
-//    Return asin(x) = x + x^3,
-//
-// 8. x is unnormal
-//    Normalize input in f8 and return to the very beginning of the function
-//
-// Registers used
-//==============================================================
-// Floating Point registers used:
-// f8, input, output
-// f6, f7, f9 -> f15, f32 -> f63
 
-// General registers used:
-// r3, r21 -> r31, r32 -> r38
+// The asin function returns the arc sine in the range [-pi/2, +pi/2] radians.
 
-// Predicate registers used:
-// p0, p6 -> p14
+#include "libm_support.h"
 
 //
 // Assembly macros
 //=========================================
-// integer registers used
-// scratch
-rTblAddr                      = r3
-
-rPiBy2Ptr                     = r21
-rTmpPtr3                      = r22
-rDenoBound                    = r23
-rOne                          = r24
-rAbsXBits                     = r25
-rHalf                         = r26
-r0625                         = r27
-rSign                         = r28
-rXBits                        = r29
-rTmpPtr2                      = r30
-rTmpPtr1                      = r31
-
-// stacked
-GR_SAVE_PFS                   = r32
-GR_SAVE_B0                    = r33
-GR_SAVE_GP                    = r34
-GR_Parameter_X                = r35
-GR_Parameter_Y                = r36
-GR_Parameter_RESULT           = r37
-GR_Parameter_TAG              = r38
-
-// floating point registers used
-FR_X                          = f10
-FR_Y                          = f1
-FR_RESULT                     = f8
-
-
-// scratch
-fXSqr                         = f6
-fXCube                        = f7
-fXQuadr                       = f9
-f1pX                          = f10
-f1mX                          = f11
-f1pXRcp                       = f12
-f1mXRcp                       = f13
-fH                            = f14
-fS                            = f15
-// stacked
-fA3                           = f32
-fB1                           = f32
-fA5                           = f33
-fB2                           = f33
-fA7                           = f34
-fPiBy2                        = f34
-fA9                           = f35
-fA11                          = f36
-fB10                          = f35
-fB11                          = f36
-fA13                          = f37
-fA15                          = f38
-fB4                           = f37
-fB5                           = f38
-fA17                          = f39
-fA19                          = f40
-fB6                           = f39
-fB7                           = f40
-fA21                          = f41
-fA23                          = f42
-fB3                           = f41
-fB8                           = f42
-fA25                          = f43
-fA27                          = f44
-fB9                           = f43
-fB12                          = f44
-fA29                          = f45
-fA31                          = f46
-fA33                          = f47
-fA35                          = f48
-fBaseP                        = f49
-fB0                           = f50
-fSignedS                      = f51
-fD                            = f52
-fHalf                         = f53
-fR                            = f54
-fCloseTo1Pol                  = f55
-fSignX                        = f56
-fDenoBound                    = f57
-fNormX                        = f58
-fX8                           = f59
-fRSqr                         = f60
-fRQuadr                       = f61
-fR8                           = f62
-fX16                          = f63
+
+
+// predicate registers
+//asin_pred_LEsqrt2by2            = p7
+//asin_pred_GTsqrt2by2            = p8
+
+// integer registers
+ASIN_Addr1                      = r33
+ASIN_Addr2                      = r34
+ASIN_FFFE                       = r35
+ASIN_lnorm_sig                  = r36
+ASIN_snorm_exp                  = r37
+
+GR_SAVE_B0                      = r36
+GR_SAVE_PFS                     = r37
+GR_SAVE_GP                      = r38
+
+GR_Parameter_X                  = r39
+GR_Parameter_Y                  = r40
+GR_Parameter_RESULT             = r41
+GR_Parameter_Tag                = r42
+
+// floating point registers
+asin_coeff_P1                   = f32
+asin_coeff_P2                   = f33
+asin_coeff_P3                   = f34
+asin_coeff_P4                   = f35
+
+asin_coeff_P5                   = f36
+asin_coeff_P6                   = f37
+asin_coeff_P7                   = f38
+asin_coeff_P8                   = f39
+asin_coeff_P9                   = f40
+
+asin_coeff_P10                  = f41
+asin_coeff_P11                  = f42
+asin_coeff_P12                  = f43
+asin_coeff_P13                  = f44
+asin_coeff_P14                  = f45
+
+asin_coeff_P15                  = f46
+asin_coeff_P16                  = f47
+asin_coeff_P17                  = f48
+asin_coeff_P18                  = f49
+asin_coeff_P19                  = f50
+
+asin_coeff_P20                  = f51
+asin_coeff_P21                  = f52
+asin_const_sqrt2by2             = f53
+asin_const_piby2                = f54
+asin_abs_x                      = f55
+
+asin_tx                         = f56
+asin_tx2                        = f57
+asin_tx3                        = f58
+asin_tx4                        = f59
+asin_tx8                        = f60
+
+asin_tx11                       = f61
+asin_1poly_p8                   = f62
+asin_1poly_p19                  = f63
+asin_1poly_p4                   = f64
+asin_1poly_p15                  = f65
+
+asin_1poly_p6                   = f66
+asin_1poly_p17                  = f67
+asin_1poly_p0                   = f68
+asin_1poly_p11                  = f69
+asin_1poly_p2                   = f70
+
+asin_1poly_p13                  = f71
+asin_series_tx                  = f72
+asin_t                          = f73
+asin_t2                         = f74
+asin_t3                         = f75
+
+asin_t4                         = f76
+asin_t8                         = f77
+asin_t11                        = f78
+asin_poly_p8                    = f79
+asin_poly_p19                   = f80
+
+asin_poly_p4                    = f81
+asin_poly_p15                   = f82
+asin_poly_p6                    = f83
+asin_poly_p17                   = f84
+asin_poly_p0                    = f85
+
+asin_poly_p11                   = f86
+asin_poly_p2                    = f87
+asin_poly_p13                   = f88
+asin_series_t                   = f89
+asin_1by2                       = f90
+
+asin_3by2                       = f91
+asin_5by2                       = f92
+asin_11by4                      = f93
+asin_35by8                      = f94
+asin_63by8                      = f95
+
+asin_231by16                    = f96 
+asin_y0                         = f97 
+asin_H0                         = f98 
+asin_S0                         = f99 
+asin_d                          = f100
+
+asin_l1                         = f101
+asin_d2                         = f102
+asin_T0                         = f103
+asin_d1                         = f104
+asin_e0                         = f105
+
+asin_l2                         = f106
+asin_d3                         = f107
+asin_T3                         = f108
+asin_S1                         = f109
+asin_e1                         = f110
+
+asin_z                          = f111
+answer2                         = f112
+asin_sgn_x                      = f113
+asin_429by16                    = f114
+asin_18by4                      = f115
+
+asin_3by4                       = f116
+asin_l3                         = f117
+asin_T6                         = f118
+asin_eps_exp                    = f119
+asin_eps_sig                    = f120
+asin_eps                        = f120
+
 // Data tables
 //==============================================================
-RODATA
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
 .align 16
-LOCAL_OBJECT_START(asin_base_range_table)
-// Ai: Polynomial coefficients for the asin(x), |x| < .625000
-// Bi: Polynomial coefficients for the asin(x), |x| > .625000
-data8 0xBFDAAB56C01AE468 //A29
-data8 0x3FE1C470B76A5B2B //A31
-data8 0xBFDC5FF82A0C4205 //A33
-data8 0x3FC71FD88BFE93F0 //A35
-data8 0xB504F333F9DE6487, 0x00003FFF //B0
-data8 0xAAAAAAAAAAAAFC18, 0x00003FFC //A3
-data8 0x3F9F1C71BC4A7823 //A9
-data8 0x3F96E8BBAAB216B2 //A11
-data8 0x3F91C4CA1F9F8A98 //A13
-data8 0x3F8C9DDCEDEBE7A6 //A15
-data8 0x3F877784442B1516 //A17
-data8 0x3F859C0491802BA2 //A19
-data8 0x9999999998C88B8F, 0x00003FFB //A5
-data8 0x3F6BD7A9A660BF5E //A21
-data8 0x3F9FC1659340419D //A23
-data8 0xB6DB6DB798149BDF, 0x00003FFA //A7
-data8 0xBFB3EF18964D3ED3 //A25
-data8 0x3FCD285315542CF2 //A27
-data8 0xF15BEEEFF7D2966A, 0x00003FFB //B1
-data8 0x3EF0DDA376D10FB3 //B10
-data8 0xBEB83CAFE05EBAC9 //B11
-data8 0x3F65FFB67B513644 //B4
-data8 0x3F5032FBB86A4501 //B5
-data8 0x3F392162276C7CBA //B6
-data8 0x3F2435949FD98BDF //B7
-data8 0xD93923D7FA08341C, 0x00003FF9 //B2
-data8 0x3F802995B6D90BDB //B3
-data8 0x3F10DF86B341A63F //B8
-data8 0xC90FDAA22168C235, 0x00003FFF // Pi/2
-data8 0x3EFA3EBD6B0ECB9D //B9
-data8 0x3EDE18BA080E9098 //B12
-LOCAL_OBJECT_END(asin_base_range_table)
+
+asin_coeff_1_table:
+ASM_TYPE_DIRECTIVE(asin_coeff_1_table,@object)
+data8 0xE4E7E0A423A21249  , 0x00003FF8 //P7
+data8 0xC2F7EE0200FCE2A5  , 0x0000C003 //P18
+data8 0xB745D7F6C65C20E0  , 0x00003FF9 //P5
+data8 0xF75E381A323D4D94  , 0x0000C002 //P16
+data8 0x8959C2629C1024C0  , 0x0000C002 //P20
+data8 0xAFF68E7D241292C5  , 0x00003FF8 //P9
+data8 0xB6DB6DB7260AC30D  , 0x00003FFA //P3
+data8 0xD0417CE2B41CB7BF  , 0x0000C000 //P14
+data8 0x81D570FEA724E3E4  , 0x0000BFFD //P12
+data8 0xAAAAAAAAAAAAC277  , 0x00003FFC //P1
+data8 0xF534912FF3E7B76F  , 0x00003FFF //P21
+data8 0xc90fdaa22168c235  , 0x00003fff // pi/2
+data8 0x0000000000000000  , 0x00000000 // pad to avoid data bank conflict
+ASM_SIZE_DIRECTIVE(asin_coeff_1_table)
+	
+
+asin_coeff_2_table:
+ASM_TYPE_DIRECTIVE(asin_coeff_2_table,@object)
+data8 0x8E26AF5F29B39A2A  , 0x00003FF9 //P6
+data8 0xB4F118A4B1015470  , 0x00004003 //P17
+data8 0xF8E38E10C25990E0  , 0x00003FF9 //P4
+data8 0x80F50489AEF1CAC6  , 0x00004002 //P15
+data8 0x92728015172CFE1C  , 0x00004003 //P19
+data8 0xBBC3D831D4595971  , 0x00003FF8 //P8
+data8 0x999999999952A5C3  , 0x00003FFB //P2
+data8 0x855576BE6F0975EC  , 0x00003FFF //P13
+data8 0xF12420E778077D89  , 0x00003FFA //P11
+data8 0xB6590FF4D23DE003  , 0x00003FF3 //P10
+data8 0xb504f333f9de6484  , 0x00003ffe // sqrt(2)/2
+ASM_SIZE_DIRECTIVE(asin_coeff_2_table)
 
 
+
+.align 32
+.global asin
+
 .section .text
-GLOBAL_LIBM_ENTRY(asin)
-asin_unnormal_back:
-{ .mfi
-      getf.d             rXBits = f8 // grab bits of input value
-      // set p12 = 1 if x is a NaN, denormal, or zero
-      fclass.m           p12, p0 = f8, 0xcf
-      adds               rSign = 1, r0
-}
-{ .mfi
-      addl               rTblAddr = @ltoff(asin_base_range_table),gp
-      // 1 - x = 1 - |x| for positive x
-      fms.s1             f1mX = f1, f1, f8
-      addl               rHalf = 0xFFFE, r0 // exponent of 1/2
-}
-;;
-{ .mfi
-      addl               r0625 = 0x3FE4, r0 // high 16 bits of 0.625
-      // set p8 = 1 if x < 0
-      fcmp.lt.s1         p8, p9 = f8, f0
-      shl                rSign = rSign, 63 // sign bit
-}
-{ .mfi
-      // point to the beginning of the table
-      ld8                rTblAddr = [rTblAddr]
-      // 1 + x = 1 - |x| for negative x
-      fma.s1             f1pX = f1, f1, f8
-      adds               rOne = 0x3FF, r0
-}
-;;
-{ .mfi
-      andcm              rAbsXBits = rXBits, rSign // bits of |x|
-      fmerge.s           fSignX = f8, f1 // signum(x)
-      shl                r0625 = r0625, 48 // bits of DP representation of 0.625
-}
-{ .mfb
-      setf.exp           fHalf = rHalf // load A2 to FP reg
-      fma.s1             fXSqr = f8, f8, f0 // x^2
-      // branch on special path if x is a NaN, denormal, or zero
-(p12) br.cond.spnt       asin_special
-}
-;;
-{ .mfi
-      adds               rPiBy2Ptr = 272, rTblAddr
-      nop.f              0
-      shl                rOne = rOne, 52 // bits of 1.0
-}
-{ .mfi
-      adds               rTmpPtr1 = 16, rTblAddr
-      nop.f              0
-      // set p6 = 1 if |x| < 0.625
-      cmp.lt             p6, p7 = rAbsXBits, r0625
-}
-;;
-{ .mfi
-      ldfpd              fA29, fA31 = [rTblAddr] // A29, fA31
-      // 1 - x = 1 - |x| for positive x
-(p9)  fms.s1             fR = f1, f1, f8
-      // point to coefficient of "near 1" polynomial
-(p7)  adds               rTmpPtr2 = 176, rTblAddr
-}
-{ .mfi
-      ldfpd              fA33, fA35 = [rTmpPtr1], 16 // A33, fA35
-      // 1 + x = 1 - |x| for negative x
-(p8)  fma.s1             fR = f1, f1, f8
-(p6)  adds               rTmpPtr2 = 48, rTblAddr
-}
-;;
-{ .mfi
-      ldfe               fB0 = [rTmpPtr1], 16 // B0
-      nop.f              0
-      nop.i              0
-}
-{ .mib
-      adds               rTmpPtr3 = 16, rTmpPtr2
-      // set p10 = 1 if |x| = 1.0
-      cmp.eq             p10, p0 = rAbsXBits, rOne
-      // branch on special path for |x| = 1.0
-(p10) br.cond.spnt       asin_abs_1
-}
-;;
-{ .mfi
-      ldfe               fA3 = [rTmpPtr2], 48 // A3 or B1
-      nop.f              0
-      adds               rTmpPtr1 = 64, rTmpPtr3
-}
-{ .mib
-      ldfpd              fA9, fA11 = [rTmpPtr3], 16 // A9, A11 or B10, B11
-      // set p11 = 1 if |x| > 1.0
-      cmp.gt             p11, p0 = rAbsXBits, rOne
-      // branch on special path for |x| > 1.0
-(p11) br.cond.spnt       asin_abs_gt_1
-}
-;;
-{ .mfi
-      ldfpd              fA17, fA19 = [rTmpPtr2], 16 // A17, A19 or B6, B7
-      // initial approximation of 1 / sqrt(1 - x)
-      frsqrta.s1         f1mXRcp, p0 = f1mX
-      nop.i              0
-}
-{ .mfi
-      ldfpd              fA13, fA15 = [rTmpPtr3] // A13, A15 or B4, B5
-      fma.s1             fXCube = fXSqr, f8, f0 // x^3
-      nop.i              0
-}
-;;
-{ .mfi
-      ldfe               fA5 = [rTmpPtr2], 48 // A5 or B2
-      // initial approximation of 1 / sqrt(1 + x)
-      frsqrta.s1         f1pXRcp, p0 = f1pX
-      nop.i              0
-}
-{ .mfi
-      ldfpd              fA21, fA23 = [rTmpPtr1], 16 // A21, A23 or B3, B8
-      fma.s1             fXQuadr = fXSqr, fXSqr, f0 // x^4
-      nop.i              0
-}
-;;
-{ .mfi
-      ldfe               fA7 = [rTmpPtr1] // A7 or Pi/2
-      fma.s1             fRSqr = fR, fR, f0 // R^2
-      nop.i              0
-}
-{ .mfb
-      ldfpd              fA25, fA27 = [rTmpPtr2] // A25, A27 or B9, B12
-      nop.f              0
-(p6)  br.cond.spnt       asin_base_range;
-}
-;;
+.proc  asin
+.align 32
 
-{ .mfi
-      nop.m              0
-(p9)  fma.s1             fH = fHalf, f1mXRcp, f0 // H0 for x > 0
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-(p9)  fma.s1             fS = f1mX, f1mXRcp, f0  // S0 for x > 0
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-(p8)  fma.s1             fH = fHalf, f1pXRcp, f0 // H0 for x < 0
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-(p8)  fma.s1             fS = f1pX, f1pXRcp, f0  // S0 for x > 0
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fRQuadr = fRSqr, fRSqr, f0 // R^4
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fB11 = fB11, fR, fB10
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fB1 = fB1, fR, fB0
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fB5 = fB5, fR, fB4
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fB7 = fB7, fR, fB6
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fB3 = fB3, fR, fB2
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fnma.s1            fD = fH, fS, fHalf // d0 = 1/2 - H0*S0
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fR8 = fRQuadr, fRQuadr, f0 // R^4
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fB9 = fB9, fR, fB8
-      nop.i              0
-}
-;;
-{.mfi
-      nop.m              0
-      fma.s1             fB12 = fB12, fRSqr, fB11
-      nop.i              0
-}
-{.mfi
-      nop.m              0
-      fma.s1             fB7 = fB7, fRSqr, fB5
-      nop.i              0
-}
-;;
-{.mfi
-      nop.m              0
-      fma.s1             fB3 = fB3, fRSqr, fB1
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fH = fH, fD, fH // H1 = H0 + H0*d0
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fS = fS, fD, fS // S1 = S0 + S0*d0
-      nop.i              0
-}
-;;
-{.mfi
-      nop.m              0
-      fma.s1             fPiBy2 = fPiBy2, fSignX, f0 // signum(x)*Pi/2
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fB12 = fB12, fRSqr, fB9
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fma.s1             fB7 = fB7, fRQuadr, fB3
-      nop.i              0
-}
-;;
-{.mfi
-      nop.m              0
-      fnma.s1            fD = fH, fS, fHalf // d1 = 1/2 - H1*S1
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      fnma.s1            fSignedS = fSignX, fS, f0 // -signum(x)*S1
-      nop.i              0
-}
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fCloseTo1Pol = fB12, fR8, fB7
-      nop.i              0
+
+asin:
+ 
+{     .mfi 
+     alloc      r32               = ar.pfs,1,6,4,0
+     fma.s1    asin_tx        =    f8,f8,f0
+     addl      ASIN_Addr2     =    @ltoff(asin_coeff_2_table),gp
+} 
+{     .mfi 
+     mov       ASIN_FFFE      =    0xFFFE
+     fnma.s1   asin_t         =    f8,f8,f1
+     addl      ASIN_Addr1     =    @ltoff(asin_coeff_1_table),gp
 }
 ;;
-{ .mfi
-      nop.m              0
-      fma.s1             fH = fH, fD, fH // H2 = H1 + H1*d1
-      nop.i              0
+
+ 
+{     .mfi 
+     setf.exp       asin_1by2      =    ASIN_FFFE
+     fmerge.s       asin_abs_x     =    f1,f8
+     nop.i          999              ;;
+} 
+ 
+{     .mmf 
+     ld8       ASIN_Addr1     =    [ASIN_Addr1]
+     ld8       ASIN_Addr2     =    [ASIN_Addr2]
+     fmerge.s  asin_sgn_x     =    f8,f1 ;;
+} 
+
+ 
+{     .mfi 
+     ldfe      asin_coeff_P7  =    [ASIN_Addr1],16
+     fma.s1    asin_tx2       =    asin_tx,asin_tx,f0
+     nop.i                      999
+} 
+{     .mfi 
+     ldfe      asin_coeff_P6  =    [ASIN_Addr2],16
+     fma.s1    asin_t2        =    asin_t,asin_t,f0
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fS = fS, fD, fS // S2 = S1 + S1*d1
-      nop.i              0
+
+ 
+{     .mmf 
+     ldfe      asin_coeff_P18 =    [ASIN_Addr1],16
+     ldfe      asin_coeff_P17 =    [ASIN_Addr2],16
+     fclass.m.unc p8,p0  = f8, 0xc3	//@qnan |@snan
+} 
+;;
+ 
+{     .mmf 
+     ldfe           asin_coeff_P5  =    [ASIN_Addr1],16
+     ldfe      asin_coeff_P4  =    [ASIN_Addr2],16
+     frsqrta.s1     asin_y0,p0     =    asin_t
+} 
+;;
+ 
+{     .mfi 
+     ldfe      asin_coeff_P16 =    [ASIN_Addr1],16
+     fcmp.gt.s1 p9,p0 = asin_abs_x,f1
+     nop.i                      999
+} 
+{     .mfb 
+     ldfe      asin_coeff_P15 =    [ASIN_Addr2],16
+(p8) fma.d     f8 = f8,f1,f0
+(p8) br.ret.spnt b0
 }
 ;;
-{ .mfi
-      nop.m              0
-      // -signum(x)* S2 = -signum(x)*(S1 + S1*d1)
-      fma.s1             fSignedS = fSignedS, fD, fSignedS
-      nop.i              0
+
+ 
+{     .mmf 
+     ldfe      asin_coeff_P20 =    [ASIN_Addr1],16
+     ldfe      asin_coeff_P19 =    [ASIN_Addr2],16
+     fclass.m.unc p8,p0 = f8, 0x07	//@zero
+} 
+;;
+ 
+
+{     .mfi 
+     ldfe      asin_coeff_P9  =    [ASIN_Addr1],16
+     fma.s1    asin_t4        =    asin_t2,asin_t2,f0
+(p9) mov GR_Parameter_Tag = 61 
+} 
+{     .mfi 
+     ldfe      asin_coeff_P8  =    [ASIN_Addr2],16
+     fma.s1    asin_3by2      =    asin_1by2,f1,f1
+     nop.i                      999;;
 }
-;;
-{.mfi
-      nop.m              0
-      fnma.s1            fD = fH, fS, fHalf // d2 = 1/2 - H2*S2
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      asin_coeff_P2  =    [ASIN_Addr2],16
+     fma.s1    asin_tx4       =    asin_tx2,asin_tx2,f0
+     nop.i                      999
+} 
+{     .mfb 
+     ldfe      asin_coeff_P3  =    [ASIN_Addr1],16
+     fma.s1    asin_t3        =    asin_t,asin_t2,f0
+(p8) br.ret.spnt b0
 }
 ;;
-{ .mfi
-      nop.m              0
-      // signum(x)*(Pi/2 - PolB*S2)
-      fma.s1             fPiBy2 = fSignedS, fCloseTo1Pol, fPiBy2
-      nop.i              0
-}
-{ .mfi
-      nop.m              0
-      // -signum(x)*PolB * S2
-      fma.s1             fCloseTo1Pol = fSignedS, fCloseTo1Pol, f0
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      asin_coeff_P13 =    [ASIN_Addr2],16
+     fma.s1    asin_H0        =    asin_y0,asin_1by2,f0
+     nop.i                      999
+} 
+{     .mfb 
+     ldfe      asin_coeff_P14 =    [ASIN_Addr1],16
+     fma.s1    asin_S0        =    asin_y0,asin_t,f0
+(p9) br.cond.spnt  __libm_error_region
 }
 ;;
-{ .mfb
-      nop.m              0
-      // final result for 0.625 <= |x| < 1
-      fma.d.s0           f8 = fCloseTo1Pol, fD, fPiBy2
-      // exit here for  0.625 <= |x| < 1
-      br.ret.sptk        b0
+
+ 
+{     .mfi 
+     ldfe      asin_coeff_P11 =    [ASIN_Addr2],16
+     fcmp.eq.s1 p6,p0 = asin_abs_x,f1
+     nop.i                      999
+} 
+{     .mfi 
+     ldfe      asin_coeff_P12 =    [ASIN_Addr1],16
+     fma.s1    asin_tx3       =    asin_tx,asin_tx2,f0
+     nop.i                      999;;
 }
-;;
 
+ 
+{     .mfi 
+     ldfe      asin_coeff_P10 =    [ASIN_Addr2],16
+     fma.s1    asin_1poly_p6  =    asin_tx,asin_coeff_P7,asin_coeff_P6
+     nop.i                      999
+} 
+{     .mfi 
+     ldfe      asin_coeff_P1  =    [ASIN_Addr1],16
+     fma.s1    asin_poly_p6   =    asin_t,asin_coeff_P7,asin_coeff_P6
+     nop.i                      999;;
+}
 
-// here if |x| < 0.625
-.align 32
-asin_base_range:
-{ .mfi
-      nop.m              0
-      fma.s1             fA33 = fA33, fXSqr, fA31
-      nop.i              0
+ 
+{     .mfi 
+     ldfe      asin_const_sqrt2by2 =    [ASIN_Addr2],16
+     fma.s1    asin_5by2           =    asin_3by2,f1,f1
+     nop.i                           999
+} 
+{     .mfi 
+     ldfe      asin_coeff_P21 =    [ASIN_Addr1],16
+     fma.s1    asin_11by4     =    asin_3by2,asin_3by2,asin_1by2
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA15 = fA15, fXSqr, fA13
-      nop.i              0
+
+ 
+{     .mfi 
+     ldfe      asin_const_piby2    =    [ASIN_Addr1],16
+     fma.s1    asin_poly_p17       =    asin_t,asin_coeff_P18,asin_coeff_P17
+     nop.i                           999
+} 
+{     .mfb 
+     nop.m                 999
+     fma.s1    asin_3by4 =    asin_3by2,asin_1by2,f0
+(p6) br.cond.spnt  L(ASIN_ABS_1)      // Branch to short exit if |x|=1
 }
 ;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA29 = fA29, fXSqr, fA27
-      nop.i              0
+
+ 
+{     .mfi 
+     addl ASIN_lnorm_sig = -0x1,r0  // Form significand 0xffffffffffffffff
+     fma.s1    asin_poly_p15  =    asin_t,asin_coeff_P16,asin_coeff_P15
+     nop.i                      999
+} 
+{     .mfi 
+     addl ASIN_snorm_exp = 0x0c001,r0  // Form small exponent
+     fnma.s1   asin_d    =    asin_S0,asin_H0,asin_1by2
+     nop.i                 999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA25 = fA25, fXSqr, fA23
-      nop.i              0
+
+ 
+// Form the exponent and significand of a small number
+{     .mfi 
+     setf.sig asin_eps_sig = ASIN_lnorm_sig
+     fma.s1    asin_poly_p19  =    asin_t,asin_coeff_P20,asin_coeff_P19
+     nop.i                      999
+} 
+{     .mfi 
+     setf.exp asin_eps_exp = ASIN_snorm_exp
+     fma.s1    asin_poly_p4   =    asin_t,asin_coeff_P5,asin_coeff_P4
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA21 = fA21, fXSqr, fA19
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p17 =    asin_tx,asin_coeff_P18,asin_coeff_P17
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p8   =    asin_t,asin_coeff_P9,asin_coeff_P8
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA9 = fA9, fXSqr, fA7
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fms.s1    asin_35by8     =    asin_5by2,asin_11by4,asin_5by2
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_63by8     =    asin_5by2,asin_11by4,f1
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA5 = fA5, fXSqr, fA3
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p13  =    asin_t,asin_coeff_P14,asin_coeff_P13
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_18by4     =    asin_3by2,asin_5by2,asin_3by4
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA35 = fA35, fXQuadr, fA33
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_l1   =    asin_5by2,asin_d,asin_3by2
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_d2   =    asin_d,asin_d,f0
+     nop.i                 999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA17 = fA17, fXQuadr, fA15
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p15  =    asin_t2,asin_poly_p17,asin_poly_p15
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_T0   =    asin_d,asin_S0,f0
+     nop.i                 999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fX8 = fXQuadr, fXQuadr, f0 // x^8
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p19  =    asin_t2,asin_coeff_P21,asin_poly_p19
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p4   =    asin_t2,asin_poly_p6,asin_poly_p4
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA25 = fA25, fXQuadr, fA21
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_d1   =    asin_35by8,asin_d,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_231by16   =    asin_3by2,asin_35by8,asin_63by8
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA9 = fA9, fXQuadr, fA5
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p2   =    asin_t,asin_coeff_P3,asin_coeff_P2
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p8   =    asin_t2,asin_coeff_P10,asin_poly_p8
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA35 = fA35, fXQuadr, fA29
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p11  =    asin_t,asin_coeff_P12,asin_coeff_P11
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_e0   =    asin_d2,asin_l1,asin_d
+     nop.i                 999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA17 = fA17, fXSqr, fA11
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p15 =    asin_tx,asin_coeff_P16,asin_coeff_P15
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p0   =    asin_t,asin_coeff_P1,f1
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fX16 = fX8, fX8, f0 // x^16
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p19 =    asin_tx,asin_coeff_P20,asin_coeff_P19
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p4  =    asin_tx,asin_coeff_P5,asin_coeff_P4
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fA35 = fA35, fX8, fA25
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p8  =    asin_tx,asin_coeff_P9,asin_coeff_P8
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_l2   =    asin_231by16,asin_d,asin_63by8
+     nop.i                 999;;
 }
-{ .mfi
-      nop.m              0
-      fma.s1             fA17 = fA17, fX8, fA9
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_d3   =    asin_d2,asin_d,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_T3   =    asin_d2,asin_T0,f0
+     nop.i                 999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      fma.s1             fBaseP = fA35, fX16, fA17
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_429by16   =    asin_18by4,asin_11by4,asin_231by16
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_S1   =    asin_e0,asin_S0,asin_S0
+     nop.i                 999;;
 }
-;;
-{ .mfb
-      nop.m              0
-      // final result for |x| < 0.625
-      fma.d.s0           f8 = fBaseP, fXCube, f8
-      // exit here for |x| < 0.625 path
-      br.ret.sptk        b0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p4   =    asin_t4,asin_poly_p8,asin_poly_p4
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p15  =    asin_t4,asin_poly_p19,asin_poly_p15
+     nop.i                      999;;
 }
-;;
 
-// here if |x| = 1
-// asin(x) = sign(x) * Pi/2
-.align 32
-asin_abs_1:
-{ .mfi
-      ldfe               fPiBy2 = [rPiBy2Ptr] // Pi/2
-      nop.f              0
-      nop.i              0
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p0   =    asin_t2,asin_poly_p2,asin_poly_p0
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p11  =    asin_t2,asin_poly_p13,asin_poly_p11
+     nop.i                      999;;
 }
-;;
-{.mfb
-      nop.m              0
-      // result for |x| = 1.0
-      fma.d.s0           f8 = fPiBy2, fSignX, f0
-      // exit here for |x| = 1.0
-      br.ret.sptk        b0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_t8   =    asin_t4,asin_t4,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_e1   =    asin_d2,asin_l2,asin_d1
+     nop.i                 999;;
 }
-;;
 
-// here if x is a NaN, denormal, or zero
-.align 32
-asin_special:
-{ .mfi
-      nop.m              0
-      // set p12 = 1 if x is a NaN
-      fclass.m           p12, p0 = f8, 0xc3
-      nop.i              0
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p4  =    asin_tx2,asin_1poly_p6,asin_1poly_p4
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p15 =    asin_tx2,asin_1poly_p17,asin_1poly_p15
+     nop.i                      999;;
 }
-{ .mlx
-      nop.m              0
-      // smallest positive DP normalized number
-      movl               rDenoBound = 0x0010000000000000
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p8  =    asin_tx2,asin_coeff_P10,asin_1poly_p8
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p19 =    asin_tx2,asin_coeff_P21,asin_1poly_p19
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      // set p13 = 1 if x = 0.0
-      fclass.m           p13, p0 = f8, 0x07
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p2  =    asin_tx,asin_coeff_P3,asin_coeff_P2
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p13 =    asin_tx,asin_coeff_P14,asin_coeff_P13
+     nop.i                      999;;
 }
-{ .mfi
-      nop.m              0
-      fnorm.s1           fNormX = f8
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p0  =    asin_tx,asin_coeff_P1,f1
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p11 =    asin_tx,asin_coeff_P12,asin_coeff_P11
+     nop.i                      999;;
 }
-;;
-{ .mfb
-      // load smallest normal to FP reg
-      setf.d             fDenoBound = rDenoBound
-      // answer if x is a NaN
-(p12) fma.d.s0           f8 = f8,f1,f0
-      // exit here if x is a NaN
-(p12) br.ret.spnt        b0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_l3   =    asin_429by16,asin_d,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_z    =    asin_e1,asin_T3,asin_S1
+     nop.i                 999;;
 }
-;;
-{ .mfb
-      nop.m              0
-      nop.f              0
-      // exit here if x = 0.0
-(p13) br.ret.spnt        b0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p11  =    asin_t4,asin_poly_p15,asin_poly_p11
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_T6   =    asin_T3,asin_d3,f0
+     nop.i                 999;;
 }
-;;
-// if we still here then x is denormal or unnormal
-{ .mfi
-      nop.m              0
-      // absolute value of normalized x
-      fmerge.s           fNormX = f1, fNormX
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_t11  =    asin_t8,asin_t3,f0
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_poly_p0   =    asin_t4,asin_poly_p4,asin_poly_p0
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-      // set p14 = 1 if normalized x is greater than or
-      // equal to the smallest denormalized value
-      // So, if p14 is set to 1 it means that we deal with
-      // unnormal rather than with "true" denormal
-      fcmp.ge.s1         p14, p0 = fNormX, fDenoBound
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p4  =    asin_tx4,asin_1poly_p8,asin_1poly_p4
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p15 =    asin_tx4,asin_1poly_p19,asin_1poly_p15
+     nop.i                      999;;
 }
-;;
-{ .mfi
-      nop.m              0
-(p14) fcmp.eq.s0         p6, p0 = f8, f0      // Set D flag if x unnormal
-      nop.i              0
+
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p0  =    asin_tx2,asin_1poly_p2,asin_1poly_p0
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p11 =    asin_tx2,asin_1poly_p13,asin_1poly_p11
+     nop.i                      999;;
 }
-{ .mfb
-      nop.m              0
-      // normalize unnormal input
-(p14) fnorm.s1           f8 = f8
-      // return to the main path
-(p14) br.cond.sptk       asin_unnormal_back
+
+ 
+{     .mfi 
+     nop.m                                                         999
+//     fcmp.le.s1     asin_pred_LEsqrt2by2,asin_pred_GTsqrt2by2    =    asin_abs_x,asin_const_sqrt2by2
+     fcmp.le.s1     p7,p8    =    asin_abs_x,asin_const_sqrt2by2
+     nop.i                                                         999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_tx8  =    asin_tx4,asin_tx4,f0
+     nop.i                 999;;
 }
-;;
-// if we still here it means that input is "true" denormal
-{ .mfb
-      nop.m              0
-      // final result if x is denormal
-      fma.d.s0           f8 = f8, fXSqr, f8
-      // exit here if x is denormal
-      br.ret.sptk        b0
+
+ 
+// Form a small number to force inexact flag for small args 
+{     .mfi 
+     nop.m                 999
+     fmerge.se asin_eps = asin_eps_exp,asin_eps_sig
+     nop.i                 999
+} 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_z    =    asin_l3,asin_T6,asin_z
+     nop.i                 999;;
+} 
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_series_t  =    asin_t11,asin_poly_p11,asin_poly_p0
+     nop.i                      999;;
+} 
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p0  =    asin_tx4,asin_1poly_p4,asin_1poly_p0
+     nop.i                      999
+} 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_1poly_p11 =    asin_tx4,asin_1poly_p15,asin_1poly_p11
+     nop.i                      999;;
 }
+
+ 
+{     .mfi 
+     nop.m                 999
+     fma.s1    asin_tx11 =    asin_tx8,asin_tx3,f0
+     nop.i                 999;;
+} 
+ 
+{     .mfi 
+                         nop.m                 999
+//(asin_pred_GTsqrt2by2)   fnma.s1      answer2   =    asin_z,asin_series_t,asin_const_piby2
+(p8)   fnma.s1      answer2   =    asin_z,asin_series_t,asin_const_piby2
+                         nop.i                 999;;
+} 
+ 
+{     .mfi 
+     nop.m                      999
+     fma.s1    asin_series_tx =    asin_tx11,asin_1poly_p11,asin_1poly_p0
+     nop.i                      999;;
+} 
+ 
+{     .mfi 
+                         nop.m                 999
+//(asin_pred_GTsqrt2by2)   fma.d     f8   =    asin_sgn_x,answer2,f0
+(p8)   fma.d     f8   =    asin_sgn_x,answer2,f0
+                         nop.i                 999;;
+} 
+ 
+// asin_eps is added only to force inexact and possibly underflow flag 
+// in case asin_series_tx is zero
+//
+{     .mfi 
+                         nop.m                 999
+(p7)   fma.d     asin_eps   =    f8,asin_series_tx,asin_eps
+                         nop.i                 999
+} 
+{     .mfb 
+                         nop.m                 999
+//(asin_pred_LEsqrt2by2)   fma.d     f8   =    f8,asin_series_tx,f0
+(p7)   fma.d     f8   =    f8,asin_series_tx,f0
+       br.ret.sptk b0
+} 
 ;;
 
-// here if |x| > 1.0
-// error handler should be called
-.align 32
-asin_abs_gt_1:
-{ .mfi
-      alloc              r32 = ar.pfs, 0, 3, 4, 0 // get some registers
-      fmerge.s           FR_X = f8,f8
-      nop.i              0
-}
-{ .mfb
-      mov                GR_Parameter_TAG = 61 // error code
-      frcpa.s0           FR_RESULT, p0 = f0,f0
-      // call error handler routine
-      br.cond.sptk       __libm_error_region
-}
+
+L(ASIN_ABS_1):
+// Here for short exit if |x|=1
+{     .mfb 
+     nop.m                      999
+     fma.d    f8 =    asin_sgn_x,asin_const_piby2,f0
+     br.ret.sptk b0
+} 
 ;;
-GLOBAL_LIBM_END(asin)
 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp asin
+ASM_SIZE_DIRECTIVE(asin)
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
-        nop.f 0
+                nop.f 999
 .save   ar.pfs,GR_SAVE_PFS
         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
@@ -815,29 +857,28 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov GR_SAVE_GP=gp                       // Save gp
 };;
 { .mmi
-        stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
+        stfs [GR_Parameter_Y] = f1,16         // Store Parameter 2 on stack
         add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
         mov GR_SAVE_B0=b0                       // Save b0
 };;
+
 .body
+        frcpa.s0 f9,p0 = f0,f0
+;;
+
 { .mib
-        stfd [GR_Parameter_X] = FR_X                  // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-        nop.b 0
+        stfd [GR_Parameter_X] = f8            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
+        nop.b 0
 }
 { .mib
-        stfd [GR_Parameter_Y] = FR_RESULT             // STORE Parameter 3 on stack
-        add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#         // Call error handling function
+        stfd [GR_Parameter_Y] = f9,-16           // Store Parameter 3 on stack
+        adds r32 = 48,sp
+        br.call.sptk b0=__libm_error_support#   // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
-        nop.m 0
-        nop.i 0
-};;
-{ .mmi
-        ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
+        ldfd  f8 = [r32]       // Get return result off stack
 .restore sp
         add   sp = 64,sp                       // Restore stack pointer
         mov   b0 = GR_SAVE_B0                  // Restore return address
@@ -846,8 +887,11 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov   gp = GR_SAVE_GP                  // Restore gp
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
+
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-.type   __libm_error_support#,@function
-.global __libm_error_support#
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type   __libm_error_support,@function
+.global __libm_error_support
diff --git a/sysdeps/ia64/fpu/e_asinf.S b/sysdeps/ia64/fpu/e_asinf.S
index f9a1312b26..ddae85880b 100644
--- a/sysdeps/ia64/fpu/e_asinf.S
+++ b/sysdeps/ia64/fpu/e_asinf.S
@@ -1,10 +1,10 @@
 .file "asinf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/02/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,25 +35,21 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 06/28/00 Improved speed 
-// 06/31/00 Changed register allocation because of some duplicate macros
+// 2/02/00  Initial revision
+// 6/28/00  Improved speed 
+// 6/31/00  Changed register allocation because of some duplicate macros
 //          moved nan exit bundle up to gain a cycle. 
-// 08/08/00 Improved speed by avoiding SIR flush.
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 8/08/00  Improved speed by avoiding SIR flush.
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 08/17/00 Changed predicate register macro-usage to direct predicate
+// 8/17/00  Changed predicate register macro-usage to direct predicate
 //          names due to an assembler bug.
 // 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
-// 03/13/01 Corrected sign of imm1 value in dep instruction.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/06/03 Reordered header: .section, .global, .proc, .align
 
-	
 // Description
 //=========================================
 // The asinf function computes the arc sine of x in the range [-pi,+pi].
@@ -123,6 +119,7 @@
 //  answer2 = - sign(x) z P(t) + (sign(x) pi/2)
 //
 
+#include "libm_support.h"
 
 // Assembly macros
 //=========================================
@@ -228,30 +225,42 @@ asinf_poly_p1a                   = f90
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(asinf_coeff_1_table)
+asinf_coeff_1_table:
+ASM_TYPE_DIRECTIVE(asinf_coeff_1_table,@object)
 data8 0x3FC5555607DCF816 // P1
 data8 0x3F9CF81AD9BAB2C6 // P4
 data8 0x3FC59E0975074DF3 // P7
 data8 0xBFA6F4CC2780AA1D // P6
 data8 0x3FC2DD45292E93CB // P9
 data8 0x3fe6a09e667f3bcd // sqrt(2)/2
-LOCAL_OBJECT_END(asinf_coeff_1_table)
+ASM_SIZE_DIRECTIVE(asinf_coeff_1_table)
 
-LOCAL_OBJECT_START(asinf_coeff_2_table)
+asinf_coeff_2_table:
+ASM_TYPE_DIRECTIVE(asinf_coeff_2_table,@object)
 data8 0x3FA6F108E31EFBA6 // P3
 data8 0xBFCA31BF175D82A0 // P8
 data8 0x3FA30C0337F6418B // P5
 data8 0x3FB332C9266CB1F9 // P2
 data8 0x3ff921fb54442d18 // pi_by_2
-LOCAL_OBJECT_END(asinf_coeff_2_table)
+ASM_SIZE_DIRECTIVE(asinf_coeff_2_table)
 
 
+.align 32
+.global asinf
+
 .section .text
-GLOBAL_LIBM_ENTRY(asinf)
+.proc  asinf
+.align 32
+
+asinf:
  
 // Load the addresses of the two tables.
 // Then, load the coefficients and other constants.
@@ -336,7 +345,7 @@ GLOBAL_LIBM_ENTRY(asinf)
 } 
 {     .mfb 
      nop.m                                               999
-(p8) fma.s.s0 f8                = f8,f1,f0
+(p8) fma.s f8                = f8,f1,f0
 (p8) br.ret.spnt   b0 ;;  // Exit if x=nan
 }
 
@@ -361,7 +370,7 @@ GLOBAL_LIBM_ENTRY(asinf)
 {     .mfb 
      nop.m                      999
      fma.s1    asinf_t4  =    asinf_t2,asinf_t2,f0
-(p6) br.cond.spnt  ASINF_ABS_ONE ;;     // Branch if |x|=1
+(p6) br.cond.spnt  L(ASINF_ABS_ONE) ;;     // Branch if |x|=1
 } 
 
 {     .mfi 
@@ -563,26 +572,28 @@ GLOBAL_LIBM_ENTRY(asinf)
 .pred.rel "mutex",p8,p7    //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2
 {     .mfi 
                          nop.m            999
-(p8)  fnma.s.s0     f8   =    asinf_z,asinf_Pt,asinf_sgn_x_piby2
+(p8)  fnma.s     f8   =    asinf_z,asinf_Pt,asinf_sgn_x_piby2
                          nop.i            999
 } 
  
 {     .mfb 
                          nop.m            999
-(p7)  fma.s.s0    f8    =    asinf_x11,asinf_poly_Bx,asinf_poly_Ax
+(p7)  fma.s    f8    =    asinf_x11,asinf_poly_Bx,asinf_poly_Ax
                          br.ret.sptk b0 ;;
 } 
 
-ASINF_ABS_ONE:
+L(ASINF_ABS_ONE):
 // Here for short exit if |x|=1
 {     .mfb 
      nop.m                      999
-     fma.s.s0    f8 =    asinf_sgn_x,asinf_const_piby2,f0
+     fma.s    f8 =    asinf_sgn_x,asinf_const_piby2,f0
      br.ret.sptk b0
 } 
 ;;
 
-GLOBAL_LIBM_END(asinf)
+.endp asinf
+ASM_SIZE_DIRECTIVE(asinf)
+
 // Stack operations when calling error support.
 //       (1)               (2)                  
 //   sp   -> +          psp -> +               
@@ -612,7 +623,8 @@ GLOBAL_LIBM_END(asinf)
 //                              restore gp
 //                              restore ar.pfs
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -668,7 +680,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_asinl.S b/sysdeps/ia64/fpu/e_asinl.S
index bf5feba155..9153832090 100644
--- a/sysdeps/ia64/fpu/e_asinl.S
+++ b/sysdeps/ia64/fpu/e_asinl.S
@@ -1,10 +1,10 @@
 .file "asinl.s"
 
-
-// Copyright (c) 2001 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2001 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,2448 +20,720 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 08/28/01 New version
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 2/02/00  Initial version 
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
+//          set [the previously overwritten] GR_Parameter_RESULT.
 //
 // API
 //==============================================================
-// long double asinl(long double)
+// long double = asinl(long double)
+// input  floating point f8
+// output floating point f8
 //
-// Overview of operation
+// Registers used
 //==============================================================
-// Background
-//
-// Implementation
-//
-// For |s| in [2^{-4}, sqrt(2)/2]:
-// Let t= 2^k*1.b1 b2..b6 1, where s= 2^k*1.b1 b2.. b52
-// asin(s)= asin(t)+asin(r), where r= s*sqrt(1-t^2)-t*sqrt(1-s^2), i.e.
-// r= (s-t)*sqrt(1-t^2)-t*sqrt(1-t^2)*(sqrt((1-s^2)/(1-t^2))-1)
-// asin(r)-r evaluated as 9-degree polynomial (c3*r^3+c5*r^5+c7*r^7+c9*r^9)
-// The 64-bit significands of sqrt(1-t^2), 1/(1-t^2) are read from the table,
-// along with the high and low parts of asin(t) (stored as two double precision
-// values)
 //
-// |s| in (sqrt(2)/2, sqrt(255/256)):
-// Let t= 2^k*1.b1 b2..b6 1, where (1-s^2)*frsqrta(1-s^2)= 2^k*1.b1 b2..b6..
-// asin(|s|)= pi/2-asin(t)+asin(r), r= s*t-sqrt(1-s^2)*sqrt(1-t^2)
-// To minimize accumulated errors, r is computed as
-// r= (t*s)_s-t^2*y*z+z*y*(t^2-1+s^2)_s+z*y*(1-s^2)_s*x+z'*y*(1-s^2)*PS29+
-// +(t*s-(t*s)_s)+z*y*((t^2-1-(t^2-1+s^2)_s)+s^2)+z*y*(1-s^2-(1-s^2)_s)+
-// +ez*z'*y*(1-s^2)*(1-x),
-// where y= frsqrta(1-s^2), z= (sqrt(1-t^2))_s (rounded to 24 significant bits)
-// z'= sqrt(1-t^2), x= ((1-s^2)*y^2-1)/2
+// predicate registers used:
+// p6 -> p12
 //
-// |s|<2^{-4}: evaluate as 17-degree polynomial
-// (or simply return s, if|s|<2^{-64})
+// floating-point registers used:
+// f8 has input, then output
+// f8 -> f13, f32 -> f87
 //
-// |s| in [sqrt(255/256), 1): asin(|s|)= pi/2-asin(sqrt(1-s^2))
-// use 17-degree polynomial for asin(sqrt(1-s^2)),
-// 9-degree polynomial to evaluate sqrt(1-s^2)
-// High order term is (pi/2)_high-(y*(1-s^2))_high
+// general registers used:
+// r32 -> r47
 //
-
-
-
-// Registers used
+// Overview of operation
 //==============================================================
-// f6-f15, f32-f36
-// r2-r3, r23-r23
-// p6, p7, p8, p12
-//
-
-
-       GR_SAVE_B0= r33
-       GR_SAVE_PFS= r34
-       GR_SAVE_GP= r35 // This reg. can safely be used
-       GR_SAVE_SP= r36
-
-       GR_Parameter_X= r37
-       GR_Parameter_Y= r38
-       GR_Parameter_RESULT= r39
-       GR_Parameter_TAG= r40
+// There are three paths
+// 1. |x| < 2^-40                 ASIN_TINY
+// 2. 2^-40 <= |x| < 1/4          ASIN_POLY
+// 3. 1/4 <= |x| < 1              ASIN_ATAN
 
-       FR_X= f10
-       FR_Y= f1
-       FR_RESULT= f8
-
-
-
-RODATA
-
-.align 16
-
-
-
-LOCAL_OBJECT_START(T_table)
-
-// stores 64-bit significand of 1/(1-t^2), 64-bit significand of sqrt(1-t^2),
-// asin(t)_high (double precision), asin(t)_low (double precision)
-
-data8 0x80828692b71c4391, 0xff7ddcec2d87e879
-data8 0x3fb022bc0ae531a0, 0x3c9f599c7bb42af6
-data8 0x80869f0163d0b082, 0xff79cad2247914d3
-data8 0x3fb062dd26afc320, 0x3ca4eff21bd49c5c
-data8 0x808ac7d5a8690705, 0xff75a89ed6b626b9
-data8 0x3fb0a2ff4a1821e0, 0x3cb7e33b58f164cc
-data8 0x808f0112ad8ad2e0, 0xff7176517c2cc0cb
-data8 0x3fb0e32279319d80, 0x3caee31546582c43
-data8 0x80934abba8a1da0a, 0xff6d33e949b1ed31
-data8 0x3fb12346b8101da0, 0x3cb8bfe463d087cd
-data8 0x8097a4d3dbe63d8f, 0xff68e16571015c63
-data8 0x3fb1636c0ac824e0, 0x3c8870a7c5a3556f
-data8 0x809c0f5e9662b3dd, 0xff647ec520bca0f0
-data8 0x3fb1a392756ed280, 0x3c964f1a927461ae
-data8 0x80a08a5f33fadc66, 0xff600c07846a6830
-data8 0x3fb1e3b9fc19e580, 0x3c69eb3576d56332
-data8 0x80a515d91d71acd4, 0xff5b892bc475affa
-data8 0x3fb223e2a2dfbe80, 0x3c6a4e19fd972fb6
-data8 0x80a9b1cfc86ff7cd, 0xff56f631062cf93d
-data8 0x3fb2640c6dd76260, 0x3c62041160e0849e
-data8 0x80ae5e46b78b0d68, 0xff5253166bc17794
-data8 0x3fb2a43761187c80, 0x3cac61651af678c0
-data8 0x80b31b417a4b756b, 0xff4d9fdb14463dc8
-data8 0x3fb2e46380bb6160, 0x3cb06ef23eeba7a1
-data8 0x80b7e8c3ad33c369, 0xff48dc7e1baf6738
-data8 0x3fb32490d0d910c0, 0x3caa05f480b300d5
-data8 0x80bcc6d0f9c784d6, 0xff4408fe9ad13e37
-data8 0x3fb364bf558b3820, 0x3cb01e7e403aaab9
-data8 0x80c1b56d1692492d, 0xff3f255ba75f5f4e
-data8 0x3fb3a4ef12ec3540, 0x3cb4fe8fcdf5f5f1
-data8 0x80c6b49bc72ec446, 0xff3a319453ebd961
-data8 0x3fb3e5200d171880, 0x3caf2dc089b2b7e2
-data8 0x80cbc460dc4e0ae8, 0xff352da7afe64ac6
-data8 0x3fb425524827a720, 0x3cb75a855e7c6053
-data8 0x80d0e4c033bee9c4, 0xff301994c79afb32
-data8 0x3fb46585c83a5e00, 0x3cb3264981c019ab
-data8 0x80d615bdb87556db, 0xff2af55aa431f291
-data8 0x3fb4a5ba916c73c0, 0x3c994251d94427b5
-data8 0x80db575d6291fd8a, 0xff25c0f84bae0cb9
-data8 0x3fb4e5f0a7dbdb20, 0x3cbee2fcc4c786cb
-data8 0x80e0a9a33769e535, 0xff207c6cc0ec09fd
-data8 0x3fb526280fa74620, 0x3c940656e5549b91
-data8 0x80e60c93498e32cd, 0xff1b27b703a19c98
-data8 0x3fb56660ccee2740, 0x3ca7082374d7b2cd
-data8 0x80eb8031b8d4052d, 0xff15c2d6105c72f8
-data8 0x3fb5a69ae3d0b520, 0x3c7c4d46e09ac68a
-data8 0x80f10482b25c6c8a, 0xff104dc8e0813ed4
-data8 0x3fb5e6d6586fec20, 0x3c9aa84ffd9b4958
-data8 0x80f6998a709c7cfb, 0xff0ac88e6a4ab926
-data8 0x3fb627132eed9140, 0x3cbced2cbbbe7d16
-data8 0x80fc3f4d3b657c44, 0xff053325a0c8a2ec
-data8 0x3fb667516b6c34c0, 0x3c6489c5fc68595a
-data8 0x8101f5cf67ed2af8, 0xfeff8d8d73dec2bb
-data8 0x3fb6a791120f33a0, 0x3cbe12acf159dfad
-data8 0x8107bd1558d6291f, 0xfef9d7c4d043df29
-data8 0x3fb6e7d226fabba0, 0x3ca386d099cd0dc7
-data8 0x810d95237e38766a, 0xfef411ca9f80b5f7
-data8 0x3fb72814ae53cc20, 0x3cb9f35731e71dd6
-data8 0x81137dfe55aa0e29, 0xfeee3b9dc7eef009
-data8 0x3fb76858ac403a00, 0x3c74df3dd959141a
-data8 0x811977aa6a479f0f, 0xfee8553d2cb8122c
-data8 0x3fb7a89e24e6b0e0, 0x3ca6034406ee42bc
-data8 0x811f822c54bd5ef8, 0xfee25ea7add46a91
-data8 0x3fb7e8e51c6eb6a0, 0x3cb82f8f78e68ed7
-data8 0x81259d88bb4ffac1, 0xfedc57dc2809fb1d
-data8 0x3fb8292d9700ad60, 0x3cbebb73c0e653f9
-data8 0x812bc9c451e5a257, 0xfed640d974eb6068
-data8 0x3fb8697798c5d620, 0x3ca2feee76a9701b
-data8 0x813206e3da0f3124, 0xfed0199e6ad6b585
-data8 0x3fb8a9c325e852e0, 0x3cb9e88f2f4d0efe
-data8 0x813854ec231172f9, 0xfec9e229dcf4747d
-data8 0x3fb8ea1042932a00, 0x3ca5ff40d81f66fd
-data8 0x813eb3e209ee858f, 0xfec39a7a9b36538b
-data8 0x3fb92a5ef2f247c0, 0x3cb5e3bece4d6b07
-data8 0x814523ca796f56ce, 0xfebd428f72561efe
-data8 0x3fb96aaf3b3281a0, 0x3cb7b9e499436d7c
-data8 0x814ba4aa6a2d3ff9, 0xfeb6da672bd48fe4
-data8 0x3fb9ab011f819860, 0x3cb9168143cc1a7f
-data8 0x81523686e29bbdd7, 0xfeb062008df81f50
-data8 0x3fb9eb54a40e3ac0, 0x3cb6e544197eb1e1
-data8 0x8158d964f7124614, 0xfea9d95a5bcbd65a
-data8 0x3fba2ba9cd080800, 0x3ca9a717be8f7446
-data8 0x815f8d49c9d639e4, 0xfea34073551e1ac8
-data8 0x3fba6c009e9f9260, 0x3c741e989a60938a
-data8 0x8166523a8b24f626, 0xfe9c974a367f785c
-data8 0x3fbaac591d0661a0, 0x3cb2c1290107e57d
-data8 0x816d283c793e0114, 0xfe95ddddb94166cb
-data8 0x3fbaecb34c6ef600, 0x3c9c7d5fbaec405d
-data8 0x81740f54e06d55bd, 0xfe8f142c93750c50
-data8 0x3fbb2d0f310cca00, 0x3cbc09479a9cbcfb
-data8 0x817b07891b15cd5e, 0xfe883a3577e9fceb
-data8 0x3fbb6d6ccf1455e0, 0x3cb9450bff4ee307
-data8 0x818210de91bba6c8, 0xfe814ff7162cf62f
-data8 0x3fbbadcc2abb1180, 0x3c9227fda12a8d24
-data8 0x81892b5abb0f2bf9, 0xfe7a55701a8697b1
-data8 0x3fbbee2d48377700, 0x3cb6fad72acfe356
-data8 0x819057031bf7760e, 0xfe734a9f2dfa1810
-data8 0x3fbc2e902bc10600, 0x3cb4465b588d16ad
-data8 0x819793dd479d4fbe, 0xfe6c2f82f643f68b
-data8 0x3fbc6ef4d9904580, 0x3c8b9ac54823960d
-data8 0x819ee1eedf76367a, 0xfe65041a15d8a92c
-data8 0x3fbcaf5b55dec6a0, 0x3ca2b8d28a954db2
-data8 0x81a6413d934f7a66, 0xfe5dc8632be3477f
-data8 0x3fbcefc3a4e727a0, 0x3c9380da83713ab4
-data8 0x81adb1cf21597d4b, 0xfe567c5cd44431d5
-data8 0x3fbd302dcae51600, 0x3ca995b83421756a
-data8 0x81b533a9563310b8, 0xfe4f2005a78fb50f
-data8 0x3fbd7099cc155180, 0x3caefa2f7a817d5f
-data8 0x81bcc6d20cf4f373, 0xfe47b35c3b0caaeb
-data8 0x3fbdb107acb5ae80, 0x3cb455fc372dd026
-data8 0x81c46b4f2f3d6e68, 0xfe40365f20b316d6
-data8 0x3fbdf177710518c0, 0x3cbee3dcc5b01434
-data8 0x81cc2126b53c1144, 0xfe38a90ce72abf36
-data8 0x3fbe31e91d439620, 0x3cb3e131c950aebd
-data8 0x81d3e85ea5bd8ee2, 0xfe310b6419c9c33a
-data8 0x3fbe725cb5b24900, 0x3c01d3fac6029027
-data8 0x81dbc0fd1637b9c1, 0xfe295d6340932d15
-data8 0x3fbeb2d23e937300, 0x3c6304cc44aeedd1
-data8 0x81e3ab082ad5a0a4, 0xfe219f08e03580b3
-data8 0x3fbef349bc2a77e0, 0x3cac1d2d6abe9c72
-data8 0x81eba6861683cb97, 0xfe19d0537a0946e2
-data8 0x3fbf33c332bbe020, 0x3ca0909dba4e96ca
-data8 0x81f3b37d1afc9979, 0xfe11f1418c0f94e2
-data8 0x3fbf743ea68d5b60, 0x3c937fc12a2a779a
-data8 0x81fbd1f388d4be45, 0xfe0a01d190f09063
-data8 0x3fbfb4bc1be5c340, 0x3cbf51a504b55813
-data8 0x820401efbf87e248, 0xfe020201fff9efea
-data8 0x3fbff53b970d1e80, 0x3ca625444b260078
-data8 0x82106ad2ffdca049, 0xfdf5e3940a49135e
-data8 0x3fc02aff52065460, 0x3c9125d113e22a57
-data8 0x8221343d6ea1d3e2, 0xfde581a45429b0a0
-data8 0x3fc06b84f8e03220, 0x3caccf362295894b
-data8 0x82324434adbf99c2, 0xfdd4de1a001fb775
-data8 0x3fc0ac0ed1fe7240, 0x3cc22f676096b0af
-data8 0x82439aee8d0c7747, 0xfdc3f8e8269d1f03
-data8 0x3fc0ec9cee9e4820, 0x3cca147e2886a628
-data8 0x825538a1d0fcb2f0, 0xfdb2d201a9b1ba66
-data8 0x3fc12d2f6006f0a0, 0x3cc72b36633bc2d4
-data8 0x82671d86345c5cee, 0xfda1695934d723e7
-data8 0x3fc16dc63789de60, 0x3cb11f9c47c7b83f
-data8 0x827949d46a121770, 0xfd8fbee13cbbb823
-data8 0x3fc1ae618682e620, 0x3cce1b59020cef8e
-data8 0x828bbdc61eeab9ba, 0xfd7dd28bff0c9f34
-data8 0x3fc1ef015e586c40, 0x3cafec043e0225ee
-data8 0x829e7995fb6de9e1, 0xfd6ba44b823ee1ca
-data8 0x3fc22fa5d07b90c0, 0x3cba905409caf8e3
-data8 0x82b17d7fa5bbc982, 0xfd5934119557883a
-data8 0x3fc2704eee685da0, 0x3cb5ef21838a823e
-data8 0x82c4c9bfc373d276, 0xfd4681cfcfb2c161
-data8 0x3fc2b0fcc9a5f3e0, 0x3ccc7952c5e0e312
-data8 0x82d85e93fba50136, 0xfd338d7790ca0f41
-data8 0x3fc2f1af73c6ba00, 0x3cbecf5f977d1ca9
-data8 0x82ec3c3af8c76b32, 0xfd2056f9fff97727
-data8 0x3fc33266fe6889a0, 0x3c9d329c022ebdb5
-data8 0x830062f46abf6022, 0xfd0cde480c43b327
-data8 0x3fc373237b34de60, 0x3cc95806d4928adb
-data8 0x8314d30108ea35f0, 0xfcf923526c1562b2
-data8 0x3fc3b3e4fbe10520, 0x3cbc299fe7223d54
-data8 0x83298ca29434df97, 0xfce526099d0737ed
-data8 0x3fc3f4ab922e4a60, 0x3cb59d8bb8fdbccc
-data8 0x833e901bd93c7009, 0xfcd0e65de39f1f7c
-data8 0x3fc435774fea2a60, 0x3c9ec18b43340914
-data8 0x8353ddb0b278aad8, 0xfcbc643f4b106055
-data8 0x3fc4764846ee80a0, 0x3cb90402efd87ed6
-data8 0x836975a60a70c52e, 0xfca79f9da4fab13a
-data8 0x3fc4b71e8921b860, 0xbc58f23449ed6365
-data8 0x837f5841ddfa7a46, 0xfc92986889284148
-data8 0x3fc4f7fa2876fca0, 0xbc6294812bf43acd
-data8 0x839585cb3e839773, 0xfc7d4e8f554ab12f
-data8 0x3fc538db36ee6960, 0x3cb910b773d4c578
-data8 0x83abfe8a5466246f, 0xfc67c2012cb6fa68
-data8 0x3fc579c1c6953cc0, 0x3cc5ede909fc47fc
-data8 0x83c2c2c861474d91, 0xfc51f2acf82041d5
-data8 0x3fc5baade9860880, 0x3cac63cdfc3588e5
-data8 0x83d9d2cfc2813637, 0xfc3be08165519325
-data8 0x3fc5fb9fb1e8e3a0, 0x3cbf7c8466578c29
-data8 0x83f12eebf397daac, 0xfc258b6ce6e6822f
-data8 0x3fc63c9731f39d40, 0x3cb6d2a7ffca3e9e
-data8 0x8408d76990b9296e, 0xfc0ef35db402af94
-data8 0x3fc67d947be9eec0, 0x3cb1980da09e6566
-data8 0x8420cc9659487cd7, 0xfbf81841c8082dc4
-data8 0x3fc6be97a21daf00, 0x3cc2ac8330e59aa5
-data8 0x84390ec132759ecb, 0xfbe0fa06e24cc390
-data8 0x3fc6ffa0b6ef05e0, 0x3ccc1a030fee56c4
-data8 0x84519e3a29df811a, 0xfbc9989a85ce0954
-data8 0x3fc740afcccca000, 0x3cc19692a5301ca6
-data8 0x846a7b527842d61b, 0xfbb1f3e9f8e45dc4
-data8 0x3fc781c4f633e2c0, 0x3cc0e98f3868a508
-data8 0x8483a65c8434b5f0, 0xfb9a0be244f4af45
-data8 0x3fc7c2e045b12140, 0x3cb2a8d309754420
-data8 0x849d1fabe4e97dd7, 0xfb81e070362116d1
-data8 0x3fc80401cddfd120, 0x3ca7a44544aa4ce6
-data8 0x84b6e795650817ea, 0xfb6971805af8411e
-data8 0x3fc84529a16ac020, 0x3c9e3b709c7d6f94
-data8 0x84d0fe6f0589da92, 0xfb50beff0423a2f5
-data8 0x3fc88657d30c49e0, 0x3cc60d65a7f0a278
-data8 0x84eb649000a73014, 0xfb37c8d84414755c
-data8 0x3fc8c78c758e8e80, 0x3cc94b2ee984c2b7
-data8 0x85061a50ccd13781, 0xfb1e8ef7eeaf764b
-data8 0x3fc908c79bcba900, 0x3cc8540ae794a2fe
-data8 0x8521200b1fb8916e, 0xfb05114998f76a83
-data8 0x3fc94a0958ade6c0, 0x3ca127f49839fa9c
-data8 0x853c7619f1618bf6, 0xfaeb4fb898b65d19
-data8 0x3fc98b51bf2ffee0, 0x3c8c9ba7a803909a
-data8 0x85581cd97f45e274, 0xfad14a3004259931
-data8 0x3fc9cca0e25d4ac0, 0x3cba458e91d3bf54
-data8 0x857414a74f8446b4, 0xfab7009ab1945a54
-data8 0x3fca0df6d551fe80, 0x3cc78ea1d329d2b2
-data8 0x85905de2341dea46, 0xfa9c72e3370d2fbc
-data8 0x3fca4f53ab3b6200, 0x3ccf60dca86d57ef
-data8 0x85acf8ea4e423ff8, 0xfa81a0f3e9fa0ee9
-data8 0x3fca90b777580aa0, 0x3ca4c4e2ec8a867e
-data8 0x85c9e62111a92e7d, 0xfa668ab6dec711b1
-data8 0x3fcad2224cf814e0, 0x3c303de5980d071c
-data8 0x85e725e947fbee97, 0xfa4b3015e883dbfe
-data8 0x3fcb13943f7d5f80, 0x3cc29d4eefa5cb1e
-data8 0x8604b8a7144cd054, 0xfa2f90fa9883a543
-data8 0x3fcb550d625bc6a0, 0x3c9e01a746152daf
-data8 0x86229ebff69e2415, 0xfa13ad4e3dfbe1c1
-data8 0x3fcb968dc9195ea0, 0x3ccc091bd73ae518
-data8 0x8640d89acf78858c, 0xf9f784f9e5a1877b
-data8 0x3fcbd815874eb160, 0x3cb5f4b89875e187
-data8 0x865f669fe390c7f5, 0xf9db17e65944eacf
-data8 0x3fcc19a4b0a6f9c0, 0x3cc5c0bc2b0bbf14
-data8 0x867e4938df7dc45f, 0xf9be65fc1f6c2e6e
-data8 0x3fcc5b3b58e061e0, 0x3cc1ca70df8f57e7
-data8 0x869d80d0db7e4c0c, 0xf9a16f237aec427a
-data8 0x3fcc9cd993cc4040, 0x3cbae93acc85eccf
-data8 0x86bd0dd45f4f8265, 0xf98433446a806e70
-data8 0x3fccde7f754f5660, 0x3cb22f70e64568d0
-data8 0x86dcf0b16613e37a, 0xf966b246a8606170
-data8 0x3fcd202d11620fa0, 0x3c962030e5d4c849
-data8 0x86fd29d7624b3d5d, 0xf948ec11a9d4c45b
-data8 0x3fcd61e27c10c0a0, 0x3cc7083c91d59217
-data8 0x871db9b741dbe44a, 0xf92ae08c9eca4941
-data8 0x3fcda39fc97be7c0, 0x3cc9258579e57211
-data8 0x873ea0c3722d6af2, 0xf90c8f9e71633363
-data8 0x3fcde5650dd86d60, 0x3ca4755a9ea582a9
-data8 0x875fdf6fe45529e8, 0xf8edf92dc5875319
-data8 0x3fce27325d6fe520, 0x3cbc1e2b6c1954f9
-data8 0x878176321154e2bc, 0xf8cf1d20f87270b8
-data8 0x3fce6907cca0d060, 0x3cb6ca4804750830
-data8 0x87a36580fe6bccf5, 0xf8affb5e20412199
-data8 0x3fceaae56fdee040, 0x3cad6b310d6fd46c
-data8 0x87c5add5417a5cb9, 0xf89093cb0b7c0233
-data8 0x3fceeccb5bb33900, 0x3cc16e99cedadb20
-data8 0x87e84fa9057914ca, 0xf870e64d40a15036
-data8 0x3fcf2eb9a4bcb600, 0x3cc75ee47c8b09e9
-data8 0x880b4b780f02b709, 0xf850f2c9fdacdf78
-data8 0x3fcf70b05fb02e20, 0x3cad6350d379f41a
-data8 0x882ea1bfc0f228ac, 0xf830b926379e6465
-data8 0x3fcfb2afa158b8a0, 0x3cce0ccd9f829985
-data8 0x885252ff21146108, 0xf810394699fe0e8e
-data8 0x3fcff4b77e97f3e0, 0x3c9b30faa7a4c703
-data8 0x88765fb6dceebbb3, 0xf7ef730f865f6df0
-data8 0x3fd01b6406332540, 0x3cdc5772c9e0b9bd
-data8 0x88ad1f69be2cc730, 0xf7bdc59bc9cfbd97
-data8 0x3fd04cf8ad203480, 0x3caeef44fe21a74a
-data8 0x88f763f70ae2245e, 0xf77a91c868a9c54e
-data8 0x3fd08f23ce0162a0, 0x3cd6290ab3fe5889
-data8 0x89431fc7bc0c2910, 0xf73642973c91298e
-data8 0x3fd0d1610f0c1ec0, 0x3cc67401a01f08cf
-data8 0x8990573407c7738e, 0xf6f0d71d1d7a2dd6
-data8 0x3fd113b0c65d88c0, 0x3cc7aa4020fe546f
-data8 0x89df0eb108594653, 0xf6aa4e6a05cfdef2
-data8 0x3fd156134ada6fe0, 0x3cc87369da09600c
-data8 0x8a2f4ad16e0ed78a, 0xf662a78900c35249
-data8 0x3fd19888f43427a0, 0x3cc62b220f38e49c
-data8 0x8a811046373e0819, 0xf619e180181d97cc
-data8 0x3fd1db121aed7720, 0x3ca3ede7490b52f4
-data8 0x8ad463df6ea0fa2c, 0xf5cffb504190f9a2
-data8 0x3fd21daf185fa360, 0x3caafad98c1d6c1b
-data8 0x8b294a8cf0488daf, 0xf584f3f54b8604e6
-data8 0x3fd2606046bf95a0, 0x3cdb2d704eeb08fa
-data8 0x8b7fc95f35647757, 0xf538ca65c960b582
-data8 0x3fd2a32601231ec0, 0x3cc661619fa2f126
-data8 0x8bd7e588272276f8, 0xf4eb7d92ff39fccb
-data8 0x3fd2e600a3865760, 0x3c8a2a36a99aca4a
-data8 0x8c31a45bf8e9255e, 0xf49d0c68cd09b689
-data8 0x3fd328f08ad12000, 0x3cb9efaf1d7ab552
-data8 0x8c8d0b520a35eb18, 0xf44d75cd993cfad2
-data8 0x3fd36bf614dcc040, 0x3ccacbb590bef70d
-data8 0x8cea2005d068f23d, 0xf3fcb8a23ab4942b
-data8 0x3fd3af11a079a6c0, 0x3cd9775872cf037d
-data8 0x8d48e837c8cd5027, 0xf3aad3c1e2273908
-data8 0x3fd3f2438d754b40, 0x3ca03304f667109a
-data8 0x8da969ce732f3ac7, 0xf357c60202e2fd7e
-data8 0x3fd4358c3ca032e0, 0x3caecf2504ff1a9d
-data8 0x8e0baad75555e361, 0xf3038e323ae9463a
-data8 0x3fd478ec0fd419c0, 0x3cc64bdc3d703971
-data8 0x8e6fb18807ba877e, 0xf2ae2b1c3a6057f7
-data8 0x3fd4bc6369fa40e0, 0x3cbb7122ec245cf2
-data8 0x8ed5843f4bda74d5, 0xf2579b83aa556f0c
-data8 0x3fd4fff2af11e2c0, 0x3c9cfa2dc792d394
-data8 0x8f3d29862c861fef, 0xf1ffde2612ca1909
-data8 0x3fd5439a4436d000, 0x3cc38d46d310526b
-data8 0x8fa6a81128940b2d, 0xf1a6f1bac0075669
-data8 0x3fd5875a8fa83520, 0x3cd8bf59b8153f8a
-data8 0x901206c1686317a6, 0xf14cd4f2a730d480
-data8 0x3fd5cb33f8cf8ac0, 0x3c9502b5c4d0e431
-data8 0x907f4ca5fe9cf739, 0xf0f186784a125726
-data8 0x3fd60f26e847b120, 0x3cc8a1a5e0acaa33
-data8 0x90ee80fd34aeda5e, 0xf09504ef9a212f18
-data8 0x3fd65333c7e43aa0, 0x3cae5b029cb1f26e
-data8 0x915fab35e37421c6, 0xf0374ef5daab5c45
-data8 0x3fd6975b02b8e360, 0x3cd5aa1c280c45e6
-data8 0x91d2d2f0d894d73c, 0xefd86321822dbb51
-data8 0x3fd6db9d05213b20, 0x3cbecf2c093ccd8b
-data8 0x9248000249200009, 0xef7840021aca5a72
-data8 0x3fd71ffa3cc87fc0, 0x3cb8d273f08d00d9
-data8 0x92bf3a7351f081d2, 0xef16e42021d7cbd5
-data8 0x3fd7647318b1ad20, 0x3cbce099d79cdc46
-data8 0x93388a8386725713, 0xeeb44dfce6820283
-data8 0x3fd7a908093fc1e0, 0x3ccb033ec17a30d9
-data8 0x93b3f8aa8e653812, 0xee507c126774fa45
-data8 0x3fd7edb9803e3c20, 0x3cc10aedb48671eb
-data8 0x94318d99d341ade4, 0xedeb6cd32f891afb
-data8 0x3fd83287f0e9cf80, 0x3c994c0c1505cd2a
-data8 0x94b1523e3dedc630, 0xed851eaa3168f43c
-data8 0x3fd87773cff956e0, 0x3cda3b7bce6a6b16
-data8 0x95334fc20577563f, 0xed1d8ffaa2279669
-data8 0x3fd8bc7d93a70440, 0x3cd4922edc792ce2
-data8 0x95b78f8e8f92f274, 0xecb4bf1fd2be72da
-data8 0x3fd901a5b3b9cf40, 0x3cd3fea1b00f9d0d
-data8 0x963e1b4e63a87c3f, 0xec4aaa6d08694cc1
-data8 0x3fd946eca98f2700, 0x3cdba4032d968ff1
-data8 0x96c6fcef314074fc, 0xebdf502d53d65fea
-data8 0x3fd98c52f024e800, 0x3cbe7be1ab8c95c9
-data8 0x97523ea3eab028b2, 0xeb72aea36720793e
-data8 0x3fd9d1d904239860, 0x3cd72d08a6a22b70
-data8 0x97dfeae6f4ee4a9a, 0xeb04c4096a884e94
-data8 0x3fda177f63e8ef00, 0x3cd818c3c1ebfac7
-data8 0x98700c7c6d85d119, 0xea958e90cfe1efd7
-data8 0x3fda5d468f92a540, 0x3cdf45fbfaa080fe
-data8 0x9902ae7487a9caa1, 0xea250c6224aab21a
-data8 0x3fdaa32f090998e0, 0x3cd715a9353cede4
-data8 0x9997dc2e017a9550, 0xe9b33b9ce2bb7638
-data8 0x3fdae939540d3f00, 0x3cc545c014943439
-data8 0x9a2fa158b29b649b, 0xe9401a573f8aa706
-data8 0x3fdb2f65f63f6c60, 0x3cd4a63c2f2ca8e2
-data8 0x9aca09f835466186, 0xe8cba69df9f0bf35
-data8 0x3fdb75b5773075e0, 0x3cda310ce1b217ec
-data8 0x9b672266ab1e0136, 0xe855de74266193d4
-data8 0x3fdbbc28606babc0, 0x3cdc84b75cca6c44
-data8 0x9c06f7579f0b7bd5, 0xe7debfd2f98c060b
-data8 0x3fdc02bf3d843420, 0x3cd225d967ffb922
-data8 0x9ca995db058cabdc, 0xe76648a991511c6e
-data8 0x3fdc497a9c224780, 0x3cde08101c5b825b
-data8 0x9d4f0b605ce71e88, 0xe6ec76dcbc02d9a7
-data8 0x3fdc905b0c10d420, 0x3cb1abbaa3edf120
-data8 0x9df765b9eecad5e6, 0xe6714846bdda7318
-data8 0x3fdcd7611f4b8a00, 0x3cbf6217ae80aadf
-data8 0x9ea2b320350540fe, 0xe5f4bab71494cd6b
-data8 0x3fdd1e8d6a0d56c0, 0x3cb726e048cc235c
-data8 0x9f51023562fc5676, 0xe576cbf239235ecb
-data8 0x3fdd65e082df5260, 0x3cd9e66872bd5250
-data8 0xa002620915c2a2f6, 0xe4f779b15f5ec5a7
-data8 0x3fddad5b02a82420, 0x3c89743b0b57534b
-data8 0xa0b6e21c2caf9992, 0xe476c1a233a7873e
-data8 0x3fddf4fd84bbe160, 0x3cbf7adea9ee3338
-data8 0xa16e9264cc83a6b2, 0xe3f4a16696608191
-data8 0x3fde3cc8a6ec6ee0, 0x3cce46f5a51f49c6
-data8 0xa22983528f3d8d49, 0xe3711694552da8a8
-data8 0x3fde84bd099a6600, 0x3cdc78f6490a2d31
-data8 0xa2e7c5d2e2e69460, 0xe2ec1eb4e1e0a5fb
-data8 0x3fdeccdb4fc685c0, 0x3cdd3aedb56a4825
-data8 0xa3a96b5599bd2532, 0xe265b74506fbe1c9
-data8 0x3fdf15241f23b3e0, 0x3cd440f3c6d65f65
-data8 0xa46e85d1ae49d7de, 0xe1ddddb499b3606f
-data8 0x3fdf5d98202994a0, 0x3cd6c44bd3fb745a
-data8 0xa53727ca3e11b99e, 0xe1548f662951b00d
-data8 0x3fdfa637fe27bf60, 0x3ca8ad1cd33054dd
-data8 0xa6036453bdc20186, 0xe0c9c9aeabe5e481
-data8 0x3fdfef0467599580, 0x3cc0f1ac0685d78a
-data8 0xa6d34f1969dda338, 0xe03d89d5281e4f81
-data8 0x3fe01bff067d6220, 0x3cc0731e8a9ef057
-data8 0xa7a6fc62f7246ff3, 0xdfafcd125c323f54
-data8 0x3fe04092d1ae3b40, 0x3ccabda24b59906d
-data8 0xa87e811a861df9b9, 0xdf20909061bb9760
-data8 0x3fe0653df0fd9fc0, 0x3ce94c8dcc722278
-data8 0xa959f2d2dd687200, 0xde8fd16a4e5f88bd
-data8 0x3fe08a00c1cae320, 0x3ce6b888bb60a274
-data8 0xaa3967cdeea58bda, 0xddfd8cabd1240d22
-data8 0x3fe0aedba3221c00, 0x3ced5941cd486e46
-data8 0xab904fd587263c84, 0xdd1f4472e1cf64ed
-data8 0x3fe0e651e85229c0, 0x3cdb6701042299b1
-data8 0xad686d44dd5a74bb, 0xdbf173e1f6b46e92
-data8 0x3fe1309cbf4cdb20, 0x3cbf1be7bb3f0ec5
-data8 0xaf524e15640ebee4, 0xdabd54896f1029f6
-data8 0x3fe17b4ee1641300, 0x3ce81dd055b792f1
-data8 0xb14eca24ef7db3fa, 0xd982cb9ae2f47e41
-data8 0x3fe1c66b9ffd6660, 0x3cd98ea31eb5ddc7
-data8 0xb35ec807669920ce, 0xd841bd1b8291d0b6
-data8 0x3fe211f66db3a5a0, 0x3ca480c35a27b4a2
-data8 0xb5833e4755e04dd1, 0xd6fa0bd3150b6930
-data8 0x3fe25df2e05b6c40, 0x3ca4bc324287a351
-data8 0xb7bd34c8000b7bd3, 0xd5ab9939a7d23aa1
-data8 0x3fe2aa64b32f7780, 0x3cba67314933077c
-data8 0xba0dc64d126cc135, 0xd4564563ce924481
-data8 0x3fe2f74fc9289ac0, 0x3cec1a1dc0efc5ec
-data8 0xbc76222cbbfa74a6, 0xd2f9eeed501125a8
-data8 0x3fe344b82f859ac0, 0x3ceeef218de413ac
-data8 0xbef78e31985291a9, 0xd19672e2182f78be
-data8 0x3fe392a22087b7e0, 0x3cd2619ba201204c
-data8 0xc19368b2b0629572, 0xd02baca5427e436a
-data8 0x3fe3e11206694520, 0x3cb5d0b3143fe689
-data8 0xc44b2ae8c6733e51, 0xceb975d60b6eae5d
-data8 0x3fe4300c7e945020, 0x3cbd367143da6582
-data8 0xc7206b894212dfef, 0xcd3fa6326ff0ac9a
-data8 0x3fe47f965d201d60, 0x3ce797c7a4ec1d63
-data8 0xca14e1b0622de526, 0xcbbe13773c3c5338
-data8 0x3fe4cfb4b09d1a20, 0x3cedfadb5347143c
-data8 0xcd2a6825eae65f82, 0xca34913d425a5ae9
-data8 0x3fe5206cc637e000, 0x3ce2798b38e54193
-data8 0xd06301095e1351ee, 0xc8a2f0d3679c08c0
-data8 0x3fe571c42e3d0be0, 0x3ccd7cb9c6c2ca68
-data8 0xd3c0d9f50057adda, 0xc70901152d59d16b
-data8 0x3fe5c3c0c108f940, 0x3ceb6c13563180ab
-data8 0xd74650a98cc14789, 0xc5668e3d4cbf8828
-data8 0x3fe61668a46ffa80, 0x3caa9092e9e3c0e5
-data8 0xdaf5f8579dcc8f8f, 0xc3bb61b3eed42d02
-data8 0x3fe669c251ad69e0, 0x3cccf896ef3b4fee
-data8 0xded29f9f9a6171b4, 0xc20741d7f8e8e8af
-data8 0x3fe6bdd49bea05c0, 0x3cdc6b29937c575d
-data8 0xe2df5765854ccdb0, 0xc049f1c2d1b8014b
-data8 0x3fe712a6b76c6e80, 0x3ce1ddc6f2922321
-data8 0xe71f7a9b94fcb4c3, 0xbe833105ec291e91
-data8 0x3fe76840418978a0, 0x3ccda46e85432c3d
-data8 0xeb96b72d3374b91e, 0xbcb2bb61493b28b3
-data8 0x3fe7bea9496d5a40, 0x3ce37b42ec6e17d3
-data8 0xf049183c3f53c39b, 0xbad848720223d3a8
-data8 0x3fe815ea59dab0a0, 0x3cb03ad41bfc415b
-data8 0xf53b11ec7f415f15, 0xb8f38b57c53c9c48
-data8 0x3fe86e0c84010760, 0x3cc03bfcfb17fe1f
-data8 0xfa718f05adbf2c33, 0xb70432500286b185
-data8 0x3fe8c7196b9225c0, 0x3ced99fcc6866ba9
-data8 0xfff200c3f5489608, 0xb509e6454dca33cc
-data8 0x3fe9211b54441080, 0x3cb789cb53515688
-// The following table entries are not used
-//data8 0x82e138a0fac48700, 0xb3044a513a8e6132
-//data8 0x3fe97c1d30f5b7c0, 0x3ce1eb765612d1d0
-//data8 0x85f4cc7fc670d021, 0xb0f2fb2ea6cbbc88
-//data8 0x3fe9d82ab4b5fde0, 0x3ced3fe6f27e8039
-//data8 0x89377c1387d5b908, 0xaed58e9a09014d5c
-//data8 0x3fea355065f87fa0, 0x3cbef481d25f5b58
-//data8 0x8cad7a2c98dec333, 0xacab929ce114d451
-//data8 0x3fea939bb451e2a0, 0x3c8e92b4fbf4560f
-//data8 0x905b7dfc99583025, 0xaa748cc0dbbbc0ec
-//data8 0x3feaf31b11270220, 0x3cdced8c61bd7bd5
-//data8 0x9446d8191f80dd42, 0xa82ff92687235baf
-//data8 0x3feb53de0bcffc20, 0x3cbe1722fb47509e
-//data8 0x98758ba086e4000a, 0xa5dd497a9c184f58
-//data8 0x3febb5f571cb0560, 0x3ce0c7774329a613
-//data8 0x9cee6c7bf18e4e24, 0xa37be3c3cd1de51b
-//data8 0x3fec197373bc7be0, 0x3ce08ebdb55c3177
-//data8 0xa1b944000a1b9440, 0xa10b2101b4f27e03
-//data8 0x3fec7e6bd023da60, 0x3ce5fc5fd4995959
-//data8 0xa6defd8ba04d3e38, 0x9e8a4b93cad088ec
-//data8 0x3fece4f404e29b20, 0x3cea3413401132b5
-//data8 0xac69dd408a10c62d, 0x9bf89d5d17ddae8c
-//data8 0x3fed4d2388f63600, 0x3cd5a7fb0d1d4276
-//data8 0xb265c39cbd80f97a, 0x99553d969fec7beb
-//data8 0x3fedb714101e0a00, 0x3cdbda21f01193f2
-//data8 0xb8e081a16ae4ae73, 0x969f3e3ed2a0516c
-//data8 0x3fee22e1da97bb00, 0x3ce7231177f85f71
-//data8 0xbfea427678945732, 0x93d5990f9ee787af
-//data8 0x3fee90ac13b18220, 0x3ce3c8a5453363a5
-//data8 0xc79611399b8c90c5, 0x90f72bde80febc31
-//data8 0x3fef009542b712e0, 0x3ce218fd79e8cb56
-//data8 0xcffa8425040624d7, 0x8e02b4418574ebed
-//data8 0x3fef72c3d2c57520, 0x3cd32a717f82203f
-//data8 0xd93299cddcf9cf23, 0x8af6ca48e9c44024
-//data8 0x3fefe762b77744c0, 0x3ce53478a6bbcf94
-//data8 0xe35eda760af69ad9, 0x87d1da0d7f45678b
-//data8 0x3ff02f511b223c00, 0x3ced6e11782c28fc
-//data8 0xeea6d733421da0a6, 0x84921bbe64ae029a
-//data8 0x3ff06c5c6f8ce9c0, 0x3ce71fc71c1ffc02
-//data8 0xfb3b2c73fc6195cc, 0x813589ba3a5651b6
-//data8 0x3ff0aaf2613700a0, 0x3cf2a72d2fd94ef3
-//data8 0x84ac1fcec4203245, 0xfb73a828893df19e
-//data8 0x3ff0eb367c3fd600, 0x3cf8054c158610de
-//data8 0x8ca50621110c60e6, 0xf438a14c158d867c
-//data8 0x3ff12d51caa6b580, 0x3ce6bce9748739b6
-//data8 0x95b8c2062d6f8161, 0xecb3ccdd37b369da
-//data8 0x3ff1717418520340, 0x3ca5c2732533177c
-//data8 0xa0262917caab4ad1, 0xe4dde4ddc81fd119
-//data8 0x3ff1b7d59dd40ba0, 0x3cc4c7c98e870ff5
-//data8 0xac402c688b72f3f4, 0xdcae469be46d4c8d
-//data8 0x3ff200b93cc5a540, 0x3c8dd6dc1bfe865a
-//data8 0xba76968b9eabd9ab, 0xd41a8f3df1115f7f
-//data8 0x3ff24c6f8f6affa0, 0x3cf1acb6d2a7eff7
-//data8 0xcb63c87c23a71dc5, 0xcb161074c17f54ec
-//data8 0x3ff29b5b338b7c80, 0x3ce9b5845f6ec746
-//data8 0xdfe323b8653af367, 0xc19107d99ab27e42
-//data8 0x3ff2edf6fac7f5a0, 0x3cf77f961925fa02
-//data8 0xf93746caaba3e1f1, 0xb777744a9df03bff
-//data8 0x3ff344df237486c0, 0x3cf6ddf5f6ddda43
-//data8 0x8ca77052f6c340f0, 0xacaf476f13806648
-//data8 0x3ff3a0dfa4bb4ae0, 0x3cfee01bbd761bff
-//data8 0xa1a48604a81d5c62, 0xa11575d30c0aae50
-//data8 0x3ff4030b73c55360, 0x3cf1cf0e0324d37c
-//data8 0xbe45074b05579024, 0x9478e362a07dd287
-//data8 0x3ff46ce4c738c4e0, 0x3ce3179555367d12
-//data8 0xe7a08b5693d214ec, 0x8690e3575b8a7c3b
-//data8 0x3ff4e0a887c40a80, 0x3cfbd5d46bfefe69
-//data8 0x94503d69396d91c7, 0xedd2ce885ff04028
-//data8 0x3ff561ebd9c18cc0, 0x3cf331bd176b233b
-//data8 0xced1d96c5bb209e6, 0xc965278083808702
-//data8 0x3ff5f71d7ff42c80, 0x3ce3301cc0b5a48c
-//data8 0xabac2cee0fc24e20, 0x9c4eb1136094cbbd
-//data8 0x3ff6ae4c63222720, 0x3cf5ff46874ee51e
-//data8 0x8040201008040201, 0xb4d7ac4d9acb1bf4
-//data8 0x3ff7b7d33b928c40, 0x3cfacdee584023bb
-LOCAL_OBJECT_END(T_table)
+#include "libm_support.h"
 
+// Assembly macros
+//==============================================================
+FR_RESULT = f10
+FR_X = f8
+FR_Y = f1
+asin_P79                   = f32
+asin_P59                   = f33
+asin_P39                   = f34
+asin_P19                   = f35
+
+asin_P810                  = f36
+asin_P610                  = f37
+asin_P410                  = f38
+asin_P210                  = f39
+
+asin_A1                    = f41
+asin_A2                    = f42
+asin_A3                    = f43
+asin_A4                    = f44
+asin_A5                    = f45
+asin_A6                    = f46
+asin_A7                    = f47
+asin_A8                    = f48
+asin_A9                    = f49
+asin_A10                   = f50
+
+asin_X2                    = f51
+asin_X4                    = f52
+
+asin_B                     = f53
+asin_Bb                    = f54
+asin_C                     = f55
+asin_Cc                    = f56
+asin_D                     = f57
+
+asin_W                     = f58
+asin_Ww                    = f59
+
+asin_y0                    = f60
+asin_y1                    = f61
+asin_y2                    = f62
+
+asin_H                     = f63
+asin_Hh                    = f64
+
+asin_t1                    = f65
+asin_t2                    = f66
+asin_t3                    = f67
+asin_t4                    = f68
+asin_t5                    = f69
+
+asin_Pseries               = f70
+asin_NORM_f8               = f71
+asin_ABS_NORM_f8           = f72
+
+asin_2m100                 = f73
+asin_P1P2                  = f74
+asin_HALF                  = f75
+asin_1mD                   = f76
+
+asin_1mB                   = f77
+asin_1mBmC                 = f78 
+asin_S                     = f79
+
+asin_BmWW                  = f80 
+asin_BmWWpb                = f81 
+asin_2W                    = f82 
+asin_1d2W                  = f83 
+asin_Dd                    = f84
+
+asin_XWw                   = f85 
+asin_low                   = f86
+
+asin_pi_by_2               = f87
+asin_pi_by_2_lo            = f88
+
+asin_GR_17_ones            = r33
+asin_GR_16_ones            = r34
+asin_GR_signexp_f8         = r35
+asin_GR_exp                = r36
+asin_GR_true_exp           = r37
+asin_GR_ff9b               = r38 
+
+GR_SAVE_B0              = r39
+GR_SAVE_SP              = r40
+GR_SAVE_PFS             = r33 
+// r33 (also asin_GR_17_ones) can be reused safely for GR_SAVE_PFS.
+// r40 (GR_SAVE_SP) first holds the address of the table of coefficients;
+// later it is used to save sp across calls.
+GR_SAVE_GP              = r41
+asin_GR_fffe               = r42 
+asin_GR_retval             = r43 
+
+GR_Parameter_X                 = r44 
+GR_Parameter_Y                 = r45 
+GR_Parameter_RESULT            = r46 
+GR_Parameter_TAG               = r47 
+
+
+// 2^-40:
+// A true exponent of -40 (decimal) is, in hex,
+//                    : -40 + register_bias
+//                    : -0x28 + 0xffff = 0xffd7
+
+// A true exponent of -100 (decimal) is, in hex,
+//                    : -100 + register_bias
+//                    : -0x64 + 0xffff = 0xff9b
+
+// Data tables
+//==============================================================
 
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(poly_coeffs)
-       // C_3
-data8 0xaaaaaaaaaaaaaaab, 0x0000000000003ffc
-       // C_5
-data8 0x999999999999999a, 0x0000000000003ffb
-       // C_7, C_9
-data8 0x3fa6db6db6db6db7, 0x3f9f1c71c71c71c8
-       // pi/2 (low, high)
-data8 0x3C91A62633145C07, 0x3FF921FB54442D18
-       // C_11, C_13
-data8 0x3f96e8ba2e8ba2e9, 0x3f91c4ec4ec4ec4e
-       // C_15, C_17
-data8 0x3f8c99999999999a, 0x3f87a87878787223
-LOCAL_OBJECT_END(poly_coeffs)
-
-
-R_DBL_S = r21
-R_EXP0 = r22
-R_EXP = r15
-R_SGNMASK = r23
-R_TMP = r24
-R_TMP2 = r25
-R_INDEX = r26
-R_TMP3 = r27
-R_TMP03 = r27
-R_TMP4 = r28
-R_TMP5 = r23
-R_TMP6 = r22
-R_TMP7 = r21
-R_T = r29
-R_BIAS = r20
-
-F_T = f6
-F_1S2 = f7
-F_1S2_S = f9
-F_INV_1T2 = f10
-F_SQRT_1T2 = f11
-F_S2T2 = f12
-F_X = f13
-F_D = f14
-F_2M64 = f15
-
-F_CS2 = f32
-F_CS3 = f33
-F_CS4 = f34
-F_CS5 = f35
-F_CS6 = f36
-F_CS7 = f37
-F_CS8 = f38
-F_CS9 = f39
-F_S23 = f40 
-F_S45 = f41 
-F_S67 = f42 
-F_S89 = f43 
-F_S25 = f44 
-F_S69 = f45 
-F_S29 = f46 
-F_X2 = f47 
-F_X4 = f48 
-F_TSQRT = f49 
-F_DTX = f50 
-F_R = f51 
-F_R2 = f52 
-F_R3 = f53 
-F_R4 = f54 
-
-F_C3 = f55 
-F_C5 = f56 
-F_C7 = f57 
-F_C9 = f58 
-F_P79 = f59 
-F_P35 = f60 
-F_P39 = f61 
-
-F_ATHI = f62 
-F_ATLO = f63 
-
-F_T1 = f64 
-F_Y = f65 
-F_Y2 = f66 
-F_ANDMASK = f67 
-F_ORMASK = f68 
-F_S = f69 
-F_05 = f70 
-F_SQRT_1S2 = f71 
-F_DS = f72 
-F_Z = f73 
-F_1T2 = f74 
-F_DZ = f75 
-F_ZE = f76 
-F_YZ = f77 
-F_Y1S2 = f78 
-F_Y1S2X = f79 
-F_1X = f80 
-F_ST = f81 
-F_1T2_ST = f82 
-F_TSS = f83 
-F_Y1S2X2 = f84 
-F_DZ_TERM = f85 
-F_DTS = f86 
-F_DS2X = f87 
-F_T2 = f88 
-F_ZY1S2S = f89 
-F_Y1S2_1X = f90 
-F_TS = f91
-F_PI2_LO = f92 
-F_PI2_HI = f93 
-F_S19 = f94 
-F_INV1T2_2 = f95 
-F_CORR = f96 
-F_DZ0 = f97 
-
-F_C11 = f98 
-F_C13 = f99 
-F_C15 = f100
-F_C17 = f101
-F_P1113 = f102
-F_P1517 = f103
-F_P1117 = f104
-F_P317 = f105
-F_R8 = f106
-F_HI = f107
-F_1S2_HI = f108
-F_DS2 = f109
-F_Y2_2 = f110
-F_S2 = f111
-F_S_DS2 = f112
-F_S_1S2S = f113
-F_XL = f114
-F_2M128 = f115
-
+asin_coefficients:                               // 16-byte entries, read in order with ldfe [r40],16
+ASM_TYPE_DIRECTIVE(asin_coefficients,@object)
+data8  0xBB08911F2013961E, 0x00003FF8            // A10 (first entry loaded)
+data8  0x981F1095A23A87D3, 0x00003FF8            // A9
+data8  0xBDF09C6C4177BCC6, 0x00003FF8            // A8
+data8  0xE4C3A60B049ACCEA, 0x00003FF8            // A7
+data8  0x8E2789F4E8A8F1AD, 0x00003FF9            // A6
+data8  0xB745D09B2B0E850B, 0x00003FF9            // A5
+data8  0xF8E38E3BC4C50920, 0x00003FF9            // A4
+data8  0xB6DB6DB6D89FCD81, 0x00003FFA            // A3
+data8  0x99999999999AF376, 0x00003FFB            // A2 (about 3/40)
+data8  0xAAAAAAAAAAAAAA71, 0x00003FFC            // A1 (about 1/6)
+
+data8  0xc90fdaa22168c234, 0x00003FFF            // pi_by_2_hi (loaded into asin_pi_by_2)
+data8  0xc4c6628b80dc1cd1, 0x00003FBF            // pi_by_2_lo (exponent field 0x40 below pi_by_2_hi)
+ASM_SIZE_DIRECTIVE(asin_coefficients)
+
+.align 32
+.global asinl#
 
 .section .text
-GLOBAL_LIBM_ENTRY(asinl)
-
-{.mfi
-       // get exponent, mantissa (rounded to double precision) of s
-       getf.d R_DBL_S = f8
-       // 1-s^2
-       fnma.s1 F_1S2 = f8, f8, f1
-       // r2 = pointer to T_table
-       addl r2 = @ltoff(T_table), gp
-}
-
-{.mfi
-       // sign mask
-       mov R_SGNMASK = 0x20000
-       nop.f 0
-       // bias-63-1
-       mov R_TMP03 = 0xffff-64;;
-}
-
-
-{.mfi
-       // get exponent of s
-       getf.exp R_EXP = f8
-       nop.f 0
-       // R_TMP4 = 2^45
-       shl R_TMP4 = R_SGNMASK, 45-17
-}
-
-{.mlx
-       // load bias-4
-       mov R_TMP = 0xffff-4
-       // load RU(sqrt(2)/2) to integer register (in double format, shifted left by 1)
-       movl R_TMP2 = 0x7fcd413cccfe779a;;
-}
-
-
-{.mfi
-       // load 2^{-64} in FP register
-       setf.exp F_2M64 = R_TMP03
-       nop.f 0
-       // index = (0x7-exponent)|b1 b2.. b6
-       extr.u R_INDEX = R_DBL_S, 46, 9
-}
-
-{.mfi
-       // get t = sign|exponent|b1 b2.. b6 1 x.. x
-       or R_T = R_DBL_S, R_TMP4
-       nop.f 0
-       // R_TMP4 = 2^45-1
-       sub R_TMP4 = R_TMP4, r0, 1;;
-}
-
-
-{.mfi
-       // get t = sign|exponent|b1 b2.. b6 1 0.. 0
-       andcm R_T = R_T, R_TMP4
-       nop.f 0
-       // eliminate sign from R_DBL_S (shift left by 1)
-       shl R_TMP3 = R_DBL_S, 1
-}
-
-{.mfi
-       // R_BIAS = 3*2^6
-       mov R_BIAS = 0xc0
-       nop.f 0
-       // eliminate sign from R_EXP
-       andcm R_EXP0 = R_EXP, R_SGNMASK;;
-}
-
-
-
-{.mfi
-       // load start address for T_table
-       ld8 r2 = [r2]
-       nop.f 0
-       // p8 = 1 if |s|> = sqrt(2)/2
-       cmp.geu p8, p0 = R_TMP3, R_TMP2
-}
-
-{.mlx
-       // p7 = 1 if |s|<2^{-4} (exponent of s<bias-4)
-       cmp.lt p7, p0 = R_EXP0, R_TMP
-       // sqrt coefficient cs8 = -33*13/128
-       movl R_TMP2 = 0xc0568000;;
-}
-
-
-
-{.mbb
-       // load t in FP register
-       setf.d F_T = R_T
-       // if |s|<2^{-4}, take alternate path
- (p7) br.cond.spnt SMALL_S
-       // if |s|> = sqrt(2)/2, take alternate path
- (p8) br.cond.sptk LARGE_S
-}
-
-{.mlx
-       // index = (4-exponent)|b1 b2.. b6
-       sub R_INDEX = R_INDEX, R_BIAS
-       // sqrt coefficient cs9 = 55*13/128
-       movl R_TMP = 0x40b2c000;;
-}
-
-
-{.mfi
-       // sqrt coefficient cs8 = -33*13/128
-       setf.s F_CS8 = R_TMP2
-       nop.f 0
-       // shift R_INDEX by 5
-       shl R_INDEX = R_INDEX, 5
-}
-
-{.mfi
-       // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
-       mov R_TMP4 = 0xffff - 1
-       nop.f 0
-       // sqrt coefficient cs6 = -21/16
-       mov R_TMP6 = 0xbfa8;;
-}
-
-
-{.mlx
-       // table index
-       add r2 = r2, R_INDEX
-       // sqrt coefficient cs7 = 33/16
-       movl R_TMP2 = 0x40040000;;
-}
-
-
-{.mmi
-       // load cs9 = 55*13/128
-       setf.s F_CS9 = R_TMP
-       // sqrt coefficient cs5 = 7/8
-       mov R_TMP3 = 0x3f60
-       // sqrt coefficient cs6 = 21/16
-       shl R_TMP6 = R_TMP6, 16;;
-}
-
-
-{.mmi
-       // load significand of 1/(1-t^2)
-       ldf8 F_INV_1T2 = [r2], 8
-       // sqrt coefficient cs7 = 33/16
-       setf.s F_CS7 = R_TMP2
-       // sqrt coefficient cs4 = -5/8
-       mov R_TMP5 = 0xbf20;;
-}
-
-
-{.mmi
-       // load significand of sqrt(1-t^2)
-       ldf8 F_SQRT_1T2 = [r2], 8
-       // sqrt coefficient cs6 = 21/16
-       setf.s F_CS6 = R_TMP6
-       // sqrt coefficient cs5 = 7/8
-       shl R_TMP3 = R_TMP3, 16;;
-}
-
-
-{.mmi
-       // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
-       setf.exp F_CS3 = R_TMP4
-       // r3 = pointer to polynomial coefficients
-       addl r3 = @ltoff(poly_coeffs), gp
-       // sqrt coefficient cs4 = -5/8
-       shl R_TMP5 = R_TMP5, 16;;
-}
-
-
-{.mfi
-       // sqrt coefficient cs5 = 7/8
-       setf.s F_CS5 = R_TMP3
-       // d = s-t
-       fms.s1 F_D = f8, f1, F_T
-       // set p6 = 1 if s<0, p11 = 1 if s> = 0
-       cmp.ge p6, p11 = R_EXP, R_DBL_S
-}
-
-{.mfi
-       // r3 = load start address to polynomial coefficients
-       ld8 r3 = [r3]
-       // s+t
-       fma.s1 F_S2T2 = f8, f1, F_T
-       nop.i 0;;
-}
-
-
-{.mfi
-       // sqrt coefficient cs4 = -5/8
-       setf.s F_CS4 = R_TMP5
-       // s^2-t^2
-       fma.s1 F_S2T2 = F_S2T2, F_D, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       // load C3
-       ldfe F_C3 = [r3], 16
-       // 0.5/(1-t^2) = 2^{-64}*(2^63/(1-t^2))
-       fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
-       nop.i 0;;
-}
-
-{.mfi
-       // load C_5
-       ldfe F_C5 = [r3], 16
-       // set correct exponent for sqrt(1-t^2)
-       fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       // load C_7, C_9
-       ldfpd F_C7, F_C9 = [r3]
-       // x = -(s^2-t^2)/(1-t^2)/2
-       fnma.s1 F_X = F_INV_1T2, F_S2T2, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       // load asin(t)_high, asin(t)_low
-       ldfpd F_ATHI, F_ATLO = [r2]
-       // t*sqrt(1-t^2)
-       fma.s1 F_TSQRT = F_T, F_SQRT_1T2, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // cs9*x+cs8
-       fma.s1 F_S89 = F_CS9, F_X, F_CS8
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // cs7*x+cs6
-       fma.s1 F_S67 = F_CS7, F_X, F_CS6
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // cs5*x+cs4
-       fma.s1 F_S45 = F_CS5, F_X, F_CS4
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // x*x
-       fma.s1 F_X2 = F_X, F_X, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // (s-t)-t*x
-       fnma.s1 F_DTX = F_T, F_X, F_D
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // cs3*x+cs2 (cs2 = -0.5 = -cs3)
-       fms.s1 F_S23 = F_CS3, F_X, F_CS3
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // cs9*x^3+cs8*x^2+cs7*x+cs6
-       fma.s1 F_S69 = F_S89, F_X2, F_S67
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // x^4
-       fma.s1 F_X4 = F_X2, F_X2, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // t*sqrt(1-t^2)*x^2
-       fma.s1 F_TSQRT = F_TSQRT, F_X2, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // cs5*x^3+cs4*x^2+cs3*x+cs2
-       fma.s1 F_S25 = F_S45, F_X2, F_S23
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // ((s-t)-t*x)*sqrt(1-t^2)
-       fma.s1 F_DTX = F_DTX, F_SQRT_1T2, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // if sign is negative, negate table values: asin(t)_low
- (p6) fnma.s1 F_ATLO = F_ATLO, f1, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // PS29 = cs9*x^7+..+cs5*x^3+cs4*x^2+cs3*x+cs2
-       fma.s1 F_S29 = F_S69, F_X4, F_S25
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // if sign is negative, negate table values: asin(t)_high
- (p6) fnma.s1 F_ATHI = F_ATHI, f1, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // R = ((s-t)-t*x)*sqrt(1-t^2)-t*sqrt(1-t^2)*x^2*PS29
-       fnma.s1 F_R = F_S29, F_TSQRT, F_DTX
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // R^2
-       fma.s1 F_R2 = F_R, F_R, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // c7+c9*R^2
-       fma.s1 F_P79 = F_C9, F_R2, F_C7
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c3+c5*R^2
-       fma.s1 F_P35 = F_C5, F_R2, F_C3
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // R^3
-       fma.s1 F_R4 = F_R2, F_R2, f0
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // R^3
-       fma.s1 F_R3 = F_R2, F_R, f0
-       nop.i 0;;
-}
-
-
-
-{.mfi
-       nop.m 0
-       // c3+c5*R^2+c7*R^4+c9*R^6
-       fma.s1 F_P39 = F_P79, F_R4, F_P35
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fma.s1 F_P39 = F_P39, F_R3, F_ATLO
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fma.s1 F_P39 = F_P39, f1, F_R
-       nop.i 0;;
-}
-
-
-{.mfb
-       nop.m 0
-       // result = asin(t)_high+R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fma.s0 f8 = F_ATHI, f1, F_P39
-       // return
-       br.ret.sptk b0;;
-}
-
-
-
-
-LARGE_S:
-
-{.mfi
-       // bias-1
-       mov R_TMP3 = 0xffff - 1
-       // y ~ 1/sqrt(1-s^2)
-       frsqrta.s1 F_Y, p7 = F_1S2
-       // c9 = 55*13*17/128
-       mov R_TMP4 = 0x10af7b
-}
-
-{.mlx
-       // c8 = -33*13*15/128
-       mov R_TMP5 = 0x184923
-       movl R_TMP2 = 0xff00000000000000;;
-}
-
-{.mfi
-       // set p6 = 1 if s<0, p11 = 1 if s>0
-       cmp.ge p6, p11 = R_EXP, R_DBL_S
-       // 1-s^2
-       fnma.s1 F_1S2 = f8, f8, f1
-       // set p9 = 1
-       cmp.eq p9, p0 = r0, r0;;
-}
-
-
-{.mfi
-       // load 0.5
-       setf.exp F_05 = R_TMP3
-       // (1-s^2) rounded to single precision
-       fnma.s.s1 F_1S2_S = f8, f8, f1
-       // c9 = 55*13*17/128
-       shl R_TMP4 = R_TMP4, 10
-}
-
-{.mlx
-       // AND mask for getting t ~ sqrt(1-s^2)
-       setf.sig F_ANDMASK = R_TMP2
-       // OR mask
-       movl R_TMP2 = 0x0100000000000000;;
-}
-
-
-{.mfi
-       nop.m 0
-       // (s^2)_s
-       fma.s.s1 F_S2 = f8, f8, f0
-       nop.i 0;;
-}
-
-
-{.mmi
-       // c9 = 55*13*17/128
-       setf.s F_CS9 = R_TMP4
-       // c7 = 33*13/16
-       mov R_TMP4 = 0x41d68
-       // c8 = -33*13*15/128
-       shl R_TMP5 = R_TMP5, 11;;
-}
-
-
-{.mfi
-       setf.sig F_ORMASK = R_TMP2
-       // y^2
-       fma.s1 F_Y2 = F_Y, F_Y, f0
-       // c7 = 33*13/16
-       shl R_TMP4 = R_TMP4, 12
-}
-
-{.mfi
-       // c6 = -33*7/16
-       mov R_TMP6 = 0xc1670
-       // y' ~ sqrt(1-s^2)
-       fma.s1 F_T1 = F_Y, F_1S2, f0
-       // c5 = 63/8
-       mov R_TMP7 = 0x40fc;;
-}
-
-
-{.mlx
-       // load c8 = -33*13*15/128
-       setf.s F_CS8 = R_TMP5
-       // c4 = -35/8
-       movl R_TMP5 = 0xc08c0000;;
-}
-
-{.mfi
-       // r3 = pointer to polynomial coefficients
-       addl r3 = @ltoff(poly_coeffs), gp
-       // 1-(1-s^2)_s
-       fnma.s1 F_DS = F_1S2_S, f1, f1
-       // p9 = 0 if p7 = 1 (p9 = 1 for special cases only)
- (p7) cmp.ne p9, p0 = r0, r0
-}
-
-{.mlx
-       // load c7 = 33*13/16
-       setf.s F_CS7 = R_TMP4
-       // c3 = 5/2
-       movl R_TMP4 = 0x40200000;;
-}
-
-
-{.mfi
-       nop.m 0
-       // 1-(s^2)_s
-       fnma.s1 F_S_1S2S = F_S2, f1, f1
-       nop.i 0
-}
-
-{.mlx
-       // load c4 = -35/8
-       setf.s F_CS4 = R_TMP5
-       // c2 = -3/2
-       movl R_TMP5 = 0xbfc00000;;
-}
-
-
-{.mfi
-       // load c3 = 5/2
-       setf.s F_CS3 = R_TMP4
-       // x = (1-s^2)_s*y^2-1
-       fms.s1 F_X = F_1S2_S, F_Y2, f1
-       // c6 = -33*7/16
-       shl R_TMP6 = R_TMP6, 12
-}
-
-{.mfi
-       nop.m 0
-       // y^2/2
-       fma.s1 F_Y2_2 = F_Y2, F_05, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       // load c6 = -33*7/16
-       setf.s F_CS6 = R_TMP6
-       // eliminate lower bits from y'
-       fand F_T = F_T1, F_ANDMASK
-       // c5 = 63/8
-       shl R_TMP7 = R_TMP7, 16
-}
-
-{.mfb
-       // r3 = load start address to polynomial coefficients
-       ld8 r3 = [r3]
-       // 1-(1-s^2)_s-s^2
-       fnma.s1 F_DS = f8, f8, F_DS
-       // p9 = 1 if s is a special input (NaN, or |s|> = 1)
- (p9) br.cond.spnt ASINL_SPECIAL_CASES;;
-}
-
-{.mmf
-       // get exponent, significand of y' (in single prec.)
-       getf.s R_TMP = F_T1
-       // load c3 = -3/2
-       setf.s F_CS2 = R_TMP5
-       // y*(1-s^2)
-       fma.s1 F_Y1S2 = F_Y, F_1S2, f0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // x' = (y^2/2)*(1-(s^2)_s)-0.5
-       fms.s1 F_XL = F_Y2_2, F_S_1S2S, F_05
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // s^2-(s^2)_s
-       fms.s1 F_S_DS2 = f8, f8, F_S2
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // if s<0, set s = -s
- (p6) fnma.s1 f8 = f8, f1, f0
-       nop.i 0;;
-}
-
-{.mfi
-       // load c5 = 63/8
-       setf.s F_CS5 = R_TMP7
-       // x = (1-s^2)_s*y^2-1+(1-(1-s^2)_s-s^2)*y^2
-       fma.s1 F_X = F_DS, F_Y2, F_X
-       // for t = 2^k*1.b1 b2.., get 7-k|b1.. b6
-       extr.u R_INDEX = R_TMP, 17, 9;;
-}
-
-
-{.mmi
-       // index = (4-exponent)|b1 b2.. b6
-       sub R_INDEX = R_INDEX, R_BIAS
-       nop.m 0
-       // get exponent of y
-       shr.u R_TMP2 = R_TMP, 23;;
-}
-
-{.mmi
-       // load C3
-       ldfe F_C3 = [r3], 16
-       // set p8 = 1 if y'<2^{-4}
-       cmp.gt p8, p0 = 0x7b, R_TMP2
-       // shift R_INDEX by 5
-       shl R_INDEX = R_INDEX, 5;;
-}
-
-
-{.mfb
-       // get table index for sqrt(1-t^2)
-       add r2 = r2, R_INDEX
-       // get t = 2^k*1.b1 b2.. b7 1
-       for F_T = F_T, F_ORMASK
- (p8) br.cond.spnt VERY_LARGE_INPUT;;
-}
-
-
-
-{.mmf
-       // load C5
-       ldfe F_C5 = [r3], 16
-       // load 1/(1-t^2)
-       ldfp8 F_INV_1T2, F_SQRT_1T2 = [r2], 16
-       // x = ((1-s^2)*y^2-1)/2
-       fma.s1 F_X = F_X, F_05, f0;;
-}
-
-
-
-{.mmf
-       nop.m 0
-       // C7, C9
-       ldfpd F_C7, F_C9 = [r3], 16
-       // set correct exponent for t
-       fmerge.se F_T = F_T1, F_T;;
-}
-
-
-
-{.mfi
-       // pi/2 (low, high)
-       ldfpd F_PI2_LO, F_PI2_HI = [r3]
-       // c9*x+c8
-       fma.s1 F_S89 = F_X, F_CS9, F_CS8
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // x^2
-       fma.s1 F_X2 = F_X, F_X, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // y*(1-s^2)*x
-       fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c7*x+c6
-       fma.s1 F_S67 = F_X, F_CS7, F_CS6
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // 1-x
-       fnma.s1 F_1X = F_X, f1, f1
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c3*x+c2
-       fma.s1 F_S23 = F_X, F_CS3, F_CS2
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // 1-t^2
-       fnma.s1 F_1T2 = F_T, F_T, f1
-       nop.i 0
-}
-
-{.mfi
-       // load asin(t)_high, asin(t)_low
-       ldfpd F_ATHI, F_ATLO = [r2]
-       // c5*x+c4
-       fma.s1 F_S45 = F_X, F_CS5, F_CS4
-       nop.i 0;;
-}
-
-
-
-{.mfi
-       nop.m 0
-       // t*s
-       fma.s1 F_TS = F_T, f8, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // 0.5/(1-t^2)
-       fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // z~sqrt(1-t^2), rounded to 24 significant bits
-       fma.s.s1 F_Z = F_SQRT_1T2, F_2M64, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // sqrt(1-t^2)
-       fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // y*(1-s^2)*x^2
-       fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // x^4
-       fma.s1 F_X4 = F_X2, F_X2, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // s*t rounded to 24 significant bits
-       fma.s.s1 F_TSS = F_T, f8, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c9*x^3+..+c6
-       fma.s1 F_S69 = F_X2, F_S89, F_S67
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // ST = (t^2-1+s^2) rounded to 24 significant bits
-       fms.s.s1 F_ST = f8, f8, F_1T2
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c5*x^3+..+c2
-       fma.s1 F_S25 = F_X2, F_S45, F_S23
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // 0.25/(1-t^2)
-       fma.s1 F_INV1T2_2 = F_05, F_INV_1T2, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // t*s-sqrt(1-t^2)*(1-s^2)*y
-       fnma.s1 F_TS = F_Y1S2, F_SQRT_1T2, F_TS
-       nop.i 0;;
-}
+.proc  asinl#
+.align 32
 
 
-{.mfi
-       nop.m 0
-       // z*0.5/(1-t^2)
-       fma.s1 F_ZE = F_INV_1T2, F_SQRT_1T2, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // z^2+t^2-1
-       fms.s1 F_DZ0 = F_Z, F_Z, F_1T2
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // (1-s^2-(1-s^2)_s)*x
-       fma.s1 F_DS2X = F_X, F_DS, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // t*s-(t*s)_s
-       fms.s1 F_DTS = F_T, f8, F_TSS
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c9*x^7+..+c2
-       fma.s1 F_S29 = F_X4, F_S69, F_S25
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // y*z
-       fma.s1 F_YZ = F_Z, F_Y, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // t^2
-       fma.s1 F_T2 = F_T, F_T, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // 1-t^2+ST
-       fma.s1 F_1T2_ST = F_ST, f1, F_1T2
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // y*(1-s^2)(1-x)
-       fma.s1 F_Y1S2_1X = F_Y1S2, F_1X, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // dz ~ sqrt(1-t^2)-z
-       fma.s1 F_DZ = F_DZ0, F_ZE, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // -1+correction for sqrt(1-t^2)-z
-       fnma.s1 F_CORR = F_INV1T2_2, F_DZ0, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // (PS29*x^2+x)*y*(1-s^2)
-       fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // z*y*(1-s^2)_s
-       fma.s1 F_ZY1S2S = F_YZ, F_1S2_S, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // s^2-(1-t^2+ST)
-       fms.s1 F_1T2_ST = f8, f8, F_1T2_ST
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x
-       fma.s1 F_DTS = F_YZ, F_DS2X, F_DTS
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // dz*y*(1-s^2)*(1-x)
-       fma.s1 F_DZ_TERM = F_DZ, F_Y1S2_1X, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // R = t*s-sqrt(1-t^2)*(1-s^2)*y+sqrt(1-t^2)*(1-s^2)*y*PS19
-       // (used for polynomial evaluation)
-       fma.s1 F_R = F_S19, F_SQRT_1T2, F_TS
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // (PS29*x^2)*y*(1-s^2)
-       fma.s1 F_S29 = F_Y1S2X2, F_S29, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // apply correction to dz*y*(1-s^2)*(1-x)
-       fma.s1 F_DZ_TERM = F_DZ_TERM, F_CORR, F_DZ_TERM
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // R^2
-       fma.s1 F_R2 = F_R, F_R, f0
-       nop.i 0;;
-}
+asinl: 
 
-
-{.mfi
-       nop.m 0
-       // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x+dz*y*(1-s^2)*(1-x)
-       fma.s1 F_DZ_TERM = F_DZ_TERM, f1, F_DTS
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // c7+c9*R^2
-       fma.s1 F_P79 = F_C9, F_R2, F_C7
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // c3+c5*R^2
-       fma.s1 F_P35 = F_C5, F_R2, F_C3
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // asin(t)_low-(pi/2)_low
-       fms.s1 F_ATLO = F_ATLO, f1, F_PI2_LO
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // R^4
-       fma.s1 F_R4 = F_R2, F_R2, f0
-       nop.i 0;;
-}
-
-{.mfi
-       nop.m 0
-       // R^3
-       fma.s1 F_R3 = F_R2, F_R, f0
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // (t*s)_s-t^2*y*z
-       fnma.s1 F_TSS = F_T2, F_YZ, F_TSS
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)
-       fma.s1 F_DZ_TERM = F_YZ, F_1T2_ST, F_DZ_TERM
-       nop.i 0;;
-}
-
-
-{.mfi
-       nop.m 0
-       // (pi/2)_hi-asin(t)_hi
-       fms.s1 F_ATHI = F_PI2_HI, f1, F_ATHI
-       nop.i 0
+{ .mfi
+      alloc r32 = ar.pfs,1,11,4,0                        
+(p0)  fnorm      asin_NORM_f8 = f8                       
+(p0)  mov        asin_GR_17_ones = 0x1ffff               
 }
 
-{.mfi
-       nop.m 0
-       // c3+c5*R^2+c7*R^4+c9*R^6
-       fma.s1 F_P39 = F_P79, F_R4, F_P35
-       nop.i 0;;
+{ .mii
+(p0)  mov        asin_GR_16_ones = 0xffff                
+(p0)  mov        asin_GR_ff9b = 0xff9b ;;                   
+      nop.i 999
 }
 
 
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)+
-       // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29
-       fma.s1 F_DZ_TERM = F_SQRT_1T2, F_S29, F_DZ_TERM
-       nop.i 0;;
+{ .mmi
+(p0)  setf.exp  asin_2m100 = asin_GR_ff9b                                      
+(p0)  addl           r40   = @ltoff(asin_coefficients), gp
+      nop.i 999
 }
+;;
 
-
-{.mfi
-       nop.m 0
-       // (t*s)_s-t^2*y*z+z*y*ST
-       fma.s1 F_TSS = F_YZ, F_ST, F_TSS
-       nop.i 0
+{ .mmi
+      ld8 r40 = [r40]
+      nop.m 999
+      nop.i 999
 }
+;;
 
-{.mfi
-       nop.m 0
-       // -asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fms.s1 F_P39 = F_P39, F_R3, F_ATLO
-       nop.i 0;;
-}
 
 
-{.mfi
-       nop.m 0
-       // if s<0, change sign of F_ATHI
- (p6) fnma.s1 F_ATHI = F_ATHI, f1, f0
-       nop.i 0
-}
+// Load the constants
 
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
-       // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 +
-       // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fma.s1 F_DZ_TERM = F_P39, f1, F_DZ_TERM
-       nop.i 0;;
+{ .mmi
+(p0) ldfe       asin_A10 = [r40],16 ;;      
+(p0) ldfe       asin_A9  = [r40],16      
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
-       // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
-       // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
-       fma.s1 F_DZ_TERM = F_ZY1S2S, F_X, F_DZ_TERM
-       nop.i 0;;
+{ .mmi
+(p0) ldfe       asin_A8  = [r40],16 ;;      
+(p0) ldfe       asin_A7  = [r40],16      
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
-       // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
-       // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6) +
-       // + (t*s)_s-t^2*y*z+z*y*ST
-       fma.s1 F_DZ_TERM = F_TSS, f1, F_DZ_TERM
-       nop.i 0;;
+{ .mmi
+(p0) ldfe       asin_A6  = [r40],16 ;;      
+(p0)  getf.exp   asin_GR_signexp_f8  = asin_NORM_f8                            
+      nop.i 999
 }
 
-
-.pred.rel "mutex", p6, p11
-{.mfi
-       nop.m 0
-       // result: add high part of pi/2-table value
-       // s>0 in this case
- (p11) fma.s0 f8 = F_DZ_TERM, f1, F_ATHI
-       nop.i 0
+{ .mmi
+(p0) ldfe       asin_A5  = [r40],16 ;;      
+(p0) ldfe       asin_A4  = [r40],16      
+      nop.i 999 ;;
 }
 
-{.mfb
-       nop.m 0
-       // result: add high part of pi/2-table value
-       // if s<0
- (p6) fnma.s0 f8 = F_DZ_TERM, f1, F_ATHI
-       br.ret.sptk b0;;
+{ .mfi
+      nop.m 999
+(p0) fmerge.s   asin_ABS_NORM_f8 = f0, asin_NORM_f8            
+(p0)  and        asin_GR_exp         = asin_GR_signexp_f8, asin_GR_17_ones ;;     
 }
 
+// case 1: |x| < 2^-40         ==> p6 (includes x = +-0)
+// case 2: 2^-40 <= |x| < 2^-2 ==> p8
+// case 3: 2^-2  <= |x| < 1    ==> p9
+// case 4: 1  <= |x|           ==> p11
+//   In case 4, we pick up the special case x = +-1 and return +-pi/2
 
-
-
-
-
-SMALL_S:
-
-       // use 15-term polynomial approximation
-
-{.mmi
-       // r3 = pointer to polynomial coefficients
-       addl r3 = @ltoff(poly_coeffs), gp;;
-       // load start address for coefficients
-       ld8 r3 = [r3]
-       mov R_TMP = 0x3fbf;;
+{ .mii
+(p0) ldfe       asin_A3  = [r40],16      
+(p0)  sub        asin_GR_true_exp    = asin_GR_exp, asin_GR_16_ones ;;            
+(p0)  cmp.ge.unc p6, p7    = -41, asin_GR_true_exp ;;             
 }
 
-
-{.mmi
-       add r2 = 64, r3
-       ldfe F_C3 = [r3], 16
-       // p7 = 1 if |s|<2^{-64} (exponent of s<bias-64)
-       cmp.lt p7, p0 = R_EXP0, R_TMP;;
+{ .mii
+(p0) ldfe       asin_A2  = [r40],16      
+(p7)  cmp.ge.unc p8, p9    = -3,  asin_GR_true_exp ;;             
+(p9)  cmp.ge.unc p10, p11  = -1,  asin_GR_true_exp              
 }
 
-{.mmf
-       ldfe F_C5 = [r3], 16
-       ldfpd F_C11, F_C13 = [r2], 16
-	   // 2^{-128}
-       fma.s1 F_2M128 = F_2M64, F_2M64, f0;;
+{ .mmi
+(p0) ldfe       asin_A1  = [r40],16 ;;      
+(p0) ldfe       asin_pi_by_2  = [r40],16 
+      nop.i 999
 }
 
-{.mmf
-       ldfpd F_C7, F_C9 = [r3]
-       ldfpd F_C15, F_C17 = [r2]
-       // if |s|<2^{-64}, return s+2^{-128}*s
- (p7) fma.s0 f8 = f8, F_2M128, f8;;
+// case 4: |x| >= 1
+{ .mib
+      nop.m 999
+      nop.i 999
+(p11) br.spnt         L(ASIN_ERROR_RETURN) ;;                         
 }
 
-
-
-{.mfb
-       nop.m 0
-       // s^2
-       fma.s1 F_R2 = f8, f8, f0
-       // if |s|<2^{-64}, return s
- (p7) br.ret.spnt b0;;
+// case 1: |x| < 2^-40
+{ .mfb
+      nop.m 999
+(p6)  fma.s0         f8 = asin_2m100,f8,f8                       
+(p6)  br.ret.spnt   b0 ;;                                          
 }
 
 
-{.mfi
-       nop.m 0
-       // s^3
-       fma.s1 F_R3 = f8, F_R2, f0
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // s^4
-       fma.s1 F_R4 = F_R2, F_R2, f0
-       nop.i 0;;
+// case 2: 2^-40 <= |x| < 2^-2 ==> p8
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_X2   = f8,f8, f0                       
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c3+c5*s^2
-       fma.s1 F_P35 = F_C5, F_R2, F_C3
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_X4   = asin_X2,asin_X2, f0             
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c11+c13*s^2
-       fma.s1 F_P1113 = F_C13, F_R2, F_C11
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P810 = asin_X4, asin_A10, asin_A8      
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // c7+c9*s^2
-       fma.s1 F_P79 = F_C9, F_R2, F_C7
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P79  = asin_X4, asin_A9, asin_A7       
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c15+c17*s^2
-       fma.s1 F_P1517 = F_C17, F_R2, F_C15
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P610 = asin_X4, asin_P810, asin_A6     
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // s^8
-       fma.s1 F_R8 = F_R4, F_R4, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P59  = asin_X4, asin_P79, asin_A5      
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c3+c5*s^2+c7*s^4+c9*s^6
-       fma.s1 F_P39 = F_P79, F_R4, F_P35
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P410 = asin_X4, asin_P610, asin_A4     
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-       // c11+c13*s^2+c15*s^4+c17*s^6
-       fma.s1 F_P1117 = F_P1517, F_R4, F_P1113
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P39  = asin_X4, asin_P59, asin_A3      
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c3+..+c17*s^14
-       fma.s1 F_P317 = F_R8, F_P1117, F_P39
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P210 = asin_X4, asin_P410, asin_A2     
+      nop.i 999
 }
 
-
-{.mfb
-       nop.m 0
-       // result
-       fma.s0 f8 = F_P317, F_R3, f8
-       br.ret.sptk b0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P19  = asin_X4, asin_P39, asin_A1      
+      nop.i 999 ;;
 }
 
-
-{.mfb
-       nop.m 0
-       fma.s0 f8 = F_P317, F_R3, f0//F_P317, F_R3, F_S29
-       // nop.f 0//fma.s0 f8 = f13, f6, f0
-       br.ret.sptk b0;;
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P1P2    = asin_X2, asin_P210, asin_P19 
+      nop.i 999 ;;
 }
 
-
-
-
-
-       VERY_LARGE_INPUT:
-
-{.mfi
-       nop.m 0
-       // s rounded to 24 significant bits
-       fma.s.s1 F_S = f8, f1, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p8)  fma.s1        asin_P1P2    = asin_X2, asin_P1P2, f0       
+      nop.i 999 ;;
 }
 
-{.mfi
-       // load C5
-       ldfe F_C5 = [r3], 16
-       // x = ((1-(s^2)_s)*y^2-1)/2-(s^2-(s^2)_s)*y^2/2
-       fnma.s1 F_X = F_S_DS2, F_Y2_2, F_XL
-       nop.i 0;;
+{ .mfb
+      nop.m 999
+(p8)  fma.s0        f8 = asin_NORM_f8, asin_P1P2, asin_NORM_f8  
+(p8)  br.ret.spnt   b0 ;;                                          
 }
 
+// case 3: 2^-2  <= |x| < 1    
+// 1- X*X is computed as B + b
+// Step 1.1:     Get B and b
 
-
-{.mmf
-       nop.m 0
-       // C7, C9
-       ldfpd F_C7, F_C9 = [r3], 16
-       nop.f 0;;
-}
+// atan2 will return
+//   f8  = Z_hi
+//   f10 = Z_lo
+//   f11 = s_lo
 
 
+{ .mfi
+(p0)  mov            asin_GR_fffe = 0xfffe                      
+(p0)   fmerge.se f8 = asin_ABS_NORM_f8, asin_ABS_NORM_f8                                   
+nop.i 0
+};;
 
-{.mfi
-       // pi/2 (low, high)
-       ldfpd F_PI2_LO, F_PI2_HI = [r3], 16
-       // c9*x+c8
-       fma.s1 F_S89 = F_X, F_CS9, F_CS8
-       nop.i 0
-}
-
-{.mfi
-       nop.m 0
-       // x^2
-       fma.s1 F_X2 = F_X, F_X, f0
-       nop.i 0;;
+{ .mmf
+nop.m 0
+(p0)   setf.exp       asin_HALF = asin_GR_fffe                   
+(p0)   fmerge.se f12 = asin_NORM_f8, asin_NORM_f8 ;;                         
 }
 
 
-{.mfi
-       nop.m 0
-       // y*(1-s^2)*x
-       fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
-       nop.i 0
-}
-
-{.mfi
-       // C11, C13
-       ldfpd F_C11, F_C13 = [r3], 16
-       // c7*x+c6
-       fma.s1 F_S67 = F_X, F_CS7, F_CS6
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fcmp.lt.unc.s1 p6,p7 = asin_ABS_NORM_f8, asin_HALF        
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       // C15, C17
-       ldfpd F_C15, F_C17 = [r3], 16
-       // c3*x+c2
-       fma.s1 F_S23 = F_X, F_CS3, F_CS2
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p7)  fma.s1         asin_D   = f1,f1,asin_ABS_NORM_f8          
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // c5*x+c4
-       fma.s1 F_S45 = F_X, F_CS5, F_CS4
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p7)  fms.s1         asin_C   = f1,f1,asin_ABS_NORM_f8          
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (s_s)^2
-       fma.s1 F_DS = F_S, F_S, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p7)  fma.s1         asin_B   = asin_C, asin_D, f0              
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-       // 1-(s_s)^2
-       fnma.s1 F_1S2_S = F_S, F_S, f1
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p7)  fms.s1         asin_1mD = f1,f1,asin_D                    
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // y*(1-s^2)*x^2
-       fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p7)  fma.s1         asin_Dd  = asin_1mD,f1, asin_ABS_NORM_f8   
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-       // x^4
-       fma.s1 F_X4 = F_X2, F_X2, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p7)  fms.s1         asin_Bb  = asin_C, asin_D, asin_B          
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c9*x^3+..+c6
-       fma.s1 F_S69 = F_X2, F_S89, F_S67
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p7)  fma.s1         asin_Bb  = asin_C, asin_Dd, asin_Bb        
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // c5*x^3+..+c2
-       fma.s1 F_S25 = F_X2, F_S45, F_S23
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p6)  fma.s1         asin_C   = asin_ABS_NORM_f8, asin_ABS_NORM_f8, f0     
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // ((s_s)^2-s^2)
-       fnma.s1 F_DS = f8, f8, F_DS
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p6)  fms.s1         asin_B   = f1, f1, asin_C                             
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-       // (pi/2)_high-y*(1-(s_s)^2)
-       fnma.s1 F_HI = F_Y, F_1S2_S, F_PI2_HI
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p6)  fms.s1         asin_Cc  = asin_ABS_NORM_f8, asin_ABS_NORM_f8, asin_C 
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c9*x^7+..+c2
-       fma.s1 F_S29 = F_X4, F_S69, F_S25
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_Hh     = asin_HALF, asin_B, f0                   
+      nop.i 999
 }
 
-
-{.mfi
-       nop.m 0
-       // -(y*(1-(s_s)^2))_high
-       fms.s1 F_1S2_HI = F_HI, f1, F_PI2_HI
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p6)  fms.s1         asin_1mB = f1, f1, asin_B                             
+      nop.i 999 ;;
 }
 
+// Step 1.2: 
+// sqrt(B + b) is computed as W + w
+// Get W
 
-{.mfi
-       nop.m 0
-       // (PS29*x^2+x)*y*(1-s^2)
-       fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  frsqrta.s1     asin_y0,p8  = asin_B                                  
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // y*(1-(s_s)^2)-(y*(1-s^2))_high
-       fma.s1 F_DS2 = F_Y, F_1S2_S, F_1S2_HI
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p6)  fms.s1         asin_1mBmC = asin_1mB, f1, asin_C                     
+      nop.i 999 ;;
 }
 
-
-
-{.mfi
-       nop.m 0
-       // R ~ sqrt(1-s^2)
-       // (used for polynomial evaluation)
-       fnma.s1 F_R = F_S19, f1, F_Y1S2
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_t1     = asin_y0, asin_y0, f0                    
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // y*(1-s^2)-(y*(1-s^2))_high
-       fma.s1 F_DS2 = F_Y, F_DS, F_DS2
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p6)  fms.s1         asin_Bb  = asin_1mBmC, f1, asin_Cc                    
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // (pi/2)_low+(PS29*x^2)*y*(1-s^2)
-       fma.s1 F_S29 = F_Y1S2X2, F_S29, F_PI2_LO
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        asin_t2     = asin_t1, asin_Hh, asin_HALF             
+      nop.i 999 ;;
 }
 
-
-
-{.mfi
-       nop.m 0
-       // R^2
-       fma.s1 F_R2 = F_R, F_R, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_y1     = asin_t2, asin_y0, asin_y0               
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (pi/2)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)
-       fms.s1 F_S29 = F_S29, f1, F_DS2
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_t3     = asin_y1, asin_Hh, f0                    
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c7+c9*R^2
-       fma.s1 F_P79 = F_C9, F_R2, F_C7
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        asin_t4     = asin_t3, asin_y1, asin_HALF             
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c3+c5*R^2
-       fma.s1 F_P35 = F_C5, F_R2, F_C3
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_y2     = asin_t4, asin_y1, asin_y1               
+      nop.i 999 ;;
 }
 
-
-
-{.mfi
-       nop.m 0
-       // R^4
-       fma.s1 F_R4 = F_R2, F_R2, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_S      = asin_B, asin_y2, f0                     
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-       // R^3
-       fma.s1 F_R3 = F_R2, F_R, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_H      = asin_y2, asin_HALF, f0                  
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c11+c13*R^2
-       fma.s1 F_P1113 = F_C13, F_R2, F_C11
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_t5     = asin_Hh, asin_y2, f0                    
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // c15+c17*R^2
-       fma.s1 F_P1517 = F_C17, F_R2, F_C15
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        asin_Dd     = asin_S, asin_S, asin_B                  
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (pi/2)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)+y*(1-s^2)*x
-       fma.s1 F_S29 = F_Y1S2, F_X, F_S29
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_W      = asin_Dd, asin_H, asin_S                 
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c11+c13*R^2+c15*R^4+c17*R^6
-       fma.s1 F_P1117 = F_P1517, F_R4, F_P1113
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_2W       = asin_W, f1, asin_W                    
+      nop.i 999
 }
 
-{.mfi
-       nop.m 0
-       // c3+c5*R^2+c7*R^4+c9*R^6
-       fma.s1 F_P39 = F_P79, F_R4, F_P35
-       nop.i 0;;
+// Step 1.3
+// Get w
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        asin_BmWW     = asin_W, asin_W, asin_B                
+      nop.i 999 ;;
 }
 
+// Step 2
+// asin(x) = atan2(X,sqrt(1-X*X))
+//         = atan2(X, W) -Xw
+// corr = Xw
+// asin(x) = Z_hi + (s_lo*Z_lo - corr)
+// Call atan2(X, W)
+// Save W in f9 
+// Save X in f12 
+// Save w in f13
 
-{.mfi
-       nop.m 0
-       // R^8
-       fma.s1 F_R8 = F_R4, F_R4, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)   fmerge.se f9 = asin_W, asin_W                                      
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // c3+c5*R^2+c7*R^4+c9*R^6+..+c17*R^14
-       fma.s1 F_P317 = F_P1117, F_R8, F_P39
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_BmWWpb   = asin_BmWW, f1, asin_Bb                
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // (pi/2)_low-(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
-       // -(y*(1-s^2))_high)+y*(1-s^2)*x - P3, 17
-       fnma.s1 F_S29 = F_P317, F_R3, F_S29
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  frcpa.s1       asin_1d2W,p9  = f1, asin_2W                           
+      nop.i 999 ;;
 }
 
-{.mfi
-       nop.m 0
-       // set sign
-  (p6) fnma.s1 F_S29 = F_S29, f1, f0
-       nop.i 0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         asin_Ww       = asin_BmWWpb, asin_1d2W, f0            
+      nop.i 999 ;;
 }
+.endp asinl
+ASM_SIZE_DIRECTIVE(asinl)
 
-{.mfi
-       nop.m 0
-  (p6) fnma.s1 F_HI = F_HI, f1, f0
-       nop.i 0;;
+.proc __libm_callout
+__libm_callout:
+.prologue
+{ .mfi
+        nop.m 0
+        nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+};;
+{ .mfi
+        mov GR_SAVE_GP=gp                       // Save gp
+        nop.f 0
+.save   b0, GR_SAVE_B0
+        mov GR_SAVE_B0=b0                       // Save b0
 }
-
-
+.body
 {.mfb
-       nop.m 0
-       // Result:
-       // (pi/2)_low-(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
-       // -(y*(1-s^2))_high)+y*(1-s^2)*x - P3, 17
-       // +(pi/2)_high-(y*(1-s^2))_high
-       fma.s0 f8 = F_S29, f1, F_HI
-       br.ret.sptk b0;;
-}
-
-
-
-
-
-
-
-
-
-       ASINL_SPECIAL_CASES:
-
-{.mfi
-       alloc r32 = ar.pfs, 1, 4, 4, 0
-       // check if the input is a NaN, or unsupported format
-       // (i.e. not infinity or normal/denormal)
-       fclass.nm p7, p8 = f8, 0x3f
-       // pointer to pi/2
-       add r3 = 48, r3;;
-}
-
+        nop.m 0
+(p0)    fmerge.se f13 = asin_Ww, asin_Ww                                   
+(p0)    br.call.sptk.many  b0=__libm_atan2_reg#                  
+};;
+{ .mfi
+        mov   gp = GR_SAVE_GP                  // Restore gp
+(p0)    fma.s1  asin_XWw  = asin_ABS_NORM_f8,f13,f0             
+        mov   b0 = GR_SAVE_B0                  // Restore return address
+};;
+// asin_XWw = Xw = corr
+// asin_low = (s_lo * Z_lo - corr)
+// f8       = Z_hi + (s_lo * Z_lo - corr)
 
-{.mfi
-       // load pi/2
-       ldfpd F_PI2_HI, F_PI2_LO = [r3]
-       // get |s|
-       fmerge.s F_S = f0, f8
-       nop.i 0
-}
+{ .mfi
+        nop.m 999
+(p0)    fms.s1  asin_low  = f11, f10, asin_XWw                                
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+};;
 
-{.mfb
-       nop.m 0
-       // if NaN, quietize it, and return
- (p7) fma.s0 f8 = f8, f1, f0
- (p7) br.ret.spnt b0;;
+{ .mfi
+      nop.m 999
+(p0)   fma.s0  f8        = f8, f1, asin_low                                
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // |s| = 1 ?
-       fcmp.eq.s0 p9, p0 = F_S, f1
-       nop.i 0
+{ .mfb
+      nop.m 999
+(p0)   fmerge.s f8 = f12,f8 
+(p0)  br.ret.sptk   b0 ;;                                                    
 }
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
 
-{.mfi
-       nop.m 0
-       // load FR_X
-       fma.s1 FR_X = f8, f1, f0
-       // load error tag
-       mov GR_Parameter_TAG = 60;;
-}
+.proc SPECIAL
+SPECIAL:
+L(ASIN_ERROR_RETURN): 
 
+// If X is 1, return (sign of X)pi/2
 
-{.mfb
-       nop.m 0
-       // change sign if s = -1
- (p6)  fnma.s1 F_PI2_HI = F_PI2_HI, f1, f0
-       nop.b 0
+{ .mfi
+      nop.m 999
+(p0)  fcmp.eq.unc p6,p7 = asin_ABS_NORM_f8,f1   
+      nop.i 999 ;;
 }
 
-{.mfb
-       nop.m 0
-       // change sign if s = -1
- (p6)  fnma.s1 F_PI2_LO = F_PI2_LO, f1, f0
-       nop.b 0;;
+{ .mfb
+(p6) ldfe          asin_pi_by_2_lo  = [r40] 
+(p6) fmerge.s      asin_pi_by_2 = f8,asin_pi_by_2          
+     nop.b 0;;
 }
 
-{.mfb
-       nop.m 0
-       // if s = 1, result is pi/2
- (p9) fma.s0 f8 = F_PI2_HI, f1, F_PI2_LO
-       // return if |s| = 1
- (p9) br.ret.sptk b0;;
+// If X is a NAN, leave
+// qnan snan inf norm     unorm 0 -+
+// 1    1    0   0        0     0 11
+{ .mfb
+      nop.m 999
+(p6)  fma.s0     f8 = f8,asin_pi_by_2_lo,asin_pi_by_2              
+(p6)  br.ret.spnt   b0                           
 }
-
-
-{.mfi
-       nop.m 0
-       // get Infinity
-       frcpa.s1 FR_RESULT, p0 = f1, f0
-       nop.i 0;;
+{ .mfi
+      nop.m 999
+(p0)  fclass.m.unc p12,p0 = f8, 0xc3            
+      nop.i 999 ;;
 }
 
-
-{.mfi
-       nop.m 0
-       // return QNaN indefinite (0*Infinity)
-       fma.s0 FR_RESULT = f0, FR_RESULT, f0
-       nop.i 0;;
+{ .mfb
+      nop.m 999
+(p12) fma.s0 f8 = f8,f1,f0                       
+(p12) br.ret.spnt   b0 ;;                          
 }
+{ .mfi
+(p0)   mov   GR_Parameter_TAG = 60                   
+(p0)   frcpa f10, p6 = f0, f0                   
+nop.i 0
+};;
+.endp SPECIAL
+ASM_SIZE_DIRECTIVE(SPECIAL)
 
-
-GLOBAL_LIBM_END(asinl)
-
-
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
-// (1)
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
@@ -2470,29 +742,24 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                          // Create new stack
+        add sp=-64,sp                           // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                      // Save gp
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
-
-
-// (2)
 { .mmi
-        stfe [GR_Parameter_Y] = f1,16         // Store Parameter 2 on stack
-        add GR_Parameter_X = 16,sp            // Parameter 1 address
+        stfe [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                     // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
-
 .body
-// (3)
 { .mib
-        stfe [GR_Parameter_X] = FR_X              // Store Parameter 1 on stack
+        stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y
         nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfe [GR_Parameter_Y] = FR_RESULT             // Store Parameter 3 on stack
+        stfe [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
         br.call.sptk b0=__libm_error_support#   // Call error handling function
 };;
@@ -2501,27 +768,23 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         nop.m 0
         add   GR_Parameter_RESULT = 48,sp
 };;
-
-// (4)
 { .mmi
         ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
         add   sp = 64,sp                       // Restore stack pointer
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
-        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   gp = GR_SAVE_GP                  // Restore gp 
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
-};;
+};; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
 
-
-
-
-
+.type   __libm_atan2_reg#,@function
+.global __libm_atan2_reg#
diff --git a/sysdeps/ia64/fpu/e_atan2.S b/sysdeps/ia64/fpu/e_atan2.S
index 8be7c6cec5..38dd2f749a 100644
--- a/sysdeps/ia64/fpu/e_atan2.S
+++ b/sysdeps/ia64/fpu/e_atan2.S
@@ -1,10 +1,10 @@
 .file "atan2.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,38 +20,33 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00  Initial version
-// 04/04/00  Unwind support added
-// 08/15/00  Bundle added after call to __libm_error_support to properly
-//           set [the previously overwritten] GR_Parameter_RESULT.
-// 08/17/00  Changed predicate register macro-usage to direct predicate
-//           names due to an assembler bug.
-// 09/28/00  Updated to set invalid on SNaN inputs
-// 01/19/01  Fixed flags for small results
-// 04/13/01  Rescheduled to make all paths faster
-// 05/20/02  Cleaned up namespace and sf0 syntax
-// 08/20/02  Corrected inexact flag and directed rounding symmetry bugs
-// 02/06/03  Reordered header: .section, .global, .proc, .align
-// 04/17/03  Added missing mutex directive
+// 2/02/00  Initial version
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
+//          set [the previously overwritten] GR_Parameter_RESULT.
+// 8/17/00  Changed predicate register macro-usage to direct predicate
+//          names due to an assembler bug.
+// 9/28/00  Updated to set invalid on SNaN inputs
+// 1/19/01  Fixed flags for small results
 //
 // API
 //==============================================================
@@ -60,12 +55,10 @@
 // Overview of operation
 //==============================================================
 //
-// The atan2 function returns values in the interval [-pi,+pi].
-//
 // There are two basic paths: swap true and swap false.
 // atan2(Y,X) ==> atan2(V/U) where U >= V. If Y > X, we must swap.
 //
-// p6  swap True    |Y| > |X|
+// p6  swap True    |Y| > |X| 
 // p7  swap False   |Y| <= |X|
 // p8  X+   (If swap=True p8=p9=0)
 // p9  X-
@@ -73,21 +66,21 @@
 // all the other predicates p10 thru p15 are false for the main path
 //
 // Simple trigonometric identities show
-//   Region 1 (-45 to +45 degrees):
+//   Region 1 (-45 to +45 degrees):  
 //         X>0, |Y|<=X, V=Y, U=X     atan2(Y,X) = sgnY * (0 + atan(V/U))
 //
-//   Region 2 (-90 to -45 degrees, and +45 to +90 degrees):
+//   Region 2 (-90 to -45 degrees, and +45 to +90 degrees):  
 //         X>0, |Y|>X, V=X, U=Y      atan2(Y,X) = sgnY * (pi/2 - atan(V/U))
 //
-//   Region 3 (-135 to -90 degrees, and +90 to +135 degrees):
+//   Region 3 (-135 to -90 degrees, and +90 to +135 degrees):  
 //         X<0, |Y|>X, V=X, U=Y      atan2(Y,X) = sgnY * (pi/2 + atan(V/U))
 //
-//   Region 4 (-180 to -135 degrees, and +135 to +180 degrees):
+//   Region 4 (-180 to -135 degrees, and +135 to +180 degrees):  
 //         X<0, |Y|<=X, V=Y, U=X      atan2(Y,X) = sgnY * (pi - atan(V/U))
 //
 // So the result is always of the form atan2(Y,X) = P + sgnXY * atan(V/U)
 //
-// We compute atan(V/U) from the identity
+// We compute atan(V/U) from the identity 
 //      atan(z) + atan([(V/U)-z] / [1+(V/U)z])
 //      where z is a limited precision approximation (16 bits) to V/U
 //
@@ -131,13 +124,13 @@
 //             +number           -0          +pi/2
 //             -number           -0          -pi/2
 //
-//             +0                +number     +0
-//             -0                +number     -0
+//             +0                +number     +0 
+//             -0                +number     -0 
 //             +0                -number     +pi
 //             -0                -number     -pi
 //
-//             +0                +0          +0
-//             -0                +0          -0
+//             +0                +0          +0 
+//             -0                +0          -0 
 //             +0                -0          +pi
 //             -0                -0          -pi
 //
@@ -145,26 +138,16 @@
 //            anything        NaN           quiet X
 
 // atan2(+-0/+-0) sets double error tag to 37
+// atan2(+-0/+-0) sets single error tag to 38
 
-// Registers used
-//==============================================================
-
-// predicate registers used:
-// p6 -> p15
-
-// floating-point registers used:
-// f8, f9 input
-// f32 -> f119
-
-// general registers used
-// r32 -> r41
+#include "libm_support.h"
 
 // Assembly macros
 //==============================================================
 
 EXP_AD_P1                    = r33
 EXP_AD_P2                    = r34
-rsig_near_one                = r35
+atan2_GR_sml_exp             = r35
 
 
 GR_SAVE_B0                   = r35
@@ -176,23 +159,22 @@ GR_Parameter_Y               = r39
 GR_Parameter_RESULT          = r40
 atan2_GR_tag                 = r41
 
-atan2_Y                      = f8
+
 atan2_X                      = f9
+atan2_Y                      = f8
 
 atan2_u1_X                   = f32
 atan2_u1_Y                   = f33
-atan2_z2_X                   = f34
-atan2_z2_Y                   = f35
-
+atan2_Umax                   = f34
+atan2_Vmin                   = f35
 atan2_two                    = f36
-atan2_B1sq_Y                 = f37
+atan2_absX                   = f37
 atan2_z1_X                   = f38
 atan2_z1_Y                   = f39
 atan2_B1X                    = f40
-
 atan2_B1Y                    = f41
-atan2_wp_X                   = f42
-atan2_B1sq_X                 = f43
+atan2_wp                     = f42
+atan2_B1sq                   = f43
 atan2_z                      = f44
 atan2_w                      = f45
 
@@ -201,149 +183,178 @@ atan2_P1                     = f47
 atan2_P2                     = f48
 atan2_P3                     = f49
 atan2_P4                     = f50
-
 atan2_P5                     = f51
 atan2_P6                     = f52
 atan2_P7                     = f53
 atan2_P8                     = f54
 atan2_P9                     = f55
-
 atan2_P10                    = f56
 atan2_P11                    = f57
 atan2_P12                    = f58
 atan2_P13                    = f59
 atan2_P14                    = f60
-
 atan2_P15                    = f61
 atan2_P16                    = f62
 atan2_P17                    = f63
 atan2_P18                    = f64
 atan2_P19                    = f65
-
 atan2_P20                    = f66
 atan2_P21                    = f67
 atan2_P22                    = f68
-atan2_tmp                    = f68
-atan2_pi_by_2                = f69
-atan2_sgn_pi_by_2            = f69
-atan2_V13                    = f70
+atan2_Pi_by_2                = f69
 
+atan2_V13                    = f70
 atan2_W11                    = f71
 atan2_E                      = f72
-atan2_wp_Y                   = f73
+atan2_gamma                  = f73
 atan2_V11                    = f74
 atan2_V12                    = f75
-
 atan2_V7                     = f76
 atan2_V8                     = f77
 atan2_W7                     = f78
 atan2_W8                     = f79
 atan2_W3                     = f80
-
 atan2_W4                     = f81
 atan2_V3                     = f82
 atan2_V4                     = f83
 atan2_F                      = f84
 atan2_gV                     = f85
-
 atan2_V10                    = f86
 atan2_zcub                   = f87
 atan2_V6                     = f88
 atan2_V9                     = f89
 atan2_W10                    = f90
-
 atan2_W6                     = f91
 atan2_W2                     = f92
 atan2_V2                     = f93
+
 atan2_alpha                  = f94
 atan2_alpha_1                = f95
-
 atan2_gVF                    = f96
 atan2_V5                     = f97
 atan2_W12                    = f98
 atan2_W5                     = f99
 atan2_alpha_sq               = f100
-
 atan2_Cp                     = f101
 atan2_V1                     = f102
-atan2_ysq                    = f103
+
+atan2_sml_norm               = f103
+atan2_FR_tmp                 = f103
+
 atan2_W1                     = f104
 atan2_alpha_cub              = f105
-
 atan2_C                      = f106
-atan2_xsq                    = f107
+atan2_P                      = f107
 atan2_d                      = f108
 atan2_A_hi                   = f109
 atan2_dsq                    = f110
-
 atan2_pd                     = f111
 atan2_A_lo                   = f112
 atan2_A                      = f113
+
 atan2_Pp                     = f114
-atan2_sgnY                   = f115
 
-atan2_sig_near_one           = f116
-atan2_near_one               = f116
+atan2_sgnY                   = f116
 atan2_pi                     = f117
-atan2_sgn_pi                 = f117
-atan2_3pi_by_4               = f118
-atan2_pi_by_4                = f119
+atan2_sgnX                   = f118
+atan2_sgnXY                  = f119
+
+atan2_3pi_by_4               = f120
+atan2_pi_by_4                = f121
+
+//atan2_sF                     = p7
+//atan2_sT                     = p6
 
+// These coefficients are for atan2. 
+// You can also use this set to substitute those used in the |X| <= 1 case for atan; 
+// BUT NOT vice versa.
 
 /////////////////////////////////////////////////////////////
 
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(atan2_tb1)
-data8 0xA21922DC45605EA1 ,  0x00003FFA // P11
+atan2_tb1:
+ASM_TYPE_DIRECTIVE(atan2_tb1,@object)
 data8 0xB199DD6D2675C40F ,  0x0000BFFA // P10
-data8 0xC2F01E5DDD100DBE ,  0x00003FFA // P9
+data8 0xA21922DC45605EA1 ,  0x00003FFA // P11
 data8 0xD78F28FC2A592781 ,  0x0000BFFA // P8
-data8 0xF0F03ADB3FC930D3 ,  0x00003FFA // P7
-data8 0x88887EBB209E3543 ,  0x0000BFFB // P6
+data8 0xC2F01E5DDD100DBE ,  0x00003FFA // P9
 data8 0x9D89D7D55C3287A5 ,  0x00003FFB // P5
-data8 0xBA2E8B9793955C77 ,  0x0000BFFB // P4
-data8 0xE38E38E320A8A098 ,  0x00003FFB // P3
-data8 0x9249249247E37913 ,  0x0000BFFC // P2
+data8 0xF0F03ADB3FC930D3 ,  0x00003FFA // P7
+data8 0xF396268151CFB11C ,  0x00003FF7 // P17 
+data8 0x9D3436AABE218776 ,  0x00003FF5 // P19
+data8 0x80D601879218B53A ,  0x00003FFA // P13
+data8 0xA2270D30A90AA220 ,  0x00003FF9 // P15
 data8 0xCCCCCCCCCCC906CD ,  0x00003FFC // P1
-data8 0xAAAAAAAAAAAAA8A9 ,  0x0000BFFD // P0
-data8 0xC90FDAA22168C235 ,  0x00004000 // pi
-LOCAL_OBJECT_END(atan2_tb1)
+data8 0xE38E38E320A8A098 ,  0x00003FFB // P3
+data8 0xFE7E52D2A89995B3 ,  0x0000BFEC // P22
+data8 0xC90FDAA22168C235 ,  0x00003FFE // pi/4
+ASM_SIZE_DIRECTIVE(atan2_tb1)
 
-LOCAL_OBJECT_START(atan2_tb2)
-data8 0xCE585A259BD8374C ,  0x00003FF0 // P21
+atan2_tb2:
+ASM_TYPE_DIRECTIVE(atan2_tb2,@object)
 data8 0x9F90FB984D8E39D0 ,  0x0000BFF3 // P20
-data8 0x9D3436AABE218776 ,  0x00003FF5 // P19
-data8 0xDEC343E068A6D2A8 ,  0x0000BFF6 // P18
-data8 0xF396268151CFB11C ,  0x00003FF7 // P17
+data8 0xCE585A259BD8374C ,  0x00003FF0 // P21
+data8 0xBA2E8B9793955C77 ,  0x0000BFFB // P4
+data8 0x88887EBB209E3543 ,  0x0000BFFB // P6
 data8 0xD818B4BB43D84BF2 ,  0x0000BFF8 // P16
-data8 0xA2270D30A90AA220 ,  0x00003FF9 // P15
-data8 0xD5F4F2182E7A8725 ,  0x0000BFF9 // P14
-data8 0x80D601879218B53A ,  0x00003FFA // P13
+data8 0xDEC343E068A6D2A8 ,  0x0000BFF6 // P18
 data8 0x9297B23CCFFB291F ,  0x0000BFFA // P12
-data8 0xFE7E52D2A89995B3 ,  0x0000BFEC // P22
+data8 0xD5F4F2182E7A8725 ,  0x0000BFF9 // P14
+data8 0xAAAAAAAAAAAAA8A9 ,  0x0000BFFD // P0
+data8 0x9249249247E37913 ,  0x0000BFFC // P2
 data8 0xC90FDAA22168C235 ,  0x00003FFF // pi/2
-data8 0xC90FDAA22168C235 ,  0x00003FFE // pi/4
+data8 0xC90FDAA22168C235 ,  0x00004000 // pi
 data8 0x96cbe3f9990e91a8 ,  0x00004000 // 3pi/4
-LOCAL_OBJECT_END(atan2_tb2)
+ASM_SIZE_DIRECTIVE(atan2_tb2)
+
+
 
 
+.align 32
+.global atan2#
+#ifdef _LIBC
+.global __atan2#
+.global __ieee754_atan2#
+#endif
 
+////////////////////////////////////////////////////////
 
 .section .text
-GLOBAL_IEEE754_ENTRY(atan2)
+.align 32
+
+.proc  atan2#
+atan2:
+#ifdef _LIBC
+.proc  __atan2#
+__atan2:
+.proc  __ieee754_atan2#
+__ieee754_atan2:
+#endif
+// qnan snan inf norm     unorm 0 -+
+// 0    0    1   0        0     0 11
+
+
+//         Y NAN?     p10 p11
+// p10 ==> quiet Y and return
+// p11     X NAN?     p12, p13 
+// p12 ==> quiet X and return
 
 { .mfi
            alloc        r32           = ar.pfs,1,5,4,0
            frcpa.s1     atan2_u1_X,p6 = f1,atan2_X
-           nop.i 999
+           addl         EXP_AD_P2   = @ltoff(atan2_tb2), gp
 }
 { .mfi
            addl         EXP_AD_P1   = @ltoff(atan2_tb1), gp
-           fma.s1       atan2_two  = f1,f1,f1
+           fclass.m.unc p10,p11 = f8, 0xc3
            nop.i 999
 ;;
 }
@@ -355,233 +366,256 @@ GLOBAL_IEEE754_ENTRY(atan2)
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_xsq  = atan2_X,atan2_X,f0
+           fma.s1       atan2_two  = f1,f1,f1 
            nop.i 999
 ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fclass.m p10,p0 = atan2_Y, 0xc3     // Test for y=nan
-           nop.i 999
-}
-{ .mfi
-           nop.m 999
-           fma.s1       atan2_ysq  = atan2_Y,atan2_Y,f0
+           ld8 EXP_AD_P2 = [ EXP_AD_P2]
+           famax.s1     atan2_Umax =  f8,f9
            nop.i 999
 }
 ;;
 
 { .mfi
-           add  EXP_AD_P2 = 0xd0,EXP_AD_P1
-           fclass.m p12,p0 = atan2_X, 0xc3     // Test for x nan
+           nop.m 999
+           fmerge.s     atan2_absX = f0,atan2_X
            nop.i 999
 }
 ;;
 
-
 // p10 Y NAN, quiet and return
 { .mfi
-           ldfe         atan2_P11  = [EXP_AD_P1],16
+           ldfe         atan2_P10  = [EXP_AD_P1],16
            fmerge.s     atan2_sgnY = atan2_Y,f1
            nop.i 999
 }
 { .mfb
-           ldfe         atan2_P21  = [EXP_AD_P2],16
-(p10)      fma.d.s0 f8 = atan2_Y,atan2_X,f0   // If y=nan, result quietized y
-(p10)      br.ret.spnt b0        // Exit if y=nan
+           nop.m 999
+(p10)      fma.d f8 = f8,f9,f0 
+(p10)      br.ret.spnt b0
 ;;
 }
 
 
-{ .mfi
-           ldfe         atan2_P10  = [EXP_AD_P1],16
+{ .mmf
+           ldfe         atan2_P11  = [EXP_AD_P1],16
+           ldfe         atan2_P20  = [EXP_AD_P2],16
+           fmerge.s     atan2_sgnX = atan2_X,f1
+;;
+}
+
+
+{ .mfi 
+           ldfe         atan2_P8   = [EXP_AD_P1],16
            fma.s1       atan2_z1_X = atan2_u1_X, atan2_Y, f0
            nop.i 999
 }
-{ .mfi
-           ldfe         atan2_P20  = [EXP_AD_P2],16
-           fnma.s1      atan2_B1X  = atan2_u1_X, atan2_X, atan2_two
+{ .mfi 
+
+           ldfe         atan2_P21  = [EXP_AD_P2],16
+           fma.s1       atan2_z1_Y = atan2_u1_Y, atan2_X, f0
            nop.i 999
 ;;
 }
 
-{ .mfi
+{ .mfi 
            ldfe         atan2_P9   = [EXP_AD_P1],16
-           fma.s1       atan2_z1_Y = atan2_u1_Y, atan2_X, f0
+           fnma.s1      atan2_B1X  = atan2_u1_X, atan2_X, atan2_two
            nop.i 999
 }
-{ .mfi
-           ldfe         atan2_P19  = [EXP_AD_P2],16
+{ .mfi 
+
+           ldfe         atan2_P4   = [EXP_AD_P2],16
            fnma.s1      atan2_B1Y  = atan2_u1_Y, atan2_Y, atan2_two
            nop.i 999
-}
 ;;
-
-{ .mfi
-           ldfe         atan2_P8   = [EXP_AD_P1],16
-           fma.s1       atan2_z2_X = atan2_u1_X, atan2_ysq, f0
-           nop.i 999
 }
+
+// p6 (atan2_sT) true if swap
+// p7 (atan2_sF) true if no swap
+// p11 ==> Y !NAN;  X NAN?
+
 { .mfi
-           ldfe         atan2_P18  = [EXP_AD_P2],16
-           fma.s1       atan2_z2_Y = atan2_u1_Y, atan2_xsq, f0
+           ldfe         atan2_P5   = [EXP_AD_P1],16
+//           fcmp.eq.unc.s1 atan2_sF,atan2_sT    = atan2_Umax, atan2_X
+           fcmp.eq.unc.s1 p7,p6    = atan2_Umax, atan2_X
            nop.i 999
 }
-;;
-
-// p10 ==> x  inf     y ?
-// p11 ==> x !inf     y ?
 { .mfi
-           ldfe         atan2_P7   = [EXP_AD_P1],16
-           fclass.m p10,p11 = atan2_X, 0x23    // test for x inf
+           ldfe         atan2_P6   = [EXP_AD_P2],16
+(p11)      fclass.m.unc p12,p13    = f9, 0xc3
            nop.i 999
-}
-{ .mfb
-           ldfe         atan2_P17  = [EXP_AD_P2],16
-(p12)      fma.d.s0        f8 = atan2_X,atan2_Y,f0     // If x nan, result quiet x
-(p12)      br.ret.spnt b0                 // Exit for x nan
 ;;
 }
 
-// p6 true if swap,    means |y| >  |x|    or ysq > xsq
-// p7 true if no swap, means |x| >= |y|    or xsq >= ysq
 { .mmf
-           ldfe         atan2_P6   = [EXP_AD_P1],16
+           ldfe         atan2_P7   = [EXP_AD_P1],16
            ldfe         atan2_P16  = [EXP_AD_P2],16
-           fcmp.ge.s1 p7,p6    = atan2_xsq, atan2_ysq
+           famin.s1     atan2_Vmin =  f8,f9
 ;;
 }
 
+// p8 true if X positive
+// p9 true if X negative
+// both are false if swap is true
 { .mfi
-           ldfe         atan2_P5   = [EXP_AD_P1],16
-           fma.s1       atan2_wp_X   = atan2_z1_X, atan2_z1_X, f0
+           ldfe         atan2_P17  = [EXP_AD_P1],16
+//(atan2_sF) fcmp.eq.unc.s1 p8,p9    = atan2_sgnX,f1
+(p7) fcmp.eq.unc.s1 p8,p9    = atan2_sgnX,f1
            nop.i 999
 }
 { .mfi
-           ldfe         atan2_P15       = [EXP_AD_P2],16
-           fma.s1       atan2_B1sq_X = atan2_B1X, atan2_B1X, f0
+           ldfe         atan2_P18  = [EXP_AD_P2],16
+           fma.s1       atan2_sgnXY     = atan2_sgnX, atan2_sgnY, f0 
            nop.i 999
 ;;
 }
 
+
 { .mfi
-           ldfe         atan2_P4   = [EXP_AD_P1],16
-(p6)       fma.s1       atan2_wp_Y   = atan2_z1_Y, atan2_z1_Y, f0
+           ldfe         atan2_P19  = [EXP_AD_P1],16
+//(atan2_sF) fma.s1       atan2_wp   = atan2_z1_X, atan2_z1_X, f0
+(p7) fma.s1       atan2_wp   = atan2_z1_X, atan2_z1_X, f0
            nop.i 999
 }
 { .mfi
-           ldfe         atan2_P14  = [EXP_AD_P2],16
-(p6)       fma.s1       atan2_B1sq_Y = atan2_B1Y, atan2_B1Y, f0
+           ldfe         atan2_P12  = [EXP_AD_P2],16
+//(atan2_sT) fma.s1       atan2_wp   = atan2_z1_Y, atan2_z1_Y, f0
+(p6) fma.s1       atan2_wp   = atan2_z1_Y, atan2_z1_Y, f0
            nop.i 999
 ;;
 }
 
+
 { .mfi
-           ldfe         atan2_P3        = [EXP_AD_P1],16
-(p6)       fma.s1       atan2_E         = atan2_z2_Y, atan2_B1Y, atan2_Y
+           ldfe         atan2_P13  = [EXP_AD_P1],16
+//(atan2_sF) fma.s1       atan2_z         = atan2_z1_X, atan2_B1X, f0
+(p7) fma.s1       atan2_z         = atan2_z1_X, atan2_B1X, f0
            nop.i 999
 }
 { .mfi
-           ldfe         atan2_P13  = [EXP_AD_P2],16
-(p7)       fma.s1       atan2_E         = atan2_z2_X, atan2_B1X, atan2_X
+           ldfe         atan2_P14  = [EXP_AD_P2],16
+//(atan2_sT) fma.s1       atan2_z         = atan2_z1_Y, atan2_B1Y, f0
+(p6) fma.s1       atan2_z         = atan2_z1_Y, atan2_B1Y, f0
            nop.i 999
 ;;
 }
 
 
 { .mfi
-           ldfe         atan2_P2        = [EXP_AD_P1],16
-(p6)       fma.s1       atan2_z         = atan2_z1_Y, atan2_B1Y, f0
+           ldfe         atan2_P15       = [EXP_AD_P1],16
+//(atan2_sF) fma.s1       atan2_B1sq = atan2_B1X, atan2_B1X, f0
+(p7) fma.s1       atan2_B1sq = atan2_B1X, atan2_B1X, f0
            nop.i 999
 }
 { .mfi
-           ldfe         atan2_P12  = [EXP_AD_P2],16
-(p7)       fma.s1       atan2_z         = atan2_z1_X, atan2_B1X, f0
+           ldfe         atan2_P0        = [EXP_AD_P2],16
+//(atan2_sT) fma.s1       atan2_B1sq = atan2_B1Y, atan2_B1Y, f0
+(p6) fma.s1       atan2_B1sq = atan2_B1Y, atan2_B1Y, f0
            nop.i 999
 ;;
 }
 
 
+// p12 ==> X NAN, quiet and return
 { .mfi
            ldfe         atan2_P1        = [EXP_AD_P1],16
-           fcmp.eq.s0  p14,p15=atan2_X,atan2_Y  // Dummy for denorm and invalid
+           fmerge.s     atan2_Umax      = f0,atan2_Umax
            nop.i 999
 }
-{ .mlx
-           ldfe         atan2_P22       = [EXP_AD_P2],16
-           movl         rsig_near_one = 0x8000000000000001 // signif near 1.0
+{ .mfb
+           ldfe         atan2_P2        = [EXP_AD_P2],16
+(p12)      fma.d        f8 = f9,f8,f0
+(p12)      br.ret.spnt b0
 ;;
 }
 
 
-// p12 ==> x  inf     y inf
-// p13 ==> x  inf     y !inf
-{ .mmf
-           ldfe         atan2_P0        = [EXP_AD_P1],16
-           ldfe         atan2_pi_by_2   = [EXP_AD_P2],16
-(p10)      fclass.m.unc p12,p13 = atan2_Y, 0x23  // x inf, test if y inf
-;;
-}
-
+// p10 ==> x  inf     y ?
+// p11 ==> x !inf     y ?
 { .mfi
-           ldfe         atan2_pi        = [EXP_AD_P1],16
-(p6)       fma.s1       atan2_w         = atan2_wp_Y, atan2_B1sq_Y,f0
+           ldfe         atan2_P3        = [EXP_AD_P1],16
+           fmerge.s     atan2_Vmin      = f0,atan2_Vmin
            nop.i 999
 }
 { .mfi
-           ldfe         atan2_pi_by_4       = [EXP_AD_P2],16
-(p7)       fma.s1       atan2_w         = atan2_wp_X, atan2_B1sq_X,f0
+           ldfe         atan2_Pi_by_2   = [EXP_AD_P2],16
+           fclass.m.unc p10,p11 = f9, 0x23
            nop.i 999
 ;;
 }
 
+
+{ .mmf
+           ldfe         atan2_P22       = [EXP_AD_P1],16
+           ldfe         atan2_pi        = [EXP_AD_P2],16
+           nop.f 999
+;;
+}
+
 { .mfi
-           ldfe         atan2_3pi_by_4       = [EXP_AD_P2],16
-(p11)      fclass.m.unc p9,p0 = atan2_Y, 0x23  // x not inf, test if y inf
+           nop.m 999 
+           fcmp.eq.s0  p12,p13=f9,f8   // Dummy to catch denormal and invalid
            nop.i 999
 ;;
 }
 
+
 { .mfi
-           setf.sig      atan2_sig_near_one = rsig_near_one
-(p12)      fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x inf, y inf, test if x +inf
+           ldfe         atan2_pi_by_4       = [EXP_AD_P1],16
+//(atan2_sT) fmerge.ns    atan2_sgnXY     = atan2_sgnXY, atan2_sgnXY
+(p6) fmerge.ns    atan2_sgnXY     = atan2_sgnXY, atan2_sgnXY
            nop.i 999
 }
 { .mfi
-           nop.m 999
-(p6)       fnma.s1       atan2_gV        = atan2_Y, atan2_z, atan2_X
+           ldfe         atan2_3pi_by_4       = [EXP_AD_P2],16
+           fma.s1       atan2_w         = atan2_wp, atan2_B1sq,f0
            nop.i 999
 ;;
 }
 
+// p12 ==> x  inf     y inf
+// p13 ==> x  inf     y !inf
 { .mfi
            nop.m 999
-           frcpa.s1     atan2_F,p0     = f1, atan2_E
+           fmerge.s     atan2_z         = f0, atan2_z
+           nop.i 999
+;;
+}
+
+{ .mfi
+           nop.m 99
+(p10)      fclass.m.unc p12,p13 = f8, 0x23
            nop.i 999
 }
 { .mfi
-           nop.m 999
-(p7)       fnma.s1       atan2_gV        = atan2_X, atan2_z, atan2_Y
+           nop.m 99
+(p11)      fclass.m.unc p14,p15 = f8, 0x23
            nop.i 999
 ;;
 }
 
-// p13 ==> x  inf     y !inf
 { .mfi
            nop.m 999
-(p13)      fcmp.gt.unc.s1 p14,p15 = atan2_X,f0 // x inf, y !inf, test if x +inf
-           nop.i 999
+(p12)      fcmp.eq.unc.s1 p10,p11       = atan2_sgnX,f1
+           nop.i 99
+;;
 }
+
+
 { .mfb
-           nop.m 999
-(p9)       fma.d.s0  f8 = atan2_sgnY, atan2_pi_by_2, f0  // +-pi/2 if x !inf, y inf
-(p9)       br.ret.spnt b0      // exit if x not inf, y inf, result is +-pi/2
+           mov atan2_GR_sml_exp = 0x1  // Small exponent for making small norm
+(p14)      fma.d       f8 = atan2_sgnY, atan2_Pi_by_2, f0
+(p14)      br.ret.spnt b0
 ;;
 }
 
+// Make a very small normal in case we need to force inexact and underflow
 { .mfi
-           nop.m 999
+           setf.exp atan2_sml_norm = atan2_GR_sml_exp
            fma.s1       atan2_V13       = atan2_w, atan2_P11, atan2_P10
            nop.i 999
 }
@@ -592,58 +626,58 @@ GLOBAL_IEEE754_ENTRY(atan2)
 ;;
 }
 
+
 { .mfi
            nop.m 999
-           fma.s1       atan2_V11       = atan2_w, atan2_P9, atan2_P8
+           fma.s1       atan2_E         = atan2_Vmin, atan2_z, atan2_Umax
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_V12       = atan2_w, atan2_w, f0
+           fnma.s1      atan2_gamma     = atan2_Umax, atan2_z, f1
            nop.i 999
 ;;
 }
 
 { .mfi
            nop.m 999
-           fma.s1       atan2_V8        = atan2_w, atan2_P7 , atan2_P6
+           fma.s1       atan2_V11       = atan2_w, atan2_P9, atan2_P8
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_W8        = atan2_w, atan2_P19, atan2_P18
+           fma.s1       atan2_V12       = atan2_w, atan2_w, f0
            nop.i 999
 ;;
 }
 
 { .mfi
            nop.m 999
-           fnma.s1      atan2_alpha     = atan2_E, atan2_F, f1
+           fma.s1       atan2_V7        = atan2_w, atan2_P5 , atan2_P4 
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fnma.s1      atan2_alpha_1   = atan2_E, atan2_F, atan2_two
+           fma.s1       atan2_V8        = atan2_w, atan2_P7 , atan2_P6 
            nop.i 999
 ;;
 }
 
-
 { .mfi
            nop.m 999
-           fma.s1       atan2_V7        = atan2_w, atan2_P5 , atan2_P4
+           fma.s1       atan2_W7        = atan2_w, atan2_P17, atan2_P16 
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_W7        = atan2_w, atan2_P17, atan2_P16
+           fma.s1       atan2_W8        = atan2_w, atan2_P19, atan2_P18
            nop.i 999
 ;;
 }
 
 { .mfi
            nop.m 999
-           fma.s1       atan2_V4        = atan2_w, atan2_P3 , atan2_P2
+           fma.s1       atan2_W3        = atan2_w, atan2_P13, atan2_P12 
            nop.i 999
 }
 { .mfi
@@ -655,55 +689,55 @@ GLOBAL_IEEE754_ENTRY(atan2)
 
 { .mfi
            nop.m 999
-           fma.s1       atan2_V3        = atan2_w, atan2_P1 , atan2_P0
+           fma.s1       atan2_V3        = atan2_w, atan2_P1 , atan2_P0 
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_W3        = atan2_w, atan2_P13, atan2_P12
+           fma.s1       atan2_V4        = atan2_w, atan2_P3 , atan2_P2
            nop.i 999
 ;;
 }
 
 { .mfi
            nop.m 999
-           fma.s1       atan2_V10       = atan2_V12, atan2_V13, atan2_V11
+           fma.s1       atan2_zcub      = atan2_z, atan2_w, f0
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_gVF       = atan2_gV, atan2_F, f0
+           fnma.s1       atan2_gV        = atan2_Umax, atan2_z, atan2_Vmin 
            nop.i 999
 ;;
 }
 
 { .mfi
            nop.m 999
-           fma.s1       atan2_alpha_sq  = atan2_alpha, atan2_alpha, f0
+           frcpa.s1     atan2_F,p15     = f1, atan2_E
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_Cp        = atan2_alpha, atan2_alpha_1, f1
+           fma.s1       atan2_V10       = atan2_V12, atan2_V13, atan2_V11
            nop.i 999
 ;;
 }
 
 { .mfi
            nop.m 999
-           fma.s1       atan2_V9        = atan2_V12, atan2_V12, f0
+           fma.s1       atan2_V6        = atan2_V12, atan2_V8 , atan2_V7 
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_W10       = atan2_V12, atan2_P22 , atan2_W11
+           fma.s1       atan2_V9        = atan2_V12, atan2_V12, f0
            nop.i 999
 ;;
 }
 
 { .mfi
            nop.m 999
-           fma.s1       atan2_V6        = atan2_V12, atan2_V8 , atan2_V7
+           fma.s1       atan2_W10       = atan2_V12, atan2_P22 , atan2_W11
            nop.i 999
 }
 { .mfi
@@ -715,47 +749,65 @@ GLOBAL_IEEE754_ENTRY(atan2)
 
 { .mfi
            nop.m 999
-           fma.s1       atan2_V2        = atan2_V12, atan2_V4 , atan2_V3
+           fma.s1       atan2_W2        = atan2_V12, atan2_W4  , atan2_W3
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_W2        = atan2_V12, atan2_W4  , atan2_W3
+           fma.s1       atan2_V2        = atan2_V12, atan2_V4 , atan2_V3
            nop.i 999
 ;;
 }
 
-// p8 ==> y   0     x?
-// p9 ==> y  !0     x?
+
+// Both X and Y are INF
+// p10 ==> X +
+// p11 ==> X -
+.pred.rel "mutex",p10,p11
+{ .mfb
+           nop.m 999
+(p10)      fma.d       f8              = atan2_sgnY, atan2_pi_by_4, f0
+(p10)      br.ret.spnt b0
+}
+{ .mfb
+           nop.m 999
+(p11)      fma.d       f8              = atan2_sgnY, atan2_3pi_by_4, f0
+(p11)      br.ret.spnt b0
+;;
+}
+
+
+.pred.rel "mutex",p8,p9,p6
 { .mfi
            nop.m 999
-           fclass.m p8,p9 = atan2_Y, 0x07  // Test for y=0
+           fnma.s1      atan2_alpha     = atan2_E, atan2_F, f1
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_zcub      = atan2_z, atan2_w, f0
+           fnma.s1      atan2_alpha_1   = atan2_E, atan2_F, atan2_two
            nop.i 999
 ;;
 }
 
+
 { .mfi
            nop.m 999
-           fma.s1       atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0
+//(atan2_sT) fmerge.s     atan2_P         = atan2_Y, atan2_Pi_by_2
+(p6) fmerge.s     atan2_P         = atan2_Y, atan2_Pi_by_2
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_C         = atan2_gVF, atan2_Cp, f0
+           fma.s1       atan2_gVF       = atan2_gV, atan2_F, f0
            nop.i 999
 ;;
 }
 
-// p12 ==>  y0     x0
-// p13 ==>  y0     x!0
+
 { .mfi
            nop.m 999
-(p8)       fclass.m.unc p12,p13 = atan2_X, 0x07  // y=0, test if x is 0
+           fma.s1       atan2_V5        = atan2_V9, atan2_V10, atan2_V6
            nop.i 999
 }
 { .mfi
@@ -765,9 +817,11 @@ GLOBAL_IEEE754_ENTRY(atan2)
 ;;
 }
 
+
+
 { .mfi
            nop.m 999
-           fma.s1       atan2_V5        = atan2_V9, atan2_V10, atan2_V6
+(p8)       fmerge.s     atan2_P         = atan2_sgnY, f0
            nop.i 999
 }
 { .mfi
@@ -778,214 +832,249 @@ GLOBAL_IEEE754_ENTRY(atan2)
 }
 
 
-// p9 ==>  y!0    x0
+
+
 { .mfi
            nop.m 999
-(p9)       fclass.m.unc p9,p0 = atan2_X, 0x07  // y not 0, test if x is 0
+(p9)       fmerge.s     atan2_P         = atan2_sgnY, atan2_pi
            nop.i 999
-}
-// p10 ==> X +INF, Y +-INF
-{ .mfb
-           nop.m 999
-(p10)      fma.d.s0       f8 = atan2_sgnY, atan2_pi_by_4, f0 // x=+inf, y=inf
-(p10)      br.ret.spnt b0          // Exit for x=+inf, y=inf, result is +-pi/4
 ;;
 }
 
-.pred.rel "mutex",p11,p14
+
 { .mfi
            nop.m 999
-(p14)      fmerge.s    f8 = atan2_sgnY, f0 // x=+inf, y !inf, result +-0
+           fma.s1       atan2_alpha_sq  = atan2_alpha, atan2_alpha, f0  
            nop.i 999
 }
-// p11 ==> X -INF, Y +-INF
-{ .mfb
+{ .mfi
            nop.m 999
-(p11)      fma.d.s0       f8 = atan2_sgnY, atan2_3pi_by_4, f0 // x=-inf, y=inf
-(p11)      br.ret.spnt b0          // Exit for x=-inf, y=inf, result is +-3pi/4
+           fma.s1       atan2_Cp        = atan2_alpha, atan2_alpha_1, f1  
+           nop.i 999
 ;;
 }
 
+
 { .mfi
            nop.m 999
-(p13)      fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x not 0, y=0, test if x>0
+           fma.s1       atan2_V1        = atan2_V9, atan2_V5, atan2_V2
            nop.i 999
 }
-{ .mfb
+{ .mfi
            nop.m 999
-           fma.s1       atan2_d         = atan2_alpha_cub, atan2_C, atan2_C
-(p14)      br.ret.spnt b0         // Exit if x=+inf, y !inf, result +-0
+           fma.s1       atan2_W12       = atan2_V9, atan2_W12, f0
+           nop.i 999
 ;;
 }
 
+
+// p13 ==> x  inf     y !inf
 { .mfi
            nop.m 999
-           fma.s1       atan2_W12       = atan2_V9, atan2_W12, f0
+           fma.s1       atan2_W1        = atan2_V9, atan2_W5, atan2_W2
            nop.i 999
 }
-{ .mfb
+{ .mfi
            nop.m 999
-(p9)       fma.d.s0       f8 = atan2_sgnY, atan2_pi_by_2, f0 // x=0, y not 0
-(p9)       br.ret.spnt b0      // Exit if x=0 and y not 0, result is +-pi/2
+(p13)      fcmp.eq.unc.s1 p10,p11       = atan2_sgnX,f1
+           nop.i 999
 ;;
 }
 
+
 { .mfi
            nop.m 999
-           fma.s1       atan2_V1        = atan2_V9, atan2_V5, atan2_V2
+           fma.s1       atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0
            nop.i 999
 }
-{ .mfb
+{ .mfi
            nop.m 999
-           fma.s1       atan2_W1        = atan2_V9, atan2_W5, atan2_W2
-(p12)      br.spnt ATAN2_ERROR            // Branch if x=0 and y=0
+           fma.s1       atan2_C         = atan2_gVF, atan2_Cp, f0
+           nop.i 999
 ;;
 }
 
-{ .mfi
+.pred.rel "mutex",p10,p11 
+// x inf y !inf
+{ .mfb
            nop.m 999
-(p10)      fmerge.s     f8              = atan2_sgnY, f0  // +-0 if x>0, y=0
-           nop.i 999
+(p10)      fmerge.s     f8              = atan2_sgnY, f0
+(p10)      br.ret.spnt b0
 }
 { .mfb
            nop.m 999
-(p11)      fma.d.s0        f8 = atan2_sgnY, atan2_pi, f0 // +-pi if x<0, y=0
-(p13)      br.ret.spnt b0      // Exit if x!0 and y=0
+(p11)      fma.d        f8              = atan2_sgnY, atan2_pi, f0
+(p11)      br.ret.spnt b0
 ;;
 }
 
 
+
+// p10 ==> y   0     x?
+// p11 ==> y  !0     x?
 { .mfi
            nop.m 999
-           fma.s1       atan2_pd        = atan2_P0, atan2_d, f0
+           fclass.m.unc p10,p11 = f8, 0x07
            nop.i 999
+;;
 }
+
 { .mfi
            nop.m 999
-           fma.s1       atan2_dsq       = atan2_d, atan2_d, f0
+(p8)       fmerge.s     atan2_sml_norm  = atan2_sgnY, atan2_sml_norm
            nop.i 999
 ;;
 }
 
-
 { .mfi
            nop.m 999
-           fmerge.se    atan2_near_one = f1, atan2_sig_near_one // Const ~1.0
+           fma.s1       atan2_Pp        = atan2_W12, atan2_W1, atan2_V1
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_Pp        = atan2_W12, atan2_W1, atan2_V1
+           fma.s1       atan2_d         = atan2_alpha_cub, atan2_C, atan2_C
            nop.i 999
 ;;
 }
 
-// p8 true if no swap and X positive
-// p9 true if no swap and X negative
-// both are false is swap is true
+// p12 ==>  y0     x0
+// p13 ==>  y0     x!0
+// p14 ==>  y!0    x0
+// p15 ==>  y!0    x!0
 { .mfi
            nop.m 999
-(p7)       fcmp.ge.unc.s1 p8,p9    = atan2_X,f0
+(p10)      fclass.m.unc p12,p13 = f9, 0x07 
            nop.i 999
 }
+{ .mfi
+           nop.m 999
+(p11)      fclass.m.unc p14,p15 = f9, 0x07 
+           nop.i 999
+;;
+}
+
+
+
+
 { .mfb
            nop.m 999
-(p15)      fma.d.s0        f8              = atan2_sgnY, atan2_pi, f0
-(p15)      br.ret.spnt b0         // Exit if x=-inf, y !inf, result +-pi
+(p13)      fcmp.eq.unc.s1 p10,p11       = atan2_sgnX,f1
+(p12)      br.spnt ATAN2_ERROR
 ;;
 }
 
+
+
 { .mfi
            nop.m 999
-           fma.s1       atan2_sgn_pi_by_2 = atan2_pi_by_2, atan2_sgnY, f0
+           fma.s1       atan2_pd        = atan2_P0, atan2_d, f0
            nop.i 999
 }
 { .mfi
            nop.m 999
-           fma.s1       atan2_A_lo      = atan2_pd, atan2_dsq, atan2_d
+           fma.s1       atan2_dsq       = atan2_d, atan2_d, f0
            nop.i 999
 ;;
 }
 
-
 { .mfi
            nop.m 999
-           fma.s1       atan2_sgn_pi = atan2_pi, atan2_sgnY, f0
+           fma.s1       atan2_A_hi      = atan2_zcub, atan2_Pp, atan2_z
            nop.i 999
 }
-{ .mfi
+{ .mfb
            nop.m 999
-           fma.s1       atan2_A_hi      = atan2_zcub, atan2_Pp, atan2_z
-           nop.i 999
+(p14)      fma.d       f8 = atan2_sgnY, atan2_Pi_by_2, f0
+(p14)      br.ret.spnt b0                
 ;;
 }
 
 
-// For |Y| <= |X| and X > 0, force inexact in case A_lo is zero
-{ .mfi
+
+{ .mfb
            nop.m 999
-(p8)       fmpy.s0      atan2_tmp       = atan2_P22, atan2_P22
-           nop.i 999
+(p10)      fmerge.s     f8              = atan2_sgnY, f0
+(p10)      br.ret.spnt b0
+}
+{ .mfb
+           nop.m 999
+(p11)      fma.d        f8              = atan2_sgnY, atan2_pi, f0
+(p11)      br.ret.spnt b0
 ;;
 }
 
+
+
 { .mfi
            nop.m 999
-           fma.s1       atan2_A         = atan2_A_hi, f1, atan2_A_lo
+           fma.s1       atan2_A_lo      = atan2_pd, atan2_dsq, atan2_d
            nop.i 999
+;;
 }
-// For |Y| <= |X| and X > 0, result is A_hi + A_lo
+
+
 { .mfi
            nop.m 999
-(p8)       fma.d.s0       f8         = atan2_A_hi, f1, atan2_A_lo
+           fma.s1       atan2_A         = atan2_A_hi, f1, atan2_A_lo
            nop.i 999
 ;;
 }
 
-.pred.rel "mutex",p6,p9
-// We perturb A by multiplying by 1.0+1ulp as we produce the result
-// in order to get symmetrically rounded results in directed rounding modes.
-// If we don't do this, there are a few cases where the trailing 11 bits of
-// the significand of the result, before converting to double, are zero.  These
-// cases do not round symmetrically in round to +infinity or round to -infinity.
-// The perturbation also insures that the inexact flag is set.
-// For |Y| > |X|, result is  +- pi/2 - (A_hi + A_lo)
+// Force inexact and possibly underflow for very small results
 { .mfi
            nop.m 999
-(p6)       fnma.d.s0      f8        = atan2_A, atan2_near_one, atan2_sgn_pi_by_2
+(p8)       fma.d        atan2_FR_tmp    = atan2_sgnXY, atan2_A, atan2_sml_norm
            nop.i 999
 }
-// For |Y| <= |X|, and X < 0, result is  +- pi + (A_hi + A_lo)
 { .mfb
            nop.m 999
-(p9)       fma.d.s0        f8        = atan2_A, atan2_near_one, atan2_sgn_pi
-           br.ret.sptk  b0
+           fma.d        f8              = atan2_sgnXY, atan2_A, atan2_P
+           br.ret.sptk  b0     
 ;;
 }
 
 ATAN2_ERROR:
-// Here if x=0 and y=0
+
 { .mfi
           nop.m 999
-          fclass.m p10,p11       = atan2_X,0x05  // Test if x=+0
+          fcmp.eq.unc.s1 p10,p11       = atan2_sgnX,f1
           nop.i 999
 }
 ;;
 
 { .mfi
-          mov        atan2_GR_tag     = 37
-(p10)     fmerge.s     f10             = atan2_sgnY, f0 // x=+0, y=0
-          nop.i 999
+          mov        atan2_GR_tag     = 37 
+(p10)     fmerge.s     f10             = atan2_sgnY, f0
+          nop.i 999 
 }
 { .mfi
           nop.m 999
-(p11)     fma.d.s0        f10            = atan2_sgnY, atan2_pi, f0 // x=-0, y=0
+(p11)     fma.d        f10            = atan2_sgnY, atan2_pi, f0
           nop.i 999
 ;;
 }
-GLOBAL_IEEE754_END(atan2)
+.endp atan2#
+ASM_SIZE_DIRECTIVE(atan2#)
+
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 // (1)
 { .mfi
@@ -1013,19 +1102,19 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 .body
 // (3)
 { .mib
-        stfd [GR_Parameter_X] = f9            // STORE Parameter 1 on stack
+        stfd [GR_Parameter_X] = f9                   // STORE Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
-        nop.b 0
+        nop.b 0                                 
 }
 { .mib
-        stfd [GR_Parameter_Y] = f10           // STORE Parameter 3 on stack
+        stfd [GR_Parameter_Y] = f10                  // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#        // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 
 // (4)
@@ -1041,7 +1130,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_atan2f.S b/sysdeps/ia64/fpu/e_atan2f.S
index c483a7ad34..03a4fed82f 100644
--- a/sysdeps/ia64/fpu/e_atan2f.S
+++ b/sysdeps/ia64/fpu/e_atan2f.S
@@ -1,10 +1,10 @@
 .file "atan2f.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 6/1/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,21 +35,18 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 
 // History
 //==============================================================
-// 06/01/00 Initial version
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 6/01/00  Initial version
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 08/17/00 Changed predicate register macro-usage to direct predicate
+// 8/17/00  Changed predicate register macro-usage to direct predicate
 //          names due to an assembler bug.
-// 01/05/01 Fixed flag settings for denormal input.
-// 01/19/01 Added documentation
-// 01/30/01 Improved speed
-// 02/06/02 Corrected .section statement
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 1/05/01  Fixed flag settings for denormal input.
+// 1/19/01  Added documentation
+// 1/30/01  Improved speed
 
 // Description
 //=========================================
@@ -229,6 +226,7 @@
 // atan2f(+-0/+-0) sets single error tag to 38
 // These are domain errors.
 
+#include "libm_support.h"
 
 //
 // Assembly macros
@@ -326,20 +324,22 @@ atan2f_poly_atan_U     = f88
 //atan2f_Pred_Xneg     = p9 //  x  <   0
 
 
-RODATA
+.data
 
 .align 16
 
-LOCAL_OBJECT_START(atan2f_coef_table1)
+atan2f_coef_table1:
+ASM_TYPE_DIRECTIVE(atan2f_coef_table1,@object)
 data8 0xBFD5555512191621 // p1
 data8 0x3F522E5D33BC9BAA // p10
 data8 0xBFA6E10BA401393F // p7
 data8 0x3FB142A73D7C54E3 // p6
 data8 0xBFC2473C5145EE38 // p3
 data8 0x3FC9997E7AFBFF4E // p2
-LOCAL_OBJECT_END(atan2f_coef_table1)
+ASM_SIZE_DIRECTIVE(atan2f_coef_table1)
 
-LOCAL_OBJECT_START(atan2f_coef_table2)
+atan2f_coef_table2:
+ASM_TYPE_DIRECTIVE(atan2f_coef_table2,@object)
 data8 0xBF7DEAADAA336451 // p9
 data8 0x3F97105B4160F86B // p8
 data8 0xBFB68EED6A8CFA32 // p5
@@ -348,12 +348,29 @@ data8 0x3ff921fb54442d18 // pi/2
 data8 0x400921fb54442d18 // pi
 data8 0x3fe921fb54442d18 // pi/4
 data8 0x4002d97c7f3321d2 // 3pi/4
-LOCAL_OBJECT_END(atan2f_coef_table2)
+ASM_SIZE_DIRECTIVE(atan2f_coef_table2)
+
 
 
+.global atan2f
+#ifdef _LIBC
+.global __atan2f
+.global __ieee754_atan2f
+#endif
+
+.text
+.align 32
+
+atan2f:
+.proc  atan2f
+#ifdef _LIBC
+.proc  __atan2f
+__atan2f:
+.proc  __ieee754_atan2f
+__ieee754_atan2f:
+#endif
 
-.section .text
-GLOBAL_IEEE754_ENTRY(atan2f)
+ 
  
 {     .mfi 
      alloc      r32           = ar.pfs,1,5,4,0
@@ -707,7 +724,7 @@ ATAN2F_XY_INF_NAN_ZERO:
 }
 { .mfb
       nop.m 999
-(p10) fma.s.s0 f8  = f9,f8,f0          // Result quietized y if y is nan
+(p10) fma.s f8  = f9,f8,f0          // Result quietized y if y is nan
 (p10) br.ret.spnt b0                // Exit if y is nan
 }
 ;;
@@ -720,7 +737,7 @@ ATAN2F_XY_INF_NAN_ZERO:
 }
 { .mfb
       nop.m 999
-(p12) fnorm.s.s0 f8 = f9               // Result quietized x if x is nan, y not nan
+(p12) fnorm.s f8 = f9               // Result quietized x if x is nan, y not nan
 (p12) br.ret.spnt b0                // Exit if x is nan, y not nan
 }
 ;;
@@ -740,7 +757,7 @@ ATAN2F_XY_INF_NAN_ZERO:
 }
 { .mfb
       nop.m 999
-(p7)  fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
+(p7)  fma.s f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
 (p7)  br.ret.spnt b0            // Exit if x +inf and y inf
 }
 ;;
@@ -773,19 +790,19 @@ ATAN2F_XY_INF_NAN_ZERO:
 }
 { .mfb
       nop.m 999
-(p13) fma.s.s0   f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
+(p13) fma.s   f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
 (p13) br.ret.spnt b0           // Exit if x not -inf and y inf
 }
 ;;
 
 { .mfi
       nop.m 999
-(p14) fma.s.s0   f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
+(p14) fma.s   f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
       nop.i 999
 }
 { .mfb
       nop.m 999
-(p15) fma.s.s0   f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
+(p15) fma.s   f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
 (p11) br.ret.spnt b0           // Exit if x -inf
 }
 ;;
@@ -812,28 +829,31 @@ ATAN2F_XY_INF_NAN_ZERO:
 }
 { .mfb
       nop.m 999
-(p9)  fma.s.s0  f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
+(p9)  fma.s  f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
 (p10) br.cond.spnt   __libm_error_region // Branch if x zero and y zero
 }
 ;;
 
 { .mfb
       nop.m 999
-(p11) fma.s.s0  f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
+(p11) fma.s  f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
       br.ret.sptk b0         // Final special case exit
 }
 ;;
 
 
-GLOBAL_IEEE754_END(atan2f)
+.endp atan2f
+ASM_SIZE_DIRECTIVE(atan2f)
+
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
          mov            GR_Parameter_TAG      = 38
          fclass.m       p10,p11               = f9,0x5	// @zero | @pos
 ;;
 (p10)    fmerge.s       f10                   = f8, f0
-(p11)    fma.s.s0          f10                   = atan2f_sgn_Y, atan2f_const_pi,f0
+(p11)    fma.s          f10                   = atan2f_sgn_Y, atan2f_const_pi,f0
 ;;
 
 { .mfi
@@ -893,7 +913,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 }
 ;;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_cosh.S b/sysdeps/ia64/fpu/e_cosh.S
index 0c6c5b451e..205653d4bf 100644
--- a/sysdeps/ia64/fpu/e_cosh.S
+++ b/sysdeps/ia64/fpu/e_cosh.S
@@ -1,10 +1,10 @@
 .file "cosh.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,799 +20,1081 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version 
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/07/01 Reworked to improve speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 11/15/02 Improved speed with new algorithm
-
+//
 // API
 //==============================================================
-// double cosh(double)
+// double = cosh(double)
+// input  floating point f8
+// output floating point f8
+
 
 // Overview of operation
 //==============================================================
-// Case 1:  0 < |x| < 0.25
-//  Evaluate cosh(x) by a 12th order polynomial
-//  Care is take for the order of multiplication; and A2 is not exactly 1/4!,
-//  A3 is not exactly 1/6!, etc.
-//  cosh(x) = 1 + (A1*x^2 + A2*x^4 + A3*x^6 + A4*x^8 + A5*x^10 + A6*x^12)
-//
-// Case 2:  0.25 < |x| < 710.47586
-//  Algorithm is based on the identity cosh(x) = ( exp(x) + exp(-x) ) / 2.
-//  The algorithm for exp is described as below.  There are a number of
-//  economies from evaluating both exp(x) and exp(-x).  Although we
-//  are evaluating both quantities, only where the quantities diverge do we
-//  duplicate the computations.  The basic algorithm for exp(x) is described
-//  below.
-//
-// Take the input x. w is "how many log2/128 in x?"
-//  w = x * 128/log2
-//  n = int(w)
-//  x = n log2/128 + r + delta
+// There are four paths
 
-//  n = 128M + index_1 + 2^4 index_2
-//  x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
+// 1. |x| < 0.25        COSH_BY_POLY
+// 2. |x| < 32          COSH_BY_TBL
+// 3. |x| < 2^14        COSH_BY_EXP
+// 4. |x| >= 2^14       COSH_HUGE
 
-//  exp(x) = 2^M  2^(index_1/128)  2^(index_2/8) exp(r) exp(delta)
-//       Construct 2^M
-//       Get 2^(index_1/128) from table_1;
-//       Get 2^(index_2/8)   from table_2;
-//       Calculate exp(r) by 5th order polynomial
-//          r = x - n (log2/128)_high
-//          delta = - n (log2/128)_low
-//       Calculate exp(delta) as 1 + delta
+// For paths 1 and 2, SAFE is always 1.
+// For path  4, Safe is always 0.
+// SAFE = 1 means we cannot overflow.
 
+#include "libm_support.h"
 
-// Special values
+// Assembly macros
 //==============================================================
-// cosh(+0)    = 1.0
-// cosh(-0)    = 1.0
+cosh_FR_X            = f44
+cosh_FR_SGNX         = f40
 
-// cosh(+qnan) = +qnan
-// cosh(-qnan) = -qnan
-// cosh(+snan) = +qnan
-// cosh(-snan) = -qnan
+cosh_FR_Inv_log2by64 = f9
+cosh_FR_log2by64_lo  = f11
+cosh_FR_log2by64_hi  = f10
 
-// cosh(-inf)  = +inf
-// cosh(+inf)  = +inf
+cosh_FR_A1           = f9
+cosh_FR_A2           = f10
+cosh_FR_A3           = f11
 
-// Overflow and Underflow
-//=======================
-// cosh(x) = largest double normal when
-//     x = 710.47586 = 0x408633ce8fb9f87d
-//
-// There is no underflow.
+cosh_FR_Rcub         = f12
+cosh_FR_M_temp       = f13
+cosh_FR_R_temp       = f13
+cosh_FR_Rsq          = f13
+cosh_FR_R            = f14
 
-// Registers used
-//==============================================================
-// Floating Point registers used:
-// f8, input, output
-// f6 -> f15,  f32 -> f61
+cosh_FR_M            = f38
 
-// General registers used:
-// r14 -> r40
+cosh_FR_B1           = f15
+cosh_FR_B2           = f32
+cosh_FR_B3           = f33
 
-// Predicate registers used:
-// p6 -> p15
+cosh_FR_peven_temp1  = f34
+cosh_FR_peven_temp2  = f35
+cosh_FR_peven        = f36
 
-// Assembly macros
-//==============================================================
+cosh_FR_podd_temp1   = f34
+cosh_FR_podd_temp2   = f35
+cosh_FR_podd         = f37
+
+cosh_FR_J_temp       = f9
+cosh_FR_J            = f10
+
+cosh_FR_Mmj          = f39
+
+cosh_FR_N_temp1      = f11
+cosh_FR_N_temp2      = f12
+cosh_FR_N            = f13
+
+cosh_FR_spos         = f14
+cosh_FR_sneg         = f15
+
+cosh_FR_Tjhi         = f32
+cosh_FR_Tjlo         = f33
+cosh_FR_Tmjhi        = f34
+cosh_FR_Tmjlo        = f35
+
+GR_mJ           = r35
+GR_J            = r36
+
+AD_mJ           = r38
+AD_J            = r39
+
+cosh_FR_C_hi         = f9
+cosh_FR_C_hi_temp    = f10
+cosh_FR_C_lo_temp1   = f11 
+cosh_FR_C_lo_temp2   = f12 
+cosh_FR_C_lo_temp3   = f13 
+
+cosh_FR_C_lo         = f38
+cosh_FR_S_hi         = f39
+
+cosh_FR_S_hi_temp1   = f10
+cosh_FR_Y_hi         = f11 
+cosh_FR_Y_lo_temp    = f12 
+cosh_FR_Y_lo         = f13 
+cosh_FR_COSH         = f9
+
+cosh_FR_X2           = f9
+cosh_FR_X4           = f10
+
+cosh_FR_P1           = f14
+cosh_FR_P2           = f15
+cosh_FR_P3           = f32
+cosh_FR_P4           = f33
+cosh_FR_P5           = f34
+cosh_FR_P6           = f35
+
+cosh_FR_TINY_THRESH  = f9
+
+cosh_FR_COSH_temp    = f10
+cosh_FR_SCALE        = f11 
+
+cosh_FR_hi_lo = f10
+
+cosh_FR_poly_podd_temp1    =  f11 
+cosh_FR_poly_podd_temp2    =  f13
+cosh_FR_poly_peven_temp1   =  f11
+cosh_FR_poly_peven_temp2   =  f13
+
+GR_SAVE_PFS                    = r41
+GR_SAVE_B0                     = r42
+GR_SAVE_GP                     = r43
+
+GR_Parameter_X                 = r44
+GR_Parameter_Y                 = r45
+GR_Parameter_RESULT            = r46
 
-rRshf                 = r14
-rN_neg                = r14
-rAD_TB1               = r15
-rAD_TB2               = r16
-rAD_P                 = r17
-rN                    = r18
-rIndex_1              = r19
-rIndex_2_16           = r20
-rM                    = r21
-rBiased_M             = r21
-rSig_inv_ln2          = r22
-rIndex_1_neg          = r22
-rExp_bias             = r23
-rExp_bias_minus_1     = r23
-rExp_mask             = r24
-rTmp                  = r24
-rGt_ln                = r24
-rIndex_2_16_neg       = r24
-rM_neg                = r25
-rBiased_M_neg         = r25
-rRshf_2to56           = r26
-rAD_T1_neg            = r26
-rExp_2tom56           = r28
-rAD_T2_neg            = r28
-rAD_T1                = r29
-rAD_T2                = r30
-rSignexp_x            = r31
-rExp_x                = r31
-
-GR_SAVE_B0            = r33
-GR_SAVE_PFS           = r34
-GR_SAVE_GP            = r35
-GR_SAVE_SP            = r36
-
-GR_Parameter_X        = r37
-GR_Parameter_Y        = r38
-GR_Parameter_RESULT   = r39
-GR_Parameter_TAG      = r40
-
-
-FR_X                  = f10
-FR_Y                  = f1
-FR_RESULT             = f8
-
-fRSHF_2TO56           = f6
-fINV_LN2_2TO63        = f7
-fW_2TO56_RSH          = f9
-f2TOM56               = f11
-fP5                   = f12
-fP4                   = f13
-fP3                   = f14
-fP2                   = f15
-
-fLn2_by_128_hi        = f33
-fLn2_by_128_lo        = f34
-
-fRSHF                 = f35
-fNfloat               = f36
-fNormX                = f37
-fR                    = f38
-fF                    = f39
-
-fRsq                  = f40
-f2M                   = f41
-fS1                   = f42
-fT1                   = f42
-fS2                   = f43
-fT2                   = f43
-fS                    = f43
-fWre_urm_f8           = f44
-fAbsX                 = f44
-
-fMIN_DBL_OFLOW_ARG    = f45
-fMAX_DBL_NORM_ARG     = f46
-fXsq                  = f47
-fX4                   = f48
-fGt_pln               = f49
-fTmp                  = f49
-
-fP54                  = f50
-fP5432                = f50
-fP32                  = f51
-fP                    = f52
-fP54_neg              = f53
-fP5432_neg            = f53
-fP32_neg              = f54
-fP_neg                = f55
-fF_neg                = f56
-
-f2M_neg               = f57
-fS1_neg               = f58
-fT1_neg               = f58
-fS2_neg               = f59
-fT2_neg               = f59
-fS_neg                = f59
-fExp                  = f60
-fExp_neg              = f61
-
-fA6                   = f50
-fA65                  = f50
-fA6543                = f50
-fA654321              = f50
-fA5                   = f51
-fA4                   = f52
-fA43                  = f52
-fA3                   = f53
-fA2                   = f54
-fA21                  = f54
-fA1                   = f55
 
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
 .align 16
+double_cosh_arg_reduction: // Argument-reduction constants; read in order by ldfe with a 16-byte post-increment
+ASM_TYPE_DIRECTIVE(double_cosh_arg_reduction,@object)
+   data8 0xB8AA3B295C17F0BC, 0x00004005 // loaded into cosh_FR_Inv_log2by64
+   data8 0xB17217F7D1000000, 0x00003FF8 // loaded into cosh_FR_log2by64_hi
+   data8 0xCF79ABC9E3B39804, 0x00003FD0 // loaded into cosh_FR_log2by64_lo
+ASM_SIZE_DIRECTIVE(double_cosh_arg_reduction)
+
+double_cosh_p_table: // Coefficients for the COSH_BY_POLY path: cosh(x) ~ 1 + x2*p_odd + p_even; read in order by ldfe
+ASM_TYPE_DIRECTIVE(double_cosh_p_table,@object)
+   data8 0x8000000000000000, 0x00003FFE // P1: coefficient of x^2
+   data8 0xAAAAAAAAAAAAAB80, 0x00003FFA // P2: coefficient of x^4
+   data8 0xB60B60B60B4FE884, 0x00003FF5 // P3: coefficient of x^6
+   data8 0xD00D00D1021D7370, 0x00003FEF // P4: coefficient of x^8
+   data8 0x93F27740C0C2F1CC, 0x00003FE9 // P5: coefficient of x^10
+   data8 0x8FA02AC65BCBD5BC, 0x00003FE2 // P6: coefficient of x^12
+ASM_SIZE_DIRECTIVE(double_cosh_p_table)
+
+double_cosh_ab_table: // Coefficients for the table-based path; the first entry is loaded into cosh_FR_A1
+ASM_TYPE_DIRECTIVE(double_cosh_ab_table,@object)
+   data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // A1 (first ldfe from this table)
+   data8 0x88888888884ECDD5, 0x00003FF8 // presumably A2 -- TODO confirm against the later loads
+   data8 0xD00D0C6DCC26A86B, 0x00003FF2 // presumably A3 -- TODO confirm against the later loads
+   data8 0x8000000000000002, 0x00003FFE // presumably B1 -- TODO confirm against the later loads
+   data8 0xAAAAAAAAAA402C77, 0x00003FFA // presumably B2 -- TODO confirm against the later loads
+   data8 0xB60B6CC96BDB144D, 0x00003FF5 // presumably B3 -- TODO confirm against the later loads
+ASM_SIZE_DIRECTIVE(double_cosh_ab_table)
+
+double_cosh_j_table: // 65 entries of 32 bytes; values match 2^(j/64) for j = -32..32 (significand, exponent, then a 32-bit word -- presumably the low part, TODO confirm -- and a zero word)
+ASM_TYPE_DIRECTIVE(double_cosh_j_table,@object)
+   data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000 // j = -32: 2^(-1/2)
+   data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+   data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+   data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+   data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+   data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+   data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+   data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+   data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+   data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+   data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+   data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+   data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+   data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+   data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+   data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+   data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+   data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+   data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+   data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+   data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+   data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+   data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+   data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+   data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+   data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+   data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+   data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+   data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+   data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+   data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+   data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+   data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000 // j = 0: 1.0
+   data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+   data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+   data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+   data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+   data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+   data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+   data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+   data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+   data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+   data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+   data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+   data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+   data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+   data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+   data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+   data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+   data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+   data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+   data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+   data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+   data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+   data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+   data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+   data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+   data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+   data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+   data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+   data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+   data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+   data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+   data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+   data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000 // j = 32: 2^(1/2)
+ASM_SIZE_DIRECTIVE(double_cosh_j_table)
+
+.align 32
+.global cosh#
 
-// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+.section .text
+.proc  cosh#
+.align 32
 
-// double-extended 1/ln(2)
-// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
-// 3fff b8aa 3b29 5c17 f0bc
-// For speed the significand will be loaded directly with a movl and setf.sig
-//   and the exponent will be bias+63 instead of bias+0.  Thus subsequent
-//   computations need to scale appropriately.
-// The constant 128/ln(2) is needed for the computation of w.  This is also
-//   obtained by scaling the computations.
-//
-// Two shifting constants are loaded directly with movl and setf.d.
-//   1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
-//        This constant is added to x*1/ln2 to shift the integer part of
-//        x*128/ln2 into the rightmost bits of the significand.
-//        The result of this fma is fW_2TO56_RSH.
-//   2. fRSHF       = 1.1000..00 * 2^(63)
-//        This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
-//        the integer part of w, n, as a floating-point number.
-//        The result of this fms is fNfloat.
-
-
-LOCAL_OBJECT_START(exp_table_1)
-data8 0x408633ce8fb9f87e // smallest dbl overflow arg
-data8 0x408633ce8fb9f87d // largest dbl arg to give normal dbl result
-data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
-data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
-//
-// Table 1 is 2^(index_1/128) where
-// index_1 goes from 0 to 15
-//
-data8 0x8000000000000000 , 0x00003FFF
-data8 0x80B1ED4FD999AB6C , 0x00003FFF
-data8 0x8164D1F3BC030773 , 0x00003FFF
-data8 0x8218AF4373FC25EC , 0x00003FFF
-data8 0x82CD8698AC2BA1D7 , 0x00003FFF
-data8 0x8383594EEFB6EE37 , 0x00003FFF
-data8 0x843A28C3ACDE4046 , 0x00003FFF
-data8 0x84F1F656379C1A29 , 0x00003FFF
-data8 0x85AAC367CC487B15 , 0x00003FFF
-data8 0x8664915B923FBA04 , 0x00003FFF
-data8 0x871F61969E8D1010 , 0x00003FFF
-data8 0x87DB357FF698D792 , 0x00003FFF
-data8 0x88980E8092DA8527 , 0x00003FFF
-data8 0x8955EE03618E5FDD , 0x00003FFF
-data8 0x8A14D575496EFD9A , 0x00003FFF
-data8 0x8AD4C6452C728924 , 0x00003FFF
-LOCAL_OBJECT_END(exp_table_1)
-
-// Table 2 is 2^(index_1/8) where
-// index_2 goes from 0 to 7
-LOCAL_OBJECT_START(exp_table_2)
-data8 0x8000000000000000 , 0x00003FFF
-data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
-data8 0x9837F0518DB8A96F , 0x00003FFF
-data8 0xA5FED6A9B15138EA , 0x00003FFF
-data8 0xB504F333F9DE6484 , 0x00003FFF
-data8 0xC5672A115506DADD , 0x00003FFF
-data8 0xD744FCCAD69D6AF4 , 0x00003FFF
-data8 0xEAC0C6E7DD24392F , 0x00003FFF
-LOCAL_OBJECT_END(exp_table_2)
-
-LOCAL_OBJECT_START(exp_p_table)
-data8 0x3f8111116da21757 //P5
-data8 0x3fa55555d787761c //P4
-data8 0x3fc5555555555414 //P3
-data8 0x3fdffffffffffd6a //P2
-LOCAL_OBJECT_END(exp_p_table)
-
-LOCAL_OBJECT_START(cosh_p_table)
-data8 0x8FA02AC65BCBD5BC, 0x00003FE2  // A6
-data8 0xD00D00D1021D7370, 0x00003FEF  // A4
-data8 0xAAAAAAAAAAAAAB80, 0x00003FFA  // A2
-data8 0x93F27740C0C2F1CC, 0x00003FE9  // A5
-data8 0xB60B60B60B4FE884, 0x00003FF5  // A3
-data8 0x8000000000000000, 0x00003FFE  // A1
-LOCAL_OBJECT_END(cosh_p_table)
+cosh: 
 
+#ifdef _LIBC
+.global __ieee754_cosh#
+.proc __ieee754_cosh#
+__ieee754_cosh:
+#endif
 
-.section .text
-GLOBAL_IEEE754_ENTRY(cosh)
+// X NAN?
 
-{ .mlx
-      getf.exp        rSignexp_x = f8  // Must recompute if x unorm
-      movl            rSig_inv_ln2 = 0xb8aa3b295c17f0bc  // significand of 1/ln2
-}
-{ .mlx
-      addl            rAD_TB1    = @ltoff(exp_table_1), gp
-      movl            rRshf_2to56 = 0x4768000000000000   // 1.10000 2^(63+56)
+{ .mfi
+      alloc r32 = ar.pfs,0,12,4,0                  
+(p0)  fclass.m.unc  p6,p7 = f8, 0xc3	//@snan | @qnan 
+      nop.i 999
 }
 ;;
 
-{ .mfi
-      ld8             rAD_TB1    = [rAD_TB1]
-      fclass.m        p6,p0 = f8,0x0b  // Test for x=unorm
-      mov             rExp_mask = 0x1ffff
-}
-{ .mfi
-      mov             rExp_bias = 0xffff
-      fnorm.s1        fNormX   = f8
-      mov             rExp_2tom56 = 0xffff-56
+
+{ .mfb
+      nop.m 999
+(p6)  fma.d.s0   f8 = f8,f1,f8                  
+(p6)  br.ret.spnt     b0 ;;                          
 }
-;;
 
-// Form two constants we need
-//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128
-//  1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
 
+// X infinity 
 { .mfi
-      setf.sig        fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
-      fclass.m        p8,p0 = f8,0x07  // Test for x=0
-      nop.i 999
+      nop.m 999
+(p0)  fclass.m.unc  p6,p0 = f8, 0x23	//@inf 
+      nop.i 999 ;;
 }
+
+{ .mfb
+      nop.m 999
+(p6)     fmerge.s      f8 = f0,f8                  
+(p6)  br.ret.spnt     b0 ;;                          
+}
+
+
+
+// Put 0.25 in f9; p7 true if |x| >= 0.25 (then branch to COSH_BY_TBL)
 { .mlx
-      setf.d          fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
-      movl            rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+         nop.m 999
+(p0)     movl            r32 = 0x000000000000fffd ;;         
 }
-;;
 
 { .mfi
-      ldfpd           fMIN_DBL_OFLOW_ARG, fMAX_DBL_NORM_ARG = [rAD_TB1],16
-      fclass.m        p10,p0 = f8,0x1e3  // Test for x=inf, nan, NaT
-      nop.i           0
+(p0)  setf.exp        f9 = r32                         
+      nop.f 999
+      nop.i 999 ;;
 }
-{ .mfb
-      setf.exp        f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
-      nop.f           0
-(p6)  br.cond.spnt    COSH_UNORM            // Branch if x=unorm
-}
-;;
 
-COSH_COMMON:
 { .mfi
-      ldfe            fLn2_by_128_hi  = [rAD_TB1],16
-      nop.f           0
-      nop.i           0
+      nop.m 999
+(p0)  fmerge.s      cosh_FR_X    = f0,f8                
+      nop.i 999
 }
-{ .mfb
-      setf.d          fRSHF = rRshf // Form right shift const 1.100 * 2^63
-(p8)  fma.d.s0        f8 = f1,f1,f0           // quick exit for x=0
-(p8)  br.ret.spnt     b0
+
+{ .mfi
+      nop.m 999
+(p0)  fmerge.s      cosh_FR_SGNX = f8,f1                
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      ldfe            fLn2_by_128_lo  = [rAD_TB1],16
-      nop.f           0
-      nop.i           0
+      nop.m 999
+(p0)  fcmp.lt.unc     p0,p7 = cosh_FR_X,f9                    
+      nop.i 999 ;;
 }
-{ .mfb
-      and             rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
-(p10) fma.d.s0        f8 = f8,f8,f0  // Result if x=inf, nan, NaT
-(p10) br.ret.spnt     b0               // quick exit for x=inf, nan, NaT
+
+{ .mib
+      nop.m 999
+      nop.i 999
+(p7)  br.cond.sptk    L(COSH_BY_TBL) 
 }
 ;;
 
-// After that last load rAD_TB1 points to the beginning of table 1
-{ .mfi
-      nop.m           0
-      fcmp.eq.s0      p6,p0 = f8, f0       // Dummy to set D
-      sub             rExp_x = rExp_x, rExp_bias // True exponent of x
+
+// COSH_BY_POLY: 
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Get the values of P_x from the table
+
+{ .mmi
+      nop.m 999
+(p0)  addl           r34   = @ltoff(double_cosh_p_table), gp
+      nop.i 999
 }
 ;;
 
-{ .mfi
-      nop.m           0
-      fmerge.s        fAbsX = f0, fNormX   // Form |x|
-      nop.i           0
-}
-{ .mfb
-      cmp.gt          p7, p0 = -2, rExp_x      // Test |x| < 2^(-2)
-      fma.s1          fXsq = fNormX, fNormX, f0  // x*x for small path
-(p7)  br.cond.spnt    COSH_SMALL               // Branch if 0 < |x| < 2^-2
+{ .mmi
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
-// W = X * Inv_log2_by_128
-// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
-// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
 
-{ .mfi
-      add             rAD_P = 0x180, rAD_TB1
-      fma.s1          fW_2TO56_RSH  = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
-      add             rAD_TB2 = 0x100, rAD_TB1
+// Calculate cosh_FR_X2 = ax*ax and cosh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+         nop.m 999
+(p0)     ldfe       cosh_FR_P1 = [r34],16                 
+(p0)     fma.s1     cosh_FR_X2 = cosh_FR_X, cosh_FR_X, f0 ;;           
 }
-;;
 
-// Divide arguments into the following categories:
-//  Certain Safe                - 0.25 <= |x| <= MAX_DBL_NORM_ARG
-//  Possible Overflow       p14 - MAX_DBL_NORM_ARG < |x| < MIN_DBL_OFLOW_ARG
-//  Certain Overflow        p15 - MIN_DBL_OFLOW_ARG <= |x| < +inf
-//
-// If the input is really a double arg, then there will never be
-// "Possible Overflow" arguments.
-//
+{ .mmi
+(p0)     ldfe       cosh_FR_P2 = [r34],16 ;;                 
+(p0)     ldfe       cosh_FR_P3 = [r34],16                 
+         nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe       cosh_FR_P4 = [r34],16 ;;                 
+(p0)     ldfe       cosh_FR_P5 = [r34],16                 
+         nop.i 999 ;;
+}
 
 { .mfi
-      ldfpd           fP5, fP4  = [rAD_P] ,16
-      fcmp.ge.s1      p15,p14 = fAbsX,fMIN_DBL_OFLOW_ARG
-      nop.i           0
+(p0)     ldfe       cosh_FR_P6 = [r34],16                 
+(p0)     fma.s1     cosh_FR_X4 = cosh_FR_X2, cosh_FR_X2, f0         
+         nop.i 999 ;;
 }
-;;
 
-// Nfloat = round_int(W)
-// The signficand of fW_2TO56_RSH contains the rounded integer part of W,
-// as a twos complement number in the lower bits (that is, it may be negative).
-// That twos complement number (called N) is put into rN.
+// Calculate cosh_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
+{ .mfi
+         nop.m 999
+(p0)     fma.s1     cosh_FR_poly_podd_temp1 = cosh_FR_X4, cosh_FR_P5, cosh_FR_P3                
+         nop.i 999 ;;
+}
 
-// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
-// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
-// Thus, fNfloat contains the floating point version of N
+{ .mfi
+         nop.m 999
+(p0)     fma.s1     cosh_FR_podd            = cosh_FR_X4, cosh_FR_poly_podd_temp1, cosh_FR_P1   
+         nop.i 999
+}
 
+// Calculate cosh_FR_peven =  p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
 { .mfi
-      ldfpd           fP3, fP2  = [rAD_P]
-(p14) fcmp.gt.unc.s1  p14,p0 = fAbsX,fMAX_DBL_NORM_ARG
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1     cosh_FR_poly_peven_temp1 = cosh_FR_X4, cosh_FR_P6, cosh_FR_P4               
+         nop.i 999 ;;
 }
-{ .mfb
-      nop.m           0
-      fms.s1          fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
-(p15) br.cond.spnt    COSH_CERTAIN_OVERFLOW
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1     cosh_FR_poly_peven_temp2 = cosh_FR_X4, cosh_FR_poly_peven_temp1, cosh_FR_P2 
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      getf.sig        rN        = fW_2TO56_RSH
-      nop.f           0
-      mov             rExp_bias_minus_1 = 0xfffe
+         nop.m 999
+(p0)     fma.s1     cosh_FR_peven       = cosh_FR_X4, cosh_FR_poly_peven_temp2, f0         
+         nop.i 999 ;;
 }
-;;
 
-// rIndex_1 has index_1
-// rIndex_2_16 has index_2 * 16
-// rBiased_M has M
+// Y_lo = x2*p_odd + p_even
+// Calculate f8 = Y_hi + Y_lo 
+{ .mfi
+         nop.m 999
+(p0)     fma.s1     cosh_FR_Y_lo         = cosh_FR_X2, cosh_FR_podd,  cosh_FR_peven    
+         nop.i 999 ;;
+}
+
+{ .mfb
+         nop.m 999
+(p0)     fma.d.s0   f8                   = f1, f1, cosh_FR_Y_lo                        
+(p0)  br.ret.sptk     b0 ;;                          
+}
+
+
+L(COSH_BY_TBL): 
+
+// Now that we are at TBL, all we know so far is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now.  The HUGE thresholds are:
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
+// (2^14 would be 1000d (register-biased), e = 14 (true); 0x10009 is used below.)
+
+{ .mlx
+      nop.m 999
+(p0)     movl            r32 = 0x0000000000010009 ;;              
+}
 
-// rM has true M
-// r = x - Nfloat * ln2_by_128_hi
-// f = 1 - Nfloat * ln2_by_128_lo
 { .mfi
-      and             rIndex_1 = 0x0f, rN
-      fnma.s1         fR   = fNfloat, fLn2_by_128_hi, fNormX
-      shr             rM = rN,  0x7
+(p0)     setf.exp        f9 = r32                              
+      nop.f 999
+      nop.i 999 ;;
 }
+
 { .mfi
-      and             rIndex_2_16 = 0x70, rN
-      fnma.s1         fF   = fNfloat, fLn2_by_128_lo, f1
-      sub             rN_neg = r0, rN
+      nop.m 999
+(p0)     fcmp.ge.unc     p6,p7 = cosh_FR_X,f9                  
+      nop.i 999 ;;
 }
-;;
 
-{ .mmi
-      and             rIndex_1_neg = 0x0f, rN_neg
-      add             rBiased_M = rExp_bias_minus_1, rM
-      shr             rM_neg = rN_neg,  0x7
+{ .mib
+      nop.m 999
+      nop.i 999
+(p6)     br.cond.spnt    L(COSH_HUGE) ;;                             
 }
+
+// r32 = 1
+// r34 = N-1 
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// cosh(x) = cosh(B+R)
+//         = cosh(B) cosh(R) + sinh(B) sinh(R) 
+// cosh(R) can be approximated by 1 + p_even
+// sinh(R) can be approximated by p_odd
+
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants.
+// f9  = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
+
 { .mmi
-      and             rIndex_2_16_neg = 0x70, rN_neg
-      add             rAD_T2 = rAD_TB2, rIndex_2_16
-      shladd          rAD_T1 = rIndex_1, 4, rAD_TB1
+(p0)     adds                 r32 = 0x1,r0      
+(p0)     addl           r34   = @ltoff(double_cosh_arg_reduction), gp
+         nop.i 999
 }
 ;;
 
-// rAD_T1 has address of T1
-// rAD_T2 has address if T2
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// cosh_FR_spos = 2^(N-1) and cosh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1)  = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+         ld8 r34 = [r34]
+(p0)     movl                r38 = 0x000000000000fffe ;; 
+}
 
 { .mmi
-      setf.exp        f2M = rBiased_M
-      ldfe            fT2  = [rAD_T2]
-      nop.i           0
+(p0)     ldfe            cosh_FR_Inv_log2by64 = [r34],16 ;;            
+(p0)     ldfe            cosh_FR_log2by64_hi  = [r34],16            
+         nop.i 999 ;;
 }
+
+{ .mbb
+(p0)     ldfe            cosh_FR_log2by64_lo  = [r34],16            
+         nop.b 999
+         nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9  = A_1
+// f10 = A_2
+// f11 = A_3
+
 { .mmi
-      add             rBiased_M_neg = rExp_bias_minus_1, rM_neg
-      add             rAD_T2_neg = rAD_TB2, rIndex_2_16_neg
-      shladd          rAD_T1_neg = rIndex_1_neg, 4, rAD_TB1
+      nop.m 999
+(p0)  addl           r34   = @ltoff(double_cosh_ab_table), gp
+      nop.i 999
 }
 ;;
 
-// Create Scale = 2^M
-// Load T1 and T2
 { .mmi
-      ldfe            fT1  = [rAD_T1]
-      nop.m           0
-      nop.i           0
-}
-{ .mmf
-      setf.exp        f2M_neg = rBiased_M_neg
-      ldfe            fT2_neg  = [rAD_T2_neg]
-      fma.s1          fF_neg   = fNfloat, fLn2_by_128_lo, f1
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(x*Inv_log2by64)
+// cosh_FR_M = M = round-to-integer(ax/(log2/64))
+// Put the significand of M in r35
+//    and the floating point representation of M in cosh_FR_M
+
 { .mfi
-      nop.m           0
-      fma.s1          fRsq = fR, fR, f0
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_M      = cosh_FR_X, cosh_FR_Inv_log2by64, f0 
+      nop.i 999
 }
+
 { .mfi
-      ldfe            fT1_neg  = [rAD_T1_neg]
-      fma.s1          fP54 = fR, fP5, fP4
-      nop.i           0
+(p0)  ldfe            cosh_FR_A1 = [r34],16            
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fP32 = fR, fP3, fP2
-      nop.i           0
+      nop.m 999
+(p0)  fcvt.fx.s1      cosh_FR_M_temp = cosh_FR_M                      
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fnma.s1         fP54_neg = fR, fP5, fP4
-      nop.i           0
+      nop.m 999
+(p0)  fnorm.s1        cosh_FR_M      = cosh_FR_M_temp                 
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fnma.s1         fP32_neg = fR, fP3, fP2
-      nop.i           0
+(p0)  getf.sig        r35       = cosh_FR_M_temp                 
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
+
+// M is still in r35. Calculate j. j is the sign extension of the six lsb of M. It
+// has a range of -32 through 31.
+// r35 = M
+// r36 = j 
+{ .mii
+      nop.m 999
+      nop.i 999 ;;
+(p0)  and            r36 = 0x3f, r35 ;;   
+}
+
+// Calculate R
+// f13 = f44 - f12*f10 = x - M*log2by64_hi
+// f14 = f13 - f8*f11 = R = (x - M*log2by64_hi) - M*log2by64_lo
 
 { .mfi
-      nop.m           0
-      fma.s1          fP5432  = fRsq, fP54, fP32
-      nop.i           0
+      nop.m 999
+(p0)  fnma.s1        cosh_FR_R_temp = cosh_FR_M, cosh_FR_log2by64_hi, cosh_FR_X      
+      nop.i 999
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fS2  = fF,fT2,f0
-      nop.i           0
+(p0)  ldfe            cosh_FR_A2 = [r34],16            
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fS1  = f2M,fT1,f0
-      nop.i           0
+      nop.m 999
+(p0)  fnma.s1        cosh_FR_R      = cosh_FR_M, cosh_FR_log2by64_lo, cosh_FR_R_temp 
+      nop.i 999
 }
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
+
+{ .mmi
+(p0)     ldfe            cosh_FR_A3 = [r34],16 ;;            
+(p0)     ldfe            cosh_FR_B1 = [r34],16            
+         nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe            cosh_FR_B2 = [r34],16 ;;            
+(p0)     ldfe            cosh_FR_B3 = [r34],16            
+         nop.i 999 ;;
+}
+
+{ .mii
+         nop.m 999
+(p0)     shl            r34 = r36,  0x2 ;;   
+(p0)     sxt1           r37 = r34 ;;         
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
+
 { .mfi
-      nop.m           0
-      fma.s1          fP5432_neg  = fRsq, fP54_neg, fP32_neg
-      nop.i           0
+      nop.m 999
+(p0)     fma.s1          cosh_FR_Rsq  = cosh_FR_R,   cosh_FR_R, f0  
+(p0)     shr            r36 = r37,  0x2 ;;   
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0)     sub                  r34 = r35, r36    
+         nop.i 999 ;;
+(p0)     shr                  r35 = r34, 0x6 ;;    
+}
+
+{ .mii
+(p0)     sub                 r40 = r38, r35           
+(p0)     adds                 r37 = 0x1, r35    
+(p0)     add                 r39 = r38, r35 ;;           
+}
+
+// Get the address of the J table, add the offset,
+// addresses are AD_mJ and AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0)     sub                  r34 = r35, r32    
+(p0)     addl    r37   = @ltoff(double_cosh_j_table), gp
+         nop.i 999
 }
 ;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fS1_neg  = f2M_neg,fT1_neg,f0
-      nop.i           0
+      ld8 r37 = [r37]
+(p0)  fma.s1          cosh_FR_Rcub = cosh_FR_Rsq, cosh_FR_R, f0  
+      nop.i 999
 }
+
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9; p6 true if x < 32
+
+{ .mlx
+         nop.m 999
+(p0)     movl                r32 = 0x0000000000010004 ;;               
+}
+
+// Calculate p_even
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
 { .mfi
-      nop.m           0
-      fma.s1          fS2_neg  = fF_neg,fT2_neg,f0
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_peven_temp1 = cosh_FR_Rsq, cosh_FR_B3,          cosh_FR_B2  
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fP     = fRsq, fP5432, fR
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_peven_temp2 = cosh_FR_Rsq, cosh_FR_peven_temp1, cosh_FR_B1  
+      nop.i 999
 }
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
 { .mfi
-      nop.m           0
-      fma.s1          fS   = fS1,fS2,f0
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_podd_temp1 = cosh_FR_Rsq,        cosh_FR_A3,         cosh_FR_A2  
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fms.s1          fP_neg     = fRsq, fP5432_neg, fR
-      nop.i           0
+(p0)  setf.exp            cosh_FR_N_temp1 = r39            
+      nop.f 999
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fS_neg   = fS1_neg,fS2_neg,f0
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_peven       = cosh_FR_Rsq, cosh_FR_peven_temp2, f0     
+      nop.i 999
 }
-;;
 
-{ .mfb
-      nop.m           0
-      fmpy.s0         fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
-(p14) br.cond.spnt    COSH_POSSIBLE_OVERFLOW
+{ .mfi
+      nop.m 999
+(p0)  fma.s1          cosh_FR_podd_temp2 = cosh_FR_Rsq,        cosh_FR_podd_temp1, cosh_FR_A1  
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fExp = fS, fP, fS
-      nop.i           0
+(p0)  setf.exp            f9  = r32                              
+      nop.f 999
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fExp_neg = fS_neg, fP_neg, fS_neg
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_podd       = cosh_FR_podd_temp2, cosh_FR_Rcub,       cosh_FR_R   
+      nop.i 999
 }
-;;
 
-{ .mfb
-      nop.m           0
-      fma.d.s0        f8 = fExp, f1, fExp_neg
-      br.ret.sptk     b0                  // Normal path exit
+// GR_mJ contains the table offset for -j
+// GR_J  contains the table offset for +j
+// p6 is true when j <= 0
+
+{ .mlx
+(p0)     setf.exp            cosh_FR_N_temp2 = r40            
+(p0)     movl                r40 = 0x0000000000000020 ;;    
 }
-;;
 
-// Here if 0 < |x| < 0.25
-COSH_SMALL:
-{ .mmf
-      add             rAD_T1 = 0x1a0, rAD_TB1
-      add             rAD_T2 = 0x1d0, rAD_TB1
+{ .mfi
+(p0)     sub                 GR_mJ = r40,  r36           
+(p0)     fmerge.se           cosh_FR_spos    = cosh_FR_N_temp1, f1 
+(p0)     adds                GR_J  = 0x20, r36 ;;           
 }
-;;
 
-{ .mmf
-      ldfe            fA6 = [rAD_T1],16
-      ldfe            fA5 = [rAD_T2],16
-      nop.f           0
+{ .mii
+         nop.m 999
+(p0)     shl                  GR_mJ = GR_mJ, 5 ;;   
+(p0)     add                  AD_mJ = r37, GR_mJ ;; 
 }
-;;
 
 { .mmi
-      ldfe            fA4 = [rAD_T1],16
-      ldfe            fA3 = [rAD_T2],16
-      nop.i           0
+         nop.m 999
+(p0)     ldfe                 cosh_FR_Tmjhi = [AD_mJ],16                 
+(p0)     shl                  GR_J  = GR_J, 5 ;;    
+}
+
+{ .mfi
+(p0)     ldfs                 cosh_FR_Tmjlo = [AD_mJ],16                 
+(p0)     fcmp.lt.unc.s1      p6,p7 = cosh_FR_X,f9                          
+(p0)     add                  AD_J  = r37, GR_J ;;  
 }
-;;
 
 { .mmi
-      ldfe            fA2 = [rAD_T1],16
-      ldfe            fA1 = [rAD_T2],16
-      nop.i           0
+(p0)     ldfe                 cosh_FR_Tjhi  = [AD_J],16 ;;                  
+(p0)     ldfs                 cosh_FR_Tjlo  = [AD_J],16                  
+         nop.i 999 ;;
+}
+
+{ .mfb
+         nop.m 999
+(p0)     fmerge.se           cosh_FR_sneg    = cosh_FR_N_temp2, f1 
+(p7)     br.cond.spnt        L(COSH_BY_EXP) ;;                            
 }
-;;
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate C_hi
+// ******************************************************
+// cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
+// cosh_FR_C_hi = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)
 
 { .mfi
-      nop.m           0
-      fma.s1          fX4 = fXsq, fXsq, f0
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0                   
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fA65 = fXsq, fA6, fA5
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_C_hi      = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi_temp    
+      nop.i 999
 }
+
+// ******************************************************
+// Calculate S_hi
+// ******************************************************
+// cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
+// cosh_FR_S_hi = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_S_hi_temp1
+
 { .mfi
-      nop.m           0
-      fma.s1          fA43 = fXsq, fA4, fA3
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1        cosh_FR_S_hi_temp1 =  cosh_FR_sneg, cosh_FR_Tmjhi, f0                
+      nop.i 999 ;;
 }
-;;
+
+// ******************************************************
+// Calculate C_lo
+// ******************************************************
+// cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
+// cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjhi + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
+// cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo
+// cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
+// cosh_FR_C_lo = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2
 
 { .mfi
-      nop.m           0
-      fma.s1          fA21 = fXsq, fA2, fA1
-      nop.i           0
+      nop.m 999
+(p0)  fms.s1        cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi        
+      nop.i 999
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fA6543 = fX4, fA65, fA43
-      nop.i           0
+      nop.m 999
+(p0)  fms.s1        cosh_FR_S_hi       =  cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1 
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fA654321 = fX4, fA6543, fA21
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1        cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1  
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fma.s1        cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0                  
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fma.s1        cosh_FR_C_lo_temp3 =  cosh_FR_spos, cosh_FR_Tjlo,  cosh_FR_C_lo_temp1 
+      nop.i 999 ;;
 }
-;;
 
-// Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-      fmpy.s0         fTmp = fA6, fA6
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1        cosh_FR_C_lo       =  cosh_FR_C_lo_temp3, f1,   cosh_FR_C_lo_temp2    
+      nop.i 999 ;;
 }
+
+// ******************************************************
+// cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
+// cosh_FR_Y_lo = cosh_FR_S_hi * cosh_FR_podd + cosh_FR_Y_lo_temp
+// cosh_FR_COSH = Y_hi + Y_lo
+
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo       
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp   
+      nop.i 999 ;;
+}
+
 { .mfb
-      nop.m           0
-      fma.d.s0        f8 = fA654321, fXsq, f1
-      br.ret.sptk     b0                // Exit if 0 < |x| < 0.25
+      nop.m 999
+(p0)  fma.d.s0       f8 =  cosh_FR_C_hi, f1, cosh_FR_Y_lo                       
+(p0)  br.ret.sptk     b0 ;;                          
 }
-;;
 
+L(COSH_BY_EXP): 
 
-COSH_POSSIBLE_OVERFLOW:
+// When p7 is true,  we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// f44 = Scale * (Y_hi + Y_lo)
+//     =  cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)
 
-// Here if fMAX_DBL_NORM_ARG < |x| < fMIN_DBL_OFLOW_ARG
-// This cannot happen if input is a double, only if input higher precision.
-// Overflow is a possibility, not a certainty.
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_peven, f1,       cosh_FR_podd           
+      nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow            if N-1 > 0x3fe = 1022
 
-// Recompute result using status field 2 with user's rounding mode,
-// and wre set.  If result is larger than largest double, then we have
-// overflow
+{ .mlx
+       nop.m 999
+(p0)   movl                r32          = 0x00000000000003fe ;;                       
+}
 
 { .mfi
-      mov             rGt_ln  = 0x103ff // Exponent for largest dbl + 1 ulp
-      fsetc.s2        0x7F,0x42         // Get user's round mode, set wre
-      nop.i           0
+(p0)  cmp.gt.unc          p0,p7        = r34, r32                                 
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      setf.exp        fGt_pln = rGt_ln  // Create largest double + 1 ulp
-      fma.d.s2        fWre_urm_f8 = fS, fP, fS    // Result with wre set
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_Tjhi,  cosh_FR_Y_lo_temp, cosh_FR_Tjlo       
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                   // Turn off wre in sf2
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_COSH_temp =  cosh_FR_Y_lo,  f1, cosh_FR_Tjhi                 
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fcmp.ge.s1      p6, p0 =  fWre_urm_f8, fGt_pln // Test for overflow
-      nop.i           0
+      nop.m 999
+(p0)  fma.d.s0       f44 = cosh_FR_spos,  cosh_FR_COSH_temp, f0                       
+      nop.i 999 ;;
 }
-;;
 
+// If over_SAFE is set, return
 { .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    COSH_CERTAIN_OVERFLOW // Branch if overflow
+       nop.m 999
+(p7)   fmerge.s            f8 = f44,f44
+(p7)  br.ret.sptk     b0 ;;                          
 }
-;;
 
-{ .mfb
-      nop.m           0
-      fma.d.s0        f8 = fS, fP, fS
-      br.ret.sptk     b0                     // Exit if really no overflow
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD  (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
+{ .mfi
+      nop.m 999
+(p0)  fsetc.s2            0x7F,0x42                                               
+      nop.i 999;;
 }
-;;
 
-COSH_CERTAIN_OVERFLOW:
-{ .mmi
-      sub             rTmp = rExp_mask, r0, 1
-;;
-      setf.exp        fTmp = rTmp
-      nop.i           0
+{ .mfi
+      nop.m 999
+(p0)  fma.d.s2            f43  = cosh_FR_spos,  cosh_FR_COSH_temp, f0                      
+      nop.i 999 ;;
+}
+
+// 103FF => 103FF -FFFF = 400(true)
+// 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
+// double (7FE). So 0 103FF 8000000000000000  is one ulp more than
+// largest double in register bias
+// Now set p8 if the answer with WRE is greater than or equal to this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+       nop.m 999
+(p0)   movl                r32          = 0x00000000000103ff ;;                     
+}
+
+{ .mmf
+       nop.m 999
+(p0)   setf.exp            f41          = r32                                    
+(p0)   fsetc.s2            0x7F,0x40 ;;                                               
 }
-;;
 
 { .mfi
-      alloc           r32=ar.pfs,1,4,4,0
-      fmerge.s        FR_X = f8,f8
-      nop.i           0
+      nop.m 999
+(p0)  fcmp.ge.unc.s1      p8, p0       = f43, f41                               
+      nop.i 999
 }
+
+{ .mfi
+      nop.m 999
+(p0)  fmerge.ns           f42 = f41, f41                                          
+      nop.i 999 ;;
+}
+
+// The error tag for overflow is 64
+{ .mii
+      nop.m 999
+      nop.i 999 ;;
+(p8)  mov                 r47 = 64 ;;                                               
+}
+
 { .mfb
-      mov             GR_Parameter_TAG = 64
-      fma.d.s0        FR_RESULT = fTmp, fTmp, f0    // Set I,O and +INF result
-      br.cond.sptk    __libm_error_region
+      nop.m 999
+(p0)  fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
+(p8)  br.cond.spnt __libm_error_region ;;
+}
+
+{ .mii
+      nop.m 999
+      nop.i 999 ;;
+(p9)  mov                 r47 = 64                                               
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+(p9)  br.cond.spnt __libm_error_region ;;
 }
-;;
 
-// Here if x unorm
-COSH_UNORM:
 { .mfb
-      getf.exp        rSignexp_x = fNormX    // Must recompute if x unorm
-      fcmp.eq.s0      p6, p0 = f8, f0        // Set D flag
-      br.cond.sptk    COSH_COMMON
+      nop.m 999
+(p0)  fmerge.s            f8 = f44,f44                                            
+(p0)  br.ret.sptk     b0 ;;                          
+}
+
+
+// for COSH_HUGE, put 24000 in exponent; take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
+
+L(COSH_HUGE): 
+
+{ .mlx
+      nop.m 999
+(p0)  movl                r32 = 0x0000000000015dbf ;;                                
 }
-;;
 
-GLOBAL_IEEE754_END(cosh)
+{ .mfi
+(p0)  setf.exp            f9  = r32                                               
+      nop.f 999
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fma.s1              cosh_FR_hi_lo = f1, f9, f1                              
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fma.d.s0            f44 = f9, cosh_FR_hi_lo, f0                             
+(p0)  mov                 r47 = 64                                               
+}
+;;
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp cosh#
+ASM_SIZE_DIRECTIVE(cosh#)
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
+// (1)
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
@@ -821,32 +1103,39 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                           // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                       // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
+
+
+// (2)
 { .mmi
-        stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
-        add GR_Parameter_X = 16,sp              // Parameter 1 address
+        stfd [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
+        add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                       // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
+
 .body
+// (3)
 { .mib
-        stfd [GR_Parameter_X] = FR_X            // STORE Parameter 1 on stack
+        stfd [GR_Parameter_X] = f8                    // STORE Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-        nop.b 0
+        nop.b 0                                 
 }
 { .mib
-        stfd [GR_Parameter_Y] = FR_RESULT       // STORE Parameter 3 on stack
+        stfd [GR_Parameter_Y] = f44                   // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#   // Call error handling function
+        br.call.sptk b0=__libm_error_support#         // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
+
+// (4)
 { .mmi
         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
@@ -859,6 +1148,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_coshf.S b/sysdeps/ia64/fpu/e_coshf.S
index 91846e4717..969abc4ff6 100644
--- a/sysdeps/ia64/fpu/e_coshf.S
+++ b/sysdeps/ia64/fpu/e_coshf.S
@@ -1,10 +1,10 @@
 .file "coshf.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,690 +20,1127 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
 // History
-//*********************************************************************
-// 02/02/00 Initial version
-// 02/16/00 The error tag for coshf overflow changed to 65 (from 64).
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+//==============================================================
+// 2/02/00  Initial version
+// 2/16/00  The error tag for coshf overflow changed to 65 (from 64).
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/07/01 Reworked to improve speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 11/15/02 Improved algorithm based on expf
 //
 // API
-//*********************************************************************
-// float coshf(float)
-//
+//==============================================================
+// float = coshf(float)
+// input  floating point f8
+// output floating point f8
+
+
 // Overview of operation
-//*********************************************************************
-// Case 1:  0 < |x| < 0.25
-//  Evaluate cosh(x) by a 8th order polynomial
-//  Care is take for the order of multiplication; and A2 is not exactly 1/4!,
-//  A3 is not exactly 1/6!, etc.
-//  cosh(x) = 1 + (A1*x^2 + A2*x^4 + A3*x^6 + A4*x^8)
-//
-// Case 2:  0.25 < |x| < 89.41598
-//  Algorithm is based on the identity cosh(x) = ( exp(x) + exp(-x) ) / 2.
-//  The algorithm for exp is described as below.  There are a number of
-//  economies from evaluating both exp(x) and exp(-x).  Although we
-//  are evaluating both quantities, only where the quantities diverge do we
-//  duplicate the computations.  The basic algorithm for exp(x) is described
-//  below.
-//
-// Take the input x. w is "how many log2/128 in x?"
-//  w = x * 64/log2
-//  NJ = int(w)
-//  x = NJ*log2/64 + R
+//==============================================================
+// There are four paths
 
-//  NJ = 64*n + j
-//  x = n*log2 + (log2/64)*j + R
-//
-//  So, exp(x) = 2^n * 2^(j/64)* exp(R)
-//
-//  T =  2^n * 2^(j/64)
-//       Construct 2^n
-//       Get 2^(j/64) table
-//           actually all the entries of 2^(j/64) table are stored in DP and
-//           with exponent bits set to 0 -> multiplication on 2^n can be
-//           performed by doing logical "or" operation with bits presenting 2^n
-
-//  exp(R) = 1 + (exp(R) - 1)
-//  P = exp(R) - 1 approximated by Taylor series of 3rd degree
-//      P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
-//
+// 1. |x| < 0.25        COSH_BY_POLY
+// 2. |x| < 32          COSH_BY_TBL
+// 3. |x| < 2^14        COSH_BY_EXP
+// 4. |x| >= 2^14       COSH_HUGE
 
-//  The final result is reconstructed as follows
-//  exp(x) = T + T*P
+// For paths 1 and 2, SAFE is always 1.
+// For path 4, SAFE is always 0.
+// SAFE = 1 means we cannot overflow.
 
-// Special values
-//*********************************************************************
-// coshf(+0)    = 1.0
-// coshf(-0)    = 1.0
+#include "libm_support.h"
 
-// coshf(+qnan) = +qnan
-// coshf(-qnan) = -qnan
-// coshf(+snan) = +qnan
-// coshf(-snan) = -qnan
+// Assembly macros
+//==============================================================
+coshf_FR_X            = f44
+coshf_FR_SGNX         = f40
 
-// coshf(-inf)  = +inf
-// coshf(+inf)  = +inf
+coshf_FR_Inv_log2by64 = f9
+coshf_FR_log2by64_lo  = f11
+coshf_FR_log2by64_hi  = f10
 
-// Overflow and Underflow
-//*********************************************************************
-// coshf(x) = largest single normal when
-//     x = 89.41598 = 0x42b2d4fc
-//
-// There is no underflow.
+coshf_FR_A1           = f9
+coshf_FR_A2           = f10
+coshf_FR_A3           = f11
 
-// Registers used
-//*********************************************************************
-// Floating Point registers used:
-// f8 input, output
-// f6,f7, f9 -> f15,  f32 -> f45
+coshf_FR_Rcub         = f12
+coshf_FR_M_temp       = f13
+coshf_FR_R_temp       = f13
+coshf_FR_Rsq          = f13
+coshf_FR_R            = f14
 
-// General registers used:
-// r2, r3, r16 -> r38
+coshf_FR_M            = f38
 
-// Predicate registers used:
-// p6 -> p15
+coshf_FR_B1           = f15
+coshf_FR_B2           = f32
+coshf_FR_B3           = f33
 
-// Assembly macros
-//*********************************************************************
-// integer registers used
-// scratch
-rNJ                   = r2
-rNJ_neg               = r3
-
-rJ_neg                = r16
-rN_neg                = r17
-rSignexp_x            = r18
-rExp_x                = r18
-rExp_mask             = r19
-rExp_bias             = r20
-rAd1                  = r21
-rAd2                  = r22
-rJ                    = r23
-rN                    = r24
-rTblAddr              = r25
-rA3                   = r26
-rExpHalf              = r27
-rLn2Div64             = r28
-rGt_ln                = r29
-r17ones_m1            = r29
-rRightShifter         = r30
-rJ_mask               = r30
-r64DivLn2             = r31
-rN_mask               = r31
-// stacked
-GR_SAVE_PFS           = r32
-GR_SAVE_B0            = r33
-GR_SAVE_GP            = r34
-GR_Parameter_X        = r35
-GR_Parameter_Y        = r36
-GR_Parameter_RESULT   = r37
-GR_Parameter_TAG      = r38
-
-// floating point registers used
-FR_X                  = f10
-FR_Y                  = f1
-FR_RESULT             = f8
-// scratch
-fRightShifter         = f6
-f64DivLn2             = f7
-fNormX                = f9
-fNint                 = f10
-fN                    = f11
-fR                    = f12
-fLn2Div64             = f13
-fA2                   = f14
-fA3                   = f15
-// stacked
-fP                    = f32
-fT                    = f33
-fMIN_SGL_OFLOW_ARG    = f34
-fMAX_SGL_NORM_ARG     = f35
-fRSqr                 = f36
-fA1                   = f37
-fA21                  = f37
-fA4                   = f38
-fA43                  = f38
-fA4321                = f38
-fX4                   = f39
-fTmp                  = f39
-fGt_pln               = f39
-fWre_urm_f8           = f40
-fXsq                  = f40
-fP_neg                = f41
-fT_neg                = f42
-fExp                  = f43
-fExp_neg              = f44
-fAbsX                 = f45
-
-
-RODATA
-.align 16
+coshf_FR_peven_temp1  = f34
+coshf_FR_peven_temp2  = f35
+coshf_FR_peven        = f36
 
-LOCAL_OBJECT_START(_coshf_table)
-data4 0x42b2d4fd         // Smallest single arg to overflow single result
-data4 0x42b2d4fc         // Largest single arg to give normal single result
-data4 0x00000000         // pad
-data4 0x00000000         // pad
-//
-// 2^(j/64) table, j goes from 0 to 63
-data8 0x0000000000000000 // 2^(0/64)
-data8 0x00002C9A3E778061 // 2^(1/64)
-data8 0x000059B0D3158574 // 2^(2/64)
-data8 0x0000874518759BC8 // 2^(3/64)
-data8 0x0000B5586CF9890F // 2^(4/64)
-data8 0x0000E3EC32D3D1A2 // 2^(5/64)
-data8 0x00011301D0125B51 // 2^(6/64)
-data8 0x0001429AAEA92DE0 // 2^(7/64)
-data8 0x000172B83C7D517B // 2^(8/64)
-data8 0x0001A35BEB6FCB75 // 2^(9/64)
-data8 0x0001D4873168B9AA // 2^(10/64)
-data8 0x0002063B88628CD6 // 2^(11/64)
-data8 0x0002387A6E756238 // 2^(12/64)
-data8 0x00026B4565E27CDD // 2^(13/64)
-data8 0x00029E9DF51FDEE1 // 2^(14/64)
-data8 0x0002D285A6E4030B // 2^(15/64)
-data8 0x000306FE0A31B715 // 2^(16/64)
-data8 0x00033C08B26416FF // 2^(17/64)
-data8 0x000371A7373AA9CB // 2^(18/64)
-data8 0x0003A7DB34E59FF7 // 2^(19/64)
-data8 0x0003DEA64C123422 // 2^(20/64)
-data8 0x0004160A21F72E2A // 2^(21/64)
-data8 0x00044E086061892D // 2^(22/64)
-data8 0x000486A2B5C13CD0 // 2^(23/64)
-data8 0x0004BFDAD5362A27 // 2^(24/64)
-data8 0x0004F9B2769D2CA7 // 2^(25/64)
-data8 0x0005342B569D4F82 // 2^(26/64)
-data8 0x00056F4736B527DA // 2^(27/64)
-data8 0x0005AB07DD485429 // 2^(28/64)
-data8 0x0005E76F15AD2148 // 2^(29/64)
-data8 0x0006247EB03A5585 // 2^(30/64)
-data8 0x0006623882552225 // 2^(31/64)
-data8 0x0006A09E667F3BCD // 2^(32/64)
-data8 0x0006DFB23C651A2F // 2^(33/64)
-data8 0x00071F75E8EC5F74 // 2^(34/64)
-data8 0x00075FEB564267C9 // 2^(35/64)
-data8 0x0007A11473EB0187 // 2^(36/64)
-data8 0x0007E2F336CF4E62 // 2^(37/64)
-data8 0x00082589994CCE13 // 2^(38/64)
-data8 0x000868D99B4492ED // 2^(39/64)
-data8 0x0008ACE5422AA0DB // 2^(40/64)
-data8 0x0008F1AE99157736 // 2^(41/64)
-data8 0x00093737B0CDC5E5 // 2^(42/64)
-data8 0x00097D829FDE4E50 // 2^(43/64)
-data8 0x0009C49182A3F090 // 2^(44/64)
-data8 0x000A0C667B5DE565 // 2^(45/64)
-data8 0x000A5503B23E255D // 2^(46/64)
-data8 0x000A9E6B5579FDBF // 2^(47/64)
-data8 0x000AE89F995AD3AD // 2^(48/64)
-data8 0x000B33A2B84F15FB // 2^(49/64)
-data8 0x000B7F76F2FB5E47 // 2^(50/64)
-data8 0x000BCC1E904BC1D2 // 2^(51/64)
-data8 0x000C199BDD85529C // 2^(52/64)
-data8 0x000C67F12E57D14B // 2^(53/64)
-data8 0x000CB720DCEF9069 // 2^(54/64)
-data8 0x000D072D4A07897C // 2^(55/64)
-data8 0x000D5818DCFBA487 // 2^(56/64)
-data8 0x000DA9E603DB3285 // 2^(57/64)
-data8 0x000DFC97337B9B5F // 2^(58/64)
-data8 0x000E502EE78B3FF6 // 2^(59/64)
-data8 0x000EA4AFA2A490DA // 2^(60/64)
-data8 0x000EFA1BEE615A27 // 2^(61/64)
-data8 0x000F50765B6E4540 // 2^(62/64)
-data8 0x000FA7C1819E90D8 // 2^(63/64)
-LOCAL_OBJECT_END(_coshf_table)
-
-LOCAL_OBJECT_START(cosh_p_table)
-data8 0x3efa3001dcf5905b // A4
-data8 0x3f56c1437543543e // A3
-data8 0x3fa5555572601504 // A2
-data8 0x3fdfffffffe2f097 // A1
-LOCAL_OBJECT_END(cosh_p_table)
+coshf_FR_podd_temp1   = f34
+coshf_FR_podd_temp2   = f35
+coshf_FR_podd         = f37
+
+coshf_FR_J_temp       = f9
+coshf_FR_J            = f10
+
+coshf_FR_Mmj          = f39
+
+coshf_FR_N_temp1      = f11
+coshf_FR_N_temp2      = f12
+coshf_FR_N            = f13
+
+coshf_FR_spos         = f14
+coshf_FR_sneg         = f15
+
+coshf_FR_Tjhi         = f32
+coshf_FR_Tjlo         = f33
+coshf_FR_Tmjhi        = f34
+coshf_FR_Tmjlo        = f35
+
+GR_mJ           = r35
+GR_J            = r36
+
+AD_mJ           = r38
+AD_J            = r39
+
+
+GR_SAVE_B0                    = r42
+GR_SAVE_PFS                   = r41
+GR_SAVE_GP                    = r43 
+
+GR_Parameter_X                = r44
+GR_Parameter_Y                = r45
+GR_Parameter_RESULT           = r46
+GR_Parameter_TAG              = r47
+
+FR_X             = f8
+FR_Y             = f0
+FR_RESULT        = f44
+
+
+coshf_FR_C_hi         = f9
+coshf_FR_C_hi_temp    = f10
+coshf_FR_C_lo_temp1   = f11 
+coshf_FR_C_lo_temp2   = f12 
+coshf_FR_C_lo_temp3   = f13 
+
+coshf_FR_C_lo         = f38
+coshf_FR_S_hi         = f39
+
+coshf_FR_S_hi_temp1   = f10
+coshf_FR_Y_hi         = f11 
+coshf_FR_Y_lo_temp    = f12 
+coshf_FR_Y_lo         = f13 
+coshf_FR_COSH         = f9
+
+coshf_FR_X2           = f9
+coshf_FR_X4           = f10
+
+coshf_FR_P1           = f14
+coshf_FR_P2           = f15
+coshf_FR_P3           = f32
+coshf_FR_P4           = f33
+coshf_FR_P5           = f34
+coshf_FR_P6           = f35
 
+coshf_FR_TINY_THRESH  = f9
+
+coshf_FR_COSH_temp    = f10
+coshf_FR_SCALE        = f11 
+
+coshf_FR_hi_lo = f10
+
+coshf_FR_poly_podd_temp1    =  f11 
+coshf_FR_poly_podd_temp2    =  f13
+coshf_FR_poly_peven_temp1   =  f11
+coshf_FR_poly_peven_temp2   =  f13
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+single_coshf_arg_reduction:
+ASM_TYPE_DIRECTIVE(single_coshf_arg_reduction,@object)
+   data8 0xB8AA3B295C17F0BC, 0x00004005
+   data8 0xB17217F7D1000000, 0x00003FF8
+   data8 0xCF79ABC9E3B39804, 0x00003FD0
+ASM_SIZE_DIRECTIVE(single_coshf_arg_reduction)
+
+single_coshf_p_table:
+ASM_TYPE_DIRECTIVE(single_coshf_p_table,@object)
+   data8 0x8000000000000000, 0x00003FFE
+   data8 0xAAAAAAAAAAAAAB80, 0x00003FFA
+   data8 0xB60B60B60B4FE884, 0x00003FF5
+   data8 0xD00D00D1021D7370, 0x00003FEF
+   data8 0x93F27740C0C2F1CC, 0x00003FE9
+   data8 0x8FA02AC65BCBD5BC, 0x00003FE2
+ASM_SIZE_DIRECTIVE(single_coshf_p_table)
+
+single_coshf_ab_table:
+ASM_TYPE_DIRECTIVE(single_coshf_ab_table,@object)
+   data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
+   data8 0x88888888884ECDD5, 0x00003FF8
+   data8 0xD00D0C6DCC26A86B, 0x00003FF2
+   data8 0x8000000000000002, 0x00003FFE
+   data8 0xAAAAAAAAAA402C77, 0x00003FFA
+   data8 0xB60B6CC96BDB144D, 0x00003FF5
+ASM_SIZE_DIRECTIVE(single_coshf_ab_table)
+
+single_coshf_j_table:
+ASM_TYPE_DIRECTIVE(single_coshf_j_table,@object)
+   data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
+   data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+   data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+   data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+   data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+   data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+   data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+   data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+   data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+   data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+   data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+   data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+   data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+   data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+   data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+   data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+   data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+   data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+   data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+   data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+   data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+   data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+   data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+   data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+   data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+   data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+   data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+   data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+   data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+   data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+   data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+   data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+   data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
+   data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+   data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+   data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+   data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+   data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+   data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+   data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+   data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+   data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+   data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+   data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+   data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+   data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+   data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+   data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+   data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+   data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+   data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+   data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+   data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+   data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+   data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+   data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+   data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+   data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+   data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+   data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+   data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+   data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+   data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+   data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+   data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
+ASM_SIZE_DIRECTIVE(single_coshf_j_table)
+
+.align 32
+.global coshf#
 
 .section .text
-GLOBAL_IEEE754_ENTRY(coshf)
+.proc  coshf#
+.align 32
 
-{ .mlx
-      getf.exp        rSignexp_x = f8  // Must recompute if x unorm
-      movl            r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
+coshf: 
+
+#ifdef _LIBC
+.global __ieee754_coshf#
+.proc __ieee754_coshf#
+__ieee754_coshf:
+#endif
+
+// X NAN?
+
+
+{ .mfi
+      alloc r32 = ar.pfs,0,12,4,0                  
+(p0)  fclass.m.unc  p6,p7 = f8, 0xc3               
+	  nop.i 999 ;;                           
+}
+{ .mfb
+      nop.m 999
+(p6)  fma.s.s0   f8 = f8,f1,f8                  
+(p6)  br.ret.spnt b0 ;;                    
 }
+
+{ .mfi
+       nop.m 999
+       nop.f 999
+       nop.i 999 ;;
+}
+
+// X infinity 
+{ .mfi
+       nop.m 999
+(p0)  fclass.m.unc  p6,p0 = f8, 0x23               
+       nop.i 999 ;;
+}
+
+{ .mfb
+       nop.m 999
+(p6)     fmerge.s      f8 = f0,f8                  
+(p6)  br.ret.spnt    b0 ;;                  
+}
+
+// Put 0.25 in f9; p7 true if |x| >= 0.25 (then branch to COSH_BY_TBL)
 { .mlx
-      addl            rTblAddr = @ltoff(_coshf_table),gp
-      movl            rRightShifter = 0x43E8000000000000 // DP Right Shifter
+       nop.m 999
+(p0)     movl            r32 = 0x000000000000fffd ;;         
 }
-;;
 
 { .mfi
-      // point to the beginning of the table
-      ld8             rTblAddr = [rTblAddr]
-      fclass.m        p6, p0 = f8, 0x0b   // Test for x=unorm
-      addl            rA3 = 0x3E2AA, r0   // high bits of 1.0/6.0 rounded to SP
+(p0)     setf.exp        f9 = r32                         
+       nop.f 999
+       nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fnorm.s1        fNormX = f8 // normalized x
-      addl            rExpHalf = 0xFFFE, r0 // exponent of 1/2
+       nop.m 999
+(p0)  fmerge.s      coshf_FR_X    = f0,f8                
+       nop.i 999
 }
-;;
 
 { .mfi
-      setf.d          f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
-      fclass.m        p15, p0 = f8, 0x1e3   // test for NaT,NaN,Inf
-      nop.i           0
+       nop.m 999
+(p0)  fmerge.s      coshf_FR_SGNX = f8,f1                
+       nop.i 999 ;;
 }
-{ .mlx
-      // load Right Shifter to FP reg
-      setf.d          fRightShifter = rRightShifter
-      movl            rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
+
+{ .mfi
+       nop.m 999
+(p0)     fcmp.lt.unc     p0,p7 = coshf_FR_X,f9                    
+       nop.i 999 ;;
+}
+
+{ .mib
+       nop.m 999
+       nop.i 999
+(p7)     br.cond.sptk    L(COSH_BY_TBL) ;;                      
+}
+
+
+// COSH_BY_POLY: 
+
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Get the values of P_x from the table
+
+{ .mmi
+      nop.m 999
+(p0)  addl           r34   = @ltoff(single_coshf_p_table), gp
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+// Calculate coshf_FR_X2 = ax*ax and coshf_FR_X4 = ax*ax*ax*ax
+{ .mmf
+       nop.m 999
+(p0)     ldfe       coshf_FR_P1 = [r34],16                 
+(p0)     fma.s1     coshf_FR_X2 = coshf_FR_X, coshf_FR_X, f0 ;;           
+}
+
+{ .mmi
+(p0)     ldfe       coshf_FR_P2 = [r34],16 ;;                 
+(p0)     ldfe       coshf_FR_P3 = [r34],16                 
+       nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe       coshf_FR_P4 = [r34],16 ;;                 
+(p0)     ldfe       coshf_FR_P5 = [r34],16                 
+       nop.i 999 ;;
+}
+
 { .mfi
-      mov             rExp_mask = 0x1ffff
-      fcmp.eq.s1      p13, p0 = f0, f8 // test for x = 0.0
-      shl             rA3 = rA3, 12    // 0x3E2AA000, approx to 1.0/6.0 in SP
+(p0)     ldfe       coshf_FR_P6 = [r34],16                 
+(p0)     fma.s1     coshf_FR_X4 = coshf_FR_X2, coshf_FR_X2, f0         
+       nop.i 999 ;;
 }
-{ .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    COSH_UNORM            // Branch if x=unorm
+
+// Calculate coshf_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
+{ .mfi
+       nop.m 999
+(p0)     fma.s1     coshf_FR_poly_podd_temp1 = coshf_FR_X4, coshf_FR_P5, coshf_FR_P3                
+       nop.i 999 ;;
 }
-;;
 
-COSH_COMMON:
 { .mfi
-      setf.exp        fA2 = rExpHalf        // load A2 to FP reg
-      nop.f           0
-      mov             rExp_bias = 0xffff
+       nop.m 999
+(p0)     fma.s1     coshf_FR_podd            = coshf_FR_X4, coshf_FR_poly_podd_temp1, coshf_FR_P1   
+       nop.i 999
 }
-{ .mfb
-      setf.d          fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
-(p15) fma.s.s0        f8 = f8, f8, f0       // result if x = NaT,NaN,Inf
-(p15) br.ret.spnt     b0                    // exit here if x = NaT,NaN,Inf
+
+// Calculate coshf_FR_peven =  p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
+{ .mfi
+       nop.m 999
+(p0)     fma.s1     coshf_FR_poly_peven_temp1 = coshf_FR_X4, coshf_FR_P6, coshf_FR_P4               
+       nop.i 999 ;;
 }
-;;
 
 { .mfi
-      // min overflow and max normal threshold
-      ldfps           fMIN_SGL_OFLOW_ARG, fMAX_SGL_NORM_ARG = [rTblAddr], 8
-      nop.f           0
-      and             rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+       nop.m 999
+(p0)     fma.s1     coshf_FR_poly_peven_temp2 = coshf_FR_X4, coshf_FR_poly_peven_temp1, coshf_FR_P2 
+       nop.i 999 ;;
+}
+
+{ .mfi
+       nop.m 999
+(p0)     fma.s1     coshf_FR_peven       = coshf_FR_X4, coshf_FR_poly_peven_temp2, f0         
+       nop.i 999 ;;
+}
+
+// Y_lo = x2*p_odd + p_even
+// Calculate f8 = Y_hi + Y_lo 
+
+{ .mfi
+       nop.m 999
+(p0)     fma.s1     coshf_FR_Y_lo         = coshf_FR_X2, coshf_FR_podd,  coshf_FR_peven    
+       nop.i 999 ;;
 }
+
 { .mfb
-      setf.s          fA3 = rA3                  // load A3 to FP reg
-(p13) fma.s.s0        f8 = f1, f1, f0            // result if x = 0.0
-(p13) br.ret.spnt     b0                         // exit here if x =0.0
+       nop.m 999
+(p0)     fma.s.s0   f8                   = f1, f1, coshf_FR_Y_lo                        
+(p0)     br.ret.sptk    b0 ;;                                                        
+}
+
+
+L(COSH_BY_TBL): 
+
+// Now that we are at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now.  The HUGE threshold depends on the precision:
+// Double: go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single: go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
+// (The original note "Go to HUGE if |x| >= 2^14,
+// 1000d (register-biased) is e = 14 (true)" does not apply here:
+// the code below loads 0x10006, i.e. the single-precision 2^7 threshold.)
+
+{ .mlx
+       nop.m 999
+(p0)     movl            r32 = 0x0000000000010006 ;;              
 }
-;;
 
 { .mfi
-      sub             rExp_x = rExp_x, rExp_bias // True exponent of x
-      fmerge.s        fAbsX = f0, fNormX         // Form |x|
-      nop.i           0
+(p0)     setf.exp        f9 = r32                              
+       nop.f 999
+       nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      // x*(64/ln(2)) + Right Shifter
-      fma.s1          fNint = fNormX, f64DivLn2, fRightShifter
-      add             rTblAddr = 8, rTblAddr
+       nop.m 999
+(p0)     fcmp.ge.unc     p6,p7 = coshf_FR_X,f9                  
+       nop.i 999 ;;
 }
-{ .mfb
-      cmp.gt          p7, p0 = -2, rExp_x        // Test |x| < 2^(-2)
-      fma.s1          fXsq = fNormX, fNormX, f0  // x*x for small path
-(p7)  br.cond.spnt    COSH_SMALL                 // Branch if 0 < |x| < 2^-2
+
+{ .mib
+       nop.m 999
+       nop.i 999
+(p6)     br.cond.spnt    L(COSH_HUGE) ;;                             
+}
+
+// r32 = 1
+// r34 = N-1 
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// coshf(x) = coshf(B+R)
+//         = coshf(B) coshf(R) + sinh(B) sinh(R) 
+// coshf(R) can be approximated by 1 + p_even
+// sinh(R) can be approximated by p_odd
+
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants.
+// f9  = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
+
+{ .mmi
+(p0)     adds                 r32 = 0x1,r0      
+(p0)     addl           r34   = @ltoff(single_coshf_arg_reduction), gp
+         nop.i 999
 }
 ;;
 
-{ .mfi
-      nop.m           0
-      // check for overflow
-      fcmp.ge.s1      p12, p13 = fAbsX, fMIN_SGL_OFLOW_ARG
-      mov             rJ_mask = 0x3f             // 6-bit mask for J
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// coshf_FR_spos = 2^(N-1) and coshf_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1)  = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+         ld8 r34 = [r34]
+(p0)     movl                r38 = 0x000000000000fffe ;; 
+}
+
+{ .mmi
+(p0)     ldfe            coshf_FR_Inv_log2by64 = [r34],16 ;;            
+(p0)     ldfe            coshf_FR_log2by64_hi  = [r34],16            
+       nop.i 999 ;;
+}
+
+{ .mbb
+(p0)     ldfe            coshf_FR_log2by64_lo  = [r34],16            
+       nop.b 999
+       nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9  = A_1
+// f10 = A_2
+// f11 = A_3
+
+{ .mmi
+      nop.m 999
+(p0)  addl           r34   = @ltoff(single_coshf_ab_table), gp
+      nop.i 999
 }
 ;;
 
-{ .mfb
-      nop.m           0
-      fms.s1          fN = fNint, f1, fRightShifter // n in FP register
-      // branch out if overflow
-(p12) br.cond.spnt    COSH_CERTAIN_OVERFLOW
+{ .mmi
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(x*Inv_log2by64)
+// coshf_FR_M = M = round-to-integer(ax/(log2/64))
+// Put the significand of M in r35
+//    and the floating point representation of M in coshf_FR_M
+
 { .mfi
-      getf.sig        rNJ = fNint                   // bits of n, j
-      // check for possible overflow
-      fcmp.gt.s1      p13, p0 = fAbsX, fMAX_SGL_NORM_ARG
-      nop.i           0
+       nop.m 999
+(p0)     fma.s1          coshf_FR_M      = coshf_FR_X, coshf_FR_Inv_log2by64, f0 
+       nop.i 999
 }
-;;
 
 { .mfi
-      addl            rN = 0xFFBF - 63, rNJ      // biased and shifted n-1,j
-      fnma.s1         fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
-      and             rJ = rJ_mask, rNJ          // bits of j
+(p0)     ldfe            coshf_FR_A1 = [r34],16            
+       nop.f 999
+       nop.i 999 ;;
 }
+
 { .mfi
-      sub             rNJ_neg = r0, rNJ          // bits of n, j for -x
-      nop.f           0
-      andcm           rN_mask = -1, rJ_mask      // 0xff...fc0 to mask N
+       nop.m 999
+(p0)     fcvt.fx.s1      coshf_FR_M_temp = coshf_FR_M                      
+       nop.i 999 ;;
 }
-;;
 
 { .mfi
-      shladd          rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
-      nop.f           0
-      and             rN = rN_mask, rN     // biased, shifted n-1
+       nop.m 999
+(p0)     fnorm.s1        coshf_FR_M      = coshf_FR_M_temp                 
+       nop.i 999 ;;
 }
+
 { .mfi
-      addl            rN_neg = 0xFFBF - 63, rNJ_neg // -x biased, shifted n-1,j
-      nop.f           0
-      and             rJ_neg = rJ_mask, rNJ_neg     // bits of j for -x
+(p0)     getf.sig        r35       = coshf_FR_M_temp                 
+       nop.f 999
+       nop.i 999 ;;
+}
+
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
+// has a range of -32 thru 31.
+// r35 = M
+// r36 = j 
+
+{ .mii
+       nop.m 999
+       nop.i 999 ;;
+(p0)     and            r36 = 0x3f, r35 ;;   
+}
+
+// Calculate R
+// f13 = f44 - f38*f10 = x - M*log2by64_hi
+// f14 = f13 - f38*f11 = R = (x - M*log2by64_hi) - M*log2by64_lo
+
+{ .mfi
+       nop.m 999
+(p0)     fnma.s1        coshf_FR_R_temp = coshf_FR_M, coshf_FR_log2by64_hi, coshf_FR_X      
+       nop.i 999
 }
-;;
 
 { .mfi
-      ld8             rJ = [rJ]                    // Table value
-      nop.f           0
-      shl             rN = rN, 46 // 2^(n-1) bits in DP format
+(p0)     ldfe            coshf_FR_A2 = [r34],16            
+       nop.f 999
+       nop.i 999 ;;
 }
+
 { .mfi
-      shladd          rJ_neg = rJ_neg, 3, rTblAddr // addr in 2^(j/64) table -x
-      nop.f           0
-      and             rN_neg = rN_mask, rN_neg     // biased, shifted n-1 for -x
+       nop.m 999
+(p0)     fnma.s1        coshf_FR_R      = coshf_FR_M, coshf_FR_log2by64_lo, coshf_FR_R_temp 
+       nop.i 999
 }
-;;
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
+
+{ .mmi
+(p0)     ldfe            coshf_FR_A3 = [r34],16 ;;            
+(p0)     ldfe            coshf_FR_B1 = [r34],16            
+       nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe            coshf_FR_B2 = [r34],16 ;;            
+(p0)     ldfe            coshf_FR_B3 = [r34],16            
+       nop.i 999 ;;
+}
+
+{ .mii
+       nop.m 999
+(p0)     shl            r34 = r36,  0x2 ;;   
+(p0)     sxt1           r37 = r34 ;;         
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
 
 { .mfi
-      ld8             rJ_neg = [rJ_neg]            // Table value for -x
-      nop.f           0
-      shl             rN_neg = rN_neg, 46 // 2^(n-1) bits in DP format for -x
+       nop.m 999
+(p0)     fma.s1          coshf_FR_Rsq  = coshf_FR_R,   coshf_FR_R, f0  
+(p0)     shr            r36 = r37,  0x2 ;;   
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0)     sub                  r34 = r35, r36    
+       nop.i 999 ;;
+(p0)     shr                  r35 = r34, 0x6 ;;    
+}
+
+{ .mii
+(p0)     sub                 r40 = r38, r35           
+(p0)     adds                 r37 = 0x1, r35    
+(p0)     add                 r39 = r38, r35 ;;           
+}
+
+// Get the address of the J table, add the offset,
+// addresses are AD_mJ and AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0)     sub                  r34 = r35, r32    
+(p0)     addl    r37   = @ltoff(single_coshf_j_table), gp
+         nop.i 999
 }
 ;;
 
 { .mfi
-      or              rN = rN, rJ // bits of 2^n * 2^(j/64) in DP format
-      nop.f           0
-      nop.i           0
+      ld8 r37 = [r37]
+(p0)     fma.s1          coshf_FR_Rcub = coshf_FR_Rsq, coshf_FR_R, f0  
+       nop.i 999
 }
-;;
 
-{ .mmf
-      setf.d          fT = rN            // 2^(n-1) * 2^(j/64)
-      or              rN_neg = rN_neg, rJ_neg // -x bits of 2^n * 2^(j/64) in DP
-      fma.s1          fRSqr = fR, fR, f0 // R^2
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9; p6 true if x < 32
+
+{ .mlx
+       nop.m 999
+(p0)     movl                r32 = 0x0000000000010004 ;;               
 }
-;;
+
+// Calculate p_even
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
 
 { .mfi
-      setf.d          fT_neg = rN_neg    // 2^(n-1) * 2^(j/64) for -x
-      fma.s1          fP = fA3, fR, fA2  // A3*R + A2
-      nop.i           0
+       nop.m 999
+(p0)     fma.s1          coshf_FR_peven_temp1 = coshf_FR_Rsq, coshf_FR_B3,          coshf_FR_B2  
+       nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fnma.s1         fP_neg = fA3, fR, fA2  // A3*R + A2 for -x
-      nop.i           0
+       nop.m 999
+(p0)     fma.s1          coshf_FR_peven_temp2 = coshf_FR_Rsq, coshf_FR_peven_temp1, coshf_FR_B1  
+       nop.i 999
 }
-;;
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
 
 { .mfi
-      nop.m           0
-      fma.s1          fP = fP, fRSqr, fR // P = (A3*R + A2)*R^2 + R
-      nop.i           0
+       nop.m 999
+(p0)     fma.s1          coshf_FR_podd_temp1 = coshf_FR_Rsq,        coshf_FR_A3,         coshf_FR_A2  
+       nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fms.s1          fP_neg = fP_neg, fRSqr, fR // P = (A3*R + A2)*R^2 + R, -x
-      nop.i           0
+(p0)     setf.exp            coshf_FR_N_temp1 = r39            
+       nop.f 999
+       nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fmpy.s0         fTmp = fLn2Div64, fLn2Div64       // Force inexact
-      nop.i           0
+       nop.m 999
+(p0)     fma.s1          coshf_FR_peven       = coshf_FR_Rsq, coshf_FR_peven_temp2, f0     
+       nop.i 999
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fExp = fP, fT, fT                 // exp(x)/2
-      nop.i           0
+       nop.m 999
+(p0)     fma.s1          coshf_FR_podd_temp2 = coshf_FR_Rsq,        coshf_FR_podd_temp1, coshf_FR_A1  
+       nop.i 999 ;;
 }
-{ .mfb
-      nop.m           0
-      fma.s1          fExp_neg = fP_neg, fT_neg, fT_neg // exp(-x)/2
-      // branch out if possible overflow result
-(p13) br.cond.spnt    COSH_POSSIBLE_OVERFLOW
+
+{ .mfi
+(p0)     setf.exp            f9  = r32                              
+       nop.f 999
+       nop.i 999 ;;
 }
-;;
 
-{ .mfb
-      nop.m           0
-      // final result in the absence of overflow
-      fma.s.s0        f8 = fExp, f1, fExp_neg  // result = (exp(x)+exp(-x))/2
-      // exit here in the absence of overflow
-      br.ret.sptk     b0              // Exit main path, 0.25 <= |x| < 89.41598
+{ .mfi
+       nop.m 999
+(p0)     fma.s1          coshf_FR_podd       = coshf_FR_podd_temp2, coshf_FR_Rcub,       coshf_FR_R   
+       nop.i 999
+}
+
+// GR_mJ contains the table offset for -j
+// GR_J  contains the table offset for +j
+// p6 is true when j <= 0
+
+{ .mlx
+(p0)     setf.exp            coshf_FR_N_temp2 = r40            
+(p0)     movl                r40 = 0x0000000000000020 ;;    
+}
+
+{ .mfi
+(p0)     sub                 GR_mJ = r40,  r36           
+(p0)     fmerge.se           coshf_FR_spos    = coshf_FR_N_temp1, f1 
+(p0)     adds                GR_J  = 0x20, r36 ;;           
+}
+
+{ .mii
+       nop.m 999
+(p0)     shl                  GR_mJ = GR_mJ, 5 ;;   
+(p0)     add                  AD_mJ = r37, GR_mJ ;; 
 }
-;;
 
-// Here if 0 < |x| < 0.25.  Evaluate 8th order polynomial.
-COSH_SMALL:
 { .mmi
-      add             rAd1 = 0x200, rTblAddr
-      add             rAd2 = 0x210, rTblAddr
-      nop.i           0
+       nop.m 999
+(p0)     ldfe                 coshf_FR_Tmjhi = [AD_mJ],16                 
+(p0)     shl                  GR_J  = GR_J, 5 ;;    
+}
+
+{ .mfi
+(p0)     ldfs                 coshf_FR_Tmjlo = [AD_mJ],16                 
+(p0)     fcmp.lt.unc.s1      p6,p7 = coshf_FR_X,f9                          
+(p0)     add                  AD_J  = r37, GR_J ;;  
 }
-;;
 
 { .mmi
-      ldfpd           fA4, fA3 = [rAd1]
-      ldfpd           fA2, fA1 = [rAd2]
-      nop.i           0
+(p0)     ldfe                 coshf_FR_Tjhi  = [AD_J],16 ;;                  
+(p0)     ldfs                 coshf_FR_Tjlo  = [AD_J],16                  
+       nop.i 999 ;;
 }
-;;
+
+{ .mfb
+       nop.m 999
+(p0)     fmerge.se           coshf_FR_sneg    = coshf_FR_N_temp2, f1 
+(p7)     br.cond.spnt        L(COSH_BY_EXP) ;;                            
+}
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate C_hi
+// ******************************************************
+// coshf_FR_C_hi_temp = coshf_FR_sneg * coshf_FR_Tmjhi
+// coshf_FR_C_hi = coshf_FR_spos * coshf_FR_Tjhi + (coshf_FR_sneg * coshf_FR_Tmjhi)
 
 { .mfi
-      nop.m           0
-      fma.s1          fX4 = fXsq, fXsq, f0
-      nop.i           0
+       nop.m 999
+(p0)    fma.s1         coshf_FR_C_hi_temp = coshf_FR_sneg, coshf_FR_Tmjhi, f0                   
+       nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fA43 = fXsq, fA4, fA3
-      nop.i           0
+       nop.m 999
+(p0)    fma.s1         coshf_FR_C_hi      = coshf_FR_spos, coshf_FR_Tjhi,  coshf_FR_C_hi_temp    
+       nop.i 999
 }
+
+// ******************************************************
+// Calculate S_hi
+// ******************************************************
+// coshf_FR_S_hi_temp1 = coshf_FR_sneg * coshf_FR_Tmjhi
+// coshf_FR_S_hi = coshf_FR_spos * coshf_FR_Tjhi - coshf_FR_S_hi_temp1
+
 { .mfi
-      nop.m           0
-      fma.s1          fA21 = fXsq, fA2, fA1
-      nop.i           0
+       nop.m 999
+(p0)     fma.s1        coshf_FR_S_hi_temp1 =  coshf_FR_sneg, coshf_FR_Tmjhi, f0                
+       nop.i 999 ;;
 }
-;;
+
+// ******************************************************
+// Calculate C_lo
+// ******************************************************
+// coshf_FR_C_lo_temp1 = coshf_FR_spos * coshf_FR_Tjhi - coshf_FR_C_hi
+// coshf_FR_C_lo_temp2 = coshf_FR_sneg * coshf_FR_Tmjhi + (coshf_FR_spos * coshf_FR_Tjhi - coshf_FR_C_hi)
+// coshf_FR_C_lo_temp1 = coshf_FR_sneg * coshf_FR_Tmjlo
+// coshf_FR_C_lo_temp3 = coshf_FR_spos * coshf_FR_Tjlo + (coshf_FR_sneg * coshf_FR_Tmjlo)
+// coshf_FR_C_lo = coshf_FR_C_lo_temp3 + coshf_FR_C_lo_temp2
 
 { .mfi
-      nop.m           0
-      fma.s1          fA4321 = fX4, fA43, fA21
-      nop.i           0
+       nop.m 999
+(p0)     fms.s1        coshf_FR_C_lo_temp1 = coshf_FR_spos, coshf_FR_Tjhi,  coshf_FR_C_hi        
+       nop.i 999
 }
-;;
 
-// Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-      fmpy.s0         fTmp = fA4, fA4
-      nop.i           0
+       nop.m 999
+(p0)     fms.s1        coshf_FR_S_hi       =  coshf_FR_spos, coshf_FR_Tjhi, coshf_FR_S_hi_temp1 
+       nop.i 999 ;;
+}
+
+{ .mfi
+       nop.m 999
+(p0)     fma.s1        coshf_FR_C_lo_temp2 = coshf_FR_sneg, coshf_FR_Tmjhi, coshf_FR_C_lo_temp1  
+       nop.i 999
+}
+
+{ .mfi
+       nop.m 999
+(p0)     fma.s1        coshf_FR_C_lo_temp1 = coshf_FR_sneg, coshf_FR_Tmjlo, f0                  
+       nop.i 999 ;;
+}
+
+{ .mfi
+       nop.m 999
+(p0)     fma.s1        coshf_FR_C_lo_temp3 =  coshf_FR_spos, coshf_FR_Tjlo,  coshf_FR_C_lo_temp1 
+       nop.i 999 ;;
+}
+
+{ .mfi
+       nop.m 999
+(p0)     fma.s1        coshf_FR_C_lo       =  coshf_FR_C_lo_temp3, f1,   coshf_FR_C_lo_temp2    
+       nop.i 999 ;;
+}
+
+// ******************************************************
+// coshf_FR_Y_lo_temp = coshf_FR_C_hi * coshf_FR_peven + coshf_FR_C_lo
+// coshf_FR_Y_lo = coshf_FR_S_hi * coshf_FR_podd + coshf_FR_Y_lo_temp
+// coshf_FR_COSH = Y_hi + Y_lo
+
+{ .mfi
+       nop.m 999
+(p0)    fma.s1         coshf_FR_Y_lo_temp =  coshf_FR_C_hi, coshf_FR_peven, coshf_FR_C_lo       
+       nop.i 999 ;;
 }
+
+{ .mfi
+       nop.m 999
+(p0)    fma.s1         coshf_FR_Y_lo      =  coshf_FR_S_hi, coshf_FR_podd, coshf_FR_Y_lo_temp   
+       nop.i 999 ;;
+}
+
 { .mfb
-      nop.m           0
-      fma.s.s0        f8 = fA4321, fXsq, f1
-      br.ret.sptk     b0                // Exit if 0 < |x| < 0.25
+       nop.m 999
+(p0)    fma.s.s0       f8 =  coshf_FR_C_hi, f1, coshf_FR_Y_lo                       
+(p0)    br.ret.sptk        b0 ;;                                           
 }
-;;
 
-COSH_POSSIBLE_OVERFLOW:
 
-// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
-// This cannot happen if input is a single, only if input higher precision.
-// Overflow is a possibility, not a certainty.
+L(COSH_BY_EXP): 
 
-// Recompute result using status field 2 with user's rounding mode,
-// and wre set.  If result is larger than largest single, then we have
-// overflow
+// When p7 is true,  we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// f44 = Scale * (Y_hi + Y_lo)
+//     =  coshf_FR_spos * (coshf_FR_Tjhi + coshf_FR_Y_lo)
 
 { .mfi
-      mov             rGt_ln  = 0x1007f // Exponent for largest single + 1 ulp
-      fsetc.s2        0x7F,0x42         // Get user's round mode, set wre
-      nop.i           0
+       nop.m 999
+(p0)    fma.s1         coshf_FR_Y_lo_temp =  coshf_FR_peven, f1,       coshf_FR_podd           
+       nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow            if N-1 > 0x3fe = 1022
+// There is a danger of single overflow            if N-1 > 0x7e = 126
+
+{ .mlx
+       nop.m 999
+(p0)   movl                r32          = 0x000000000000007e ;;                       
 }
-;;
 
 { .mfi
-      setf.exp        fGt_pln = rGt_ln  // Create largest single + 1 ulp
-      fma.s.s2        fWre_urm_f8 = fP, fT, fT    // Result with wre set
-      nop.i           0
+(p0)   cmp.gt.unc          p0,p7        = r34, r32                                 
+       nop.f 999
+       nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                   // Turn off wre in sf2
-      nop.i           0
+       nop.m 999
+(p0)    fma.s1         coshf_FR_Y_lo      =  coshf_FR_Tjhi,  coshf_FR_Y_lo_temp, coshf_FR_Tjlo       
+       nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fcmp.ge.s1      p6, p0 =  fWre_urm_f8, fGt_pln // Test for overflow
-      nop.i           0
+       nop.m 999
+(p0)    fma.s1         coshf_FR_COSH_temp =  coshf_FR_Y_lo,  f1, coshf_FR_Tjhi                 
+       nop.i 999 ;;
 }
-;;
 
-{ .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    COSH_CERTAIN_OVERFLOW // Branch if overflow
+{ .mfi
+       nop.m 999
+(p0)    fma.s.s0       f44 = coshf_FR_spos,  coshf_FR_COSH_temp, f0                       
+       nop.i 999 ;;
 }
-;;
 
+// If over_SAFE is set, return
 { .mfb
-      nop.m           0
-      fma.s.s0        f8 = fP, fT, fT
-      br.ret.sptk     b0                     // Exit if really no overflow
+       nop.m 999
+(p7)   fmerge.s            f8 = f44,f44
+(p7)   br.ret.sptk        b0 ;;
 }
-;;
 
-// here if overflow
-COSH_CERTAIN_OVERFLOW:
-{ .mmi
-      addl            r17ones_m1 = 0x1FFFE, r0
-;;
-      setf.exp        fTmp = r17ones_m1
-      nop.i           0
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD  (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
+{ .mfi
+       nop.m 999
+(p0)   fsetc.s2            0x7F,0x42                                               
+       nop.i 999;;
+}
+
+{ .mfi
+       nop.m 999
+(p0)   fma.s.s2            f43  = coshf_FR_spos,  coshf_FR_COSH_temp, f0                      
+       nop.i 999 ;;
+}
+
+// 1 more than the exponent of the largest double (7FE)  = 7FF
+// 7FF - 3FF = 400 (true); 400 + FFFF = 103FF (register-biased)
+// So 0 103FF 8000000000000000  is one ulp more than
+// largest double in register bias
+// 1 more than the exponent of the largest single (FE)  = FF
+// FF - 7F = 80 (true); 80 + FFFF = 1007F (register-biased)
+// Now  set p8 if the answer with WRE is greater than or equal to this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+       nop.m 999
+(p0)   movl                r32          = 0x000000000001007f ;;                     
+}
+
+{ .mmf
+       nop.m 999
+(p0)   setf.exp            f41          = r32                                    
+(p0)   fsetc.s2            0x7F,0x40 ;;                                               
+}
+
+{ .mfi
+       nop.m 999
+(p0)   fcmp.ge.unc.s1      p8, p0       = f43, f41                               
+       nop.i 999
 }
-;;
 
 { .mfi
-      alloc           r32 = ar.pfs, 0, 3, 4, 0 // get some registers
-      fmerge.s        FR_X = f8,f8
-      nop.i           0
+       nop.m 999
+(p0)   fmerge.ns           f42 = f41, f41                                        
+       nop.i 999 ;;
+}
+
+// The error tag for overflow is 65
+{ .mii
+       nop.m 999
+       nop.i 999 ;;
+(p8)   mov                 GR_Parameter_TAG = 65 ;;                                              
 }
+
 { .mfb
-      mov             GR_Parameter_TAG = 65
-      fma.s.s0        FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
-      br.cond.sptk    __libm_error_region
+       nop.m 999
+(p0)   fcmp.le.unc.s1      p9, p0 =  f43, f42                                    
+(p8)   br.cond.spnt __libm_error_region ;;
+}
+
+{ .mii
+       nop.m 999
+       nop.i 999 ;;
+(p9)   mov                 GR_Parameter_TAG = 64                                              
+}
+
+{ .mib
+       nop.m 999
+       nop.i 999
+(p9)   br.cond.spnt __libm_error_region ;;
 }
-;;
 
-// Here if x unorm
-COSH_UNORM:
 { .mfb
-      getf.exp        rSignexp_x = fNormX    // Must recompute if x unorm
-      fcmp.eq.s0      p6, p0 = f8, f0        // Set D flag
-      br.cond.sptk    COSH_COMMON            // Return to main path
+       nop.m 999
+(p0)   fmerge.s            f8 = f44,f44                                          
+(p0)   br.ret.sptk b0 ;; 
 }
-;;
 
-GLOBAL_IEEE754_END(coshf)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+L(COSH_HUGE): 
+
+// for COSH_HUGE, put 24000 in exponent; take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
+
+{ .mlx
+       nop.m 999
+(p0)   movl                r32 = 0x0000000000015dbf ;;                               
+}
+
+{ .mfi
+(p0)   setf.exp            f9  = r32                                              
+       nop.f 999
+       nop.i 999 ;;
+}
+
+{ .mfi
+       nop.m 999
+(p0)   fma.s1              coshf_FR_hi_lo = f1, f9, f1                            
+       nop.i 999 ;;
+}
+
+{ .mfi
+       nop.m 999
+(p0)   fma.s.s0            f44 = f9, coshf_FR_hi_lo, f0                           
+(p0)   mov                 GR_Parameter_TAG = 65                                               
+}
+.endp coshf
+ASM_SIZE_DIRECTIVE(coshf)
+
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
-      add   GR_Parameter_Y=-32,sp             // Parameter 2 value
-      nop.f 0
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
+        nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-      mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs 
 }
 { .mfi
-.fframe 64
-      add sp=-64,sp                           // Create new stack
-      nop.f 0
-      mov GR_SAVE_GP=gp                       // Save gp
+.fframe 64 
+        add sp=-64,sp                           // Create new stack
+        nop.f 0
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
 { .mmi
-      stfs [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 on stack
-      add GR_Parameter_X = 16,sp              // Parameter 1 address
-.save   b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0                       // Save b0
+        stfs [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
+.save   b0, GR_SAVE_B0                      
+        mov GR_SAVE_B0=b0                       // Save b0 
 };;
 .body
-{ .mfi
-      stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
-      nop.f 0
-      add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+{ .mib
+        stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack 
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  
+	nop.b 0                                 // Parameter 3 address
 }
 { .mib
-      stfs [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
-      add   GR_Parameter_Y = -16,GR_Parameter_Y
-      br.call.sptk b0=__libm_error_support#   // Call error handling function
+        stfs [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
+        add   GR_Parameter_Y = -16,GR_Parameter_Y  
+        br.call.sptk.many b0=__libm_error_support#  // Call error handling function
 };;
-
 { .mmi
-      add   GR_Parameter_RESULT = 48,sp
-      nop.m 0
-      nop.i 0
+        nop.m 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
-
 { .mmi
-      ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
+        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-      add   sp = 64,sp                       // Restore stack pointer
-      mov   b0 = GR_SAVE_B0                  // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-      mov   gp = GR_SAVE_GP                  // Restore gp
-      mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-      br.ret.sptk     b0                     // Return
-};;
-
-LOCAL_LIBM_END(__libm_error_region)
+        mov   gp = GR_SAVE_GP                  // Restore gp 
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
+};; 
 
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_coshl.S b/sysdeps/ia64/fpu/e_coshl.S
index cef8be0b1a..daac20d9a3 100644
--- a/sysdeps/ia64/fpu/e_coshl.S
+++ b/sysdeps/ia64/fpu/e_coshl.S
@@ -1,10 +1,10 @@
 .file "coshl.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,1060 +35,1129 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version 
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version 
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 01/23/01 Set inexact flag for large args.
-// 05/07/01 Reworked to improve speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 12/06/02 Improved performance
+// 1/23/01  Set inexact flag for large args.
 //
 // API
 //==============================================================
+// float       = cosh(float)
+// double      = cosh(double)
 // long double = coshl(long double)
 // input  floating point f8
 // output floating point f8
-//
-// Registers used
-//==============================================================
-// general registers: 
-// r14 -> r40
-// predicate registers used:
-// p6 -> p11
-// floating-point registers used:
-// f9 -> f15; f32 -> f90; 
-// f8 has input, then output
-//
+
+
 // Overview of operation
 //==============================================================
-// There are seven paths
-// 1. 0 < |x| < 0.25          COSH_BY_POLY
-// 2. 0.25 <=|x| < 32         COSH_BY_TBL
-// 3. 32 <= |x| < 11357.21655 COSH_BY_EXP (merged path with COSH_BY_TBL)
-// 4. |x| >= 11357.21655      COSH_HUGE
-// 5. x=0                     Done with early exit
-// 6. x=inf,nan               Done with early exit
-// 7. x=denormal              COSH_DENORM
-//
-// For double extended we get overflow for x >= 400c b174 ddc0 31ae c0ea
-//                                           >= 11357.21655
-//
-//
-// 1. COSH_BY_POLY   0 < |x| < 0.25
-// ===============
-// Evaluate cosh(x) by a 12th order polynomial
-// Care is take for the order of multiplication; and P2 is not exactly 1/4!, 
-// P3 is not exactly 1/6!, etc.
-// cosh(x) = 1 + (P1*x^2 + P2*x^4 + P3*x^6 + P4*x^8 + P5*x^10 + P6*x^12)
-//
-// 2. COSH_BY_TBL   0.25 <= |x| < 32.0
-// =============
-// cosh(x) = cosh(B+R)
-//         = cosh(B)cosh(R) + sinh(B)sinh(R)
-// 
-// ax = |x| = M*log2/64 + R
-// B = M*log2/64
-// M = 64*N + j 
-//   We will calculate M and get N as (M-j)/64
-//   The division is a shift.
-// exp(B)  = exp(N*log2 + j*log2/64)
-//         = 2^N * 2^(j*log2/64)
-// cosh(B) = 1/2(e^B + e^-B)
-//         = 1/2(2^N * 2^(j*log2/64) + 2^-N * 2^(-j*log2/64)) 
-// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) 
-// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) 
-// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
-// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
-//
-// R = ax - M*log2/64
-// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
-// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
-//        = 1 + p_odd + p_even
-//        where the p_even uses the A coefficients and the p_even uses 
-//        the B coefficients
-//
-// So sinh(R) = 1 + p_odd + p_even -(1 -p_odd -p_even)/2 = p_odd
-//    cosh(R) = 1 + p_even
-//    cosh(B) = C_hi + C_lo
-//    sinh(B) = S_hi
-// cosh(x) = cosh(B)cosh(R) + sinh(B)sinh(R)
-//
-// 3. COSH_BY_EXP   32.0 <= |x| < 11357.21655  ( 400c b174 ddc0 31ae c0ea )
-// ==============
-// Can approximate result by exp(x)/2 in this region.
-// Y_hi = Tjhi
-// Y_lo = Tjhi * (p_odd + p_even) + Tjlo
-// cosh(x) = Y_hi + Y_lo
-//
-// 4. COSH_HUGE     |x| >= 11357.21655  ( 400c b174 ddc0 31ae c0ea )
-// ============
-// Set error tag and call error support
-//
-//
+// There are four paths
+
+// 1. |x| < 0.25        COSH_BY_POLY
+// 2. |x| < 32          COSH_BY_TBL
+// 3. |x| < 2^14        COSH_BY_EXP
+// 4. |x| >= 2^14       COSH_HUGE
+
+// For paths 1 and 2, SAFE is always 1.
+// For path 4, SAFE is always 0.
+// SAFE = 1 means we cannot overflow.
+
+#include "libm_support.h"
+
 // Assembly macros
 //==============================================================
-r_ad5                 = r14
-r_rshf_2to57          = r15
-r_exp_denorm          = r15
-r_ad_mJ_lo            = r15
-r_ad_J_lo             = r16
-r_2Nm1                = r17
-r_2mNm1               = r18
-r_exp_x               = r18
-r_ad_J_hi             = r19
-r_ad2o                = r19
-r_ad_mJ_hi            = r20
-r_mj                  = r21
-r_ad2e                = r22
-r_ad3                 = r23
-r_ad1                 = r24
-r_Mmj                 = r24
-r_rshf                = r25
-r_M                   = r25
-r_N                   = r25
-r_jshf                = r26
-r_exp_2tom57          = r26
-r_j                   = r26
-r_exp_mask            = r27
-r_signexp_x           = r28
-r_signexp_0_5         = r28
-r_exp_0_25            = r29
-r_sig_inv_ln2         = r30
-r_exp_32              = r30
-r_exp_huge            = r30
-r_ad4                 = r31
-
-GR_SAVE_PFS           = r34
-GR_SAVE_B0            = r35
-GR_SAVE_GP            = r36
-
-GR_Parameter_X        = r37
-GR_Parameter_Y        = r38
-GR_Parameter_RESULT   = r39
-GR_Parameter_TAG      = r40
-
-
-f_ABS_X               = f9 
-f_X2                  = f10
-f_X4                  = f11
-f_tmp                 = f14
-f_RSHF                = f15
-
-f_Inv_log2by64        = f32
-f_log2by64_lo         = f33
-f_log2by64_hi         = f34
-f_A1                  = f35
-
-f_A2                  = f36
-f_A3                  = f37
-f_Rcub                = f38
-f_M_temp              = f39
-f_R_temp              = f40
-
-f_Rsq                 = f41
-f_R                   = f42
-f_M                   = f43
-f_B1                  = f44
-f_B2                  = f45
-
-f_B3                  = f46
-f_peven_temp1         = f47
-f_peven_temp2         = f48
-f_peven               = f49
-f_podd_temp1          = f50
-
-f_podd_temp2          = f51
-f_podd                = f52
-f_poly65              = f53
-f_poly6543            = f53
-f_poly6to1            = f53
-f_poly43              = f54
-f_poly21              = f55
-
-f_X3                  = f56
-f_INV_LN2_2TO63       = f57
-f_RSHF_2TO57          = f58
-f_2TOM57              = f59
-f_smlst_oflow_input   = f60
-
-f_pre_result          = f61
-f_huge                = f62
-f_spos                = f63
-f_sneg                = f64
-f_Tjhi                = f65
-
-f_Tjlo                = f66
-f_Tmjhi               = f67
-f_Tmjlo               = f68
-f_S_hi                = f69
-f_SC_hi_temp          = f70
-
-f_C_lo_temp1          = f71 
-f_C_lo_temp2          = f72 
-f_C_lo_temp3          = f73 
-f_C_lo_temp4          = f73 
-f_C_lo                = f74
-f_C_hi                = f75
-
-f_Y_hi                = f77 
-f_Y_lo_temp           = f78 
-f_Y_lo                = f79 
-f_NORM_X              = f80
-
-f_P1                  = f81
-f_P2                  = f82
-f_P3                  = f83
-f_P4                  = f84
-f_P5                  = f85
-
-f_P6                  = f86
-f_Tjhi_spos           = f87
-f_Tjlo_spos           = f88
-f_huge                = f89
-f_signed_hi_lo        = f90
+cosh_FR_X            = f44
+FR_RESULT            = f44
+cosh_FR_SGNX         = f40
+cosh_FR_all_ones     = f45
+
+FR_X                 = f8
+FR_Y                 = f0
+cosh_FR_Inv_log2by64 = f9
+cosh_FR_log2by64_lo  = f11
+cosh_FR_log2by64_hi  = f10
+
+cosh_FR_A1           = f9
+cosh_FR_A2           = f10
+cosh_FR_A3           = f11
+
+cosh_FR_Rcub         = f12
+cosh_FR_M_temp       = f13
+cosh_FR_R_temp       = f13
+cosh_FR_Rsq          = f13
+cosh_FR_R            = f14
+
+cosh_FR_M            = f38
+
+cosh_FR_tmp          = f15
+cosh_FR_B1           = f15
+cosh_FR_B2           = f32
+cosh_FR_B3           = f33
+
+cosh_FR_peven_temp1  = f34
+cosh_FR_peven_temp2  = f35
+cosh_FR_peven        = f36
+
+cosh_FR_podd_temp1   = f34
+cosh_FR_podd_temp2   = f35
+cosh_FR_podd         = f37
+
+cosh_FR_J_temp       = f9
+cosh_FR_J            = f10
+
+cosh_FR_Mmj          = f39
+
+cosh_FR_N_temp1      = f11
+cosh_FR_N_temp2      = f12
+cosh_FR_N            = f13
+
+cosh_FR_spos         = f14
+cosh_FR_sneg         = f15
+
+cosh_FR_Tjhi         = f32
+cosh_FR_Tjlo         = f33
+cosh_FR_Tmjhi        = f34
+cosh_FR_Tmjlo        = f35
+
+GR_mJ           = r35
+GR_J            = r36
+
+AD_mJ           = r38
+AD_J            = r39
+
+cosh_GR_all_ones     = r40
+
+GR_SAVE_PFS           = r41
+GR_SAVE_B0            = r42
+GR_SAVE_GP            = r43
+GR_Parameter_X        = r44
+GR_Parameter_Y        = r45
+GR_Parameter_RESULT   = r46
+GR_Parameter_TAG      = r47 
 
+cosh_FR_C_hi         = f9
+cosh_FR_C_hi_temp    = f10
+cosh_FR_C_lo_temp1   = f11 
+cosh_FR_C_lo_temp2   = f12 
+cosh_FR_C_lo_temp3   = f13 
+
+cosh_FR_C_lo         = f38
+cosh_FR_S_hi         = f39
+
+cosh_FR_S_hi_temp1   = f10
+cosh_FR_Y_hi         = f11 
+cosh_FR_Y_lo_temp    = f12 
+cosh_FR_Y_lo         = f13 
+cosh_FR_COSH         = f9
+
+cosh_FR_X2           = f9
+cosh_FR_X4           = f10
+
+cosh_FR_P1           = f14
+cosh_FR_P2           = f15
+cosh_FR_P3           = f32
+cosh_FR_P4           = f33
+cosh_FR_P5           = f34
+cosh_FR_P6           = f35
+
+cosh_FR_TINY_THRESH  = f9
+
+cosh_FR_COSH_temp    = f10
+cosh_FR_SCALE        = f11 
+
+cosh_FR_hi_lo = f10
+
+cosh_FR_poly_podd_temp1    =  f11 
+cosh_FR_poly_podd_temp2    =  f13
+cosh_FR_poly_peven_temp1   =  f11
+cosh_FR_poly_peven_temp2   =  f13
 
 // Data tables
 //==============================================================
 
-// DO NOT CHANGE ORDER OF THESE TABLES
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
-LOCAL_OBJECT_START(cosh_arg_reduction)
-//   data8 0xB8AA3B295C17F0BC, 0x00004005  // 64/log2 -- signif loaded with setf
-   data8 0xB17217F7D1000000, 0x00003FF8  // log2/64 high part
-   data8 0xCF79ABC9E3B39804, 0x00003FD0  // log2/64 low part
-   data8 0xb174ddc031aec0ea, 0x0000400c  // Smallest x to overflow (11357.21655)
-LOCAL_OBJECT_END(cosh_arg_reduction)
-
-LOCAL_OBJECT_START(cosh_p_table)
-   data8 0x8FA02AC65BCBD5BC, 0x00003FE2  // P6
-   data8 0xD00D00D1021D7370, 0x00003FEF  // P4
-   data8 0xAAAAAAAAAAAAAB80, 0x00003FFA  // P2
-   data8 0x93F27740C0C2F1CC, 0x00003FE9  // P5
-   data8 0xB60B60B60B4FE884, 0x00003FF5  // P3
-   data8 0x8000000000000000, 0x00003FFE  // P1
-LOCAL_OBJECT_END(cosh_p_table)
-
-LOCAL_OBJECT_START(cosh_ab_table)
-   data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC  // A1
-   data8 0x88888888884ECDD5, 0x00003FF8  // A2
-   data8 0xD00D0C6DCC26A86B, 0x00003FF2  // A3
-   data8 0x8000000000000002, 0x00003FFE  // B1
-   data8 0xAAAAAAAAAA402C77, 0x00003FFA  // B2
-   data8 0xB60B6CC96BDB144D, 0x00003FF5  // B3
-LOCAL_OBJECT_END(cosh_ab_table)
-
-LOCAL_OBJECT_START(cosh_j_hi_table)
-   data8 0xB504F333F9DE6484, 0x00003FFE
-   data8 0xB6FD91E328D17791, 0x00003FFE
-   data8 0xB8FBAF4762FB9EE9, 0x00003FFE
-   data8 0xBAFF5AB2133E45FB, 0x00003FFE
-   data8 0xBD08A39F580C36BF, 0x00003FFE
-   data8 0xBF1799B67A731083, 0x00003FFE
-   data8 0xC12C4CCA66709456, 0x00003FFE
-   data8 0xC346CCDA24976407, 0x00003FFE
-   data8 0xC5672A115506DADD, 0x00003FFE
-   data8 0xC78D74C8ABB9B15D, 0x00003FFE
-   data8 0xC9B9BD866E2F27A3, 0x00003FFE
-   data8 0xCBEC14FEF2727C5D, 0x00003FFE
-   data8 0xCE248C151F8480E4, 0x00003FFE
-   data8 0xD06333DAEF2B2595, 0x00003FFE
-   data8 0xD2A81D91F12AE45A, 0x00003FFE
-   data8 0xD4F35AABCFEDFA1F, 0x00003FFE
-   data8 0xD744FCCAD69D6AF4, 0x00003FFE
-   data8 0xD99D15C278AFD7B6, 0x00003FFE
-   data8 0xDBFBB797DAF23755, 0x00003FFE
-   data8 0xDE60F4825E0E9124, 0x00003FFE
-   data8 0xE0CCDEEC2A94E111, 0x00003FFE
-   data8 0xE33F8972BE8A5A51, 0x00003FFE
-   data8 0xE5B906E77C8348A8, 0x00003FFE
-   data8 0xE8396A503C4BDC68, 0x00003FFE
-   data8 0xEAC0C6E7DD24392F, 0x00003FFE
-   data8 0xED4F301ED9942B84, 0x00003FFE
-   data8 0xEFE4B99BDCDAF5CB, 0x00003FFE
-   data8 0xF281773C59FFB13A, 0x00003FFE
-   data8 0xF5257D152486CC2C, 0x00003FFE
-   data8 0xF7D0DF730AD13BB9, 0x00003FFE
-   data8 0xFA83B2DB722A033A, 0x00003FFE
-   data8 0xFD3E0C0CF486C175, 0x00003FFE
-   data8 0x8000000000000000, 0x00003FFF // Center of table
-   data8 0x8164D1F3BC030773, 0x00003FFF
-   data8 0x82CD8698AC2BA1D7, 0x00003FFF
-   data8 0x843A28C3ACDE4046, 0x00003FFF
-   data8 0x85AAC367CC487B15, 0x00003FFF
-   data8 0x871F61969E8D1010, 0x00003FFF
-   data8 0x88980E8092DA8527, 0x00003FFF
-   data8 0x8A14D575496EFD9A, 0x00003FFF
-   data8 0x8B95C1E3EA8BD6E7, 0x00003FFF
-   data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF
-   data8 0x8EA4398B45CD53C0, 0x00003FFF
-   data8 0x9031DC431466B1DC, 0x00003FFF
-   data8 0x91C3D373AB11C336, 0x00003FFF
-   data8 0x935A2B2F13E6E92C, 0x00003FFF
-   data8 0x94F4EFA8FEF70961, 0x00003FFF
-   data8 0x96942D3720185A00, 0x00003FFF
-   data8 0x9837F0518DB8A96F, 0x00003FFF
-   data8 0x99E0459320B7FA65, 0x00003FFF
-   data8 0x9B8D39B9D54E5539, 0x00003FFF
-   data8 0x9D3ED9A72CFFB751, 0x00003FFF
-   data8 0x9EF5326091A111AE, 0x00003FFF
-   data8 0xA0B0510FB9714FC2, 0x00003FFF
-   data8 0xA27043030C496819, 0x00003FFF
-   data8 0xA43515AE09E6809E, 0x00003FFF
-   data8 0xA5FED6A9B15138EA, 0x00003FFF
-   data8 0xA7CD93B4E965356A, 0x00003FFF
-   data8 0xA9A15AB4EA7C0EF8, 0x00003FFF
-   data8 0xAB7A39B5A93ED337, 0x00003FFF
-   data8 0xAD583EEA42A14AC6, 0x00003FFF
-   data8 0xAF3B78AD690A4375, 0x00003FFF
-   data8 0xB123F581D2AC2590, 0x00003FFF
-   data8 0xB311C412A9112489, 0x00003FFF
-   data8 0xB504F333F9DE6484, 0x00003FFF
-LOCAL_OBJECT_END(cosh_j_hi_table)
-
-LOCAL_OBJECT_START(cosh_j_lo_table)
-   data4 0x1EB2FB13
-   data4 0x1CE2CBE2
-   data4 0x1DDC3CBC
-   data4 0x1EE9AA34
-   data4 0x9EAEFDC1
-   data4 0x9DBF517B
-   data4 0x1EF88AFB
-   data4 0x1E03B216
-   data4 0x1E78AB43
-   data4 0x9E7B1747
-   data4 0x9EFE3C0E
-   data4 0x9D36F837
-   data4 0x9DEE53E4
-   data4 0x9E24AE8E
-   data4 0x1D912473
-   data4 0x1EB243BE
-   data4 0x1E669A2F
-   data4 0x9BBC610A
-   data4 0x1E761035
-   data4 0x9E0BE175
-   data4 0x1CCB12A1
-   data4 0x1D1BFE90
-   data4 0x1DF2F47A
-   data4 0x1EF22F22
-   data4 0x9E3F4A29
-   data4 0x1EC01A5B
-   data4 0x1E8CAC3A
-   data4 0x9DBB3FAB
-   data4 0x1EF73A19
-   data4 0x9BB795B5
-   data4 0x1EF84B76
-   data4 0x9EF5818B
-   data4 0x00000000 // Center of table
-   data4 0x1F77CACA
-   data4 0x1EF8A91D
-   data4 0x1E57C976
-   data4 0x9EE8DA92
-   data4 0x1EE85C9F
-   data4 0x1F3BF1AF
-   data4 0x1D80CA1E
-   data4 0x9D0373AF
-   data4 0x9F167097
-   data4 0x1EB70051
-   data4 0x1F6EB029
-   data4 0x1DFD6D8E
-   data4 0x9EB319B0
-   data4 0x1EBA2BEB
-   data4 0x1F11D537
-   data4 0x1F0D5A46
-   data4 0x9E5E7BCA
-   data4 0x9F3AAFD1
-   data4 0x9E86DACC
-   data4 0x9F3EDDC2
-   data4 0x1E496E3D
-   data4 0x9F490BF6
-   data4 0x1DD1DB48
-   data4 0x1E65EBFB
-   data4 0x9F427496
-   data4 0x1F283C4A
-   data4 0x1F4B0047
-   data4 0x1F130152
-   data4 0x9E8367C0
-   data4 0x9F705F90
-   data4 0x1EFB3C53
-   data4 0x1F32FB13
-LOCAL_OBJECT_END(cosh_j_lo_table)
-
+double_cosh_arg_reduction:   // Argument-reduction constants, read sequentially with ldfe (16 bytes each)
+ASM_TYPE_DIRECTIVE(double_cosh_arg_reduction,@object)
+   data8 0xB8AA3B295C17F0BC, 0x00004005  // 64/log2, loaded into cosh_FR_Inv_log2by64
+   data8 0xB17217F7D1000000, 0x00003FF8  // log2/64 high part, loaded into cosh_FR_log2by64_hi
+   data8 0xCF79ABC9E3B39804, 0x00003FD0  // log2/64 low part, loaded into cosh_FR_log2by64_lo
+ASM_SIZE_DIRECTIVE(double_cosh_arg_reduction)
+
+double_cosh_p_table:   // Coefficients for the COSH_BY_POLY path (|x| < 0.25), stored in load order P1..P6
+ASM_TYPE_DIRECTIVE(double_cosh_p_table,@object)
+   data8 0x8000000000000000, 0x00003FFE  // P1
+   data8 0xAAAAAAAAAAAAAB80, 0x00003FFA  // P2
+   data8 0xB60B60B60B4FE884, 0x00003FF5  // P3
+   data8 0xD00D00D1021D7370, 0x00003FEF  // P4
+   data8 0x93F27740C0C2F1CC, 0x00003FE9  // P5
+   data8 0x8FA02AC65BCBD5BC, 0x00003FE2  // P6
+ASM_SIZE_DIRECTIVE(double_cosh_p_table)
+
+double_cosh_ab_table:   // Coefficients for the TBL/EXP paths: A1..A3 feed p_odd, B1..B3 feed p_even
+ASM_TYPE_DIRECTIVE(double_cosh_ab_table,@object)
+   data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC  // A1
+   data8 0x88888888884ECDD5, 0x00003FF8  // A2
+   data8 0xD00D0C6DCC26A86B, 0x00003FF2  // A3
+   data8 0x8000000000000002, 0x00003FFE  // B1
+   data8 0xAAAAAAAAAA402C77, 0x00003FFA  // B2
+   data8 0xB60B6CC96BDB144D, 0x00003FF5  // B3
+ASM_SIZE_DIRECTIVE(double_cosh_ab_table)
+
+double_cosh_j_table:   // T(j) table, 32 bytes per entry: hi part (read with ldfe), then lo part as single-precision bits (read with ldfs at +16), then padding
+ASM_TYPE_DIRECTIVE(double_cosh_j_table,@object)
+   data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
+   data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+   data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+   data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+   data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+   data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+   data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+   data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+   data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+   data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+   data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+   data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+   data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+   data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+   data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+   data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+   data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+   data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+   data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+   data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+   data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+   data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+   data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+   data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+   data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+   data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+   data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+   data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+   data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+   data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+   data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+   data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+   data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000  // Center of table (j = 0, entry index 32)
+   data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+   data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+   data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+   data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+   data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+   data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+   data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+   data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+   data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+   data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+   data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+   data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+   data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+   data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+   data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+   data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+   data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+   data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+   data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+   data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+   data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+   data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+   data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+   data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+   data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+   data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+   data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+   data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+   data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+   data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+   data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+   data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
+ASM_SIZE_DIRECTIVE(double_cosh_j_table)
+
+.align 32
+.global coshl#
 
 .section .text
-GLOBAL_IEEE754_ENTRY(coshl)
+.proc  coshl#
+.align 32
 
-{ .mlx
-      getf.exp        r_signexp_x = f8   // Get signexp of x, must redo if unorm
-      movl            r_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+coshl:
+
+#ifdef _LIBC
+.global __ieee754_coshl#
+.proc __ieee754_coshl#
+__ieee754_coshl:
+#endif
+
+// X NAN?
+
+{ .mfi
+      alloc r32 = ar.pfs,0,12,4,0                  
+(p0)  fclass.m.unc  p6,p7 = f8, 0xc3               
+      mov cosh_GR_all_ones = -1
+};;
+
+//   This is more than we need but it is in preparation
+//   for the values we add for error support. We push three
+//   addresses on the stack (3*8) = 24 bytes and one tag
+
+{ .mfb
+      nop.m 999
+(p6)     fma.s0   f8 = f8,f1,f8                  
+(p6)  br.ret.spnt     b0 ;;                          
+}
+
+
+// Make constant that will generate inexact when squared
+// X infinity 
+{ .mfi
+      setf.sig cosh_FR_all_ones = cosh_GR_all_ones 
+(p0)  fclass.m.unc  p6,p0 = f8, 0x23               
+      nop.i 999 ;;
+}
+
+{ .mfb
+      nop.m 999
+(p6)     fmerge.s      f8 = f0,f8                  
+(p6)  br.ret.spnt     b0 ;;
 }
+
+
+
+// Put 0.25 in f9; p7 true if |x| >= 0.25 (then branch to COSH_BY_TBL)
 { .mlx
-      addl            r_ad1 = @ltoff(cosh_arg_reduction), gp
-      movl            r_rshf_2to57 = 0x4778000000000000 // 1.10000 2^(63+57)
+         nop.m 999
+(p0)     movl            r32 = 0x000000000000fffd ;;         
+}
+
+{ .mfi
+(p0)  setf.exp        f9 = r32                         
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      ld8             r_ad1 = [r_ad1]
-      fmerge.s        f_ABS_X    = f0,f8
-      mov             r_exp_0_25 = 0x0fffd    // Form exponent for 0.25
+      nop.m 999
+(p0)  fmerge.s      cosh_FR_X    = f0,f8                
+      nop.i 999
 }
+
 { .mfi
-      nop.m           0
-      fnorm.s1        f_NORM_X = f8      
-      mov             r_exp_2tom57 = 0xffff-57
+      nop.m 999
+(p0)  fmerge.s      cosh_FR_SGNX = f8,f1                
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      setf.d          f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120
-      fclass.m        p10,p0 = f8, 0x0b           // Test for denorm
-      mov             r_exp_mask = 0x1ffff 
+      nop.m 999
+(p0)  fcmp.lt.unc     p0,p7 = cosh_FR_X,f9                    
+      nop.i 999 ;;
 }
-{ .mlx
-      setf.sig        f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63
-      movl            r_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
+
+{ .mib
+      nop.m 999
+      nop.i 999
+(p7)  br.cond.sptk    L(COSH_BY_TBL) 
+}
+;;
+
+
+// COSH_BY_POLY: 
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Get the values of P_x from the table
+
+{ .mmi
+      nop.m 999
+(p0)  addl           r34   = @ltoff(double_cosh_p_table), gp
+      nop.i 999
 }
 ;;
 
+{ .mmi
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
+// Calculate cosh_FR_X2 = ax*ax and cosh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+         nop.m 999
+(p0)     ldfe       cosh_FR_P1 = [r34],16                 
+(p0)     fma.s1     cosh_FR_X2 = cosh_FR_X, cosh_FR_X, f0 ;;           
+}
+
+{ .mmi
+(p0)     ldfe       cosh_FR_P2 = [r34],16 ;;                 
+(p0)     ldfe       cosh_FR_P3 = [r34],16                 
+         nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe       cosh_FR_P4 = [r34],16 ;;                 
+(p0)     ldfe       cosh_FR_P5 = [r34],16                 
+         nop.i 999 ;;
+}
+
 { .mfi
-      nop.m           0
-      fclass.m        p7,p0 = f8, 0x07  // Test if x=0
-      nop.i           0
+(p0)     ldfe       cosh_FR_P6 = [r34],16                 
+(p0)     fma.s1     cosh_FR_X4 = cosh_FR_X2, cosh_FR_X2, f0         
+         nop.i 999 ;;
 }
+
+// Calculate cosh_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
 { .mfi
-      setf.exp        f_2TOM57 = r_exp_2tom57 // Form 2^-57 for scaling
-      nop.f           0
-      add             r_ad3 = 0x90, r_ad1  // Point to ab_table
+         nop.m 999
+(p0)     fma.s1     cosh_FR_poly_podd_temp1 = cosh_FR_X4, cosh_FR_P5, cosh_FR_P3                
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      setf.d          f_RSHF = r_rshf     // Form right shift const 1.100 * 2^63
-      fclass.m        p6,p0 = f8, 0xe3     // Test if x nan, inf
-      add             r_ad4 = 0x2f0, r_ad1 // Point to j_hi_table midpoint
+         nop.m 999
+(p0)     fma.s1     cosh_FR_podd            = cosh_FR_X4, cosh_FR_poly_podd_temp1, cosh_FR_P1   
+         nop.i 999
 }
-{ .mib
-      add             r_ad2e = 0x20, r_ad1 // Point to p_table
-      nop.i           0
-(p10) br.cond.spnt    COSH_DENORM          // Branch if x denorm
+
+// Calculate cosh_FR_peven =  p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
+{ .mfi
+         nop.m 999
+(p0)     fma.s1     cosh_FR_poly_peven_temp1 = cosh_FR_X4, cosh_FR_P6, cosh_FR_P4               
+         nop.i 999 ;;
 }
-;;
 
-// Common path -- return here from COSH_DENORM if x is unnorm
-COSH_COMMON:
 { .mfi
-      ldfe            f_smlst_oflow_input = [r_ad2e],16
-(p7)  fma.s0          f8 = f1, f1, f0      // Result = 1.0 if x=0
-      add             r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint
+         nop.m 999
+(p0)     fma.s1     cosh_FR_poly_peven_temp2 = cosh_FR_X4, cosh_FR_poly_peven_temp1, cosh_FR_P2 
+         nop.i 999 ;;
 }
-{ .mib
-      ldfe            f_log2by64_hi  = [r_ad1],16       
-      and             r_exp_x = r_exp_mask, r_signexp_x
-(p7)  br.ret.spnt     b0                  // Exit if x=0
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1     cosh_FR_peven       = cosh_FR_X4, cosh_FR_poly_peven_temp2, f0         
+         nop.i 999 ;;
 }
-;;
 
-// Get the A coefficients for COSH_BY_TBL
+// Y_lo = x2*p_odd + p_even
+// Calculate f8 = Y_hi + Y_lo 
 { .mfi
-      ldfe            f_A1 = [r_ad3],16            
-      fcmp.lt.s1      p8,p9 = f8,f0           // Test for x<0
-      cmp.lt          p7,p0 = r_exp_x, r_exp_0_25  // Test x < 0.25
+         nop.m 999
+(p0)     fma.s1     cosh_FR_Y_lo         = cosh_FR_X2, cosh_FR_podd,  cosh_FR_peven    
+         nop.i 999 ;;
 }
+
 { .mfb
-      add             r_ad2o = 0x30, r_ad2e  // Point to p_table odd coeffs
-(p6)  fma.s0          f8 = f8,f8,f0          // Result for x nan, inf          
-(p6)  br.ret.spnt     b0                     // Exit for x nan, inf
+         nop.m 999
+(p0)     fma.s0   f8                   = f1, f1, cosh_FR_Y_lo                        
+(p0)     br.ret.sptk     b0 ;;
+}
+
+
+L(COSH_BY_TBL): 
+
+// We are now at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now. The HUGE thresholds per precision are:
+// Double Extended
+// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
+// This is coshl (double extended), so go to HUGE if |x| >= 2^14.
+
+{ .mlx
+      nop.m 999
+(p0)     movl            r32 = 0x000000000001000d ;;              
 }
-;;
 
-// Calculate X2 = ax*ax for COSH_BY_POLY
 { .mfi
-      ldfe            f_log2by64_lo  = [r_ad1],16       
-      nop.f           0
-      nop.i           0
+(p0)     setf.exp        f9 = r32                              
+      nop.f 999
+      nop.i 999 ;;
 }
-{ .mfb
-      ldfe            f_A2 = [r_ad3],16            
-      fma.s1          f_X2 = f_NORM_X, f_NORM_X, f0
-(p7)  br.cond.spnt    COSH_BY_POLY
+
+{ .mfi
+      nop.m 999
+(p0)     fcmp.ge.unc     p6,p7 = cosh_FR_X,f9                  
+      nop.i 999 ;;
 }
-;;
 
-// Here if |x| >= 0.25
-COSH_BY_TBL: 
+{ .mib
+      nop.m 999
+      nop.i 999
+(p6)     br.cond.spnt    L(COSH_HUGE) ;;                             
+}
+
+// r32 = 1
+// r34 = N-1 
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// cosh(x) = cosh(B+R)
+//         = cosh(B) cosh(R) + sinh(B) sinh(R) 
+// cosh(R) can be approximated by 1 + p_even
+// sinh(R) can be approximated by p_odd
+
 // ******************************************************
-// STEP 1 (TBL and EXP) - Argument reduction
+// STEP 1 (TBL and EXP)
 // ******************************************************
-// Get the following constants. 
-// Inv_log2by64
-// log2by64_hi
-// log2by64_lo
+// Get the following constants.
+// f9  = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
 
+{ .mmi
+(p0)     adds                 r32 = 0x1,r0      
+(p0)     addl           r34   = @ltoff(double_cosh_arg_reduction), gp
+         nop.i 999
+}
+;;
 
 // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
 // put them in an exponent.
-// f_spos = 2^(N-1) and f_sneg = 2^(-N-1)
-// 0xffff + (N-1)  = 0xffff +N -1
-// 0xffff - (N +1) = 0xffff -N -1
+// cosh_FR_spos = 2^(N-1) and cosh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1)  = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
 
+{ .mlx
+         ld8 r34 = [r34]
+(p0)     movl                r38 = 0x000000000000fffe ;; 
+}
 
-// Calculate M and keep it as integer and floating point.
-// M = round-to-integer(x*Inv_log2by64)
-// f_M = M = truncate(ax/(log2/64))
-// Put the integer representation of M in r_M
-//    and the floating point representation of M in f_M
+{ .mmi
+(p0)     ldfe            cosh_FR_Inv_log2by64 = [r34],16 ;;            
+(p0)     ldfe            cosh_FR_log2by64_hi  = [r34],16            
+         nop.i 999 ;;
+}
+
+{ .mbb
+(p0)     ldfe            cosh_FR_log2by64_lo  = [r34],16            
+         nop.b 999
+         nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9  = A_1
+// f10 = A_2
+// f11 = A_3
 
-// Get the remaining A,B coefficients
 { .mmi
-      ldfe            f_A3 = [r_ad3],16
-      nop.m           0
-      nop.i           0
+      nop.m 999
+(p0)  addl           r34   = @ltoff(double_cosh_ab_table), gp
+      nop.i 999
 }
 ;;
 
-// Use constant (1.100*2^(63-6)) to get rounded M into rightmost significand
-// |x| * 64 * 1/ln2 * 2^(63-6) + 1.1000 * 2^(63+(63-6))
-{ .mfi
-      nop.m           0
-      fma.s1          f_M_temp = f_ABS_X, f_INV_LN2_2TO63, f_RSHF_2TO57
-      mov             r_signexp_0_5 = 0x0fffe // signexp of +0.5
+{ .mmi
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
-// Test for |x| >= overflow limit
+
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(x*Inv_log2by64)
+// cosh_FR_M = M = truncate(ax/(log2/64))
+// Put the significand of M in r35
+//    and the floating point representation of M in cosh_FR_M
+
 { .mfi
-      ldfe            f_B1 = [r_ad3],16
-      fcmp.ge.s1      p6,p0 = f_ABS_X, f_smlst_oflow_input
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_M      = cosh_FR_X, cosh_FR_Inv_log2by64, f0 
+      nop.i 999
 }
-;;
 
 { .mfi
-      ldfe            f_B2 = [r_ad3],16
-      nop.f           0
-      mov             r_exp_32 = 0x10004
+(p0)  ldfe            cosh_FR_A1 = [r34],16            
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
-// Subtract RSHF constant to get rounded M as a floating point value
-// M_temp * 2^(63-6) - 2^63
-{ .mfb
-      ldfe            f_B3 = [r_ad3],16            
-      fms.s1          f_M = f_M_temp, f_2TOM57, f_RSHF
-(p6)  br.cond.spnt    COSH_HUGE  // Branch if result will overflow
+{ .mfi
+      nop.m 999
+(p0)  fcvt.fx.s1      cosh_FR_M_temp = cosh_FR_M                      
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      getf.sig        r_M = f_M_temp                 
-      nop.f           0
-      cmp.ge          p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32
+      nop.m 999
+(p0)  fnorm.s1        cosh_FR_M      = cosh_FR_M_temp                 
+      nop.i 999 ;;
+}
+
+{ .mfi
+(p0)  getf.sig        r35       = cosh_FR_M_temp                 
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
-// Calculate j. j is the signed extension of the six lsb of M. It 
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It
 // has a range of -32 thru 31.
+// r35 = M
+// r36 = j 
+{ .mii
+      nop.m 999
+      nop.i 999 ;;
+(p0)  and            r36 = 0x3f, r35 ;;   
+}
 
 // Calculate R
-// ax - M*log2by64_hi
-// R = (ax - M*log2by64_hi) - M*log2by64_lo
+// cosh_FR_R_temp = cosh_FR_X - cosh_FR_M*cosh_FR_log2by64_hi = |x| - M*log2by64_hi
+// cosh_FR_R = cosh_FR_R_temp - cosh_FR_M*cosh_FR_log2by64_lo = R = (|x| - M*log2by64_hi) - M*log2by64_lo
 
 { .mfi
-      nop.m           0
-      fnma.s1         f_R_temp = f_M, f_log2by64_hi, f_ABS_X
-      and             r_j = 0x3f, r_M
+      nop.m 999
+(p0)  fnma.s1        cosh_FR_R_temp = cosh_FR_M, cosh_FR_log2by64_hi, cosh_FR_X      
+      nop.i 999
 }
-;;
 
-{ .mii
-      nop.m           0
-      shl             r_jshf = r_j, 0x2 // Shift j so can sign extend it
-;;
-      sxt1            r_jshf = r_jshf
+{ .mfi
+(p0)  ldfe            cosh_FR_A2 = [r34],16            
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
-{ .mii
-      nop.m           0
-      shr             r_j = r_jshf, 0x2    // Now j has range -32 to 31
-      nop.i           0
+{ .mfi
+      nop.m 999
+(p0)  fnma.s1        cosh_FR_R      = cosh_FR_M, cosh_FR_log2by64_lo, cosh_FR_R_temp 
+      nop.i 999
 }
-;;
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
 
 { .mmi
-      shladd          r_ad_J_hi = r_j, 4, r_ad4 // pointer to Tjhi
-      sub             r_Mmj = r_M, r_j          // M-j
-      sub             r_mj = r0, r_j            // Form -j
+(p0)     ldfe            cosh_FR_A3 = [r34],16 ;;            
+(p0)     ldfe            cosh_FR_B1 = [r34],16            
+         nop.i 999 ;;
 }
-;;
 
-// The TBL and EXP branches are merged and predicated
-// If TBL, p6 true, 0.25 <= |x| < 32
-// If EXP, p7 true, 32 <= |x| < overflow_limit
-//
-// N = (M-j)/64
-{ .mfi
-      ldfe            f_Tjhi = [r_ad_J_hi]
-      fnma.s1         f_R = f_M, f_log2by64_lo, f_R_temp 
-      shr             r_N = r_Mmj, 0x6            // N = (M-j)/64 
+{ .mmi
+(p0)     ldfe            cosh_FR_B2 = [r34],16 ;;            
+(p0)     ldfe            cosh_FR_B3 = [r34],16            
+         nop.i 999 ;;
 }
-{ .mfi
-      shladd          r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
-      nop.f           0
-      shladd          r_ad_mJ_lo = r_mj, 2, r_ad5 // pointer to Tmjlo
+
+{ .mii
+         nop.m 999
+(p0)     shl            r34 = r36,  0x2 ;;   
+(p0)     sxt1           r37 = r34 ;;         
 }
-;;
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
 
 { .mfi
-      sub             r_2mNm1 = r_signexp_0_5, r_N // signexp 2^(-N-1)
-      nop.f           0
-      shladd          r_ad_J_lo = r_j, 2, r_ad5   // pointer to Tjlo
+      nop.m 999
+(p0)     fma.s1          cosh_FR_Rsq  = cosh_FR_R,   cosh_FR_R, f0  
+(p0)     shr            r36 = r37,  0x2 ;;   
 }
-{ .mfi
-      ldfe            f_Tmjhi = [r_ad_mJ_hi]
-      nop.f           0
-      add             r_2Nm1 = r_signexp_0_5, r_N // signexp 2^(N-1)
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0)     sub                  r34 = r35, r36    
+         nop.i 999 ;;
+(p0)     shr                  r35 = r34, 0x6 ;;    
 }
-;;
 
-{ .mmf
-      ldfs            f_Tmjlo = [r_ad_mJ_lo]
-      setf.exp        f_sneg = r_2mNm1            // Form 2^(-N-1)
-      nop.f           0
+{ .mii
+(p0)     sub                 r40 = r38, r35           
+(p0)     adds                 r37 = 0x1, r35    
+(p0)     add                 r39 = r38, r35 ;;           
 }
-;;
 
-{ .mmf
-      ldfs            f_Tjlo  = [r_ad_J_lo]
-      setf.exp        f_spos = r_2Nm1             // Form 2^(N-1)
-      nop.f           0
+// Get the address of the J table, add the offset,
+// addresses are AD_mJ and AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0)     sub                  r34 = r35, r32    
+(p0)     addl    r37   = @ltoff(double_cosh_j_table), gp
+         nop.i 999
 }
 ;;
 
+{ .mfi
+      ld8 r37 = [r37]
+(p0)  fma.s1          cosh_FR_Rcub = cosh_FR_Rsq, cosh_FR_R, f0  
+      nop.i 999
+}
+
 // ******************************************************
-// STEP 2 (TBL and EXP)
+// STEP 3 Now decide if we need to branch to EXP
 // ******************************************************
-// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// Put 32 in f9; p6 true if x < 32
 
-{ .mmf
-      nop.m           0
-      nop.m           0
-      fma.s1          f_Rsq  = f_R, f_R, f0
+{ .mlx
+         nop.m 999
+(p0)     movl                r32 = 0x0000000000010004 ;;               
 }
-;;
-
 
 // Calculate p_even
-// B_2 + Rsq *B_3
-// B_1 + Rsq * (B_2 + Rsq *B_3)
-// p_even = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
-{ .mfi
-      nop.m           0
-      fma.s1          f_peven_temp1 = f_Rsq, f_B3, f_B2
-      nop.i           0
-}
-// Calculate p_odd
-// A_2 + Rsq *A_3
-// A_1 + Rsq * (A_2 + Rsq *A_3)
-// podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = peven = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
 { .mfi
-      nop.m           0
-      fma.s1          f_podd_temp1 = f_Rsq, f_A3, f_A2
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_peven_temp1 = cosh_FR_Rsq, cosh_FR_B3,          cosh_FR_B2  
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          f_Rcub = f_Rsq, f_R, f0
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_peven_temp2 = cosh_FR_Rsq, cosh_FR_peven_temp1, cosh_FR_B1  
+      nop.i 999
 }
-;;
 
-// 
-// If TBL, 
-// Calculate S_hi and S_lo, and C_hi
-// SC_hi_temp = sneg * Tmjhi
-// S_hi = spos * Tjhi - SC_hi_temp
-// S_hi = spos * Tjhi - (sneg * Tmjhi)
-// C_hi = spos * Tjhi + SC_hi_temp
-// C_hi = spos * Tjhi + (sneg * Tmjhi)
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
 
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_SC_hi_temp = f_sneg, f_Tmjhi, f0   
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_podd_temp1 = cosh_FR_Rsq,        cosh_FR_A3,         cosh_FR_A2  
+      nop.i 999 ;;
 }
-;;
 
-// If TBL, 
-// C_lo_temp3 = sneg * Tmjlo
-// C_lo_temp4 = spos * Tjlo + C_lo_temp3
-// C_lo_temp4 = spos * Tjlo + (sneg * Tmjlo)
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_C_lo_temp3 =  f_sneg, f_Tmjlo, f0
-      nop.i           0
+(p0)  setf.exp            cosh_FR_N_temp1 = r39            
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          f_peven_temp2 = f_Rsq, f_peven_temp1, f_B1
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_peven       = cosh_FR_Rsq, cosh_FR_peven_temp2, f0     
+      nop.i 999
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          f_podd_temp2 = f_Rsq, f_podd_temp1, f_A1
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_podd_temp2 = cosh_FR_Rsq,        cosh_FR_podd_temp1, cosh_FR_A1  
+      nop.i 999 ;;
 }
-;;
 
-// If EXP, 
-// Compute 2^(N-1) * Tjhi and 2^(N-1) * Tjlo
 { .mfi
-      nop.m           0
-(p7)  fma.s1          f_Tjhi_spos = f_Tjhi, f_spos, f0
-      nop.i           0
+(p0)  setf.exp            f9  = r32                              
+      nop.f 999
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-(p7)  fma.s1          f_Tjlo_spos = f_Tjlo, f_spos, f0
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1          cosh_FR_podd       = cosh_FR_podd_temp2, cosh_FR_Rcub,       cosh_FR_R   
+      nop.i 999
 }
-;;
 
-{ .mfi
-      nop.m           0
-(p6)  fma.s1          f_C_hi = f_spos, f_Tjhi, f_SC_hi_temp
-      nop.i           0
+// GR_mJ contains the table offset for -j: (32 - j) * 32 bytes
+// GR_J  contains the table offset for +j: (32 + j) * 32 bytes
+// (the sign of j is not tested; p6/p7 are set below by comparing |x| with 32)
+
+{ .mlx
+(p0)     setf.exp            cosh_FR_N_temp2 = r40            
+(p0)     movl                r40 = 0x0000000000000020 ;;    
 }
-;;
 
 { .mfi
-      nop.m           0
-(p6)  fms.s1          f_S_hi = f_spos, f_Tjhi, f_SC_hi_temp
-      nop.i           0
+(p0)     sub                 GR_mJ = r40,  r36           
+(p0)     fmerge.se           cosh_FR_spos    = cosh_FR_N_temp1, f1 
+(p0)     adds                GR_J  = 0x20, r36 ;;           
+}
+
+{ .mii
+         nop.m 999
+(p0)     shl                  GR_mJ = GR_mJ, 5 ;;   
+(p0)     add                  AD_mJ = r37, GR_mJ ;; 
 }
+
+{ .mmi
+         nop.m 999
+(p0)     ldfe                 cosh_FR_Tmjhi = [AD_mJ],16                 
+(p0)     shl                  GR_J  = GR_J, 5 ;;    
+}
+
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_C_lo_temp4 = f_spos, f_Tjlo, f_C_lo_temp3
-      nop.i           0
+(p0)     ldfs                 cosh_FR_Tmjlo = [AD_mJ],16                 
+(p0)     fcmp.lt.unc.s1      p6,p7 = cosh_FR_X,f9                          
+(p0)     add                  AD_J  = r37, GR_J ;;  
 }
-;;
+
+{ .mmi
+(p0)     ldfe                 cosh_FR_Tjhi  = [AD_J],16 ;;                  
+(p0)     ldfs                 cosh_FR_Tjlo  = [AD_J],16                  
+         nop.i 999 ;;
+}
+
+{ .mfb
+         nop.m 999
+(p0)     fmerge.se           cosh_FR_sneg    = cosh_FR_N_temp2, f1 
+(p7)     br.cond.spnt        L(COSH_BY_EXP) ;;                            
+}
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate C_hi
+// ******************************************************
+// cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
+// cosh_FR_C_hi = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)
 
 { .mfi
-      nop.m           0
-      fma.s1          f_peven = f_Rsq, f_peven_temp2, f0
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0                   
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          f_podd = f_podd_temp2, f_Rcub, f_R
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_C_hi      = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi_temp    
+      nop.i 999
 }
-;;
 
-// If TBL,
-// C_lo_temp1 =  spos * Tjhi - C_hi
-// C_lo_temp2 =  sneg * Tmjlo + C_lo_temp1
-// C_lo_temp2 =  sneg * Tmjlo + (spos * Tjhi - C_hi)
+// ******************************************************
+// Calculate S_hi
+// ******************************************************
+// cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
+// cosh_FR_S_hi = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_S_hi_temp1
 
 { .mfi
-      nop.m           0
-(p6)  fms.s1          f_C_lo_temp1 =  f_spos, f_Tjhi,  f_C_hi
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1        cosh_FR_S_hi_temp1 =  cosh_FR_sneg, cosh_FR_Tmjhi, f0                
+      nop.i 999 ;;
 }
-;;
+
+// ******************************************************
+// Calculate C_lo
+// ******************************************************
+// cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
+// cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjhi + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
+// cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo
+// cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
+// cosh_FR_C_lo = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2
 
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_C_lo_temp2 = f_sneg, f_Tmjhi, f_C_lo_temp1       
-      nop.i           0
+      nop.m 999
+(p0)  fms.s1        cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi,  cosh_FR_C_hi        
+      nop.i 999
 }
-;;
 
-// If EXP,
-// Y_hi = 2^(N-1) * Tjhi
-// Y_lo = 2^(N-1) * Tjhi * (p_odd + p_even) + 2^(N-1) * Tjlo
 { .mfi
-      nop.m           0
-(p7)  fma.s1          f_Y_lo_temp =  f_peven, f1, f_podd
-      nop.i           0
+      nop.m 999
+(p0)  fms.s1        cosh_FR_S_hi       =  cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1 
+      nop.i 999 ;;
 }
-;;
 
-// If TBL,
-// C_lo = C_lo_temp4 + C_lo_temp2
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_C_lo = f_C_lo_temp4, f1, f_C_lo_temp2
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1        cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1  
+      nop.i 999
 }
-;;
 
-// If TBL,
-// Y_hi = C_hi 
-// Y_lo = S_hi*p_odd + (C_hi*p_even + C_lo)
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_Y_lo_temp = f_C_hi, f_peven, f_C_lo
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1        cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0                  
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-(p7)  fma.s1          f_Y_lo = f_Tjhi_spos, f_Y_lo_temp, f_Tjlo_spos
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1        cosh_FR_C_lo_temp3 =  cosh_FR_spos, cosh_FR_Tjlo,  cosh_FR_C_lo_temp1 
+      nop.i 999 ;;
 }
-;;
 
-// Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-      fmpy.s0         f_tmp = f_B2, f_B2
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1        cosh_FR_C_lo       =  cosh_FR_C_lo_temp3, f1,   cosh_FR_C_lo_temp2    
+      nop.i 999 ;;
 }
+
+// ******************************************************
+// cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
+// cosh_FR_Y_lo = cosh_FR_S_hi * cosh_FR_podd + cosh_FR_Y_lo_temp
+// cosh_FR_COSH = Y_hi + Y_lo
+
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_Y_lo = f_S_hi, f_podd, f_Y_lo_temp
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo       
+      nop.i 999 ;;
 }
-;;
 
-// f8 = answer = Y_hi + Y_lo
 { .mfi
-      nop.m           0
-(p7)  fma.s0          f8 = f_Y_lo,  f1, f_Tjhi_spos
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp   
+      nop.i 999 ;;
 }
-;;
 
-// f8 = answer = Y_hi + Y_lo
 { .mfb
-      nop.m           0
-(p6)  fma.s0          f8 = f_Y_lo, f1, f_C_hi
-      br.ret.sptk     b0      // Exit for COSH_BY_TBL and COSH_BY_EXP
+      nop.m 999
+(p0)  fma.s0       f8 =  cosh_FR_C_hi, f1, cosh_FR_Y_lo                       
+(p0)  br.ret.sptk     b0 ;;
 }
-;;
 
+L(COSH_BY_EXP): 
 
-// Here if 0 < |x| < 0.25
-COSH_BY_POLY: 
-{ .mmf
-      ldfe            f_P6 = [r_ad2e],16
-      ldfe            f_P5 = [r_ad2o],16
-      nop.f           0
-}
-;;
+// When p7 is true,  we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// f44 = Scale * (Y_hi + Y_lo)
+//     =  cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)
 
-{ .mmi
-      ldfe            f_P4 = [r_ad2e],16
-      ldfe            f_P3 = [r_ad2o],16
-      nop.i           0
+{ .mfi
+      nop.m 999
+(p0)  fma.s1         cosh_FR_Y_lo_temp =  cosh_FR_peven, f1,       cosh_FR_podd           
+      nop.i 999
 }
-;;
 
-{ .mmi
-      ldfe            f_P2 = [r_ad2e],16
-      ldfe            f_P1 = [r_ad2o],16                 
-      nop.i           0
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow   if N-1 > 0x3ffe = 16382
+// There is a danger of double overflow            if N-1 > 0x3fe  = 1022
+// There is a danger of single overflow            if N-1 > 0x7e   = 126
+
+{ .mlx
+       nop.m 999
+(p0)   movl                r32          = 0x0000000000003ffe ;;                       
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          f_X3 = f_NORM_X, f_X2, f0
-      nop.i           0
+(p0)  cmp.gt.unc          p0,p7        = r34, r32                                 
+      nop.f 999
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          f_X4 = f_X2, f_X2, f0
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_Y_lo      =  cosh_FR_Tjhi,  cosh_FR_Y_lo_temp, cosh_FR_Tjlo       
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          f_poly65 = f_X2, f_P6, f_P5
-      nop.i           0
+      nop.m 999
+(p0)  fma.s1         cosh_FR_COSH_temp =  cosh_FR_Y_lo,  f1, cosh_FR_Tjhi                 
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          f_poly43 = f_X2, f_P4, f_P3
-      nop.i           0
+      nop.m 999
+(p0)  fma.s0       f44 = cosh_FR_spos,  cosh_FR_COSH_temp, f0                       
+      nop.i 999 ;;
 }
-;;
 
+// Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-      fma.s1          f_poly21 = f_X2, f_P2, f_P1
-      nop.i           0
+         nop.m 999
+(p7)     fmpy.s0      cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
+         nop.i 999 ;;
 }
-;;
 
-{ .mfi
-      nop.m           0
-      fma.s1          f_poly6543 = f_X4, f_poly65, f_poly43
-      nop.i           0
+// If over_SAFE is set, return
+{ .mfb
+       nop.m 999
+(p7)   fmerge.s            f8 = f44,f44
+(p7)  br.ret.sptk     b0 ;;
 }
-;;
+
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD  (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
 
 { .mfi
-      nop.m           0
-      fma.s1          f_poly6to1 = f_X4, f_poly6543, f_poly21
-      nop.i           0
+      nop.m 999
+(p0)  fsetc.s2            0x7F,0x42                                               
+      nop.i 999;;
 }
-;;
 
-// Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-      fmpy.s0         f_tmp = f_P6, f_P6
-      nop.i           0
-}
-{ .mfb
-      nop.m           0
-      fma.s0          f8 = f_poly6to1, f_X2, f1
-      br.ret.sptk     b0                // Exit COSH_BY_POLY
+      nop.m 999
+(p0)  fma.s2            f43  = cosh_FR_spos,  cosh_FR_COSH_temp, f0                      
+      nop.i 999 ;;
 }
-;;
 
+// 103FF => 103FF -FFFF = 400(true)
+// 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
+// double (7FE). So 0 103FF 8000000000000000  is one ulp more than
+// largest double in register bias
+
+// 13FFF => 13FFF -FFFF = 4000(true)
+
+// Now set p8 if the answer with WRE is greater than or equal to this value
+// Also set p9 if the answer with WRE is less than or equal to the negative of this value
+
+{ .mlx
+       nop.m 999
+(p0)   movl                r32          = 0x0000000000013fff ;;                     
+}
 
-// Here if x denorm or unorm
-COSH_DENORM:
-// Determine if x really a denorm and not a unorm
 { .mmf
-      getf.exp        r_signexp_x = f_NORM_X
-      mov             r_exp_denorm = 0x0c001   // Real denorms have exp < this
-      fmerge.s        f_ABS_X = f0, f_NORM_X
+       nop.m 999
+(p0)   setf.exp            f41          = r32                                    
+(p0)   fsetc.s2            0x7F,0x40 ;;                                               
 }
-;;
 
 { .mfi
-      nop.m           0
-      fcmp.eq.s0      p10,p0 = f8, f0  // Set denorm flag
-      nop.i           0
+      nop.m 999
+(p0)  fcmp.ge.unc.s1      p8, p0       = f43, f41                               
+      nop.i 999
 }
-;;
 
-// Set p8 if really a denorm
-{ .mmi
-      and             r_exp_x = r_exp_mask, r_signexp_x
-;;
-      cmp.lt          p8,p9 = r_exp_x, r_exp_denorm
-      nop.i           0
+{ .mfi
+      nop.m 999
+(p0)  fmerge.ns           f42 = f41, f41                                          
+      nop.i 999 ;;
+}
+
+// The error tag for overflow is 63
+{ .mii
+      nop.m 999
+      nop.i 999 ;;
+(p8)  mov                 GR_Parameter_TAG = 63 ;;                                               
 }
-;;
 
-// Identify denormal operands.
 { .mfb
-      nop.m           0
-(p8)  fma.s0          f8 =  f8,f8,f1 // If x denorm, result=1+x^2
-(p9)  br.cond.sptk    COSH_COMMON    // Return to main path if x unorm
+      nop.m 999
+(p0)  fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
+(p8)  br.cond.spnt __libm_error_region ;;
+}
+
+{ .mii
+      nop.m 999
+      nop.i 999 ;;
+(p9)  mov                 GR_Parameter_TAG = 63                                               
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+(p9)  br.cond.spnt __libm_error_region ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+         nop.m 999
+(p0)     fmpy.s0      cosh_FR_tmp = cosh_FR_all_ones, cosh_FR_all_ones
+         nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      nop.f           0
-      br.ret.sptk     b0            // Exit if x denorm
+      nop.m 999
+(p0)  fmerge.s            f8 = f44,f44                                            
+(p0)  br.ret.sptk     b0 ;;
 }
-;;
 
 
-// Here if |x| >= overflow limit
-COSH_HUGE: 
-// for COSH_HUGE, put 24000 in exponent; take sign from input
-{ .mmi
-      mov             r_exp_huge = 0x15dbf
-;;
-      setf.exp        f_huge  = r_exp_huge
-      nop.i           0
+// for COSH_HUGE, put 24000 in exponent (sign is always positive); add 1
+// SAFE: SAFE is always 0 for HUGE
+
+L(COSH_HUGE): 
+
+{ .mlx
+      nop.m 999
+(p0)  movl                r32 = 0x0000000000015dbf ;;                                
 }
-;;
 
 { .mfi
-      alloc           r32 = ar.pfs,0,5,4,0                  
-      fma.s1          f_signed_hi_lo = f_huge, f1, f1
-      nop.i           0
+(p0)  setf.exp            f9  = r32                                               
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s0          f_pre_result = f_signed_hi_lo, f_huge, f0
-      mov             GR_Parameter_TAG = 63
+      nop.m 999
+(p0)  fma.s1              cosh_FR_hi_lo = f1, f9, f1                              
+      nop.i 999 ;;
 }
-;;
 
-GLOBAL_IEEE754_END(coshl)
+{ .mfi
+      nop.m 999
+(p0)  fma.s0            f44 = f9, cosh_FR_hi_lo, f0                             
+(p0)  mov                 GR_Parameter_TAG = 63                                               
+}
+.endp coshl
+ASM_SIZE_DIRECTIVE(coshl)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
-
 { .mfi
-        add   GR_Parameter_Y=-32,sp              // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs                  // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                            // Create new stack
+        add sp=-64,sp                           // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                        // Save gp
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
-
 { .mmi
-        stfe [GR_Parameter_Y] = f0,16            // STORE Parameter 2 on stack
-        add GR_Parameter_X = 16,sp               // Parameter 1 address
+        stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                        // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
-
 .body
 { .mib
-        stfe [GR_Parameter_X] = f8               // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 address
-        nop.b 0                            
+        stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
+        nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfe [GR_Parameter_Y] = f_pre_result     // STORE Parameter 3 on stack
+        stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#    // Call error handling function
+        br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
-
 { .mmi
-        ldfe  f8 = [GR_Parameter_RESULT]         // Get return result off stack
+        ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-        add   sp = 64,sp                         // Restore stack pointer
-        mov   b0 = GR_SAVE_B0                    // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
-        mov   gp = GR_SAVE_GP                    // Restore gp
-        mov   ar.pfs = GR_SAVE_PFS               // Restore ar.pfs
-        br.ret.sptk     b0                       // Return
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_exp.S b/sysdeps/ia64/fpu/e_exp.S
index 5ae8afeb99..db02336ecf 100644
--- a/sysdeps/ia64/fpu/e_exp.S
+++ b/sysdeps/ia64/fpu/e_exp.S
@@ -1,10 +1,10 @@
 .file "exp.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,26 +20,26 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 2/02/00  Initial version
+// 2/02/00  Initial version 
 // 3/07/00  exp(inf)  = inf but now does NOT call error support
 //          exp(-inf) = 0   but now does NOT call error support
 // 4/04/00  Unwind support added
@@ -48,10 +48,6 @@
 // 11/30/00 Reworked to shorten main path, widen main path to include all
 //          args in normal range, and add quick exit for 0, nan, inf.
 // 12/05/00 Loaded constants earlier with setf to save 2 cycles.
-// 02/05/02 Corrected uninitialize predicate in POSSIBLE_UNDERFLOW path
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 09/07/02 Force inexact flag
-// 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path
 
 // API
 //==============================================================
@@ -71,167 +67,187 @@
 //       Construct 2^M
 //       Get 2^(index_1/128) from table_1;
 //       Get 2^(index_2/8)   from table_2;
-//       Calculate exp(r) by 5th order polynomial
+//       Calculate exp(r) by series
 //          r = x - n (log2/128)_high
 //          delta = - n (log2/128)_low
 //       Calculate exp(delta) as 1 + delta
 
 
-// Special values
+// Special values 
 //==============================================================
 // exp(+0)    = 1.0
 // exp(-0)    = 1.0
 
-// exp(+qnan) = +qnan
-// exp(-qnan) = -qnan
-// exp(+snan) = +qnan
-// exp(-snan) = -qnan
+// exp(+qnan) = +qnan 
+// exp(-qnan) = -qnan 
+// exp(+snan) = +qnan 
+// exp(-snan) = -qnan 
 
-// exp(-inf)  = +0
+// exp(-inf)  = +0 
 // exp(+inf)  = +inf
 
-// Overflow and Underflow
+// Overflow and Underflow
 //=======================
-// exp(x) = largest double normal when
-//     x = 709.7827 = 0x40862e42fefa39ef
+// exp(x) = smallest double normal when
+//     x = -708.396 = c086232bdd7abcd2
 
-// exp(x) = smallest double normal when
-//     x = -708.396 = 0xc086232bdd7abcd2
+// exp(x) = largest double normal when
+//     x = 709.7827 = 40862e42fefa39ef
 
-// exp(x) = largest round-to-nearest single zero when
-//     x = -745.1332 = 0xc0874910d52d3052
 
 
 // Registers used
 //==============================================================
-// Floating Point registers used:
-// f8, input, output
-// f6 -> f15,  f32 -> f49
+// Floating Point registers used: 
+// f8, input
+// f9 -> f15,  f32 -> f60
 
-// General registers used:
-// r14 -> r40
+// General registers used: 
+// r32 -> r60 
 
 // Predicate registers used:
 // p6 -> p15
 
+#include "libm_support.h"
+
 // Assembly macros
 //==============================================================
 
-rRshf                 = r14
-rAD_TB1               = r15
-rAD_T1                = r15
-rAD_TB2               = r16
-rAD_T2                = r16
-rAD_P                 = r17
-rN                    = r18
-rIndex_1              = r19
-rIndex_2_16           = r20
-rM                    = r21
-rBiased_M             = r21
-rIndex_1_16           = r21
-rSig_inv_ln2          = r22
-rExp_bias             = r23
-rExp_mask             = r24
-rTmp                  = r25
-rRshf_2to56           = r26
-rGt_ln                = r27
-rExp_2tom56           = r28
-
-
-GR_SAVE_B0            = r33
-GR_SAVE_PFS           = r34
-GR_SAVE_GP            = r35
-GR_SAVE_SP            = r36
-
-GR_Parameter_X        = r37
-GR_Parameter_Y        = r38
-GR_Parameter_RESULT   = r39
-GR_Parameter_TAG      = r40
-
-
-FR_X                  = f10
-FR_Y                  = f1
-FR_RESULT             = f8
-
-fRSHF_2TO56           = f6
-fINV_LN2_2TO63        = f7
-fW_2TO56_RSH          = f9
-f2TOM56               = f11
-fP5                   = f12
-fP54                  = f12
-fP5432                = f12
-fP4                   = f13
-fP3                   = f14
-fP32                  = f14
-fP2                   = f15
-fP                    = f15
-
-fLn2_by_128_hi        = f33
-fLn2_by_128_lo        = f34
-
-fRSHF                 = f35
-fNfloat               = f36
-fNormX                = f37
-fR                    = f38
-fF                    = f39
-
-fRsq                  = f40
-f2M                   = f41
-fS1                   = f42
-fT1                   = f42
-fS2                   = f43
-fT2                   = f43
-fS                    = f43
-fWre_urm_f8           = f44
-fFtz_urm_f8           = f44
-
-fMIN_DBL_OFLOW_ARG    = f45
-fMAX_DBL_ZERO_ARG     = f46
-fMAX_DBL_NORM_ARG     = f47
-fMIN_DBL_NORM_ARG     = f48
-fGt_pln               = f49
-fTmp                  = f49
+exp_GR_rshf                   = r33
+EXP_AD_TB1                    = r34
+EXP_AD_TB2                    = r35
+EXP_AD_P                      = r36
+
+exp_GR_N                      = r37
+exp_GR_index_1                = r38
+exp_GR_index_2_16             = r39
+
+exp_GR_biased_M               = r40
+exp_GR_index_1_16             = r41
+EXP_AD_T1                     = r42
+EXP_AD_T2                     = r43
+exp_GR_sig_inv_ln2            = r44
+
+exp_GR_17ones                 = r45
+exp_GR_one                    = r46
+exp_TB1_size                  = r47
+exp_TB2_size                  = r48
+exp_GR_rshf_2to56             = r49
+
+exp_GR_gt_ln                  = r50
+exp_GR_exp_2tom56             = r51
+
+exp_GR_17ones_m1              = r52
+
+GR_SAVE_B0                    = r53
+GR_SAVE_PFS                   = r54
+GR_SAVE_GP                    = r55
+GR_SAVE_SP                    = r56
+
+GR_Parameter_X                = r57
+GR_Parameter_Y                = r58
+GR_Parameter_RESULT           = r59
+GR_Parameter_TAG              = r60
+
+
+FR_X             = f10
+FR_Y             = f1
+FR_RESULT        = f8
+
+EXP_RSHF_2TO56   = f6
+EXP_INV_LN2_2TO63 = f7
+EXP_W_2TO56_RSH  = f9
+EXP_2TOM56       = f11
+exp_P4           = f12 
+exp_P3           = f13 
+exp_P2           = f14 
+exp_P1           = f15 
+
+exp_ln2_by_128_hi  = f33 
+exp_ln2_by_128_lo  = f34 
+
+EXP_RSHF           = f35
+EXP_Nfloat         = f36 
+exp_W              = f37
+exp_r              = f38
+exp_f              = f39
+
+exp_rsq            = f40
+exp_rcube          = f41
+
+EXP_2M             = f42
+exp_S1             = f43
+exp_T1             = f44
+
+EXP_MIN_DBL_OFLOW_ARG = f45
+EXP_MAX_DBL_ZERO_ARG  = f46
+EXP_MAX_DBL_NORM_ARG  = f47
+EXP_MAX_DBL_UFLOW_ARG = f48
+EXP_MIN_DBL_NORM_ARG  = f49
+exp_rP4pP3         = f50
+exp_P_lo           = f51
+exp_P_hi           = f52
+exp_P              = f53
+exp_S              = f54
+
+EXP_NORM_f8        = f56   
+
+exp_wre_urm_f8     = f57
+exp_ftz_urm_f8     = f57
+
+exp_gt_pln         = f58
+
+exp_S2             = f59
+exp_T2             = f60
 
 
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
 .align 16
 
 // ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
 
 // double-extended 1/ln(2)
 // 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
-// 3fff b8aa 3b29 5c17 f0bc
+// 3fff b8aa 3b29 5c17 f0bc 
 // For speed the significand will be loaded directly with a movl and setf.sig
 //   and the exponent will be bias+63 instead of bias+0.  Thus subsequent
 //   computations need to scale appropriately.
-// The constant 128/ln(2) is needed for the computation of w.  This is also
+// The constant 128/ln(2) is needed for the computation of w.  This is also 
 //   obtained by scaling the computations.
 //
-// Two shifting constants are loaded directly with movl and setf.d.
-//   1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
+// Two shifting constants are loaded directly with movl and setf.d. 
+//   1. EXP_RSHF_2TO56 = 1.1000..00 * 2^(63-7) 
 //        This constant is added to x*1/ln2 to shift the integer part of
 //        x*128/ln2 into the rightmost bits of the significand.
-//        The result of this fma is fW_2TO56_RSH.
-//   2. fRSHF       = 1.1000..00 * 2^(63)
-//        This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
+//        The result of this fma is EXP_W_2TO56_RSH.
+//   2. EXP_RSHF       = 1.1000..00 * 2^(63) 
+//        This constant is subtracted from EXP_W_2TO56_RSH * 2^(-56) to give
 //        the integer part of w, n, as a floating-point number.
-//        The result of this fms is fNfloat.
+//        The result of this fms is EXP_Nfloat.
 
 
-LOCAL_OBJECT_START(exp_table_1)
-data8 0x40862e42fefa39f0 // smallest dbl overflow arg, +709.7827
-data8 0xc0874910d52d3052 // largest arg for rnd-to-nearest 0 result, -745.133
-data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result, +709.7827
-data8 0xc086232bdd7abcd2 // smallest dbl arg to give normal dbl result, -708.396
+exp_table_1:
+ASM_TYPE_DIRECTIVE(exp_table_1,@object)
+data8 0x40862e42fefa39f0 // smallest dbl overflow arg
+data8 0xc0874c0000000000 // approx largest arg for zero result
+data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result
+data8 0xc086232bdd7abcd3 // largest dbl underflow arg
+data8 0xc086232bdd7abcd2 // smallest dbl arg to give normal dbl result
+data8 0x0                // pad
 data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
 data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
-//
+
 // Table 1 is 2^(index_1/128) where
 // index_1 goes from 0 to 15
-//
+
 data8 0x8000000000000000 , 0x00003FFF
 data8 0x80B1ED4FD999AB6C , 0x00003FFF
 data8 0x8164D1F3BC030773 , 0x00003FFF
@@ -248,11 +264,12 @@ data8 0x88980E8092DA8527 , 0x00003FFF
 data8 0x8955EE03618E5FDD , 0x00003FFF
 data8 0x8A14D575496EFD9A , 0x00003FFF
 data8 0x8AD4C6452C728924 , 0x00003FFF
-LOCAL_OBJECT_END(exp_table_1)
+ASM_SIZE_DIRECTIVE(exp_table_1)
 
 // Table 2 is 2^(index_1/8) where
 // index_2 goes from 0 to 7
-LOCAL_OBJECT_START(exp_table_2)
+exp_table_2:
+ASM_TYPE_DIRECTIVE(exp_table_2,@object)
 data8 0x8000000000000000 , 0x00003FFF
 data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
 data8 0x9837F0518DB8A96F , 0x00003FFF
@@ -261,356 +278,413 @@ data8 0xB504F333F9DE6484 , 0x00003FFF
 data8 0xC5672A115506DADD , 0x00003FFF
 data8 0xD744FCCAD69D6AF4 , 0x00003FFF
 data8 0xEAC0C6E7DD24392F , 0x00003FFF
-LOCAL_OBJECT_END(exp_table_2)
+ASM_SIZE_DIRECTIVE (exp_table_2)
+
 
+exp_p_table:
+ASM_TYPE_DIRECTIVE(exp_p_table,@object)
+data8 0x3f8111116da21757 //P_4
+data8 0x3fa55555d787761c //P_3
+data8 0x3fc5555555555414 //P_2
+data8 0x3fdffffffffffd6a //P_1
+ASM_SIZE_DIRECTIVE(exp_p_table)
 
-LOCAL_OBJECT_START(exp_p_table)
-data8 0x3f8111116da21757 //P5
-data8 0x3fa55555d787761c //P4
-data8 0x3fc5555555555414 //P3
-data8 0x3fdffffffffffd6a //P2
-LOCAL_OBJECT_END(exp_p_table)
 
+.align 32
+.global exp#
 
 .section .text
-GLOBAL_IEEE754_ENTRY(exp)
+.proc  exp#
+.align 32
+exp: 
+#ifdef _LIBC
+.global __ieee754_exp#
+__ieee754_exp:
+#endif
 
 { .mlx
-      nop.m           0
-      movl            rSig_inv_ln2 = 0xb8aa3b295c17f0bc  // significand of 1/ln2
+      alloc      r32=ar.pfs,1,24,4,0                               
+      movl exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc  // significand of 1/ln2
 }
 { .mlx
-      addl            rAD_TB1    = @ltoff(exp_table_1), gp
-      movl            rRshf_2to56 = 0x4768000000000000   // 1.10000 2^(63+56)
+      addl       EXP_AD_TB1    = @ltoff(exp_table_1), gp
+      movl exp_GR_rshf_2to56 = 0x4768000000000000 ;;  // 1.10000 2^(63+56)
 }
 ;;
 
+// We do this fnorm right at the beginning to take any enabled
+// faults and to normalize any input unnormals so that SWA is not taken.
 { .mfi
-      ld8             rAD_TB1    = [rAD_TB1]
-      fclass.m        p8,p0 = f8,0x07  // Test for x=0
-      mov             rExp_mask = 0x1ffff
+      ld8        EXP_AD_TB1    = [EXP_AD_TB1]
+      fclass.m   p8,p0 = f8,0x07  // Test for x=0
+      mov        exp_GR_17ones = 0x1FFFF                          
 }
 { .mfi
-      mov             rExp_bias = 0xffff
-      fnorm.s1        fNormX   = f8
-      mov             rExp_2tom56 = 0xffff-56
+      mov        exp_TB1_size  = 0x100
+      fnorm      EXP_NORM_f8   = f8                                          
+      mov exp_GR_exp_2tom56 = 0xffff-56
 }
 ;;
 
 // Form two constants we need
-//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128
+//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128 
 //  1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
 
-{ .mfi
-      setf.sig        fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
-      fclass.m        p9,p0 = f8,0x22  // Test for x=-inf
-      nop.i           0
-}
-{ .mlx
-      setf.d          fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
-      movl            rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+{ .mmf
+      setf.sig  EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2 // form 1/ln2 * 2^63
+      setf.d  EXP_RSHF_2TO56 = exp_GR_rshf_2to56 // Form const 1.100 * 2^(63+56)
+      fclass.m   p9,p0 = f8,0x22  // Test for x=-inf
 }
 ;;
 
-{ .mfi
-      ldfpd           fMIN_DBL_OFLOW_ARG, fMAX_DBL_ZERO_ARG = [rAD_TB1],16
-      fclass.m        p10,p0 = f8,0x1e1  // Test for x=+inf, nan, NaT
-      nop.i           0
+{ .mlx
+      setf.exp EXP_2TOM56 = exp_GR_exp_2tom56 // form 2^-56 for scaling Nfloat
+      movl exp_GR_rshf = 0x43e8000000000000   // 1.10000 2^63 for right shift
 }
 { .mfb
-      setf.exp        f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
-(p9)  fma.d.s0        f8 = f0,f0,f0           // quick exit for x=-inf
-(p9)  br.ret.spnt     b0
-}
+      mov        exp_TB2_size  = 0x80
+(p8)  fma.d      f8 = f1,f1,f0           // quick exit for x=0
+(p8)  br.ret.spnt b0
 ;;
+}
 
 { .mfi
-      ldfpd           fMAX_DBL_NORM_ARG, fMIN_DBL_NORM_ARG = [rAD_TB1],16
-      nop.f           0
-      nop.i           0
+      ldfpd      EXP_MIN_DBL_OFLOW_ARG, EXP_MAX_DBL_ZERO_ARG = [EXP_AD_TB1],16
+      fclass.m   p10,p0 = f8,0x21  // Test for x=+inf
+      nop.i 999
 }
 { .mfb
-      setf.d          fRSHF = rRshf // Form right shift const 1.100 * 2^63
-(p8)  fma.d.s0        f8 = f1,f1,f0           // quick exit for x=0
-(p8)  br.ret.spnt     b0
+      nop.m 999
+(p9)  fma.d      f8 = f0,f0,f0           // quick exit for x=-inf
+(p9)  br.ret.spnt b0
+;;                    
 }
+
+{ .mmf
+      ldfpd      EXP_MAX_DBL_NORM_ARG, EXP_MAX_DBL_UFLOW_ARG = [EXP_AD_TB1],16
+      setf.d  EXP_RSHF = exp_GR_rshf // Form right shift const 1.100 * 2^63
+      fclass.m   p11,p0 = f8,0xc3  // Test for x=nan
 ;;
+}
 
 { .mfb
-      ldfe            fLn2_by_128_hi  = [rAD_TB1],16
-(p10) fma.d.s0        f8 = f8,f8,f0  // Result if x=+inf, nan, NaT
-(p10) br.ret.spnt     b0               // quick exit for x=+inf, nan, NaT
-}
+      ldfd      EXP_MIN_DBL_NORM_ARG = [EXP_AD_TB1],16
+      nop.f 999
+(p10) br.ret.spnt b0               // quick exit for x=+inf
 ;;
+}
 
 { .mfi
-      ldfe            fLn2_by_128_lo  = [rAD_TB1],16
-      fcmp.eq.s0      p6,p0 = f8, f0       // Dummy to set D
-      nop.i           0
+      ldfe       exp_ln2_by_128_hi  = [EXP_AD_TB1],16
+      nop.f 999
+      nop.i 999
+;;
 }
+
+
+{ .mfb
+      ldfe       exp_ln2_by_128_lo  = [EXP_AD_TB1],16
+(p11) fmerge.s   f8 = EXP_NORM_f8, EXP_NORM_f8
+(p11) br.ret.spnt b0               // quick exit for x=nan
 ;;
+}
 
-// After that last load, rAD_TB1 points to the beginning of table 1
+// After that last load, EXP_AD_TB1 points to the beginning of table 1
 
 // W = X * Inv_log2_by_128
 // By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
 // We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
 
 { .mfi
-      nop.m           0
-      fma.s1          fW_2TO56_RSH  = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
-      nop.i           0
-}
+      nop.m 999
+      fma.s1  EXP_W_2TO56_RSH  = EXP_NORM_f8, EXP_INV_LN2_2TO63, EXP_RSHF_2TO56
+      nop.i 999
 ;;
+}
+
 
 // Divide arguments into the following categories:
-//  Certain Underflow       p11 - -inf < x <= MAX_DBL_ZERO_ARG
-//  Possible Underflow      p13 - MAX_DBL_ZERO_ARG < x < MIN_DBL_NORM_ARG
+//  Certain Underflow/zero  p11 - -inf < x <= MAX_DBL_ZERO_ARG 
+//  Certain Underflow       p12 - MAX_DBL_ZERO_ARG < x <= MAX_DBL_UFLOW_ARG 
+//  Possible Underflow      p13 - MAX_DBL_UFLOW_ARG < x < MIN_DBL_NORM_ARG
 //  Certain Safe                - MIN_DBL_NORM_ARG <= x <= MAX_DBL_NORM_ARG
 //  Possible Overflow       p14 - MAX_DBL_NORM_ARG < x < MIN_DBL_OFLOW_ARG
 //  Certain Overflow        p15 - MIN_DBL_OFLOW_ARG <= x < +inf
 //
-// If the input is really a double arg, then there will never be
-// "Possible Overflow" arguments.
+// If the input is really a double arg, then there will never be "Possible
+// Underflow" or "Possible Overflow" arguments.
 //
 
 { .mfi
-      add             rAD_TB2 = 0x100, rAD_TB1
-      fcmp.ge.s1      p15,p0 = fNormX,fMIN_DBL_OFLOW_ARG
-      nop.i           0
+      add        EXP_AD_TB2 = exp_TB1_size, EXP_AD_TB1
+      fcmp.ge.s1  p15,p14 = EXP_NORM_f8,EXP_MIN_DBL_OFLOW_ARG
+      nop.i 999
+;;                        
 }
-;;
 
 { .mfi
-      add             rAD_P = 0x80, rAD_TB2
-      fcmp.le.s1      p11,p0 = fNormX,fMAX_DBL_ZERO_ARG
-      nop.i           0
-}
+      add        EXP_AD_P = exp_TB2_size, EXP_AD_TB2
+      fcmp.le.s1  p11,p12 = EXP_NORM_f8,EXP_MAX_DBL_ZERO_ARG
+      nop.i 999
 ;;
+}
 
 { .mfb
-      ldfpd           fP5, fP4  = [rAD_P] ,16
-      fcmp.gt.s1      p14,p0 = fNormX,fMAX_DBL_NORM_ARG
-(p15) br.cond.spnt    EXP_CERTAIN_OVERFLOW
-}
+      ldfpd      exp_P4, exp_P3  = [EXP_AD_P] ,16
+(p14) fcmp.gt.unc.s1  p14,p0 = EXP_NORM_f8,EXP_MAX_DBL_NORM_ARG
+(p15) br.cond.spnt L(EXP_CERTAIN_OVERFLOW)
 ;;
+}
+
 
-// Nfloat = round_int(W)
-// The signficand of fW_2TO56_RSH contains the rounded integer part of W,
+// Nfloat = round_int(W) 
+// The significand of EXP_W_2TO56_RSH contains the rounded integer part of W,
 // as a twos complement number in the lower bits (that is, it may be negative).
-// That twos complement number (called N) is put into rN.
+// That twos complement number (called N) is put into exp_GR_N.
 
-// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
-// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
-// Thus, fNfloat contains the floating point version of N
+// Since EXP_W_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
+// before the shift constant 1.10000 * 2^63 is subtracted to yield EXP_Nfloat.
+// Thus, EXP_Nfloat contains the floating point version of N
 
-{ .mfb
-      ldfpd           fP3, fP2  = [rAD_P]
-      fms.s1          fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
-(p11) br.cond.spnt    EXP_CERTAIN_UNDERFLOW
+
+{ .mfi
+      nop.m 999
+(p12) fcmp.le.unc  p12,p0 = EXP_NORM_f8,EXP_MAX_DBL_UFLOW_ARG
+      nop.i 999
 }
+{ .mfb
+      ldfpd      exp_P2, exp_P1  = [EXP_AD_P]                                  
+      fms.s1          EXP_Nfloat = EXP_W_2TO56_RSH, EXP_2TOM56, EXP_RSHF 
+(p11) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW_ZERO)
 ;;
+}
 
 { .mfi
-      getf.sig        rN        = fW_2TO56_RSH
-      nop.f           0
-      nop.i           0
-}
+      getf.sig        exp_GR_N        = EXP_W_2TO56_RSH
+(p13) fcmp.lt.unc  p13,p0 = EXP_NORM_f8,EXP_MIN_DBL_NORM_ARG
+      nop.i 999
 ;;
+}
+
 
-// rIndex_1 has index_1
-// rIndex_2_16 has index_2 * 16
-// rBiased_M has M
-// rIndex_1_16 has index_1 * 16
+// exp_GR_index_1 has index_1
+// exp_GR_index_2_16 has index_2 * 16
+// exp_GR_biased_M has M
+// exp_GR_index_1_16 has index_1 * 16
 
-// rM has true M
-// r = x - Nfloat * ln2_by_128_hi
-// f = 1 - Nfloat * ln2_by_128_lo
+// r2 has true M
 { .mfi
-      and             rIndex_1 = 0x0f, rN
-      fnma.s1         fR   = fNfloat, fLn2_by_128_hi, fNormX
-      shr             rM = rN,  0x7
+      and            exp_GR_index_1 = 0x0f, exp_GR_N
+      fnma.s1    exp_r   = EXP_Nfloat, exp_ln2_by_128_hi, EXP_NORM_f8 
+      shr            r2 = exp_GR_N,  0x7
 }
 { .mfi
-      and             rIndex_2_16 = 0x70, rN
-      fnma.s1         fF   = fNfloat, fLn2_by_128_lo, f1
-      nop.i           0
+      and            exp_GR_index_2_16 = 0x70, exp_GR_N
+      fnma.s1    exp_f   = EXP_Nfloat, exp_ln2_by_128_lo, f1 
+      nop.i 999
+;;                            
 }
-;;
 
-// rAD_T1 has address of T1
-// rAD_T2 has address if T2
+
+// EXP_AD_T1 has address of T1                           
+// EXP_AD_T2 has address of T2
 
 { .mmi
-      add             rBiased_M = rExp_bias, rM
-      add             rAD_T2 = rAD_TB2, rIndex_2_16
-      shladd          rAD_T1 = rIndex_1, 4, rAD_TB1
+      addl           exp_GR_biased_M = 0xffff, r2 
+      add            EXP_AD_T2 = EXP_AD_TB2, exp_GR_index_2_16 
+      shladd         EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1
+;;                            
 }
-;;
+
 
 // Create Scale = 2^M
+// r = x - Nfloat * ln2_by_128_hi 
+// f = 1 - Nfloat * ln2_by_128_lo 
+
 { .mmi
-      setf.exp        f2M = rBiased_M
-      ldfe            fT2  = [rAD_T2]
-      nop.i           0
-}
+      setf.exp        EXP_2M = exp_GR_biased_M                              
+      ldfe       exp_T2  = [EXP_AD_T2]                                
+      nop.i 999
 ;;
+}
 
 // Load T1 and T2
 { .mfi
-      ldfe            fT1  = [rAD_T1]
-      fmpy.s0         fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
-      nop.i           0
-}
+      ldfe       exp_T1  = [EXP_AD_T1]                                
+      nop.f 999
+      nop.i 999
 ;;
+}
+
 
 { .mfi
-      nop.m           0
-      fma.s1          fRsq = fR, fR, f0
-      nop.i           0
+        nop.m 999
+        fma.s1           exp_rsq = exp_r, exp_r, f0 
+        nop.i 999
 }
 { .mfi
-      nop.m           0
-      fma.s1          fP54 = fR, fP5, fP4
-      nop.i           0
-}
+        nop.m 999
+        fma.s1        exp_rP4pP3 = exp_r, exp_P4, exp_P3               
+        nop.i 999
 ;;
+}
+
+
 
 { .mfi
-      nop.m           0
-      fcmp.lt.s1      p13,p0 = fNormX,fMIN_DBL_NORM_ARG
-      nop.i           0
+        nop.m 999
+        fma.s1           exp_rcube = exp_r, exp_rsq, f0 
+        nop.i 999 
 }
 { .mfi
-      nop.m           0
-      fma.s1          fP32 = fR, fP3, fP2
-      nop.i           0
-}
+        nop.m 999
+        fma.s1        exp_P_lo  = exp_r, exp_rP4pP3, exp_P2            
+        nop.i 999
 ;;
-
-{ .mfi
-      nop.m           0
-      fma.s1          fP5432  = fRsq, fP54, fP32
-      nop.i           0
 }
-;;
+
 
 { .mfi
-      nop.m           0
-      fma.s1          fS1  = f2M,fT1,f0
-      nop.i           0
+        nop.m 999
+        fma.s1        exp_P_hi  = exp_rsq, exp_P1, exp_r              
+        nop.i 999
 }
 { .mfi
-      nop.m           0
-      fma.s1          fS2  = fF,fT2,f0
-      nop.i           0
-}
+        nop.m 999
+        fma.s1        exp_S2  = exp_f,exp_T2,f0                       
+        nop.i 999
 ;;
+}
 
 { .mfi
-      nop.m           0
-      fma.s1          fP     = fRsq, fP5432, fR
-      nop.i           0
+        nop.m 999
+        fma.s1        exp_S1  = EXP_2M,exp_T1,f0                      
+        nop.i 999
+;;
 }
+
+
 { .mfi
-      nop.m           0
-      fma.s1          fS   = fS1,fS2,f0
-      nop.i           0
-}
+        nop.m 999
+        fma.s1        exp_P     = exp_rcube, exp_P_lo, exp_P_hi       
+        nop.i 999
 ;;
+}
 
-{ .mbb
-      nop.m           0
-(p13) br.cond.spnt    EXP_POSSIBLE_UNDERFLOW
-(p14) br.cond.spnt    EXP_POSSIBLE_OVERFLOW
+{ .mfi
+        nop.m 999
+        fma.s1        exp_S   = exp_S1,exp_S2,f0                      
+        nop.i 999
+;;
 }
+
+{ .bbb
+(p12)   br.cond.spnt  L(EXP_CERTAIN_UNDERFLOW)
+(p13)   br.cond.spnt  L(EXP_POSSIBLE_UNDERFLOW)
+(p14)   br.cond.spnt  L(EXP_POSSIBLE_OVERFLOW)
 ;;
+}
+
 
 { .mfb
-      nop.m           0
-      fma.d.s0        f8 = fS, fP, fS
-      br.ret.sptk     b0                  // Normal path exit
+        nop.m 999
+        fma.d      f8 = exp_S, exp_P, exp_S 
+        br.ret.sptk     b0 ;;               // Normal path exit 
 }
-;;
 
 
-EXP_POSSIBLE_OVERFLOW:
+L(EXP_POSSIBLE_OVERFLOW): 
 
-// Here if fMAX_DBL_NORM_ARG < x < fMIN_DBL_OFLOW_ARG
-// This cannot happen if input is a double, only if input higher precision.
-// Overflow is a possibility, not a certainty.
+// We got an answer. EXP_MAX_DBL_NORM_ARG < x < EXP_MIN_DBL_OFLOW_ARG
+// overflow is a possibility, not a certainty
 
-// Recompute result using status field 2 with user's rounding mode,
-// and wre set.  If result is larger than largest double, then we have
-// overflow
+{ .mfi
+	nop.m 999
+        fsetc.s2 0x7F,0x42                                          
+	nop.i 999 ;;
+}
 
 { .mfi
-      mov             rGt_ln  = 0x103ff // Exponent for largest dbl + 1 ulp
-      fsetc.s2        0x7F,0x42         // Get user's round mode, set wre
-      nop.i           0
+	nop.m 999
+        fma.d.s2      exp_wre_urm_f8 = exp_S, exp_P, exp_S          
+	nop.i 999 ;;
 }
-;;
+
+// We define an overflow when the answer with
+//    WRE set
+//    user-defined rounding mode
+// is ldn +1
+
+// Is the exponent 1 more than the largest double?
+// If so, go to ERROR RETURN, else get the answer and 
+// leave.
+
+// Largest double is 7FE (biased double)
+//                   7FE - 3FF + FFFF = 103FE
+// Create + largest_double_plus_ulp
+// Create - largest_double_plus_ulp
+// Calculate answer with WRE set.
+
+// Cases when answer is ldn+1  are as follows:
+//  ldn                   ldn+1
+// --+----------|----------+------------
+//              | 
+//    +inf          +inf      -inf
+//                  RN         RN
+//                             RZ 
 
 { .mfi
-      setf.exp        fGt_pln = rGt_ln  // Create largest double + 1 ulp
-      fma.d.s2        fWre_urm_f8 = fS, fP, fS    // Result with wre set
-      nop.i           0
+	nop.m 999
+        fsetc.s2 0x7F,0x40                                          
+        mov           exp_GR_gt_ln  = 0x103ff ;;                      
 }
-;;
 
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                   // Turn off wre in sf2
-      nop.i           0
+        setf.exp      exp_gt_pln    = exp_GR_gt_ln                 
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fcmp.ge.s1      p6, p0 =  fWre_urm_f8, fGt_pln // Test for overflow
-      nop.i           0
+	nop.m 999
+       fcmp.ge.unc.s1 p6, p0 =  exp_wre_urm_f8, exp_gt_pln 	  
+	nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    EXP_CERTAIN_OVERFLOW // Branch if overflow
+	nop.m 999
+	nop.f 999
+(p6)   br.cond.spnt L(EXP_CERTAIN_OVERFLOW) ;; // Branch if really overflow
 }
-;;
 
 { .mfb
-      nop.m           0
-      fma.d.s0        f8 = fS, fP, fS
-      br.ret.sptk     b0                     // Exit if really no overflow
+	nop.m 999
+       fma.d        f8 = exp_S, exp_P, exp_S                      
+       br.ret.sptk     b0 ;;             // Exit if really no overflow
 }
-;;
 
-EXP_CERTAIN_OVERFLOW:
+L(EXP_CERTAIN_OVERFLOW):
 { .mmi
-      sub             rTmp = rExp_mask, r0, 1
-;;
-      setf.exp        fTmp = rTmp
-      nop.i           0
+      sub   exp_GR_17ones_m1 = exp_GR_17ones, r0, 1 ;;
+      setf.exp     f9 = exp_GR_17ones_m1
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      alloc           r32=ar.pfs,1,4,4,0
-      fmerge.s        FR_X = f8,f8
-      nop.i           0
+      nop.m 999
+      fmerge.s FR_X = f8,f8
+      nop.i 999
 }
 { .mfb
-      mov             GR_Parameter_TAG = 14
-      fma.d.s0        FR_RESULT = fTmp, fTmp, f0    // Set I,O and +INF result
-      br.cond.sptk    __libm_error_region
+      mov        GR_Parameter_TAG = 14
+      fma.d       FR_RESULT = f9, f9, f0    // Set I,O and +INF result
+      br.cond.sptk  __libm_error_region ;;                             
 }
-;;
 
-EXP_POSSIBLE_UNDERFLOW:
+L(EXP_POSSIBLE_UNDERFLOW): 
 
-// Here if fMAX_DBL_ZERO_ARG < x < fMIN_DBL_NORM_ARG
-// Underflow is a possibility, not a certainty
+// We got an answer. EXP_MAX_DBL_UFLOW_ARG < x < EXP_MIN_DBL_NORM_ARG
+// underflow is a possibility, not a certainty
 
 // We define an underflow when the answer with
 //    ftz set
@@ -635,111 +709,81 @@ EXP_POSSIBLE_UNDERFLOW:
 //    largest dn                               smallest normal
 
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x41                // Get user's round mode, set ftz
-      nop.i           0
+	nop.m 999
+       fsetc.s2 0x7F,0x41                                          
+	nop.i 999 ;;
 }
-;;
-
-{ .mfi
-      nop.m           0
-      fma.d.s2        fFtz_urm_f8 = fS, fP, fS // Result with ftz set
-      nop.i           0
-}
-;;
-
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                // Turn off ftz in sf2
-      nop.i           0
+	nop.m 999
+       fma.d.s2      exp_ftz_urm_f8 = exp_S, exp_P, exp_S          
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m           0
-      fcmp.eq.s1      p6, p7 = fFtz_urm_f8, f0 // Test for underflow
-      nop.i           0
+	nop.m 999
+       fsetc.s2 0x7F,0x40                                          
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m           0
-      fma.d.s0        f8 = fS, fP, fS          // Compute result, set I, maybe U
-      nop.i           0
-}
-;;
-
-{ .mbb
-      nop.m           0
-(p6)  br.cond.spnt    EXP_UNDERFLOW_COMMON     // Branch if really underflow
-(p7)  br.ret.sptk     b0                       // Exit if really no underflow
+	nop.m 999
+       fcmp.eq.unc.s1 p6, p0 =  exp_ftz_urm_f8, f0 	          
+	nop.i 999 ;;
 }
-;;
-
-EXP_CERTAIN_UNDERFLOW:
-// Here if  x < fMAX_DBL_ZERO_ARG
-// Result will be zero (or smallest denorm if round to +inf) with I, U set
-{ .mmi
-      mov             rTmp = 1
-;;
-      setf.exp        fTmp = rTmp               // Form small normal
-      nop.i           0
+{ .mfb
+	nop.m 999
+	nop.f 999
+(p6)   br.cond.spnt L(EXP_CERTAIN_UNDERFLOW) ;; // Branch if really underflow
 }
-;;
-
 { .mfb
-      nop.m           0
-      fma.d.s0        f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
-      br.cond.sptk    EXP_UNDERFLOW_COMMON
+	nop.m 999
+       fma.d        f8 = exp_S, exp_P, exp_S                      
+       br.ret.sptk     b0 ;;                // Exit if really no underflow
 }
-;;
 
-EXP_UNDERFLOW_COMMON:
-// Determine if underflow result is zero or nonzero
+L(EXP_CERTAIN_UNDERFLOW):
 { .mfi
-      alloc           r32=ar.pfs,1,4,4,0
-      fcmp.eq.s1      p6, p0 =  f8, f0
-      nop.i           0
+      nop.m 999
+      fmerge.s FR_X = f8,f8
+      nop.i 999
 }
-;;
-
 { .mfb
-      nop.m           0
-      fmerge.s        FR_X = fNormX,fNormX
-(p6)  br.cond.spnt    EXP_UNDERFLOW_ZERO
+      mov        GR_Parameter_TAG = 15
+      fma.d       FR_RESULT  = exp_S, exp_P, exp_S // Set I,U and tiny result
+      br.cond.sptk  __libm_error_region ;;                             
 }
-;;
 
-EXP_UNDERFLOW_NONZERO:
-// Here if  x < fMIN_DBL_NORM_ARG and result nonzero;
-// I, U are set
-{ .mfb
-      mov             GR_Parameter_TAG = 15
-      nop.f           0                         // FR_RESULT already set
-      br.cond.sptk    __libm_error_region
+L(EXP_CERTAIN_UNDERFLOW_ZERO):
+{ .mmi
+      mov   exp_GR_one = 1 ;;
+      setf.exp     f9 = exp_GR_one
+      nop.i 999 ;;
 }
-;;
 
-EXP_UNDERFLOW_ZERO:
-// Here if x < fMIN_DBL_NORM_ARG and result zero;
-// I, U are set
+{ .mfi
+      nop.m 999
+      fmerge.s FR_X = f8,f8
+      nop.i 999
+}
 { .mfb
-      mov             GR_Parameter_TAG = 15
-      nop.f           0                         // FR_RESULT already set
-      br.cond.sptk    __libm_error_region
+      mov        GR_Parameter_TAG = 15
+      fma.d       FR_RESULT = f9, f9, f0    // Set I,U and tiny (+0.0) result
+      br.cond.sptk  __libm_error_region ;;                             
 }
-;;
 
-GLOBAL_IEEE754_END(exp)
+.endp exp
+ASM_SIZE_DIRECTIVE(exp)
+
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs 
 }
 { .mfi
-.fframe 64
+.fframe 64 
         add sp=-64,sp                           // Create new stack
         nop.f 0
         mov GR_SAVE_GP=gp                       // Save gp
@@ -747,24 +791,24 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 { .mmi
         stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
         add GR_Parameter_X = 16,sp              // Parameter 1 address
-.save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                       // Save b0
+.save   b0, GR_SAVE_B0                      
+        mov GR_SAVE_B0=b0                       // Save b0 
 };;
 .body
 { .mib
-        stfd [GR_Parameter_X] = FR_X            // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-	nop.b 0
+        stfd [GR_Parameter_X] = FR_X                  // STORE Parameter 1 on stack 
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address 
+	nop.b 0                                      
 }
 { .mib
-        stfd [GR_Parameter_Y] = FR_RESULT       // STORE Parameter 3 on stack
-        add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#   // Call error handling function
+        stfd [GR_Parameter_Y] = FR_RESULT             // STORE Parameter 3 on stack
+        add   GR_Parameter_Y = -16,GR_Parameter_Y  
+        br.call.sptk b0=__libm_error_support#         // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 { .mmi
         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
@@ -773,11 +817,12 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   gp = GR_SAVE_GP                  // Restore gp 
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
-};;
+};; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_expf.S b/sysdeps/ia64/fpu/e_expf.S
index 8d620b6ffa..2aad021335 100644
--- a/sysdeps/ia64/fpu/e_expf.S
+++ b/sysdeps/ia64/fpu/e_expf.S
@@ -1,10 +1,10 @@
 .file "expf.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,501 +35,589 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 
 // History
-//*********************************************************************
-// 02/02/00 Original version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+//==============================================================
+// 4/04/00  Unwind update
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 08/21/00 Improvements to save 2 cycles on main path, and shorten x=0 case
+// 8/21/00  Improvements to save 2 cycles on main path, and shorten x=0 case
 // 12/07/00 Widen main path, shorten x=inf, nan paths
-// 03/15/01 Fix monotonicity problem around x=0 for round to +inf
-// 02/05/02 Corrected uninitialize predicate in POSSIBLE_UNDERFLOW path
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 07/26/02 Algorithm changed, accuracy improved
-// 09/26/02 support of higher precision inputs added, underflow threshold
-//          corrected
-// 11/15/02 Improved performance on Itanium 2, added possible over/under paths
-//
-//
-// API
-//*********************************************************************
-// float expf(float)
-//
-// Overview of operation
-//*********************************************************************
-// Take the input x. w is "how many log2/128 in x?"
-//  w = x * 64/log2
-//  NJ = int(w)
-//  x = NJ*log2/64 + R
-
-//  NJ = 64*n + j
-//  x = n*log2 + (log2/64)*j + R
-//
-//  So, exp(x) = 2^n * 2^(j/64)* exp(R)
-//
-//  T =  2^n * 2^(j/64)
-//       Construct 2^n
-//       Get 2^(j/64) table
-//           actually all the entries of 2^(j/64) table are stored in DP and
-//           with exponent bits set to 0 -> multiplication on 2^n can be
-//           performed by doing logical "or" operation with bits presenting 2^n
-
-//  exp(R) = 1 + (exp(R) - 1)
-//  P = exp(R) - 1 approximated by Taylor series of 3rd degree
-//      P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
 //
 
-//  The final result is reconstructed as follows
-//  exp(x) = T + T*P
+#include "libm_support.h"
 
-// Special values
-//*********************************************************************
-// expf(+0)    = 1.0
-// expf(-0)    = 1.0
+// Assembly macros
+//==============================================================
+// integer registers used
 
-// expf(+qnan) = +qnan
-// expf(-qnan) = -qnan
-// expf(+snan) = +qnan
-// expf(-snan) = -qnan
+ exp_GR_0x0f                = r33
+ exp_GR_0xf0                = r34
 
-// expf(-inf)  = +0
-// expf(+inf)  = +inf
+ EXP_AD_P_1                 = r36
+ EXP_AD_P_2                 = r37
+ EXP_AD_T1                  = r38
+ EXP_AD_T2                  = r39
+ exp_GR_Mint                = r40
 
-// Overflow and Underflow
-//*********************************************************************
-// expf(x) = largest single normal when
-//     x = 88.72283 = 0x42b17217
+ exp_GR_Mint_p_128          = r41
+ exp_GR_Ind1                = r42
+ EXP_AD_M1                  = r43
+ exp_GR_Ind2                = r44
+ EXP_AD_M2                  = r45
 
-// expf(x) = smallest single normal when
-//     x = -87.33654 = 0xc2aeac4f
+ exp_GR_min_oflow           = r46
+ exp_GR_max_zero            = r47
+ exp_GR_max_norm            = r48
+ exp_GR_max_uflow           = r49
+ exp_GR_min_norm            = r50
 
-// expf(x) = largest round-to-nearest single zero when
-//     x = -103.97208 = 0xc2cff1b5
+ exp_GR_17ones              = r51
+ exp_GR_gt_ln               = r52
+ exp_GR_T2_size             = r53
 
+ exp_GR_17ones_m1           = r56
+ exp_GR_one                 = r57
 
-// Registers used
-//*********************************************************************
-// Floating Point registers used:
-// f8, input
-// f6,f7, f9 -> f15,  f32 -> f40
 
-// General registers used:
-// r3, r23 -> r38
 
-// Predicate registers used:
-// p10 -> p15
+GR_SAVE_B0                    = r53
+GR_SAVE_PFS                   = r55
+GR_SAVE_GP                    = r54 
+
+GR_Parameter_X                = r59
+GR_Parameter_Y                = r60
+GR_Parameter_RESULT           = r61
+GR_Parameter_TAG              = r62
+
+FR_X             = f10
+FR_Y             = f1
+FR_RESULT        = f8
 
-// Assembly macros
-//*********************************************************************
-// integer registers used
-// scratch
-rNJ                   = r3
-
-rTmp                  = r23
-rJ                    = r23
-rN                    = r24
-rTblAddr              = r25
-rA3                   = r26
-rExpHalf              = r27
-rLn2Div64             = r28
-r17ones_m1            = r29
-rGt_ln                = r29
-rRightShifter         = r30
-r64DivLn2             = r31
-// stacked
-GR_SAVE_PFS           = r32
-GR_SAVE_B0            = r33
-GR_SAVE_GP            = r34
-GR_Parameter_X        = r35
-GR_Parameter_Y        = r36
-GR_Parameter_RESULT   = r37
-GR_Parameter_TAG      = r38
 
 // floating point registers used
-FR_X                  = f10
-FR_Y                  = f1
-FR_RESULT             = f8
-// scratch
-fRightShifter         = f6
-f64DivLn2             = f7
-fNormX                = f9
-fNint                 = f10
-fN                    = f11
-fR                    = f12
-fLn2Div64             = f13
-fA2                   = f14
-fA3                   = f15
-// stacked
-fP                    = f32
-fT                    = f33
-fMIN_SGL_OFLOW_ARG    = f34
-fMAX_SGL_ZERO_ARG     = f35
-fMAX_SGL_NORM_ARG     = f36
-fMIN_SGL_NORM_ARG     = f37
-fRSqr                 = f38
-fTmp                  = f39
-fGt_pln               = f39
-fWre_urm_f8           = f40
-fFtz_urm_f8           = f40
-
-
-RODATA
+
+ EXP_MIN_SGL_OFLOW_ARG      = f11
+ EXP_MAX_SGL_ZERO_ARG       = f12
+ EXP_MAX_SGL_NORM_ARG       = f13
+ EXP_MAX_SGL_UFLOW_ARG      = f14
+ EXP_MIN_SGL_NORM_ARG       = f15
+
+ exp_coeff_P5               = f32
+ exp_coeff_P6               = f33
+ exp_coeff_P3               = f34
+ exp_coeff_P4               = f35
+
+ exp_coeff_P1               = f36
+ exp_coeff_P2               = f37
+ exp_Mx                     = f38
+ exp_Mfloat                 = f39
+ exp_R                      = f40
+
+ exp_P1                     = f41
+ exp_P2                     = f42
+ exp_P3                     = f43
+ exp_Rsq                    = f44
+ exp_R4                     = f45
+
+ exp_P4                     = f46
+ exp_P5                     = f47
+ exp_P6                     = f48
+ exp_P7                     = f49
+ exp_T1                     = f50
+
+ exp_T2                     = f51
+ exp_T                      = f52
+ exp_A                      = f53
+ exp_norm_f8                = f54
+ exp_wre_urm_f8             = f55
+
+ exp_ftz_urm_f8             = f56
+ exp_gt_pln                 = f57
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
 .align 16
 
-LOCAL_OBJECT_START(_expf_table)
-data4 0x42b17218         // Smallest sgl arg to overflow sgl result, +88.7228
-data4 0xc2cff1b5         // Largest sgl for rnd-to-nearest 0 result, -103.9720
-data4 0x42b17217         // Largest sgl arg to give normal sgl result, +88.7228
-data4 0xc2aeac4f         // Smallest sgl arg to give normal sgl result, -87.3365
+exp_coeff_1_table:       // Polynomial coefficients (IEEE doubles), laid out as the pairs (p5,p6) and (p1,p2) that expf fetches with ldfpd
+ASM_TYPE_DIRECTIVE(exp_coeff_1_table,@object) // Keep this table first: expf walks a single post-incremented pointer from here through the tables that follow
+data8 0x3F56F35FDE4F8563 // p5: coefficient of R^6 (close to 1/720)
+data8 0x3F2A378BEFECCFDD // p6: coefficient of R^7 (close to 1/5040)
+data8 0x3FE00000258C581D // p1: coefficient of R^2 (close to 1/2)
+data8 0x3FC555557AE7B3D4 // p2: coefficient of R^3 (close to 1/6)
+ASM_SIZE_DIRECTIVE(exp_coeff_1_table)
+
+
+exp_coeff_2_table:       // Remaining polynomial coefficients (IEEE doubles), fetched as the pair (p3,p4) by the third ldfpd in expf
+ASM_TYPE_DIRECTIVE(exp_coeff_2_table,@object) // Must directly follow exp_coeff_1_table and precede exp_T2_table (pointer is post-incremented past it)
+data8 0x3FA5551BB6592FAE // p3: coefficient of R^4 (close to 1/24)
+data8 0x3F8110E8EBFFD485 // p4: coefficient of R^5 (close to 1/120)
+ASM_SIZE_DIRECTIVE(exp_coeff_2_table)
+
+
+exp_T2_table:            // 16 entries of exp(k), k = -128..-113, selected by the low 4 bits of (exp_GR_Mint + 128)
+ASM_TYPE_DIRECTIVE(exp_T2_table,@object) // Each entry: 64-bit significand then sign/exponent word (16 bytes, read with ldfe); 16 entries = 0x100 bytes = exp_GR_T2_size
+data8 0xa175cf9cd7d85844 , 0x00003f46 // exp(-128)
+data8 0xdb7279415a1f9eed , 0x00003f47 // exp(-127)
+data8 0x95213b242bd8ca5f , 0x00003f49 // exp(-126)
+data8 0xcab03c968c989f83 , 0x00003f4a // exp(-125)
+data8 0x89bdb674702961ad , 0x00003f4c // exp(-124)
+data8 0xbb35a2eec278be35 , 0x00003f4d // exp(-123)
+data8 0xfe71b17f373e7e7a , 0x00003f4e // exp(-122)
+data8 0xace9a6ec52a39b63 , 0x00003f50 // exp(-121)
+data8 0xeb03423fe393cf1c , 0x00003f51 // exp(-120)
+data8 0x9fb52c5bcaef1693 , 0x00003f53 // exp(-119)
+data8 0xd910b6377ed60bf1 , 0x00003f54 // exp(-118)
+data8 0x9382dad8a9fdbfe4 , 0x00003f56 // exp(-117)
+data8 0xc87d0a84dea869a3 , 0x00003f57 // exp(-116)
+data8 0x883efb4c6d1087b0 , 0x00003f59 // exp(-115)
+data8 0xb92d7373dce9a502 , 0x00003f5a // exp(-114)
+data8 0xfbaeb020577fb0cb , 0x00003f5b // exp(-113)
+ASM_SIZE_DIRECTIVE(exp_T2_table)
+
+
+exp_T1_table:            // 16 entries of exp(16*k), k = 0..15, selected by the high 4 bits of (exp_GR_Mint + 128)
+ASM_TYPE_DIRECTIVE(exp_T1_table,@object) // Same 16-byte entry layout as exp_T2_table; expf locates this table at exp_T2_table + 0x100, so it must follow it directly
+data8 0x8000000000000000 , 0x00003fff // exp(16 * 0)
+data8 0x87975e8540010249 , 0x00004016 // exp(16 * 1)
+data8 0x8fa1fe625b3163ec , 0x0000402d // exp(16 * 2)
+data8 0x9826b576512a59d7 , 0x00004044 // exp(16 * 3)
+data8 0xa12cc167acbe6902 , 0x0000405b // exp(16 * 4)
+data8 0xaabbcdcc279f59e4 , 0x00004072 // exp(16 * 5)
+data8 0xb4dbfaadc045d16f , 0x00004089 // exp(16 * 6)
+data8 0xbf95e372ccdbf146 , 0x000040a0 // exp(16 * 7)
+data8 0xcaf2a62eea10bbfb , 0x000040b7 // exp(16 * 8)
+data8 0xd6fbeb62fddbd340 , 0x000040ce // exp(16 * 9)
+data8 0xe3bbee32e4a440ea , 0x000040e5 // exp(16 * 10)
+data8 0xf13d8517c34199a8 , 0x000040fc // exp(16 * 11)
+data8 0xff8c2b166241eedd , 0x00004113 // exp(16 * 12)
+data8 0x875a04c0b38d6129 , 0x0000412b // exp(16 * 13)
+data8 0x8f610127db6774d7 , 0x00004142 // exp(16 * 14)
+data8 0x97e1dd87e5c20bb6 , 0x00004159 // exp(16 * 15)
+ASM_SIZE_DIRECTIVE(exp_T1_table)
+
+// Argument Reduction
+//  exp_Mx = (int)f8            ==> The value of f8 rounded to int is placed into the
+//                                  significand of exp_Mx as a two's
+//                                  complement number.
+
+// Later we want to have exp_Mx in a general register. Do this with a getf.sig
+// and call the general register exp_GR_Mint
+
+//  exp_Mfloat = (float)(int)f8 ==> the two's complement number in
+//                                  significand of exp_Mx is turned
+//                                  into a floating point number.
+//  R = f8 - exp_Mfloat         ==> reduced argument
+
+// Core Approximation
+// Calculate a series in R
+//  R * p6 + p5
+//  R * p4 + p3
+//  R * p2 + p1
+//  R^2
+//  R^4
+//  R^2(R * p6 + p5) + (R * p4 + p3)
+//  R^2(R * p2 + p1)
+//  R^4(R^2(R * p6 + p5) + (R * p4 + p3)) + (R^2(R * p2 + p1))
+//  R + 1
+//  exp(R) = (1 + R) + R^4(R^2(R * p6 + p5) + (R * p4 + p3)) + (R^2(R * p2 + p1))
+//  exp(R) = 1 + R + R^2 * p1 + R^3 * p2 + R^4 * p3 + R^5 * p4 + R^6 * p5 + R^7 * p6
+
+// Reconstruction
+// significand of exp_Mx is two's complement,
+// -103 < x < 89
+// The smallest single denormal is 2^-149 = ssdn
+//    For e^x = ssdn
+//        x   = log(ssdn) = -103.279
+//    But with rounding result goes to ssdn until -103.972079
+// The largest single normal is  1.<23 1's> 2^127 ~ 2^128 = lsn
+//    For e^x = lsn
+//        x   = log(lsn) = 88.7228
 //
-// 2^(j/64) table, j goes from 0 to 63
-data8 0x0000000000000000 // 2^(0/64)
-data8 0x00002C9A3E778061 // 2^(1/64)
-data8 0x000059B0D3158574 // 2^(2/64)
-data8 0x0000874518759BC8 // 2^(3/64)
-data8 0x0000B5586CF9890F // 2^(4/64)
-data8 0x0000E3EC32D3D1A2 // 2^(5/64)
-data8 0x00011301D0125B51 // 2^(6/64)
-data8 0x0001429AAEA92DE0 // 2^(7/64)
-data8 0x000172B83C7D517B // 2^(8/64)
-data8 0x0001A35BEB6FCB75 // 2^(9/64)
-data8 0x0001D4873168B9AA // 2^(10/64)
-data8 0x0002063B88628CD6 // 2^(11/64)
-data8 0x0002387A6E756238 // 2^(12/64)
-data8 0x00026B4565E27CDD // 2^(13/64)
-data8 0x00029E9DF51FDEE1 // 2^(14/64)
-data8 0x0002D285A6E4030B // 2^(15/64)
-data8 0x000306FE0A31B715 // 2^(16/64)
-data8 0x00033C08B26416FF // 2^(17/64)
-data8 0x000371A7373AA9CB // 2^(18/64)
-data8 0x0003A7DB34E59FF7 // 2^(19/64)
-data8 0x0003DEA64C123422 // 2^(20/64)
-data8 0x0004160A21F72E2A // 2^(21/64)
-data8 0x00044E086061892D // 2^(22/64)
-data8 0x000486A2B5C13CD0 // 2^(23/64)
-data8 0x0004BFDAD5362A27 // 2^(24/64)
-data8 0x0004F9B2769D2CA7 // 2^(25/64)
-data8 0x0005342B569D4F82 // 2^(26/64)
-data8 0x00056F4736B527DA // 2^(27/64)
-data8 0x0005AB07DD485429 // 2^(28/64)
-data8 0x0005E76F15AD2148 // 2^(29/64)
-data8 0x0006247EB03A5585 // 2^(30/64)
-data8 0x0006623882552225 // 2^(31/64)
-data8 0x0006A09E667F3BCD // 2^(32/64)
-data8 0x0006DFB23C651A2F // 2^(33/64)
-data8 0x00071F75E8EC5F74 // 2^(34/64)
-data8 0x00075FEB564267C9 // 2^(35/64)
-data8 0x0007A11473EB0187 // 2^(36/64)
-data8 0x0007E2F336CF4E62 // 2^(37/64)
-data8 0x00082589994CCE13 // 2^(38/64)
-data8 0x000868D99B4492ED // 2^(39/64)
-data8 0x0008ACE5422AA0DB // 2^(40/64)
-data8 0x0008F1AE99157736 // 2^(41/64)
-data8 0x00093737B0CDC5E5 // 2^(42/64)
-data8 0x00097D829FDE4E50 // 2^(43/64)
-data8 0x0009C49182A3F090 // 2^(44/64)
-data8 0x000A0C667B5DE565 // 2^(45/64)
-data8 0x000A5503B23E255D // 2^(46/64)
-data8 0x000A9E6B5579FDBF // 2^(47/64)
-data8 0x000AE89F995AD3AD // 2^(48/64)
-data8 0x000B33A2B84F15FB // 2^(49/64)
-data8 0x000B7F76F2FB5E47 // 2^(50/64)
-data8 0x000BCC1E904BC1D2 // 2^(51/64)
-data8 0x000C199BDD85529C // 2^(52/64)
-data8 0x000C67F12E57D14B // 2^(53/64)
-data8 0x000CB720DCEF9069 // 2^(54/64)
-data8 0x000D072D4A07897C // 2^(55/64)
-data8 0x000D5818DCFBA487 // 2^(56/64)
-data8 0x000DA9E603DB3285 // 2^(57/64)
-data8 0x000DFC97337B9B5F // 2^(58/64)
-data8 0x000E502EE78B3FF6 // 2^(59/64)
-data8 0x000EA4AFA2A490DA // 2^(60/64)
-data8 0x000EFA1BEE615A27 // 2^(61/64)
-data8 0x000F50765B6E4540 // 2^(62/64)
-data8 0x000FA7C1819E90D8 // 2^(63/64)
-LOCAL_OBJECT_END(_expf_table)
+// expf overflows                       when x > 42b17218 = 88.7228
+// expf returns largest single denormal when x = c2aeac50
+// expf goes to zero when                    x < c2cff1b5 
+
+// Consider range of 8-bit two's complement, -128 ---> 127
+// Add 128; range becomes                       0 ---> 255
+
+// The number (=i) in 0 ---> 255 is used as offset into two tables.
+
+// i = abcd efgh = abcd * 16 + efgh = i1 * 16 + i2
+
+// i1 = (exp_GR_Mint + 128)  & 0xf0 (show 0xf0 as -0x10 to avoid assembler error)
+//                                  (The immediate in the AND is an 8-bit two's complement)
+// i1 = i1 + start of T1 table (EXP_AD_T1)
+//    Note that the entries in T1 are double-extended numbers on 16-byte boundaries
+//    and that i1 is already scaled by 16 (shifted left by 4) after the AND.
+
+// i2 must be shifted left by 4 before adding to the start of the table.
+// i2 = ((exp_GR_Mint + 128)  & 0x0f) << 4
+// i2 = i2 + start of T2 table (EXP_AD_T2)
+
+// T      = T1 * T2
+// A      = T * (1 + R)
+// answer = T *  (R^2 * p1 + R^3 * p2 + R^4 * p3 + R^5 * p4 + R^6 * p5 + R^7 * p6) +
+//          T *  (1 + R)
+//        = T * exp(R)
+
 
+.global expf#
 
 .section .text
-GLOBAL_IEEE754_ENTRY(expf)
-      
-{ .mlx
-      addl            rTblAddr = @ltoff(_expf_table),gp
-      movl            r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
+.proc  expf#
+.align 32
+expf:
+#ifdef _LIBC
+.global __ieee754_expf#
+__ieee754_expf:
+#endif
+
+{ .mfi
+     alloc      r32            = ar.pfs,1,26,4,0
+     fcvt.fx.s1   exp_Mx       =    f8
+     mov       exp_GR_17ones   =    0x1FFFF
 }
 { .mlx
-      addl            rA3 = 0x3E2AA, r0 // high bits of 1.0/6.0 rounded to SP
-      movl            rRightShifter = 0x43E8000000000000 // DP Right Shifter
+     addl      EXP_AD_P_1      =    @ltoff(exp_coeff_1_table),gp
+     movl      exp_GR_min_oflow = 0x42b17218    
 }
 ;;
 
+// Fnorm done to take any enabled faults
 { .mfi
-      // point to the beginning of the table
-      ld8             rTblAddr = [rTblAddr]
-      fclass.m        p14, p0 = f8, 0x22    // test for -INF
-      shl             rA3 = rA3, 12  // 0x3E2AA000, approx to 1.0/6.0 in SP
+     ld8       EXP_AD_P_1      =  [EXP_AD_P_1]
+     fclass.m  p6,p0      = f8, 0x07	//@zero
+     nop.i 999
 }
 { .mfi
-      nop.m           0
-      fnorm.s1        fNormX = f8           // normalized x
-      addl            rExpHalf = 0xFFFE, r0 // exponent of 1/2
+     add       exp_GR_max_norm = -1, exp_GR_min_oflow  // 0x42b17217
+     fnorm     exp_norm_f8     =    f8
+     nop.i 999
 }
 ;;
 
 { .mfi
-      setf.d          f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
-      fclass.m        p15, p0 = f8, 0x1e1   // test for NaT,NaN,+Inf
-      nop.i           0
+     setf.s    EXP_MIN_SGL_OFLOW_ARG = exp_GR_min_oflow  // 0x42b17218
+     fclass.m  p7,p0      = f8, 0x22	// Test for x=-inf
+     mov       exp_GR_0xf0 = 0x0f0
 }
 { .mlx
-      // load Right Shifter to FP reg
-      setf.d          fRightShifter = rRightShifter
-      movl            rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
+     setf.s    EXP_MAX_SGL_NORM_ARG = exp_GR_max_norm
+     movl      exp_GR_max_zero = 0xc2cff1b5    
 }
 ;;
 
-{ .mfi
-      nop.m           0
-      fcmp.eq.s1      p13, p0 = f0, f8      // test for x = 0.0
-      nop.i           0
+
+{ .mlx
+     mov       exp_GR_0x0f = 0x00f
+     movl      exp_GR_max_uflow = 0xc2aeac50    
 }
 { .mfb
-      setf.s          fA3 = rA3             // load A3 to FP reg
-(p14) fma.s.s0        f8 = f0, f1, f0       // result if x = -inf
-(p14) br.ret.spnt     b0                    // exit here if x = -inf
+     nop.m 999
+(p6) fma.s     f8 = f1,f1,f0
+(p6) br.ret.spnt   b0        // quick exit for x=0
 }
 ;;
 
 { .mfi
-      setf.exp        fA2 = rExpHalf        // load A2 to FP reg
-      fcmp.eq.s0      p6, p0 = f8, f0       // Dummy to flag denorm
-      nop.i           0
+     setf.s    EXP_MAX_SGL_ZERO_ARG = exp_GR_max_zero
+     fclass.m  p8,p0      = f8, 0x21	// Test for x=+inf
+     adds      exp_GR_min_norm = 1, exp_GR_max_uflow  // 0xc2aeac51
 }
 { .mfb
-      setf.d          fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
-(p15) fma.s.s0        f8 = f8, f1, f0       // result if x = NaT,NaN,+Inf
-(p15) br.ret.spnt     b0                    // exit here if x = NaT,NaN,+Inf
+     ldfpd     exp_coeff_P5,exp_coeff_P6     =    [EXP_AD_P_1],16
+(p7) fma.s     f8 = f0,f0,f0
+(p7) br.ret.spnt   b0        // quick exit for x=-inf
 }
 ;;
 
-{ .mfb
-      // overflow and underflow_zero threshold
-      ldfps           fMIN_SGL_OFLOW_ARG, fMAX_SGL_ZERO_ARG = [rTblAddr], 8
-(p13) fma.s.s0        f8 = f1, f1, f0       // result if x = 0.0
-(p13) br.ret.spnt     b0                    // exit here if x =0.0
+{ .mmf
+     ldfpd     exp_coeff_P1,exp_coeff_P2     =    [EXP_AD_P_1],16
+     setf.s    EXP_MAX_SGL_UFLOW_ARG = exp_GR_max_uflow
+     fclass.m  p9,p0      = f8, 0xc3	// Test for x=nan
 }
 ;;
 
-      // max normal and underflow_denorm threshold
-{ .mfi
-      ldfps           fMAX_SGL_NORM_ARG, fMIN_SGL_NORM_ARG = [rTblAddr], 8
-      nop.f           0
-      nop.i           0
+{ .mmb
+     ldfpd     exp_coeff_P3,exp_coeff_P4     =    [EXP_AD_P_1],16
+     setf.s    EXP_MIN_SGL_NORM_ARG = exp_GR_min_norm
+(p8) br.ret.spnt   b0        // quick exit for x=+inf
 }
 ;;
 
+// EXP_AD_P_1 now points to exp_T2_table
 { .mfi
-      nop.m           0
-      // x*(64/ln(2)) + Right Shifter
-      fma.s1          fNint = fNormX, f64DivLn2, fRightShifter
-      nop.i           0
+     mov exp_GR_T2_size           = 0x100
+     fcvt.xf   exp_Mfloat     =    exp_Mx
+     nop.i 999
+}
+;;
+
+{ .mfb
+     getf.sig  exp_GR_Mint    =    exp_Mx
+(p9) fmerge.s     f8 = exp_norm_f8, exp_norm_f8
+(p9) br.ret.spnt   b0        // quick exit for x=nan
 }
 ;;
 
+{ .mmi
+     nop.m 999
+     mov      EXP_AD_T2       =  EXP_AD_P_1
+     add      EXP_AD_T1       =  exp_GR_T2_size,EXP_AD_P_1 ;;
+}
+
+
+{ .mmi
+     adds      exp_GR_Mint_p_128   =    0x80,exp_GR_Mint ;;
+     and       exp_GR_Ind1      =    exp_GR_Mint_p_128, exp_GR_0xf0
+     and       exp_GR_Ind2      =    exp_GR_Mint_p_128, exp_GR_0x0f ;;
+}
+
 // Divide arguments into the following categories:
-//  Certain Underflow       p11 - -inf < x <= MAX_SGL_ZERO_ARG
-//  Possible Underflow      p13 - MAX_SGL_ZERO_ARG < x < MIN_SGL_NORM_ARG
+//  Certain Underflow/zero  p11 - -inf < x <= MAX_SGL_ZERO_ARG 
+//  Certain Underflow       p12 - MAX_SGL_ZERO_ARG < x <= MAX_SGL_UFLOW_ARG 
+//  Possible Underflow      p13 - MAX_SGL_UFLOW_ARG < x < MIN_SGL_NORM_ARG
 //  Certain Safe                - MIN_SGL_NORM_ARG <= x <= MAX_SGL_NORM_ARG
 //  Possible Overflow       p14 - MAX_SGL_NORM_ARG < x < MIN_SGL_OFLOW_ARG
 //  Certain Overflow        p15 - MIN_SGL_OFLOW_ARG <= x < +inf
 //
-// If the input is really a single arg, then there will never be
-// "Possible Overflow" arguments.
+// If the input is really a single arg, then there will never be "Possible
+// Underflow" or "Possible Overflow" arguments.
 //
 
 { .mfi
-      nop.m           0
-      // check for overflow
-      fcmp.ge.s1      p15, p0 = fNormX, fMIN_SGL_OFLOW_ARG
-      nop.i           0
+     add       EXP_AD_M1 =    exp_GR_Ind1,EXP_AD_T1
+     fcmp.ge.s1  p15,p14 = exp_norm_f8,EXP_MIN_SGL_OFLOW_ARG
+     nop.i 999
+}
+{ .mfi
+     shladd       EXP_AD_M2                =    exp_GR_Ind2,4,EXP_AD_T2
+     fms.s1    exp_R                    =    f1,f8,exp_Mfloat
+     nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      // check for underflow and tiny (+0) result
-      fcmp.le.s1      p11, p0 = fNormX, fMAX_SGL_ZERO_ARG
-      nop.i           0
+     ldfe           exp_T1    =    [EXP_AD_M1]
+     fcmp.le.s1  p11,p12 = exp_norm_f8,EXP_MAX_SGL_ZERO_ARG
+     nop.i 999 ;;
 }
+
 { .mfb
-      nop.m           0
-      fms.s1          fN = fNint, f1, fRightShifter // n in FP register
-      // branch out if overflow
-(p15) br.cond.spnt    EXP_CERTAIN_OVERFLOW
+      ldfe           exp_T2   =    [EXP_AD_M2]
+(p14) fcmp.gt.s1  p14,p0 = exp_norm_f8,EXP_MAX_SGL_NORM_ARG
+(p15) br.cond.spnt L(EXP_CERTAIN_OVERFLOW) ;;
 }
-;;
 
 { .mfb
-      getf.sig        rNJ = fNint           // bits of n, j
-      // check for underflow and deno result
-      fcmp.lt.s1      p13, p0 = fNormX, fMIN_SGL_NORM_ARG
-      // branch out if underflow and tiny (+0) result
-(p11) br.cond.spnt    EXP_CERTAIN_UNDERFLOW
+      nop.m 999
+(p12) fcmp.le.s1  p12,p0 = exp_norm_f8,EXP_MAX_SGL_UFLOW_ARG
+(p11) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW_ZERO)
+}
+;;
+
+{ .mfi
+      nop.m 999
+(p13) fcmp.lt.s1  p13,p0 = exp_norm_f8,EXP_MIN_SGL_NORM_ARG
+      nop.i 999
 }
 ;;
 
+
 { .mfi
-      nop.m           0
-      // check for possible overflow
-      fcmp.gt.s1      p14, p0 = fNormX, fMAX_SGL_NORM_ARG
-      extr.u          rJ = rNJ, 0, 6        // bits of j
+     nop.m                 999
+     fma.s1    exp_Rsq   =    exp_R,exp_R,f0
+     nop.i                 999
 }
 { .mfi
-      addl            rN = 0xFFFF - 63, rNJ // biased and shifted n
-      fnma.s1         fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
-      nop.i           0
+     nop.m                 999
+     fma.s1    exp_P3    =    exp_R,exp_coeff_P2,exp_coeff_P1
+     nop.i                 999 
 }
 ;;
 
 { .mfi
-      shladd          rJ = rJ, 3, rTblAddr  // address in the 2^(j/64) table
-      nop.f           0
-      shr             rN = rN, 6            // biased n
+     nop.m                 999
+     fma.s1    exp_P1    =    exp_R,exp_coeff_P6,exp_coeff_P5
+     nop.i                 999 
+}
+{ .mfi
+     nop.m                 999
+     fma.s1    exp_P2    =    exp_R,exp_coeff_P4,exp_coeff_P3
+     nop.i                 999
 }
 ;;
 
+
 { .mfi
-      ld8             rJ = [rJ]
-      nop.f           0
-      shl             rN = rN, 52           // 2^n bits in DP format
+     nop.m                 999
+     fma.s1    exp_P7    =    f1,exp_R,f1
+     nop.i                 999
 }
 ;;
 
+
+{ .mfi
+     nop.m                 999
+     fma.s1    exp_P5    =    exp_Rsq,exp_P3,f0
+     nop.i                 999
+}
 { .mfi
-      or              rN = rN, rJ // bits of 2^n * 2^(j/64) in DP format
-      nop.f           0
-      nop.i           0
+     nop.m                 999
+     fma.s1    exp_R4    =    exp_Rsq,exp_Rsq,f0
+     nop.i                 999 
 }
 ;;
 
 { .mfi
-      setf.d          fT = rN               // 2^n * 2^(j/64)
-      fma.s1          fP = fA3, fR, fA2     // A3*R + A2
-      nop.i           0
+     nop.m                 999
+     fma.s1    exp_T     =    exp_T1,exp_T2,f0
+     nop.i                 999 
 }
 { .mfi
-      nop.m           0
-      fma.s1          fRSqr = fR, fR, f0    // R^2
-      nop.i           0
+     nop.m                 999
+     fma.s1    exp_P4    =    exp_Rsq,exp_P1,exp_P2
+     nop.i                 999 
 }
 ;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fP = fP, fRSqr, fR    // P = (A3*R + A2)*R^2 + R
-      nop.i           0
+     nop.m                 999
+     fma.s1    exp_A     =    exp_T,exp_P7,f0
+     nop.i                 999
+}
+{ .mfi
+     nop.m                 999
+     fma.s1    exp_P6    =    exp_R4,exp_P4,exp_P5
+     nop.i                 999
 }
 ;;
 
-{ .mbb
-      nop.m           0
-      // branch out if possible underflow
-(p13) br.cond.spnt    EXP_POSSIBLE_UNDERFLOW
-      // branch out if possible overflow result
-(p14) br.cond.spnt    EXP_POSSIBLE_OVERFLOW
+{ .bbb
+(p12) br.cond.spnt L(EXP_CERTAIN_UNDERFLOW)
+(p13) br.cond.spnt L(EXP_POSSIBLE_UNDERFLOW)
+(p14) br.cond.spnt L(EXP_POSSIBLE_OVERFLOW)
 }
 ;;
 
 { .mfb
-      nop.m           0
-      // final result in the absence of over- and underflow
-      fma.s.s0        f8 = fP, fT, fT
-      // exit here in the absence of over- and underflow
-      br.ret.sptk     b0
+     nop.m            999
+     fma.s     f8   =    exp_T,exp_P6,exp_A
+     br.ret.sptk     b0
 }
 ;;
 
-EXP_POSSIBLE_OVERFLOW:
+L(EXP_POSSIBLE_OVERFLOW):
+
+// We got an answer. EXP_MAX_SGL_NORM_ARG < x < EXP_MIN_SGL_OFLOW_ARG
+// overflow is a possibility, not a certainty
+// Set wre in s2 and perform the last operation with s2
+
+// We define an overflow when the answer with
+//    WRE set
+//    user-defined rounding mode
+// is lsn +1
+
+// Is the exponent 1 more than the largest single?
+// If so, go to ERROR RETURN, else (no overflow) get the answer and
+// leave.
+
+// Largest single is FE (biased single)
+//                   FE - 7F + FFFF = 1007E
 
-// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
-// This cannot happen if input is a single, only if input higher precision.
-// Overflow is a possibility, not a certainty.
+// Create + largest_single_plus_ulp
+// Create - largest_single_plus_ulp
 
-// Recompute result using status field 2 with user's rounding mode,
-// and wre set.  If result is larger than largest single, then we have
-// overflow
+// Calculate answer with WRE set.
+
+// Cases when answer is lsn+1  are as follows:
+
+//           midpoint
+//              |
+//  lsn         |         lsn+1
+// --+----------|----------+------------
+//              |
+//    +inf          +inf      -inf
+//                  RN         RN
+//                             RZ
+// exp_gt_pln contains the floating point number lsn+1.
+// The setf.exp puts 0x1007f in the exponent and 0x800... in the significand.
+
+// If the answer is >= lsn+1, we have overflowed.
+// Then p6 is TRUE. Set the overflow tag, save input in FR_X,
+// do the final calculation for IEEE result, and branch to error return.
 
 { .mfi
-      mov             rGt_ln  = 0x1007f // Exponent for largest single + 1 ulp
-      fsetc.s2        0x7F,0x42         // Get user's round mode, set wre
-      nop.i           0
+       mov         exp_GR_gt_ln    = 0x1007F 
+       fsetc.s2    0x7F,0x42
+       nop.i 999
 }
 ;;
 
 { .mfi
-      setf.exp        fGt_pln = rGt_ln  // Create largest single + 1 ulp
-      fma.s.s2        fWre_urm_f8 = fP, fT, fT    // Result with wre set
-      nop.i           0
+       setf.exp      exp_gt_pln    = exp_GR_gt_ln
+       fma.s.s2    exp_wre_urm_f8  = exp_T,  exp_P6, exp_A
+       nop.i 999
 }
 ;;
 
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                   // Turn off wre in sf2
-      nop.i           0
+       nop.m 999
+       fsetc.s2 0x7F,0x40
+       nop.i 999
 }
 ;;
 
 { .mfi
-      nop.m           0
-      fcmp.ge.s1      p6, p0 =  fWre_urm_f8, fGt_pln // Test for overflow
-      nop.i           0
+       nop.m 999
+       fcmp.ge.unc.s1 p6, p0       =  exp_wre_urm_f8, exp_gt_pln
+       nop.i 999
 }
 ;;
 
 { .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    EXP_CERTAIN_OVERFLOW // Branch if overflow
+       nop.m 999
+       nop.f 999
+(p6)   br.cond.spnt L(EXP_CERTAIN_OVERFLOW)  // Branch if really overflow
 }
 ;;
 
 { .mfb
-      nop.m           0
-      fma.s.s0        f8 = fP, fT, fT
-      br.ret.sptk     b0                     // Exit if really no overflow
+       nop.m 999
+       fma.s        f8             = exp_T,  exp_P6, exp_A
+       br.ret.sptk     b0                 // Exit if really no overflow
 }
 ;;
 
-// here if overflow
-EXP_CERTAIN_OVERFLOW:
+L(EXP_CERTAIN_OVERFLOW):
 { .mmi
-      addl            r17ones_m1 = 0x1FFFE, r0
-;;
-      setf.exp        fTmp = r17ones_m1
-      nop.i           0
+      sub   exp_GR_17ones_m1 = exp_GR_17ones, r0, 1 ;;
+      setf.exp     f9 = exp_GR_17ones_m1
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      alloc           r32=ar.pfs,0,3,4,0
-      fmerge.s        FR_X = f8,f8
-      nop.i           0
+      nop.m 999
+      fmerge.s FR_X = f8,f8
+      nop.i 999
 }
 { .mfb
-      mov             GR_Parameter_TAG = 16
-      fma.s.s0        FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
-      br.cond.sptk    __libm_error_region
+      mov        GR_Parameter_TAG = 16
+      fma.s       FR_RESULT = f9, f9, f0    // Set I,O and +INF result
+      br.cond.sptk  __libm_error_region ;;                             
 }
-;;
 
-EXP_POSSIBLE_UNDERFLOW:
+L(EXP_POSSIBLE_UNDERFLOW): 
 
-// Here if fMAX_SGL_ZERO_ARG < x < fMIN_SGL_NORM_ARG
-// Underflow is a possibility, not a certainty
+// We got an answer. EXP_MAX_SGL_UFLOW_ARG < x < EXP_MIN_SGL_NORM_ARG
+// underflow is a possibility, not a certainty
 
 // We define an underflow when the answer with
 //    ftz set
@@ -549,157 +637,144 @@ EXP_POSSIBLE_UNDERFLOW:
 //                           E
 // -----+--------------------+--------------------+-----
 //      |                    |                    |
-//   1.1...10 2^-3fff    1.1...11 2^-3fff    1.0...00 2^-3ffe
-//   0.1...11 2^-3ffe                                   (biased, 1)
+//   1.1...10 2^-7f      1.1...11 2^-7f      1.0...00 2^-7e  
+//   0.1...11 2^-7e                                     (biased, 1)
 //    largest dn                               smallest normal
 
-{ .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x41                // Get user's round mode, set ftz
-      nop.i           0
-}
-;;
+// If the answer computed with ftz set is 0, we have underflowed.
+// Then p6 is TRUE. Set the underflow tag, save input in FR_X,
+// do the final calculation for IEEE result, and branch to error return.
 
 { .mfi
-      nop.m           0
-      fma.s.s2        fFtz_urm_f8 = fP, fT, fT // Result with ftz set
-      nop.i           0
+       nop.m 999
+       fsetc.s2 0x7F,0x41                      // Get user's round mode, set ftz
+       nop.i 999
 }
 ;;
 
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                // Turn off ftz in sf2
-      nop.i           0
+       nop.m 999
+       fma.s.s2     exp_ftz_urm_f8  = exp_T,  exp_P6, exp_A  // Result with ftz set
+       nop.i 999
 }
 ;;
 
+
 { .mfi
-      nop.m           0
-      fcmp.eq.s1      p6, p7 = fFtz_urm_f8, f0 // Test for underflow
-      nop.i           0
-}
-{ .mfi
-      nop.m           0
-      fma.s.s0        f8 = fP, fT, fT          // Compute result, set I, maybe U
-      nop.i           0
+       nop.m 999
+       fsetc.s2 0x7F,0x40                      // Turn off ftz in sf2
+       nop.i 999
 }
 ;;
 
-{ .mbb
-      nop.m           0
-(p6)  br.cond.spnt    EXP_UNDERFLOW_COMMON     // Branch if really underflow
-(p7)  br.ret.sptk     b0                       // Exit if really no underflow
+{ .mfi
+       nop.m 999
+       fcmp.eq.unc.s1 p6, p0     =  exp_ftz_urm_f8, f0  // Test for underflow
+       nop.i 999
 }
 ;;
 
-EXP_CERTAIN_UNDERFLOW:
-// Here if  x < fMAX_SGL_ZERO_ARG
-// Result will be zero (or smallest denorm if round to +inf) with I, U set
-{ .mmi
-      mov             rTmp = 1
-;;
-      setf.exp        fTmp = rTmp               // Form small normal
-      nop.i           0
+{ .mfb
+       nop.m 999
+       nop.f 999
+(p6)   br.cond.spnt L(EXP_CERTAIN_UNDERFLOW)  // Branch if really underflow 
 }
 ;;
 
 { .mfb
-      nop.m           0
-      fma.s.s0        f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
-      br.cond.sptk    EXP_UNDERFLOW_COMMON
+       nop.m 999
+       fma.s        f8             = exp_T,  exp_P6, exp_A
+       br.ret.sptk     b0                  // Exit if really no underflow
 }
 ;;
 
-EXP_UNDERFLOW_COMMON:
-// Determine if underflow result is zero or nonzero
+L(EXP_CERTAIN_UNDERFLOW):
 { .mfi
-      alloc           r32=ar.pfs,0,3,4,0
-      fcmp.eq.s1      p6, p0 =  f8, f0
-      nop.i           0
+      nop.m 999
+      fmerge.s FR_X = f8,f8                     // Save input in FR_X for error handling
+      nop.i 999
 }
-;;
-
 { .mfb
-      nop.m           0
-      fmerge.s        FR_X = fNormX,fNormX
-(p6)  br.cond.spnt    EXP_UNDERFLOW_ZERO
+      mov        GR_Parameter_TAG = 17
+      fma.s       FR_RESULT  = exp_T, exp_P6, exp_A // Set I,U and tiny result
+      br.cond.sptk  __libm_error_region ;;                             
 }
-;;
 
-EXP_UNDERFLOW_NONZERO:
-// Here if  x < fMIN_SGL_NORM_ARG and result nonzero;
-// I, U are set
-{ .mfb
-      mov             GR_Parameter_TAG = 17
-      nop.f           0                         // FR_RESULT already set
-      br.cond.sptk    __libm_error_region
+L(EXP_CERTAIN_UNDERFLOW_ZERO):
+{ .mmi
+      mov   exp_GR_one = 1 ;;
+      setf.exp     f9 = exp_GR_one              // Form small normal
+      nop.i 999 ;;
 }
-;;
 
-EXP_UNDERFLOW_ZERO:
-// Here if x < fMIN_SGL_NORM_ARG and result zero;
-// I, U are set
+{ .mfi
+      nop.m 999
+      fmerge.s FR_X = f8,f8
+      nop.i 999
+}
 { .mfb
-      mov             GR_Parameter_TAG = 17
-      nop.f           0                         // FR_RESULT already set
-      br.cond.sptk    __libm_error_region
+      mov        GR_Parameter_TAG = 17
+      fma.s       FR_RESULT = f9, f9, f0    // Set I,U and tiny (+0.0) result
+      br.cond.sptk  __libm_error_region ;;                             
 }
-;;
 
-GLOBAL_IEEE754_END(expf)
+.endp expf
+ASM_SIZE_DIRECTIVE(expf)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
-      add   GR_Parameter_Y=-32,sp             // Parameter 2 value
-      nop.f 0
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
+	nop.f 999
 .save   ar.pfs,GR_SAVE_PFS
-      mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-      add sp=-64,sp                           // Create new stack
-      nop.f 0
-      mov GR_SAVE_GP=gp                       // Save gp
+        add sp=-64,sp                           // Create new stack
+        nop.f 0
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
 { .mmi
-      stfs [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 on stack
-      add GR_Parameter_X = 16,sp              // Parameter 1 address
+        stfs [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0                       // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
 .body
 { .mfi
-      stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
-      nop.f 0
-      add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+        stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        nop.f 0
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
 }
 { .mib
-      stfs [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
-      add   GR_Parameter_Y = -16,GR_Parameter_Y
-      br.call.sptk b0=__libm_error_support#   // Call error handling function
+        stfs [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
+        add   GR_Parameter_Y = -16,GR_Parameter_Y
+        br.call.sptk b0=__libm_error_support#   // Call error handling function
 };;
 
 { .mmi
-      add   GR_Parameter_RESULT = 48,sp
-      nop.m 0
-      nop.i 0
+        nop.m 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 
 { .mmi
-      ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
+        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-      add   sp = 64,sp                       // Restore stack pointer
-      mov   b0 = GR_SAVE_B0                  // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-      mov   gp = GR_SAVE_GP                  // Restore gp
-      mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-      br.ret.sptk     b0                     // Return
-};;
+        mov   gp = GR_SAVE_GP                  // Restore gp 
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
+};; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 
 .type   __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/e_fmod.S b/sysdeps/ia64/fpu/e_fmod.S
index d801e0c128..2b3ee9610f 100644
--- a/sysdeps/ia64/fpu/e_fmod.S
+++ b/sysdeps/ia64/fpu/e_fmod.S
@@ -1,10 +1,11 @@
 .file "fmod.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
+// Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,42 +36,38 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //====================================================================
-// 02/02/00 Initial version
-// 03/02/00 New Algorithm
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 3/02/00  New Algorithm
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 11/28/00 Set FR_Y to f9
-// 03/11/02 Fixed flags for fmod(qnan,zero)
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
+//11/28/00  Set FR_Y to f9
 //
 // API
 //====================================================================
-// double fmod(double,double);
+// double fmod(double,double);   
 //
 // Overview of operation
 //====================================================================
 //  fmod(a,b)=a-i*b,
-//  where i is an integer such that, if b!=0,
+//  where i is an integer such that, if b!=0, 
 //  |i|<|a/b| and |a/b-i|<1
 //
 // Algorithm
 //====================================================================
 // a). if |a|<|b|, return a
-// b). get quotient and reciprocal overestimates accurate to
+// b). get quotient and reciprocal overestimates accurate to 
 //     33 bits (q2,y2)
 // c). if the exponent difference (exponent(a)-exponent(b))
 //     is less than 32, truncate quotient to integer and
 //     finish in one iteration
 // d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
 //     round quotient estimate to single precision (k=RN(q2)),
-//     calculate partial remainder (a'=a-k*b),
+//     calculate partial remainder (a'=a-k*b), 
 //     get quotient estimate (a'*y2), and repeat from c).
 //
 // Special cases
@@ -84,9 +81,14 @@
 // General registers:   r2,r29,r32 (ar.pfs), r33-r39
 // Floating point registers: f6-f15
 
+#include "libm_support.h"
+
+.section .text
+
+
 GR_SAVE_B0                    = r33
 GR_SAVE_PFS                   = r34
-GR_SAVE_GP                    = r35
+GR_SAVE_GP                    = r35 
 GR_SAVE_SP                    = r36
 
 GR_Parameter_X                = r37
@@ -99,9 +101,17 @@ FR_Y             = f9
 FR_RESULT        = f8
 
 
-.section .text
-GLOBAL_IEEE754_ENTRY(fmod)
+.proc fmod#
+.align 32
+.global fmod#
+.align 32
 
+fmod:                          // Entry point declared .global above
+#ifdef _LIBC
+.global __ieee754_fmod
+.type __ieee754_fmod,@function
+__ieee754_fmod:                // Second global label at the same address (only with _LIBC)
+#endif
 // inputs in f8, f9
 // result in f8
 
@@ -123,12 +133,12 @@ GLOBAL_IEEE754_ENTRY(fmod)
   // (1) y0
   frcpa.s1 f10,p6=f6,f7
   nop.i 0
-}
+} 
 
 // Y +-NAN, +-inf, +-0?     p7
 { .mfi
       nop.m 999
-      fclass.m.unc  p7,p0 = f9, 0xe7
+(p0)  fclass.m.unc  p7,p0 = f9, 0xe7           
       nop.i 999;;
 }
 
@@ -139,14 +149,14 @@ GLOBAL_IEEE754_ENTRY(fmod)
 
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p0 = f8, 0xe3
-      nop.i 999
+(p0)  fclass.m.unc  p9,p0 = f8, 0xe3           
+      nop.i 999 
 }
 
 // |x| < |y|? Return x p8
 { .mfi
       nop.m 999
-      fcmp.lt.unc.s1 p8,p0 = f6,f7
+(p0)  fcmp.lt.unc.s1 p8,p0 = f6,f7             
       nop.i 999 ;;
 }
 
@@ -162,33 +172,33 @@ GLOBAL_IEEE754_ENTRY(fmod)
   // (2) q0=a*y0
   (p6) fma.s1 f13=f6,f10,f0
   nop.i 0
-}
+} 
 { .mfi
   nop.m 0
   // (3) e0 = 1 - b * y0
   (p6) fnma.s1 f12=f7,f10,f1
   nop.i 0;;
-}
+} 
 
   {.mfi
   nop.m 0
   // normalize x (if |x|<|y|)
   (p8) fma.d.s0 f8=f8,f1,f0
   nop.i 0
-}
+} 
 {.bbb
-  (p9) br.cond.spnt FMOD_X_NAN_INF
-  (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
+  (p9) br.cond.spnt L(FMOD_X_NAN_INF)
+  (p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
   // if |x|<|y|, return
   (p8) br.ret.spnt    b0;;
 }
 
-  {.mfi
+  {.mfi 
   nop.m 0
   // normalize x
   fma.s0 f6=f6,f1,f0
   nop.i 0
-}
+} 
 {.mfi
   nop.m 0
   // normalize y
@@ -202,45 +212,45 @@ GLOBAL_IEEE754_ENTRY(fmod)
   // (4) q1=q0+e0*q0
   (p6) fma.s1 f13=f12,f13,f13
   nop.i 0
-}
+} 
 { .mfi
   nop.m 0
   // (5) e1 = e0 * e0 + 2^-34
   (p6) fma.s1 f14=f12,f12,f11
   nop.i 0;;
-}
+} 
 {.mlx
   nop.m 0
   movl r2=0x33a00000;;
-}
+} 
 { .mfi
   nop.m 0
   // (6) y1 = y0 + e0 * y0
   (p6) fma.s1 f10=f12,f10,f10
   nop.i 0;;
-}
+} 
 {.mfi
   // set f12=1.25*2^{-24}
   setf.s f12=r2
   // (7) q2=q1+e1*q1
   (p6) fma.s1 f13=f13,f14,f13
   nop.i 0;;
-}
+} 
 {.mfi
   nop.m 0
   fmerge.s f9=f8,f9
   nop.i 0
-}
+} 
 { .mfi
   nop.m 0
   // (8) y2 = y1 + e1 * y1
   (p6) fma.s1 f10=f14,f10,f10
   // set p6=0, p10=0
   cmp.ne.and p6,p10=r0,r0;;
-}
+} 
 
 .align 32
-loop53:
+L(loop53):
   {.mfi
   nop.m 0
   // compare q2, 2^32
@@ -270,7 +280,7 @@ loop53:
   // normalize truncated quotient
   (p8) fcvt.xf f13=f11
   nop.i 0;;
-}
+}  
   { .mfi
   nop.m 0
   // calculate remainder (assuming f13=RZ(Q))
@@ -279,7 +289,7 @@ loop53:
 }
   {.mfi
   nop.m 0
-  // also if exponent>32, round quotient to single precision
+  // also if exponent>32, round quotient to single precision 
   // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
   (p7) fnma.s.s1 f11=f13,f12,f13
   nop.i 0;;
@@ -322,7 +332,7 @@ loop53:
 .pred.rel "mutex",p6,p10
   {.mfb
   nop.m 0
-  // add b to estimated remainder (to cover the case when the quotient was overestimated)
+  // add b to estimated remainder (to cover the case when the quotient was overestimated) 
   // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
   (p6) fma.d.s0 f8=f11,f12,f9
   nop.b 0
@@ -344,114 +354,97 @@ loop53:
   nop.m 0
   // if f14 was RZ(Q), set remainder to f14
   (p9) mov f6=f14
-  br.cond.sptk loop53;;
+  br.cond.sptk L(loop53);;
 }
 
 
 
-FMOD_X_NAN_INF:
+L(FMOD_X_NAN_INF): 
 
 // Y zero ?
-{.mfi
-  nop.m 0
-  fclass.m p10,p0=f8,0xc3     // Test x=nan
-  nop.i 0
-}
-{.mfi
+{.mfi 
   nop.m 0
   fma.s1 f10=f9,f1,f0
   nop.i 0;;
 }
-
 {.mfi
-  nop.m 0
-  fma.s0 f8=f8,f1,f0
-  nop.i 0
-}
-{.mfi
-  nop.m 0
-(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
-  nop.i 0;;
-}
-
-{.mfb
  nop.m 0
  fcmp.eq.unc.s1 p11,p0=f10,f0
-(p10) br.ret.spnt b0;;        // Exit with result=x if x=nan and y=zero
+ nop.i 0;;
 }
 {.mib
   nop.m 0
   nop.i 0
   // if Y zero
-  (p11) br.cond.spnt FMOD_Y_ZERO;;
+  (p11) br.cond.spnt L(FMOD_Y_ZERO);;                        
 }
 
 // X infinity? Return QNAN indefinite
 { .mfi
       nop.m 999
-      fclass.m.unc  p8,p9 = f8, 0x23
-      nop.i 999;;
+(p0)  fclass.m.unc  p8,p9 = f8, 0x23 
+      nop.i 999;; 
 }
 // Y NaN ?
 {.mfi
-     nop.m 999
+	 nop.m 999
 (p8) fclass.m p9,p8=f9,0xc3
-     nop.i 0;;
+	 nop.i 0;;
 }
 {.mfi
-      nop.m 999
-(p8)  frcpa.s0 f8,p0 = f8,f8
+	  nop.m 999
+(p8)  frcpa.s0 f8,p0 = f8,f8           
       nop.i 0
-}
+} 
 { .mfi
       nop.m 999
-    // also set Denormal flag if necessary
+	// also set Denormal flag if necessary
 (p8)  fma.s0 f9=f9,f1,f0
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p8)  fma.d.s0 f8=f8,f1,f0
-      nop.b 999 ;;
+(p8)  fma.d f8=f8,f1,f0                     
+	  nop.b 999 ;;                        
 }
 
 { .mfb
       nop.m 999
-(p9)  frcpa.s0 f8,p7=f8,f9
-      br.ret.sptk   b0 ;;
+(p9)  frcpa.s0 f8,p7=f8,f9                     
+      br.ret.sptk   b0 ;;                        
 }
 
 
-FMOD_Y_NAN_INF_ZERO:
+L(FMOD_Y_NAN_INF_ZERO): 
 
 // Y INF
 { .mfi
       nop.m 999
-      fclass.m.unc  p7,p0 = f9, 0x23
+(p0)  fclass.m.unc  p7,p0 = f9, 0x23           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p7)  fma.d.s0 f8=f8,f1,f0
-(p7)  br.ret.spnt    b0 ;;
+(p7)  fma.d f8=f8,f1,f0                     
+(p7)  br.ret.spnt    b0 ;;                        
 }
 
 // Y NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p0 = f9, 0xc3
+(p0)  fclass.m.unc  p9,p0 = f9, 0xc3           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p9)  fma.d.s0 f8=f9,f1,f0
-(p9)  br.ret.spnt    b0 ;;
+(p9)  fma.d f8=f9,f1,f0                     
+(p9)  br.ret.spnt    b0 ;;                        
 }
 
-FMOD_Y_ZERO:
+L(FMOD_Y_ZERO):
 // Y zero? Must be zero at this point
 // because it is the only choice left.
 // Return QNAN indefinite
@@ -459,56 +452,60 @@ FMOD_Y_ZERO:
 {.mfi
   nop.m 0
   // set Invalid
-  frcpa.s0 f12,p0=f0,f0
+  frcpa f12,p0=f0,f0
   nop.i 0
 }
 // X NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p10 = f8, 0xc3
+(p0)  fclass.m.unc  p9,p10 = f8, 0xc3           
       nop.i 999 ;;
 }
 { .mfi
       nop.m 999
-(p10)  fclass.nm  p9,p10 = f8, 0xff
+(p10)  fclass.nm  p9,p10 = f8, 0xff           
       nop.i 999 ;;
 }
 
 {.mfi
  nop.m 999
- (p9) frcpa.s0 f11,p7=f8,f0
+ (p9) frcpa f11,p7=f8,f0
  nop.i 0;;
 }
 
 { .mfi
       nop.m 999
-(p10)  frcpa.s0         f11,p7 = f9,f9
-      mov        GR_Parameter_TAG = 121 ;;
+(p10)  frcpa         f11,p7 = f9,f9           
+(p0)  mov        GR_Parameter_TAG = 121 ;;                                 
 }
 
 { .mfi
       nop.m 999
-      fmerge.s      f10 = f8, f8
+(p0)  fmerge.s      f10 = f8, f8             
       nop.i 999
 }
 
 { .mfb
       nop.m 999
-      fma.d.s0 f8=f11,f1,f0
-      br.sptk __libm_error_region;;
+(p0)  fma.d f8=f11,f1,f0                     
+(p0)  br.sptk __libm_error_region;; 
 }
 
-GLOBAL_IEEE754_END(fmod)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp fmod
+ASM_SIZE_DIRECTIVE(fmod)
+ASM_SIZE_DIRECTIVE(__ieee754_fmod)
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs 
 }
 { .mfi
-.fframe 64
+.fframe 64 
         add sp=-64,sp                           // Create new stack
         nop.f 0
         mov GR_SAVE_GP=gp                       // Save gp
@@ -516,18 +513,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 { .mmi
         stfd [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
         add GR_Parameter_X = 16,sp              // Parameter 1 address
-.save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                       // Save b0
+.save   b0, GR_SAVE_B0                      
+        mov GR_SAVE_B0=b0                       // Save b0 
 };;
 .body
 { .mib
-        stfd [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
-    nop.b 0                                 // Parameter 3 address
+        stfd [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
+	nop.b 0
 }
 { .mib
         stfd [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
-        add   GR_Parameter_Y = -16,GR_Parameter_Y
+        add   GR_Parameter_Y = -16,GR_Parameter_Y  
         br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
 { .mmi
@@ -542,17 +539,13 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   gp = GR_SAVE_GP                  // Restore gp 
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
-};;
-
-LOCAL_LIBM_END(__libm_error_region)
+};; 
 
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
-
-
-
diff --git a/sysdeps/ia64/fpu/e_fmodf.S b/sysdeps/ia64/fpu/e_fmodf.S
index fe1ec0304d..5b6390eeec 100644
--- a/sysdeps/ia64/fpu/e_fmodf.S
+++ b/sysdeps/ia64/fpu/e_fmodf.S
@@ -1,10 +1,10 @@
 .file "fmodf.s"
-
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (c) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, 
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational 
+// Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,9 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
+// WARRANTY DISCLAIMER
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,42 +37,38 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //====================================================================
-// 02/02/00 Initial version
-// 03/02/00 New Algorithm
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 3/02/00  New Algorithm
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 11/28/00 Set FR_Y to f9
-// 03/11/02 Fixed flags for fmodf(qnan,zero)
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
+//11/28/00  Set FR_Y to f9
 //
 // API
 //====================================================================
-// float fmodf(float,float);
+// float fmodf(float,float);   
 //
 // Overview of operation
 //====================================================================
 //  fmod(a,b)=a-i*b,
-//  where i is an integer such that, if b!=0,
+//  where i is an integer such that, if b!=0, 
 //  |i|<|a/b| and |a/b-i|<1
 
 // Algorithm
 //====================================================================
 // a). if |a|<|b|, return a
-// b). get quotient and reciprocal overestimates accurate to
+// b). get quotient and reciprocal overestimates accurate to 
 //     33 bits (q2,y2)
 // c). if the exponent difference (exponent(a)-exponent(b))
 //     is less than 32, truncate quotient to integer and
 //     finish in one iteration
 // d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
 //     round quotient estimate to single precision (k=RN(q2)),
-//     calculate partial remainder (a'=a-k*b),
+//     calculate partial remainder (a'=a-k*b), 
 //     get quotient estimate (a'*y2), and repeat from c).
 
 // Special cases
@@ -84,9 +82,13 @@
 // General registers:   r2,r29,r32 (ar.pfs), r33-r39
 // Floating point registers: f6-f15
 
+#include "libm_support.h"
+
+.section .text
+
 GR_SAVE_B0                    = r33
 GR_SAVE_PFS                   = r34
-GR_SAVE_GP                    = r35
+GR_SAVE_GP                    = r35 
 GR_SAVE_SP                    = r36
 
 GR_Parameter_X                = r37
@@ -99,9 +101,18 @@ FR_Y             = f9
 FR_RESULT        = f8
 
 
-.section .text
-GLOBAL_IEEE754_ENTRY(fmodf)
 
+.proc fmodf#
+.align 32
+.global fmodf#
+.align 32
+
+fmodf:                         // Entry point declared .global above
+#ifdef _LIBC
+.global __ieee754_fmodf
+.type __ieee754_fmodf,@function
+__ieee754_fmodf:               // Second global label at the same address (only with _LIBC)
+#endif
 // inputs in f8, f9
 // result in f8
 
@@ -123,13 +134,13 @@ GLOBAL_IEEE754_ENTRY(fmodf)
   // (1) y0
   frcpa.s1 f10,p6=f6,f7
   nop.i 0
-}
+} 
 
 // eliminate special cases
 // Y +-NAN, +-inf, +-0?     p7
 { .mfi
       nop.m 999
-      fclass.m.unc  p7,p0 = f9, 0xe7
+(p0)  fclass.m.unc  p7,p0 = f9, 0xe7           
       nop.i 999;;
 }
 
@@ -140,14 +151,14 @@ GLOBAL_IEEE754_ENTRY(fmodf)
 
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p0 = f8, 0xe3
-      nop.i 999
+(p0)  fclass.m.unc  p9,p0 = f8, 0xe3           
+      nop.i 999 
 }
 
 // |x| < |y|? Return x p8
 { .mfi
       nop.m 999
-      fcmp.lt.unc.s1 p8,p0 = f6,f7
+(p0)  fcmp.lt.unc.s1 p8,p0 = f6,f7             
       nop.i 999 ;;
 }
 
@@ -163,33 +174,33 @@ GLOBAL_IEEE754_ENTRY(fmodf)
   // (2) q0=a*y0
   (p6) fma.s1 f13=f6,f10,f0
   nop.i 0
-}
+} 
 { .mfi
   nop.m 0
   // (3) e0 = 1 - b * y0
   (p6) fnma.s1 f12=f7,f10,f1
   nop.i 0;;
-}
+} 
 
   {.mfi
   nop.m 0
   // normalize x (if |x|<|y|)
   (p8) fma.s.s0 f8=f8,f1,f0
   nop.i 0
-}
+} 
 {.bbb
-  (p9) br.cond.spnt FMOD_X_NAN_INF
-  (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
+  (p9) br.cond.spnt L(FMOD_X_NAN_INF)
+  (p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
   // if |x|<|y|, return
   (p8) br.ret.spnt    b0;;
 }
 
-  {.mfi
+  {.mfi 
   nop.m 0
   // normalize x
   fma.s0 f6=f6,f1,f0
   nop.i 0
-}
+} 
 {.mfi
   nop.m 0
   // normalize y
@@ -204,45 +215,45 @@ GLOBAL_IEEE754_ENTRY(fmodf)
   // (4) q1=q0+e0*q0
   (p6) fma.s1 f13=f12,f13,f13
   nop.i 0
-}
+} 
 { .mfi
   nop.m 0
   // (5) e1 = e0 * e0 + 2^-34
   (p6) fma.s1 f14=f12,f12,f11
   nop.i 0;;
-}
+} 
 {.mlx
   nop.m 0
   movl r2=0x33a00000;;
-}
+} 
 { .mfi
   nop.m 0
   // (6) y1 = y0 + e0 * y0
   (p6) fma.s1 f10=f12,f10,f10
   nop.i 0;;
-}
+} 
 {.mfi
   // set f12=1.25*2^{-24}
   setf.s f12=r2
   // (7) q2=q1+e1*q1
   (p6) fma.s1 f13=f13,f14,f13
   nop.i 0;;
-}
+} 
 {.mfi
   nop.m 0
   fmerge.s f9=f8,f9
   nop.i 0
-}
+} 
 { .mfi
   nop.m 0
   // (8) y2 = y1 + e1 * y1
   (p6) fma.s1 f10=f14,f10,f10
   // set p6=0, p10=0
   cmp.ne.and p6,p10=r0,r0;;
-}
+} 
 
 .align 32
-loop24:
+L(loop24):
   {.mfi
   nop.m 0
   // compare q2, 2^32
@@ -272,7 +283,7 @@ loop24:
   // normalize truncated quotient
   (p8) fcvt.xf f13=f11
   nop.i 0;;
-}
+}  
   { .mfi
   nop.m 0
   // calculate remainder (assuming f13=RZ(Q))
@@ -281,7 +292,7 @@ loop24:
 }
   {.mfi
   nop.m 0
-  // also if exponent>32, round quotient to single precision
+  // also if exponent>32, round quotient to single precision 
   // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
   (p7) fnma.s.s1 f11=f13,f12,f13
   nop.i 0;;
@@ -324,7 +335,7 @@ loop24:
 .pred.rel "mutex",p6,p10
   {.mfb
   nop.m 0
-  // add b to estimated remainder (to cover the case when the quotient was overestimated)
+  // add b to estimated remainder (to cover the case when the quotient was overestimated) 
   // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
   (p6) fma.s.s0 f8=f11,f12,f9
   nop.b 0
@@ -346,118 +357,102 @@ loop24:
   nop.m 0
   // if f14 was RZ(Q), set remainder to f14
   (p9) mov f6=f14
-  br.cond.sptk loop24;;
+  br.cond.sptk L(loop24);;
 }
 
   {  .mmb
-    nop.m 0
-    nop.m 0
-    br.ret.sptk b0;;
+	nop.m 0				    
+	nop.m 0				    
+	br.ret.sptk b0;;
  }
 
-FMOD_X_NAN_INF:
+L(FMOD_X_NAN_INF): 
 
 
 // Y zero ?
-{.mfi
-  nop.m 0
-  fclass.m p10,p0=f8,0xc3     // Test x=nan
-  nop.i 0
-}
-{.mfi
+{.mfi 
   nop.m 0
   fma.s1 f10=f9,f1,f0
   nop.i 0;;
 }
-
 {.mfi
-  nop.m 0
-  fma.s0 f8=f8,f1,f0
-  nop.i 0
-}
-{.mfi
-  nop.m 0
-(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
-  nop.i 0;;
-}
-{.mfb
  nop.m 0
  fcmp.eq.unc.s1 p11,p0=f10,f0
-(p10) br.ret.spnt b0;;        // Exit with result=x if x=nan and y=zero
+ nop.i 0;;
 }
 {.mib
   nop.m 0
   nop.i 0
   // if Y zero
-  (p11) br.cond.spnt FMOD_Y_ZERO;;
+  (p11) br.cond.spnt L(FMOD_Y_ZERO);;                        
 }
 
 // X infinity? Return QNAN indefinite
 { .mfi
       nop.m 999
-      fclass.m.unc  p8,p9 = f8, 0x23
-      nop.i 999;;
+(p0)  fclass.m.unc  p8,p9 = f8, 0x23 
+      nop.i 999;; 
 }
 // Y NaN ?
 {.mfi
-     nop.m 999
+	 nop.m 999
 (p8) fclass.m p9,p8=f9,0xc3
-     nop.i 0;;
+	 nop.i 0;;
 }
 {.mfi
-    nop.m 999
-(p8)  frcpa.s0 f8,p0 = f8,f8
+	nop.m 999
+(p8)  frcpa.s0 f8,p0 = f8,f8           
     nop.i 0
-}
+} 
 { .mfi
       nop.m 999
-    // also set Denormal flag if necessary
+	// also set Denormal flag if necessary
 (p8)  fma.s0 f9=f9,f1,f0
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p8)  fma.s.s0 f8=f8,f1,f0
-      nop.b 999 ;;
+(p8)  fma.s f8=f8,f1,f0                     
+	  nop.b 999 ;;                        
 }
 
 { .mfb
       nop.m 999
-(p9)  frcpa.s0 f8,p7=f8,f9
-      br.ret.sptk    b0 ;;
+(p9)  frcpa.s0 f8,p7=f8,f9                     
+      br.ret.sptk    b0 ;;                        
 }
 
 
-FMOD_Y_NAN_INF_ZERO:
+L(FMOD_Y_NAN_INF_ZERO): 
 
 // Y INF
 { .mfi
       nop.m 999
-      fclass.m.unc  p7,p0 = f9, 0x23
+(p0)  fclass.m.unc  p7,p0 = f9, 0x23           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p7)  fma.s.s0 f8=f8,f1,f0
-(p7)  br.ret.spnt    b0 ;;
+(p7)  fma.s f8=f8,f1,f0                     
+(p7)  br.ret.spnt    b0 ;;                        
 }
 
 // Y NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p0 = f9, 0xc3
+(p0)  fclass.m.unc  p9,p0 = f9, 0xc3           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p9)  fma.s.s0 f8=f9,f1,f0
-(p9)  br.ret.spnt    b0 ;;
+(p9)  fma.s f8=f9,f1,f0                     
+(p9)  br.ret.spnt    b0 ;;                        
 }
 
-FMOD_Y_ZERO:
+L(FMOD_Y_ZERO):
 // Y zero? Must be zero at this point
 // because it is the only choice left.
 // Return QNAN indefinite
@@ -465,65 +460,69 @@ FMOD_Y_ZERO:
 {.mfi
   nop.m 0
   // set Invalid
-  frcpa.s0 f12,p0=f0,f0
+  frcpa f12,p0=f0,f0
   nop.i 999
 }
 // X NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p10 = f8, 0xc3
+(p0)  fclass.m.unc  p9,p10 = f8, 0xc3           
       nop.i 999 ;;
 }
 { .mfi
       nop.m 999
-(p10)  fclass.nm  p9,p10 = f8, 0xff
+(p10)  fclass.nm  p9,p10 = f8, 0xff           
       nop.i 999 ;;
 }
 
 {.mfi
  nop.m 999
- (p9) frcpa.s0 f11,p7=f8,f0
+ (p9) frcpa f11,p7=f8,f0
  nop.i 0;;
 }
 
 { .mfi
       nop.m 999
-(p10) frcpa.s0 f11,p7 = f0,f0
+(p10) frcpa f11,p7 = f0,f0           
 nop.i 999;;
 }
 
 { .mfi
       nop.m 999
-      fmerge.s      f10 = f8, f8
+(p0)  fmerge.s      f10 = f8, f8             
       nop.i 999
 }
 
 { .mfi
       nop.m 999
-      fma.s.s0 f8=f11,f1,f0
+(p0)  fma.s f8=f11,f1,f0                     
       nop.i 999;;
 }
 
-EXP_ERROR_RETURN:
+L(EXP_ERROR_RETURN): 
 
 
 { .mib
       nop.m 0
-      mov GR_Parameter_TAG=122
-      br.sptk __libm_error_region;;
+(p0)  mov GR_Parameter_TAG=122                                 
+(p0)  br.sptk __libm_error_region;; 
 }
 
-GLOBAL_IEEE754_END(fmodf)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp fmodf
+ASM_SIZE_DIRECTIVE(fmodf)
+ASM_SIZE_DIRECTIVE(__ieee754_fmodf)
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs 
 }
 { .mfi
-.fframe 64
+.fframe 64 
         add sp=-64,sp                           // Create new stack
         nop.f 0
         mov GR_SAVE_GP=gp                       // Save gp
@@ -531,18 +530,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 { .mmi
         stfs [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
         add GR_Parameter_X = 16,sp              // Parameter 1 address
-.save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                       // Save b0
+.save   b0, GR_SAVE_B0                      
+        mov GR_SAVE_B0=b0                       // Save b0 
 };;
 .body
 { .mib
-        stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
-    nop.b 0                                 // Parameter 3 address
+        stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack 
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  
+	nop.b 0                                 // Parameter 3 address
 }
 { .mib
         stfs [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
-        add   GR_Parameter_Y = -16,GR_Parameter_Y
+        add   GR_Parameter_Y = -16,GR_Parameter_Y  
         br.call.sptk b0=__libm_error_support#;;  // Call error handling function
 }
 { .mmi
@@ -557,14 +556,13 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   gp = GR_SAVE_GP                  // Restore gp 
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
-};;
+};; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
-
diff --git a/sysdeps/ia64/fpu/e_fmodl.S b/sysdeps/ia64/fpu/e_fmodl.S
index da08ae3f5c..85c9f6ef82 100644
--- a/sysdeps/ia64/fpu/e_fmodl.S
+++ b/sysdeps/ia64/fpu/e_fmodl.S
@@ -1,10 +1,11 @@
 .file "fmodl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational
+// Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,42 +36,38 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //====================================================================
-// 02/02/00 Initial version
-// 03/02/00 New Algorithm
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 3/02/00  New Algorithm
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 11/28/00 Set FR_Y to f9
-// 03/11/02 Fixed flags for fmodl(qnan,zero)
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
+//11/28/00  Set FR_Y to f9
 //
 // API
 //====================================================================
-// long double fmodl(long double,long double);
+// long double fmodl(long double,long double);   
 //
 // Overview of operation
 //====================================================================
 //  fmod(a,b)=a-i*b,
-//  where i is an integer such that, if b!=0,
+//  where i is an integer such that, if b!=0, 
 //  |i|<|a/b| and |a/b-i|<1
 //
 // Algorithm
 //====================================================================
 // a). if |a|<|b|, return a
-// b). get quotient and reciprocal overestimates accurate to
+// b). get quotient and reciprocal overestimates accurate to 
 //     33 bits (q2,y2)
 // c). if the exponent difference (exponent(a)-exponent(b))
 //     is less than 32, truncate quotient to integer and
 //     finish in one iteration
 // d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
 //     round quotient estimate to single precision (k=RN(q2)),
-//     calculate partial remainder (a'=a-k*b),
+//     calculate partial remainder (a'=a-k*b), 
 //     get quotient estimate (a'*y2), and repeat from c).
 //
 // Registers used
@@ -79,9 +76,13 @@
 // General registers:   r2,r29,r32 (ar.pfs), r33-r39
 // Floating point registers: f6-f15
 
+#include "libm_support.h"
+
+.section .text
+
 GR_SAVE_B0                    = r33
 GR_SAVE_PFS                   = r34
-GR_SAVE_GP                    = r35
+GR_SAVE_GP                    = r35 
 GR_SAVE_SP                    = r36
 
 GR_Parameter_X                = r37
@@ -94,9 +95,18 @@ FR_Y             = f9
 FR_RESULT        = f8
 
 
-.section .text
-GLOBAL_IEEE754_ENTRY(fmodl)
 
+.proc fmodl#
+.align 32
+.global fmodl#
+.align 32
+
+fmodl:
+#ifdef _LIBC
+.global __ieee754_fmodl
+.type __ieee754_fmodl,@function
+__ieee754_fmodl:
+#endif
 // inputs in f8, f9
 // result in f8
 
@@ -118,7 +128,7 @@ GLOBAL_IEEE754_ENTRY(fmodl)
   // (1) y0
   frcpa.s1 f10,p6=f6,f7
   nop.i 0;;
-}
+} 
 
 // eliminate special cases
 {.mmi
@@ -131,7 +141,7 @@ cmp.eq p7,p10=r29,r0;;
 // Y +-NAN, +-inf, +-0?     p7
 { .mfi
       nop.m 999
-(p10)  fclass.m  p7,p10 = f9, 0xe7
+(p10)  fclass.m  p7,p10 = f9, 0xe7           
       nop.i 999;;
 }
 
@@ -142,14 +152,14 @@ cmp.eq p7,p10=r29,r0;;
 
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p11 = f8, 0xe3
-      nop.i 999
+(p0)  fclass.m.unc  p9,p11 = f8, 0xe3           
+      nop.i 999 
 }
 
 // |x| < |y|? Return x p8
 { .mfi
       nop.m 999
-(p10)  fcmp.lt.unc.s1 p8,p0 = f6,f7
+(p10)  fcmp.lt.unc.s1 p8,p0 = f6,f7             
       nop.i 999 ;;
 }
 
@@ -163,13 +173,13 @@ cmp.eq p7,p10=r29,r0;;
   // (3) e0 = 1 - b * y0
   (p6) fnma.s1 f12=f7,f10,f1
   nop.i 0;;
-}
+} 
 
 // Y +-NAN, +-inf, +-0?     p7
 { .mfi
       nop.m 999
-      // pseudo-NaN ?
-(p10)  fclass.nm  p7,p0 = f9, 0xff
+	  // pseudo-NaN ?
+(p10)  fclass.nm  p7,p0 = f9, 0xff           
       nop.i 999
 }
 
@@ -180,7 +190,7 @@ cmp.eq p7,p10=r29,r0;;
 
 { .mfi
       nop.m 999
-(p11)  fclass.nm  p9,p0 = f8, 0xff
+(p11)  fclass.nm  p9,p0 = f8, 0xff          
       nop.i 999;;
 }
 
@@ -199,18 +209,18 @@ cmp.eq p7,p10=r29,r0;;
   nop.i 0
 }
 {.bbb
-  (p9) br.cond.spnt FMOD_X_NAN_INF
-  (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
+  (p9) br.cond.spnt L(FMOD_X_NAN_INF)
+  (p7) br.cond.spnt L(FMOD_Y_NAN_INF_ZERO)
   // if |x|<|y|, return
   (p8) br.ret.spnt    b0;;
 }
 
-  {.mfi
+  {.mfi 
   nop.m 0
   // x denormal ? set D flag
   fnma.s0 f32=f6,f1,f6
   nop.i 0
-}
+} 
 {.mfi
   nop.m 0
   // y denormal ? set D flag
@@ -224,46 +234,46 @@ cmp.eq p7,p10=r29,r0;;
   // (4) q1=q0+e0*q0
   (p6) fma.s1 f13=f12,f13,f13
   nop.i 0
-}
+} 
 { .mfi
   nop.m 0
   // (5) e1 = e0 * e0 + 2^-34
   (p6) fma.s1 f14=f12,f12,f11
   nop.i 0;;
-}
+} 
 {.mlx
   nop.m 0
   movl r2=0x33a00000;;
-}
+} 
 { .mfi
   nop.m 0
   // (6) y1 = y0 + e0 * y0
   (p6) fma.s1 f10=f12,f10,f10
   nop.i 0;;
-}
+} 
 {.mfi
   // set f12=1.25*2^{-24}
   setf.s f12=r2
   // (7) q2=q1+e1*q1
   (p6) fma.s1 f13=f13,f14,f13
   nop.i 0;;
-}
+} 
 {.mfi
   nop.m 0
   fmerge.s f9=f8,f9
   nop.i 0
-}
+} 
 { .mfi
   nop.m 0
   // (8) y2 = y1 + e1 * y1
   (p6) fma.s1 f10=f14,f10,f10
   // set p6=0, p10=0
   cmp.ne.and p6,p10=r0,r0;;
-}
+} 
 
 
 .align 32
-loop64:
+L(loop64):
   {.mfi
   nop.m 0
   // compare q2, 2^32
@@ -295,7 +305,7 @@ loop64:
   // normalize truncated quotient
   (p8) fcvt.xf f13=f11
   nop.i 0;;
-}
+}  
   { .mfi
   nop.m 0
   // calculate remainder (assuming f13=RZ(Q))
@@ -304,7 +314,7 @@ loop64:
 }
   {.mfi
   nop.m 0
-  // also if exponent>32, round quotient to single precision
+  // also if exponent>32, round quotient to single precision 
   // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
   (p7) fnma.s.s1 f11=f13,f12,f13
   nop.i 0;;
@@ -347,7 +357,7 @@ loop64:
 .pred.rel "mutex",p6,p10
   {.mfb
   nop.m 0
-  // add b to estimated remainder (to cover the case when the quotient was overestimated)
+  // add b to estimated remainder (to cover the case when the quotient was overestimated) 
   // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
   (p6) fma.s0 f8=f11,f12,f9
   nop.b 0
@@ -368,59 +378,43 @@ loop64:
   nop.m 0
   // if f14 was RZ(Q), set remainder to f14
   (p9) mov f6=f14
-  br.cond.sptk loop64;;
+  br.cond.sptk L(loop64);;
 }
 
 
 
-FMOD_X_NAN_INF:
+L(FMOD_X_NAN_INF): 
 
 // Y zero ?
-{.mfi
-  nop.m 0
-  fclass.m p10,p0=f8,0xc3     // Test x=nan
-  nop.i 0
-}
-{.mfi
+{.mfi 
   nop.m 0
   fma.s1 f10=f9,f1,f0
   nop.i 0;;
 }
-
-{.mfi
-  nop.m 0
-  fma.s0 f8=f8,f1,f0
-  nop.i 0
-}
 {.mfi
-  nop.m 0
-(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
-  nop.i 0;;
-}
-{.mfb
  nop.m 0
  fcmp.eq.unc.s1 p11,p0=f10,f0
-(p10) br.ret.spnt b0;;        // Exit with result=x if x=nan and y=zero
+ nop.i 0;;
 }
 {.mib
   nop.m 0
   nop.i 0
   // if Y zero
-  (p11) br.cond.spnt FMOD_Y_ZERO;;
+  (p11) br.cond.spnt L(FMOD_Y_ZERO);;                        
 }
 
 // X infinity? Return QNAN indefinite
 { .mfi
-     // set p7 t0 0
-     cmp.ne p7,p0=r0,r0
-     fclass.m.unc  p8,p9 = f8, 0x23
-     nop.i 999;;
+	 // set p7 t0 0
+	 cmp.ne p7,p0=r0,r0
+(p0) fclass.m.unc  p8,p9 = f8, 0x23 
+     nop.i 999;; 
 }
 // Y NaN ?
 {.mfi
      nop.m 999
 (p8) fclass.m p9,p8=f9,0xc3
-     nop.i 0;;
+	 nop.i 0;;
 }
 // Y not pseudo-zero ? (r29 holds significand)
 {.mii
@@ -429,63 +423,63 @@ FMOD_X_NAN_INF:
      nop.i 0;;
 }
 {.mfi
-    nop.m 999
-(p8)  frcpa.s0 f8,p0 = f8,f8
+	nop.m 999
+(p8)  frcpa.s0 f8,p0 = f8,f8           
     nop.i 0
-}
+} 
 { .mfi
      nop.m 999
-    // also set Denormal flag if necessary
+	// also set Denormal flag if necessary
 (p7) fnma.s0 f9=f9,f1,f9
      nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p8)  fma.s0 f8=f8,f1,f0
-      nop.b 999 ;;
+(p8)  fma.s0 f8=f8,f1,f0                     
+	  nop.b 999 ;;                        
 }
 
 { .mfb
       nop.m 999
-(p9)  frcpa.s0 f8,p7=f8,f9
-      br.ret.sptk    b0 ;;
+(p9)  frcpa.s0 f8,p7=f8,f9                     
+      br.ret.sptk    b0 ;;                        
 }
 
 
-FMOD_Y_NAN_INF_ZERO:
+L(FMOD_Y_NAN_INF_ZERO): 
 // Y INF
 { .mfi
       nop.m 999
-      fclass.m.unc  p7,p0 = f9, 0x23
+(p0)  fclass.m.unc  p7,p0 = f9, 0x23           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p7)  fma.s0 f8=f8,f1,f0
-(p7)  br.ret.spnt    b0 ;;
+(p7)  fma f8=f8,f1,f0                     
+(p7)  br.ret.spnt    b0 ;;                        
 }
 
 // Y NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p10 = f9, 0xc3
+(p0)  fclass.m.unc  p9,p10 = f9, 0xc3           
       nop.i 999 ;;
 }
 { .mfi
       nop.m 999
-(p10)  fclass.nm  p9,p0 = f9, 0xff
+(p10)  fclass.nm  p9,p0 = f9, 0xff           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p9)  fma.s0 f8=f9,f1,f0
-(p9)  br.ret.spnt    b0 ;;
+(p9)  fma f8=f9,f1,f0                     
+(p9)  br.ret.spnt    b0 ;;                        
 }
 
-FMOD_Y_ZERO:
+L(FMOD_Y_ZERO):
 // Y zero? Must be zero at this point
 // because it is the only choice left.
 // Return QNAN indefinite
@@ -493,59 +487,62 @@ FMOD_Y_ZERO:
 {.mfi
   nop.m 0
   // set Invalid
-  frcpa.s0 f12,p0=f0,f0
+  frcpa f12,p0=f0,f0
   nop.i 0
 }
 // X NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p10 = f8, 0xc3
+(p0)  fclass.m.unc  p9,p10 = f8, 0xc3           
       nop.i 999 ;;
 }
 { .mfi
       nop.m 999
-(p10)  fclass.nm  p9,p10 = f8, 0xff
+(p10)  fclass.nm  p9,p10 = f8, 0xff           
       nop.i 999 ;;
 }
 
 {.mfi
  nop.m 999
- (p9) frcpa.s0 f11,p7=f8,f0
+ (p9) frcpa f11,p7=f8,f0
  nop.i 0;;
 }
 
 
 { .mfi
       nop.m 999
-(p10) frcpa.s0  f11,p7 = f9,f9
-      mov    GR_Parameter_TAG = 120 ;;
+(p10) frcpa  f11,p7 = f9,f9           
+(p0)  mov    GR_Parameter_TAG = 120 ;;                                 
 }
 
 { .mfi
       nop.m 999
-      fmerge.s      f10 = f8, f8
+(p0)  fmerge.s      f10 = f8, f8             
       nop.i 999
 }
 
 { .mfb
       nop.m 999
-      fma.s0 f8=f11,f1,f0
-      br.sptk __libm_error_region;;
+(p0)  fma f8=f11,f1,f0                     
+(p0)  br.sptk __libm_error_region;; 
 }
 
-GLOBAL_IEEE754_END(fmodl)
+.endp fmodl
+ASM_SIZE_DIRECTIVE(fmodl)
+ASM_SIZE_DIRECTIVE(__ieee754_fmodl)
 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs 
 }
 { .mfi
-.fframe 64
+.fframe 64 
         add sp=-64,sp                           // Create new stack
         nop.f 0
         mov GR_SAVE_GP=gp                       // Save gp
@@ -553,18 +550,18 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 { .mmi
         stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
         add GR_Parameter_X = 16,sp              // Parameter 1 address
-.save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                       // Save b0
+.save   b0, GR_SAVE_B0                      
+        mov GR_SAVE_B0=b0                       // Save b0 
 };;
 .body
 { .mib
-        stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
-    nop.b 0                                 // Parameter 3 address
+        stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack 
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  
+	nop.b 0                                 // Parameter 3 address
 }
 { .mib
         stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
-        add   GR_Parameter_Y = -16,GR_Parameter_Y
+        add   GR_Parameter_Y = -16,GR_Parameter_Y  
         br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
 { .mmi
@@ -579,17 +576,15 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   gp = GR_SAVE_GP                  // Restore gp 
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
-};;
-
-LOCAL_LIBM_END(__libm_error_region)
+};; 
 
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
-
diff --git a/sysdeps/ia64/fpu/e_hypot.S b/sysdeps/ia64/fpu/e_hypot.S
index 885c819326..113aac3461 100644
--- a/sysdeps/ia64/fpu/e_hypot.S
+++ b/sysdeps/ia64/fpu/e_hypot.S
@@ -1,10 +1,11 @@
-.file "hypot.s"
+.file "hypot.asm"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, 
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the 
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,27 +36,24 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // History: 
-// 02/02/00 hand-optimized
-// 04/04/00 Unwind support added
-// 06/20/00 new version
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  hand-optimized
+// 4/04/00  Unwind support added
+// 6/20/00  new version
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-// 04/17/03 Added missing mutex directive
 //
-//*********************************************************************
+// *********************************************************************
 //                           ___________
 // Function:   hypot(x,y) = |(x^2 + y^2) = for double precision values
 //             x and y
 //             Also provides cabs functionality.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Resources Used:
 //
@@ -70,7 +68,7 @@
 //
 //    Predicate Registers:      p6 - p10
 //
-//*********************************************************************
+// *********************************************************************
 //
 // IEEE Special Conditions:
 //
@@ -80,7 +78,7 @@
 //    hypot(QNaN and anything) = QNaN
 //    hypot(SNaN and anything ) = QNaN
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Implementation:
 //  x2 = x * x   in double-extended
@@ -88,7 +86,9 @@
 //  temp = x2 + y2   in double-extended
 //  sqrt(temp) rounded to double 
 //
-//*********************************************************************
+// *********************************************************************
+
+#include "libm_support.h"
 
 GR_SAVE_PFS         = r33
 GR_SAVE_B0          = r34
@@ -103,11 +103,23 @@ FR_Y                = f33
 FR_RESULT           = f8
 
 .section .text
+#ifndef _LIBC
+.proc cabs#
+.global cabs#
+cabs: 
+.endp cabs
+#endif
+.proc hypot#
+.global hypot#
+.align 64 
 
-LOCAL_LIBM_ENTRY(cabs)
-LOCAL_LIBM_END(cabs)
-GLOBAL_IEEE754_ENTRY(hypot)
-
+hypot:
+#ifdef _LIBC
+.global __hypot
+__hypot:
+.global __ieee754_hypot
+__ieee754_hypot:
+#endif 
 {.mfi
   alloc r32= ar.pfs,0,4,4,0
   // Compute x*x
@@ -209,7 +221,6 @@ GLOBAL_IEEE754_ENTRY(hypot)
   mov r2=0x107fb;;
 }
 
-.pred.rel "mutex",p7,p8
 {.mfb
   nop.m 0
   // if f8=Infinity or f9=Zero, return |f8|
@@ -383,8 +394,11 @@ GLOBAL_IEEE754_ENTRY(hypot)
 	 // No overflow
 (p9) br.ret.sptk b0;; 
 }
-GLOBAL_IEEE754_END(hypot)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp hypot
+ASM_SIZE_DIRECTIVE(hypot)
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -431,8 +445,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
 };;
-LOCAL_LIBM_END(__libm_error_region#)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region) 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
-
diff --git a/sysdeps/ia64/fpu/e_hypotf.S b/sysdeps/ia64/fpu/e_hypotf.S
index 633bb67e59..0a11ec5b41 100644
--- a/sysdeps/ia64/fpu/e_hypotf.S
+++ b/sysdeps/ia64/fpu/e_hypotf.S
@@ -1,10 +1,11 @@
-.file "hypotf.s"
+.file "hypotf.asm"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, 
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the 
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,27 +36,24 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // History: 
-// 02/02/00 hand-optimized
-// 04/04/00 Unwind support added
-// 06/26/00 new version
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  hand-optimized
+// 4/04/00  Unwind support added
+// 6/26/00  new version
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-// 04/17/03 Added missing mutex directive
 //
-//*********************************************************************
+// *********************************************************************
 //                           ___________
 // Function:   hypotf(x,y) = |(x^2 + y^2) = for single precision values
 //             x and y
 //             Also provides cabsf functionality.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Resources Used:
 //
@@ -70,7 +68,7 @@
 //
 //    Predicate Registers:      p6 - p10
 //
-//*********************************************************************
+// *********************************************************************
 //
 // IEEE Special Conditions:
 //
@@ -80,7 +78,7 @@
 //    hypotf(QNaN and anything) = QNaN
 //    hypotf(SNaN and anything ) = QNaN
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Implementation:
 //  x2 = x * x   in double-extended
@@ -88,7 +86,9 @@
 //  temp = x2 + y2   in double-extended
 //  sqrt(temp) rounded to single precision 
 //
-//*********************************************************************
+// *********************************************************************
+
+#include "libm_support.h"
 
 GR_SAVE_PFS         = r33
 GR_SAVE_B0          = r34
@@ -103,10 +103,23 @@ FR_Y                = f15
 FR_RESULT           = f8
 
 .section .text
+#ifndef _LIBC
+.proc cabsf#
+.global cabsf#
+cabsf: 
+.endp cabsf
+#endif
+.proc hypotf#
+.global hypotf#
+.align 64 
 
-LOCAL_LIBM_ENTRY(cabsf)
-LOCAL_LIBM_END(cabsf)
-GLOBAL_IEEE754_ENTRY(hypotf)
+hypotf: 
+#ifdef _LIBC
+.global __hypotf
+__hypotf:
+.global __ieee754_hypotf
+__ieee754_hypotf:
+#endif 
 {.mfi
   alloc r32= ar.pfs,0,4,4,0
   // Compute x*x
@@ -194,7 +207,6 @@ GLOBAL_IEEE754_ENTRY(hypotf)
   nop.i 0;;
 }
 
-.pred.rel "mutex",p7,p8
 {.mfb
   nop.m 0
   // if f8=Infinity or f9=Zero, return |f8|
@@ -336,12 +348,15 @@ GLOBAL_IEEE754_ENTRY(hypotf)
 	 // No overflow
 (p9) br.ret.sptk b0;; 
 }
-GLOBAL_IEEE754_END(hypotf)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp hypotf
+ASM_SIZE_DIRECTIVE(hypotf)
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mii
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
-        mov   GR_Parameter_TAG = 47                   
+(p0)    mov   GR_Parameter_TAG = 47                   
 .save   ar.pfs,GR_SAVE_PFS
         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
@@ -385,9 +400,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };; 
 
-LOCAL_LIBM_END(__libm_error_region)
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
diff --git a/sysdeps/ia64/fpu/e_hypotl.S b/sysdeps/ia64/fpu/e_hypotl.S
index 0aa94b69b8..986faf6fcc 100644
--- a/sysdeps/ia64/fpu/e_hypotl.S
+++ b/sysdeps/ia64/fpu/e_hypotl.S
@@ -1,10 +1,11 @@
-.file "hypotl.s"
+.file "hypotl.asm"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, 
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the 
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,26 +36,24 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // History: 
-// 02/02/00 hand-optimized
-// 04/04/00 Unwind support added
-// 06/20/00 new version
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  hand-optimized
+// 4/04/00  Unwind support added
+// 6/20/00  new version
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
 //
-//*********************************************************************
+// *********************************************************************
 //                           ___________
 // Function:   hypotl(x,y) = |(x^2 + y^2) = for double extended values
 //             x and y
 //             Also provides cabsl functionality.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Resources Used:
 //
@@ -69,7 +68,7 @@
 //
 //    Predicate Registers:      p6 - p10
 //
-//*********************************************************************
+// *********************************************************************
 //
 // IEEE Special Conditions:
 //
@@ -79,7 +78,7 @@
 //    hypotl(QNaN and anything) = QNaN
 //    hypotl(SNaN and anything ) = QNaN
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Implementation:
 //  x2 = x * x   in double-extended
@@ -87,7 +86,9 @@
 //  temp = x2 + y2   in double-extended
 //  sqrt(temp) rounded to double extended
 //
-//*********************************************************************
+// *********************************************************************
+
+#include "libm_support.h"
 
 GR_SAVE_PFS         = r33
 GR_SAVE_B0          = r34
@@ -102,10 +103,23 @@ FR_Y                = f33
 FR_RESULT           = f8
 
 .section .text
+#ifndef _LIBC
+.proc cabsl#
+.global cabsl#
+cabsl: 
+.endp cabsl
+#endif
+.proc hypotl#
+.global hypotl#
+.align 64 
 
-LOCAL_LIBM_ENTRY(cabsl)
-LOCAL_LIBM_END(cabsl)
-GLOBAL_IEEE754_ENTRY(hypotl)
+hypotl: 
+#ifdef _LIBC
+.global __hypotl
+__hypotl:
+.global __ieee754_hypotl
+__ieee754_hypotl:
+#endif 
 {.mfi
   alloc r32= ar.pfs,0,4,4,0
   // Compute x*x
@@ -420,8 +434,11 @@ GLOBAL_IEEE754_ENTRY(hypotl)
 	 // No overflow
 (p9) br.ret.sptk b0;; 
 }
-GLOBAL_IEEE754_END(hypotl)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp hypotl
+ASM_SIZE_DIRECTIVE(hypotl)
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -468,9 +485,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
 };;
-LOCAL_LIBM_END(__libm_error_region#)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region) 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
-
-
diff --git a/sysdeps/ia64/fpu/e_log.S b/sysdeps/ia64/fpu/e_log.S
index f80f153679..9ad1e5fe56 100644
--- a/sysdeps/ia64/fpu/e_log.S
+++ b/sysdeps/ia64/fpu/e_log.S
@@ -1,10 +1,10 @@
 .file "log.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,1707 +20,1085 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 06/16/00 Updated table to be rounded correctly
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 4/04/00  Unwind support added
+// 6/16/00  Updated table to be rounded correctly
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 08/17/00 Improved speed of main path by 5 cycles
+// 8/17/00  Improved speed of main path by 5 cycles
 //          Shortened path for x=1.0
-// 01/09/01 Improved speed, fixed flags for neg denormals
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 05/23/02 Modified algorithm. Now only one polynomial is used
-//          for |x-1| >= 1/256 and for |x-1| < 1/256
-// 12/11/02 Improved performance for Itanium 2
+// 1/09/01  Improved speed, fixed flags for neg denormals
+//
 //
 // API
 //==============================================================
 // double log(double)
 // double log10(double)
 //
-//
 // Overview of operation
 //==============================================================
 // Background
-// ----------
-//
-// This algorithm is based on fact that
-// log(a b) = log(a) + log(b).
-// In our case we have x = 2^N f, where 1 <= f < 2.
-// So
-//   log(x) = log(2^N f) = log(2^N) + log(f) = n*log(2) + log(f)
-//
-// To calculate log(f) we do following
-//   log(f) = log(f * frcpa(f) / frcpa(f)) =
-//          = log(f * frcpa(f)) + log(1/frcpa(f))
 //
-// According to definition of IA-64's frcpa instruction it's a
-// floating point that approximates 1/f using a lookup on the
-// top of 8 bits of the input number's significand with relative
-// error < 2^(-8.886). So we have following
+// Consider  x = 2^N 1.f1 f2 f3 f4...f63
+// Log(x) = log(frcpa(x) x/frcpa(x))
+//        = log(1/frcpa(x)) + log(frcpa(x) x)
+//        = -log(frcpa(x)) + log(frcpa(x) x)
 //
-// |(1/f - frcpa(f)) / (1/f))| = |1 - f*frcpa(f)| < 1/256
+// frcpa(x)       = 2^-N frcpa(1.f1 f2 ... f63)
 //
-// and
+// -log(frcpa(x)) = -log(C) 
+//                = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
 //
-// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
-//        = log(1 + r) + T
+// -log(frcpa(x)) = -log(C) 
+//                = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
 //
-// The first value can be computed by polynomial P(r) approximating
-// log(1 + r) on |r| < 1/256 and the second is precomputed tabular
-// value defined by top 8 bit of f.
+// -log(frcpa(x)) = -log(C) 
+//                = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
 //
-// Finally we have that  log(x) ~ (N*log(2) + T) + P(r)
-//
-// Note that if input argument is close to 1.0 (in our case it means
-// that |1 - x| < 1/256) we can use just polynomial approximation
-// because x = 2^0 * f = f = 1 + r and
-// log(x) = log(1 + r) ~ P(r)
-//
-//
-// To compute log10(x) we use the simple identity
+// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
+
+// Log(x) =  +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) =  +Nlog2 - log(frcpa(1.f1 f2 ... f63))    + log(frcpa(x) x)
+// Log(x) =  +Nlog2 + T                              + log(frcpa(x) x)
 //
-//  log10(x) = log(x)/log(10)
+// Log(x) =  +Nlog2 + T                     + log(C x)
 //
-// so we have that
+// Cx = 1 + r
 //
-//  log10(x) = (N*log(2) + T  + log(1+r)) / log(10) =
-//           = N*(log(2)/log(10)) + (T/log(10)) + log(1 + r)/log(10)
+// Log(x) =  +Nlog2 + T  + log(1+r)
+// Log(x) =  +Nlog2 + T  + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
 //
+// 1.f1 f2 ... f8 has 256 entries.
+// They are 1 + k/2^8, k = 0 ... 255
+// These 256 values are the table entries.
 //
 // Implementation
-// --------------
-// It can be seen that formulas for log and log10 differ from one another
-// only by coefficients and tabular values. Namely as log as log10 are
-// calculated as (N*L1 + T) + L2*Series(r) where in case of log
-//   L1 = log(2)
-//   T  = log(1/frcpa(x))
-//   L2 = 1.0
-// and in case of log10
-//   L1 = log(2)/log(10)
-//   T  = log(1/frcpa(x))/log(10)
-//   L2 = 1.0/log(10)
-//
-// So common code with two different entry points those set pointers
-// to the base address of coresponding data sets containing values
-// of L2,T and prepare integer representation of L1 needed for following
-// setf instruction.
-//
-// Note that both log and log10 use common approximation polynomial
-// it means we need only one set of coefficients of approximation.
-//
-//
-// 1. |x-1| >= 1/256
-//   InvX = frcpa(x)
-//   r = InvX*x - 1
-//   P(r) = r*((r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
-//   all coefficients are calcutated in quad and rounded to double
-//   precision. A7,A6,A5,A4 are stored in memory whereas A3 and A2
-//   created with setf.
-//
-//   N = float(n) where n is true unbiased exponent of x
+//===============
+// CASE 1:  |x-1| >= 2^-6
+// C = frcpa(x)
+// r = C * x - 1
 //
-//   T is tabular value of log(1/frcpa(x)) calculated in quad precision
-//   and represented by two floating-point numbers 64-bit Thi and 32-bit Tlo.
-//   To load Thi,Tlo we get bits from 55 to 62 of register format significand
-//   as index and calculate two addresses
-//     ad_Thi = Thi_table_base_addr + 8 * index
-//     ad_Tlo = Tlo_table_base_addr + 4 * index
+// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4 + P4*r^5 + P5*r^6
 //
-//   L2 (1.0 or 1.0/log(10) depending on function) is calculated in quad
-//   precision and rounded to double extended; it's loaded from memory.
+// x = f * 2^n where f is 1.f_1f_2f_3....f_63
+// Nfloat = float(n)  where n is the true unbiased exponent
+// pre_index = f_1f_2....f_8
+// index = pre_index * 16
+// get the dxt table entry at index + offset = T
 //
-//   L1 (log(2) or log10(2) depending on function) is calculated in quad
-//   precision and represented by two floating-point 64-bit numbers L1hi,L1lo
-//   stored in memory.
+// result = (T + Nfloat * log(2)) + rseries
 //
-//   And final result = ((L1hi*N + Thi) + (N*L1lo + Tlo)) + L2*P(r)
-//
-//
-// 2. |x-1| < 1/256
-//   r = x - 1
-//   P(r) = r*((r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
-//   A7,A6,A5A4,A3,A2 are the same as in case |x-1| >= 1/256
-//
-//   And final results
-//     log(x)   = P(r)
-//     log10(x) = L2*P(r)
-//
-// 3. How we define is input argument such that |x-1| < 1/256 or not.
-//
-//    To do it we analyze biased exponent and integer representation of
-//    input argument
-//
-//      a) First we test is biased exponent equal to 0xFFFE or 0xFFFF (i.e.
-//         we test is 0.5 <= x < 2). This comparison can be performed using
-//         unsigned version of cmp instruction in such a way
-//         biased_exponent_of_x - 0xFFFE < 2
-//
-//
-//      b) Second (in case when result of a) is true) we need to compare x
-//         with 1-1/256 and 1+1/256 or in double precision memory representation
-//         with 0x3FEFE00000000000 and 0x3FF0100000000000 correspondingly.
-//         This comparison can be made like in a), using unsigned
-//         version of cmp i.e. ix - 0x3FEFE00000000000 < 0x0000300000000000.
-//         0x0000300000000000 is difference between 0x3FF0100000000000 and
-//         0x3FEFE00000000000
+// The T table is calculated as follows
+// Form x_k = 1 + k/2^8 where k goes from 0... 255
+//      y_k = frcpa(x_k)
+//      log(1/y_k)  in quad and round to double-extended
+
+// CASE 2:  |x-1| < 2^-6
+// w = x - 1
 //
-//    Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
-//          filtered and processed on special branches.
+// Form wseries = w + Q1*w^2 + Q2*w^3 + ... + Q7*w^8 + Q8*w^9
 //
+// result = wseries
 
-//
-// Special values
+// Special values 
 //==============================================================
-//
+
+
 // log(+0)    = -inf
 // log(-0)    = -inf
-//
-// log(+qnan) = +qnan
-// log(-qnan) = -qnan
-// log(+snan) = +qnan
-// log(-snan) = -qnan
-//
+
+// log(+qnan) = +qnan 
+// log(-qnan) = -qnan 
+// log(+snan) = +qnan 
+// log(-snan) = -qnan 
+
 // log(-n)    = QNAN Indefinite
-// log(-inf)  = QNAN Indefinite
-//
+// log(-inf)  = QNAN Indefinite 
+
 // log(+inf)  = +inf
-//
-//
+
 // Registers used
 //==============================================================
-// Floating Point registers used:
+// Floating Point registers used: 
 // f8, input
-// f7 -> f15,  f32 -> f42
-//
-// General registers used:
-// r8  -> r11
-// r14 -> r23
-//
+// f9 -> f15,  f32 -> f68
+
+// General registers used:  
+// r32 -> r51
+
 // Predicate registers used:
 // p6 -> p15
 
+// p8 log base e
+// p6 log base e special
+// p9 used in the frcpa
+// p13 log base e large W
+// p14 log base e small w
+
+// p7 log base 10
+// p10 log base 10 large W
+// p11 log base 10 small w
+// p12 log base 10 special
+
+#include "libm_support.h"
+
 // Assembly macros
 //==============================================================
-GR_TAG                 = r8
-GR_ad_1                = r8
-GR_ad_2                = r9
-GR_Exp                 = r10
-GR_N                   = r11
-
-GR_x                   = r14
-GR_dx                  = r15
-GR_NearOne             = r15
-GR_xorg                = r16
-GR_mask                = r16
-GR_05                  = r17
-GR_A3                  = r18
-GR_Sig                 = r19
-GR_Ind                 = r19
-GR_Nm1                 = r20
-GR_bias                = r21
-GR_ad_3                = r22
-GR_rexp                = r23
-
-
-GR_SAVE_B0             = r33
-GR_SAVE_PFS            = r34
-GR_SAVE_GP             = r35
-GR_SAVE_SP             = r36
-
-GR_Parameter_X         = r37
-GR_Parameter_Y         = r38
-GR_Parameter_RESULT    = r39
-GR_Parameter_TAG       = r40
-
-
-
-FR_NormX               = f7
-FR_RcpX                = f9
-FR_tmp                 = f9
-FR_r                   = f10
-FR_r2                  = f11
-FR_r4                  = f12
-FR_N                   = f13
-FR_Ln2hi               = f14
-FR_Ln2lo               = f15
-
-FR_A7                  = f32
-FR_A6                  = f33
-FR_A5                  = f34
-FR_A4                  = f35
-FR_A3                  = f36
-FR_A2                  = f37
-
-FR_Thi                 = f38
-FR_NxLn2hipThi         = f38
-FR_NxLn2pT             = f38
-FR_Tlo                 = f39
-FR_NxLn2lopTlo         = f39
-
-FR_InvLn10             = f40
-FR_A32                 = f41
-FR_A321                = f42
-
-
-FR_Y                   = f1
-FR_X                   = f10
-FR_RESULT              = f8
-
-
-// Data
+
+log_int_Nfloat   = f9 
+log_Nfloat       = f10 
+
+log_P5           = f11 
+log_P4           = f12 
+log_P3           = f13 
+log_P2           = f14 
+log_half         = f15
+
+log_log2         = f32 
+log_T            = f33 
+
+log_rp_p4        = f34 
+log_rp_p32       = f35 
+log_rp_p2        = f36 
+log_w6           = f37
+log_rp_p10       = f38
+log_rcube        = f39
+log_rsq          = f40 
+
+log_T_plus_Nlog2 = f41 
+log_w3           = f42
+
+log_r            = f43
+log_C            = f44
+
+log_w            = f45
+log_Q8           = f46
+log_Q7           = f47
+log_Q4           = f48 
+log_Q3           = f49
+log_Q6           = f50 
+log_Q5           = f51
+log_Q2           = f52
+log_Q1           = f53 
+log_P1           = f53 
+
+log_rp_q7        = f54 
+log_rp_q65       = f55
+log_Qlo          = f56
+
+log_rp_q3        = f57
+log_rp_q21       = f58
+log_Qhi          = f59
+
+log_wsq          = f60
+log_w4           = f61
+log_Q            = f62
+
+log_inv_ln10     = f63
+log_log10_hi     = f64
+log_log10_lo     = f65
+log_rp_q10       = f66
+log_NORM_f8      = f67
+log_r2P_r        = f68 
+
+// ===================================
+
+log_GR_exp_17_ones               = r33
+log_GR_exp_16_ones               = r34
+log_GR_exp_f8                    = r35
+log_GR_signexp_f8                = r36
+log_GR_true_exp_f8               = r37
+log_GR_significand_f8            = r38
+log_GR_half_exp                  = r39
+log_GR_index                     = r39
+log_AD_1                         = r40
+log_GR_signexp_w                 = r41
+log_GR_fff9                      = r42
+log_AD_2                         = r43
+log_GR_exp_w                     = r44
+
+GR_SAVE_B0                       = r45
+GR_SAVE_GP                       = r46
+GR_SAVE_PFS                      = r47
+
+GR_Parameter_X                   = r48
+GR_Parameter_Y                   = r49
+GR_Parameter_RESULT              = r50
+log_GR_tag                       = r51
+
+
+// Data tables
 //==============================================================
-RODATA
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
 .align 16
 
-LOCAL_OBJECT_START(log_data)
-// coefficients of polynomial approximation
-data8 0x3FC2494104381A8E // A7
-data8 0xBFC5556D556BBB69 // A6
-//
-// two parts of ln(2)
-data8 0x3FE62E42FEF00000,0x3DD473DE6AF278ED
-//
-data8 0x8000000000000000,0x3FFF // 1.0
-//
-data8 0x3FC999999988B5E9 // A5
-data8 0xBFCFFFFFFFF6FFF5 // A4
-//
-// hi parts of ln(1/frcpa(1+i/256)), i=0...255
-data8 0x3F60040155D5889D // 0
-data8 0x3F78121214586B54 // 1
-data8 0x3F841929F96832EF // 2
-data8 0x3F8C317384C75F06 // 3
-data8 0x3F91A6B91AC73386 // 4
-data8 0x3F95BA9A5D9AC039 // 5
-data8 0x3F99D2A8074325F3 // 6
-data8 0x3F9D6B2725979802 // 7
-data8 0x3FA0C58FA19DFAA9 // 8
-data8 0x3FA2954C78CBCE1A // 9
-data8 0x3FA4A94D2DA96C56 // 10
-data8 0x3FA67C94F2D4BB58 // 11
-data8 0x3FA85188B630F068 // 12
-data8 0x3FAA6B8ABE73AF4C // 13
-data8 0x3FAC441E06F72A9E // 14
-data8 0x3FAE1E6713606D06 // 15
-data8 0x3FAFFA6911AB9300 // 16
-data8 0x3FB0EC139C5DA600 // 17
-data8 0x3FB1DBD2643D190B // 18
-data8 0x3FB2CC7284FE5F1C // 19
-data8 0x3FB3BDF5A7D1EE64 // 20
-data8 0x3FB4B05D7AA012E0 // 21
-data8 0x3FB580DB7CEB5701 // 22
-data8 0x3FB674F089365A79 // 23
-data8 0x3FB769EF2C6B568D // 24
-data8 0x3FB85FD927506A47 // 25
-data8 0x3FB9335E5D594988 // 26
-data8 0x3FBA2B0220C8E5F4 // 27
-data8 0x3FBB0004AC1A86AB // 28
-data8 0x3FBBF968769FCA10 // 29
-data8 0x3FBCCFEDBFEE13A8 // 30
-data8 0x3FBDA727638446A2 // 31
-data8 0x3FBEA3257FE10F79 // 32
-data8 0x3FBF7BE9FEDBFDE5 // 33
-data8 0x3FC02AB352FF25F3 // 34
-data8 0x3FC097CE579D204C // 35
-data8 0x3FC1178E8227E47B // 36
-data8 0x3FC185747DBECF33 // 37
-data8 0x3FC1F3B925F25D41 // 38
-data8 0x3FC2625D1E6DDF56 // 39
-data8 0x3FC2D1610C868139 // 40
-data8 0x3FC340C59741142E // 41
-data8 0x3FC3B08B6757F2A9 // 42
-data8 0x3FC40DFB08378003 // 43
-data8 0x3FC47E74E8CA5F7C // 44
-data8 0x3FC4EF51F6466DE4 // 45
-data8 0x3FC56092E02BA516 // 46
-data8 0x3FC5D23857CD74D4 // 47
-data8 0x3FC6313A37335D76 // 48
-data8 0x3FC6A399DABBD383 // 49
-data8 0x3FC70337DD3CE41A // 50
-data8 0x3FC77654128F6127 // 51
-data8 0x3FC7E9D82A0B022D // 52
-data8 0x3FC84A6B759F512E // 53
-data8 0x3FC8AB47D5F5A30F // 54
-data8 0x3FC91FE49096581B // 55
-data8 0x3FC981634011AA75 // 56
-data8 0x3FC9F6C407089664 // 57
-data8 0x3FCA58E729348F43 // 58
-data8 0x3FCABB55C31693AC // 59
-data8 0x3FCB1E104919EFD0 // 60
-data8 0x3FCB94EE93E367CA // 61
-data8 0x3FCBF851C067555E // 62
-data8 0x3FCC5C0254BF23A5 // 63
-data8 0x3FCCC000C9DB3C52 // 64
-data8 0x3FCD244D99C85673 // 65
-data8 0x3FCD88E93FB2F450 // 66
-data8 0x3FCDEDD437EAEF00 // 67
-data8 0x3FCE530EFFE71012 // 68
-data8 0x3FCEB89A1648B971 // 69
-data8 0x3FCF1E75FADF9BDE // 70
-data8 0x3FCF84A32EAD7C35 // 71
-data8 0x3FCFEB2233EA07CD // 72
-data8 0x3FD028F9C7035C1C // 73
-data8 0x3FD05C8BE0D9635A // 74
-data8 0x3FD085EB8F8AE797 // 75
-data8 0x3FD0B9C8E32D1911 // 76
-data8 0x3FD0EDD060B78080 // 77
-data8 0x3FD122024CF0063F // 78
-data8 0x3FD14BE2927AECD4 // 79
-data8 0x3FD180618EF18ADF // 80
-data8 0x3FD1B50BBE2FC63B // 81
-data8 0x3FD1DF4CC7CF242D // 82
-data8 0x3FD214456D0EB8D4 // 83
-data8 0x3FD23EC5991EBA49 // 84
-data8 0x3FD2740D9F870AFB // 85
-data8 0x3FD29ECDABCDFA03 // 86
-data8 0x3FD2D46602ADCCEE // 87
-data8 0x3FD2FF66B04EA9D4 // 88
-data8 0x3FD335504B355A37 // 89
-data8 0x3FD360925EC44F5C // 90
-data8 0x3FD38BF1C3337E74 // 91
-data8 0x3FD3C25277333183 // 92
-data8 0x3FD3EDF463C1683E // 93
-data8 0x3FD419B423D5E8C7 // 94
-data8 0x3FD44591E0539F48 // 95
-data8 0x3FD47C9175B6F0AD // 96
-data8 0x3FD4A8B341552B09 // 97
-data8 0x3FD4D4F39089019F // 98
-data8 0x3FD501528DA1F967 // 99
-data8 0x3FD52DD06347D4F6 // 100
-data8 0x3FD55A6D3C7B8A89 // 101
-data8 0x3FD5925D2B112A59 // 102
-data8 0x3FD5BF406B543DB1 // 103
-data8 0x3FD5EC433D5C35AD // 104
-data8 0x3FD61965CDB02C1E // 105
-data8 0x3FD646A84935B2A1 // 106
-data8 0x3FD6740ADD31DE94 // 107
-data8 0x3FD6A18DB74A58C5 // 108
-data8 0x3FD6CF31058670EC // 109
-data8 0x3FD6F180E852F0B9 // 110
-data8 0x3FD71F5D71B894EF // 111
-data8 0x3FD74D5AEFD66D5C // 112
-data8 0x3FD77B79922BD37D // 113
-data8 0x3FD7A9B9889F19E2 // 114
-data8 0x3FD7D81B037EB6A6 // 115
-data8 0x3FD8069E33827230 // 116
-data8 0x3FD82996D3EF8BCA // 117
-data8 0x3FD85855776DCBFA // 118
-data8 0x3FD8873658327CCE // 119
-data8 0x3FD8AA75973AB8CE // 120
-data8 0x3FD8D992DC8824E4 // 121
-data8 0x3FD908D2EA7D9511 // 122
-data8 0x3FD92C59E79C0E56 // 123
-data8 0x3FD95BD750EE3ED2 // 124
-data8 0x3FD98B7811A3EE5B // 125
-data8 0x3FD9AF47F33D406B // 126
-data8 0x3FD9DF270C1914A7 // 127
-data8 0x3FDA0325ED14FDA4 // 128
-data8 0x3FDA33440224FA78 // 129
-data8 0x3FDA57725E80C382 // 130
-data8 0x3FDA87D0165DD199 // 131
-data8 0x3FDAAC2E6C03F895 // 132
-data8 0x3FDADCCC6FDF6A81 // 133
-data8 0x3FDB015B3EB1E790 // 134
-data8 0x3FDB323A3A635948 // 135
-data8 0x3FDB56FA04462909 // 136
-data8 0x3FDB881AA659BC93 // 137
-data8 0x3FDBAD0BEF3DB164 // 138
-data8 0x3FDBD21297781C2F // 139
-data8 0x3FDC039236F08818 // 140
-data8 0x3FDC28CB1E4D32FC // 141
-data8 0x3FDC4E19B84723C1 // 142
-data8 0x3FDC7FF9C74554C9 // 143
-data8 0x3FDCA57B64E9DB05 // 144
-data8 0x3FDCCB130A5CEBAF // 145
-data8 0x3FDCF0C0D18F326F // 146
-data8 0x3FDD232075B5A201 // 147
-data8 0x3FDD490246DEFA6B // 148
-data8 0x3FDD6EFA918D25CD // 149
-data8 0x3FDD9509707AE52F // 150
-data8 0x3FDDBB2EFE92C554 // 151
-data8 0x3FDDEE2F3445E4AE // 152
-data8 0x3FDE148A1A2726CD // 153
-data8 0x3FDE3AFC0A49FF3F // 154
-data8 0x3FDE6185206D516D // 155
-data8 0x3FDE882578823D51 // 156
-data8 0x3FDEAEDD2EAC990C // 157
-data8 0x3FDED5AC5F436BE2 // 158
-data8 0x3FDEFC9326D16AB8 // 159
-data8 0x3FDF2391A21575FF // 160
-data8 0x3FDF4AA7EE03192C // 161
-data8 0x3FDF71D627C30BB0 // 162
-data8 0x3FDF991C6CB3B379 // 163
-data8 0x3FDFC07ADA69A90F // 164
-data8 0x3FDFE7F18EB03D3E // 165
-data8 0x3FE007C053C5002E // 166
-data8 0x3FE01B942198A5A0 // 167
-data8 0x3FE02F74400C64EA // 168
-data8 0x3FE04360BE7603AC // 169
-data8 0x3FE05759AC47FE33 // 170
-data8 0x3FE06B5F1911CF51 // 171
-data8 0x3FE078BF0533C568 // 172
-data8 0x3FE08CD9687E7B0E // 173
-data8 0x3FE0A10074CF9019 // 174
-data8 0x3FE0B5343A234476 // 175
-data8 0x3FE0C974C89431CD // 176
-data8 0x3FE0DDC2305B9886 // 177
-data8 0x3FE0EB524BAFC918 // 178
-data8 0x3FE0FFB54213A475 // 179
-data8 0x3FE114253DA97D9F // 180
-data8 0x3FE128A24F1D9AFF // 181
-data8 0x3FE1365252BF0864 // 182
-data8 0x3FE14AE558B4A92D // 183
-data8 0x3FE15F85A19C765B // 184
-data8 0x3FE16D4D38C119FA // 185
-data8 0x3FE18203C20DD133 // 186
-data8 0x3FE196C7BC4B1F3A // 187
-data8 0x3FE1A4A738B7A33C // 188
-data8 0x3FE1B981C0C9653C // 189
-data8 0x3FE1CE69E8BB106A // 190
-data8 0x3FE1DC619DE06944 // 191
-data8 0x3FE1F160A2AD0DA3 // 192
-data8 0x3FE2066D7740737E // 193
-data8 0x3FE2147DBA47A393 // 194
-data8 0x3FE229A1BC5EBAC3 // 195
-data8 0x3FE237C1841A502E // 196
-data8 0x3FE24CFCE6F80D9A // 197
-data8 0x3FE25B2C55CD5762 // 198
-data8 0x3FE2707F4D5F7C40 // 199
-data8 0x3FE285E0842CA383 // 200
-data8 0x3FE294294708B773 // 201
-data8 0x3FE2A9A2670AFF0C // 202
-data8 0x3FE2B7FB2C8D1CC0 // 203
-data8 0x3FE2C65A6395F5F5 // 204
-data8 0x3FE2DBF557B0DF42 // 205
-data8 0x3FE2EA64C3F97654 // 206
-data8 0x3FE3001823684D73 // 207
-data8 0x3FE30E97E9A8B5CC // 208
-data8 0x3FE32463EBDD34E9 // 209
-data8 0x3FE332F4314AD795 // 210
-data8 0x3FE348D90E7464CF // 211
-data8 0x3FE35779F8C43D6D // 212
-data8 0x3FE36621961A6A99 // 213
-data8 0x3FE37C299F3C366A // 214
-data8 0x3FE38AE2171976E7 // 215
-data8 0x3FE399A157A603E7 // 216
-data8 0x3FE3AFCCFE77B9D1 // 217
-data8 0x3FE3BE9D503533B5 // 218
-data8 0x3FE3CD7480B4A8A2 // 219
-data8 0x3FE3E3C43918F76C // 220
-data8 0x3FE3F2ACB27ED6C6 // 221
-data8 0x3FE4019C2125CA93 // 222
-data8 0x3FE4181061389722 // 223
-data8 0x3FE42711518DF545 // 224
-data8 0x3FE436194E12B6BF // 225
-data8 0x3FE445285D68EA69 // 226
-data8 0x3FE45BCC464C893A // 227
-data8 0x3FE46AED21F117FC // 228
-data8 0x3FE47A1527E8A2D3 // 229
-data8 0x3FE489445EFFFCCB // 230
-data8 0x3FE4A018BCB69835 // 231
-data8 0x3FE4AF5A0C9D65D7 // 232
-data8 0x3FE4BEA2A5BDBE87 // 233
-data8 0x3FE4CDF28F10AC46 // 234
-data8 0x3FE4DD49CF994058 // 235
-data8 0x3FE4ECA86E64A683 // 236
-data8 0x3FE503C43CD8EB68 // 237
-data8 0x3FE513356667FC57 // 238
-data8 0x3FE522AE0738A3D7 // 239
-data8 0x3FE5322E26867857 // 240
-data8 0x3FE541B5CB979809 // 241
-data8 0x3FE55144FDBCBD62 // 242
-data8 0x3FE560DBC45153C6 // 243
-data8 0x3FE5707A26BB8C66 // 244
-data8 0x3FE587F60ED5B8FF // 245
-data8 0x3FE597A7977C8F31 // 246
-data8 0x3FE5A760D634BB8A // 247
-data8 0x3FE5B721D295F10E // 248
-data8 0x3FE5C6EA94431EF9 // 249
-data8 0x3FE5D6BB22EA86F5 // 250
-data8 0x3FE5E6938645D38F // 251
-data8 0x3FE5F673C61A2ED1 // 252
-data8 0x3FE6065BEA385926 // 253
-data8 0x3FE6164BFA7CC06B // 254
-data8 0x3FE62643FECF9742 // 255
-//
-// lo parts of ln(1/frcpa(1+i/256)), i=0...255
-data4 0x20E70672 // 0
-data4 0x1F60A5D0 // 1
-data4 0x218EABA0 // 2
-data4 0x21403104 // 3
-data4 0x20E9B54E // 4
-data4 0x21EE1382 // 5
-data4 0x226014E3 // 6
-data4 0x2095E5C9 // 7
-data4 0x228BA9D4 // 8
-data4 0x22932B86 // 9
-data4 0x22608A57 // 10
-data4 0x220209F3 // 11
-data4 0x212882CC // 12
-data4 0x220D46E2 // 13
-data4 0x21FA4C28 // 14
-data4 0x229E5BD9 // 15
-data4 0x228C9838 // 16
-data4 0x2311F954 // 17
-data4 0x221365DF // 18
-data4 0x22BD0CB3 // 19
-data4 0x223D4BB7 // 20
-data4 0x22A71BBE // 21
-data4 0x237DB2FA // 22
-data4 0x23194C9D // 23
-data4 0x22EC639E // 24
-data4 0x2367E669 // 25
-data4 0x232E1D5F // 26
-data4 0x234A639B // 27
-data4 0x2365C0E0 // 28
-data4 0x234646C1 // 29
-data4 0x220CBF9C // 30
-data4 0x22A00FD4 // 31
-data4 0x2306A3F2 // 32
-data4 0x23745A9B // 33
-data4 0x2398D756 // 34
-data4 0x23DD0B6A // 35
-data4 0x23DE338B // 36
-data4 0x23A222DF // 37
-data4 0x223164F8 // 38
-data4 0x23B4E87B // 39
-data4 0x23D6CCB8 // 40
-data4 0x220C2099 // 41
-data4 0x21B86B67 // 42
-data4 0x236D14F1 // 43
-data4 0x225A923F // 44
-data4 0x22748723 // 45
-data4 0x22200D13 // 46
-data4 0x23C296EA // 47
-data4 0x2302AC38 // 48
-data4 0x234B1996 // 49
-data4 0x2385E298 // 50
-data4 0x23175BE5 // 51
-data4 0x2193F482 // 52
-data4 0x23BFEA90 // 53
-data4 0x23D70A0C // 54
-data4 0x231CF30A // 55
-data4 0x235D9E90 // 56
-data4 0x221AD0CB // 57
-data4 0x22FAA08B // 58
-data4 0x23D29A87 // 59
-data4 0x20C4B2FE // 60
-data4 0x2381B8B7 // 61
-data4 0x23F8D9FC // 62
-data4 0x23EAAE7B // 63
-data4 0x2329E8AA // 64
-data4 0x23EC0322 // 65
-data4 0x2357FDCB // 66
-data4 0x2392A9AD // 67
-data4 0x22113B02 // 68
-data4 0x22DEE901 // 69
-data4 0x236A6D14 // 70
-data4 0x2371D33E // 71
-data4 0x2146F005 // 72
-data4 0x23230B06 // 73
-data4 0x22F1C77D // 74
-data4 0x23A89FA3 // 75
-data4 0x231D1241 // 76
-data4 0x244DA96C // 77
-data4 0x23ECBB7D // 78
-data4 0x223E42B4 // 79
-data4 0x23801BC9 // 80
-data4 0x23573263 // 81
-data4 0x227C1158 // 82
-data4 0x237BD749 // 83
-data4 0x21DDBAE9 // 84
-data4 0x23401735 // 85
-data4 0x241D9DEE // 86
-data4 0x23BC88CB // 87
-data4 0x2396D5F1 // 88
-data4 0x23FC89CF // 89
-data4 0x2414F9A2 // 90
-data4 0x2474A0F5 // 91
-data4 0x24354B60 // 92
-data4 0x23C1EB40 // 93
-data4 0x2306DD92 // 94
-data4 0x24353B6B // 95
-data4 0x23CD1701 // 96
-data4 0x237C7A1C // 97
-data4 0x245793AA // 98
-data4 0x24563695 // 99
-data4 0x23C51467 // 100
-data4 0x24476B68 // 101
-data4 0x212585A9 // 102
-data4 0x247B8293 // 103
-data4 0x2446848A // 104
-data4 0x246A53F8 // 105
-data4 0x246E496D // 106
-data4 0x23ED1D36 // 107
-data4 0x2314C258 // 108
-data4 0x233244A7 // 109
-data4 0x245B7AF0 // 110
-data4 0x24247130 // 111
-data4 0x22D67B38 // 112
-data4 0x2449F620 // 113
-data4 0x23BBC8B8 // 114
-data4 0x237D3BA0 // 115
-data4 0x245E8F13 // 116
-data4 0x2435573F // 117
-data4 0x242DE666 // 118
-data4 0x2463BC10 // 119
-data4 0x2466587D // 120
-data4 0x2408144B // 121
-data4 0x2405F0E5 // 122
-data4 0x22381CFF // 123
-data4 0x24154F9B // 124
-data4 0x23A4E96E // 125
-data4 0x24052967 // 126
-data4 0x2406963F // 127
-data4 0x23F7D3CB // 128
-data4 0x2448AFF4 // 129
-data4 0x24657A21 // 130
-data4 0x22FBC230 // 131
-data4 0x243C8DEA // 132
-data4 0x225DC4B7 // 133
-data4 0x23496EBF // 134
-data4 0x237C2B2B // 135
-data4 0x23A4A5B1 // 136
-data4 0x2394E9D1 // 137
-data4 0x244BC950 // 138
-data4 0x23C7448F // 139
-data4 0x2404A1AD // 140
-data4 0x246511D5 // 141
-data4 0x24246526 // 142
-data4 0x23111F57 // 143
-data4 0x22868951 // 144
-data4 0x243EB77F // 145
-data4 0x239F3DFF // 146
-data4 0x23089666 // 147
-data4 0x23EBFA6A // 148
-data4 0x23C51312 // 149
-data4 0x23E1DD5E // 150
-data4 0x232C0944 // 151
-data4 0x246A741F // 152
-data4 0x2414DF8D // 153
-data4 0x247B5546 // 154
-data4 0x2415C980 // 155
-data4 0x24324ABD // 156
-data4 0x234EB5E5 // 157
-data4 0x2465E43E // 158
-data4 0x242840D1 // 159
-data4 0x24444057 // 160
-data4 0x245E56F0 // 161
-data4 0x21AE30F8 // 162
-data4 0x23FB3283 // 163
-data4 0x247A4D07 // 164
-data4 0x22AE314D // 165
-data4 0x246B7727 // 166
-data4 0x24EAD526 // 167
-data4 0x24B41DC9 // 168
-data4 0x24EE8062 // 169
-data4 0x24A0C7C4 // 170
-data4 0x24E8DA67 // 171
-data4 0x231120F7 // 172
-data4 0x24401FFB // 173
-data4 0x2412DD09 // 174
-data4 0x248C131A // 175
-data4 0x24C0A7CE // 176
-data4 0x243DD4C8 // 177
-data4 0x24457FEB // 178
-data4 0x24DEEFBB // 179
-data4 0x243C70AE // 180
-data4 0x23E7A6FA // 181
-data4 0x24C2D311 // 182
-data4 0x23026255 // 183
-data4 0x2437C9B9 // 184
-data4 0x246BA847 // 185
-data4 0x2420B448 // 186
-data4 0x24C4CF5A // 187
-data4 0x242C4981 // 188
-data4 0x24DE1525 // 189
-data4 0x24F5CC33 // 190
-data4 0x235A85DA // 191
-data4 0x24A0B64F // 192
-data4 0x244BA0A4 // 193
-data4 0x24AAF30A // 194
-data4 0x244C86F9 // 195
-data4 0x246D5B82 // 196
-data4 0x24529347 // 197
-data4 0x240DD008 // 198
-data4 0x24E98790 // 199
-data4 0x2489B0CE // 200
-data4 0x22BC29AC // 201
-data4 0x23F37C7A // 202
-data4 0x24987FE8 // 203
-data4 0x22AFE20B // 204
-data4 0x24C8D7C2 // 205
-data4 0x24B28B7D // 206
-data4 0x23B6B271 // 207
-data4 0x24C77CB6 // 208
-data4 0x24EF1DCA // 209
-data4 0x24A4F0AC // 210
-data4 0x24CF113E // 211
-data4 0x2496BBAB // 212
-data4 0x23C7CC8A // 213
-data4 0x23AE3961 // 214
-data4 0x2410A895 // 215
-data4 0x23CE3114 // 216
-data4 0x2308247D // 217
-data4 0x240045E9 // 218
-data4 0x24974F60 // 219
-data4 0x242CB39F // 220
-data4 0x24AB8D69 // 221
-data4 0x23436788 // 222
-data4 0x24305E9E // 223
-data4 0x243E71A9 // 224
-data4 0x23C2A6B3 // 225
-data4 0x23FFE6CF // 226
-data4 0x2322D801 // 227
-data4 0x24515F21 // 228
-data4 0x2412A0D6 // 229
-data4 0x24E60D44 // 230
-data4 0x240D9251 // 231
-data4 0x247076E2 // 232
-data4 0x229B101B // 233
-data4 0x247B12DE // 234
-data4 0x244B9127 // 235
-data4 0x2499EC42 // 236
-data4 0x21FC3963 // 237
-data4 0x23E53266 // 238
-data4 0x24CE102D // 239
-data4 0x23CC45D2 // 240
-data4 0x2333171D // 241
-data4 0x246B3533 // 242
-data4 0x24931129 // 243
-data4 0x24405FFA // 244
-data4 0x24CF464D // 245
-data4 0x237095CD // 246
-data4 0x24F86CBD // 247
-data4 0x24E2D84B // 248
-data4 0x21ACBB44 // 249
-data4 0x24F43A8C // 250
-data4 0x249DB931 // 251
-data4 0x24A385EF // 252
-data4 0x238B1279 // 253
-data4 0x2436213E // 254
-data4 0x24F18A3B // 255
-LOCAL_OBJECT_END(log_data)
-
-
-LOCAL_OBJECT_START(log10_data)
-// coefficients of polynoimal approximation
-data8 0x3FC2494104381A8E // A7
-data8 0xBFC5556D556BBB69 // A6
-//
-// two parts of ln(2)/ln(10)
-data8 0x3FD3441350900000, 0x3DCEF3FDE623E256
-//
-data8 0xDE5BD8A937287195,0x3FFD // 1/ln(10)
-//
-data8 0x3FC999999988B5E9 // A5
-data8 0xBFCFFFFFFFF6FFF5 // A4
-//
-// Hi parts of ln(1/frcpa(1+i/256))/ln(10), i=0...255
-data8 0x3F4BD27045BFD024 // 0
-data8 0x3F64E84E793A474A // 1
-data8 0x3F7175085AB85FF0 // 2
-data8 0x3F787CFF9D9147A5 // 3
-data8 0x3F7EA9D372B89FC8 // 4
-data8 0x3F82DF9D95DA961C // 5
-data8 0x3F866DF172D6372B // 6
-data8 0x3F898D79EF5EEDEF // 7
-data8 0x3F8D22ADF3F9579C // 8
-data8 0x3F9024231D30C398 // 9
-data8 0x3F91F23A98897D49 // 10
-data8 0x3F93881A7B818F9E // 11
-data8 0x3F951F6E1E759E35 // 12
-data8 0x3F96F2BCE7ADC5B4 // 13
-data8 0x3F988D362CDF359E // 14
-data8 0x3F9A292BAF010981 // 15
-data8 0x3F9BC6A03117EB97 // 16
-data8 0x3F9D65967DE3AB08 // 17
-data8 0x3F9F061167FC31E7 // 18
-data8 0x3FA05409E4F7819B // 19
-data8 0x3FA125D0432EA20D // 20
-data8 0x3FA1F85D440D299B // 21
-data8 0x3FA2AD755749617C // 22
-data8 0x3FA381772A00E603 // 23
-data8 0x3FA45643E165A70A // 24
-data8 0x3FA52BDD034475B8 // 25
-data8 0x3FA5E3966B7E9295 // 26
-data8 0x3FA6BAAF47C5B244 // 27
-data8 0x3FA773B3E8C4F3C7 // 28
-data8 0x3FA84C51EBEE8D15 // 29
-data8 0x3FA906A6786FC1CA // 30
-data8 0x3FA9C197ABF00DD6 // 31
-data8 0x3FAA9C78712191F7 // 32
-data8 0x3FAB58C09C8D637C // 33
-data8 0x3FAC15A8BCDD7B7E // 34
-data8 0x3FACD331E2C2967B // 35
-data8 0x3FADB11ED766ABF4 // 36
-data8 0x3FAE70089346A9E6 // 37
-data8 0x3FAF2F96C6754AED // 38
-data8 0x3FAFEFCA8D451FD5 // 39
-data8 0x3FB0585283764177 // 40
-data8 0x3FB0B913AAC7D3A6 // 41
-data8 0x3FB11A294F2569F5 // 42
-data8 0x3FB16B51A2696890 // 43
-data8 0x3FB1CD03ADACC8BD // 44
-data8 0x3FB22F0BDD7745F5 // 45
-data8 0x3FB2916ACA38D1E7 // 46
-data8 0x3FB2F4210DF7663C // 47
-data8 0x3FB346A6C3C49065 // 48
-data8 0x3FB3A9FEBC605409 // 49
-data8 0x3FB3FD0C10A3AA54 // 50
-data8 0x3FB46107D3540A81 // 51
-data8 0x3FB4C55DD16967FE // 52
-data8 0x3FB51940330C000A // 53
-data8 0x3FB56D620EE7115E // 54
-data8 0x3FB5D2ABCF26178D // 55
-data8 0x3FB6275AA5DEBF81 // 56
-data8 0x3FB68D4EAF26D7EE // 57
-data8 0x3FB6E28C5C54A28D // 58
-data8 0x3FB7380B9665B7C7 // 59
-data8 0x3FB78DCCC278E85B // 60
-data8 0x3FB7F50C2CF25579 // 61
-data8 0x3FB84B5FD5EAEFD7 // 62
-data8 0x3FB8A1F6BAB2B226 // 63
-data8 0x3FB8F8D144557BDF // 64
-data8 0x3FB94FEFDCD61D92 // 65
-data8 0x3FB9A752EF316149 // 66
-data8 0x3FB9FEFAE7611EDF // 67
-data8 0x3FBA56E8325F5C86 // 68
-data8 0x3FBAAF1B3E297BB3 // 69
-data8 0x3FBB079479C372AC // 70
-data8 0x3FBB6054553B12F7 // 71
-data8 0x3FBBB95B41AB5CE5 // 72
-data8 0x3FBC12A9B13FE079 // 73
-data8 0x3FBC6C4017382BEA // 74
-data8 0x3FBCB41FBA42686C // 75
-data8 0x3FBD0E38CE73393E // 76
-data8 0x3FBD689B2193F132 // 77
-data8 0x3FBDC3472B1D285F // 78
-data8 0x3FBE0C06300D528B // 79
-data8 0x3FBE6738190E394B // 80
-data8 0x3FBEC2B50D208D9A // 81
-data8 0x3FBF0C1C2B936827 // 82
-data8 0x3FBF68216C9CC726 // 83
-data8 0x3FBFB1F6381856F3 // 84
-data8 0x3FC00742AF4CE5F8 // 85
-data8 0x3FC02C64906512D2 // 86
-data8 0x3FC05AF1E63E03B4 // 87
-data8 0x3FC0804BEA723AA8 // 88
-data8 0x3FC0AF1FD6711526 // 89
-data8 0x3FC0D4B2A88059FF // 90
-data8 0x3FC0FA5EF136A06C // 91
-data8 0x3FC1299A4FB3E305 // 92
-data8 0x3FC14F806253C3EC // 93
-data8 0x3FC175805D1587C1 // 94
-data8 0x3FC19B9A637CA294 // 95
-data8 0x3FC1CB5FC26EDE16 // 96
-data8 0x3FC1F1B4E65F2590 // 97
-data8 0x3FC218248B5DC3E5 // 98
-data8 0x3FC23EAED62ADC76 // 99
-data8 0x3FC26553EBD337BC // 100
-data8 0x3FC28C13F1B118FF // 101
-data8 0x3FC2BCAA14381385 // 102
-data8 0x3FC2E3A740B7800E // 103
-data8 0x3FC30ABFD8F333B6 // 104
-data8 0x3FC331F403985096 // 105
-data8 0x3FC35943E7A6068F // 106
-data8 0x3FC380AFAC6E7C07 // 107
-data8 0x3FC3A8377997B9E5 // 108
-data8 0x3FC3CFDB771C9ADB // 109
-data8 0x3FC3EDA90D39A5DE // 110
-data8 0x3FC4157EC09505CC // 111
-data8 0x3FC43D7113FB04C0 // 112
-data8 0x3FC4658030AD1CCE // 113
-data8 0x3FC48DAC404638F5 // 114
-data8 0x3FC4B5F56CBBB869 // 115
-data8 0x3FC4DE5BE05E7582 // 116
-data8 0x3FC4FCBC0776FD85 // 117
-data8 0x3FC525561E9256EE // 118
-data8 0x3FC54E0DF3198865 // 119
-data8 0x3FC56CAB7112BDE2 // 120
-data8 0x3FC59597BA735B15 // 121
-data8 0x3FC5BEA23A506FD9 // 122
-data8 0x3FC5DD7E08DE382E // 123
-data8 0x3FC606BDD3F92355 // 124
-data8 0x3FC6301C518A501E // 125
-data8 0x3FC64F3770618915 // 126
-data8 0x3FC678CC14C1E2D7 // 127
-data8 0x3FC6981005ED2947 // 128
-data8 0x3FC6C1DB5F9BB335 // 129
-data8 0x3FC6E1488ECD2880 // 130
-data8 0x3FC70B4B2E7E41B8 // 131
-data8 0x3FC72AE209146BF8 // 132
-data8 0x3FC7551C81BD8DCF // 133
-data8 0x3FC774DD76CC43BD // 134
-data8 0x3FC79F505DB00E88 // 135
-data8 0x3FC7BF3BDE099F30 // 136
-data8 0x3FC7E9E7CAC437F8 // 137
-data8 0x3FC809FE4902D00D // 138
-data8 0x3FC82A2757995CBD // 139
-data8 0x3FC85525C625E098 // 140
-data8 0x3FC8757A79831887 // 141
-data8 0x3FC895E2058D8E02 // 142
-data8 0x3FC8C13437695531 // 143
-data8 0x3FC8E1C812EF32BE // 144
-data8 0x3FC9026F112197E8 // 145
-data8 0x3FC923294888880A // 146
-data8 0x3FC94EEA4B8334F2 // 147
-data8 0x3FC96FD1B639FC09 // 148
-data8 0x3FC990CCA66229AB // 149
-data8 0x3FC9B1DB33334842 // 150
-data8 0x3FC9D2FD740E6606 // 151
-data8 0x3FC9FF49EEDCB553 // 152
-data8 0x3FCA209A84FBCFF7 // 153
-data8 0x3FCA41FF1E43F02B // 154
-data8 0x3FCA6377D2CE9377 // 155
-data8 0x3FCA8504BAE0D9F5 // 156
-data8 0x3FCAA6A5EEEBEFE2 // 157
-data8 0x3FCAC85B878D7878 // 158
-data8 0x3FCAEA259D8FFA0B // 159
-data8 0x3FCB0C0449EB4B6A // 160
-data8 0x3FCB2DF7A5C50299 // 161
-data8 0x3FCB4FFFCA70E4D1 // 162
-data8 0x3FCB721CD17157E2 // 163
-data8 0x3FCB944ED477D4EC // 164
-data8 0x3FCBB695ED655C7C // 165
-data8 0x3FCBD8F2364AEC0F // 166
-data8 0x3FCBFB63C969F4FF // 167
-data8 0x3FCC1DEAC134D4E9 // 168
-data8 0x3FCC4087384F4F80 // 169
-data8 0x3FCC6339498F09E1 // 170
-data8 0x3FCC86010FFC076B // 171
-data8 0x3FCC9D3D065C5B41 // 172
-data8 0x3FCCC029375BA079 // 173
-data8 0x3FCCE32B66978BA4 // 174
-data8 0x3FCD0643AFD51404 // 175
-data8 0x3FCD29722F0DEA45 // 176
-data8 0x3FCD4CB70070FE43 // 177
-data8 0x3FCD6446AB3F8C95 // 178
-data8 0x3FCD87B0EF71DB44 // 179
-data8 0x3FCDAB31D1FE99A6 // 180
-data8 0x3FCDCEC96FDC888E // 181
-data8 0x3FCDE69088763579 // 182
-data8 0x3FCE0A4E4A25C1FF // 183
-data8 0x3FCE2E2315755E32 // 184
-data8 0x3FCE461322D1648A // 185
-data8 0x3FCE6A0E95C7787B // 186
-data8 0x3FCE8E216243DD60 // 187
-data8 0x3FCEA63AF26E007C // 188
-data8 0x3FCECA74ED15E0B7 // 189
-data8 0x3FCEEEC692CCD259 // 190
-data8 0x3FCF070A36B8D9C0 // 191
-data8 0x3FCF2B8393E34A2D // 192
-data8 0x3FCF5014EF538A5A // 193
-data8 0x3FCF68833AF1B17F // 194
-data8 0x3FCF8D3CD9F3F04E // 195
-data8 0x3FCFA5C61ADD93E9 // 196
-data8 0x3FCFCAA8567EBA79 // 197
-data8 0x3FCFE34CC8743DD8 // 198
-data8 0x3FD0042BFD74F519 // 199
-data8 0x3FD016BDF6A18017 // 200
-data8 0x3FD023262F907322 // 201
-data8 0x3FD035CCED8D32A1 // 202
-data8 0x3FD042430E869FFB // 203
-data8 0x3FD04EBEC842B2DF // 204
-data8 0x3FD06182E84FD4AB // 205
-data8 0x3FD06E0CB609D383 // 206
-data8 0x3FD080E60BEC8F12 // 207
-data8 0x3FD08D7E0D894735 // 208
-data8 0x3FD0A06CC96A2055 // 209
-data8 0x3FD0AD131F3B3C55 // 210
-data8 0x3FD0C01771E775FB // 211
-data8 0x3FD0CCCC3CAD6F4B // 212
-data8 0x3FD0D986D91A34A8 // 213
-data8 0x3FD0ECA9B8861A2D // 214
-data8 0x3FD0F972F87FF3D5 // 215
-data8 0x3FD106421CF0E5F7 // 216
-data8 0x3FD11983EBE28A9C // 217
-data8 0x3FD12661E35B7859 // 218
-data8 0x3FD13345D2779D3B // 219
-data8 0x3FD146A6F597283A // 220
-data8 0x3FD15399E81EA83D // 221
-data8 0x3FD16092E5D3A9A6 // 222
-data8 0x3FD17413C3B7AB5D // 223
-data8 0x3FD1811BF629D6FA // 224
-data8 0x3FD18E2A47B46685 // 225
-data8 0x3FD19B3EBE1A4418 // 226
-data8 0x3FD1AEE9017CB450 // 227
-data8 0x3FD1BC0CED7134E1 // 228
-data8 0x3FD1C93712ABC7FF // 229
-data8 0x3FD1D66777147D3E // 230
-data8 0x3FD1EA3BD1286E1C // 231
-data8 0x3FD1F77BED932C4C // 232
-data8 0x3FD204C25E1B031F // 233
-data8 0x3FD2120F28CE69B1 // 234
-data8 0x3FD21F6253C48D00 // 235
-data8 0x3FD22CBBE51D60A9 // 236
-data8 0x3FD240CE4C975444 // 237
-data8 0x3FD24E37F8ECDAE7 // 238
-data8 0x3FD25BA8215AF7FC // 239
-data8 0x3FD2691ECC29F042 // 240
-data8 0x3FD2769BFFAB2DFF // 241
-data8 0x3FD2841FC23952C9 // 242
-data8 0x3FD291AA1A384978 // 243
-data8 0x3FD29F3B0E15584A // 244
-data8 0x3FD2B3A0EE479DF7 // 245
-data8 0x3FD2C142842C09E5 // 246
-data8 0x3FD2CEEACCB7BD6C // 247
-data8 0x3FD2DC99CE82FF20 // 248
-data8 0x3FD2EA4F902FD7D9 // 249
-data8 0x3FD2F80C186A25FC // 250
-data8 0x3FD305CF6DE7B0F6 // 251
-data8 0x3FD3139997683CE7 // 252
-data8 0x3FD3216A9BB59E7C // 253
-data8 0x3FD32F4281A3CEFE // 254
-data8 0x3FD33D2150110091 // 255
-//
-// Lo parts of ln(1/frcpa(1+i/256))/ln(10), i=0...255
-data4 0x1FB0EB5A // 0
-data4 0x206E5EE3 // 1
-data4 0x208F3609 // 2
-data4 0x2070EB03 // 3
-data4 0x1F314BAE // 4
-data4 0x217A889D // 5
-data4 0x21E63650 // 6
-data4 0x21C2F4A3 // 7
-data4 0x2192A10C // 8
-data4 0x1F84B73E // 9
-data4 0x2243FBCA // 10
-data4 0x21BD9C51 // 11
-data4 0x213C542B // 12
-data4 0x21047386 // 13
-data4 0x21217D8F // 14
-data4 0x226791B7 // 15
-data4 0x204CCE66 // 16
-data4 0x2234CE9F // 17
-data4 0x220675E2 // 18
-data4 0x22B8E5BA // 19
-data4 0x22C12D14 // 20
-data4 0x211D41F0 // 21
-data4 0x228507F3 // 22
-data4 0x22F7274B // 23
-data4 0x22A7FDD1 // 24
-data4 0x2244A06E // 25
-data4 0x215DCE69 // 26
-data4 0x22F5C961 // 27
-data4 0x22EBEF29 // 28
-data4 0x222A2CB6 // 29
-data4 0x22B9FE00 // 30
-data4 0x22E79EB7 // 31
-data4 0x222F9607 // 32
-data4 0x2189D87F // 33
-data4 0x2236DB45 // 34
-data4 0x22ED77FB // 35
-data4 0x21CB70F0 // 36
-data4 0x21B8ACE8 // 37
-data4 0x22EC58C1 // 38
-data4 0x22CFCC1C // 39
-data4 0x2343E77A // 40
-data4 0x237FBC7F // 41
-data4 0x230D472E // 42
-data4 0x234686FB // 43
-data4 0x23770425 // 44
-data4 0x223977EC // 45
-data4 0x2345800A // 46
-data4 0x237BC351 // 47
-data4 0x23191502 // 48
-data4 0x232BAC12 // 49
-data4 0x22692421 // 50
-data4 0x234D409D // 51
-data4 0x22EC3214 // 52
-data4 0x2376C916 // 53
-data4 0x22B00DD1 // 54
-data4 0x2309D910 // 55
-data4 0x22F925FD // 56
-data4 0x22A63A7B // 57
-data4 0x2106264A // 58
-data4 0x234227F9 // 59
-data4 0x1ECB1978 // 60
-data4 0x23460A62 // 61
-data4 0x232ED4B1 // 62
-data4 0x226DDC38 // 63
-data4 0x1F101A73 // 64
-data4 0x21B1F82B // 65
-data4 0x22752F19 // 66
-data4 0x2320BC15 // 67
-data4 0x236EEC5E // 68
-data4 0x23404D3E // 69
-data4 0x2304C517 // 70
-data4 0x22F7441A // 71
-data4 0x230D3D7A // 72
-data4 0x2264A9DF // 73
-data4 0x22410CC8 // 74
-data4 0x2342CCCB // 75
-data4 0x23560BD4 // 76
-data4 0x237BBFFE // 77
-data4 0x2373A206 // 78
-data4 0x22C871B9 // 79
-data4 0x2354B70C // 80
-data4 0x232EDB33 // 81
-data4 0x235DB680 // 82
-data4 0x230EF422 // 83
-data4 0x235316CA // 84
-data4 0x22EEEE8B // 85
-data4 0x2375C88C // 86
-data4 0x235ABD21 // 87
-data4 0x23A0D232 // 88
-data4 0x23F5FFB5 // 89
-data4 0x23D3CEC8 // 90
-data4 0x22A92204 // 91
-data4 0x238C64DF // 92
-data4 0x23B82896 // 93
-data4 0x22D633B8 // 94
-data4 0x23861E93 // 95
-data4 0x23CB594B // 96
-data4 0x2330387E // 97
-data4 0x21CD4702 // 98
-data4 0x2284C505 // 99
-data4 0x23D6995C // 100
-data4 0x23F6C807 // 101
-data4 0x239CEF5C // 102
-data4 0x239442B0 // 103
-data4 0x22B35EE5 // 104
-data4 0x2391E9A4 // 105
-data4 0x23A390F5 // 106
-data4 0x2349AC9C // 107
-data4 0x23FA5535 // 108
-data4 0x21E3A46A // 109
-data4 0x23B44ABA // 110
-data4 0x23CEA8E0 // 111
-data4 0x23F647DC // 112
-data4 0x2390D1A8 // 113
-data4 0x23D0CFA2 // 114
-data4 0x236E0872 // 115
-data4 0x23B88B91 // 116
-data4 0x2283C359 // 117
-data4 0x232F647F // 118
-data4 0x23122CD7 // 119
-data4 0x232CF564 // 120
-data4 0x232630FD // 121
-data4 0x23BEE1C8 // 122
-data4 0x23B2BD30 // 123
-data4 0x2301F1C0 // 124
-data4 0x23CE4D67 // 125
-data4 0x23A353C9 // 126
-data4 0x238086E8 // 127
-data4 0x22D0D29E // 128
-data4 0x23A3B3C8 // 129
-data4 0x23F69F4B // 130
-data4 0x23EA3C21 // 131
-data4 0x23951C88 // 132
-data4 0x2372AFFC // 133
-data4 0x23A6D1A8 // 134
-data4 0x22BBBAF4 // 135
-data4 0x227FA3DD // 136
-data4 0x23804D9B // 137
-data4 0x232D771F // 138
-data4 0x239CB57B // 139
-data4 0x2303CF34 // 140
-data4 0x22218C2A // 141
-data4 0x23991BEE // 142
-data4 0x23EB3596 // 143
-data4 0x230487FA // 144
-data4 0x2135DF4C // 145
-data4 0x2380FD2D // 146
-data4 0x23EB75E9 // 147
-data4 0x211C62C8 // 148
-data4 0x23F518F1 // 149
-data4 0x23FEF882 // 150
-data4 0x239097C7 // 151
-data4 0x223E2BDA // 152
-data4 0x23988F89 // 153
-data4 0x22E4A4AD // 154
-data4 0x23F03D9C // 155
-data4 0x23F5018F // 156
-data4 0x23E1E250 // 157
-data4 0x23FD3D90 // 158
-data4 0x22DEE2FF // 159
-data4 0x238342AB // 160
-data4 0x22E6736F // 161
-data4 0x233AFC28 // 162
-data4 0x2395F661 // 163
-data4 0x23D8B991 // 164
-data4 0x23CD58D5 // 165
-data4 0x21941FD6 // 166
-data4 0x23352915 // 167
-data4 0x235D09EE // 168
-data4 0x22DC7EF9 // 169
-data4 0x238BC9F3 // 170
-data4 0x2397DF8F // 171
-data4 0x2380A7BB // 172
-data4 0x23EFF48C // 173
-data4 0x21E67408 // 174
-data4 0x236420F7 // 175
-data4 0x22C8DFB5 // 176
-data4 0x239B5D35 // 177
-data4 0x23BDC09D // 178
-data4 0x239E822C // 179
-data4 0x23984F0A // 180
-data4 0x23EF2119 // 181
-data4 0x23F738B8 // 182
-data4 0x23B66187 // 183
-data4 0x23B06AD7 // 184
-data4 0x2369140F // 185
-data4 0x218DACE6 // 186
-data4 0x21DF23F1 // 187
-data4 0x235D8B34 // 188
-data4 0x23460333 // 189
-data4 0x23F11D62 // 190
-data4 0x23C37147 // 191
-data4 0x22B2AE2A // 192
-data4 0x23949211 // 193
-data4 0x23B69799 // 194
-data4 0x23DBEC75 // 195
-data4 0x229A6FB3 // 196
-data4 0x23FC6C60 // 197
-data4 0x22D01FFC // 198
-data4 0x235985F0 // 199
-data4 0x23F7ECA5 // 200
-data4 0x23F924D3 // 201
-data4 0x2381B92F // 202
-data4 0x243A0FBE // 203
-data4 0x24712D72 // 204
-data4 0x24594E2F // 205
-data4 0x220CD12A // 206
-data4 0x23D87FB0 // 207
-data4 0x2338288A // 208
-data4 0x242BB2CC // 209
-data4 0x220F6265 // 210
-data4 0x23BB7FE3 // 211
-data4 0x2301C0A2 // 212
-data4 0x246709AB // 213
-data4 0x23A619E2 // 214
-data4 0x24030E3B // 215
-data4 0x233C36CC // 216
-data4 0x241AAB77 // 217
-data4 0x243D41A3 // 218
-data4 0x23834A60 // 219
-data4 0x236AC7BF // 220
-data4 0x23B6D597 // 221
-data4 0x210E9474 // 222
-data4 0x242156E6 // 223
-data4 0x243A1D68 // 224
-data4 0x2472187C // 225
-data4 0x23834E86 // 226
-data4 0x23CA0807 // 227
-data4 0x24745887 // 228
-data4 0x23E2B0E1 // 229
-data4 0x2421EB67 // 230
-data4 0x23DCC64E // 231
-data4 0x22DF71D1 // 232
-data4 0x238D5ECA // 233
-data4 0x23CDE86F // 234
-data4 0x24131F45 // 235
-data4 0x240FE4E2 // 236
-data4 0x2317731A // 237
-data4 0x24015C76 // 238
-data4 0x2301A4E8 // 239
-data4 0x23E52A6D // 240
-data4 0x247D8A0D // 241
-data4 0x23DFEEBA // 242
-data4 0x22139FEC // 243
-data4 0x2454A112 // 244
-data4 0x23C21E28 // 245
-data4 0x2460D813 // 246
-data4 0x24258924 // 247
-data4 0x2425680F // 248
-data4 0x24194D1E // 249
-data4 0x24242C2F // 250
-data4 0x243DDE5E // 251
-data4 0x23DEB388 // 252
-data4 0x23E0E6EB // 253
-data4 0x24393E74 // 254
-data4 0x241B1863 // 255
-LOCAL_OBJECT_END(log10_data)
-
-
-
-// Code
-//==============================================================
+log_table_1:
+ASM_TYPE_DIRECTIVE(log_table_1,@object)
+data8 0xBFC5555DA7212371 // P5
+data8 0x3FC999A19EEF5826 // P4
+data8 0x3FBC756AC654273B // Q8
+data8 0xBFC001A42489AB4D // Q7
+data8 0x3FC99999999A169B // Q4
+data8 0xBFD00000000019AC // Q3
+ASM_SIZE_DIRECTIVE(log_table_1)
+log_table_2:
+ASM_TYPE_DIRECTIVE(log_table_2,@object)
+data8 0xBFCFFFFFFFFEF009 // P3
+data8 0x3FD555555554ECB2 // P2
+data8 0x3FC2492479AA0DF8 // Q6
+data8 0xBFC5555544986F52 // Q5
+data8 0x3FD5555555555555 // Q2
+data8 0xBFE0000000000000 // Q1, P1 = -0.5
+
+
+data8 0xde5bd8a937287195, 0x00003ffd  // double-extended 1/ln(10)
+data8 0xb17217f7d1cf79ac, 0x00003ffe  // double-extended ln(2)
+//      ln(2) significand before rounding to 64 bits: b17217f7d1cf79ab c9e3b39803f2f6a
+
+
+data8 0x80200aaeac44ef38 , 0x00003ff6 //   log(1/frcpa(1+  0/2^8)); step is 1/256 -- "2^-8" in the entries below should read 2^8
+
+data8 0xc09090a2c35aa070 , 0x00003ff7 //   log(1/frcpa(1+  1/2^-8))
+data8 0xa0c94fcb41977c75 , 0x00003ff8 //   log(1/frcpa(1+  2/2^-8))
+data8 0xe18b9c263af83301 , 0x00003ff8 //   log(1/frcpa(1+  3/2^-8))
+data8 0x8d35c8d6399c30ea , 0x00003ff9 //   log(1/frcpa(1+  4/2^-8))
+data8 0xadd4d2ecd601cbb8 , 0x00003ff9 //   log(1/frcpa(1+  5/2^-8))
+
+data8 0xce95403a192f9f01 , 0x00003ff9 //   log(1/frcpa(1+  6/2^-8))
+data8 0xeb59392cbcc01096 , 0x00003ff9 //   log(1/frcpa(1+  7/2^-8))
+data8 0x862c7d0cefd54c5d , 0x00003ffa //   log(1/frcpa(1+  8/2^-8))
+data8 0x94aa63c65e70d499 , 0x00003ffa //   log(1/frcpa(1+  9/2^-8))
+data8 0xa54a696d4b62b382 , 0x00003ffa //   log(1/frcpa(1+ 10/2^-8))
+
+data8 0xb3e4a796a5dac208 , 0x00003ffa //   log(1/frcpa(1+ 11/2^-8))
+data8 0xc28c45b1878340a9 , 0x00003ffa //   log(1/frcpa(1+ 12/2^-8))
+data8 0xd35c55f39d7a6235 , 0x00003ffa //   log(1/frcpa(1+ 13/2^-8))
+data8 0xe220f037b954f1f5 , 0x00003ffa //   log(1/frcpa(1+ 14/2^-8))
+data8 0xf0f3389b036834f3 , 0x00003ffa //   log(1/frcpa(1+ 15/2^-8))
+
+data8 0xffd3488d5c980465 , 0x00003ffa //   log(1/frcpa(1+ 16/2^-8))
+data8 0x87609ce2ed300490 , 0x00003ffb //   log(1/frcpa(1+ 17/2^-8))
+data8 0x8ede9321e8c85927 , 0x00003ffb //   log(1/frcpa(1+ 18/2^-8))
+data8 0x96639427f2f8e2f4 , 0x00003ffb //   log(1/frcpa(1+ 19/2^-8))
+data8 0x9defad3e8f73217b , 0x00003ffb //   log(1/frcpa(1+ 20/2^-8))
+
+data8 0xa582ebd50097029c , 0x00003ffb //   log(1/frcpa(1+ 21/2^-8))
+data8 0xac06dbe75ab80fee , 0x00003ffb //   log(1/frcpa(1+ 22/2^-8))
+data8 0xb3a78449b2d3ccca , 0x00003ffb //   log(1/frcpa(1+ 23/2^-8))
+data8 0xbb4f79635ab46bb2 , 0x00003ffb //   log(1/frcpa(1+ 24/2^-8))
+data8 0xc2fec93a83523f3f , 0x00003ffb //   log(1/frcpa(1+ 25/2^-8))
+
+data8 0xc99af2eaca4c4571 , 0x00003ffb //   log(1/frcpa(1+ 26/2^-8))
+data8 0xd1581106472fa653 , 0x00003ffb //   log(1/frcpa(1+ 27/2^-8))
+data8 0xd8002560d4355f2e , 0x00003ffb //   log(1/frcpa(1+ 28/2^-8))
+data8 0xdfcb43b4fe508632 , 0x00003ffb //   log(1/frcpa(1+ 29/2^-8))
+data8 0xe67f6dff709d4119 , 0x00003ffb //   log(1/frcpa(1+ 30/2^-8))
+
+data8 0xed393b1c22351280 , 0x00003ffb //   log(1/frcpa(1+ 31/2^-8))
+data8 0xf5192bff087bcc35 , 0x00003ffb //   log(1/frcpa(1+ 32/2^-8))
+data8 0xfbdf4ff6dfef2fa3 , 0x00003ffb //   log(1/frcpa(1+ 33/2^-8))
+data8 0x81559a97f92f9cc7 , 0x00003ffc //   log(1/frcpa(1+ 34/2^-8))
+data8 0x84be72bce90266e8 , 0x00003ffc //   log(1/frcpa(1+ 35/2^-8))
+
+data8 0x88bc74113f23def2 , 0x00003ffc //   log(1/frcpa(1+ 36/2^-8))
+data8 0x8c2ba3edf6799d11 , 0x00003ffc //   log(1/frcpa(1+ 37/2^-8))
+data8 0x8f9dc92f92ea08b1 , 0x00003ffc //   log(1/frcpa(1+ 38/2^-8))
+data8 0x9312e8f36efab5a7 , 0x00003ffc //   log(1/frcpa(1+ 39/2^-8))
+data8 0x968b08643409ceb6 , 0x00003ffc //   log(1/frcpa(1+ 40/2^-8))
+
+data8 0x9a062cba08a1708c , 0x00003ffc //   log(1/frcpa(1+ 41/2^-8))
+data8 0x9d845b3abf95485c , 0x00003ffc //   log(1/frcpa(1+ 42/2^-8))
+data8 0xa06fd841bc001bb4 , 0x00003ffc //   log(1/frcpa(1+ 43/2^-8))
+data8 0xa3f3a74652fbe0db , 0x00003ffc //   log(1/frcpa(1+ 44/2^-8))
+data8 0xa77a8fb2336f20f5 , 0x00003ffc //   log(1/frcpa(1+ 45/2^-8))
+
+data8 0xab0497015d28b0a0 , 0x00003ffc //   log(1/frcpa(1+ 46/2^-8))
+data8 0xae91c2be6ba6a615 , 0x00003ffc //   log(1/frcpa(1+ 47/2^-8))
+data8 0xb189d1b99aebb20b , 0x00003ffc //   log(1/frcpa(1+ 48/2^-8))
+data8 0xb51cced5de9c1b2c , 0x00003ffc //   log(1/frcpa(1+ 49/2^-8))
+data8 0xb819bee9e720d42f , 0x00003ffc //   log(1/frcpa(1+ 50/2^-8))
+
+data8 0xbbb2a0947b093a5d , 0x00003ffc //   log(1/frcpa(1+ 51/2^-8))
+data8 0xbf4ec1505811684a , 0x00003ffc //   log(1/frcpa(1+ 52/2^-8))
+data8 0xc2535bacfa8975ff , 0x00003ffc //   log(1/frcpa(1+ 53/2^-8))
+data8 0xc55a3eafad187eb8 , 0x00003ffc //   log(1/frcpa(1+ 54/2^-8))
+data8 0xc8ff2484b2c0da74 , 0x00003ffc //   log(1/frcpa(1+ 55/2^-8))
+
+data8 0xcc0b1a008d53ab76 , 0x00003ffc //   log(1/frcpa(1+ 56/2^-8))
+data8 0xcfb6203844b3209b , 0x00003ffc //   log(1/frcpa(1+ 57/2^-8))
+data8 0xd2c73949a47a19f5 , 0x00003ffc //   log(1/frcpa(1+ 58/2^-8))
+data8 0xd5daae18b49d6695 , 0x00003ffc //   log(1/frcpa(1+ 59/2^-8))
+data8 0xd8f08248cf7e8019 , 0x00003ffc //   log(1/frcpa(1+ 60/2^-8))
+
+data8 0xdca7749f1b3e540e , 0x00003ffc //   log(1/frcpa(1+ 61/2^-8))
+data8 0xdfc28e033aaaf7c7 , 0x00003ffc //   log(1/frcpa(1+ 62/2^-8))
+data8 0xe2e012a5f91d2f55 , 0x00003ffc //   log(1/frcpa(1+ 63/2^-8))
+data8 0xe600064ed9e292a8 , 0x00003ffc //   log(1/frcpa(1+ 64/2^-8))
+data8 0xe9226cce42b39f60 , 0x00003ffc //   log(1/frcpa(1+ 65/2^-8))
+
+data8 0xec4749fd97a28360 , 0x00003ffc //   log(1/frcpa(1+ 66/2^-8))
+data8 0xef6ea1bf57780495 , 0x00003ffc //   log(1/frcpa(1+ 67/2^-8))
+data8 0xf29877ff38809091 , 0x00003ffc //   log(1/frcpa(1+ 68/2^-8))
+data8 0xf5c4d0b245cb89be , 0x00003ffc //   log(1/frcpa(1+ 69/2^-8))
+data8 0xf8f3afd6fcdef3aa , 0x00003ffc //   log(1/frcpa(1+ 70/2^-8))
+
+data8 0xfc2519756be1abc7 , 0x00003ffc //   log(1/frcpa(1+ 71/2^-8))
+data8 0xff59119f503e6832 , 0x00003ffc //   log(1/frcpa(1+ 72/2^-8))
+data8 0x8147ce381ae0e146 , 0x00003ffd //   log(1/frcpa(1+ 73/2^-8))
+data8 0x82e45f06cb1ad0f2 , 0x00003ffd //   log(1/frcpa(1+ 74/2^-8))
+data8 0x842f5c7c573cbaa2 , 0x00003ffd //   log(1/frcpa(1+ 75/2^-8))
+
+data8 0x85ce471968c8893a , 0x00003ffd //   log(1/frcpa(1+ 76/2^-8))
+data8 0x876e8305bc04066d , 0x00003ffd //   log(1/frcpa(1+ 77/2^-8))
+data8 0x891012678031fbb3 , 0x00003ffd //   log(1/frcpa(1+ 78/2^-8))
+data8 0x8a5f1493d766a05f , 0x00003ffd //   log(1/frcpa(1+ 79/2^-8))
+data8 0x8c030c778c56fa00 , 0x00003ffd //   log(1/frcpa(1+ 80/2^-8))
+
+data8 0x8da85df17e31d9ae , 0x00003ffd //   log(1/frcpa(1+ 81/2^-8))
+data8 0x8efa663e7921687e , 0x00003ffd //   log(1/frcpa(1+ 82/2^-8))
+data8 0x90a22b6875c6a1f8 , 0x00003ffd //   log(1/frcpa(1+ 83/2^-8))
+data8 0x91f62cc8f5d24837 , 0x00003ffd //   log(1/frcpa(1+ 84/2^-8))
+data8 0x93a06cfc3857d980 , 0x00003ffd //   log(1/frcpa(1+ 85/2^-8))
+
+data8 0x94f66d5e6fd01ced , 0x00003ffd //   log(1/frcpa(1+ 86/2^-8))
+data8 0x96a330156e6772f2 , 0x00003ffd //   log(1/frcpa(1+ 87/2^-8))
+data8 0x97fb3582754ea25b , 0x00003ffd //   log(1/frcpa(1+ 88/2^-8))
+data8 0x99aa8259aad1bbf2 , 0x00003ffd //   log(1/frcpa(1+ 89/2^-8))
+data8 0x9b0492f6227ae4a8 , 0x00003ffd //   log(1/frcpa(1+ 90/2^-8))
+
+data8 0x9c5f8e199bf3a7a5 , 0x00003ffd //   log(1/frcpa(1+ 91/2^-8))
+data8 0x9e1293b9998c1daa , 0x00003ffd //   log(1/frcpa(1+ 92/2^-8))
+data8 0x9f6fa31e0b41f308 , 0x00003ffd //   log(1/frcpa(1+ 93/2^-8))
+data8 0xa0cda11eaf46390e , 0x00003ffd //   log(1/frcpa(1+ 94/2^-8))
+data8 0xa22c8f029cfa45aa , 0x00003ffd //   log(1/frcpa(1+ 95/2^-8))
+
+data8 0xa3e48badb7856b34 , 0x00003ffd //   log(1/frcpa(1+ 96/2^-8))
+data8 0xa5459a0aa95849f9 , 0x00003ffd //   log(1/frcpa(1+ 97/2^-8))
+data8 0xa6a79c84480cfebd , 0x00003ffd //   log(1/frcpa(1+ 98/2^-8))
+data8 0xa80a946d0fcb3eb2 , 0x00003ffd //   log(1/frcpa(1+ 99/2^-8))
+data8 0xa96e831a3ea7b314 , 0x00003ffd //   log(1/frcpa(1+100/2^-8))
+
+data8 0xaad369e3dc544e3b , 0x00003ffd //   log(1/frcpa(1+101/2^-8))
+data8 0xac92e9588952c815 , 0x00003ffd //   log(1/frcpa(1+102/2^-8))
+data8 0xadfa035aa1ed8fdc , 0x00003ffd //   log(1/frcpa(1+103/2^-8))
+data8 0xaf6219eae1ad6e34 , 0x00003ffd //   log(1/frcpa(1+104/2^-8))
+data8 0xb0cb2e6d8160f753 , 0x00003ffd //   log(1/frcpa(1+105/2^-8))
+
+data8 0xb2354249ad950f72 , 0x00003ffd //   log(1/frcpa(1+106/2^-8))
+data8 0xb3a056e98ef4a3b4 , 0x00003ffd //   log(1/frcpa(1+107/2^-8))
+data8 0xb50c6dba52c6292a , 0x00003ffd //   log(1/frcpa(1+108/2^-8))
+data8 0xb679882c33876165 , 0x00003ffd //   log(1/frcpa(1+109/2^-8))
+data8 0xb78c07429785cedc , 0x00003ffd //   log(1/frcpa(1+110/2^-8))
+
+data8 0xb8faeb8dc4a77d24 , 0x00003ffd //   log(1/frcpa(1+111/2^-8))
+data8 0xba6ad77eb36ae0d6 , 0x00003ffd //   log(1/frcpa(1+112/2^-8))
+data8 0xbbdbcc915e9bee50 , 0x00003ffd //   log(1/frcpa(1+113/2^-8))
+data8 0xbd4dcc44f8cf12ef , 0x00003ffd //   log(1/frcpa(1+114/2^-8))
+data8 0xbec0d81bf5b531fa , 0x00003ffd //   log(1/frcpa(1+115/2^-8))
+
+data8 0xc034f19c139186f4 , 0x00003ffd //   log(1/frcpa(1+116/2^-8))
+data8 0xc14cb69f7c5e55ab , 0x00003ffd //   log(1/frcpa(1+117/2^-8))
+data8 0xc2c2abbb6e5fd56f , 0x00003ffd //   log(1/frcpa(1+118/2^-8))
+data8 0xc439b2c193e6771e , 0x00003ffd //   log(1/frcpa(1+119/2^-8))
+data8 0xc553acb9d5c67733 , 0x00003ffd //   log(1/frcpa(1+120/2^-8))
+
+data8 0xc6cc96e441272441 , 0x00003ffd //   log(1/frcpa(1+121/2^-8))
+data8 0xc8469753eca88c30 , 0x00003ffd //   log(1/frcpa(1+122/2^-8))
+data8 0xc962cf3ce072b05c , 0x00003ffd //   log(1/frcpa(1+123/2^-8))
+data8 0xcadeba8771f694aa , 0x00003ffd //   log(1/frcpa(1+124/2^-8))
+data8 0xcc5bc08d1f72da94 , 0x00003ffd //   log(1/frcpa(1+125/2^-8))
+
+data8 0xcd7a3f99ea035c29 , 0x00003ffd //   log(1/frcpa(1+126/2^-8))
+data8 0xcef93860c8a53c35 , 0x00003ffd //   log(1/frcpa(1+127/2^-8))
+data8 0xd0192f68a7ed23df , 0x00003ffd //   log(1/frcpa(1+128/2^-8))
+data8 0xd19a201127d3c645 , 0x00003ffd //   log(1/frcpa(1+129/2^-8))
+data8 0xd2bb92f4061c172c , 0x00003ffd //   log(1/frcpa(1+130/2^-8))
+
+data8 0xd43e80b2ee8cc8fc , 0x00003ffd //   log(1/frcpa(1+131/2^-8))
+data8 0xd56173601fc4ade4 , 0x00003ffd //   log(1/frcpa(1+132/2^-8))
+data8 0xd6e6637efb54086f , 0x00003ffd //   log(1/frcpa(1+133/2^-8))
+data8 0xd80ad9f58f3c8193 , 0x00003ffd //   log(1/frcpa(1+134/2^-8))
+data8 0xd991d1d31aca41f8 , 0x00003ffd //   log(1/frcpa(1+135/2^-8))
 
-// log   has p13 true, p14 false
-// log10 has p14 true, p13 false
+data8 0xdab7d02231484a93 , 0x00003ffd //   log(1/frcpa(1+136/2^-8))
+data8 0xdc40d532cde49a54 , 0x00003ffd //   log(1/frcpa(1+137/2^-8))
+data8 0xdd685f79ed8b265e , 0x00003ffd //   log(1/frcpa(1+138/2^-8))
+data8 0xde9094bbc0e17b1d , 0x00003ffd //   log(1/frcpa(1+139/2^-8))
+data8 0xe01c91b78440c425 , 0x00003ffd //   log(1/frcpa(1+140/2^-8))
+
+data8 0xe14658f26997e729 , 0x00003ffd //   log(1/frcpa(1+141/2^-8))
+data8 0xe270cdc2391e0d23 , 0x00003ffd //   log(1/frcpa(1+142/2^-8))
+data8 0xe3ffce3a2aa64922 , 0x00003ffd //   log(1/frcpa(1+143/2^-8))
+data8 0xe52bdb274ed82887 , 0x00003ffd //   log(1/frcpa(1+144/2^-8))
+data8 0xe6589852e75d7df6 , 0x00003ffd //   log(1/frcpa(1+145/2^-8))
+
+data8 0xe786068c79937a7d , 0x00003ffd //   log(1/frcpa(1+146/2^-8))
+data8 0xe91903adad100911 , 0x00003ffd //   log(1/frcpa(1+147/2^-8))
+data8 0xea481236f7d35bb0 , 0x00003ffd //   log(1/frcpa(1+148/2^-8))
+data8 0xeb77d48c692e6b14 , 0x00003ffd //   log(1/frcpa(1+149/2^-8))
+data8 0xeca84b83d7297b87 , 0x00003ffd //   log(1/frcpa(1+150/2^-8))
+
+data8 0xedd977f4962aa158 , 0x00003ffd //   log(1/frcpa(1+151/2^-8))
+data8 0xef7179a22f257754 , 0x00003ffd //   log(1/frcpa(1+152/2^-8))
+data8 0xf0a450d139366ca7 , 0x00003ffd //   log(1/frcpa(1+153/2^-8))
+data8 0xf1d7e0524ff9ffdb , 0x00003ffd //   log(1/frcpa(1+154/2^-8))
+data8 0xf30c29036a8b6cae , 0x00003ffd //   log(1/frcpa(1+155/2^-8))
+
+data8 0xf4412bc411ea8d92 , 0x00003ffd //   log(1/frcpa(1+156/2^-8))
+data8 0xf576e97564c8619d , 0x00003ffd //   log(1/frcpa(1+157/2^-8))
+data8 0xf6ad62fa1b5f172f , 0x00003ffd //   log(1/frcpa(1+158/2^-8))
+data8 0xf7e499368b55c542 , 0x00003ffd //   log(1/frcpa(1+159/2^-8))
+data8 0xf91c8d10abaffe22 , 0x00003ffd //   log(1/frcpa(1+160/2^-8))
+
+data8 0xfa553f7018c966f3 , 0x00003ffd //   log(1/frcpa(1+161/2^-8))
+data8 0xfb8eb13e185d802c , 0x00003ffd //   log(1/frcpa(1+162/2^-8))
+data8 0xfcc8e3659d9bcbed , 0x00003ffd //   log(1/frcpa(1+163/2^-8))
+data8 0xfe03d6d34d487fd2 , 0x00003ffd //   log(1/frcpa(1+164/2^-8))
+data8 0xff3f8c7581e9f0ae , 0x00003ffd //   log(1/frcpa(1+165/2^-8))
+
+data8 0x803e029e280173ae , 0x00003ffe //   log(1/frcpa(1+166/2^-8))
+data8 0x80dca10cc52d0757 , 0x00003ffe //   log(1/frcpa(1+167/2^-8))
+data8 0x817ba200632755a1 , 0x00003ffe //   log(1/frcpa(1+168/2^-8))
+data8 0x821b05f3b01d6774 , 0x00003ffe //   log(1/frcpa(1+169/2^-8))
+data8 0x82bacd623ff19d06 , 0x00003ffe //   log(1/frcpa(1+170/2^-8))
+
+data8 0x835af8c88e7a8f47 , 0x00003ffe //   log(1/frcpa(1+171/2^-8))
+data8 0x83c5f8299e2b4091 , 0x00003ffe //   log(1/frcpa(1+172/2^-8))
+data8 0x8466cb43f3d87300 , 0x00003ffe //   log(1/frcpa(1+173/2^-8))
+data8 0x850803a67c80ca4b , 0x00003ffe //   log(1/frcpa(1+174/2^-8))
+data8 0x85a9a1d11a23b461 , 0x00003ffe //   log(1/frcpa(1+175/2^-8))
+
+data8 0x864ba644a18e6e05 , 0x00003ffe //   log(1/frcpa(1+176/2^-8))
+data8 0x86ee1182dcc432f7 , 0x00003ffe //   log(1/frcpa(1+177/2^-8))
+data8 0x875a925d7e48c316 , 0x00003ffe //   log(1/frcpa(1+178/2^-8))
+data8 0x87fdaa109d23aef7 , 0x00003ffe //   log(1/frcpa(1+179/2^-8))
+data8 0x88a129ed4becfaf2 , 0x00003ffe //   log(1/frcpa(1+180/2^-8))
+
+data8 0x89451278ecd7f9cf , 0x00003ffe //   log(1/frcpa(1+181/2^-8))
+data8 0x89b29295f8432617 , 0x00003ffe //   log(1/frcpa(1+182/2^-8))
+data8 0x8a572ac5a5496882 , 0x00003ffe //   log(1/frcpa(1+183/2^-8))
+data8 0x8afc2d0ce3b2dadf , 0x00003ffe //   log(1/frcpa(1+184/2^-8))
+data8 0x8b6a69c608cfd3af , 0x00003ffe //   log(1/frcpa(1+185/2^-8))
+
+data8 0x8c101e106e899a83 , 0x00003ffe //   log(1/frcpa(1+186/2^-8))
+data8 0x8cb63de258f9d626 , 0x00003ffe //   log(1/frcpa(1+187/2^-8))
+data8 0x8d2539c5bd19e2b1 , 0x00003ffe //   log(1/frcpa(1+188/2^-8))
+data8 0x8dcc0e064b29e6f1 , 0x00003ffe //   log(1/frcpa(1+189/2^-8))
+data8 0x8e734f45d88357ae , 0x00003ffe //   log(1/frcpa(1+190/2^-8))
+
+data8 0x8ee30cef034a20db , 0x00003ffe //   log(1/frcpa(1+191/2^-8))
+data8 0x8f8b0515686d1d06 , 0x00003ffe //   log(1/frcpa(1+192/2^-8))
+data8 0x90336bba039bf32f , 0x00003ffe //   log(1/frcpa(1+193/2^-8))
+data8 0x90a3edd23d1c9d58 , 0x00003ffe //   log(1/frcpa(1+194/2^-8))
+data8 0x914d0de2f5d61b32 , 0x00003ffe //   log(1/frcpa(1+195/2^-8))
+
+data8 0x91be0c20d28173b5 , 0x00003ffe //   log(1/frcpa(1+196/2^-8))
+data8 0x9267e737c06cd34a , 0x00003ffe //   log(1/frcpa(1+197/2^-8))
+data8 0x92d962ae6abb1237 , 0x00003ffe //   log(1/frcpa(1+198/2^-8))
+data8 0x9383fa6afbe2074c , 0x00003ffe //   log(1/frcpa(1+199/2^-8))
+data8 0x942f0421651c1c4e , 0x00003ffe //   log(1/frcpa(1+200/2^-8))
+
+data8 0x94a14a3845bb985e , 0x00003ffe //   log(1/frcpa(1+201/2^-8))
+data8 0x954d133857f861e7 , 0x00003ffe //   log(1/frcpa(1+202/2^-8))
+data8 0x95bfd96468e604c4 , 0x00003ffe //   log(1/frcpa(1+203/2^-8))
+data8 0x9632d31cafafa858 , 0x00003ffe //   log(1/frcpa(1+204/2^-8))
+data8 0x96dfaabd86fa1647 , 0x00003ffe //   log(1/frcpa(1+205/2^-8))
+
+data8 0x9753261fcbb2a594 , 0x00003ffe //   log(1/frcpa(1+206/2^-8))
+data8 0x9800c11b426b996d , 0x00003ffe //   log(1/frcpa(1+207/2^-8))
+data8 0x9874bf4d45ae663c , 0x00003ffe //   log(1/frcpa(1+208/2^-8))
+data8 0x99231f5ee9a74f79 , 0x00003ffe //   log(1/frcpa(1+209/2^-8))
+data8 0x9997a18a56bcad28 , 0x00003ffe //   log(1/frcpa(1+210/2^-8))
+
+data8 0x9a46c873a3267e79 , 0x00003ffe //   log(1/frcpa(1+211/2^-8))
+data8 0x9abbcfc621eb6cb6 , 0x00003ffe //   log(1/frcpa(1+212/2^-8))
+data8 0x9b310cb0d354c990 , 0x00003ffe //   log(1/frcpa(1+213/2^-8))
+data8 0x9be14cf9e1b3515c , 0x00003ffe //   log(1/frcpa(1+214/2^-8))
+data8 0x9c5710b8cbb73a43 , 0x00003ffe //   log(1/frcpa(1+215/2^-8))
+
+data8 0x9ccd0abd301f399c , 0x00003ffe //   log(1/frcpa(1+216/2^-8))
+data8 0x9d7e67f3bdce8888 , 0x00003ffe //   log(1/frcpa(1+217/2^-8))
+data8 0x9df4ea81a99daa01 , 0x00003ffe //   log(1/frcpa(1+218/2^-8))
+data8 0x9e6ba405a54514ba , 0x00003ffe //   log(1/frcpa(1+219/2^-8))
+data8 0x9f1e21c8c7bb62b3 , 0x00003ffe //   log(1/frcpa(1+220/2^-8))
+
+data8 0x9f956593f6b6355c , 0x00003ffe //   log(1/frcpa(1+221/2^-8))
+data8 0xa00ce1092e5498c3 , 0x00003ffe //   log(1/frcpa(1+222/2^-8))
+data8 0xa0c08309c4b912c1 , 0x00003ffe //   log(1/frcpa(1+223/2^-8))
+data8 0xa1388a8c6faa2afa , 0x00003ffe //   log(1/frcpa(1+224/2^-8))
+data8 0xa1b0ca7095b5f985 , 0x00003ffe //   log(1/frcpa(1+225/2^-8))
+
+data8 0xa22942eb47534a00 , 0x00003ffe //   log(1/frcpa(1+226/2^-8))
+data8 0xa2de62326449d0a3 , 0x00003ffe //   log(1/frcpa(1+227/2^-8))
+data8 0xa357690f88bfe345 , 0x00003ffe //   log(1/frcpa(1+228/2^-8))
+data8 0xa3d0a93f45169a4b , 0x00003ffe //   log(1/frcpa(1+229/2^-8))
+data8 0xa44a22f7ffe65f30 , 0x00003ffe //   log(1/frcpa(1+230/2^-8))
+
+data8 0xa500c5e5b4c1aa36 , 0x00003ffe //   log(1/frcpa(1+231/2^-8))
+data8 0xa57ad064eb2ebbc2 , 0x00003ffe //   log(1/frcpa(1+232/2^-8))
+data8 0xa5f5152dedf4384e , 0x00003ffe //   log(1/frcpa(1+233/2^-8))
+data8 0xa66f9478856233ec , 0x00003ffe //   log(1/frcpa(1+234/2^-8))
+data8 0xa6ea4e7cca02c32e , 0x00003ffe //   log(1/frcpa(1+235/2^-8))
+
+data8 0xa765437325341ccf , 0x00003ffe //   log(1/frcpa(1+236/2^-8))
+data8 0xa81e21e6c75b4020 , 0x00003ffe //   log(1/frcpa(1+237/2^-8))
+data8 0xa899ab333fe2b9ca , 0x00003ffe //   log(1/frcpa(1+238/2^-8))
+data8 0xa9157039c51ebe71 , 0x00003ffe //   log(1/frcpa(1+239/2^-8))
+data8 0xa991713433c2b999 , 0x00003ffe //   log(1/frcpa(1+240/2^-8))
+
+data8 0xaa0dae5cbcc048b3 , 0x00003ffe //   log(1/frcpa(1+241/2^-8))
+data8 0xaa8a27ede5eb13ad , 0x00003ffe //   log(1/frcpa(1+242/2^-8))
+data8 0xab06de228a9e3499 , 0x00003ffe //   log(1/frcpa(1+243/2^-8))
+data8 0xab83d135dc633301 , 0x00003ffe //   log(1/frcpa(1+244/2^-8))
+data8 0xac3fb076adc7fe7a , 0x00003ffe //   log(1/frcpa(1+245/2^-8))
+
+data8 0xacbd3cbbe47988f1 , 0x00003ffe //   log(1/frcpa(1+246/2^-8))
+data8 0xad3b06b1a5dc57c3 , 0x00003ffe //   log(1/frcpa(1+247/2^-8))
+data8 0xadb90e94af887717 , 0x00003ffe //   log(1/frcpa(1+248/2^-8))
+data8 0xae3754a218f7c816 , 0x00003ffe //   log(1/frcpa(1+249/2^-8))
+data8 0xaeb5d9175437afa2 , 0x00003ffe //   log(1/frcpa(1+250/2^-8))
+
+data8 0xaf349c322e9c7cee , 0x00003ffe //   log(1/frcpa(1+251/2^-8))
+data8 0xafb39e30d1768d1c , 0x00003ffe //   log(1/frcpa(1+252/2^-8))
+data8 0xb032df51c2c93116 , 0x00003ffe //   log(1/frcpa(1+253/2^-8))
+data8 0xb0b25fd3e6035ad9 , 0x00003ffe //   log(1/frcpa(1+254/2^-8))
+data8 0xb1321ff67cba178c , 0x00003ffe //   log(1/frcpa(1+255/2^-8))
+ASM_SIZE_DIRECTIVE(log_table_2)
+
+   
+.align 32
+.global log#
+.global log10#
+
+// log10 entry sets p7 true, p8 false (cmp.eq.unc p7,p8 = r0,r0)
+// log   entry sets p8 true, p7 false (cmp.eq.unc p8,p7 = r0,r0)
 
 .section .text
-GLOBAL_IEEE754_ENTRY(log10)
+.proc  log10#
+.align 32
+
+log10:
+#ifdef _LIBC
+.global __ieee754_log10
+.type __ieee754_log10,@function
+__ieee754_log10:
+#endif
 { .mfi
-      getf.exp      GR_Exp = f8 // if x is unorm then must recompute
-      frcpa.s1      FR_RcpX,p0 = f1,f8
-      mov           GR_05 = 0xFFFE // biased exponent of A2=0.5
+     alloc     r32=ar.pfs,1,15,4,0                    
+     frcpa.s1  log_C,p9 = f1,f8                 
+     cmp.eq.unc     p7,p8         = r0, r0 
 }
-{ .mlx
-      addl          GR_ad_1 = @ltoff(log10_data),gp
-      movl          GR_A3 = 0x3fd5555555555557 // double precision memory
-                                               // representation of A3
-};;
+{ .mfb
+     addl           log_AD_1   = @ltoff(log_table_1), gp
+     fnorm.s1 log_NORM_f8 = f8 
+     br.sptk        L(LOG_LOG10_X) 
+}
+;;
+
+.endp log10
+ASM_SIZE_DIRECTIVE(log10)
+ASM_SIZE_DIRECTIVE(__ieee754_log10)
+
+
+.section .text
+.proc  log#
+.align 32
+log: 
+#ifdef _LIBC
+.global __ieee754_log
+.type __ieee754_log,@function
+__ieee754_log:
+#endif
 
 { .mfi
-      getf.sig      GR_Sig = f8 // get significand to calculate index
-      fclass.m      p8,p0 = f8,9 // is x positive unorm?
-      mov           GR_xorg = 0x3fefe // double precision memory msb of 255/256
+     alloc     r32=ar.pfs,1,15,4,0                    
+     frcpa.s1  log_C,p9 = f1,f8                 
+     cmp.eq.unc     p8,p7         = r0, r0 
 }
-{ .mib
-      ld8           GR_ad_1 = [GR_ad_1]
-      cmp.eq        p14,p13 = r0,r0 // set p14 to 1 for log10
-      br.cond.sptk  log_log10_common
-};;
-GLOBAL_IEEE754_END(log10)
-
-GLOBAL_IEEE754_ENTRY(log)
 { .mfi
-      getf.exp      GR_Exp = f8 // if x is unorm then must recompute
-      frcpa.s1      FR_RcpX,p0 = f1,f8
-      mov           GR_05 = 0xfffe
+     addl           log_AD_1   = @ltoff(log_table_1), gp
+     fnorm.s1 log_NORM_f8 = f8 
+     nop.i 999
 }
-{ .mlx
-      addl          GR_ad_1 = @ltoff(log_data),gp
-      movl          GR_A3 = 0x3fd5555555555557 // double precision memory
-                                               // representation of A3
-};;
+;;
+
+L(LOG_LOG10_X):
 
 { .mfi
-      getf.sig      GR_Sig = f8 // get significand to calculate index
-      fclass.m      p8,p0 = f8,9 // is x positive unorm?
-      mov           GR_xorg = 0x3fefe // double precision memory msb of 255/256
+     ld8 log_AD_1 = [log_AD_1]
+     fclass.m.unc p15,p0 = f8, 0x0b            // Test for x=unorm
+     mov        log_GR_fff9 = 0xfff9
 }
 { .mfi
-      ld8           GR_ad_1 = [GR_ad_1]
-      nop.f         0
-      cmp.eq        p13,p14 = r0,r0 // set p13 to 1 for log
-};;
+     mov       log_GR_half_exp = 0x0fffe
+     fms.s1     log_w = f8,f1,f1              
+     mov       log_GR_exp_17_ones = 0x1ffff
+}
+;;
+
+{ .mmi
+     getf.exp   log_GR_signexp_f8 = f8 // If x unorm then must recompute
+     setf.exp   log_half = log_GR_half_exp  // Form 0.5 = -Q1
+     nop.i 999
+}
+;;
+
+{ .mmb
+     adds log_AD_2 = 0x30, log_AD_1
+     mov       log_GR_exp_16_ones = 0xffff
+(p15) br.cond.spnt L(LOG_DENORM)     
+}
+;;
+
+L(LOG_COMMON):
+{.mfi
+     ldfpd      log_P5,log_P4 = [log_AD_1],16           
+     fclass.m.unc p6,p0 = f8, 0xc3             // Test for x=nan
+     and        log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones  
+}
+{.mfi
+     ldfpd      log_P3,log_P2 = [log_AD_2],16           
+     nop.f 999
+     nop.i 999
+}
+;;
 
-log_log10_common:
 { .mfi
-      getf.d        GR_x = f8 // double precision memory representation of x
-      fclass.m      p9,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
-      dep.z         GR_dx = 3, 44, 2 // Create 0x0000300000000000
-                                     // Difference between double precision
-                                     // memory representations of 257/256 and
-                                     // 255/256
+     ldfpd      log_Q8,log_Q7 = [log_AD_1],16           
+     fclass.m.unc p11,p0 = f8, 0x21            // Test for x=+inf
+     sub       log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones 
 }
 { .mfi
-      setf.exp      FR_A2 = GR_05 // create A2
-      fnorm.s1      FR_NormX = f8
-      mov           GR_bias = 0xffff
-};;
-  
+     ldfpd      log_Q6,log_Q5 = [log_AD_2],16           
+     nop.f 999
+     nop.i 999
+}
+;;
+
+
 { .mfi
-      setf.d        FR_A3 = GR_A3 // create A3
-      fcmp.eq.s1    p12,p0 = f1,f8 // is x equal to 1.0?
-      dep.z         GR_xorg = GR_xorg, 44, 19 // 0x3fefe00000000000 
-                                              // double precision memory
-                                              // representation of 255/256
+     ldfpd      log_Q4,log_Q3 = [log_AD_1],16           
+     fma.s1     log_wsq     = log_w, log_w, f0
+     nop.i 999
 }
-{ .mib
-      add           GR_ad_2 = 0x30,GR_ad_1 // address of A5,A4
-      add           GR_ad_3 = 0x840,GR_ad_1 // address of ln(1/frcpa) lo parts
-(p8)  br.cond.spnt  log_positive_unorms
-};;
+{ .mfb
+     ldfpd      log_Q2,log_Q1 = [log_AD_2],16           
+(p6) fma.d.s0   f8 = f8,f1,f0      // quietize nan result if x=nan
+(p6) br.ret.spnt b0                // Exit for x=nan
+}
+;;
+
 
-log_core:
 { .mfi
-      ldfpd         FR_A7,FR_A6 = [GR_ad_1],16
-      fclass.m      p10,p0 = f8,0x3A // is x < 0?
-      sub           GR_Nm1 = GR_Exp,GR_05 // unbiased_exponent_of_x - 1
+     setf.sig  log_int_Nfloat = log_GR_true_exp_f8
+     fcmp.eq.s1 p10,p0 = log_NORM_f8, f1  // Test for x=+1.0
+     nop.i 999
+}
+{ .mfb
+     nop.m 999
+     fms.s1     log_r = log_C,f8,f1
+(p11) br.ret.spnt b0               // Exit for x=+inf
+}
+;;
+
+
+{ .mmf
+     getf.sig   log_GR_significand_f8 = log_NORM_f8 
+     ldfe       log_inv_ln10 = [log_AD_2],16      
+     fclass.m.unc p6,p0 = f8, 0x07        // Test for x=0
 }
+;;
+
+
+{ .mfb
+     nop.m 999
+(p10) fmerge.s f8 = f0, f0
+(p10) br.ret.spnt b0                // Exit for x=1.0
+;;
+}
+
 { .mfi
-      ldfpd         FR_A5,FR_A4 = [GR_ad_2],16
-(p9)  fma.d.s0      f8 = f8,f1,f0 // set V-flag
-      sub           GR_N = GR_Exp,GR_bias // unbiased_exponent_of_x
-};;
+     getf.exp   log_GR_signexp_w = log_w
+     fclass.m.unc p12,p0 = f8, 0x3a       // Test for x neg norm, unorm, inf
+     shl        log_GR_index = log_GR_significand_f8,1            
+}
+;;
 
 { .mfi
-      setf.sig      FR_N = GR_N // copy unbiased exponent of x to significand
-      fms.s1        FR_r = FR_RcpX,f8,f1 // range reduction for |x-1|>=1/256
-      extr.u        GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
+     ldfe       log_log2 = [log_AD_2],16   
+     fnma.s1    log_rp_q10 = log_half, log_wsq, log_w
+     shr.u     log_GR_index = log_GR_index,56
 }
-{ .mib
-      sub           GR_x = GR_x, GR_xorg // get diff between x and 255/256
-      cmp.gtu       p6, p7 = 2, GR_Nm1 // p6 true if 0.5 <= x < 2
-(p9)  br.ret.spnt   b0 // exit for NaN, NaT and +Inf
-};;
+{ .mfb
+     nop.m 999
+     fma.s1      log_w3      = log_wsq, log_w, f0
+(p6) br.cond.spnt L(LOG_ZERO_NEG)      // Branch if x=0
+;;
+}
+ 
 
 { .mfi
-      ldfpd         FR_Ln2hi,FR_Ln2lo = [GR_ad_1],16
-      fclass.m      p11,p0 = f8,0x07 // is x = 0?
-      shladd        GR_ad_3 = GR_Ind,2,GR_ad_3 // address of Tlo
+     and log_GR_exp_w = log_GR_exp_17_ones, log_GR_signexp_w
+     fma.s1      log_w4      = log_wsq, log_wsq, f0
+     nop.i 999
+}
+{ .mfb
+     shladd log_AD_2 = log_GR_index,4,log_AD_2
+     fma.s1     log_rsq     = log_r, log_r, f0                   
+(p12) br.cond.spnt L(LOG_ZERO_NEG)     // Branch if x<0
+;;
 }
-{ .mib
-      shladd        GR_ad_2 = GR_Ind,3,GR_ad_2 // address of Thi
-(p6)  cmp.leu       p6, p7 = GR_x, GR_dx       // 255/256 <= x <= 257/256
-(p10) br.cond.spnt  log_negatives // jump if x is negative
-};;
 
-// p6 is true if |x-1| < 1/256
-// p7 is true if |x-1| >= 1/256
 { .mfi
-      ldfd          FR_Thi = [GR_ad_2]
-(p6)  fms.s1        FR_r = f8,f1,f1 // range reduction for |x-1|<1/256
-      nop.i         0
-};;
+     ldfe       log_T = [log_AD_2]
+     fma.s1    log_rp_p4   = log_P5, log_r, log_P4
+     nop.i 999
+}
+{ .mfi
+     nop.m 999
+     fma.s1      log_rp_p32 = log_P3, log_r, log_P2
+     nop.i 999
+;;
+}
 
-{ .mmi
-(p7)  ldfs          FR_Tlo = [GR_ad_3]
-      nop.m         0
-      nop.i         0
+
+{ .mfi
+     nop.m 999
+     fma.s1    log_rp_q7   = log_Q8, log_w, log_Q7
+     nop.i 999
+}
+{ .mfi
+     nop.m 999
+     fma.s1    log_rp_q65  = log_Q6, log_w, log_Q5
+     nop.i 999
+;;
 }
-{ .mfb
-      nop.m         0
-(p12) fma.d.s0      f8 = f0,f0,f0
-(p12) br.ret.spnt   b0 // exit for +1.0
-};;
 
-.pred.rel "mutex",p6,p7
+//    p13 <== large w log
+//    p14 <== small w log
 { .mfi
-(p6)  mov           GR_NearOne = 1
-      fms.s1        FR_A32 = FR_A3,FR_r,FR_A2 // A3*r-A2
-(p7)  mov           GR_NearOne = 0
+(p8) cmp.ge.unc p13,p14 = log_GR_exp_w, log_GR_fff9
+     fma.s1    log_rp_q3   = log_Q4, log_w, log_Q3
+     nop.i 999
+;;
 }
-{ .mfb
-      ldfe          FR_InvLn10 = [GR_ad_1],16
-      fma.s1        FR_r2 = FR_r,FR_r,f0 // r^2
-(p11) br.cond.spnt  log_zeroes // jump if x is zero
-};;
 
+//    p10 <== large w log10
+//    p11 <== small w log10
 { .mfi
-      nop.m         0
-      fma.s1        FR_A6 = FR_A7,FR_r,FR_A6 // A7*r+A6
-      nop.i         0
+(p7) cmp.ge.unc p10,p11 = log_GR_exp_w, log_GR_fff9
+     fcvt.xf   log_Nfloat = log_int_Nfloat
+     nop.i 999
 }
+
 { .mfi
-(p7)  cmp.eq.unc    p9,p0 = r0,r0  // set p9 if |x-1| > 1/256
-      fma.s1        FR_A4 = FR_A5,FR_r,FR_A4 // A5*r+A4
-(p14) cmp.eq.unc    p8,p0 = 1,GR_NearOne // set p8 to 1 if it's log10
-                                         // and argument near 1.0
-};;
+     nop.m 999
+     fma.s1    log_rp_q21  = log_Q2, log_w3, log_rp_q10
+     nop.i 999 ;;
+}
 
 { .mfi
-(p6)  getf.exp      GR_rexp = FR_r  // Get signexp of x-1
-(p7)  fcvt.xf       FR_N = FR_N
-(p8)  cmp.eq        p9,p6 = r0,r0        // Also set p9 and clear p6 if log10 
-                                         // and arg near 1
-};;
+     nop.m 999
+     fma.s1    log_rcube   = log_rsq, log_r, f0
+     nop.i 999
+}
+{ .mfi
+     nop.m 999
+     fma.s1    log_rp_p10   = log_rsq, log_P1, log_r
+     nop.i 999
+;;
+}
 
 { .mfi
-      nop.m         0
-      fma.s1        FR_r4 = FR_r2,FR_r2,f0 // r^4
-      nop.i         0
+     nop.m 999
+     fcmp.eq.s0 p6,p0 = f8,f0         // Sets flag on +denormal input
+     nop.i 999
 }
 { .mfi
-      nop.m         0
-(p8)  fma.s1        FR_NxLn2pT = f0,f0,f0  // Clear NxLn2pT if log10 near 1
-      nop.i         0
-};;
+     nop.m 999
+     fma.s1     log_rp_p2   = log_rp_p4, log_rsq, log_rp_p32
+     nop.i 999
+;;
+}
+
 
 { .mfi
-      nop.m         0
-      // (A3*r+A2)*r^2+r
-      fma.s1        FR_A321 = FR_A32,FR_r2,FR_r
-      mov           GR_mask = 0x1ffff
+     nop.m 999
+     fma.s1        log_w6     = log_w3, log_w3, f0           
+     nop.i 999 
 }
 { .mfi
-      nop.m         0
-      // (A7*r+A6)*r^2+(A5*r+A4)
-      fma.s1        FR_A4 = FR_A6,FR_r2,FR_A4
-      nop.i         0
-};;
+     nop.m 999
+     fma.s1        log_Qlo     = log_rp_q7, log_wsq, log_rp_q65           
+     nop.i 999 
+}
+;;
+
+{ .mfi
+     nop.m 999
+     fma.s1        log_Qhi     = log_rp_q3, log_w4, log_rp_q21
+     nop.i 999 ;;
+}
+
 
 { .mfi
-(p6)  and           GR_rexp = GR_rexp, GR_mask
-      // N*Ln2hi+Thi
-(p7)  fma.s1        FR_NxLn2hipThi = FR_N,FR_Ln2hi,FR_Thi
-      nop.i         0
+     nop.m 999
+     fma.s1        log_T_plus_Nlog2 = log_Nfloat,log_log2, log_T    
+     nop.i 999 ;;
 }
+
 { .mfi
-      nop.m         0
-      // N*Ln2lo+Tlo
-(p7)  fma.s1        FR_NxLn2lopTlo = FR_N,FR_Ln2lo,FR_Tlo
-      nop.i         0
-};;
+     nop.m 999
+     fma.s1        log_r2P_r = log_rp_p2, log_rcube, log_rp_p10           
+     nop.i 999 ;;
+}
 
+
+//    small w, log   <== p14
 { .mfi
-(p6)  sub           GR_rexp = GR_rexp, GR_bias // unbiased exponent of x-1
-(p9)  fma.s1        f8 = FR_A4,FR_r4,FR_A321 // P(r) if |x-1| >= 1/256 or
-                                             // log10 and |x-1| < 1/256
-      nop.i         0
+     nop.m 999
+(p14) fma.d        f8       = log_Qlo, log_w6, log_Qhi          
+     nop.i 999
 }
 { .mfi
-      nop.m         0
-      // (N*Ln2hi+Thi) + (N*Ln2lo+Tlo)
-(p7)  fma.s1        FR_NxLn2pT = FR_NxLn2hipThi,f1,FR_NxLn2lopTlo
-      nop.i         0
-};;
+     nop.m 999
+     fma.s1        log_Q       = log_Qlo, log_w6, log_Qhi          
+     nop.i 999 ;;
+}
+
 
 { .mfi
-(p6)  cmp.gt.unc    p10, p6 = -40, GR_rexp // Test |x-1| < 2^-40
-      nop.f         0
-      nop.i         0
-};;
+     nop.m 999
+(p10) fma.s1        log_log10_hi     = log_T_plus_Nlog2, log_inv_ln10,f0
+     nop.i 999  ;;
+}
 
+//    large w, log   <== p13
+.pred.rel "mutex",p13,p10
 { .mfi
-      nop.m         0
-(p10) fma.d.s0      f8 = FR_A32,FR_r2,FR_r // log(x) if |x-1| < 2^-40
-      nop.i         0
-};;
+      nop.m 999
+(p13) fadd.d        f8              = log_T_plus_Nlog2, log_r2P_r 
+      nop.i 999 
+}
+{ .mfi
+      nop.m 999
+(p10) fma.s1     log_log10_lo     = log_inv_ln10, log_r2P_r,f0
+      nop.i 999  ;;
+}
+
 
-.pred.rel "mutex",p6,p9
+//    small w, log10 <== p11
 { .mfi
-      nop.m         0
-(p6)  fma.d.s0      f8 = FR_A4,FR_r4,FR_A321 // log(x) if 2^-40 <= |x-1| < 1/256
-      nop.i         0
+      nop.m 999
+(p11) fma.d      f8 = log_inv_ln10,log_Q,f0                         
+      nop.i 999 ;;
 }
-{ .mfb
-      nop.m         0
-(p9)  fma.d.s0      f8 = f8,FR_InvLn10,FR_NxLn2pT // result if |x-1| >= 1/256
-                                                  // or log10 and |x-1| < 1/256
-      br.ret.sptk   b0
-};;
 
-.align 32
-log_positive_unorms:
-{ .mmf
-      getf.exp      GR_Exp = FR_NormX // recompute biased exponent
-      getf.d        GR_x = FR_NormX   // recompute double precision x
-      fcmp.eq.s1    p12,p0 = f1,FR_NormX // is x equal to 1.0?
-};;
+//    large w, log10 <== p10
+{ .mfb
+      nop.m 999
+(p10) fma.d      f8                = log_log10_hi, f1, log_log10_lo 
+      br.ret.sptk     b0 
+;;
+}
 
+L(LOG_DENORM):
 { .mfb
-      getf.sig      GR_Sig = FR_NormX // recompute significand
-      fcmp.eq.s0    p15, p0 = f8, f0  // set denormal flag
-      br.cond.sptk  log_core
-};;
+     getf.exp   log_GR_signexp_f8 = log_NORM_f8 
+     nop.f 999
+     br.cond.sptk L(LOG_COMMON)
+}
+;;
 
-.align 32
-log_zeroes:
+L(LOG_ZERO_NEG): 
+
+// qnan snan inf norm     unorm 0 -+
+// 0    0    0   0        0     1 11      0x7
+// 0    0    1   1        1     0 10      0x3a
+
+// Save x (f8) in f10
 { .mfi
-      nop.m         0
-      fmerge.s      FR_X = f8,f8 // keep input argument for subsequent
-                                 // call of __libm_error_support#
-      nop.i         0
+     nop.m 999
+     fmerge.s f10 = f8,f8 
+     nop.i 999  ;;
 }
+
+// p8 p9  means  ln(+-0)  = -inf
+// p7 p10 means  log(+-0) = -inf
+
+//    p13 means  ln(-)
+//    p14 means  log(-)
+
+
 { .mfi
-      nop.m         0
-      fms.s1        FR_tmp = f0,f0,f1 // -1.0
-      nop.i         0
-};;
+     nop.m 999
+     fmerge.ns   f6 = f1,f1            // Form -1.0
+     nop.i 999  ;;
+}
+
+// p9  means  ln(+-0)  = -inf
+// p10 means  log(+-0) = -inf
+// Log(+-0) = -inf 
 
-.pred.rel "mutex",p13,p14
 { .mfi
-(p13) mov           GR_TAG = 2 // set libm error in case of log
-      frcpa.s0      f8,p0 = FR_tmp,f0 // log(+/-0) should be equal to -INF.
-                                      // We can get it using frcpa because it
-                                      // sets result to the IEEE-754 mandated
-                                      // quotient of FR_tmp/f0.
-                                      // As far as FR_tmp is -1 it'll be -INF
-      nop.i         0
+	nop.m 999
+(p8)  fclass.m.unc  p9,p0 = f10, 0x07           
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
+(p7)  fclass.m.unc  p10,p0 = f10, 0x07           
+	nop.i 999 ;;
 }
-{ .mib
-(p14) mov           GR_TAG = 8 // set libm error in case of log10
-      nop.i         0
-      br.cond.sptk  log_libm_err
-};;
 
-.align 32
-log_negatives:
+
+// p13  ln(-)
+// p14  log(-)
+
+// Log(-inf, -normal, -unnormal) = QNAN indefinite
 { .mfi
-      nop.m         0
-      fmerge.s      FR_X = f8,f8
-      nop.i         0
-};;
+	nop.m 999
+(p8)  fclass.m.unc  p13,p0 = f10, 0x3a           
+	nop.i 999 
+}
+{ .mfi
+	nop.m 999
+(p7)  fclass.m.unc  p14,p0 = f10, 0x3a           
+	nop.i 999  ;;
+}
+
+
+.pred.rel "mutex",p9,p10
+{ .mfi
+(p9)     mov        log_GR_tag = 2       
+(p9)    frcpa f8,p11 = f6,f0                   
+            nop.i 999
+}
+{ .mfi
+(p10)    mov        log_GR_tag = 8       
+(p10)   frcpa f8,p12 = f6,f0                   
+            nop.i 999 ;;
+}
 
 .pred.rel "mutex",p13,p14
 { .mfi
-(p13) mov           GR_TAG = 3 // set libm error in case of log
-      frcpa.s0      f8,p0 = f0,f0 // log(negatives) should be equal to NaN.
-                                  // We can get it using frcpa because it
-                                  // sets result to the IEEE-754 mandated
-                                  // quotient of f0/f0 i.e. NaN.
-(p14) mov           GR_TAG = 9 // set libm error in case of log10
-};;
+(p13)    mov        log_GR_tag = 3       
+(p13)    frcpa f8,p11 = f0,f0                   
+            nop.i 999
+}
+{ .mfb
+(p14)    mov        log_GR_tag = 9       
+(p14)   frcpa f8,p12 = f0,f0                   
+        br.cond.sptk __libm_error_region ;; 
+}
+.endp log
+ASM_SIZE_DIRECTIVE(log)
+ASM_SIZE_DIRECTIVE(__ieee754_log)
+
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
+
 
-.align 32
-log_libm_err:
-{ .mmi
-      alloc         r32 = ar.pfs,1,4,4,0
-      mov           GR_Parameter_TAG = GR_TAG
-      nop.i         0
-};;
-GLOBAL_IEEE754_END(log)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
+
+// (1)
 { .mfi
-        add   GR_Parameter_Y = -32,sp         // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS = ar.pfs             // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp = -64,sp                       // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP = gp                   // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
 
+
+// (2)
 { .mmi
-        stfd [GR_Parameter_Y] = FR_Y,16       // STORE Parameter 2 on stack
+        stfd [GR_Parameter_Y] = f1,16         // STORE Parameter 2 on stack
         add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0 = b0                   // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
 
 .body
+// (3)
 { .mib
-        stfd [GR_Parameter_X] = FR_X          // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
-        nop.b 0
+        stfd [GR_Parameter_X] = f10                   // STORE Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
+        nop.b 0                             
 }
 { .mib
-        stfd [GR_Parameter_Y] = FR_RESULT     // STORE Parameter 3 on stack
+        stfd [GR_Parameter_Y] = f8                    // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#         // Call error handling function
 };;
 
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 
+// (4)
 { .mmi
-        ldfd  f8 = [GR_Parameter_RESULT]      // Get return result off stack
+        ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-        add   sp = 64,sp                      // Restore stack pointer
-        mov   b0 = GR_SAVE_B0                 // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
-        mov   gp = GR_SAVE_GP                 // Restore gp
-        mov   ar.pfs = GR_SAVE_PFS            // Restore ar.pfs
-        br.ret.sptk     b0                    // Return
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
 };;
-LOCAL_LIBM_END(__libm_error_region)
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
diff --git a/sysdeps/ia64/fpu/e_logf.S b/sysdeps/ia64/fpu/e_logf.S
index 0ca6d3f2c8..829d0abed0 100644
--- a/sysdeps/ia64/fpu/e_logf.S
+++ b/sysdeps/ia64/fpu/e_logf.S
@@ -1,10 +1,10 @@
 .file "logf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,1072 +20,861 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 03/01/00 Initial version
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 3/01/00  Initial version
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 01/10/01 Improved speed, fixed flags for neg denormals
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 05/23/02 Modified algorithm. Now only one polynomial is used
-//          for |x-1| >= 1/256 and for |x-1| < 1/256
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 1/10/01  Improved speed, fixed flags for neg denormals
+//
 //
 // API
 //==============================================================
 // float logf(float)
 // float log10f(float)
 //
-//
 // Overview of operation
 //==============================================================
 // Background
-// ----------
 //
-// This algorithm is based on fact that
-// log(a b) = log(a) + log(b).
+// Consider  x = 2^N 1.f1 f2 f3 f4...f63
+// Log(x) = log(frcpa(x) x/frcpa(x))
+//        = log(1/frcpa(x)) + log(frcpa(x) x)
+//        = -log(frcpa(x)) + log(frcpa(x) x)
 //
-// In our case we have x = 2^N f, where 1 <= f < 2.
-// So
-//   log(x) = log(2^N f) = log(2^N) + log(f) = n*log(2) + log(f)
+// frcpa(x)       = 2^-N frcpa(1.f1 f2 ... f63)
 //
-// To calculate log(f) we do following
-//   log(f) = log(f * frcpa(f) / frcpa(f)) =
-//          = log(f * frcpa(f)) + log(1/frcpa(f))
+// -log(frcpa(x)) = -log(C) 
+//                = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
 //
-// According to definition of IA-64's frcpa instruction it's a
-// floating point that approximates 1/f using a lookup on the
-// top of 8 bits of the input number's significand with relative
-// error < 2^(-8.886). So we have following
+// -log(frcpa(x)) = -log(C) 
+//                = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
 //
-// |(1/f - frcpa(f)) / (1/f))| = |1 - f*frcpa(f)| < 1/256
+// -log(frcpa(x)) = -log(C) 
+//                = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
 //
-// and
-//
-// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
-//        = log(1 + r) + T
-//
-// The first value can be computed by polynomial P(r) approximating
-// log(1 + r) on |r| < 1/256 and the second is precomputed tabular
-// value defined by top 8 bit of f.
-//
-// Finally we have that  log(x) ~ (N*log(2) + T) + P(r)
-//
-// Note that if input argument is close to 1.0 (in our case it means
-// that |1 - x| < 1/256) we can use just polynomial approximation
-// because x = 2^0 * f = f = 1 + r and
-// log(x) = log(1 + r) ~ P(r)
-//
-//
-// To compute log10(x) we just use identity:
+// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
+
+// Log(x) =  +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) =  +Nlog2 - log(frcpa(1.f1 f2 ... f63))    + log(frcpa(x) x)
+// Log(x) =  +Nlog2 + T                              + log(frcpa(x) x)
 //
-//  log10(x) = log(x)/log(10)
+// Log(x) =  +Nlog2 + T                     + log(C x)
 //
-// so we have that
+// Cx = 1 + r
 //
-//  log10(x) = (N*log(2) + T  + log(1+r)) / log(10) =
-//           = N*(log(2)/log(10)) + (T/log(10)) + log(1 + r)/log(10)
+// Log(x) =  +Nlog2 + T  + log(1+r)
+// Log(x) =  +Nlog2 + T  + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
 //
+// 1.f1 f2 ... f8 has 256 entries.
+// They are 1 + k/2^8, k = 0 ... 255
+// These 256 values are the table entries.
 //
 // Implementation
-// --------------
-// It can be seen that formulas for log and log10 differ from one another
-// only by coefficients and tabular values. Namely as log as log10 are
-// calculated as (N*L1 + T) + L2*Series(r) where in case of log
-//   L1 = log(2)
-//   T  = log(1/frcpa(x))
-//   L2 = 1.0
-// and in case of log10
-//   L1 = log(2)/log(10)
-//   T  = log(1/frcpa(x))/log(10)
-//   L2 = 1.0/log(10)
-//
-// So common code with two different entry points those set pointers
-// to the base address of coresponding data sets containing values
-// of L2,T and prepare integer representation of L1 needed for following
-// setf instruction can be used.
-//
-// Note that both log and log10 use common approximation polynomial
-// it means we need only one set of coefficients of approximation.
-//
-// 1. Computation of log(x) for |x-1| >= 1/256
-//   InvX = frcpa(x)
-//   r = InvX*x - 1
-//   P(r) = r*((1 - A2*r) + r^2*(A3 - A4*r)) = r*P2(r),
-//   A4,A3,A2 are created with setf inctruction.
-//   We use Taylor series and so A4 = 1/4, A3 = 1/3,
-//   A2 = 1/2 rounded to double.
-//
-//   N = float(n) where n is true unbiased exponent of x
-//
-//   T is tabular value of log(1/frcpa(x)) calculated in quad precision
-//   and rounded to double. To T we get bits from 55 to 62 of register
-//   format significand of x and calculate address
-//     ad_T = table_base_addr + 8 * index
-//
-//   L2 (1.0 or 1.0/log(10) depending on function) is calculated in quad
-//   precision and rounded to double; it's loaded from memory
-//
-//   L1 (log(2) or log10(2) depending on function) is calculated in quad
-//   precision and rounded to double; it's created with setf.
-//
-//   And final result = P2(r)*(r*L2) + (T + N*L1)
-//
-//
-// 2. Computation of log(x) for |x-1| < 1/256
-//   r = x - 1
-//   P(r) = r*((1 - A2*r) + r^2*(A3 - A4*r)) = r*P2(r),
-//   A4,A3,A2 are the same as in case |x-1| >= 1/256
-//
-//   And final result = P2(r)*(r*L2)
-//
-// 3. How we define is input argument such that |x-1| < 1/256 or not.
-//
-//    To do it we analyze biased exponent and significand of input argment.
+//===============
+// CASE 1:  |x-1| >= 2^-8
+// C = frcpa(x)
+// r = C * x - 1
 //
-//      a) First we test is biased exponent equal to 0xFFFE or 0xFFFF (i.e.
-//         we test is 0.5 <= x < 2). This comparison can be performed using
-//         unsigned version of cmp instruction in such a way
-//         biased_exponent_of_x - 0xFFFE < 2
+// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4
 //
+// x = f * 2^n where f is 1.f_1f_2f_3....f_63
+// Nfloat = float(n)  where n is the true unbiased exponent
+// pre-index = f_1f_2....f_8
+// index = pre_index * 16
+// get the dxt table entry at index + offset = T
 //
-//      b) Second (in case when result of a) is true) we need to compare x
-//         with 1-1/256 and 1+1/256 or in register format representation with
-//         0xFFFEFF00000000000000 and 0xFFFF8080000000000000 correspondingly.
-//         As far as biased exponent of x here can be equal only to 0xFFFE or
-//         0xFFFF we need to test only last bit of it. Also signifigand always
-//         has implicit bit set to 1 that can be exluded from comparison.
-//         Thus it's quite enough to generate 64-bit integer bits of that are
-//         ix[63] = biased_exponent_of_x[0] and ix[62-0] = significand_of_x[62-0]
-//         and compare it with 0x7F00000000000000 and 0x80800000000000000 (those
-//         obtained like ix from register representatinos of 255/256 and
-//         257/256). This comparison can be made like in a), using unsigned
-//         version of cmp i.e. ix - 0x7F00000000000000 < 0x0180000000000000.
-//         0x0180000000000000 is difference between 0x80800000000000000 and
-//         0x7F00000000000000.
+// result = (T + Nfloat * log(2)) + rseries
 //
-//    Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
-//          filtered and processed on special branches.
+// The T table is calculated as follows
+// Form x_k = 1 + k/2^8 where k goes from 0... 255
+//      y_k = frcpa(x_k)
+//      log(1/y_k)  in quad and round to double
+
+// CASE 2:  |x-1| < 2^-6
+// w = x - 1
 //
+// Form wseries = w + Q1*w^2 + Q2*w^3 + Q3*w^4
 //
-// Special values
+// result = wseries
+
+// Special values 
 //==============================================================
-//
-// logf(+0)    = -inf
-// logf(-0)    = -inf
-//
-// logf(+qnan) = +qnan
-// logf(-qnan) = -qnan
-// logf(+snan) = +qnan
-// logf(-snan) = -qnan
-//
-// logf(-n)    = QNAN Indefinite
-// logf(-inf)  = QNAN Indefinite
-//
-// logf(+inf)  = +inf
-//
+
+
+// log(+0)    = -inf
+// log(-0)    = -inf
+
+// log(+qnan) = +qnan 
+// log(-qnan) = -qnan 
+// log(+snan) = +qnan 
+// log(-snan) = -qnan 
+
+// log(-n)    = QNAN Indefinite
+// log(-inf)  = QNAN Indefinite 
+
+// log(+inf)  = +inf
+
 // Registers used
 //==============================================================
-// Floating Point registers used:
+// Floating Point registers used: 
 // f8, input
-// f12 -> f14,  f33 -> f39
-//
-// General registers used:
-// r8  -> r11
-// r14 -> r19
-//
+// f9 -> f15,  f32 -> f47
+
+// General registers used:  
+// r32 -> r51
+
 // Predicate registers used:
-// p6 -> p12
+// p6 -> p15
 
+// p8 log base e
+// p6 log base e special
+// p9 used in the frcpa
+// p13 log base e large W
+// p14 log base e small w
+
+// p7 log base 10
+// p10 log base 10 large W
+// p11 log base 10 small w
+// p12 log base 10 special
+
+#include "libm_support.h"
 
 // Assembly macros
 //==============================================================
 
-GR_TAG                 = r8
-GR_ad_T                = r8
-GR_N                   = r9
-GR_Exp                 = r10
-GR_Sig                 = r11
-
-GR_025                 = r14
-GR_05                  = r15
-GR_A3                  = r16
-GR_Ind                 = r17
-GR_dx                  = r15
-GR_Ln2                 = r19
-GR_de                  = r20
-GR_x                   = r21
-GR_xorg                = r22
-
-GR_SAVE_B0             = r33
-GR_SAVE_PFS            = r34
-GR_SAVE_GP             = r35
-GR_SAVE_SP             = r36
-
-GR_Parameter_X         = r37
-GR_Parameter_Y         = r38
-GR_Parameter_RESULT    = r39
-GR_Parameter_TAG       = r40
-
-
-FR_A2                  = f12
-FR_A3                  = f13
-FR_A4                  = f14
-
-FR_RcpX                = f33
-FR_r                   = f34
-FR_r2                  = f35
-FR_tmp                 = f35
-FR_Ln2                 = f36
-FR_T                   = f37
-FR_N                   = f38
-FR_NxLn2pT             = f38
-FR_NormX               = f39
-FR_InvLn10             = f40
-
-
-FR_Y                   = f1
-FR_X                   = f10
-FR_RESULT              = f8
+log_int_Nfloat   = f9 
+log_Nfloat       = f10 
+
+log_P3           = f11 
+log_P2           = f12 
+log_P1           = f13 
+log_inv_ln10     = f14 // NOTE(review): log_inv_ln10 is assigned again below (f46); confirm which register is intended
+log_log2         = f15 
+
+log_w            = f32
+log_T            = f33 
+log_rp_p32       = f34 
+log_rp_p2        = f35 
+log_rp_p10       = f36
+log_rsq          = f37 
+log_T_plus_Nlog2 = f38 
+log_r            = f39
+log_C            = f40
+log_rp_q32       = f41
+log_rp_q2        = f42
+log_rp_q10       = f43
+log_wsq          = f44
+log_Q            = f45
+log_inv_ln10     = f46 // NOTE(review): second assignment of log_inv_ln10 (also set to f14 above)
+log_NORM_f8      = f47
+
+// ===================================
+
+log_GR_exp_17_ones               = r33
+log_GR_exp_16_ones               = r34
+log_GR_exp_f8                    = r35
+log_GR_signexp_f8                = r36
+log_GR_true_exp_f8               = r37
+log_GR_significand_f8            = r38
+log_GR_index                     = r39
+log_AD_1                         = r40
+log_GR_signexp_w                 = r41
+log_GR_fff7                      = r42
+log_AD_2                         = r43
+log_GR_exp_w                     = r44
+
+GR_SAVE_B0                       = r45
+GR_SAVE_GP                       = r46
+GR_SAVE_PFS                      = r47
+
+GR_Parameter_X                   = r48
+GR_Parameter_Y                   = r49
+GR_Parameter_RESULT              = r50
+log_GR_tag                       = r51
 
 
 // Data tables
 //==============================================================
-RODATA
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
 .align 16
-LOCAL_OBJECT_START(logf_data)
-data8 0x3FF0000000000000 // 1.0
-//
-// ln(1/frcpa(1+i/256)), i=0...255
-data8 0x3F60040155D5889E // 0
-data8 0x3F78121214586B54 // 1
-data8 0x3F841929F96832F0 // 2
-data8 0x3F8C317384C75F06 // 3
-data8 0x3F91A6B91AC73386 // 4
-data8 0x3F95BA9A5D9AC039 // 5
-data8 0x3F99D2A8074325F4 // 6
-data8 0x3F9D6B2725979802 // 7
-data8 0x3FA0C58FA19DFAAA // 8
-data8 0x3FA2954C78CBCE1B // 9
-data8 0x3FA4A94D2DA96C56 // 10
-data8 0x3FA67C94F2D4BB58 // 11
-data8 0x3FA85188B630F068 // 12
-data8 0x3FAA6B8ABE73AF4C // 13
-data8 0x3FAC441E06F72A9E // 14
-data8 0x3FAE1E6713606D07 // 15
-data8 0x3FAFFA6911AB9301 // 16
-data8 0x3FB0EC139C5DA601 // 17
-data8 0x3FB1DBD2643D190B // 18
-data8 0x3FB2CC7284FE5F1C // 19
-data8 0x3FB3BDF5A7D1EE64 // 20
-data8 0x3FB4B05D7AA012E0 // 21
-data8 0x3FB580DB7CEB5702 // 22
-data8 0x3FB674F089365A7A // 23
-data8 0x3FB769EF2C6B568D // 24
-data8 0x3FB85FD927506A48 // 25
-data8 0x3FB9335E5D594989 // 26
-data8 0x3FBA2B0220C8E5F5 // 27
-data8 0x3FBB0004AC1A86AC // 28
-data8 0x3FBBF968769FCA11 // 29
-data8 0x3FBCCFEDBFEE13A8 // 30
-data8 0x3FBDA727638446A2 // 31
-data8 0x3FBEA3257FE10F7A // 32
-data8 0x3FBF7BE9FEDBFDE6 // 33
-data8 0x3FC02AB352FF25F4 // 34
-data8 0x3FC097CE579D204D // 35
-data8 0x3FC1178E8227E47C // 36
-data8 0x3FC185747DBECF34 // 37
-data8 0x3FC1F3B925F25D41 // 38
-data8 0x3FC2625D1E6DDF57 // 39
-data8 0x3FC2D1610C86813A // 40
-data8 0x3FC340C59741142E // 41
-data8 0x3FC3B08B6757F2A9 // 42
-data8 0x3FC40DFB08378003 // 43
-data8 0x3FC47E74E8CA5F7C // 44
-data8 0x3FC4EF51F6466DE4 // 45
-data8 0x3FC56092E02BA516 // 46
-data8 0x3FC5D23857CD74D5 // 47
-data8 0x3FC6313A37335D76 // 48
-data8 0x3FC6A399DABBD383 // 49
-data8 0x3FC70337DD3CE41B // 50
-data8 0x3FC77654128F6127 // 51
-data8 0x3FC7E9D82A0B022D // 52
-data8 0x3FC84A6B759F512F // 53
-data8 0x3FC8AB47D5F5A310 // 54
-data8 0x3FC91FE49096581B // 55
-data8 0x3FC981634011AA75 // 56
-data8 0x3FC9F6C407089664 // 57
-data8 0x3FCA58E729348F43 // 58
-data8 0x3FCABB55C31693AD // 59
-data8 0x3FCB1E104919EFD0 // 60
-data8 0x3FCB94EE93E367CB // 61
-data8 0x3FCBF851C067555F // 62
-data8 0x3FCC5C0254BF23A6 // 63
-data8 0x3FCCC000C9DB3C52 // 64
-data8 0x3FCD244D99C85674 // 65
-data8 0x3FCD88E93FB2F450 // 66
-data8 0x3FCDEDD437EAEF01 // 67
-data8 0x3FCE530EFFE71012 // 68
-data8 0x3FCEB89A1648B971 // 69
-data8 0x3FCF1E75FADF9BDE // 70
-data8 0x3FCF84A32EAD7C35 // 71
-data8 0x3FCFEB2233EA07CD // 72
-data8 0x3FD028F9C7035C1C // 73
-data8 0x3FD05C8BE0D9635A // 74
-data8 0x3FD085EB8F8AE797 // 75
-data8 0x3FD0B9C8E32D1911 // 76
-data8 0x3FD0EDD060B78081 // 77
-data8 0x3FD122024CF0063F // 78
-data8 0x3FD14BE2927AECD4 // 79
-data8 0x3FD180618EF18ADF // 80
-data8 0x3FD1B50BBE2FC63B // 81
-data8 0x3FD1DF4CC7CF242D // 82
-data8 0x3FD214456D0EB8D4 // 83
-data8 0x3FD23EC5991EBA49 // 84
-data8 0x3FD2740D9F870AFB // 85
-data8 0x3FD29ECDABCDFA04 // 86
-data8 0x3FD2D46602ADCCEE // 87
-data8 0x3FD2FF66B04EA9D4 // 88
-data8 0x3FD335504B355A37 // 89
-data8 0x3FD360925EC44F5D // 90
-data8 0x3FD38BF1C3337E75 // 91
-data8 0x3FD3C25277333184 // 92
-data8 0x3FD3EDF463C1683E // 93
-data8 0x3FD419B423D5E8C7 // 94
-data8 0x3FD44591E0539F49 // 95
-data8 0x3FD47C9175B6F0AD // 96
-data8 0x3FD4A8B341552B09 // 97
-data8 0x3FD4D4F3908901A0 // 98
-data8 0x3FD501528DA1F968 // 99
-data8 0x3FD52DD06347D4F6 // 100
-data8 0x3FD55A6D3C7B8A8A // 101
-data8 0x3FD5925D2B112A59 // 102
-data8 0x3FD5BF406B543DB2 // 103
-data8 0x3FD5EC433D5C35AE // 104
-data8 0x3FD61965CDB02C1F // 105
-data8 0x3FD646A84935B2A2 // 106
-data8 0x3FD6740ADD31DE94 // 107
-data8 0x3FD6A18DB74A58C5 // 108
-data8 0x3FD6CF31058670EC // 109
-data8 0x3FD6F180E852F0BA // 110
-data8 0x3FD71F5D71B894F0 // 111
-data8 0x3FD74D5AEFD66D5C // 112
-data8 0x3FD77B79922BD37E // 113
-data8 0x3FD7A9B9889F19E2 // 114
-data8 0x3FD7D81B037EB6A6 // 115
-data8 0x3FD8069E33827231 // 116
-data8 0x3FD82996D3EF8BCB // 117
-data8 0x3FD85855776DCBFB // 118
-data8 0x3FD8873658327CCF // 119
-data8 0x3FD8AA75973AB8CF // 120
-data8 0x3FD8D992DC8824E5 // 121
-data8 0x3FD908D2EA7D9512 // 122
-data8 0x3FD92C59E79C0E56 // 123
-data8 0x3FD95BD750EE3ED3 // 124
-data8 0x3FD98B7811A3EE5B // 125
-data8 0x3FD9AF47F33D406C // 126
-data8 0x3FD9DF270C1914A8 // 127
-data8 0x3FDA0325ED14FDA4 // 128
-data8 0x3FDA33440224FA79 // 129
-data8 0x3FDA57725E80C383 // 130
-data8 0x3FDA87D0165DD199 // 131
-data8 0x3FDAAC2E6C03F896 // 132
-data8 0x3FDADCCC6FDF6A81 // 133
-data8 0x3FDB015B3EB1E790 // 134
-data8 0x3FDB323A3A635948 // 135
-data8 0x3FDB56FA04462909 // 136
-data8 0x3FDB881AA659BC93 // 137
-data8 0x3FDBAD0BEF3DB165 // 138
-data8 0x3FDBD21297781C2F // 139
-data8 0x3FDC039236F08819 // 140
-data8 0x3FDC28CB1E4D32FD // 141
-data8 0x3FDC4E19B84723C2 // 142
-data8 0x3FDC7FF9C74554C9 // 143
-data8 0x3FDCA57B64E9DB05 // 144
-data8 0x3FDCCB130A5CEBB0 // 145
-data8 0x3FDCF0C0D18F326F // 146
-data8 0x3FDD232075B5A201 // 147
-data8 0x3FDD490246DEFA6B // 148
-data8 0x3FDD6EFA918D25CD // 149
-data8 0x3FDD9509707AE52F // 150
-data8 0x3FDDBB2EFE92C554 // 151
-data8 0x3FDDEE2F3445E4AF // 152
-data8 0x3FDE148A1A2726CE // 153
-data8 0x3FDE3AFC0A49FF40 // 154
-data8 0x3FDE6185206D516E // 155
-data8 0x3FDE882578823D52 // 156
-data8 0x3FDEAEDD2EAC990C // 157
-data8 0x3FDED5AC5F436BE3 // 158
-data8 0x3FDEFC9326D16AB9 // 159
-data8 0x3FDF2391A2157600 // 160
-data8 0x3FDF4AA7EE03192D // 161
-data8 0x3FDF71D627C30BB0 // 162
-data8 0x3FDF991C6CB3B379 // 163
-data8 0x3FDFC07ADA69A910 // 164
-data8 0x3FDFE7F18EB03D3E // 165
-data8 0x3FE007C053C5002E // 166
-data8 0x3FE01B942198A5A1 // 167
-data8 0x3FE02F74400C64EB // 168
-data8 0x3FE04360BE7603AD // 169
-data8 0x3FE05759AC47FE34 // 170
-data8 0x3FE06B5F1911CF52 // 171
-data8 0x3FE078BF0533C568 // 172
-data8 0x3FE08CD9687E7B0E // 173
-data8 0x3FE0A10074CF9019 // 174
-data8 0x3FE0B5343A234477 // 175
-data8 0x3FE0C974C89431CE // 176
-data8 0x3FE0DDC2305B9886 // 177
-data8 0x3FE0EB524BAFC918 // 178
-data8 0x3FE0FFB54213A476 // 179
-data8 0x3FE114253DA97D9F // 180
-data8 0x3FE128A24F1D9AFF // 181
-data8 0x3FE1365252BF0865 // 182
-data8 0x3FE14AE558B4A92D // 183
-data8 0x3FE15F85A19C765B // 184
-data8 0x3FE16D4D38C119FA // 185
-data8 0x3FE18203C20DD133 // 186
-data8 0x3FE196C7BC4B1F3B // 187
-data8 0x3FE1A4A738B7A33C // 188
-data8 0x3FE1B981C0C9653D // 189
-data8 0x3FE1CE69E8BB106B // 190
-data8 0x3FE1DC619DE06944 // 191
-data8 0x3FE1F160A2AD0DA4 // 192
-data8 0x3FE2066D7740737E // 193
-data8 0x3FE2147DBA47A394 // 194
-data8 0x3FE229A1BC5EBAC3 // 195
-data8 0x3FE237C1841A502E // 196
-data8 0x3FE24CFCE6F80D9A // 197
-data8 0x3FE25B2C55CD5762 // 198
-data8 0x3FE2707F4D5F7C41 // 199
-data8 0x3FE285E0842CA384 // 200
-data8 0x3FE294294708B773 // 201
-data8 0x3FE2A9A2670AFF0C // 202
-data8 0x3FE2B7FB2C8D1CC1 // 203
-data8 0x3FE2C65A6395F5F5 // 204
-data8 0x3FE2DBF557B0DF43 // 205
-data8 0x3FE2EA64C3F97655 // 206
-data8 0x3FE3001823684D73 // 207
-data8 0x3FE30E97E9A8B5CD // 208
-data8 0x3FE32463EBDD34EA // 209
-data8 0x3FE332F4314AD796 // 210
-data8 0x3FE348D90E7464D0 // 211
-data8 0x3FE35779F8C43D6E // 212
-data8 0x3FE36621961A6A99 // 213
-data8 0x3FE37C299F3C366A // 214
-data8 0x3FE38AE2171976E7 // 215
-data8 0x3FE399A157A603E7 // 216
-data8 0x3FE3AFCCFE77B9D1 // 217
-data8 0x3FE3BE9D503533B5 // 218
-data8 0x3FE3CD7480B4A8A3 // 219
-data8 0x3FE3E3C43918F76C // 220
-data8 0x3FE3F2ACB27ED6C7 // 221
-data8 0x3FE4019C2125CA93 // 222
-data8 0x3FE4181061389722 // 223
-data8 0x3FE42711518DF545 // 224
-data8 0x3FE436194E12B6BF // 225
-data8 0x3FE445285D68EA69 // 226
-data8 0x3FE45BCC464C893A // 227
-data8 0x3FE46AED21F117FC // 228
-data8 0x3FE47A1527E8A2D3 // 229
-data8 0x3FE489445EFFFCCC // 230
-data8 0x3FE4A018BCB69835 // 231
-data8 0x3FE4AF5A0C9D65D7 // 232
-data8 0x3FE4BEA2A5BDBE87 // 233
-data8 0x3FE4CDF28F10AC46 // 234
-data8 0x3FE4DD49CF994058 // 235
-data8 0x3FE4ECA86E64A684 // 236
-data8 0x3FE503C43CD8EB68 // 237
-data8 0x3FE513356667FC57 // 238
-data8 0x3FE522AE0738A3D8 // 239
-data8 0x3FE5322E26867857 // 240
-data8 0x3FE541B5CB979809 // 241
-data8 0x3FE55144FDBCBD62 // 242
-data8 0x3FE560DBC45153C7 // 243
-data8 0x3FE5707A26BB8C66 // 244
-data8 0x3FE587F60ED5B900 // 245
-data8 0x3FE597A7977C8F31 // 246
-data8 0x3FE5A760D634BB8B // 247
-data8 0x3FE5B721D295F10F // 248
-data8 0x3FE5C6EA94431EF9 // 249
-data8 0x3FE5D6BB22EA86F6 // 250
-data8 0x3FE5E6938645D390 // 251
-data8 0x3FE5F673C61A2ED2 // 252
-data8 0x3FE6065BEA385926 // 253
-data8 0x3FE6164BFA7CC06B // 254
-data8 0x3FE62643FECF9743 // 255
-LOCAL_OBJECT_END(logf_data)
-
-LOCAL_OBJECT_START(log10f_data)
-data8 0x3FDBCB7B1526E50E // 1/ln(10)
-//
-// ln(1/frcpa(1+i/256))/ln(10), i=0...255
-data8 0x3F4BD27045BFD025 // 0
-data8 0x3F64E84E793A474A // 1
-data8 0x3F7175085AB85FF0 // 2
-data8 0x3F787CFF9D9147A5 // 3
-data8 0x3F7EA9D372B89FC8 // 4
-data8 0x3F82DF9D95DA961C // 5
-data8 0x3F866DF172D6372C // 6
-data8 0x3F898D79EF5EEDF0 // 7
-data8 0x3F8D22ADF3F9579D // 8
-data8 0x3F9024231D30C398 // 9
-data8 0x3F91F23A98897D4A // 10
-data8 0x3F93881A7B818F9E // 11
-data8 0x3F951F6E1E759E35 // 12
-data8 0x3F96F2BCE7ADC5B4 // 13
-data8 0x3F988D362CDF359E // 14
-data8 0x3F9A292BAF010982 // 15
-data8 0x3F9BC6A03117EB97 // 16
-data8 0x3F9D65967DE3AB09 // 17
-data8 0x3F9F061167FC31E8 // 18
-data8 0x3FA05409E4F7819C // 19
-data8 0x3FA125D0432EA20E // 20
-data8 0x3FA1F85D440D299B // 21
-data8 0x3FA2AD755749617D // 22
-data8 0x3FA381772A00E604 // 23
-data8 0x3FA45643E165A70B // 24
-data8 0x3FA52BDD034475B8 // 25
-data8 0x3FA5E3966B7E9295 // 26
-data8 0x3FA6BAAF47C5B245 // 27
-data8 0x3FA773B3E8C4F3C8 // 28
-data8 0x3FA84C51EBEE8D15 // 29
-data8 0x3FA906A6786FC1CB // 30
-data8 0x3FA9C197ABF00DD7 // 31
-data8 0x3FAA9C78712191F7 // 32
-data8 0x3FAB58C09C8D637C // 33
-data8 0x3FAC15A8BCDD7B7E // 34
-data8 0x3FACD331E2C2967C // 35
-data8 0x3FADB11ED766ABF4 // 36
-data8 0x3FAE70089346A9E6 // 37
-data8 0x3FAF2F96C6754AEE // 38
-data8 0x3FAFEFCA8D451FD6 // 39
-data8 0x3FB0585283764178 // 40
-data8 0x3FB0B913AAC7D3A7 // 41
-data8 0x3FB11A294F2569F6 // 42
-data8 0x3FB16B51A2696891 // 43
-data8 0x3FB1CD03ADACC8BE // 44
-data8 0x3FB22F0BDD7745F5 // 45
-data8 0x3FB2916ACA38D1E8 // 46
-data8 0x3FB2F4210DF7663D // 47
-data8 0x3FB346A6C3C49066 // 48
-data8 0x3FB3A9FEBC60540A // 49
-data8 0x3FB3FD0C10A3AA54 // 50
-data8 0x3FB46107D3540A82 // 51
-data8 0x3FB4C55DD16967FE // 52
-data8 0x3FB51940330C000B // 53
-data8 0x3FB56D620EE7115E // 54
-data8 0x3FB5D2ABCF26178E // 55
-data8 0x3FB6275AA5DEBF81 // 56
-data8 0x3FB68D4EAF26D7EE // 57
-data8 0x3FB6E28C5C54A28D // 58
-data8 0x3FB7380B9665B7C8 // 59
-data8 0x3FB78DCCC278E85B // 60
-data8 0x3FB7F50C2CF2557A // 61
-data8 0x3FB84B5FD5EAEFD8 // 62
-data8 0x3FB8A1F6BAB2B226 // 63
-data8 0x3FB8F8D144557BDF // 64
-data8 0x3FB94FEFDCD61D92 // 65
-data8 0x3FB9A752EF316149 // 66
-data8 0x3FB9FEFAE7611EE0 // 67
-data8 0x3FBA56E8325F5C87 // 68
-data8 0x3FBAAF1B3E297BB4 // 69
-data8 0x3FBB079479C372AD // 70
-data8 0x3FBB6054553B12F7 // 71
-data8 0x3FBBB95B41AB5CE6 // 72
-data8 0x3FBC12A9B13FE079 // 73
-data8 0x3FBC6C4017382BEA // 74
-data8 0x3FBCB41FBA42686D // 75
-data8 0x3FBD0E38CE73393F // 76
-data8 0x3FBD689B2193F133 // 77
-data8 0x3FBDC3472B1D2860 // 78
-data8 0x3FBE0C06300D528B // 79
-data8 0x3FBE6738190E394C // 80
-data8 0x3FBEC2B50D208D9B // 81
-data8 0x3FBF0C1C2B936828 // 82
-data8 0x3FBF68216C9CC727 // 83
-data8 0x3FBFB1F6381856F4 // 84
-data8 0x3FC00742AF4CE5F8 // 85
-data8 0x3FC02C64906512D2 // 86
-data8 0x3FC05AF1E63E03B4 // 87
-data8 0x3FC0804BEA723AA9 // 88
-data8 0x3FC0AF1FD6711527 // 89
-data8 0x3FC0D4B2A8805A00 // 90
-data8 0x3FC0FA5EF136A06C // 91
-data8 0x3FC1299A4FB3E306 // 92
-data8 0x3FC14F806253C3ED // 93
-data8 0x3FC175805D1587C1 // 94
-data8 0x3FC19B9A637CA295 // 95
-data8 0x3FC1CB5FC26EDE17 // 96
-data8 0x3FC1F1B4E65F2590 // 97
-data8 0x3FC218248B5DC3E5 // 98
-data8 0x3FC23EAED62ADC76 // 99
-data8 0x3FC26553EBD337BD // 100
-data8 0x3FC28C13F1B11900 // 101
-data8 0x3FC2BCAA14381386 // 102
-data8 0x3FC2E3A740B7800F // 103
-data8 0x3FC30ABFD8F333B6 // 104
-data8 0x3FC331F403985097 // 105
-data8 0x3FC35943E7A60690 // 106
-data8 0x3FC380AFAC6E7C07 // 107
-data8 0x3FC3A8377997B9E6 // 108
-data8 0x3FC3CFDB771C9ADB // 109
-data8 0x3FC3EDA90D39A5DF // 110
-data8 0x3FC4157EC09505CD // 111
-data8 0x3FC43D7113FB04C1 // 112
-data8 0x3FC4658030AD1CCF // 113
-data8 0x3FC48DAC404638F6 // 114
-data8 0x3FC4B5F56CBBB869 // 115
-data8 0x3FC4DE5BE05E7583 // 116
-data8 0x3FC4FCBC0776FD85 // 117
-data8 0x3FC525561E9256EE // 118
-data8 0x3FC54E0DF3198865 // 119
-data8 0x3FC56CAB7112BDE2 // 120
-data8 0x3FC59597BA735B15 // 121
-data8 0x3FC5BEA23A506FDA // 122
-data8 0x3FC5DD7E08DE382F // 123
-data8 0x3FC606BDD3F92355 // 124
-data8 0x3FC6301C518A501F // 125
-data8 0x3FC64F3770618916 // 126
-data8 0x3FC678CC14C1E2D8 // 127
-data8 0x3FC6981005ED2947 // 128
-data8 0x3FC6C1DB5F9BB336 // 129
-data8 0x3FC6E1488ECD2881 // 130
-data8 0x3FC70B4B2E7E41B9 // 131
-data8 0x3FC72AE209146BF9 // 132
-data8 0x3FC7551C81BD8DCF // 133
-data8 0x3FC774DD76CC43BE // 134
-data8 0x3FC79F505DB00E88 // 135
-data8 0x3FC7BF3BDE099F30 // 136
-data8 0x3FC7E9E7CAC437F9 // 137
-data8 0x3FC809FE4902D00D // 138
-data8 0x3FC82A2757995CBE // 139
-data8 0x3FC85525C625E098 // 140
-data8 0x3FC8757A79831887 // 141
-data8 0x3FC895E2058D8E03 // 142
-data8 0x3FC8C13437695532 // 143
-data8 0x3FC8E1C812EF32BE // 144
-data8 0x3FC9026F112197E8 // 145
-data8 0x3FC923294888880B // 146
-data8 0x3FC94EEA4B8334F3 // 147
-data8 0x3FC96FD1B639FC09 // 148
-data8 0x3FC990CCA66229AC // 149
-data8 0x3FC9B1DB33334843 // 150
-data8 0x3FC9D2FD740E6607 // 151
-data8 0x3FC9FF49EEDCB553 // 152
-data8 0x3FCA209A84FBCFF8 // 153
-data8 0x3FCA41FF1E43F02B // 154
-data8 0x3FCA6377D2CE9378 // 155
-data8 0x3FCA8504BAE0D9F6 // 156
-data8 0x3FCAA6A5EEEBEFE3 // 157
-data8 0x3FCAC85B878D7879 // 158
-data8 0x3FCAEA259D8FFA0B // 159
-data8 0x3FCB0C0449EB4B6B // 160
-data8 0x3FCB2DF7A5C50299 // 161
-data8 0x3FCB4FFFCA70E4D1 // 162
-data8 0x3FCB721CD17157E3 // 163
-data8 0x3FCB944ED477D4ED // 164
-data8 0x3FCBB695ED655C7D // 165
-data8 0x3FCBD8F2364AEC0F // 166
-data8 0x3FCBFB63C969F4FF // 167
-data8 0x3FCC1DEAC134D4E9 // 168
-data8 0x3FCC4087384F4F80 // 169
-data8 0x3FCC6339498F09E2 // 170
-data8 0x3FCC86010FFC076C // 171
-data8 0x3FCC9D3D065C5B42 // 172
-data8 0x3FCCC029375BA07A // 173
-data8 0x3FCCE32B66978BA4 // 174
-data8 0x3FCD0643AFD51404 // 175
-data8 0x3FCD29722F0DEA45 // 176
-data8 0x3FCD4CB70070FE44 // 177
-data8 0x3FCD6446AB3F8C96 // 178
-data8 0x3FCD87B0EF71DB45 // 179
-data8 0x3FCDAB31D1FE99A7 // 180
-data8 0x3FCDCEC96FDC888F // 181
-data8 0x3FCDE6908876357A // 182
-data8 0x3FCE0A4E4A25C200 // 183
-data8 0x3FCE2E2315755E33 // 184
-data8 0x3FCE461322D1648A // 185
-data8 0x3FCE6A0E95C7787B // 186
-data8 0x3FCE8E216243DD60 // 187
-data8 0x3FCEA63AF26E007C // 188
-data8 0x3FCECA74ED15E0B7 // 189
-data8 0x3FCEEEC692CCD25A // 190
-data8 0x3FCF070A36B8D9C1 // 191
-data8 0x3FCF2B8393E34A2D // 192
-data8 0x3FCF5014EF538A5B // 193
-data8 0x3FCF68833AF1B180 // 194
-data8 0x3FCF8D3CD9F3F04F // 195
-data8 0x3FCFA5C61ADD93E9 // 196
-data8 0x3FCFCAA8567EBA7A // 197
-data8 0x3FCFE34CC8743DD8 // 198
-data8 0x3FD0042BFD74F519 // 199
-data8 0x3FD016BDF6A18017 // 200
-data8 0x3FD023262F907322 // 201
-data8 0x3FD035CCED8D32A1 // 202
-data8 0x3FD042430E869FFC // 203
-data8 0x3FD04EBEC842B2E0 // 204
-data8 0x3FD06182E84FD4AC // 205
-data8 0x3FD06E0CB609D383 // 206
-data8 0x3FD080E60BEC8F12 // 207
-data8 0x3FD08D7E0D894735 // 208
-data8 0x3FD0A06CC96A2056 // 209
-data8 0x3FD0AD131F3B3C55 // 210
-data8 0x3FD0C01771E775FB // 211
-data8 0x3FD0CCCC3CAD6F4B // 212
-data8 0x3FD0D986D91A34A9 // 213
-data8 0x3FD0ECA9B8861A2D // 214
-data8 0x3FD0F972F87FF3D6 // 215
-data8 0x3FD106421CF0E5F7 // 216
-data8 0x3FD11983EBE28A9D // 217
-data8 0x3FD12661E35B785A // 218
-data8 0x3FD13345D2779D3B // 219
-data8 0x3FD146A6F597283A // 220
-data8 0x3FD15399E81EA83D // 221
-data8 0x3FD16092E5D3A9A6 // 222
-data8 0x3FD17413C3B7AB5E // 223
-data8 0x3FD1811BF629D6FB // 224
-data8 0x3FD18E2A47B46686 // 225
-data8 0x3FD19B3EBE1A4418 // 226
-data8 0x3FD1AEE9017CB450 // 227
-data8 0x3FD1BC0CED7134E2 // 228
-data8 0x3FD1C93712ABC7FF // 229
-data8 0x3FD1D66777147D3F // 230
-data8 0x3FD1EA3BD1286E1C // 231
-data8 0x3FD1F77BED932C4C // 232
-data8 0x3FD204C25E1B031F // 233
-data8 0x3FD2120F28CE69B1 // 234
-data8 0x3FD21F6253C48D01 // 235
-data8 0x3FD22CBBE51D60AA // 236
-data8 0x3FD240CE4C975444 // 237
-data8 0x3FD24E37F8ECDAE8 // 238
-data8 0x3FD25BA8215AF7FC // 239
-data8 0x3FD2691ECC29F042 // 240
-data8 0x3FD2769BFFAB2E00 // 241
-data8 0x3FD2841FC23952C9 // 242
-data8 0x3FD291AA1A384978 // 243
-data8 0x3FD29F3B0E15584B // 244
-data8 0x3FD2B3A0EE479DF7 // 245
-data8 0x3FD2C142842C09E6 // 246
-data8 0x3FD2CEEACCB7BD6D // 247
-data8 0x3FD2DC99CE82FF21 // 248
-data8 0x3FD2EA4F902FD7DA // 249
-data8 0x3FD2F80C186A25FD // 250
-data8 0x3FD305CF6DE7B0F7 // 251
-data8 0x3FD3139997683CE7 // 252
-data8 0x3FD3216A9BB59E7C // 253
-data8 0x3FD32F4281A3CEFF // 254
-data8 0x3FD33D2150110092 // 255
-LOCAL_OBJECT_END(log10f_data)
-
-
-// Code
-//==============================================================
+
+log_table_1:
+ASM_TYPE_DIRECTIVE(log_table_1,@object)
+data8 0xbfd0001008f39d59    // P3 ~ -1/4, coefficient of r^4 in rseries
+data8 0x3fd5556073e0c45a    // P2 ~ +1/3, coefficient of r^3 in rseries
+ASM_SIZE_DIRECTIVE(log_table_1)
+
+log_table_2:
+ASM_TYPE_DIRECTIVE(log_table_2,@object)
+data8 0xbfdffffffffaea15    // p1
+data8 0x3fdbcb7b1526e50e    // 1/ln10
+data8 0x3fe62e42fefa39ef    // Log(2)
+data8 0x0                   // pad
+
+data8 0x3F60040155D5889E    //log(1/frcpa(1+   0/256)
+data8 0x3F78121214586B54    //log(1/frcpa(1+   1/256)
+data8 0x3F841929F96832F0    //log(1/frcpa(1+   2/256)
+data8 0x3F8C317384C75F06    //log(1/frcpa(1+   3/256)
+data8 0x3F91A6B91AC73386    //log(1/frcpa(1+   4/256)
+data8 0x3F95BA9A5D9AC039    //log(1/frcpa(1+   5/256)
+data8 0x3F99D2A8074325F4    //log(1/frcpa(1+   6/256)
+data8 0x3F9D6B2725979802    //log(1/frcpa(1+   7/256)
+data8 0x3FA0C58FA19DFAAA    //log(1/frcpa(1+   8/256)
+data8 0x3FA2954C78CBCE1B    //log(1/frcpa(1+   9/256)
+data8 0x3FA4A94D2DA96C56    //log(1/frcpa(1+  10/256)
+data8 0x3FA67C94F2D4BB58    //log(1/frcpa(1+  11/256)
+data8 0x3FA85188B630F068    //log(1/frcpa(1+  12/256)
+data8 0x3FAA6B8ABE73AF4C    //log(1/frcpa(1+  13/256)
+data8 0x3FAC441E06F72A9E    //log(1/frcpa(1+  14/256)
+data8 0x3FAE1E6713606D07    //log(1/frcpa(1+  15/256)
+data8 0x3FAFFA6911AB9301    //log(1/frcpa(1+  16/256)
+data8 0x3FB0EC139C5DA601    //log(1/frcpa(1+  17/256)
+data8 0x3FB1DBD2643D190B    //log(1/frcpa(1+  18/256)
+data8 0x3FB2CC7284FE5F1C    //log(1/frcpa(1+  19/256)
+data8 0x3FB3BDF5A7D1EE64    //log(1/frcpa(1+  20/256)
+data8 0x3FB4B05D7AA012E0    //log(1/frcpa(1+  21/256)
+data8 0x3FB580DB7CEB5702    //log(1/frcpa(1+  22/256)
+data8 0x3FB674F089365A7A    //log(1/frcpa(1+  23/256)
+data8 0x3FB769EF2C6B568D    //log(1/frcpa(1+  24/256)
+data8 0x3FB85FD927506A48    //log(1/frcpa(1+  25/256)
+data8 0x3FB9335E5D594989    //log(1/frcpa(1+  26/256)
+data8 0x3FBA2B0220C8E5F5    //log(1/frcpa(1+  27/256)
+data8 0x3FBB0004AC1A86AC    //log(1/frcpa(1+  28/256)
+data8 0x3FBBF968769FCA11    //log(1/frcpa(1+  29/256)
+data8 0x3FBCCFEDBFEE13A8    //log(1/frcpa(1+  30/256)
+data8 0x3FBDA727638446A2    //log(1/frcpa(1+  31/256)
+data8 0x3FBEA3257FE10F7A    //log(1/frcpa(1+  32/256)
+data8 0x3FBF7BE9FEDBFDE6    //log(1/frcpa(1+  33/256)
+data8 0x3FC02AB352FF25F4    //log(1/frcpa(1+  34/256)
+data8 0x3FC097CE579D204D    //log(1/frcpa(1+  35/256)
+data8 0x3FC1178E8227E47C    //log(1/frcpa(1+  36/256)
+data8 0x3FC185747DBECF34    //log(1/frcpa(1+  37/256)
+data8 0x3FC1F3B925F25D41    //log(1/frcpa(1+  38/256)
+data8 0x3FC2625D1E6DDF57    //log(1/frcpa(1+  39/256)
+data8 0x3FC2D1610C86813A    //log(1/frcpa(1+  40/256)
+data8 0x3FC340C59741142E    //log(1/frcpa(1+  41/256)
+data8 0x3FC3B08B6757F2A9    //log(1/frcpa(1+  42/256)
+data8 0x3FC40DFB08378003    //log(1/frcpa(1+  43/256)
+data8 0x3FC47E74E8CA5F7C    //log(1/frcpa(1+  44/256)
+data8 0x3FC4EF51F6466DE4    //log(1/frcpa(1+  45/256)
+data8 0x3FC56092E02BA516    //log(1/frcpa(1+  46/256)
+data8 0x3FC5D23857CD74D5    //log(1/frcpa(1+  47/256)
+data8 0x3FC6313A37335D76    //log(1/frcpa(1+  48/256)
+data8 0x3FC6A399DABBD383    //log(1/frcpa(1+  49/256)
+data8 0x3FC70337DD3CE41B    //log(1/frcpa(1+  50/256)
+data8 0x3FC77654128F6127    //log(1/frcpa(1+  51/256)
+data8 0x3FC7E9D82A0B022D    //log(1/frcpa(1+  52/256)
+data8 0x3FC84A6B759F512F    //log(1/frcpa(1+  53/256)
+data8 0x3FC8AB47D5F5A310    //log(1/frcpa(1+  54/256)
+data8 0x3FC91FE49096581B    //log(1/frcpa(1+  55/256)
+data8 0x3FC981634011AA75    //log(1/frcpa(1+  56/256)
+data8 0x3FC9F6C407089664    //log(1/frcpa(1+  57/256)
+data8 0x3FCA58E729348F43    //log(1/frcpa(1+  58/256)
+data8 0x3FCABB55C31693AD    //log(1/frcpa(1+  59/256)
+data8 0x3FCB1E104919EFD0    //log(1/frcpa(1+  60/256)
+data8 0x3FCB94EE93E367CB    //log(1/frcpa(1+  61/256)
+data8 0x3FCBF851C067555F    //log(1/frcpa(1+  62/256)
+data8 0x3FCC5C0254BF23A6    //log(1/frcpa(1+  63/256)
+data8 0x3FCCC000C9DB3C52    //log(1/frcpa(1+  64/256)
+data8 0x3FCD244D99C85674    //log(1/frcpa(1+  65/256)
+data8 0x3FCD88E93FB2F450    //log(1/frcpa(1+  66/256)
+data8 0x3FCDEDD437EAEF01    //log(1/frcpa(1+  67/256)
+data8 0x3FCE530EFFE71012    //log(1/frcpa(1+  68/256)
+data8 0x3FCEB89A1648B971    //log(1/frcpa(1+  69/256)
+data8 0x3FCF1E75FADF9BDE    //log(1/frcpa(1+  70/256)
+data8 0x3FCF84A32EAD7C35    //log(1/frcpa(1+  71/256)
+data8 0x3FCFEB2233EA07CD    //log(1/frcpa(1+  72/256)
+data8 0x3FD028F9C7035C1C    //log(1/frcpa(1+  73/256)
+data8 0x3FD05C8BE0D9635A    //log(1/frcpa(1+  74/256)
+data8 0x3FD085EB8F8AE797    //log(1/frcpa(1+  75/256)
+data8 0x3FD0B9C8E32D1911    //log(1/frcpa(1+  76/256)
+data8 0x3FD0EDD060B78081    //log(1/frcpa(1+  77/256)
+data8 0x3FD122024CF0063F    //log(1/frcpa(1+  78/256)
+data8 0x3FD14BE2927AECD4    //log(1/frcpa(1+  79/256)
+data8 0x3FD180618EF18ADF    //log(1/frcpa(1+  80/256)
+data8 0x3FD1B50BBE2FC63B    //log(1/frcpa(1+  81/256)
+data8 0x3FD1DF4CC7CF242D    //log(1/frcpa(1+  82/256)
+data8 0x3FD214456D0EB8D4    //log(1/frcpa(1+  83/256)
+data8 0x3FD23EC5991EBA49    //log(1/frcpa(1+  84/256)
+data8 0x3FD2740D9F870AFB    //log(1/frcpa(1+  85/256)
+data8 0x3FD29ECDABCDFA04    //log(1/frcpa(1+  86/256)
+data8 0x3FD2D46602ADCCEE    //log(1/frcpa(1+  87/256)
+data8 0x3FD2FF66B04EA9D4    //log(1/frcpa(1+  88/256)
+data8 0x3FD335504B355A37    //log(1/frcpa(1+  89/256)
+data8 0x3FD360925EC44F5D    //log(1/frcpa(1+  90/256)
+data8 0x3FD38BF1C3337E75    //log(1/frcpa(1+  91/256)
+data8 0x3FD3C25277333184    //log(1/frcpa(1+  92/256)
+data8 0x3FD3EDF463C1683E    //log(1/frcpa(1+  93/256)
+data8 0x3FD419B423D5E8C7    //log(1/frcpa(1+  94/256)
+data8 0x3FD44591E0539F49    //log(1/frcpa(1+  95/256)
+data8 0x3FD47C9175B6F0AD    //log(1/frcpa(1+  96/256)
+data8 0x3FD4A8B341552B09    //log(1/frcpa(1+  97/256)
+data8 0x3FD4D4F3908901A0    //log(1/frcpa(1+  98/256)
+data8 0x3FD501528DA1F968    //log(1/frcpa(1+  99/256)
+data8 0x3FD52DD06347D4F6    //log(1/frcpa(1+ 100/256)
+data8 0x3FD55A6D3C7B8A8A    //log(1/frcpa(1+ 101/256)
+data8 0x3FD5925D2B112A59    //log(1/frcpa(1+ 102/256)
+data8 0x3FD5BF406B543DB2    //log(1/frcpa(1+ 103/256)
+data8 0x3FD5EC433D5C35AE    //log(1/frcpa(1+ 104/256)
+data8 0x3FD61965CDB02C1F    //log(1/frcpa(1+ 105/256)
+data8 0x3FD646A84935B2A2    //log(1/frcpa(1+ 106/256)
+data8 0x3FD6740ADD31DE94    //log(1/frcpa(1+ 107/256)
+data8 0x3FD6A18DB74A58C5    //log(1/frcpa(1+ 108/256)
+data8 0x3FD6CF31058670EC    //log(1/frcpa(1+ 109/256)
+data8 0x3FD6F180E852F0BA    //log(1/frcpa(1+ 110/256)
+data8 0x3FD71F5D71B894F0    //log(1/frcpa(1+ 111/256)
+data8 0x3FD74D5AEFD66D5C    //log(1/frcpa(1+ 112/256)
+data8 0x3FD77B79922BD37E    //log(1/frcpa(1+ 113/256)
+data8 0x3FD7A9B9889F19E2    //log(1/frcpa(1+ 114/256)
+data8 0x3FD7D81B037EB6A6    //log(1/frcpa(1+ 115/256)
+data8 0x3FD8069E33827231    //log(1/frcpa(1+ 116/256)
+data8 0x3FD82996D3EF8BCB    //log(1/frcpa(1+ 117/256)
+data8 0x3FD85855776DCBFB    //log(1/frcpa(1+ 118/256)
+data8 0x3FD8873658327CCF    //log(1/frcpa(1+ 119/256)
+data8 0x3FD8AA75973AB8CF    //log(1/frcpa(1+ 120/256)
+data8 0x3FD8D992DC8824E5    //log(1/frcpa(1+ 121/256)
+data8 0x3FD908D2EA7D9512    //log(1/frcpa(1+ 122/256)
+data8 0x3FD92C59E79C0E56    //log(1/frcpa(1+ 123/256)
+data8 0x3FD95BD750EE3ED3    //log(1/frcpa(1+ 124/256)
+data8 0x3FD98B7811A3EE5B    //log(1/frcpa(1+ 125/256)
+data8 0x3FD9AF47F33D406C    //log(1/frcpa(1+ 126/256)
+data8 0x3FD9DF270C1914A8    //log(1/frcpa(1+ 127/256)
+data8 0x3FDA0325ED14FDA4    //log(1/frcpa(1+ 128/256)
+data8 0x3FDA33440224FA79    //log(1/frcpa(1+ 129/256)
+data8 0x3FDA57725E80C383    //log(1/frcpa(1+ 130/256)
+data8 0x3FDA87D0165DD199    //log(1/frcpa(1+ 131/256)
+data8 0x3FDAAC2E6C03F896    //log(1/frcpa(1+ 132/256)
+data8 0x3FDADCCC6FDF6A81    //log(1/frcpa(1+ 133/256)
+data8 0x3FDB015B3EB1E790    //log(1/frcpa(1+ 134/256)
+data8 0x3FDB323A3A635948    //log(1/frcpa(1+ 135/256)
+data8 0x3FDB56FA04462909    //log(1/frcpa(1+ 136/256)
+data8 0x3FDB881AA659BC93    //log(1/frcpa(1+ 137/256)
+data8 0x3FDBAD0BEF3DB165    //log(1/frcpa(1+ 138/256)
+data8 0x3FDBD21297781C2F    //log(1/frcpa(1+ 139/256)
+data8 0x3FDC039236F08819    //log(1/frcpa(1+ 140/256)
+data8 0x3FDC28CB1E4D32FD    //log(1/frcpa(1+ 141/256)
+data8 0x3FDC4E19B84723C2    //log(1/frcpa(1+ 142/256)
+data8 0x3FDC7FF9C74554C9    //log(1/frcpa(1+ 143/256)
+data8 0x3FDCA57B64E9DB05    //log(1/frcpa(1+ 144/256)
+data8 0x3FDCCB130A5CEBB0    //log(1/frcpa(1+ 145/256)
+data8 0x3FDCF0C0D18F326F    //log(1/frcpa(1+ 146/256)
+data8 0x3FDD232075B5A201    //log(1/frcpa(1+ 147/256)
+data8 0x3FDD490246DEFA6B    //log(1/frcpa(1+ 148/256)
+data8 0x3FDD6EFA918D25CD    //log(1/frcpa(1+ 149/256)
+data8 0x3FDD9509707AE52F    //log(1/frcpa(1+ 150/256)
+data8 0x3FDDBB2EFE92C554    //log(1/frcpa(1+ 151/256)
+data8 0x3FDDEE2F3445E4AF    //log(1/frcpa(1+ 152/256)
+data8 0x3FDE148A1A2726CE    //log(1/frcpa(1+ 153/256)
+data8 0x3FDE3AFC0A49FF40    //log(1/frcpa(1+ 154/256)
+data8 0x3FDE6185206D516E    //log(1/frcpa(1+ 155/256)
+data8 0x3FDE882578823D52    //log(1/frcpa(1+ 156/256)
+data8 0x3FDEAEDD2EAC990C    //log(1/frcpa(1+ 157/256)
+data8 0x3FDED5AC5F436BE3    //log(1/frcpa(1+ 158/256)
+data8 0x3FDEFC9326D16AB9    //log(1/frcpa(1+ 159/256)
+data8 0x3FDF2391A2157600    //log(1/frcpa(1+ 160/256)
+data8 0x3FDF4AA7EE03192D    //log(1/frcpa(1+ 161/256)
+data8 0x3FDF71D627C30BB0    //log(1/frcpa(1+ 162/256)
+data8 0x3FDF991C6CB3B379    //log(1/frcpa(1+ 163/256)
+data8 0x3FDFC07ADA69A910    //log(1/frcpa(1+ 164/256)
+data8 0x3FDFE7F18EB03D3E    //log(1/frcpa(1+ 165/256)
+data8 0x3FE007C053C5002E    //log(1/frcpa(1+ 166/256)
+data8 0x3FE01B942198A5A1    //log(1/frcpa(1+ 167/256)
+data8 0x3FE02F74400C64EB    //log(1/frcpa(1+ 168/256)
+data8 0x3FE04360BE7603AD    //log(1/frcpa(1+ 169/256)
+data8 0x3FE05759AC47FE34    //log(1/frcpa(1+ 170/256)
+data8 0x3FE06B5F1911CF52    //log(1/frcpa(1+ 171/256)
+data8 0x3FE078BF0533C568    //log(1/frcpa(1+ 172/256)
+data8 0x3FE08CD9687E7B0E    //log(1/frcpa(1+ 173/256)
+data8 0x3FE0A10074CF9019    //log(1/frcpa(1+ 174/256)
+data8 0x3FE0B5343A234477    //log(1/frcpa(1+ 175/256)
+data8 0x3FE0C974C89431CE    //log(1/frcpa(1+ 176/256)
+data8 0x3FE0DDC2305B9886    //log(1/frcpa(1+ 177/256)
+data8 0x3FE0EB524BAFC918    //log(1/frcpa(1+ 178/256)
+data8 0x3FE0FFB54213A476    //log(1/frcpa(1+ 179/256)
+data8 0x3FE114253DA97D9F    //log(1/frcpa(1+ 180/256)
+data8 0x3FE128A24F1D9AFF    //log(1/frcpa(1+ 181/256)
+data8 0x3FE1365252BF0865    //log(1/frcpa(1+ 182/256)
+data8 0x3FE14AE558B4A92D    //log(1/frcpa(1+ 183/256)
+data8 0x3FE15F85A19C765B    //log(1/frcpa(1+ 184/256)
+data8 0x3FE16D4D38C119FA    //log(1/frcpa(1+ 185/256)
+data8 0x3FE18203C20DD133    //log(1/frcpa(1+ 186/256)
+data8 0x3FE196C7BC4B1F3B    //log(1/frcpa(1+ 187/256)
+data8 0x3FE1A4A738B7A33C    //log(1/frcpa(1+ 188/256)
+data8 0x3FE1B981C0C9653D    //log(1/frcpa(1+ 189/256)
+data8 0x3FE1CE69E8BB106B    //log(1/frcpa(1+ 190/256)
+data8 0x3FE1DC619DE06944    //log(1/frcpa(1+ 191/256)
+data8 0x3FE1F160A2AD0DA4    //log(1/frcpa(1+ 192/256)
+data8 0x3FE2066D7740737E    //log(1/frcpa(1+ 193/256)
+data8 0x3FE2147DBA47A394    //log(1/frcpa(1+ 194/256)
+data8 0x3FE229A1BC5EBAC3    //log(1/frcpa(1+ 195/256)
+data8 0x3FE237C1841A502E    //log(1/frcpa(1+ 196/256)
+data8 0x3FE24CFCE6F80D9A    //log(1/frcpa(1+ 197/256)
+data8 0x3FE25B2C55CD5762    //log(1/frcpa(1+ 198/256)
+data8 0x3FE2707F4D5F7C41    //log(1/frcpa(1+ 199/256)
+data8 0x3FE285E0842CA384    //log(1/frcpa(1+ 200/256)
+data8 0x3FE294294708B773    //log(1/frcpa(1+ 201/256)
+data8 0x3FE2A9A2670AFF0C    //log(1/frcpa(1+ 202/256)
+data8 0x3FE2B7FB2C8D1CC1    //log(1/frcpa(1+ 203/256)
+data8 0x3FE2C65A6395F5F5    //log(1/frcpa(1+ 204/256)
+data8 0x3FE2DBF557B0DF43    //log(1/frcpa(1+ 205/256)
+data8 0x3FE2EA64C3F97655    //log(1/frcpa(1+ 206/256)
+data8 0x3FE3001823684D73    //log(1/frcpa(1+ 207/256)
+data8 0x3FE30E97E9A8B5CD    //log(1/frcpa(1+ 208/256)
+data8 0x3FE32463EBDD34EA    //log(1/frcpa(1+ 209/256)
+data8 0x3FE332F4314AD796    //log(1/frcpa(1+ 210/256)
+data8 0x3FE348D90E7464D0    //log(1/frcpa(1+ 211/256)
+data8 0x3FE35779F8C43D6E    //log(1/frcpa(1+ 212/256)
+data8 0x3FE36621961A6A99    //log(1/frcpa(1+ 213/256)
+data8 0x3FE37C299F3C366A    //log(1/frcpa(1+ 214/256)
+data8 0x3FE38AE2171976E7    //log(1/frcpa(1+ 215/256)
+data8 0x3FE399A157A603E7    //log(1/frcpa(1+ 216/256)
+data8 0x3FE3AFCCFE77B9D1    //log(1/frcpa(1+ 217/256)
+data8 0x3FE3BE9D503533B5    //log(1/frcpa(1+ 218/256)
+data8 0x3FE3CD7480B4A8A3    //log(1/frcpa(1+ 219/256)
+data8 0x3FE3E3C43918F76C    //log(1/frcpa(1+ 220/256)
+data8 0x3FE3F2ACB27ED6C7    //log(1/frcpa(1+ 221/256)
+data8 0x3FE4019C2125CA93    //log(1/frcpa(1+ 222/256)
+data8 0x3FE4181061389722    //log(1/frcpa(1+ 223/256)
+data8 0x3FE42711518DF545    //log(1/frcpa(1+ 224/256)
+data8 0x3FE436194E12B6BF    //log(1/frcpa(1+ 225/256)
+data8 0x3FE445285D68EA69    //log(1/frcpa(1+ 226/256)
+data8 0x3FE45BCC464C893A    //log(1/frcpa(1+ 227/256)
+data8 0x3FE46AED21F117FC    //log(1/frcpa(1+ 228/256)
+data8 0x3FE47A1527E8A2D3    //log(1/frcpa(1+ 229/256)
+data8 0x3FE489445EFFFCCC    //log(1/frcpa(1+ 230/256)
+data8 0x3FE4A018BCB69835    //log(1/frcpa(1+ 231/256)
+data8 0x3FE4AF5A0C9D65D7    //log(1/frcpa(1+ 232/256)
+data8 0x3FE4BEA2A5BDBE87    //log(1/frcpa(1+ 233/256)
+data8 0x3FE4CDF28F10AC46    //log(1/frcpa(1+ 234/256)
+data8 0x3FE4DD49CF994058    //log(1/frcpa(1+ 235/256)
+data8 0x3FE4ECA86E64A684    //log(1/frcpa(1+ 236/256)
+data8 0x3FE503C43CD8EB68    //log(1/frcpa(1+ 237/256)
+data8 0x3FE513356667FC57    //log(1/frcpa(1+ 238/256)
+data8 0x3FE522AE0738A3D8    //log(1/frcpa(1+ 239/256)
+data8 0x3FE5322E26867857    //log(1/frcpa(1+ 240/256)
+data8 0x3FE541B5CB979809    //log(1/frcpa(1+ 241/256)
+data8 0x3FE55144FDBCBD62    //log(1/frcpa(1+ 242/256)
+data8 0x3FE560DBC45153C7    //log(1/frcpa(1+ 243/256)
+data8 0x3FE5707A26BB8C66    //log(1/frcpa(1+ 244/256)
+data8 0x3FE587F60ED5B900    //log(1/frcpa(1+ 245/256)
+data8 0x3FE597A7977C8F31    //log(1/frcpa(1+ 246/256)
+data8 0x3FE5A760D634BB8B    //log(1/frcpa(1+ 247/256)
+data8 0x3FE5B721D295F10F    //log(1/frcpa(1+ 248/256)
+data8 0x3FE5C6EA94431EF9    //log(1/frcpa(1+ 249/256)
+data8 0x3FE5D6BB22EA86F6    //log(1/frcpa(1+ 250/256)
+data8 0x3FE5E6938645D390    //log(1/frcpa(1+ 251/256)
+data8 0x3FE5F673C61A2ED2    //log(1/frcpa(1+ 252/256)
+data8 0x3FE6065BEA385926    //log(1/frcpa(1+ 253/256)
+data8 0x3FE6164BFA7CC06B    //log(1/frcpa(1+ 254/256)
+data8 0x3FE62643FECF9743    //log(1/frcpa(1+ 255/256)
+ASM_SIZE_DIRECTIVE(log_table_2)
+
+   
+.align 32
+.global logf#
+.global log10f#
+
+// log10 has p7 true, p8 false
+// log   has p8 true, p7 false
+
 .section .text
+.proc  log10f#
+.align 32
 
-// logf   has p13 true, p14 false
-// log10f has p14 true, p13 false
+log10f: // log10f entry point: sets p7 and branches into the code shared with logf
+#ifdef _LIBC
+.global __ieee754_log10f
+.type __ieee754_log10f,@function
+__ieee754_log10f:
+#endif
+{ .mfi
+     alloc     r32=ar.pfs,1,15,4,0                    // 1 input, 15 local, 4 output registers
+     frcpa.s1  log_C,p9 = f1,f8                 // log_C = reciprocal approximation of x
+     cmp.eq.unc     p7,p8         = r0, r0 // p7 = 1, p8 = 0: log10 variant
+}
+{ .mfb
+     addl           log_AD_1   = @ltoff(log_table_1), gp // linkage-table slot for log_table_1
+     fnorm.s1 log_NORM_f8 = f8 // normalized copy of x
+     br.sptk        L(LOG_LOG10_X) // join the shared code
+}
+;;
+
+.endp log10f
+ASM_SIZE_DIRECTIVE(log10f)
+ASM_SIZE_DIRECTIVE(__ieee754_log10f)
+
+
+
+.section .text
+.proc  logf#
+.align 32
+logf: // logf entry point: sets p8 and falls through to L(LOG_LOG10_X)
+#ifdef _LIBC
+.global __ieee754_logf
+.type __ieee754_logf,@function
+__ieee754_logf:
+#endif
 
-GLOBAL_IEEE754_ENTRY(log10f)
 { .mfi
-      getf.exp      GR_Exp = f8 // if x is unorm then must recompute
-      frcpa.s1      FR_RcpX,p0 = f1,f8
-      mov           GR_05 = 0xFFFE // biased exponent of A2=0.5
+     alloc     r32=ar.pfs,1,15,4,0                    // 1 input, 15 local, 4 output registers
+     frcpa.s1  log_C,p9 = f1,f8                 // log_C = reciprocal approximation of x
+     cmp.eq.unc     p8,p7         = r0, r0 // p8 = 1, p7 = 0: natural log variant
 }
-{ .mlx
-      addl          GR_ad_T = @ltoff(log10f_data),gp
-      movl          GR_A3 = 0x3FD5555555555555 // double precision memory
-                                               // representation of A3
-};;
 { .mfi
-      getf.sig      GR_Sig = f8 // if x is unorm then must recompute
-      fclass.m      p8,p0 = f8,9 // is x positive unorm?
-      sub           GR_025 = GR_05,r0,1 // biased exponent of A4=0.25
+     addl           log_AD_1   = @ltoff(log_table_1), gp // linkage-table slot for log_table_1
+     fnorm.s1 log_NORM_f8 = f8 // normalized copy of x
+     nop.i 999
 }
-{ .mlx
-      ld8           GR_ad_T = [GR_ad_T]
-      movl          GR_Ln2 = 0x3FD34413509F79FF // double precision memory
-                                                // representation of
-                                                // log(2)/ln(10)
-};;
+;;
+
+L(LOG_LOG10_X): // code shared by logf (p8 set) and log10f (p7 set)
+
 { .mfi
-      setf.d        FR_A3 = GR_A3 // create A3
-      fcmp.eq.s1    p14,p13 = f0,f0 // set p14 to 1 for log10f
-      dep.z         GR_xorg = GR_05,55,8 // 0x7F00000000000000 integer number
-                                         // bits of that are
-                                         // GR_xorg[63]   = last bit of biased
-                                         //            exponent of 255/256
-                                         // GR_xorg[62-0] = bits from 62 to 0
-                                         //            of significand of 255/256
+     getf.exp   log_GR_signexp_f8 = f8 // If x unorm then must recompute
+     fclass.m.unc p15,p0 = f8, 0x0b            // Test for x=unorm
+     mov        log_GR_fff7 = 0xfff7 // 0xffff - 8: threshold later compared with the exponent of w = x-1
 }
-{ .mib
-      setf.exp      FR_A2 = GR_05 // create A2
-      sub           GR_de = GR_Exp,GR_05 // biased_exponent_of_x - 0xFFFE
-                                         // needed to comparion with 0.5 and 2.0
-      br.cond.sptk  logf_log10f_common
-};;
-GLOBAL_IEEE754_END(log10f)
-GLOBAL_IEEE754_ENTRY(logf)
 { .mfi
-      getf.exp      GR_Exp = f8 // if x is unorm then must recompute
-      frcpa.s1      FR_RcpX,p0 = f1,f8
-      mov           GR_05 = 0xFFFE // biased exponent of A2=-0.5
+     ld8 log_AD_1 = [log_AD_1] // replace the linkage-table slot address by the table address
+     fms.s1     log_w = f8,f1,f1              // w = x - 1
+     mov       log_GR_exp_17_ones = 0x1ffff // mask for the 17-bit exponent field
 }
-{ .mlx
-      addl          GR_ad_T = @ltoff(logf_data),gp
-      movl          GR_A3 = 0x3FD5555555555555 // double precision memory
-                                               // representation of A3
-};;
+;;
+
+{ .mmi
+     getf.sig   log_GR_significand_f8 = f8 // If x unorm then must recompute
+     mov       log_GR_exp_16_ones = 0xffff // exponent bias, subtracted in L(LOG_COMMON)
+     nop.i 999
+}
+;;
+
+{ .mmb
+     adds log_AD_2 = 0x10, log_AD_1 // second pointer, 16 bytes past log_AD_1
+     and        log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones  // exponent of x with the sign bit masked off
+(p15) br.cond.spnt L(LOG_DENORM)     // unnormal x: recompute exponent and significand
+}
+;;
+
+L(LOG_COMMON): // main path; also re-entered from L(LOG_DENORM)
+{.mfi
+     ldfpd      log_P3,log_P2 = [log_AD_1],16           // polynomial coefficients P3, P2
+     fclass.m.unc p6,p0 = f8, 0xc3             // Test for x=nan
+     shl        log_GR_index = log_GR_significand_f8,1            // shift out the top (explicit integer) bit
+}
+{.mfi
+     sub       log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones // unbiased exponent N of x
+     nop.f 999
+     nop.i 999
+}
+;;
+
 { .mfi
-      getf.sig      GR_Sig = f8 // if x is unorm then must recompute
-      fclass.m      p8,p0 = f8,9 // is x positive unorm?
-      dep.z         GR_xorg = GR_05,55,8 // 0x7F00000000000000 integer number
-                                         // bits of that are
-                                         // GR_xorg[63]   = last bit of biased
-                                         //            exponent of 255/256
-                                         // GR_xorg[62-0] = bits from 62 to 0
-                                         //            of significand of 255/256
+     ldfpd      log_P1,log_inv_ln10 = [log_AD_2],16           // coefficient P1 and the log10 scale factor
+     fclass.m.unc p11,p0 = f8, 0x21            // Test for x=+inf
+     shr.u     log_GR_index = log_GR_index,56 // keep the top 8 fraction bits as table index
 }
 { .mfi
-      ld8           GR_ad_T = [GR_ad_T]
-      nop.f         0
-      sub           GR_025 = GR_05,r0,1 // biased exponent of A4=0.25
-};;
+     setf.sig  log_int_Nfloat = log_GR_true_exp_f8 // integer N into an FP register for fcvt.xf
+     nop.f 999
+     nop.i 999
+}
+;;
+
+
 { .mfi
-      setf.d        FR_A3 = GR_A3 // create A3
-      fcmp.eq.s1    p13,p14 = f0,f0 // p13 - true for logf
-      sub           GR_de = GR_Exp,GR_05 // biased_exponent_of_x - 0xFFFE
-                                         // needed to comparion with 0.5 and 2.0
+     ldfd       log_log2 = [log_AD_2],16   // log_AD_2 now 0x30 past the log_AD_1 base
+     fma.s1     log_wsq     = log_w, log_w, f0 // w^2
+     nop.i 999
 }
-{ .mlx
-      setf.exp      FR_A2 = GR_05 // create A2
-      movl          GR_Ln2 = 0x3FE62E42FEFA39EF // double precision memory
-                                                // representation of log(2)
-};;
-logf_log10f_common:
+{ .mfb
+     nop.m 999
+(p6) fma.s.s0   f8 = f8,f1,f0      // quietize nan result if x=nan
+(p6) br.ret.spnt b0                // Exit for x=nan
+}
+;;
+
+
 { .mfi
-      setf.exp      FR_A4 = GR_025 // create A4=0.25
-      fclass.m      p9,p0 = f8,0x3A // is x < 0 (including negateve unnormals)?
-      dep           GR_x = GR_Exp,GR_Sig,63,1 // produce integer that bits are
-                                              // GR_x[63] = GR_Exp[0]
-                                              // GR_x[62-0] = GR_Sig[62-0]
+     shladd log_AD_2 = log_GR_index,3,log_AD_2 // address of the 8-byte entry T[index] (presumably in log_table_2 - confirm layout)
+     fcmp.eq.s1 p10,p0 = log_NORM_f8, f1  // Test for x=+1.0
+     nop.i 999
 }
-{ .mib
-      sub           GR_N = GR_Exp,GR_05,1 // unbiased exponent of x
-      cmp.gtu       p6,p7 = 2,GR_de // is 0.5 <= x < 2.0?
-(p8)  br.cond.spnt  logf_positive_unorm
-};;
-logf_core:
+{ .mfb
+     nop.m 999
+     fms.s1     log_r = log_C,f8,f1 // r = log_C*x - 1
+(p11) br.ret.spnt b0               // Exit for x=+inf
+}
+;;
+
+
+{ .mmf
+     nop.m 999
+     nop.m 999
+     fclass.m.unc p6,p0 = f8, 0x07        // Test for x=0
+}
+;;
+
+
+{ .mfb
+     ldfd       log_T = [log_AD_2] // table value T for this index
+(p10) fmerge.s f8 = f0, f0 // result +0.0
+(p10) br.ret.spnt b0                // Exit for x=1.0
+;;
+}
+
 { .mfi
-      setf.sig      FR_N = GR_N // copy unbiased exponent of x to the
-                                // significand field of FR_N
-      fclass.m      p10,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
-      dep.z         GR_dx = GR_05,54,3 // 0x0180000000000000 - difference
-                                       // between our integer representations
-                                       // of 257/256 and 255/256
+     getf.exp   log_GR_signexp_w = log_w // sign and exponent of w = x-1
+     fclass.m.unc p12,p0 = f8, 0x3a       // Test for x neg norm, unorm, inf
+     nop.i 999
+}
+;;
+
+{ .mmb
+     nop.m 999
+     nop.m 999
+(p6) br.cond.spnt L(LOG_ZERO_NEG)      // Branch if x=0
+;;
 }
+ 
+
 { .mfi
-      nop.m         0
-      nop.f         0
-      sub           GR_x = GR_x,GR_xorg // difference between representations
-                                        // of x and 255/256
-};;
+     and log_GR_exp_w = log_GR_exp_17_ones, log_GR_signexp_w // exponent of w with the sign bit masked off
+     nop.f 999
+     nop.i 999
+}
+{ .mfb
+     nop.m 999
+     fma.s1     log_rsq     = log_r, log_r, f0                   // r^2
+(p12) br.cond.spnt L(LOG_ZERO_NEG)     // Branch if x<0
+;;
+}
+
 { .mfi
-      ldfd          FR_InvLn10 = [GR_ad_T],8
-      fcmp.eq.s1    p11,p0 = f8,f1 // is x equal to 1.0?
-      extr.u        GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
+     nop.m 999
+     fma.s1      log_rp_p32 = log_P3, log_r, log_P2 // P3*r + P2
+     nop.i 999
 }
-{ .mib
-      setf.d        FR_Ln2 = GR_Ln2 // create log(2) or log10(2)
-(p6)  cmp.gtu       p6,p7 = GR_dx,GR_x // set p6 if 255/256 <= x < 257/256
-(p9)  br.cond.spnt  logf_negatives // jump if input argument is negative number
-};;
-// p6 is true if |x-1| < 1/256
-// p7 is true if |x-1| >= 1/256
-.pred.rel "mutex",p6,p7
 { .mfi
-      shladd        GR_ad_T = GR_Ind,3,GR_ad_T // calculate address of T
-(p7)  fms.s1        FR_r = FR_RcpX,f8,f1 // range reduction for |x-1|>=1/256
-      extr.u        GR_Exp = GR_Exp,0,17 // exponent without sign
+     nop.m 999
+     fma.s1    log_rp_q32   = log_P3, log_w, log_P2 // P3*w + P2
+     nop.i 999
+;;
 }
-{ .mfb
-      nop.m         0
-(p6)  fms.s1        FR_r = f8,f1,f1 // range reduction for |x-1|<1/256
-(p10) br.cond.spnt  logf_nan_nat_pinf // exit for NaN, NaT or +Inf
-};;
-{ .mfb
-      ldfd          FR_T = [GR_ad_T] // load T
-(p11) fma.s.s0      f8 = f0,f0,f0
-(p11) br.ret.spnt   b0 // exit for x = 1.0
-};;
-{ .mib
-      nop.m         0
-      cmp.eq        p12,p0 = r0,GR_Exp // is x +/-0? (here it's quite enough
-                                       // only to compare exponent with 0
-                                       // because all unnormals already
-                                       // have been filtered)
-(p12) br.cond.spnt  logf_zeroes        // Branch if input argument is +/-0
-};;
+
 { .mfi
-      nop.m         0
-      fnma.s1       FR_A2 = FR_A2,FR_r,f1 // A2*r+1
-      nop.i         0
+     nop.m 999
+     fcvt.xf   log_Nfloat = log_int_Nfloat // N as a floating-point value
+     nop.i 999 ;;
 }
+
 { .mfi
-      nop.m         0
-      fma.s1        FR_r2 = FR_r,FR_r,f0  // r^2
-      nop.i         0
-};;
+     nop.m 999
+     fma.s1    log_rp_p10   = log_P1, log_r, f1 // P1*r + 1
+     nop.i 999
+}
 { .mfi
-      nop.m         0
-      fcvt.xf       FR_N = FR_N // convert integer N in significand of FR_N
-                                // to floating-point representation
-      nop.i         0
+     nop.m 999
+     fma.s1    log_rp_q10  = log_P1, log_w, f1 // P1*w + 1
+     nop.i 999
+;;
 }
+
+//    p13 <== large w log
+//    p14 <== small w log
 { .mfi
-      nop.m         0
-      fnma.s1       FR_A3 = FR_A4,FR_r,FR_A3 // A4*r+A3
-      nop.i         0
-};;
+(p8) cmp.ge.unc p13,p14 = log_GR_exp_w, log_GR_fff7 // logf only: large w when exponent of w >= 0xfff7
+     fcmp.eq.s0 p6,p0 = f8,f0         // Sets flag on +denormal input
+     nop.i 999
+;;
+}
+
+//    p10 <== large w log10
+//    p11 <== small w log10
 { .mfi
-      nop.m         0
-      fma.s1        FR_r = FR_r,FR_InvLn10,f0 // For log10f we have r/log(10)
-      nop.i         0
+(p7) cmp.ge.unc p10,p11 = log_GR_exp_w, log_GR_fff7 // log10f only: same threshold test
+     nop.f 999
+     nop.i 999 ;;
 }
+
 { .mfi
-      nop.m         0
-      nop.f         0
-      nop.i         0
-};;
+     nop.m 999
+     fma.s1        log_T_plus_Nlog2 = log_Nfloat,log_log2, log_T    // N*log2 + T
+     nop.i 999 ;;
+}
+
+
 { .mfi
-      nop.m         0
-      fma.s1        FR_A2 = FR_A3,FR_r2,FR_A2 // (A4*r+A3)*r^2+(A2*r+1)
-      nop.i         0
+     nop.m 999
+     fma.s1     log_rp_p2   = log_rp_p32, log_rsq, log_rp_p10 // (P3*r+P2)*r^2 + (P1*r+1)
+     nop.i 999
 }
 { .mfi
-      nop.m         0
-      fma.s1        FR_NxLn2pT = FR_N,FR_Ln2,FR_T // N*Ln2+T
-      nop.i         0
-};;
-.pred.rel "mutex",p6,p7
+     nop.m 999
+     fma.s1     log_rp_q2   = log_rp_q32, log_wsq, log_rp_q10 // (P3*w+P2)*w^2 + (P1*w+1)
+     nop.i 999
+;;
+}
+
+
+//    small w, log   <== p14
 { .mfi
-      nop.m         0
-(p7)  fma.s.s0      f8 = FR_A2,FR_r,FR_NxLn2pT // result for |x-1|>=1/256
-      nop.i         0
+     nop.m 999
+(p14) fma.s        f8       = log_rp_q2, log_w, f0 // logf result for small w: polynomial in w only
+     nop.i 999
+}
+{ .mfi
+     nop.m 999
+(p11) fma.s1        log_Q       = log_rp_q2, log_w, f0 // log10f, small w: natural-log value kept in log_Q
+     nop.i 999 ;;
 }
-{ .mfb
-      nop.m         0
-(p6)  fma.s.s0      f8 = FR_A2,FR_r,f0 // result for |x-1|<1/256
-      br.ret.sptk   b0
-};;
 
-.align 32
-logf_positive_unorm:
+
+//    large w, log   <== p13
+.pred.rel "mutex",p13,p10
 { .mfi
-      nop.m         0
-(p8)  fma.s0        f8 = f8,f1,f0 // Normalize & set D-flag
-      nop.i         0
-};;
+      nop.m 999
+(p13) fma.s        f8        = log_rp_p2, log_r, log_T_plus_Nlog2 // logf result for large w
+      nop.i 999 
+}
 { .mfi
-      getf.exp      GR_Exp = f8    // recompute biased exponent
-      nop.f         0
-      cmp.ne        p6,p7 = r0,r0  // p6 <- 0, p7 <- 1 because
-                                   // in case of unorm we are out
-                                   // interval [255/256; 257/256]
-};;
+      nop.m 999
+(p10) fma.s1     log_Q     = log_rp_p2, log_r, log_T_plus_Nlog2 // log10f, large w: natural-log value kept in log_Q
+      nop.i 999  ;;
+}
+
+
+//    log10
+{ .mfb
+      nop.m 999
+(p7)  fma.s      f8 = log_inv_ln10,log_Q,f0                         // log10f result = log_inv_ln10 * log_Q
+      br.ret.sptk     b0 
+;;
+}
+
+
+L(LOG_DENORM): // unnormal x: redo exponent/significand extraction on the normalized copy
+{ .mmi
+     getf.exp   log_GR_signexp_f8 = log_NORM_f8 // sign and exponent of normalized x
+     nop.m 999
+     nop.i 999
+}
+;;
+{ .mmb
+     getf.sig   log_GR_significand_f8 = log_NORM_f8 // significand of normalized x
+     and        log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones  // exponent with the sign bit masked off
+     br.cond.sptk L(LOG_COMMON) // rejoin the main path
+}
+;;
+
+L(LOG_ZERO_NEG): // x is zero or negative: build result and error tag, then go to __libm_error_region
+
+// qnan snan inf norm     unorm 0 -+
+// 0    0    0   0        0     1 11      0x7
+// 0    0    1   1        1     0 10      0x3a
+
+// Save x (f8) in f10
 { .mfi
-      getf.sig      GR_Sig = f8 // recompute significand
-      nop.f         0
-      nop.i         0
-};;
-{ .mib
-      sub           GR_N = GR_Exp,GR_05,1 // unbiased exponent N
-      nop.i         0
-      br.cond.sptk  logf_core // return into main path
-};;
+     nop.m 999
+     fmerge.s f10 = f8,f8 // f10 is stored as Parameter 1 by __libm_error_region
+     nop.i 999  ;;
+}
+
+// p8 p9  means  ln(+-0)  = -inf
+// p7 p10 means  log(+-0) = -inf
+
+//    p13 means  ln(-)
+//    p14 means  log(-)
+
 
-.align 32
-logf_nan_nat_pinf:
 { .mfi
-      nop.m         0
-      fma.s.s0      f8 = f8,f1,f0 // set V-flag
-      nop.i         0
+     nop.m 999
+     fmerge.ns   f6 = f1,f1            // Form -1.0
+     nop.i 999  ;;
 }
-{ .mfb
-      nop.m         0
-      nop.f         0
-      br.ret.sptk   b0 // exit for NaN, NaT or +Inf
-};;
 
-.align 32
-logf_zeroes:
+// p9  means  ln(+-0)  = -inf
+// p10 means  log(+-0) = -inf
+// Log(+-0) = -inf 
+
 { .mfi
-      nop.m         0
-      fmerge.s      FR_X = f8,f8 // keep input argument for subsequent
-                                 // call of __libm_error_support#
-      nop.i         0
+	nop.m 999
+(p8)  fclass.m.unc  p9,p0 = f10, 0x07           // p9: logf and x = +-0
+	nop.i 999
 }
 { .mfi
-(p13) mov           GR_TAG = 4 // set libm error in case of logf
-      fms.s1        FR_tmp = f0,f0,f1 // -1.0
-      nop.i         0
-};;
+	nop.m 999
+(p7)  fclass.m.unc  p10,p0 = f10, 0x07           // p10: log10f and x = +-0
+	nop.i 999 ;;
+}
+
+
+// p13  ln(-)
+// p14  log(-)
+
+// Log(-inf, -normal, -unnormal) = QNAN indefinite
 { .mfi
-      nop.m         0
-      frcpa.s0      f8,p0 = FR_tmp,f0 // log(+/-0) should be equal to -INF.
-                                      // We can get it using frcpa because it
-                                      // sets result to the IEEE-754 mandated
-                                      // quotient of FR_tmp/f0.
-                                      // As far as FR_tmp is -1 it'll be -INF
-      nop.i         0
+	nop.m 999
+(p8)  fclass.m.unc  p13,p0 = f10, 0x3a           // p13: logf and x < 0
+	nop.i 999 
+}
+{ .mfi
+	nop.m 999
+(p7)  fclass.m.unc  p14,p0 = f10, 0x3a           // p14: log10f and x < 0
+	nop.i 999  ;;
 }
-{ .mib
-(p14) mov           GR_TAG = 10 // set libm error in case of log10f
-      nop.i         0
-      br.cond.sptk  logf_libm_err
-};;
 
-.align 32
-logf_negatives:
+
+.pred.rel "mutex",p9,p10
 { .mfi
-(p13) mov           GR_TAG = 5 // set libm error in case of logf
-      fmerge.s      FR_X = f8,f8 // keep input argument for subsequent
-                                 // call of __libm_error_support#
-      nop.i         0
-};;
+(p9)     mov        log_GR_tag = 4       // error tag for logf(+-0)
+(p9)    frcpa f8,p11 = f6,f0                   // f8 = -1.0/0 = -inf
+            nop.i 999
+}
 { .mfi
-(p14) mov           GR_TAG = 11 // set libm error in case of log10f
-      frcpa.s0      f8,p0 = f0,f0 // log(negatives) should be equal to NaN.
-                                  // We can get it using frcpa because it
-                                  // sets result to the IEEE-754 mandated
-                                  // quotient of f0/f0 i.e. NaN.
-      nop.i         0
-};;
+(p10)    mov        log_GR_tag = 10       // error tag for log10f(+-0)
+(p10)   frcpa f8,p12 = f6,f0                   // f8 = -1.0/0 = -inf
+            nop.i 999 ;;
+}
+
+.pred.rel "mutex",p13,p14
+{ .mfi
+(p13)    mov        log_GR_tag = 5       // error tag for logf(negative)
+(p13)    frcpa f8,p11 = f0,f0                   // f8 = 0/0 = NaN
+            nop.i 999
+}
+{ .mfb
+(p14)    mov        log_GR_tag = 11       // error tag for log10f(negative)
+(p14)   frcpa f8,p12 = f0,f0                   // f8 = 0/0 = NaN
+        br.cond.sptk __libm_error_region ;; 
+}
+.endp logf
+ASM_SIZE_DIRECTIVE(logf)
+ASM_SIZE_DIRECTIVE(__ieee754_logf)
 
-.align 32
-logf_libm_err:
-{ .mmi
-      alloc         r32 = ar.pfs,1,4,4,0
-      mov           GR_Parameter_TAG = GR_TAG
-      nop.i         0
-};;
-GLOBAL_IEEE754_END(logf)
 
 // Stack operations when calling error support.
 //       (1)               (2)                          (3) (call)              (4)
@@ -1101,56 +890,70 @@ GLOBAL_IEEE754_END(logf)
 //    save ar.pfs          save b0                                               restore gp
 //    save gp                                                                    restore ar.pfs
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+
+
+.proc __libm_error_region
+__libm_error_region: // stores f10, f1 and f8 on a 64-byte frame, calls __libm_error_support, reloads f8
 .prologue
+
+// (1)
 { .mfi
-      add   GR_Parameter_Y=-32,sp             // Parameter 2 value
-      nop.f 0
-.save ar.pfs,GR_SAVE_PFS
-      mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 address (old sp - 32)
+        nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-      add sp=-64,sp                           // Create new stack
-      nop.f 0
-      mov GR_SAVE_GP=gp                       // Save gp
+        add sp=-64,sp                          // Create new stack
+        nop.f 0
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
+
+
+// (2)
 { .mmi
-      stfs [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
-      add GR_Parameter_X = 16,sp              // Parameter 1 address
+        stfs [GR_Parameter_Y] = f1,16         // STORE Parameter 2 on stack (f1; presumably a dummy for one-argument log - confirm)
+        add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0                       // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
+
 .body
+// (3)
 { .mib
-      stfs [GR_Parameter_X] = FR_X                  // STORE Parameter 1 on stack
-      add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-      nop.b 0
+        stfs [GR_Parameter_X] = f10                   // STORE Parameter 1 on stack (x saved in f10)
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
+        nop.b 0                             
 }
 { .mib
-      stfs [GR_Parameter_Y] = FR_RESULT             // STORE Parameter 3 on stack
-      add   GR_Parameter_Y = -16,GR_Parameter_Y
-      br.call.sptk b0=__libm_error_support#         // Call error handling function
+        stfs [GR_Parameter_Y] = f8                    // STORE Parameter 3 on stack
+        add   GR_Parameter_Y = -16,GR_Parameter_Y     // back to the Parameter 2 address
+        br.call.sptk b0=__libm_error_support#         // Call error handling function
 };;
+
 { .mmi
-      nop.m 0
-      nop.m 0
-      add   GR_Parameter_RESULT = 48,sp
+        nop.m 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp      // recompute the result address after the call
 };;
+
+// (4)
 { .mmi
-      ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
+        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-      add   sp = 64,sp                       // Restore stack pointer
-      mov   b0 = GR_SAVE_B0                  // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-      mov   gp = GR_SAVE_GP                  // Restore gp
-      mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-      br.ret.sptk     b0                     // Return
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
diff --git a/sysdeps/ia64/fpu/e_logl.c b/sysdeps/ia64/fpu/e_logl.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_logl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_pow.S b/sysdeps/ia64/fpu/e_pow.S
index 11fae53d72..56f7f078ba 100644
--- a/sysdeps/ia64/fpu/e_pow.S
+++ b/sysdeps/ia64/fpu/e_pow.S
@@ -1,10 +1,10 @@
 .file "pow.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,41 +35,30 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/03/00 Added p12 to definite over/under path. With odd power we did not
+// 2/02/00  Initial version
+// 2/03/00  Added p12 to definite over/under path. With odd power we did not
 //          maintain the sign of x in this path.
-// 04/04/00 Unwind support added
-// 04/19/00 pow(+-1,inf) now returns NaN
-//          pow(+-val, +-inf) returns 0 or inf, but now does not call error
-//          support
+// 4/04/00  Unwind support added
+// 4/19/00  pow(+-1,inf) now returns NaN
+//          pow(+-val, +-inf) returns 0 or inf, but now does not call error support
 //          Added s1 to fcvt.fx because invalid flag was incorrectly set.
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 09/07/00 Improved performance by eliminating bank conflicts and other stalls,
+// 9/07/00  Improved performance by eliminating bank conflicts and other stalls,
 //          and tweaking the critical path
-// 09/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
-// 09/28/00 Updated NaN**0 path
-// 01/20/01 Fixed denormal flag settings.
-// 02/13/01 Improved speed.
-// 03/19/01 Reordered exp polynomial to improve speed and eliminate monotonicity
-//          problem in round up, down, and to zero modes.  Also corrected
-//          overflow result when x negative, y odd in round up, down, zero.
-// 06/14/01 Added brace missing from bundle
-// 12/10/01 Corrected case where x negative, 2^52 <= |y| < 2^53, y odd integer.
-// 12/20/01 Fixed monotonity problem in round to nearest.
-// 02/08/02 Fixed overflow/underflow cases that were not calling error support.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 08/29/02 Improved Itanium 2 performance
-// 09/21/02 Added branch for |y*log(x)|<2^-11 to fix monotonicity problems.
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 9/08/00  Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
+// 9/28/00  Updated NaN**0 path 
+// 1/20/01  Fixed denormal flag settings.
+// 2/12/01  Improved speed.
 //
 // API
 //==============================================================
-// double pow(double x, double y)
+// double pow(double x, double y)
+// float  powf(float x, float y)
 //
 // Overview of operation
 //==============================================================
@@ -78,51 +67,51 @@
 // 1. Log(x)
 // 2. y Log(x)
 // 3. exp(y log(x))
-//
+// 
 // This means we work with the absolute value of x and merge in the sign later.
 //      Log(x) = G + delta + r -rsq/2 + p
 // G,delta depend on the exponent of x and table entries. The table entries are
 // indexed by the exponent of x, called K.
-//
+// 
 // The G and delta come out of the reduction; r is the reduced x.
-//
+// 
 // B = frcpa(x)
 // xB-1 is small means that B is the approximate inverse of x.
-//
+// 
 //      Log(x) = Log( (1/B)(Bx) )
 //             = Log(1/B) + Log(Bx)
 //             = Log(1/B) + Log( 1 + (Bx-1))
-//
+// 
 //      x  = 2^K 1.x_1x_2.....x_52
-//      B= frcpa(x) = 2^-k Cm
+//      B= frcpa(x) = 2^-k Cm 
 //      Log(1/B) = Log(1/(2^-K Cm))
 //      Log(1/B) = Log((2^K/ Cm))
 //      Log(1/B) = K Log(2) + Log(1/Cm)
-//
+// 
 //      Log(x)   = K Log(2) + Log(1/Cm) + Log( 1 + (Bx-1))
-//
+// 
 // If you take the significand of x, set the exponent to true 0, then Cm is
 // the frcpa. We tabulate the Log(1/Cm) values. There are 256 of them.
 // The frcpa table is indexed by 8 bits, the x_1 thru x_8.
 // m = x_1x_2...x_8 is an 8-bit index.
-//
+// 
 //      Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255.
-//
+// 
 // We tabluate as two doubles, T and t, where T +t is the value itself.
-//
+// 
 //      Log(x)   = (K Log(2)_hi + T) + (Log(2)_hi + t) + Log( 1 + (Bx-1))
 //      Log(x)   =  G + delta           + Log( 1 + (Bx-1))
-//
+// 
 // The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1.
-//
+// 
 //      Log( 1 + (Bx-1)) = r - rsq/2 + p
-//
+// 
 // Then,
-//
+//    
 //      yLog(x) = yG + y delta + y(r-rsq/2) + yp
 //      yLog(x) = Z1 + e3      + Z2         + Z3 + (e2 + e3)
-//
-//
+// 
+// 
 //     exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
 //
 //
@@ -144,7 +133,7 @@
 //       exp(r)       = exp(Z - N log2/128)
 //
 //      r = s + d = (Z - N (log2/128)_hi) -N (log2/128)_lo
-//                =  Z - N (log2/128)
+//                =  Z - N (log2/128) 
 //
 //      Z         = s+d +N (log2/128)
 //
@@ -160,22 +149,22 @@
 //      n log2/128 = n_7n_6n_5 log2/8 + n_4n_3n_2n_1 log2/128
 //      n log2/128 = I2 log2/8 + I1 log2/128
 //
-//      N log2/128 = M log2 + I2 log2/8 + I1 log2/128
+//      N log2/128 = M log2 + I2 log2/8 + I1 log2/128 
 //
 //      exp(Z)    = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128))
 //      exp(Z)    = exp(s) (1+d1) (1+d2)(2^M) 2^I2/8 2^I1/128
 //      exp(Z)    = exp(s) f1 f2 (2^M) 2^I2/8 2^I1/128
 //
 // I1, I2 are table indices. Use a series for exp(s).
-// Then get exp(Z)
+// Then get exp(Z) 
 //
 //     exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
-//     exp(yLog(x)) = exp(Z) exp(Z3) f3
-//     exp(yLog(x)) = exp(Z)f3 exp(Z3)
-//     exp(yLog(x)) = A exp(Z3)
+//     exp(yLog(x)) = exp(Z) exp(Z3) f3 
+//     exp(yLog(x)) = exp(Z)f3 exp(Z3)  
+//     exp(yLog(x)) = A exp(Z3)  
 //
 // We actually calculate exp(Z3) -1.
-// Then,
+// Then, 
 //     exp(yLog(x)) = A + A( exp(Z3)   -1)
 //
 
@@ -186,146 +175,142 @@
 // ==============
 // The operation (K*log2_hi) must be exact. K is the true exponent of x.
 // If we allow gradual underflow (denormals), K can be represented in 12 bits
-// (as a two's complement number). We assume 13 bits as an engineering
-// precaution.
-//
+// (as a two's complement number). We assume 13 bits as an engineering precaution.
+// 
 //           +------------+----------------+-+
 //           |  13 bits   | 50 bits        | |
 //           +------------+----------------+-+
 //           0            1                66
 //                        2                34
-//
+// 
 // So we want the lsb(log2_hi) to be 2^-50
 // We get log2 as a quad-extended (15-bit exponent, 128-bit significand)
-//
+// 
 //      0 fffe b17217f7d1cf79ab c9e3b39803f2f6af (4...)
-//
+// 
 // Consider numbering the bits left to right, starting at 0 thru 127.
 // Bit 0 is the 2^-1 bit; bit 49 is the 2^-50 bit.
-//
+// 
 //  ...79ab
 //     0111 1001 1010 1011
 //     44
 //     89
-//
-// So if we shift off the rightmost 14 bits, then (shift back only
+// 
+// So if we shift off the rightmost 14 bits, then (shift back only 
 // the top half) we get
-//
+// 
 //      0 fffe b17217f7d1cf4000 e6af278ece600fcb dabc000000000000
-//
+// 
 // Put the right 64-bit signficand in an FR register, convert to double;
 // it is exact. Put the next 128 bits into a quad register and round to double.
 // The true exponent of the low part is -51.
-//
+// 
 // hi is 0 fffe b17217f7d1cf4000
 // lo is 0 ffcc e6af278ece601000
-//
+// 
 // Convert to double memory format and get
-//
+// 
 // hi is 0x3fe62e42fefa39e8
-// lo is 0x3cccd5e4f1d9cc02
-//
+// lo is 0x3cccd5e4f1d9cc02 
+// 
 // log2_hi + log2_lo is an accurate value for log2.
-//
-//
+// 
+// 
 // The T and t values
 // ==================
 // A similar method is used to generate the T and t values.
-//
+// 
 // K * log2_hi + T  must be exact.
-//
+// 
 // Smallest T,t
 // ----------
-// The smallest T,t is
+// The smallest T,t is 
 //       T                   t
-// 0x3f60040155d58800, 0x3c93bce0ce3ddd81  log(1/frcpa(1+0/256))=  +1.95503e-003
-//
+// data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81  log(1/frcpa(1+0/256))=  +1.95503e-003
+// 
 // The exponent is 0x3f6 (biased)  or -9 (true).
 // For the smallest T value, what we want is to clip the significand such that
-// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is the
-// specific for the first entry. In general, it is 0xffff - (biased 15-bit
-// exponent).
+// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is specific
+// to the first entry. In general, it is 0xffff - (biased 15-bit exponent).
 
-// Independently, what we have calculated is the table value as a quad
-// precision number.
+// Independently, what we have calculated is the table value as a quad precision number.
 // Table entry 1 is
 // 0 fff6 80200aaeac44ef38 338f77605fdf8000
-//
+// 
 // We store this quad precision number in a data structure that is
-//    sign:           1
+//    sign:           1 
 //    exponent:      15
 //    signficand_hi: 64 (includes explicit bit)
 //    signficand_lo: 49
 // Because the explicit bit is included, the significand is 113 bits.
-//
+// 
 // Consider significand_hi for table entry 1.
-//
-//
+// 
+// 
 // +-+--- ... -------+--------------------+
 // | |
 // +-+--- ... -------+--------------------+
 // 0 1               4444444455555555556666
 //                   2345678901234567890123
-//
+// 
 // Labeled as above, bit 0 is 2^0, bit 1 is 2^-1, etc.
 // Bit 42 is 2^-42. If we shift to the right by 9, the bit in
 // bit 42 goes in 51.
-//
+// 
 // So what we want to do is shift bits 43 thru 63 into significand_lo.
-// This is shifting bit 42 into bit 63, taking care to retain shifted-off bits.
-// Then shifting (just with signficaand_hi) back into bit 42.
-//
-// The shift_value is 63-42 = 21. In general, this is
+// This is shifting bit 42 into bit 63, taking care to retain the shifted-off bits.
+// Then shifting (just with significand_hi) back into bit 42.
+//  
+// The shift_value is 63-42 = 21. In general, this is 
 //      63 - (51 -(0xffff - 0xfff6))
 // For this example, it is
 //      63 - (51 - 9) = 63 - 42  = 21
-//
-// This means we are shifting 21 bits into significand_lo. We must maintain more
-// that a 128-bit signficand not to lose bits. So before the shift we put the
-// 128-bit significand into a 256-bit signficand and then shift.
+// 
+// This means we are shifting 21 bits into significand_lo.  We must maintain more
+// than a 128-bit significand so as not to lose bits. So before the shift we put the
+// 128-bit significand into a 256-bit significand and then shift.
 // The 256-bit significand has four parts: hh, hl, lh, and ll.
-//
+// 
 // Start off with
 //      hh         hl         lh         ll
 //      <64>       <49><15_0> <64_0>     <64_0>
-//
+// 
 // After shift by 21 (then return for significand_hi),
 //      <43><21_0> <21><43>   <6><58_0>  <64_0>
-//
+// 
 // Take the hh part and convert to a double. There is no rounding here.
-// The conversion is exact. The true exponent of the high part is the same as
-// the true exponent of the input quad.
-//
-// We have some 64 plus significand bits for the low part. In this example, we
-// have 70 bits. We want to round this to a double. Put them in a quad and then
-// do a quad fnorm.
-// For this example the true exponent of the low part is
+// The conversion is exact. The true exponent of the high part is the same as the
+// true exponent of the input quad.
+// 
+// We have some 64 plus significand bits for the low part. In this example, we have
+// 70 bits. We want to round this to a double. Put them in a quad and then do a quad fnorm.
+// For this example the true exponent of the low part is 
 //      true_exponent_of_high - 43 = true_exponent_of_high - (64-21)
-// In general, this is
-//      true_exponent_of_high - (64 - shift_value)
-//
-//
+// In general, this is 
+//      true_exponent_of_high - (64 - shift_value)  
+// 
+// 
 // Largest T,t
 // ----------
 // The largest T,t is
-// 0x3fe62643fecf9742, 0x3c9e3147684bd37d  log(1/frcpa(1+255/256))=+6.92171e-001
-//
+// data8 0x3fe62643fecf9742, 0x3c9e3147684bd37d    log(1/frcpa(1+255/256))=  +6.92171e-001
+// 
 // Table entry 256 is
 // 0 fffe b1321ff67cba178c 51da12f4df5a0000
-//
-// The shift value is
+// 
+// The shift value is 
 //      63 - (51 -(0xffff - 0xfffe)) = 13
-//
-// The true exponent of the low part is
+// 
+// The true exponent of the low part is 
 //      true_exponent_of_high - (64 - shift_value)
 //      -1 - (64-13) = -52
 // Biased as a double, this is 0x3cb
-//
-//
-//
+// 
+// 
+// 
 // So then lsb(T) must be >= 2^-51
 // msb(Klog2_hi) <= 2^12
-//
+// 
 //              +--------+---------+
 //              |       51 bits    | <== largest T
 //              +--------+---------+
@@ -335,6 +320,7 @@
 // +------------+----------------+-+
 
 
+
 // Special Cases
 //==============================================================
 
@@ -399,67 +385,63 @@
 
 // X any   Y =0               +1
 
+#include "libm_support.h"
+
 // Assembly macros
 //==============================================================
 
 // integer registers used
 
-pow_GR_signexp_X          = r14
-pow_GR_17ones             = r15
-pow_AD_P                  = r16
-pow_GR_exp_2tom8          = r17
-pow_GR_sig_X              = r18
-pow_GR_10033              = r19
-pow_GR_16ones             = r20
-
-pow_AD_Tt                 = r21
-pow_GR_exp_X              = r22
-pow_AD_Q                  = r23
-pow_GR_true_exp_X         = r24
-pow_GR_y_zero             = r25
-
-pow_GR_exp_Y              = r26
-pow_AD_tbl1               = r27
-pow_AD_tbl2               = r28
-pow_GR_offset             = r29
-pow_GR_exp_Xm1            = r30
-pow_GR_xneg_yodd          = r31
-
-pow_GR_signexp_Xm1        = r35
-pow_GR_int_W1             = r36
-pow_GR_int_W2             = r37
-pow_GR_int_N              = r38
-pow_GR_index1             = r39
-pow_GR_index2             = r40
-
-pow_AD_T1                 = r41
-pow_AD_T2                 = r42
-pow_int_GR_M              = r43
-pow_GR_sig_int_Y          = r44
-pow_GR_sign_Y_Gpr         = r45
-
-pow_GR_17ones_m1          = r46
-pow_GR_one                = r47
-pow_GR_sign_Y             = r48
-pow_GR_signexp_Y_Gpr      = r49
-pow_GR_exp_Y_Gpr          = r50
-
-pow_GR_true_exp_Y_Gpr     = r51
-pow_GR_signexp_Y          = r52
-pow_GR_x_one              = r53
-pow_GR_exp_2toM63         = r54
-pow_GR_big_pos            = r55
-
-pow_GR_big_neg            = r56
-
-GR_SAVE_B0                = r50
-GR_SAVE_GP                = r51
-GR_SAVE_PFS               = r52
-
-GR_Parameter_X            = r53
-GR_Parameter_Y            = r54
-GR_Parameter_RESULT       = r55
-pow_GR_tag                = r56
+pow_AD_Tt                 = r33
+pow_GR_FFF7               = r34
+pow_GR_exp_Y              = r34 // duplicate: shares r34 with pow_GR_FFF7
+pow_GR_17ones             = r35
+
+pow_AD_P                  = r36
+pow_AD_Q                  = r37
+pow_AD_tbl1               = r38
+pow_AD_tbl2               = r39
+pow_GR_exp_X              = r40
+pow_GR_true_exp_X         = r40 // duplicate: shares r40 with pow_GR_exp_X
+
+pow_GR_offset             = r41
+pow_GR_exp_Xm1            = r42
+pow_GR_sig_X              = r43
+pow_GR_signexp_X          = r44
+
+pow_GR_signexp_Xm1        = r46
+pow_GR_int_W1             = r47
+pow_GR_int_W2             = r48
+pow_GR_int_N              = r49
+pow_GR_index1             = r50
+
+pow_GR_index2             = r51
+pow_AD_T1                 = r52
+pow_AD_T2                 = r53
+pow_GR_gt_ln              = r53 // duplicate: shares r53 with pow_AD_T2
+pow_int_GR_M              = r54
+pow_GR_10033              = r55
+
+pow_GR_16ones             = r56
+pow_GR_sig_int_Y          = r57
+pow_GR_sign_Y_Gpr         = r58
+pow_GR_17ones_m1          = r59
+pow_GR_one                = r60
+pow_GR_sign_Y             = r60 // duplicate: shares r60 with pow_GR_one
+
+pow_GR_signexp_Y_Gpr      = r61 
+pow_GR_exp_Y_Gpr          = r62 
+pow_GR_true_exp_Y_Gpr     = r63 
+pow_GR_signexp_Y          = r64 
+
+GR_SAVE_B0                = r65
+GR_SAVE_GP                = r66
+GR_SAVE_PFS               = r67
+
+GR_Parameter_X            = r68 // r68-r71 are the 4 output registers of pow's alloc (1 in, 35 local, 4 out)
+GR_Parameter_Y            = r69
+GR_Parameter_RESULT       = r70
+pow_GR_tag                = r71
 
 
 // floating point registers used
@@ -482,8 +464,7 @@ POW_log2_lo               = f43
 POW_r                     = f44
 POW_Q0_half               = f45
 
-POW_Q1                    = f46
-POW_tmp                   = f47
+POW_Q1                    = f46  
 POW_log2_hi               = f48
 POW_Q4                    = f49
 POW_P1                    = f50
@@ -495,7 +476,6 @@ POW_Yrcub                 = f54
 POW_log2_by_128_lo        = f55
 
 POW_v6                    = f56
-POW_xsq                   = f57
 POW_v4                    = f58
 POW_v2                    = f59
 POW_T                     = f60
@@ -504,7 +484,6 @@ POW_Tt                    = f61
 POW_RSHF                  = f62
 POW_v21ps                 = f63
 POW_s4                    = f64
-POW_twoV                  = f65
 
 POW_U                     = f66
 POW_G                     = f67
@@ -554,45 +533,44 @@ POW_1ps                   = f103
 POW_A                     = f104
 POW_es                    = f105
 
-POW_Xp1                   = f106
 POW_int_K                 = f107
 POW_K                     = f108
 POW_f123                  = f109
 POW_Gpr                   = f110
 
-POW_Y_Gpr                 = f111
+POW_Y_Gpr                 = f111 
 POW_int_Y                 = f112
-POW_abs_q                 = f114
-POW_2toM63                = f115
 
 POW_float_int_Y           = f116
 POW_ftz_urm_f8            = f117
 POW_wre_urm_f8            = f118
-POW_big_neg               = f119
-POW_big_pos               = f120
+POW_abs_A                 = f119
+POW_gt_pln                = f120
 
-POW_GY_Z2                 = f121
-POW_pYrcub_e3             = f122
-POW_d                     = f123
-POW_d2                    = f124
-POW_poly_d_hi             = f121
-POW_poly_d_lo             = f122
-POW_poly_d                = f121
+POW_xsq                   = f121
+
+POW_twoV                  = f122
+POW_Xp1                   = f123
 
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(pow_table_P)
+pow_table_P:
+ASM_TYPE_DIRECTIVE(pow_table_P,@object)
 data8 0x8000F7B249FF332D, 0x0000BFFC  // P_5
 data8 0xAAAAAAA9E7902C7F, 0x0000BFFC  // P_3
 data8 0x80000000000018E5, 0x0000BFFD  // P_1
 data8 0xb8aa3b295c17f0bc, 0x00004006  // inv_ln2_by_128
-//
-//
+
+
 data8 0x3FA5555555554A9E // Q_2
 data8 0x3F8111124F4DD9F9 // Q_3
 data8 0x3FE0000000000000 // Q_0
@@ -602,18 +580,20 @@ data8 0x43e8000000000000 // Right shift constant for exp
 data8 0xc9e3b39803f2f6af, 0x00003fb7  // ln2_by_128_lo
 data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
 data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
-LOCAL_OBJECT_END(pow_table_P)
+ASM_SIZE_DIRECTIVE(pow_table_P)
 
-LOCAL_OBJECT_START(pow_table_Q)
+pow_table_Q:
+ASM_TYPE_DIRECTIVE(pow_table_Q,@object)
 data8 0x9249FE7F0DC423CF, 0x00003FFC  // P_4
 data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC  // P_2
 data8 0xAAAAAAAAAAAAB505, 0x00003FFD  // P_0
 data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo =  +6.93147e-001
 data8 0xb17217f7d1cf79ab, 0x00003ff7  // ln2_by_128_hi
-LOCAL_OBJECT_END(pow_table_Q)
+ASM_SIZE_DIRECTIVE(pow_table_Q)
 
 
-LOCAL_OBJECT_START(pow_Tt)
+pow_Tt:
+ASM_TYPE_DIRECTIVE(pow_Tt,@object)
 data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 // log(1/frcpa(1+0/256))=  +1.95503e-003
 data8 0x3f78121214586a00, 0x3cb540e0a5cfc9bc // log(1/frcpa(1+1/256))=  +5.87661e-003
 data8 0x3f841929f9683200, 0x3cbdf1d57404da1f // log(1/frcpa(1+2/256))=  +9.81362e-003
@@ -870,12 +850,13 @@ data8 0x3fe5f673c61a2ed0, 0x3caa385eef5f2789 // log(1/frcpa(1+252/256))=  +6.863
 data8 0x3fe6065bea385924, 0x3cb11624f165c5b4 // log(1/frcpa(1+253/256))=  +6.88276e-001
 data8 0x3fe6164bfa7cc068, 0x3cbad884f87073fa // log(1/frcpa(1+254/256))=  +6.90222e-001
 data8 0x3fe62643fecf9740, 0x3cb78c51da12f4df // log(1/frcpa(1+255/256))=  +6.92171e-001
-LOCAL_OBJECT_END(pow_Tt)
+ASM_SIZE_DIRECTIVE(pow_Tt)
 
 
 // Table 1 is 2^(index_1/128) where
 // index_1 goes from 0 to 15
-LOCAL_OBJECT_START(pow_tbl1)
+pow_tbl1:
+ASM_TYPE_DIRECTIVE(pow_tbl1,@object)
 data8 0x8000000000000000 , 0x00003FFF
 data8 0x80B1ED4FD999AB6C , 0x00003FFF
 data8 0x8164D1F3BC030773 , 0x00003FFF
@@ -892,12 +873,13 @@ data8 0x88980E8092DA8527 , 0x00003FFF
 data8 0x8955EE03618E5FDD , 0x00003FFF
 data8 0x8A14D575496EFD9A , 0x00003FFF
 data8 0x8AD4C6452C728924 , 0x00003FFF
-LOCAL_OBJECT_END(pow_tbl1)
+ASM_SIZE_DIRECTIVE(pow_tbl1)
 
 
 // Table 2 is 2^(index_1/8) where
 // index_2 goes from 0 to 7
-LOCAL_OBJECT_START(pow_tbl2)
+pow_tbl2:
+ASM_TYPE_DIRECTIVE(pow_tbl2,@object)
 data8 0x8000000000000000 , 0x00003FFF
 data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
 data8 0x9837F0518DB8A96F , 0x00003FFF
@@ -906,319 +888,402 @@ data8 0xB504F333F9DE6484 , 0x00003FFF
 data8 0xC5672A115506DADD , 0x00003FFF
 data8 0xD744FCCAD69D6AF4 , 0x00003FFF
 data8 0xEAC0C6E7DD24392F , 0x00003FFF
-LOCAL_OBJECT_END(pow_tbl2)
+ASM_SIZE_DIRECTIVE(pow_tbl2)
+
+.global pow
 
 .section .text
-GLOBAL_LIBM_ENTRY(pow)
+.proc  pow
+.align 32
+
+pow:
 
-// Get exponent of x.  Will be used to calculate K.
 { .mfi
-          getf.exp     pow_GR_signexp_X = f8
-          fms.s1 POW_Xm1 = f8,f1,f1     // Will be used for r1 if x>0
-          mov           pow_GR_17ones   = 0x1FFFF
+          alloc         r32=ar.pfs,1,35,4,0 
+          fms.s1 POW_Xm1 = f8,f1,f1   // Will be used for r1 if x>0
+          mov           pow_GR_17ones  = 0x1FFFF
 }
 { .mfi
-          addl          pow_AD_P        = @ltoff(pow_table_P), gp
-          fma.s1 POW_Xp1 = f8,f1,f1     // Will be used for r1 if x<0
+(p0)      addl          pow_AD_P   = @ltoff(pow_table_P), gp
+          fma.s1 POW_Xp1 = f8,f1,f1   // Will be used for r1 if x<0
           nop.i 999
 ;;
 }
 
-// Get significand of x.  Will be used to get index to fetch T, Tt.
+
+// Get exponent of x.  Will be used to calculate K.
 { .mfi
-          getf.sig      pow_GR_sig_X    = f8
-          frcpa.s1      POW_B, p6       = f1,f8
+          getf.exp      pow_GR_signexp_X    = f8
+          frcpa.s1      POW_B, p6   = f1,f8
           nop.i 999
 }
 { .mfi
           ld8 pow_AD_P = [pow_AD_P]
-          fma.s1        POW_NORM_X      = f8,f1,f0
-          mov          pow_GR_exp_2tom8 = 0xFFF7
+          fma.s1        POW_NORM_X     = f8,f1,f0
+          mov           pow_GR_FFF7    = 0xFFF7
 }
 ;;
 
+
+
+// Get significand of x.  Will be used to get index to fetch T, Tt.
 // p13 = TRUE ==> X is unorm
 // DOUBLE 0x10033  exponent limit at which y is an integer
+// SINGLE 0x10016
 { .mfi
-          nop.m 999
-          fclass.m  p13,p0              = f8, 0x0b  // Test for x unorm
-          addl pow_GR_10033             = 0x10033, r0
+          getf.sig      pow_GR_sig_X        = f8
+          fclass.m  p13,p0          = f8, 0x0b  // Test for x unorm
+          addl pow_GR_10033                 = 0x10033, r0
 }
 { .mfi
           mov           pow_GR_16ones   = 0xFFFF
-          fma.s1        POW_NORM_Y      = f9,f1,f0
+          fma.s1        POW_NORM_Y     = f9,f1,f0
           nop.i 999
 }
 ;;
 
+
 // p14 = TRUE ==> X is ZERO
 { .mfi
           adds          pow_AD_Tt       = pow_Tt - pow_table_P,  pow_AD_P
-          fclass.m  p14,p0              = f8, 0x07
-          and           pow_GR_exp_X    = pow_GR_signexp_X, pow_GR_17ones
+          fclass.m  p14,p15          = f8, 0x07
+          and           pow_GR_exp_X        = pow_GR_signexp_X, pow_GR_17ones
 }
 { .mfi
-          adds          pow_AD_Q        = pow_table_Q - pow_table_P,  pow_AD_P
+          adds          pow_AD_Q       = pow_table_Q - pow_table_P,  pow_AD_P
           nop.f 999
           nop.i 999
 }
 ;;
 
 { .mfi
-          ldfe          POW_P5          = [pow_AD_P], 16
-          fcmp.lt.s1 p8,p9 = f8, f0     // Test for x<0
-          nop.i 999
+          ldfe          POW_P5         = [pow_AD_P], 16
+          fcmp.lt.s1 p8,p9 = f8, f0    // Test for x<0
+          shl           pow_GR_offset       = pow_GR_sig_X, 1
 }
 { .mib
-          ldfe          POW_P4          = [pow_AD_Q], 16
-          sub       pow_GR_true_exp_X   = pow_GR_exp_X, pow_GR_16ones
-(p13)     br.cond.spnt POW_X_DENORM
+          ldfe          POW_P4         = [pow_AD_Q], 16
+          sub       pow_GR_true_exp_X       = pow_GR_exp_X, pow_GR_16ones
+(p13)     br.cond.spnt L(POW_X_DENORM)
 }
 ;;
 
+
 // Continue normal and denormal paths here
-POW_COMMON:
+L(POW_COMMON):
 // p11 = TRUE ==> Y is a NAN
 { .mfi
-          ldfe          POW_P3          = [pow_AD_P], 16
-          fclass.m  p11,p0              = f9, 0xc3
-          nop.i 999
+          ldfe          POW_P3         = [pow_AD_P], 16
+          fclass.m.unc  p11,p0         = f9, 0xc3
+          shr.u     pow_GR_offset           = pow_GR_offset,56
 }
 { .mfi
-          ldfe          POW_P2          = [pow_AD_Q], 16
+          ldfe          POW_P2         = [pow_AD_Q], 16
           nop.f 999
-          mov pow_GR_y_zero = 0
+          nop.i 999
 }
 ;;
 
-// Note POW_Xm1 and POW_r1 are used interchangably
+
+
+// Compute xsq to decide later if |x|=1
+// p11 = TRUE ==> Y is a NaN
 { .mfi
-          alloc         r32=ar.pfs,2,19,4,0
-          fms.s1        POW_r           = POW_B, POW_NORM_X,f1
-          nop.i 999
+          setf.sig POW_int_K                = pow_GR_true_exp_X
+(p15)     fms.s1        POW_r          = POW_B, POW_NORM_X,f1
+          shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
 }
 { .mfi
-          setf.sig POW_int_K            = pow_GR_true_exp_X
-(p8)      fnma.s1        POW_Xm1        = POW_Xp1,f1,f0
+          nop.m 999
+(p8)      fnma.s1        POW_Xm1       = POW_Xp1,f1,f0
           nop.i 999
 }
 ;;
 
-// p12 = TRUE if Y is ZERO
-// Compute xsq to decide later if |x|=1
+
+
+// p12 = TRUE ==> X is ZERO and Y is ZERO
 { .mfi
-          ldfe          POW_P1          = [pow_AD_P], 16
-          fclass.m      p12,p0          = f9, 0x07
-          shl           pow_GR_offset   = pow_GR_sig_X, 1
+          ldfe          POW_P1         = [pow_AD_P], 16
+(p14)     fclass.m.unc  p12,p0              = f9, 0x07
+          nop.i 999
 }
 { .mfb
-          ldfe          POW_P0          = [pow_AD_Q], 16
+          ldfe          POW_P0         = [pow_AD_Q], 16
           fma.s1        POW_xsq = POW_NORM_X, POW_NORM_X, f0
-(p11)     br.cond.spnt  POW_Y_NAN       // Branch if y=nan
+(p11)     br.cond.spnt   L(POW_Y_NAN)
 }
 ;;
 
+
+.pred.rel "mutex",p8,p9
 // Get exponent of |x|-1 to use in comparison to 2^-8
-{ .mfi
-          getf.exp  pow_GR_signexp_Xm1  = POW_Xm1
-          fcvt.fx.s1   POW_int_Y        = POW_NORM_Y
-          shr.u     pow_GR_offset       = pow_GR_offset,56
+{ .mmf
+(p8)      getf.exp      pow_GR_signexp_Xm1  = POW_Xp1
+(p9)      getf.exp      pow_GR_signexp_Xm1  = POW_Xm1
+          fcvt.fx.s1   POW_int_Y            = POW_NORM_Y
 }
 ;;
 
+
 // p11 = TRUE ==> X is a NAN
 { .mfi
           ldfpd         POW_log2_hi, POW_log2_lo  = [pow_AD_Q], 16
-          fclass.m      p11,p0          = f8, 0xc3
-          shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
+          fclass.m.unc  p11,p0              = f8, 0xc3
+          nop.i 999
 }
-{ .mfi
-          ldfe          POW_inv_log2_by_128 = [pow_AD_P], 16
-          fma.s1 POW_delta              = f0,f0,f0 // delta=0 in case |x| near 1
-(p12)     mov pow_GR_y_zero = 1
+{ .mib
+          ldfpd  POW_T, POW_Tt              = [pow_AD_Tt], 16
+          nop.i 999
+(p12)     br.cond.spnt L(POW_X_0_Y_0)
 }
 ;;
 
+
+// p14 = TRUE ==> X is zero
+//    p15 = TRUE ==> X is zero AND Y is negative
+//    p10 = TRUE ==> X is zero AND Y is >= zero 
 { .mfi
-          ldfpd  POW_Q2, POW_Q3         = [pow_AD_P], 16
-          fma.s1 POW_G                  = f0,f0,f0  // G=0 in case |x| near 1
-          and       pow_GR_exp_Xm1      = pow_GR_signexp_Xm1, pow_GR_17ones
+          ldfe          POW_inv_log2_by_128 = [pow_AD_P], 16
+(p14)     fcmp.lt.unc.s1 p15, p10           = f9,f0
+          nop.i 999
 }
+{ .mfi
+          nop.m 999
+          nop.f 999
+          and       pow_GR_exp_Xm1          = pow_GR_signexp_Xm1, pow_GR_17ones
+} 
 ;;
 
+
 // Determine if we will use the |x| near 1 path (p6) or normal path (p7)
+// p12 = TRUE ==> X is a NAN and Y is a zero
+// p13 = TRUE ==> X is a NAN and Y is anything else
 { .mfi
-          getf.exp  pow_GR_signexp_Y    = POW_NORM_Y
-          nop.f 999
-          cmp.lt p6,p7                  = pow_GR_exp_Xm1, pow_GR_exp_2tom8
-}
-{ .mfb
-          ldfpd  POW_T, POW_Tt          = [pow_AD_Tt], 16
-          fma.s1        POW_rsq         = POW_r, POW_r,f0
-(p11)     br.cond.spnt  POW_X_NAN       // Branch if x=nan and y not nan
+          getf.exp  pow_GR_signexp_Y        = POW_NORM_Y 
+(p11)     fclass.m.unc  p12,p13             = f9, 0x07
+          cmp.lt.unc p6,p7                  = pow_GR_exp_Xm1, pow_GR_FFF7
 }
+{ .mfi
+          ldfpd  POW_Q2, POW_Q3             = [pow_AD_P], 16
+          fma.s1        POW_rsq             = POW_r, POW_r,f0
+          nop.i 999
 ;;
+}
 
 // If on the x near 1 path, assign r1 to r and r1*r1 to rsq
 { .mfi
-          ldfpd  POW_Q0_half, POW_Q1    = [pow_AD_P], 16
-(p6)      fma.s1    POW_r               = POW_r1, f1, f0
+          ldfpd  POW_Q0_half, POW_Q1             = [pow_AD_P], 16
+(p6)      fma.s1    POW_r                 = POW_r1, f1, f0
+          nop.i 999
+}
+{ .mfi
+          nop.m 999
+(p6)      fma.s1    POW_rsq                 = POW_r1, POW_r1, f0
           nop.i 999
+;;
+}
+
+
+{ .mfi
+          ldfpd   POW_Q4, POW_RSHF          = [pow_AD_P], 16
+(p7)      fma.s1 POW_v6                     = POW_r,  POW_P5, POW_P4
+          and pow_GR_exp_Y                   = pow_GR_signexp_Y, pow_GR_17ones
 }
 { .mfb
           nop.m 999
-(p6)      fma.s1    POW_rsq             = POW_r1, POW_r1, f0
-(p14)     br.cond.spnt POW_X_0          // Branch if x zero and y not nan
+(p6)      fma.s1 POW_v6                     = POW_r1, POW_P5, POW_P4
+(p12)     br.cond.spnt L(POW_X_NAN_Y_0)
 }
 ;;
 
+
 { .mfi
-          ldfpd   POW_Q4, POW_RSHF      = [pow_AD_P], 16
-(p7)      fma.s1 POW_v6                 = POW_r,  POW_P5, POW_P4
-          nop.i 999
+          nop.m 999
+(p7)      fma.s1 POW_v4                     = POW_P3, POW_r,  POW_P2 
+          andcm pow_GR_sign_Y               = pow_GR_signexp_Y, pow_GR_17ones
 }
-{ .mfi
-          mov pow_GR_exp_2toM63         = 0xffc0  // Exponent of 2^-63
-(p6)      fma.s1 POW_v6                 = POW_r1, POW_P5, POW_P4
-          nop.i 999
+{ .mfb
+          nop.m 999
+(p6)      fma.s1 POW_v4                     = POW_P3, POW_r1, POW_P2 
+(p12)     br.cond.spnt L(POW_X_NAN_Y_0)
 }
 ;;
 
 { .mfi
-          setf.exp POW_2toM63 = pow_GR_exp_2toM63  // Form 2^-63 for test of q
-(p7)      fma.s1 POW_v4                 = POW_P3, POW_r,  POW_P2
+          nop.m 999
+          fcvt.xf POW_K                     = POW_int_K
           nop.i 999
 }
-{ .mfi
+{ .mfb
           nop.m 999
-(p6)      fma.s1 POW_v4                 = POW_P3, POW_r1, POW_P2
-          nop.i 999
+(p13)     fma.d f8                           = f8,f1,f0
+(p13)     br.ret.spnt  b0    // Exit if x nan, y anything but zero
 }
 ;;
-
+          
+// p10 = TRUE ==> X is zero  AND Y is positive
+//  p8  = TRUE ==> X is zero  AND Y is outside integer range (treat as even int)
+//                   return +0
+//  p9  = TRUE ==> X is zero  AND Y is within integer range (may not be integer) 
+{ .mfi
+(p10)     cmp.gt.unc p8,p9                  =  pow_GR_exp_Y, pow_GR_10033
+(p6)      fmerge.s POW_delta                 = f0,f0
+          nop.i 999
+}
 { .mfi
           nop.m 999
-          fcvt.xf POW_K                 = POW_int_K
+(p6)      fma.s1 POW_G                       = f0,f0,f0
           nop.i 999
 }
 ;;
 
 { .mfi
-          getf.sig pow_GR_sig_int_Y     = POW_int_Y
-          fnma.s1 POW_twoV              = POW_NORM_Y, POW_rsq,f0
-          and pow_GR_exp_Y              = pow_GR_signexp_Y, pow_GR_17ones
+          getf.sig pow_GR_sig_int_Y         = POW_int_Y
+          fnma.s1 POW_twoV                   = POW_NORM_Y, POW_rsq,f0
+          nop.i 999
 }
-{ .mfb
-          andcm pow_GR_sign_Y           = pow_GR_signexp_Y, pow_GR_17ones
-          fma.s1 POW_U                  = POW_NORM_Y,POW_r,f0
-(p12)     br.cond.spnt POW_Y_0   // Branch if y=zero, x not zero or nan
+{ .mfi
+          nop.m 999
+          fma.s1 POW_U                      = POW_NORM_Y,POW_r,f0
+          nop.i 999
 }
 ;;
 
-// p11 = TRUE ==> X is NEGATIVE but not inf
 { .mfi
-          ldfe      POW_log2_by_128_lo  = [pow_AD_P], 16
-          fclass.m  p11,p0              = f8, 0x1a
+          ldfe      POW_log2_by_128_lo      = [pow_AD_P], 16
+(p6)      fma.s1 POW_v2                     = POW_P1, POW_r1, POW_P0 
           nop.i 999
 }
 { .mfi
-          ldfe      POW_log2_by_128_hi  = [pow_AD_Q], 16
-          fma.s1 POW_v2                 = POW_P1, POW_r,  POW_P0
+          ldfe          POW_log2_by_128_hi  = [pow_AD_Q], 16
+(p7)      fma.s1 POW_v2                     = POW_P1, POW_r,  POW_P0 
           nop.i 999
 }
 ;;
 
+
 { .mfi
           nop.m 999
-          fcvt.xf   POW_float_int_Y     = POW_int_Y
+          fcvt.xf   POW_float_int_Y               = POW_int_Y
           nop.i 999
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_v3                 = POW_v6, POW_rsq,  POW_v4
-          adds          pow_AD_tbl1     = pow_tbl1 - pow_Tt,  pow_AD_Q
+          fma.s1 POW_v3                     = POW_v6, POW_rsq,  POW_v4 
+          adds          pow_AD_tbl1       = pow_tbl1 - pow_Tt,  pow_AD_Q
 }
 ;;
 
 { .mfi
           nop.m 999
-(p7)      fma.s1 POW_delta              = POW_K, POW_log2_lo, POW_Tt
+(p7)      fma.s1 POW_delta                  = POW_K, POW_log2_lo, POW_Tt
           nop.i 999
 }
 { .mfi
           nop.m 999
-(p7)      fma.s1 POW_G                  = POW_K, POW_log2_hi, POW_T
-          adds pow_AD_tbl2              = pow_tbl2 - pow_tbl1,  pow_AD_tbl1
+(p7)      fma.s1 POW_G                      = POW_K, POW_log2_hi, POW_T 
+          adds pow_AD_tbl2                  = pow_tbl2 - pow_tbl1,  pow_AD_tbl1
 }
 ;;
 
+
 { .mfi
           nop.m 999
-          fms.s1 POW_e2                 = POW_NORM_Y, POW_r, POW_U
+          fms.s1 POW_e2                     = POW_NORM_Y, POW_r, POW_U
           nop.i 999
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_Z2                 = POW_twoV, POW_Q0_half, POW_U
+          fma.s1 POW_Z2                     = POW_twoV, POW_Q0_half, POW_U
           nop.i 999
 }
 ;;
 
+// p11 = TRUE ==> X is NEGATIVE 
+// p8  = TRUE ==> X is zero  AND Y is outside integer range (treat as even int)
+//                return +0
 { .mfi
           nop.m 999
-          fma.s1 POW_Yrcub              = POW_rsq, POW_U, f0
+          fclass.m.unc  p11,p0              = f8, 0x1a
           nop.i 999
 }
-{ .mfi
+{ .mfb
+          nop.m 999
+(p8)      fma.d f8                          = f0,f0,f0
+(p8)      br.ret.spnt b0
+}
+;;
+
+{ .mfi 
           nop.m 999
-          fma.s1 POW_p                  = POW_rsq, POW_v3, POW_v2
+          fma.s1 POW_Yrcub                 = POW_rsq, POW_U, f0
+          nop.i 999
+}
+{ .mfi 
+          nop.m 999
+          fma.s1 POW_p                      = POW_rsq, POW_v3, POW_v2
           nop.i 999
 }
 ;;
 
-// p11 = TRUE ==> X is NEGATIVE but not inf
-//    p12 = TRUE ==> X is NEGATIVE  AND  Y  already even int
+
+// p11 = TRUE ==> X is NEGATIVE
+//    p12 = TRUE ==> X is NEGATIVE  AND  Y  already int
 //    p13 = TRUE ==> X is NEGATIVE  AND  Y possible int
 { .mfi
           nop.m 999
-          fma.s1 POW_Z1                 = POW_NORM_Y, POW_G, f0
-(p11)     cmp.gt.unc  p12,p13           = pow_GR_exp_Y, pow_GR_10033
+          fma.s1 POW_Z1                     = POW_NORM_Y, POW_G, f0
+(p11)     cmp.ge.unc  p12,p13                = pow_GR_exp_Y, pow_GR_10033
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_Gpr                = POW_G, f1, POW_r
+          fma.s1 POW_e3                     = POW_NORM_Y, POW_delta, f0
           nop.i 999
 }
 ;;
 
-// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
+// p9  = TRUE ==> X is zero  AND Y is within integer range (may not be integer)
+//    p6 = TRUE ==>  X is zero  AND  Y is an integer (may be even or odd)
+//    p7 = TRUE ==>  X is zero  AND  Y is NOT an integer, return +0
 { .mfi
           nop.m 999
-          fma.s1 POW_W2  = POW_Z2, POW_inv_log2_by_128, POW_RSHF
+(p9)      fcmp.eq.unc.s1 p6,p7             = POW_float_int_Y,  POW_NORM_Y
           nop.i 999
 }
-{ .mfi
+{ .mfi 
           nop.m 999
-          fms.s1 POW_UmZ2               = POW_U, f1, POW_Z2
+          fma.s1 POW_Gpr                    = POW_G, f1, POW_r
           nop.i 999
 }
 ;;
 
+// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
 { .mfi
           nop.m 999
-          fma.s1 POW_e3                 = POW_NORM_Y, POW_delta, f0
+          fma.s1 POW_W2  = POW_Z2, POW_inv_log2_by_128, POW_RSHF
+          nop.i 999
+}
+{ .mfi
+          nop.m 999
+          fms.s1 POW_UmZ2                   = POW_U, f1, POW_Z2
           nop.i 999
 }
 ;;
 
+
+// If x=0 and y>0, test y and flag denormal
+// p6  = TRUE ==>  X is zero  AND  Y is an integer (may be even or odd)
+//    p8 = TRUE ==>  X is zero  AND  Y is an odd  integer
+//    p9 = TRUE ==>  X is zero  AND  Y is an even integer
 { .mfi
           nop.m 999
-          fma.s1 POW_Z3                 = POW_p, POW_Yrcub, f0
-          nop.i 999
+(p10)     fcmp.eq.s0 p15,p0 = f9,f0
+(p6)      tbit.nz.unc  p8,p9                = pow_GR_sig_int_Y,0
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_GY_Z2              = POW_G, POW_NORM_Y, POW_Z2
+          fma.s1 POW_Z3                      = POW_p, POW_Yrcub, f0
           nop.i 999
 }
 ;;
@@ -1226,7 +1291,7 @@ POW_COMMON:
 // By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
 { .mfi
           nop.m 999
-          fms.s1 POW_e1                 = POW_NORM_Y, POW_G, POW_Z1
+          fms.s1 POW_e1                     = POW_NORM_Y, POW_G, POW_Z1
           nop.i 999
 }
 { .mfi
@@ -1236,60 +1301,81 @@ POW_COMMON:
 }
 ;;
 
-// p13 = TRUE ==> X is NEGATIVE  AND  Y possible int
-//     p10 = TRUE ==> X is NEG and Y is an int
-//     p12 = TRUE ==> X is NEG and Y is not an int
 { .mfi
           nop.m 999
-(p13)     fcmp.eq.unc.s1 p10,p12        = POW_float_int_Y,  POW_NORM_Y
-          mov pow_GR_xneg_yodd = 0
+(p7)      fma.d f8  = f0,f0,f0  // Result +0 if x zero and y not integer
+          nop.i 999
 }
-{ .mfi
+{ .mfb
           nop.m 999
-          fma.s1 POW_Y_Gpr              = POW_NORM_Y, POW_Gpr, f0
-          nop.i 999
+          fma.s1 POW_Y_Gpr                  = POW_NORM_Y, POW_Gpr, f0
+(p8)      br.ret.spnt b0        // Exit if x zero and y odd integer
 }
 ;;
 
 // By subtracting RSHF we get rounded integer POW_N2float
+// p15 = TRUE ==> X_0_Y_NEG
 { .mfi
           nop.m 999
           fms.s1 POW_N2float  = POW_W2, f1, POW_RSHF
           nop.i 999
 }
-{ .mfi
+{ .mfb
           nop.m 999
-          fma.s1 POW_UmZ2pV             = POW_twoV,POW_Q0_half,POW_UmZ2
-          nop.i 999
+          fma.s1 POW_UmZ2pV                 = POW_twoV,POW_Q0_half,POW_UmZ2
+(p15)     br.cond.spnt L(POW_X_0_Y_NEG)
 }
 ;;
 
+
+
 { .mfi
           nop.m 999
-          fma.s1 POW_Z3sq               = POW_Z3, POW_Z3, f0
+          fma.s1 POW_Z3sq                   = POW_Z3, POW_Z3, f0
           nop.i 999
 }
-{ .mfi
+{ .mfb
           nop.m 999
-          fma.s1 POW_v4                 = POW_Z3, POW_Q3, POW_Q2
-          nop.i 999
+          fma.s1 POW_v4                     = POW_Z3, POW_Q3, POW_Q2
+(p7)      br.ret.spnt b0     // Exit if x zero and y not an integer
 }
 ;;
 
+
+
 // Extract rounded integer from rightmost significand of POW_W2
 // By subtracting RSHF we get rounded integer POW_N1float
 { .mfi
-          getf.sig pow_GR_int_W2        = POW_W2
+          getf.sig pow_GR_int_W2             = POW_W2
           fms.s1 POW_N1float  = POW_W1, f1, POW_RSHF
           nop.i 999
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_v2                 = POW_Z3, POW_Q1, POW_Q0_half
+          fma.s1 POW_v2                     = POW_Z3, POW_Q1, POW_Q0_half
           nop.i 999
 }
 ;;
 
+
+
+
+// p13 = TRUE ==> X is NEGATIVE  AND  Y possible int
+//     p10 = TRUE ==> X is NEG and Y is an int
+//     p12 = TRUE ==> X is NEG and Y is not an int
+{ .mfi
+          nop.m 999
+(p13)     fcmp.eq.unc.s1 p10,p12             = POW_float_int_Y,  POW_NORM_Y
+          nop.i 999
+}
+{ .mfb
+          nop.m 999
+(p9)      fma.d f8  = f0,f0,f0   // Result +0 if x zero and y even integer
+(p9)      br.ret.spnt b0    // Exit if x zero and y even integer
+}
+;;
+
+
 { .mfi
           nop.m 999
           fnma.s1 POW_s2 = POW_N2float, POW_log2_by_128_hi, POW_Z2
@@ -1297,7 +1383,7 @@ POW_COMMON:
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_e2                 = POW_e2,f1,POW_UmZ2pV
+          fma.s1 POW_e2                     = POW_e2,f1,POW_UmZ2pV
           nop.i 999
 }
 ;;
@@ -1305,283 +1391,278 @@ POW_COMMON:
 // Extract rounded integer from rightmost significand of POW_W1
 // Test if x inf
 { .mfi
-          getf.sig pow_GR_int_W1        = POW_W1
-          fclass.m p15,p0 = POW_NORM_X,  0x23
+          getf.sig pow_GR_int_W1             = POW_W1
+          fclass.m.unc p15,p0 = POW_NORM_X,  0x23
           nop.i 999
 }
 { .mfb
           nop.m 999
           fnma.s1 POW_f2  = POW_N2float, POW_log2_by_128_lo, f1
-(p12)     br.cond.spnt POW_X_NEG_Y_NONINT  // Branch if x neg, y not integer
+(p12)     br.cond.spnt L(POW_X_NEG_Y_NONINT)  // Branch if x neg, y not integer
 }
 ;;
 
-// p11 = TRUE ==> X is +1.0
 // p12 = TRUE ==> X is NEGATIVE  AND Y is an odd integer
 { .mfi
-          getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
-          fcmp.eq.s1 p11,p0 = POW_NORM_X, f1
-(p10)     tbit.nz.unc  p12,p0           = pow_GR_sig_int_Y,0
-}
-{ .mfi
-          nop.m 999
-          fma.s1 POW_v3                 = POW_Z3sq, POW_Q4, POW_v4
-          nop.i 999
+          getf.exp pow_GR_signexp_Y_Gpr       = POW_Y_Gpr
+          fma.s1 POW_v3                     = POW_Z3sq, POW_Q4, POW_v4
+(p10)     tbit.nz.unc  p12,p0                = pow_GR_sig_int_Y,0
 }
 ;;
 
+
 { .mfi
-          nop.m 999
+          add pow_GR_int_N                   = pow_GR_int_W1, pow_GR_int_W2
           fnma.s1 POW_f1  = POW_N1float, POW_log2_by_128_lo, f1
           nop.i 999
 }
 { .mfb
           nop.m 999
           fnma.s1 POW_s1  = POW_N1float, POW_log2_by_128_hi, POW_Z1
-(p15)     br.cond.spnt POW_X_INF
+(p15)     br.cond.spnt L(POW_X_INF)
 }
 ;;
 
+
 // Test x and y and flag denormal
 { .mfi
-          nop.m 999
+          and pow_GR_index1                  = 0x0f, pow_GR_int_N
           fcmp.eq.s0 p15,p0 = f8,f9
-          nop.i 999
+          shr r2                             = pow_GR_int_N, 7
 }
 { .mfi
-          nop.m 999
-          fma.s1 POW_pYrcub_e3          = POW_p, POW_Yrcub, POW_e3
-          nop.i 999
+          and pow_GR_exp_Y_Gpr               = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+          nop.f 999
+          and pow_GR_index2                  = 0x70, pow_GR_int_N
 }
 ;;
 
+
+
 { .mfi
-          nop.m 999
+          shladd pow_AD_T1                   = pow_GR_index1, 4, pow_AD_tbl1
           fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1  // Test for y=1.0
-          nop.i 999
+          sub pow_GR_true_exp_Y_Gpr          = pow_GR_exp_Y_Gpr, pow_GR_16ones
 }
 { .mfi
-          nop.m 999
-          fma.s1  POW_e12               = POW_e1,f1,POW_e2
-          nop.i 999
+          addl pow_int_GR_M                  = 0xFFFF, r2
+          fma.s1  POW_e12                     = POW_e1,f1,POW_e2
+          add pow_AD_T2                      = pow_AD_tbl2, pow_GR_index2
 }
 ;;
 
-{ .mfi
-          add pow_GR_int_N              = pow_GR_int_W1, pow_GR_int_W2
-(p11)     fma.d.s0 f8 = f1,f1,f0    // If x=1, result is +1
-          nop.i 999
-}
-{ .mib
-(p12)     mov pow_GR_xneg_yodd = 1
-          nop.i 999
-(p11)     br.ret.spnt b0            // Early exit if x=1.0, result is +1
+
+{ .mmi
+          ldfe POW_T1                        = [pow_AD_T1],16
+          setf.exp POW_2M                    = pow_int_GR_M
+          andcm pow_GR_sign_Y_Gpr            = pow_GR_signexp_Y_Gpr, pow_GR_17ones
 }
 ;;
 
-{ .mfi
-          and pow_GR_index1             = 0x0f, pow_GR_int_N
-          fma.s1 POW_q                  = POW_Z3sq, POW_v3, POW_v2
-          shr pow_int_GR_M              = pow_GR_int_N, 7    // M = N/128
-}
-{ .mib
-          and pow_GR_index2             = 0x70, pow_GR_int_N
-          cmp.eq p6, p0                 = pow_GR_xneg_yodd, r0
+
+{ .mfb
+          ldfe POW_T2                        = [pow_AD_T2],16
+          fma.s1 POW_q                       = POW_Z3sq, POW_v3, POW_v2
 (p7)      br.ret.spnt b0        // Early exit if y=1.0, result is x
 }
 ;;
 
+
+// double: p8 TRUE ==> |Y(G + r)| >= 10
+// single: p8 TRUE ==> |Y(G + r)| >= 7
+
+// double
+//     -2^10  -2^9             2^9   2^10
+// -----+-----+----+ ... +-----+-----+-----
+//  p8  |             p9             |  p8
+//      |     |       p10      |     |  
+// single
+//     -2^7   -2^6             2^6   2^7
+// -----+-----+----+ ... +-----+-----+-----
+//  p8  |             p9             |  p8
+//      |     |       p10      |     |
+
+
 { .mfi
-          shladd pow_AD_T1              = pow_GR_index1, 4, pow_AD_tbl1
-          fma.s1 POW_s                  = POW_s1, f1, POW_s2
-          add pow_int_GR_M              = pow_GR_16ones, pow_int_GR_M
+(p0)      cmp.le.unc p8,p9                   = 10, pow_GR_true_exp_Y_Gpr
+          fma.s1 POW_s                       = POW_s1, f1, POW_s2
+          nop.i 999
 }
 { .mfi
-          add pow_AD_T2                 = pow_AD_tbl2, pow_GR_index2
-          fma.s1 POW_f12                = POW_f1, POW_f2,f0
-          and pow_GR_exp_Y_Gpr          = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+          nop.m 999
+          fma.s1 POW_f12                     = POW_f1, POW_f2,f0
+          nop.i 999
 }
 ;;
 
-{ .mmi
-          ldfe POW_T1                   = [pow_AD_T1]
-          ldfe POW_T2                   = [pow_AD_T2]
-          sub pow_GR_true_exp_Y_Gpr     = pow_GR_exp_Y_Gpr, pow_GR_16ones
-}
-;;
 
 { .mfi
-          setf.exp POW_2M               = pow_int_GR_M
-          fma.s1 POW_e123               = POW_e12, f1, POW_e3
-          nop.i 999
-}
-{ .mfb
-(p6)      cmp.gt p6, p0                 = -11, pow_GR_true_exp_Y_Gpr
-          fma.s1 POW_d                  = POW_GY_Z2, f1, POW_pYrcub_e3
-(p6)      br.cond.spnt POW_NEAR_ONE // branch if |y*log(x)| < 2^(-11)
+          nop.f 999
+(p9)      cmp.le.unc p0,p10                  = 9, pow_GR_true_exp_Y_Gpr
 }
 ;;
 
-{ .mfi
+
+
+{ .mfb
           nop.m 999
-          fma.s1 POW_q                  = POW_Z3sq, POW_q, POW_Z3
-          nop.i 999
+          fma.s1 POW_e123                    = POW_e12, f1, POW_e3
+(p8)      br.cond.spnt L(POW_OVER_UNDER_X_NOT_INF)
 }
 ;;
 
-// p8 TRUE ==> |Y(G + r)| >= 10
 
-// double
-//     -2^10  -2^9             2^9   2^10
-// -----+-----+----+ ... +-----+-----+-----
-//  p8  |             p9             |  p8
-//      |     |       p10      |     |
+{ .mmf
+          fma.s1 POW_q                       = POW_Z3sq, POW_q, POW_Z3
+}
+;;
+
 
-// Form signexp of constants to indicate overflow
 { .mfi
-          mov         pow_GR_big_pos    = 0x103ff
-          fma.s1 POW_ssq                = POW_s, POW_s, f0
-          cmp.le p8,p9                  = 10, pow_GR_true_exp_Y_Gpr
+          nop.m 999 
+          fma.s1 POW_ssq                     = POW_s, POW_s, f0
+          nop.i 999
 }
 { .mfi
-          mov         pow_GR_big_neg    = 0x303ff
-          fma.s1 POW_v4                 = POW_s, POW_Q3, POW_Q2
-          andcm pow_GR_sign_Y_Gpr       = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+          nop.m 999 
+          fma.s1 POW_v4                      = POW_s, POW_Q3, POW_Q2
+          nop.i 999
 }
 ;;
 
-// Form big positive and negative constants to test for possible overflow
 { .mfi
-          setf.exp POW_big_pos          = pow_GR_big_pos
-          fma.s1 POW_v2                 = POW_s, POW_Q1, POW_Q0_half
-(p9)      cmp.le.unc p0,p10             = 9, pow_GR_true_exp_Y_Gpr
+          nop.m 999
+          fma.s1 POW_v2                      = POW_s, POW_Q1, POW_Q0_half
+          nop.i 999
 }
-{ .mfb
-          setf.exp POW_big_neg          = pow_GR_big_neg
-          fma.s1 POW_1ps                = f1,f1,POW_s
-(p8)      br.cond.spnt POW_OVER_UNDER_X_NOT_INF
+{ .mfi
+          nop.m 999
+          fma.s1 POW_1ps                     = f1,f1,POW_s
+          nop.i 999
 }
 ;;
 
-// f123 = f12*(e123+1) = f12*e123+f12
 { .mfi
           nop.m 999
-          fma.s1 POW_f123               = POW_e123,POW_f12,POW_f12
+          fma.s1 POW_f3                      = POW_e123,f1,f1
           nop.i 999
 }
 ;;
 
 { .mfi
           nop.m 999
-          fma.s1 POW_T1T2               = POW_T1, POW_T2, f0
+          fma.s1 POW_T1T2                    = POW_T1, POW_T2, f0
           nop.i 999
 }
+;;
+
 { .mfi
           nop.m 999
-          fma.s1 POW_v3                 = POW_ssq, POW_Q4, POW_v4
-          cmp.ne p12,p13 = pow_GR_xneg_yodd, r0
+          fma.s1 POW_v3                     = POW_ssq, POW_Q4, POW_v4
+          nop.i 999
 }
 ;;
 
 { .mfi
           nop.m 999
-          fma.s1 POW_v21ps              = POW_ssq, POW_v2, POW_1ps
+          fma.s1 POW_v21ps                  = POW_ssq, POW_v2, POW_1ps
           nop.i 999
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_s4                 = POW_ssq, POW_ssq, f0
+          fma.s1 POW_s4                     = POW_ssq, POW_ssq, f0
           nop.i 999
 }
 ;;
 
 { .mfi
           nop.m 999
-(p12)     fnma.s1 POW_A                 =  POW_2M, POW_f123, f0
+          fma.s1 POW_f123                    = POW_f12, POW_f3, f0
           nop.i 999
 }
-{ .mfi
-          nop.m 999
-(p13)     fma.s1 POW_A                  =  POW_2M, POW_f123, f0
-          cmp.eq p14,p11 = r0,r0   // Initialize p14 on, p11 off
-}
 ;;
 
 { .mfi
           nop.m 999
-          fmerge.s POW_abs_q = f0, POW_q // Form |q| so can test its size
+          fma.s1 POW_A                      =  POW_2M, POW_T1T2, f0
           nop.i 999
 }
 ;;
 
+
+
 { .mfi
-(p10)     cmp.eq p0,p14 = r0,r0    // Turn off p14 if no overflow
-          fma.s1 POW_es                 = POW_s4,  POW_v3, POW_v21ps
+          nop.m 999
+(p12)     fmerge.s POW_f123 = f8,POW_f123  // if x neg, y odd int
           nop.i 999
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_A                  = POW_A, POW_T1T2, f0
+//          fma.s1 POW_es                     = POW_ssq,  POW_v3, POW_v2
           nop.i 999
 }
 ;;
 
 { .mfi
-// Test for |q| < 2^-63.  If so then reverse last two steps of the result
-// to avoid monotonicity problems for results near 1.0 in round up/down/zero.
-// p11 will be set if need to reverse the order, p14 if not.
           nop.m 999
-(p10)     fcmp.lt.s0 p11,p14 = POW_abs_q, POW_2toM63 // Test |q| <2^-63
+          fma.s1 POW_es                     = POW_s4,  POW_v3, POW_v21ps
           nop.i 999
 }
 ;;
 
-.pred.rel "mutex",p11,p14
+
 { .mfi
           nop.m 999
-(p14)     fma.s1 POW_A                  = POW_A, POW_es, f0
+          fma.s1 POW_A                      = POW_A, POW_f123, f0
           nop.i 999
 }
 { .mfi
           nop.m 999
-(p11)     fma.s1 POW_A                  = POW_A, POW_q, POW_A
+//          fma.s1 POW_es                     = POW_es, POW_ssq, POW_1ps
           nop.i 999
 }
 ;;
 
-// Dummy op to set inexact if |q| < 2^-63
+
 { .mfi
           nop.m 999
-(p11)     fma.d.s0 POW_tmp              = POW_A, POW_q, POW_A
+          fma.s1 POW_A                      = POW_A, POW_es,f0
           nop.i 999
 }
 ;;
 
-{ .mfi
-          nop.m 999
-(p14)     fma.d.s0 f8                   = POW_A, POW_q, POW_A
-          nop.i 999
-}
+
+
 { .mfb
           nop.m 999
-(p11)     fma.d.s0 f8                   = POW_A, POW_es, f0
-(p10)     br.ret.sptk     b0            // Exit main branch if no over/underflow
+(p10)     fma.d f8                          = POW_A, POW_q, POW_A
+(p10)     br.ret.sptk     b0
 }
 ;;
 
+
+
+
+
 // POSSIBLE_OVER_UNDER
-// p6 = TRUE ==> Y_Gpr negative
-// Result is already computed.  We just need to know if over/underflow occurred.
+// p6 = TRUE ==> Y negative
 
-{ .mfb
-        cmp.eq p0,p6                    = pow_GR_sign_Y_Gpr, r0
-        nop.f 999
-(p6)    br.cond.spnt POW_POSSIBLE_UNDER
+{ .mfi
+        nop.m 999
+        fmerge.s POW_abs_A                = f0, POW_A
+        cmp.eq.unc p0,p6                  = pow_GR_sign_Y, r0
+}
+;;
+
+{ .mib
+        nop.m 999
+        nop.i 999
+(p6)    br.cond.spnt L(POW_POSSIBLE_UNDER) 
 }
 ;;
 
 // POSSIBLE_OVER
-// We got an answer.
+// We got an answer. 
 // overflow is a possibility, not a certainty
 
 
@@ -1611,20 +1692,21 @@ POW_COMMON:
 //                  RN         RN
 //                             RZ
 
+
 // Put in s2 (td set, wre set)
 { .mfi
-        nop.m 999
+        mov           pow_GR_gt_ln                 = 0x103ff 
         fsetc.s2 0x7F,0x42
-        nop.i 999
+        nop.i 999 
 }
 ;;
 
+
 { .mfi
-        nop.m 999
-        fma.d.s2 POW_wre_urm_f8         = POW_A, POW_q, POW_A
-        nop.i 999
+        setf.exp POW_gt_pln                        = pow_GR_gt_ln
+        fma.d.s2 POW_wre_urm_f8                    = POW_abs_A, POW_q, POW_abs_A
+        nop.i 999 ;;
 }
-;;
 
 // Return s2 to default
 { .mfi
@@ -1634,67 +1716,31 @@ POW_COMMON:
 }
 ;;
 
+
 // p7 = TRUE ==> yes, we have an overflow
 { .mfi
         nop.m 999
-        fcmp.ge.s1 p7, p8               =  POW_wre_urm_f8, POW_big_pos
+        fcmp.ge.unc.s1 p7, p0                    =  POW_wre_urm_f8, POW_gt_pln
         nop.i 999
 }
 ;;
 
-{ .mfi
-        nop.m 999
-(p8)    fcmp.le.s1 p7, p0               =  POW_wre_urm_f8, POW_big_neg
-        nop.i 999
-}
-;;
 
-{ .mbb
-(p7)   mov pow_GR_tag                   = 24
-(p7)   br.cond.spnt __libm_error_region // Branch if overflow
-       br.ret.sptk     b0               // Exit if did not overflow
-}
-;;
 
-// Here if |y*log(x)| < 2^(-11)
-// pow(x,y) ~ exp(d) ~ 1 + d + 0.5*d^2 + Q1*d^3 + Q2*d^4, where d = y*log(x)
-.align 32
-POW_NEAR_ONE:
-
-{ .mfi
-          nop.m 999
-          fma.s1 POW_d2                 = POW_d, POW_d, f0
-          nop.i 999
-}
-;;
-
-{ .mfi
-          nop.m 999
-          fma.s1 POW_poly_d_hi          = POW_d, POW_Q0_half, f1
-          nop.i 999
-}
-{ .mfi
-          nop.m 999
-          fma.s1 POW_poly_d_lo          = POW_d, POW_Q2, POW_Q1
-          nop.i 999
-}
-;;
-
-{ .mfi
-          nop.m 999
-          fma.s1 POW_poly_d             = POW_d2, POW_poly_d_lo, POW_poly_d_hi
-          nop.i 999
+{ .mfb
+(p7)   mov pow_GR_tag                            = 24
+       fma.d f8                                  = POW_A, POW_q, POW_A
+(p7)   br.cond.spnt __libm_error_region 
 }
-;;
-
 { .mfb
-          nop.m 999
-          fma.d.s0 f8                   = POW_d, POW_poly_d, f1
-          br.ret.sptk b0 // exit function for arguments |y*log(x)| < 2^(-11)
+       nop.m 999
+       nop.f 999
+(p0)   br.ret.sptk     b0 
 }
 ;;
 
-POW_POSSIBLE_UNDER:
+
+L(POW_POSSIBLE_UNDER):
 // We got an answer. input was < -2^9 but > -2^10 (double)
 // We got an answer. input was < -2^6 but > -2^7  (float)
 // underflow is a possibility, not a certainty
@@ -1717,250 +1763,124 @@ POW_POSSIBLE_UNDER:
 //   0.1...11 2^-3ffe                                   (biased, 1)
 //    largest dn                               smallest normal
 
+
 // Put in s2 (td set, ftz set)
 { .mfi
         nop.m 999
         fsetc.s2 0x7F,0x41
-        nop.i 999
+        nop.i 999 
 }
 ;;
 
+
+
 { .mfi
         nop.m 999
-        fma.d.s2 POW_ftz_urm_f8         = POW_A, POW_q, POW_A
+        fma.d.s2 POW_ftz_urm_f8                    = POW_A, POW_q, POW_A
         nop.i 999
 }
 ;;
 
+
 // Return s2 to default
 { .mfi
         nop.m 999
         fsetc.s2 0x7F,0x40
-        nop.i 999
+        nop.i 999 
 }
 ;;
 
+
 // p7 = TRUE ==> yes, we have an underflow
 { .mfi
         nop.m 999
-        fcmp.eq.s1 p7, p0               =  POW_ftz_urm_f8, f0
-        nop.i 999
+        fcmp.eq.unc.s1 p7, p0                     =  POW_ftz_urm_f8, f0
+        nop.i 999 
 }
 ;;
 
-{ .mbb
-(p7)    mov pow_GR_tag                  = 25
-(p7)    br.cond.spnt __libm_error_region // Branch if underflow
-        br.ret.sptk     b0               // Exit if did not underflow
-}
-;;
-
-POW_X_DENORM:
-// Here if x unorm. Use the NORM_X for getf instructions, and then back
-// to normal path
-{ .mfi
-        getf.exp      pow_GR_signexp_X  = POW_NORM_X
-        nop.f 999
-        nop.i 999
-}
-;;
 
-{ .mmi
-        getf.sig      pow_GR_sig_X      = POW_NORM_X
-;;
-        and           pow_GR_exp_X      = pow_GR_signexp_X, pow_GR_17ones
-        nop.i 999
-}
-;;
-
-{ .mib
-        sub       pow_GR_true_exp_X     = pow_GR_exp_X, pow_GR_16ones
-        nop.i 999
-        br.cond.sptk    POW_COMMON
-}
-;;
 
-POW_X_0:
-// Here if x=0 and y not nan
-//
-// We have the following cases:
-//  p6  x=0  and  y>0 and is an integer (may be even or odd)
-//  p7  x=0  and  y>0 and is NOT an integer, return +0
-//  p8  x=0  and  y>0 and so big as to always be an even integer, return +0
-//  p9  x=0  and  y>0 and may not be integer
-//  p10 x=0  and  y>0 and is an odd  integer, return x
-//  p11 x=0  and  y>0 and is an even integer, return +0
-//  p12 used in dummy fcmp to set denormal flag if y=unorm
-//  p13 x=0  and  y>0
-//  p14 x=0  and  y=0, branch to code for calling error handling
-//  p15 x=0  and  y<0, branch to code for calling error handling
-//
-{ .mfi
-        getf.sig pow_GR_sig_int_Y = POW_int_Y // Get signif of int_Y
-        fcmp.lt.s1 p15,p13 = f9, f0           // Test for y<0
-        and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
-}
-{ .mfb
-        cmp.ne p14,p0 = pow_GR_y_zero,r0      // Test for y=0
-        fcvt.xf   POW_float_int_Y = POW_int_Y
-(p14)   br.cond.spnt POW_X_0_Y_0              // Branch if x=0 and y=0
-}
-;;
 
-// If x=0 and y>0, test y and flag denormal
 { .mfb
-(p13)   cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033 // Test y +big = even int
-(p13)   fcmp.eq.s0 p12,p0 = f9,f0    // If x=0, y>0 dummy op to flag denormal
-(p15)   br.cond.spnt POW_X_0_Y_NEG // Branch if x=0 and y<0
+(p7)    mov pow_GR_tag                           = 25
+        fma.d f8                                 = POW_A, POW_q, POW_A
+(p7)    br.cond.spnt __libm_error_region 
 }
 ;;
 
-// Here if x=0 and y>0
-{ .mfi
-        nop.m 999
-(p9)    fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y,  POW_NORM_Y // Test y=int
-        nop.i 999
-}
-{ .mfi
-        nop.m 999
-(p8)    fma.d.s0 f8 = f0,f0,f0 // If x=0, y>0 and large even int, return +0
-        nop.i 999
-}
-;;
 
-{ .mfi
-        nop.m 999
-(p7)    fma.d.s0 f8  = f0,f0,f0   // Result +0 if x=0 and y>0 and not integer
-(p6)    tbit.nz.unc p10,p11 = pow_GR_sig_int_Y,0 // If y>0 int, test y even/odd
-}
-;;
-
-// Note if x=0, y>0 and odd integer, just return x
 { .mfb
         nop.m 999
-(p11)   fma.d.s0 f8  = f0,f0,f0   // Result +0 if x=0 and y even integer
-        br.ret.sptk b0            // Exit if x=0 and y>0
-}
-;;
-
-POW_X_0_Y_0:
-// When X is +-0 and Y is +-0, IEEE returns 1.0
-// We call error support with this value
-
-{ .mfb
-        mov pow_GR_tag                  = 26
-        fma.d.s0 f8                     = f1,f1,f0
-        br.cond.sptk __libm_error_region
+        nop.f 999
+        br.ret.sptk     b0 
 }
 ;;
 
-POW_X_0_Y_NEG:
-// When X is +-0 and Y is negative, IEEE returns
-// X     Y           answer
-// +0    -odd int    +inf
-// -0    -odd int    -inf
-
-// +0    !-odd int   +inf
-// -0    !-odd int   +inf
-
-// p6 == Y is a floating point number outside the integer.
-//       Hence it is an integer and is even.
-//       return +inf
-
-// p7 == Y is a floating point number within the integer range.
-//      p9  == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
-//           p11 odd
-//              return (sign_of_x)inf
-//           p12 even
-//              return +inf
-//      p10 == Y is not an integer
-//         return +inf
-//
 
+L(POW_X_DENORM):
+// Here if x unorm. Use the NORM_X for getf instructions, and then back
+// to normal path
 { .mfi
-          nop.m 999
-          nop.f 999
-          cmp.gt  p6,p7                 = pow_GR_exp_Y, pow_GR_10033
+        getf.exp      pow_GR_signexp_X    = POW_NORM_X
+        nop.f 999
+        nop.i 999
 }
 ;;
 
 { .mfi
-          mov pow_GR_tag                = 27
-(p7)      fcmp.eq.unc.s1 p9,p10         = POW_float_int_Y,  POW_NORM_Y
-          nop.i 999
-}
-;;
-
-{ .mfb
-          nop.m 999
-(p6)      frcpa.s0 f8,p13               = f1, f0
-(p6)      br.cond.sptk __libm_error_region   // x=0, y<0, y large neg int
+        getf.sig      pow_GR_sig_X        = POW_NORM_X
+        nop.f 999
+        nop.i 999
 }
 ;;
 
-{ .mfb
-          nop.m 999
-(p10)     frcpa.s0 f8,p13               = f1, f0
-(p10)     br.cond.sptk __libm_error_region   // x=0, y<0, y not int
+{ .mfi
+        and           pow_GR_exp_X        = pow_GR_signexp_X, pow_GR_17ones
+        nop.f 999
 }
 ;;
 
-// x=0, y<0, y an int
 { .mib
-          nop.m 999
-(p9)      tbit.nz.unc p11,p12           = pow_GR_sig_int_Y,0
-          nop.b 999
+        sub       pow_GR_true_exp_X       = pow_GR_exp_X, pow_GR_16ones
+        shl           pow_GR_offset       = pow_GR_sig_X, 1
+        br.cond.sptk    L(POW_COMMON)
 }
 ;;
 
-{ .mfi
-          nop.m 999
-(p12)     frcpa.s0 f8,p13               = f1,f0
-          nop.i 999
-}
-;;
+
+L(POW_X_0_Y_0):
+// When X is +-0 and Y is +-0, IEEE returns 1.0 
+// We call error support with this value 
 
 { .mfb
-          nop.m 999
-(p11)     frcpa.s0 f8,p13               = f1,f8
-          br.cond.sptk __libm_error_region
+         mov pow_GR_tag                     = 26
+         fma.d f8                           = f1,f1,f0
+         br.cond.sptk __libm_error_region
 }
 ;;
 
 
-POW_Y_0:
-// Here for y zero, x anything but zero and nan
-// Set flag if x denormal
-// Result is +1.0
-{ .mfi
-        nop.m 999
-        fcmp.eq.s0 p6,p0 = f8,f0    // Sets flag if x denormal
-        nop.i 999
-}
-{ .mfb
-        nop.m 999
-        fma.d.s0 f8 = f1,f1,f0
-        br.ret.sptk b0
-}
-;;
 
 
-POW_X_INF:
-// Here when X is +-inf
+L(POW_X_INF):
+// When X is +-inf, the result depends on Y; IEEE returns:
 
-// X +inf  Y +inf             +inf
-// X -inf  Y +inf             +inf
+// overflow                       
+// X +inf  Y +inf             +inf  
+// X -inf  Y +inf             +inf 
 
-// X +inf  Y >0               +inf
+// X +inf  Y >0               +inf    
 // X -inf  Y >0, !odd integer +inf     <== (-inf)^0.5 = +inf !!
-// X -inf  Y >0,  odd integer -inf
+// X -inf  Y >0,  odd integer  -inf   
 
-// X +inf  Y -inf             +0
-// X -inf  Y -inf             +0
+// underflow                     
+// X +inf  Y -inf             +0   
+// X -inf  Y -inf             +0  
 
-// X +inf  Y <0               +0
-// X -inf  Y <0, !odd integer +0
-// X -inf  Y <0, odd integer  -0
+// X +inf  Y <0               +0      
+// X -inf  Y <0, !odd integer +0     
+// X -inf  Y <0, odd integer  -0    
 
 // X + inf Y=+0                +1
 // X + inf Y=-0                +1
@@ -1972,30 +1892,32 @@ POW_X_INF:
 
 // p6 == Y is a floating point number outside the integer.
 //       Hence it is an integer and is even.
-//       p13 == (Y negative)
+//       p13 == (Y negative) 
 //          return +inf
 //       p14 == (Y positive)
 //          return +0
 
+
+
 // p7 == Y is a floating point number within the integer range.
 //      p9  == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
 //           p11 odd
-//              p13 == (Y negative)
+//              p13 == (Y negative)    
 //                 return (sign_of_x)inf
-//              p14 == (Y positive)
+//              p14 == (Y positive) 
 //                 return (sign_of_x)0
-//           pxx even
-//              p13 == (Y negative)
-//                 return +inf
+//           pxx even                
+//              p13 == (Y negative) 
+//                 return +inf     
 //              p14 == (Y positive)
-//                 return +0
+//                 return +0     
 
 //      pxx == Y is not an integer
-//           p13 == (Y negative)
+//           p13 == (Y negative) 
 //                 return +inf
 //           p14 == (Y positive)
 //                 return +0
-//
+// 
 
 // If x=inf, test y and flag denormal
 { .mfi
@@ -2007,131 +1929,207 @@ POW_X_INF:
 
 { .mfi
           nop.m 999
-          fcmp.lt.s0 p13,p14            = POW_NORM_Y,f0
-          cmp.gt  p6,p7                 = pow_GR_exp_Y, pow_GR_10033
+          fcmp.lt p13,p14                    = POW_NORM_Y,f0 
+          cmp.gt.unc  p6,p7                  = pow_GR_exp_Y, pow_GR_10033
 }
 { .mfi
           nop.m 999
-          fclass.m p12,p0               = f9, 0x23 //@inf
+          fclass.m p12,p0                    = f9, 0x23
           nop.i 999
 }
 ;;
 
+
 { .mfi
           nop.m 999
-          fclass.m p15,p0               = f9, 0x07 //@zero
+          fclass.m p15,p0                    = f9, 0x07	//@zero
           nop.i 999
 }
 ;;
 
 { .mfb
           nop.m 999
-(p15)     fmerge.s f8 = f1,f1      // Return +1.0 if x=inf, y=0
-(p15)     br.ret.spnt b0           // Exit if x=inf, y=0
+(p15)     fmerge.s f8 = f1,f1
+(p15)     br.ret.spnt b0
 }
 ;;
 
+        
 { .mfi
-          nop.m 999
-(p14)     frcpa.s1 f8,p10 = f1,f0  // If x=inf, y>0, assume result +inf
+(p13)     mov pow_GR_tag                     = 25
+(p14)     frcpa.s1 f8,p10                       = f1,f0
           nop.i 999
 }
 { .mfb
+(p14)     mov pow_GR_tag                     = 24
+(p13)     fma.s1 f8                          = f0,f0,f0
+(p12)     br.ret.spnt b0
+}
+;;
+
+   
+
+{ .mfb
           nop.m 999
-(p13)     fma.d.s0 f8 = f0,f0,f0   // If x=inf, y<0, assume result +0.0
-(p12)     br.ret.spnt b0           // Exit if x=inf, y=inf
+(p7)      fcmp.eq.unc.s1 p9,p0              = POW_float_int_Y,  POW_NORM_Y
+          nop.b 999
 }
 ;;
 
-// Here if x=inf, and 0 < |y| < inf.  Need to correct results if y odd integer.
 { .mfi
           nop.m 999
-(p7)      fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y,  POW_NORM_Y // Is y integer?
-          nop.i 999
+          nop.f 999
+(p9)      tbit.nz.unc p11,p0                 = pow_GR_sig_int_Y,0
 }
 ;;
 
+{ .mfb
+          nop.m 999
+(p11)     fmerge.s f8 = POW_NORM_X,f8
+          br.ret.sptk b0 
+}
+;;
+
+
+
+L(POW_X_0_Y_NEG):
+// When X is +-0 and Y is negative, IEEE returns 
+// X     Y           answer
+// +0    -odd int    +inf
+// -0    -odd int    -inf
+
+// +0    !-odd int   +inf
+// -0    !-odd int   +inf
+
+
+// p6 == Y is a floating point number outside the integer range.
+//       Hence it is an integer and is even.
+//       return +inf
+
+// p7 == Y is a floating point number within the integer range.
+//      p9  == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+//           p11 odd
+//              return (sign_of_x)inf
+//           p12 even
+//              return +inf
+//      p10 == Y is not an integer
+//         return +inf
+// 
+// 
+
 { .mfi
           nop.m 999
           nop.f 999
-(p9)      tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0  // Test for y odd integer
+          cmp.gt.unc  p6,p7                  = pow_GR_exp_Y, pow_GR_10033
+}
+;;
+
+
+{ .mfi
+          mov pow_GR_tag                     = 27
+(p7)      fcmp.eq.unc.s1 p9,p10              = POW_float_int_Y,  POW_NORM_Y
+          nop.i 999
+}
+;;
+
+
+{ .mfb
+          nop.m 999
+(p6)      frcpa.s0 f8,p13                       = f1, f0
+(p6)      br.cond.sptk __libm_error_region
 }
 ;;
 
 { .mfb
           nop.m 999
-(p11)     fmerge.s f8 = POW_NORM_X,f8    // If y odd integer use sign of x
-          br.ret.sptk b0                 // Exit for x=inf, 0 < |y| < inf
+(p10)     frcpa.s0 f8,p13                       = f1, f0
+(p10)     br.cond.sptk __libm_error_region
 }
 ;;
 
 
-POW_X_NEG_Y_NONINT:
-// When X is negative and Y is a non-integer, IEEE
-// returns a qnan indefinite.
-// We call error support with this value
 
-{ .mfb
-         mov pow_GR_tag                 = 28
-         frcpa.s0 f8,p6                 = f0,f0
-         br.cond.sptk __libm_error_region
+{ .mib
+          nop.m 999
+(p9)      tbit.nz.unc p11,p12                = pow_GR_sig_int_Y,0
+          nop.b 999
 }
 ;;
 
-POW_X_NAN:
-// Here if x=nan, y not nan
+
+
 { .mfi
-         nop.m 999
-         fclass.m  p9,p13 = f9, 0x07 // Test y=zero
-         nop.i 999
+          nop.m 999
+(p12)     frcpa.s0 f8,p13                      = f1,f0
+          nop.i 999
 }
 ;;
 
 { .mfb
-         nop.m 999
-(p13)    fma.d.s0 f8 = f8,f1,f0
-(p13)    br.ret.sptk  b0            // Exit if x nan, y anything but zero or nan
+          nop.m 999
+(p11)     frcpa f8,p13                      = f1,f8 
+          br.cond.sptk __libm_error_region
 }
 ;;
 
-POW_X_NAN_Y_0:
+
+
+
+L(POW_X_NEG_Y_NONINT):
+// When X is negative and Y is a non-integer, IEEE
+// returns a qnan indefinite.
+// We call error support with this value 
+
+{ .mfb
+         mov pow_GR_tag                     = 28
+         frcpa f8,p6                        = f0,f0
+         br.cond.sptk __libm_error_region
+}
+;;
+
+
+
+
+L(POW_X_NAN_Y_0):
 // When X is a NAN and Y is zero, IEEE returns 1.
 // We call error support with this value.
+
 { .mfi
-         nop.m 999
-         fcmp.eq.s0 p6,p0 = f8,f0       // Dummy op to set invalid on snan
-         nop.i 999
+         nop.m 0
+         fma.d.s0 f10 = f8,f1,f0 
+         nop.i 0
 }
 { .mfb
-         mov pow_GR_tag                 = 29
-         fma.d.s0 f8 = f0,f0,f1
+         mov pow_GR_tag                     = 29
+         fma.d.s0 f8 = f0,f0,f1 
          br.cond.sptk __libm_error_region
 }
 ;;
 
 
-POW_OVER_UNDER_X_NOT_INF:
+L(POW_OVER_UNDER_X_NOT_INF):
 
 // p8 is TRUE for overflow
 // p9 is TRUE for underflow
 
 // if y is infinity, we should not over/underflow
 
+
 { .mfi
           nop.m 999
-          fcmp.eq.s1     p14, p13       = POW_xsq,f1  // Test |x|=1
-          cmp.eq p8,p9                  = pow_GR_sign_Y_Gpr, r0
+          fcmp.eq.unc.s1     p14, p13        = POW_xsq,f1
+          cmp.eq.unc p8,p9                   = pow_GR_sign_Y_Gpr, r0
 }
 ;;
 
 { .mfi
           nop.m 999
-(p14)     fclass.m.unc       p15, p0    = f9, 0x23 // If |x|=1, test y=inf
+(p14)     fclass.m.unc       p15, p0         = f9, 0x23
           nop.i 999
 }
 { .mfi
           nop.m 999
-(p13)     fclass.m.unc       p11,p0     = f9, 0x23 // If |x| not 1, test y=inf
+(p13)     fclass.m.unc       p11,p0         = f9, 0x23
           nop.i 999
 }
 ;;
@@ -2139,33 +2137,31 @@ POW_OVER_UNDER_X_NOT_INF:
 // p15 = TRUE if |x|=1, y=inf, return +1
 { .mfb
           nop.m 999
-(p15)     fma.d.s0          f8          = f1,f1,f0 // If |x|=1, y=inf, result +1
-(p15)     br.ret.spnt b0                // Exit if |x|=1, y=inf
+(p15)     fma.d              f8              = f1,f1,f0
+(p15)     br.ret.spnt b0
 }
 ;;
 
 .pred.rel "mutex",p8,p9
 {  .mfb
-(p8)      setf.exp           f8 = pow_GR_17ones // If exp(+big), result inf
-(p9)      fmerge.s           f8 = f0,f0         // If exp(-big), result 0
-(p11)     br.ret.sptk b0                // Exit if |x| not 1, y=inf
+(p8)      setf.exp           f8              = pow_GR_17ones
+(p9)      fmerge.s           f8              = f0,f0
+(p11)     br.ret.sptk b0
 }
-;;
 
 { .mfb
           nop.m 999
           nop.f 999
-          br.cond.sptk POW_OVER_UNDER_ERROR // Branch if y not inf
+          br.cond.sptk L(POW_OVER_UNDER_ERROR)
 }
 ;;
 
+L(POW_Y_NAN):
 
-POW_Y_NAN:
-// Here if y=nan, x anything
-// If x = +1 then result is +1, else result is quiet Y
+// If x = +1 then result is +1, else result is quiet Y
 { .mfi
        nop.m 999
-       fcmp.eq.s1         p10,p9        = POW_NORM_X, f1
+       fcmp.eq.s1         p10,p9               = POW_NORM_X, f1 
        nop.i 999
 }
 ;;
@@ -2179,117 +2175,148 @@ POW_Y_NAN:
 
 { .mfi
        nop.m 999
-(p10)  fma.d.s0 f8 = f1,f1,f0
+(p10)  fma.d f8 = f1,f1,f0 
        nop.i 999
 }
 { .mfb
        nop.m 999
-(p9)   fma.d.s0 f8 = f9,f8,f0
-       br.ret.sptk b0             // Exit y=nan
+(p9)   fma.d f8 = f9,f8,f0 
+       br.ret.sptk b0
 }
 ;;
 
 
-POW_OVER_UNDER_ERROR:
-// Here if we have overflow or underflow.
-// Enter with p12 true if x negative and y odd int to force -0 or -inf
+L(POW_OVER_UNDER_ERROR):
 
 { .mfi
-         sub   pow_GR_17ones_m1         = pow_GR_17ones, r0, 1
-         nop.f 999
-         mov pow_GR_one                 = 0x1
+          nop.m 999
+          fmerge.s f10                      = POW_NORM_X,POW_NORM_X
+          nop.i 999
+}
+{ .mfi
+          sub   pow_GR_17ones_m1            = pow_GR_17ones, r0, 1
+          nop.f 999
+          mov pow_GR_one                    = 0x1
 }
 ;;
 
-// overflow, force inf with O flag
+// overflow
 { .mmb
-(p8)     mov pow_GR_tag                 = 24
-(p8)     setf.exp POW_tmp               = pow_GR_17ones_m1
+(p8)     mov pow_GR_tag                     = 24
+(p8)     setf.exp f11                       = pow_GR_17ones_m1
          nop.b 999
 }
 ;;
 
-// underflow, force zero with I, U flags
+        
+// underflow
 { .mmi
-(p9)    mov pow_GR_tag                  = 25
-(p9)    setf.exp POW_tmp                = pow_GR_one
+(p9)    mov pow_GR_tag                     = 25
+(p9)    setf.exp f11                       = pow_GR_one
         nop.i 999
 }
 ;;
 
+
+// p12 x is negative and y is an odd integer 
+
+
 { .mfi
         nop.m 999
-        fma.d.s0 f8                     = POW_tmp, POW_tmp, f0
+        fma.d f8                               = f11, f11, f0
         nop.i 999
 }
 ;;
 
-// p12 x is negative and y is an odd integer, change sign of result
 { .mfi
         nop.m 999
-(p12)   fnma.d.s0 f8                    = POW_tmp, POW_tmp, f0
+(p12)   fmerge.ns f8                           = f8, f8
         nop.i 999
 }
 ;;
 
-GLOBAL_LIBM_END(pow)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp pow
+ASM_SIZE_DIRECTIVE(pow)
+
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
+
+
 
+.proc __libm_error_region
+__libm_error_region:
+
+// Answer is inf for overflow and 0 for underflow.
 .prologue
+// (1)
 { .mfi
-        add   GR_Parameter_Y=-32,sp     // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs         // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                   // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp               // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
 
+
+// (2)
 { .mmi
         stfd [GR_Parameter_Y] = POW_NORM_Y,16 // STORE Parameter 2 on stack
-        add GR_Parameter_X = 16,sp      // Parameter 1 address
+        add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0               // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
 
 .body
+// (3)
 { .mib
-        stfd [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 on stack
+        stfd [GR_Parameter_X] = POW_NORM_X              // STORE Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
-        nop.b 0
+        nop.b 0                                
 }
 { .mib
-        stfd [GR_Parameter_Y] = f8      // STORE Parameter 3 on stack
+        stfd [GR_Parameter_Y] = f8                      // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#           // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 
+// (4)
 { .mmi
-        ldfd  f8 = [GR_Parameter_RESULT] // Get return result off stack
+        ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-        add   sp = 64,sp                 // Restore stack pointer
-        mov   b0 = GR_SAVE_B0            // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
-        mov   gp = GR_SAVE_GP            // Restore gp
-        mov   ar.pfs = GR_SAVE_PFS       // Restore ar.pfs
-        br.ret.sptk     b0               // Return
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
diff --git a/sysdeps/ia64/fpu/e_powf.S b/sysdeps/ia64/fpu/e_powf.S
index 275843f1e2..d464058262 100644
--- a/sysdeps/ia64/fpu/e_powf.S
+++ b/sysdeps/ia64/fpu/e_powf.S
@@ -1,10 +1,10 @@
 .file "powf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,39 +35,30 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/03/00 Added p12 to definite over/under path. With odd power we did not
+// 2/02/00  Initial version
+// 2/03/00  Added p12 to definite over/under path. With odd power we did not
 //          maintain the sign of x in this path.
-// 04/04/00 Unwind support added
-// 04/19/00 pow(+-1,inf) now returns NaN
-//          pow(+-val, +-inf) returns 0 or inf, but now does not call error
-//          support
+// 4/04/00  Unwind support added
+// 4/19/00  pow(+-1,inf) now returns NaN
+//          pow(+-val, +-inf) returns 0 or inf, but now does not call error support
 //          Added s1 to fcvt.fx because invalid flag was incorrectly set.
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 09/07/00 Improved performance by eliminating bank conflicts and other stalls,
+// 9/07/00  Improved performance by eliminating bank conflicts and other stalls,
 //          and tweaking the critical path
-// 09/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
-// 09/28/00 Updated NaN**0 path
-// 01/20/01 Fixed denormal flag settings.
-// 02/13/01 Improved speed.
-// 03/19/01 Reordered exp polynomial to improve speed and eliminate monotonicity
-//          problem in round up, down, and to zero modes.  Also corrected
-//          overflow result when x negative, y odd in round up, down, zero.
-// 06/14/01 Added brace missing from bundle
-// 12/10/01 Corrected case where x negative, 2^23 <= |y| < 2^24, y odd integer.
-// 02/08/02 Fixed overflow/underflow cases that were not calling error support.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 08/29/02 Improved Itanium 2 performance
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 9/08/00  Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
+// 9/28/00  Updated NaN**0 path 
+// 1/20/01  Fixed denormal flag settings.
+// 2/12/01  Improved speed.
 //
 // API
 //==============================================================
-// float powf(float x, float y)
+// double pow(double x, double y)
+// float  powf(float x, float y)
 //
 // Overview of operation
 //==============================================================
@@ -76,51 +67,51 @@
 // 1. Log(x)
 // 2. y Log(x)
 // 3. exp(y log(x))
-//
+// 
 // This means we work with the absolute value of x and merge in the sign later.
 //      Log(x) = G + delta + r -rsq/2 + p
 // G,delta depend on the exponent of x and table entries. The table entries are
 // indexed by the exponent of x, called K.
-//
+// 
 // The G and delta come out of the reduction; r is the reduced x.
-//
+// 
 // B = frcpa(x)
 // xB-1 is small means that B is the approximate inverse of x.
-//
+// 
 //      Log(x) = Log( (1/B)(Bx) )
 //             = Log(1/B) + Log(Bx)
 //             = Log(1/B) + Log( 1 + (Bx-1))
-//
+// 
 //      x  = 2^K 1.x_1x_2.....x_52
-//      B= frcpa(x) = 2^-k Cm
+//      B= frcpa(x) = 2^-k Cm 
 //      Log(1/B) = Log(1/(2^-K Cm))
 //      Log(1/B) = Log((2^K/ Cm))
 //      Log(1/B) = K Log(2) + Log(1/Cm)
-//
+// 
 //      Log(x)   = K Log(2) + Log(1/Cm) + Log( 1 + (Bx-1))
-//
+// 
 // If you take the significand of x, set the exponent to true 0, then Cm is
 // the frcpa. We tabulate the Log(1/Cm) values. There are 256 of them.
 // The frcpa table is indexed by 8 bits, the x_1 thru x_8.
 // m = x_1x_2...x_8 is an 8-bit index.
-//
+// 
 //      Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255.
-//
+// 
 // We tabluate as two doubles, T and t, where T +t is the value itself.
-//
+// 
 //      Log(x)   = (K Log(2)_hi + T) + (Log(2)_hi + t) + Log( 1 + (Bx-1))
 //      Log(x)   =  G + delta           + Log( 1 + (Bx-1))
-//
+// 
 // The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1.
-//
+// 
 //      Log( 1 + (Bx-1)) = r - rsq/2 + p
-//
+// 
 // Then,
-//
+//    
 //      yLog(x) = yG + y delta + y(r-rsq/2) + yp
 //      yLog(x) = Z1 + e3      + Z2         + Z3 + (e2 + e3)
-//
-//
+// 
+// 
 //     exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
 //
 //
@@ -142,7 +133,7 @@
 //       exp(r)       = exp(Z - N log2/128)
 //
 //      r = s + d = (Z - N (log2/128)_hi) -N (log2/128)_lo
-//                =  Z - N (log2/128)
+//                =  Z - N (log2/128) 
 //
 //      Z         = s+d +N (log2/128)
 //
@@ -158,22 +149,22 @@
 //      n log2/128 = n_7n_6n_5 log2/8 + n_4n_3n_2n_1 log2/128
 //      n log2/128 = I2 log2/8 + I1 log2/128
 //
-//      N log2/128 = M log2 + I2 log2/8 + I1 log2/128
+//      N log2/128 = M log2 + I2 log2/8 + I1 log2/128 
 //
 //      exp(Z)    = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128))
 //      exp(Z)    = exp(s) (1+d1) (1+d2)(2^M) 2^I2/8 2^I1/128
 //      exp(Z)    = exp(s) f1 f2 (2^M) 2^I2/8 2^I1/128
 //
 // I1, I2 are table indices. Use a series for exp(s).
-// Then get exp(Z)
+// Then get exp(Z) 
 //
 //     exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
-//     exp(yLog(x)) = exp(Z) exp(Z3) f3
-//     exp(yLog(x)) = exp(Z)f3 exp(Z3)
-//     exp(yLog(x)) = A exp(Z3)
+//     exp(yLog(x)) = exp(Z) exp(Z3) f3 
+//     exp(yLog(x)) = exp(Z)f3 exp(Z3)  
+//     exp(yLog(x)) = A exp(Z3)  
 //
 // We actually calculate exp(Z3) -1.
-// Then,
+// Then, 
 //     exp(yLog(x)) = A + A( exp(Z3)   -1)
 //
 
@@ -184,146 +175,142 @@
 // ==============
 // The operation (K*log2_hi) must be exact. K is the true exponent of x.
 // If we allow gradual underflow (denormals), K can be represented in 12 bits
-// (as a two's complement number). We assume 13 bits as an engineering
-// precaution.
-//
+// (as a two's complement number). We assume 13 bits as an engineering precaution.
+// 
 //           +------------+----------------+-+
 //           |  13 bits   | 50 bits        | |
 //           +------------+----------------+-+
 //           0            1                66
 //                        2                34
-//
+// 
 // So we want the lsb(log2_hi) to be 2^-50
 // We get log2 as a quad-extended (15-bit exponent, 128-bit significand)
-//
+// 
 //      0 fffe b17217f7d1cf79ab c9e3b39803f2f6af (4...)
-//
+// 
 // Consider numbering the bits left to right, starting at 0 thru 127.
 // Bit 0 is the 2^-1 bit; bit 49 is the 2^-50 bit.
-//
+// 
 //  ...79ab
 //     0111 1001 1010 1011
 //     44
 //     89
-//
-// So if we shift off the rightmost 14 bits, then (shift back only
+// 
+// So if we shift off the rightmost 14 bits, then (shift back only 
 // the top half) we get
-//
+// 
 //      0 fffe b17217f7d1cf4000 e6af278ece600fcb dabc000000000000
-//
+// 
 // Put the right 64-bit signficand in an FR register, convert to double;
 // it is exact. Put the next 128 bits into a quad register and round to double.
 // The true exponent of the low part is -51.
-//
+// 
 // hi is 0 fffe b17217f7d1cf4000
 // lo is 0 ffcc e6af278ece601000
-//
+// 
 // Convert to double memory format and get
-//
+// 
 // hi is 0x3fe62e42fefa39e8
-// lo is 0x3cccd5e4f1d9cc02
-//
+// lo is 0x3cccd5e4f1d9cc02 
+// 
 // log2_hi + log2_lo is an accurate value for log2.
-//
-//
+// 
+// 
 // The T and t values
 // ==================
 // A similar method is used to generate the T and t values.
-//
+// 
 // K * log2_hi + T  must be exact.
-//
+// 
 // Smallest T,t
 // ----------
-// The smallest T,t is
+// The smallest T,t is 
 //       T                   t
-// 0x3f60040155d58800, 0x3c93bce0ce3ddd81  log(1/frcpa(1+0/256))=  +1.95503e-003
-//
+// data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81  log(1/frcpa(1+0/256))=  +1.95503e-003
+// 
 // The exponent is 0x3f6 (biased)  or -9 (true).
 // For the smallest T value, what we want is to clip the significand such that
-// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is the
-// specific for the first entry. In general, it is 0xffff - (biased 15-bit
-// exponent).
+// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is specific
+// to the first entry. In general, it is 0xffff - (biased 15-bit exponent).
 
-// Independently, what we have calculated is the table value as a quad
-// precision number.
+// Independently, what we have calculated is the table value as a quad precision number.
 // Table entry 1 is
 // 0 fff6 80200aaeac44ef38 338f77605fdf8000
-//
+// 
 // We store this quad precision number in a data structure that is
-//    sign:           1
+//    sign:           1 
 //    exponent:      15
 //    signficand_hi: 64 (includes explicit bit)
 //    signficand_lo: 49
 // Because the explicit bit is included, the significand is 113 bits.
-//
+// 
 // Consider significand_hi for table entry 1.
-//
-//
+// 
+// 
 // +-+--- ... -------+--------------------+
 // | |
 // +-+--- ... -------+--------------------+
 // 0 1               4444444455555555556666
 //                   2345678901234567890123
-//
+// 
 // Labeled as above, bit 0 is 2^0, bit 1 is 2^-1, etc.
 // Bit 42 is 2^-42. If we shift to the right by 9, the bit in
 // bit 42 goes in 51.
-//
+// 
 // So what we want to do is shift bits 43 thru 63 into significand_lo.
-// This is shifting bit 42 into bit 63, taking care to retain shifted-off bits.
-// Then shifting (just with signficaand_hi) back into bit 42.
-//
-// The shift_value is 63-42 = 21. In general, this is
+// This is shifting bit 42 into bit 63, taking care to retain the shifted-off bits.
+// Then shifting (just with significand_hi) back into bit 42.
+//  
+// The shift_value is 63-42 = 21. In general, this is 
 //      63 - (51 -(0xffff - 0xfff6))
 // For this example, it is
 //      63 - (51 - 9) = 63 - 42  = 21
-//
-// This means we are shifting 21 bits into significand_lo. We must maintain more
-// that a 128-bit signficand not to lose bits. So before the shift we put the
-// 128-bit significand into a 256-bit signficand and then shift.
+// 
+// This means we are shifting 21 bits into significand_lo.  We must maintain more
+// than a 128-bit significand not to lose bits. So before the shift we put the 128-bit
+// significand into a 256-bit significand and then shift.
 // The 256-bit significand has four parts: hh, hl, lh, and ll.
-//
+// 
 // Start off with
 //      hh         hl         lh         ll
 //      <64>       <49><15_0> <64_0>     <64_0>
-//
+// 
 // After shift by 21 (then return for significand_hi),
 //      <43><21_0> <21><43>   <6><58_0>  <64_0>
-//
+// 
 // Take the hh part and convert to a double. There is no rounding here.
-// The conversion is exact. The true exponent of the high part is the same as
-// the true exponent of the input quad.
-//
-// We have some 64 plus significand bits for the low part. In this example, we
-// have 70 bits. We want to round this to a double. Put them in a quad and then
-// do a quad fnorm.
-// For this example the true exponent of the low part is
+// The conversion is exact. The true exponent of the high part is the same as the
+// true exponent of the input quad.
+// 
+// We have some 64 plus significand bits for the low part. In this example, we have
+// 70 bits. We want to round this to a double. Put them in a quad and then do a quad fnorm.
+// For this example the true exponent of the low part is 
 //      true_exponent_of_high - 43 = true_exponent_of_high - (64-21)
-// In general, this is
-//      true_exponent_of_high - (64 - shift_value)
-//
-//
+// In general, this is 
+//      true_exponent_of_high - (64 - shift_value)  
+// 
+// 
 // Largest T,t
 // ----------
 // The largest T,t is
-// 0x3fe62643fecf9742, 0x3c9e3147684bd37d  log(1/frcpa(1+255/256))=+6.92171e-001
-//
+// data8 0x3fe62643fecf9742, 0x3c9e3147684bd37d    log(1/frcpa(1+255/256))=  +6.92171e-001
+// 
 // Table entry 256 is
 // 0 fffe b1321ff67cba178c 51da12f4df5a0000
-//
-// The shift value is
+// 
+// The shift value is 
 //      63 - (51 -(0xffff - 0xfffe)) = 13
-//
-// The true exponent of the low part is
+// 
+// The true exponent of the low part is 
 //      true_exponent_of_high - (64 - shift_value)
 //      -1 - (64-13) = -52
 // Biased as a double, this is 0x3cb
-//
-//
-//
+// 
+// 
+// 
 // So then lsb(T) must be >= 2^-51
 // msb(Klog2_hi) <= 2^12
-//
+// 
 //              +--------+---------+
 //              |       51 bits    | <== largest T
 //              +--------+---------+
@@ -333,6 +320,7 @@
 // +------------+----------------+-+
 
 
+
 // Special Cases
 //==============================================================
 
@@ -397,66 +385,63 @@
 
 // X any   Y =0               +1
 
+#include "libm_support.h"
+
 // Assembly macros
 //==============================================================
 
 // integer registers used
 
-pow_GR_signexp_X          = r14
-pow_GR_17ones             = r15
-pow_AD_P                  = r16
-pow_GR_exp_2tom8          = r17
-pow_GR_sig_X              = r18
-pow_GR_10033              = r19
-pow_GR_16ones             = r20
-
-pow_AD_Tt                 = r21
-pow_GR_exp_X              = r22
-pow_AD_Q                  = r23
-pow_GR_true_exp_X         = r24
-pow_GR_y_zero             = r25
-
-pow_GR_exp_Y              = r26
-pow_AD_tbl1               = r27
-pow_AD_tbl2               = r28
-pow_GR_offset             = r29
-pow_GR_exp_Xm1            = r30
-pow_GR_xneg_yodd          = r31
-
-pow_GR_signexp_Xm1        = r35
-pow_GR_int_W1             = r36
-pow_GR_int_W2             = r37
-pow_GR_int_N              = r38
-pow_GR_index1             = r39
-pow_GR_index2             = r40
-
-pow_AD_T1                 = r41
-pow_AD_T2                 = r42
-pow_int_GR_M              = r43
-pow_GR_sig_int_Y          = r44
-pow_GR_sign_Y_Gpr         = r45
-
-pow_GR_17ones_m1          = r46
-pow_GR_one                = r47
-pow_GR_sign_Y             = r48
-pow_GR_signexp_Y_Gpr      = r49
-pow_GR_exp_Y_Gpr          = r50
-
-pow_GR_true_exp_Y_Gpr     = r51
-pow_GR_signexp_Y          = r52
-pow_GR_x_one              = r53
-pow_GR_big_pos            = r55
-
-pow_GR_big_neg            = r56
-
-GR_SAVE_B0                = r50
-GR_SAVE_GP                = r51
-GR_SAVE_PFS               = r52
-
-GR_Parameter_X            = r53
-GR_Parameter_Y            = r54
-GR_Parameter_RESULT       = r55
-pow_GR_tag                = r56
+pow_AD_Tt                 = r33
+pow_GR_FFF7               = r34
+pow_GR_exp_Y              = r34 // duplicate
+pow_GR_17ones             = r35
+
+pow_AD_P                  = r36
+pow_AD_Q                  = r37
+pow_AD_tbl1               = r38
+pow_AD_tbl2               = r39
+pow_GR_exp_X              = r40
+pow_GR_true_exp_X         = r40 // duplicate
+
+pow_GR_offset             = r41
+pow_GR_exp_Xm1            = r42
+pow_GR_sig_X              = r43
+pow_GR_signexp_X          = r44
+
+pow_GR_signexp_Xm1        = r46
+pow_GR_int_W1             = r47
+pow_GR_int_W2             = r48
+pow_GR_int_N              = r49
+pow_GR_index1             = r50
+
+pow_GR_index2             = r51
+pow_AD_T1                 = r52
+pow_AD_T2                 = r53
+pow_GR_gt_ln              = r53 // duplicate
+pow_int_GR_M              = r54
+pow_GR_10033              = r55
+
+pow_GR_16ones             = r56
+pow_GR_sig_int_Y          = r57
+pow_GR_sign_Y_Gpr         = r58
+pow_GR_17ones_m1          = r59
+pow_GR_one                = r60
+pow_GR_sign_Y             = r60 // duplicate (shares r60 with pow_GR_one)
+
+pow_GR_signexp_Y_Gpr      = r61 
+pow_GR_exp_Y_Gpr          = r62 
+pow_GR_true_exp_Y_Gpr     = r63 
+pow_GR_signexp_Y          = r64 
+
+GR_SAVE_B0                = r65
+GR_SAVE_GP                = r66
+GR_SAVE_PFS               = r67
+
+GR_Parameter_X            = r68
+GR_Parameter_Y            = r69
+GR_Parameter_RESULT       = r70
+pow_GR_tag                = r71
 
 
 // floating point registers used
@@ -479,8 +464,7 @@ POW_log2_lo               = f43
 POW_r                     = f44
 POW_Q0_half               = f45
 
-POW_Q1                    = f46
-POW_tmp                   = f47
+POW_Q1                    = f46  
 POW_log2_hi               = f48
 POW_Q4                    = f49
 POW_P1                    = f50
@@ -492,7 +476,6 @@ POW_Yrcub                 = f54
 POW_log2_by_128_lo        = f55
 
 POW_v6                    = f56
-POW_xsq                   = f57
 POW_v4                    = f58
 POW_v2                    = f59
 POW_T                     = f60
@@ -501,7 +484,6 @@ POW_Tt                    = f61
 POW_RSHF                  = f62
 POW_v21ps                 = f63
 POW_s4                    = f64
-POW_twoV                  = f65
 
 POW_U                     = f66
 POW_G                     = f67
@@ -551,36 +533,44 @@ POW_1ps                   = f103
 POW_A                     = f104
 POW_es                    = f105
 
-POW_Xp1                   = f106
 POW_int_K                 = f107
 POW_K                     = f108
 POW_f123                  = f109
 POW_Gpr                   = f110
 
-POW_Y_Gpr                 = f111
+POW_Y_Gpr                 = f111 
 POW_int_Y                 = f112
-POW_2Mqp1                 = f113
 
 POW_float_int_Y           = f116
 POW_ftz_urm_f8            = f117
 POW_wre_urm_f8            = f118
-POW_big_neg               = f119
-POW_big_pos               = f120
+POW_abs_A                 = f119
+POW_gt_pln                = f120
+
+POW_xsq                   = f121
+
+POW_twoV                  = f122
+POW_Xp1                   = f123
 
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(pow_table_P)
+pow_table_P:
+ASM_TYPE_DIRECTIVE(pow_table_P,@object)
 data8 0x8000F7B249FF332D, 0x0000BFFC  // P_5
 data8 0xAAAAAAA9E7902C7F, 0x0000BFFC  // P_3
 data8 0x80000000000018E5, 0x0000BFFD  // P_1
 data8 0xb8aa3b295c17f0bc, 0x00004006  // inv_ln2_by_128
-//
-//
+
+
 data8 0x3FA5555555554A9E // Q_2
 data8 0x3F8111124F4DD9F9 // Q_3
 data8 0x3FE0000000000000 // Q_0
@@ -590,18 +580,20 @@ data8 0x43e8000000000000 // Right shift constant for exp
 data8 0xc9e3b39803f2f6af, 0x00003fb7  // ln2_by_128_lo
 data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
 data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
-LOCAL_OBJECT_END(pow_table_P)
+ASM_SIZE_DIRECTIVE(pow_table_P)
 
-LOCAL_OBJECT_START(pow_table_Q)
+pow_table_Q:
+ASM_TYPE_DIRECTIVE(pow_table_Q,@object)
 data8 0x9249FE7F0DC423CF, 0x00003FFC  // P_4
 data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC  // P_2
 data8 0xAAAAAAAAAAAAB505, 0x00003FFD  // P_0
 data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo =  +6.93147e-001
 data8 0xb17217f7d1cf79ab, 0x00003ff7  // ln2_by_128_hi
-LOCAL_OBJECT_END(pow_table_Q)
+ASM_SIZE_DIRECTIVE(pow_table_Q)
 
 
-LOCAL_OBJECT_START(pow_Tt)
+pow_Tt:
+ASM_TYPE_DIRECTIVE(pow_Tt,@object)
 data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 // log(1/frcpa(1+0/256))=  +1.95503e-003
 data8 0x3f78121214586a00, 0x3cb540e0a5cfc9bc // log(1/frcpa(1+1/256))=  +5.87661e-003
 data8 0x3f841929f9683200, 0x3cbdf1d57404da1f // log(1/frcpa(1+2/256))=  +9.81362e-003
@@ -858,12 +850,13 @@ data8 0x3fe5f673c61a2ed0, 0x3caa385eef5f2789 // log(1/frcpa(1+252/256))=  +6.863
 data8 0x3fe6065bea385924, 0x3cb11624f165c5b4 // log(1/frcpa(1+253/256))=  +6.88276e-001
 data8 0x3fe6164bfa7cc068, 0x3cbad884f87073fa // log(1/frcpa(1+254/256))=  +6.90222e-001
 data8 0x3fe62643fecf9740, 0x3cb78c51da12f4df // log(1/frcpa(1+255/256))=  +6.92171e-001
-LOCAL_OBJECT_END(pow_Tt)
+ASM_SIZE_DIRECTIVE(pow_Tt)
 
 
 // Table 1 is 2^(index_1/128) where
 // index_1 goes from 0 to 15
-LOCAL_OBJECT_START(pow_tbl1)
+pow_tbl1:
+ASM_TYPE_DIRECTIVE(pow_tbl1,@object)
 data8 0x8000000000000000 , 0x00003FFF
 data8 0x80B1ED4FD999AB6C , 0x00003FFF
 data8 0x8164D1F3BC030773 , 0x00003FFF
@@ -880,12 +873,13 @@ data8 0x88980E8092DA8527 , 0x00003FFF
 data8 0x8955EE03618E5FDD , 0x00003FFF
 data8 0x8A14D575496EFD9A , 0x00003FFF
 data8 0x8AD4C6452C728924 , 0x00003FFF
-LOCAL_OBJECT_END(pow_tbl1)
+ASM_SIZE_DIRECTIVE(pow_tbl1)
 
 
 // Table 2 is 2^(index_1/8) where
 // index_2 goes from 0 to 7
-LOCAL_OBJECT_START(pow_tbl2)
+pow_tbl2:
+ASM_TYPE_DIRECTIVE(pow_tbl2,@object)
 data8 0x8000000000000000 , 0x00003FFF
 data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
 data8 0x9837F0518DB8A96F , 0x00003FFF
@@ -894,287 +888,372 @@ data8 0xB504F333F9DE6484 , 0x00003FFF
 data8 0xC5672A115506DADD , 0x00003FFF
 data8 0xD744FCCAD69D6AF4 , 0x00003FFF
 data8 0xEAC0C6E7DD24392F , 0x00003FFF
-LOCAL_OBJECT_END(pow_tbl2)
+ASM_SIZE_DIRECTIVE(pow_tbl2)
+
+.global powf
 
 .section .text
-GLOBAL_LIBM_ENTRY(powf)
+.proc  powf
+.align 32
+
+powf:
 
-// Get exponent of x.  Will be used to calculate K.
 { .mfi
-          getf.exp     pow_GR_signexp_X = f8
-          fms.s1 POW_Xm1 = f8,f1,f1     // Will be used for r1 if x>0
-          mov           pow_GR_17ones   = 0x1FFFF
+          alloc         r32=ar.pfs,1,35,4,0 
+          fms.s1 POW_Xm1 = f8,f1,f1   // Will be used for r1 if x>0
+          mov           pow_GR_17ones  = 0x1FFFF
 }
 { .mfi
-          addl          pow_AD_P        = @ltoff(pow_table_P), gp
-          fma.s1 POW_Xp1 = f8,f1,f1     // Will be used for r1 if x<0
+(p0)      addl          pow_AD_P   = @ltoff(pow_table_P), gp
+          fma.s1 POW_Xp1 = f8,f1,f1   // Will be used for r1 if x<0
           nop.i 999
 ;;
 }
 
-// Get significand of x.  Will be used to get index to fetch T, Tt.
+
+// Get exponent of x.  Will be used to calculate K.
 { .mfi
-          getf.sig      pow_GR_sig_X    = f8
-          frcpa.s1      POW_B, p6       = f1,f8
+          getf.exp      pow_GR_signexp_X    = f8
+          frcpa.s1      POW_B, p6   = f1,f8
           nop.i 999
 }
 { .mfi
           ld8 pow_AD_P = [pow_AD_P]
-          fma.s1        POW_NORM_X      = f8,f1,f0
-          mov          pow_GR_exp_2tom8 = 0xFFF7
+          fma.s1        POW_NORM_X     = f8,f1,f0
+          mov           pow_GR_FFF7    = 0xFFF7
 }
 ;;
 
+
+
+// Get significand of x.  Will be used to get index to fetch T, Tt.
 // p13 = TRUE ==> X is unorm
 // DOUBLE 0x10033  exponent limit at which y is an integer
+// SINGLE 0x10016
 { .mfi
-          nop.m 999
-          fclass.m  p13,p0              = f8, 0x0b  // Test for x unorm
-          addl pow_GR_10033             = 0x10033, r0
+          getf.sig      pow_GR_sig_X        = f8
+          fclass.m  p13,p0          = f8, 0x0b  // Test for x unorm
+          addl pow_GR_10033                 = 0x10033, r0
 }
 { .mfi
           mov           pow_GR_16ones   = 0xFFFF
-          fma.s1        POW_NORM_Y      = f9,f1,f0
+          fma.s1        POW_NORM_Y     = f9,f1,f0
           nop.i 999
 }
 ;;
 
+
 // p14 = TRUE ==> X is ZERO
 { .mfi
           adds          pow_AD_Tt       = pow_Tt - pow_table_P,  pow_AD_P
-          fclass.m  p14,p0              = f8, 0x07
-          and           pow_GR_exp_X    = pow_GR_signexp_X, pow_GR_17ones
+          fclass.m  p14,p15          = f8, 0x07
+          and           pow_GR_exp_X        = pow_GR_signexp_X, pow_GR_17ones
 }
 { .mfi
-          adds          pow_AD_Q        = pow_table_Q - pow_table_P,  pow_AD_P
+          adds          pow_AD_Q       = pow_table_Q - pow_table_P,  pow_AD_P
           nop.f 999
           nop.i 999
 }
 ;;
 
 { .mfi
-          ldfe          POW_P5          = [pow_AD_P], 16
-          fcmp.lt.s1 p8,p9 = f8, f0     // Test for x<0
-          nop.i 999
+          ldfe          POW_P5         = [pow_AD_P], 16
+          fcmp.lt.s1 p8,p9 = f8, f0    // Test for x<0
+          shl           pow_GR_offset       = pow_GR_sig_X, 1
 }
 { .mib
-          ldfe          POW_P4          = [pow_AD_Q], 16
-          sub       pow_GR_true_exp_X   = pow_GR_exp_X, pow_GR_16ones
-(p13)     br.cond.spnt POW_X_DENORM
+          ldfe          POW_P4         = [pow_AD_Q], 16
+          sub       pow_GR_true_exp_X       = pow_GR_exp_X, pow_GR_16ones
+(p13)     br.cond.spnt L(POW_X_DENORM)
 }
 ;;
 
+
 // Continue normal and denormal paths here
-POW_COMMON:
+L(POW_COMMON):
 // p11 = TRUE ==> Y is a NAN
 { .mfi
-          ldfe          POW_P3          = [pow_AD_P], 16
-          fclass.m  p11,p0              = f9, 0xc3
-          nop.i 999
+          ldfe          POW_P3         = [pow_AD_P], 16
+          fclass.m.unc  p11,p0         = f9, 0xc3
+          shr.u     pow_GR_offset           = pow_GR_offset,56
 }
 { .mfi
-          ldfe          POW_P2          = [pow_AD_Q], 16
+          ldfe          POW_P2         = [pow_AD_Q], 16
           nop.f 999
-          mov pow_GR_y_zero = 0
+          nop.i 999
 }
 ;;
 
-// Note POW_Xm1 and POW_r1 are used interchangably
+
+
+// Compute xsq to decide later if |x|=1
+// p11 = TRUE ==> Y is a NaN
 { .mfi
-          alloc         r32=ar.pfs,2,19,4,0
-          fms.s1        POW_r           = POW_B, POW_NORM_X,f1
-          nop.i 999
+          setf.sig POW_int_K                = pow_GR_true_exp_X
+(p15)     fms.s1        POW_r          = POW_B, POW_NORM_X,f1
+          shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
 }
 { .mfi
-          setf.sig POW_int_K            = pow_GR_true_exp_X
-(p8)      fnma.s1        POW_Xm1        = POW_Xp1,f1,f0
+          nop.m 999
+(p8)      fnma.s1        POW_Xm1       = POW_Xp1,f1,f0
           nop.i 999
 }
 ;;
 
-// p12 = TRUE if Y is ZERO
-// Compute xsq to decide later if |x|=1
+
+
+// p12 = TRUE ==> X is ZERO and Y is ZERO
 { .mfi
-          ldfe          POW_P1          = [pow_AD_P], 16
-          fclass.m      p12,p0          = f9, 0x07
-          shl           pow_GR_offset   = pow_GR_sig_X, 1
+          ldfe          POW_P1         = [pow_AD_P], 16
+(p14)     fclass.m.unc  p12,p0              = f9, 0x07
+          nop.i 999
 }
 { .mfb
-          ldfe          POW_P0          = [pow_AD_Q], 16
+          ldfe          POW_P0         = [pow_AD_Q], 16
           fma.s1        POW_xsq = POW_NORM_X, POW_NORM_X, f0
-(p11)     br.cond.spnt  POW_Y_NAN       // Branch if y=nan
+(p11)     br.cond.spnt   L(POW_Y_NAN)
 }
 ;;
 
+
+.pred.rel "mutex",p8,p9
 // Get exponent of |x|-1 to use in comparison to 2^-8
-{ .mfi
-          getf.exp  pow_GR_signexp_Xm1  = POW_Xm1
-          fcvt.fx.s1   POW_int_Y        = POW_NORM_Y
-          shr.u     pow_GR_offset       = pow_GR_offset,56
+{ .mmf
+(p8)      getf.exp      pow_GR_signexp_Xm1  = POW_Xp1
+(p9)      getf.exp      pow_GR_signexp_Xm1  = POW_Xm1
+          fcvt.fx.s1   POW_int_Y            = POW_NORM_Y
 }
 ;;
 
+
 // p11 = TRUE ==> X is a NAN
 { .mfi
           ldfpd         POW_log2_hi, POW_log2_lo  = [pow_AD_Q], 16
-          fclass.m      p11,p0          = f8, 0xc3
-          shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt
+          fclass.m.unc  p11,p0              = f8, 0xc3
+          nop.i 999
 }
-{ .mfi
-          ldfe          POW_inv_log2_by_128 = [pow_AD_P], 16
-          fma.s1 POW_delta              = f0,f0,f0 // delta=0 in case |x| near 1
-(p12)     mov pow_GR_y_zero = 1
+{ .mib
+          ldfpd  POW_T, POW_Tt              = [pow_AD_Tt], 16
+          nop.i 999
+(p12)     br.cond.spnt L(POW_X_0_Y_0)
 }
 ;;
 
+
+// p14 = TRUE ==> X is zero
+//    p15 = TRUE ==> X is zero AND Y is negative
+//    p10 = TRUE ==> X is zero AND Y is >= zero 
 { .mfi
-          ldfpd  POW_Q2, POW_Q3         = [pow_AD_P], 16
-          fma.s1 POW_G                  = f0,f0,f0  // G=0 in case |x| near 1
-          and       pow_GR_exp_Xm1      = pow_GR_signexp_Xm1, pow_GR_17ones
+          ldfe          POW_inv_log2_by_128 = [pow_AD_P], 16
+(p14)     fcmp.lt.unc.s1 p15, p10           = f9,f0
+          nop.i 999
 }
+{ .mfi
+          nop.m 999
+          nop.f 999
+          and       pow_GR_exp_Xm1          = pow_GR_signexp_Xm1, pow_GR_17ones
+} 
 ;;
 
+
 // Determine if we will use the |x| near 1 path (p6) or normal path (p7)
+// p12 = TRUE ==> X is a NAN and Y is a zero
+// p13 = TRUE ==> X is a NAN and Y is anything else
 { .mfi
-          getf.exp  pow_GR_signexp_Y    = POW_NORM_Y
-          nop.f 999
-          cmp.lt p6,p7                  = pow_GR_exp_Xm1, pow_GR_exp_2tom8
-}
-{ .mfb
-          ldfpd  POW_T, POW_Tt          = [pow_AD_Tt], 16
-          fma.s1        POW_rsq         = POW_r, POW_r,f0
-(p11)     br.cond.spnt  POW_X_NAN       // Branch if x=nan and y not nan
+          getf.exp  pow_GR_signexp_Y        = POW_NORM_Y 
+(p11)     fclass.m.unc  p12,p13             = f9, 0x07
+          cmp.lt.unc p6,p7                  = pow_GR_exp_Xm1, pow_GR_FFF7
 }
+{ .mfi
+          ldfpd  POW_Q2, POW_Q3             = [pow_AD_P], 16
+          fma.s1        POW_rsq             = POW_r, POW_r,f0
+          nop.i 999
 ;;
+}
 
 // If on the x near 1 path, assign r1 to r and r1*r1 to rsq
 { .mfi
-          ldfpd  POW_Q0_half, POW_Q1    = [pow_AD_P], 16
-(p6)      fma.s1    POW_r               = POW_r1, f1, f0
+          ldfpd  POW_Q0_half, POW_Q1             = [pow_AD_P], 16
+(p6)      fma.s1    POW_r                 = POW_r1, f1, f0
+          nop.i 999
+}
+{ .mfi
+          nop.m 999
+(p6)      fma.s1    POW_rsq                 = POW_r1, POW_r1, f0
           nop.i 999
+;;
+}
+
+
+{ .mfi
+          ldfpd   POW_Q4, POW_RSHF          = [pow_AD_P], 16
+(p7)      fma.s1 POW_v6                     = POW_r,  POW_P5, POW_P4
+          and pow_GR_exp_Y                   = pow_GR_signexp_Y, pow_GR_17ones
 }
 { .mfb
           nop.m 999
-(p6)      fma.s1    POW_rsq             = POW_r1, POW_r1, f0
-(p14)     br.cond.spnt POW_X_0          // Branch if x zero and y not nan
+(p6)      fma.s1 POW_v6                     = POW_r1, POW_P5, POW_P4
+(p12)     br.cond.spnt L(POW_X_NAN_Y_0)
 }
 ;;
 
+
 { .mfi
-          ldfpd   POW_Q4, POW_RSHF      = [pow_AD_P], 16
-(p7)      fma.s1 POW_v6                 = POW_r,  POW_P5, POW_P4
-          nop.i 999
+          nop.m 999
+(p7)      fma.s1 POW_v4                     = POW_P3, POW_r,  POW_P2 
+          andcm pow_GR_sign_Y               = pow_GR_signexp_Y, pow_GR_17ones
 }
-{ .mfi
+{ .mfb
           nop.m 999
-(p6)      fma.s1 POW_v6                 = POW_r1, POW_P5, POW_P4
-          nop.i 999
+(p6)      fma.s1 POW_v4                     = POW_P3, POW_r1, POW_P2 
+(p12)     br.cond.spnt L(POW_X_NAN_Y_0)
 }
 ;;
 
 { .mfi
           nop.m 999
-(p7)      fma.s1 POW_v4                 = POW_P3, POW_r,  POW_P2
+          fcvt.xf POW_K                     = POW_int_K
           nop.i 999
 }
-{ .mfi
+{ .mfb
           nop.m 999
-(p6)      fma.s1 POW_v4                 = POW_P3, POW_r1, POW_P2
-          nop.i 999
+(p13)     fma.s f8                           = f8,f1,f0
+(p13)     br.ret.spnt  b0    // Exit if x nan, y anything but zero
 }
 ;;
-
+          
+// p10 = TRUE ==> X is zero  AND Y is positive
+//  p8  = TRUE ==> X is zero  AND Y is outside integer range (treat as even int)
+//                   return +0
+//  p9  = TRUE ==> X is zero  AND Y is within integer range (may not be integer) 
+{ .mfi
+(p10)     cmp.gt.unc p8,p9                  =  pow_GR_exp_Y, pow_GR_10033
+(p6)      fmerge.s POW_delta                 = f0,f0
+          nop.i 999
+}
 { .mfi
           nop.m 999
-          fcvt.xf POW_K                 = POW_int_K
+(p6)      fma.s1 POW_G                       = f0,f0,f0
           nop.i 999
 }
 ;;
 
 { .mfi
-          getf.sig pow_GR_sig_int_Y     = POW_int_Y
-          fnma.s1 POW_twoV              = POW_NORM_Y, POW_rsq,f0
-          and pow_GR_exp_Y              = pow_GR_signexp_Y, pow_GR_17ones
+          getf.sig pow_GR_sig_int_Y         = POW_int_Y
+          fnma.s1 POW_twoV                   = POW_NORM_Y, POW_rsq,f0
+          nop.i 999
 }
-{ .mfb
-          andcm pow_GR_sign_Y           = pow_GR_signexp_Y, pow_GR_17ones
-          fma.s1 POW_U                  = POW_NORM_Y,POW_r,f0
-(p12)     br.cond.spnt POW_Y_0   // Branch if y=zero, x not zero or nan
+{ .mfi
+          nop.m 999
+          fma.s1 POW_U                      = POW_NORM_Y,POW_r,f0
+          nop.i 999
 }
 ;;
 
-// p11 = TRUE ==> X is NEGATIVE but not inf
 { .mfi
-          ldfe      POW_log2_by_128_lo  = [pow_AD_P], 16
-          fclass.m  p11,p0              = f8, 0x1a
+          ldfe      POW_log2_by_128_lo      = [pow_AD_P], 16
+(p6)      fma.s1 POW_v2                     = POW_P1, POW_r1, POW_P0 
           nop.i 999
 }
 { .mfi
-          ldfe      POW_log2_by_128_hi  = [pow_AD_Q], 16
-          fma.s1 POW_v2                 = POW_P1, POW_r,  POW_P0
+          ldfe          POW_log2_by_128_hi  = [pow_AD_Q], 16
+(p7)      fma.s1 POW_v2                     = POW_P1, POW_r,  POW_P0 
           nop.i 999
 }
 ;;
 
+
 { .mfi
           nop.m 999
-          fcvt.xf   POW_float_int_Y     = POW_int_Y
+          fcvt.xf   POW_float_int_Y               = POW_int_Y
           nop.i 999
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_v3                 = POW_v6, POW_rsq,  POW_v4
-          adds          pow_AD_tbl1     = pow_tbl1 - pow_Tt,  pow_AD_Q
+          fma.s1 POW_v3                     = POW_v6, POW_rsq,  POW_v4 
+          adds          pow_AD_tbl1       = pow_tbl1 - pow_Tt,  pow_AD_Q
 }
 ;;
 
 { .mfi
           nop.m 999
-(p7)      fma.s1 POW_delta              = POW_K, POW_log2_lo, POW_Tt
+(p7)      fma.s1 POW_delta                  = POW_K, POW_log2_lo, POW_Tt
           nop.i 999
 }
 { .mfi
           nop.m 999
-(p7)      fma.s1 POW_G                  = POW_K, POW_log2_hi, POW_T
-          adds pow_AD_tbl2              = pow_tbl2 - pow_tbl1,  pow_AD_tbl1
+(p7)      fma.s1 POW_G                      = POW_K, POW_log2_hi, POW_T 
+          adds pow_AD_tbl2                  = pow_tbl2 - pow_tbl1,  pow_AD_tbl1
 }
 ;;
 
+
 { .mfi
           nop.m 999
-          fms.s1 POW_e2                 = POW_NORM_Y, POW_r, POW_U
+          fms.s1 POW_e2                     = POW_NORM_Y, POW_r, POW_U
           nop.i 999
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_Z2                 = POW_twoV, POW_Q0_half, POW_U
+          fma.s1 POW_Z2                     = POW_twoV, POW_Q0_half, POW_U
           nop.i 999
 }
 ;;
 
+// p11 = TRUE ==> X is NEGATIVE 
+// p8  = TRUE ==> X is zero  AND Y is outside integer range (treat as even int)
+//                return +0
 { .mfi
           nop.m 999
-          fma.s1 POW_Yrcub              = POW_rsq, POW_U, f0
+          fclass.m.unc  p11,p0              = f8, 0x1a
           nop.i 999
 }
-{ .mfi
+{ .mfb
+          nop.m 999
+(p8)      fma.s f8                          = f0,f0,f0
+(p8)      br.ret.spnt b0
+}
+;;
+
+{ .mfi 
           nop.m 999
-          fma.s1 POW_p                  = POW_rsq, POW_v3, POW_v2
+          fma.s1 POW_Yrcub                 = POW_rsq, POW_U, f0
+          nop.i 999
+}
+{ .mfi 
+          nop.m 999
+          fma.s1 POW_p                      = POW_rsq, POW_v3, POW_v2
           nop.i 999
 }
 ;;
 
-// p11 = TRUE ==> X is NEGATIVE but not inf
-//    p12 = TRUE ==> X is NEGATIVE  AND  Y  already even int
+
+// p11 = TRUE ==> X is NEGATIVE
+//    p12 = TRUE ==> X is NEGATIVE  AND  Y  already int
 //    p13 = TRUE ==> X is NEGATIVE  AND  Y possible int
 { .mfi
           nop.m 999
-          fma.s1 POW_Z1                 = POW_NORM_Y, POW_G, f0
-(p11)     cmp.gt.unc  p12,p13           = pow_GR_exp_Y, pow_GR_10033
+          fma.s1 POW_Z1                     = POW_NORM_Y, POW_G, f0
+(p11)     cmp.ge.unc  p12,p13                = pow_GR_exp_Y, pow_GR_10033
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_Gpr                = POW_G, f1, POW_r
+          fma.s1 POW_e3                     = POW_NORM_Y, POW_delta, f0
+          nop.i 999
+}
+;;
+
+// p9  = TRUE ==> X is zero  AND Y is within integer range (may not be integer)
+//    p6 = TRUE ==>  X is zero  AND  Y is an integer (may be even or odd)
+//    p7 = TRUE ==>  X is zero  AND  Y is NOT an integer, return +0
+{ .mfi
+          nop.m 999
+(p9)      fcmp.eq.unc.s1 p6,p7             = POW_float_int_Y,  POW_NORM_Y
+          nop.i 999
+}
+{ .mfi 
+          nop.m 999
+          fma.s1 POW_Gpr                    = POW_G, f1, POW_r
           nop.i 999
 }
 ;;
@@ -1187,14 +1266,24 @@ POW_COMMON:
 }
 { .mfi
           nop.m 999
-          fms.s1 POW_UmZ2               = POW_U, f1, POW_Z2
+          fms.s1 POW_UmZ2                   = POW_U, f1, POW_Z2
           nop.i 999
 }
 ;;
 
+
+// If x=0 and y>0, test y and flag denormal
+// p6  = TRUE ==>  X is zero  AND  Y is an integer (may be even or odd)
+//    p8 = TRUE ==>  X is zero  AND  Y is an odd  integer
+//    p9 = TRUE ==>  X is zero  AND  Y is an even integer
+{ .mfi
+          nop.m 999
+(p10)     fcmp.eq.s0 p15,p0 = f9,f0
+(p6)      tbit.nz.unc  p8,p9                = pow_GR_sig_int_Y,0
+}
 { .mfi
           nop.m 999
-          fma.s1 POW_Z3                 = POW_p, POW_Yrcub, f0
+          fma.s1 POW_Z3                      = POW_p, POW_Yrcub, f0
           nop.i 999
 }
 ;;
@@ -1202,7 +1291,7 @@ POW_COMMON:
 // By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
 { .mfi
           nop.m 999
-          fms.s1 POW_e1                 = POW_NORM_Y, POW_G, POW_Z1
+          fms.s1 POW_e1                     = POW_NORM_Y, POW_G, POW_Z1
           nop.i 999
 }
 { .mfi
@@ -1212,60 +1301,81 @@ POW_COMMON:
 }
 ;;
 
-// p13 = TRUE ==> X is NEGATIVE  AND  Y possible int
-//     p10 = TRUE ==> X is NEG and Y is an int
-//     p12 = TRUE ==> X is NEG and Y is not an int
 { .mfi
           nop.m 999
-(p13)     fcmp.eq.unc.s1 p10,p12        = POW_float_int_Y,  POW_NORM_Y
-          mov pow_GR_xneg_yodd = 0
+(p7)      fma.s f8  = f0,f0,f0  // Result +0 if x zero and y not integer
+          nop.i 999
 }
-{ .mfi
+{ .mfb
           nop.m 999
-          fma.s1 POW_Y_Gpr              = POW_NORM_Y, POW_Gpr, f0
-          nop.i 999
+          fma.s1 POW_Y_Gpr                  = POW_NORM_Y, POW_Gpr, f0
+(p8)      br.ret.spnt b0        // Exit if x zero and y odd integer
 }
 ;;
 
 // By subtracting RSHF we get rounded integer POW_N2float
+// p15 = TRUE ==> X_0_Y_NEG
 { .mfi
           nop.m 999
           fms.s1 POW_N2float  = POW_W2, f1, POW_RSHF
           nop.i 999
 }
-{ .mfi
+{ .mfb
           nop.m 999
-          fma.s1 POW_UmZ2pV             = POW_twoV,POW_Q0_half,POW_UmZ2
-          nop.i 999
+          fma.s1 POW_UmZ2pV                 = POW_twoV,POW_Q0_half,POW_UmZ2
+(p15)     br.cond.spnt L(POW_X_0_Y_NEG)
 }
 ;;
 
+
+
 { .mfi
           nop.m 999
-          fma.s1 POW_Z3sq               = POW_Z3, POW_Z3, f0
+          fma.s1 POW_Z3sq                   = POW_Z3, POW_Z3, f0
           nop.i 999
 }
-{ .mfi
+{ .mfb
           nop.m 999
-          fma.s1 POW_v4                 = POW_Z3, POW_Q3, POW_Q2
-          nop.i 999
+          fma.s1 POW_v4                     = POW_Z3, POW_Q3, POW_Q2
+(p7)      br.ret.spnt b0     // Exit if x zero and y not an integer
 }
 ;;
 
+
+
 // Extract rounded integer from rightmost significand of POW_W2
 // By subtracting RSHF we get rounded integer POW_N1float
 { .mfi
-          getf.sig pow_GR_int_W2        = POW_W2
+          getf.sig pow_GR_int_W2             = POW_W2
           fms.s1 POW_N1float  = POW_W1, f1, POW_RSHF
           nop.i 999
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_v2                 = POW_Z3, POW_Q1, POW_Q0_half
+          fma.s1 POW_v2                     = POW_Z3, POW_Q1, POW_Q0_half
+          nop.i 999
+}
+;;
+
+
+
+
+// p13 = TRUE ==> X is NEGATIVE  AND  Y possible int
+//     p10 = TRUE ==> X is NEG and Y is an int
+//     p12 = TRUE ==> X is NEG and Y is not an int
+{ .mfi
+          nop.m 999
+(p13)     fcmp.eq.unc.s1 p10,p12             = POW_float_int_Y,  POW_NORM_Y
           nop.i 999
 }
+{ .mfb
+          nop.m 999
+(p9)      fma.s f8  = f0,f0,f0   // Result +0 if x zero and y even integer
+(p9)      br.ret.spnt b0    // Exit if x zero and y even integer
+}
 ;;
 
+
 { .mfi
           nop.m 999
           fnma.s1 POW_s2 = POW_N2float, POW_log2_by_128_hi, POW_Z2
@@ -1273,7 +1383,7 @@ POW_COMMON:
 }
 { .mfi
           nop.m 999
-          fma.s1 POW_e2                 = POW_e2,f1,POW_UmZ2pV
+          fma.s1 POW_e2                     = POW_e2,f1,POW_UmZ2pV
           nop.i 999
 }
 ;;
@@ -1281,250 +1391,278 @@ POW_COMMON:
 // Extract rounded integer from rightmost significand of POW_W1
 // Test if x inf
 { .mfi
-          getf.sig pow_GR_int_W1        = POW_W1
-          fclass.m p15,p0 = POW_NORM_X,  0x23
+          getf.sig pow_GR_int_W1             = POW_W1
+          fclass.m.unc p15,p0 = POW_NORM_X,  0x23
           nop.i 999
 }
 { .mfb
           nop.m 999
           fnma.s1 POW_f2  = POW_N2float, POW_log2_by_128_lo, f1
-(p12)     br.cond.spnt POW_X_NEG_Y_NONINT  // Branch if x neg, y not integer
+(p12)     br.cond.spnt L(POW_X_NEG_Y_NONINT)  // Branch if x neg, y not integer
 }
 ;;
 
-// p11 = TRUE ==> X is +1.0
 // p12 = TRUE ==> X is NEGATIVE  AND Y is an odd integer
 { .mfi
-          getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
-          fcmp.eq.s1 p11,p0 = POW_NORM_X, f1
-(p10)     tbit.nz.unc  p12,p0           = pow_GR_sig_int_Y,0
-}
-{ .mfi
-          nop.m 999
-          fma.s1 POW_v3                 = POW_Z3sq, POW_Q4, POW_v4
-          nop.i 999
+          getf.exp pow_GR_signexp_Y_Gpr       = POW_Y_Gpr
+          fma.s1 POW_v3                     = POW_Z3sq, POW_Q4, POW_v4
+(p10)     tbit.nz.unc  p12,p0                = pow_GR_sig_int_Y,0
 }
 ;;
 
+
 { .mfi
-          nop.m 999
+          add pow_GR_int_N                   = pow_GR_int_W1, pow_GR_int_W2
           fnma.s1 POW_f1  = POW_N1float, POW_log2_by_128_lo, f1
           nop.i 999
 }
 { .mfb
           nop.m 999
           fnma.s1 POW_s1  = POW_N1float, POW_log2_by_128_hi, POW_Z1
-(p15)     br.cond.spnt POW_X_INF
+(p15)     br.cond.spnt L(POW_X_INF)
 }
 ;;
 
+
 // Test x and y and flag denormal
 { .mfi
-          nop.m 999
+          and pow_GR_index1                  = 0x0f, pow_GR_int_N
           fcmp.eq.s0 p15,p0 = f8,f9
-          nop.i 999
+          shr r2                             = pow_GR_int_N, 7
 }
 { .mfi
-          nop.m 999
-          fma.s1 POW_e3                 = POW_NORM_Y, POW_delta, f0
-          nop.i 999
+          and pow_GR_exp_Y_Gpr               = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+          nop.f 999
+          and pow_GR_index2                  = 0x70, pow_GR_int_N
 }
 ;;
 
+
+
 { .mfi
-          nop.m 999
+          shladd pow_AD_T1                   = pow_GR_index1, 4, pow_AD_tbl1
           fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1  // Test for y=1.0
-          nop.i 999
+          sub pow_GR_true_exp_Y_Gpr          = pow_GR_exp_Y_Gpr, pow_GR_16ones
 }
 { .mfi
-          nop.m 999
-          fma.s1  POW_e12               = POW_e1,f1,POW_e2
-          nop.i 999
+          addl pow_int_GR_M                  = 0xFFFF, r2
+          fma.s1  POW_e12                     = POW_e1,f1,POW_e2
+          add pow_AD_T2                      = pow_AD_tbl2, pow_GR_index2
 }
 ;;
 
-{ .mfi
-          add pow_GR_int_N              = pow_GR_int_W1, pow_GR_int_W2
-(p11)     fma.s.s0 f8 = f1,f1,f0    // If x=1, result is +1
-          nop.i 999
-}
-{ .mib
-(p12)     mov pow_GR_xneg_yodd = 1
-          nop.i 999
-(p11)     br.ret.spnt b0            // Early exit if x=1.0, result is +1
+
+{ .mmi
+          ldfe POW_T1                        = [pow_AD_T1],16
+          setf.exp POW_2M                    = pow_int_GR_M
+          andcm pow_GR_sign_Y_Gpr            = pow_GR_signexp_Y_Gpr, pow_GR_17ones
 }
 ;;
 
-{ .mfi
-          and pow_GR_index1             = 0x0f, pow_GR_int_N
-          fma.s1 POW_q                  = POW_Z3sq, POW_v3, POW_v2
-          shr pow_int_GR_M              = pow_GR_int_N, 7    // M = N/128
-}
-{ .mib
-          and pow_GR_index2             = 0x70, pow_GR_int_N
-          nop.i 999
+
+{ .mfb
+          ldfe POW_T2                        = [pow_AD_T2],16
+          fma.s1 POW_q                       = POW_Z3sq, POW_v3, POW_v2
 (p7)      br.ret.spnt b0        // Early exit if y=1.0, result is x
 }
 ;;
 
+
+// double: p8 TRUE ==> |Y(G + r)| >= 10
+// single: p8 TRUE ==> |Y(G + r)| >= 7
+
+// double
+//     -2^10  -2^9             2^9   2^10
+// -----+-----+----+ ... +-----+-----+-----
+//  p8  |             p9             |  p8
+//      |     |       p10      |     |  
+// single
+//     -2^7   -2^6             2^6   2^7
+// -----+-----+----+ ... +-----+-----+-----
+//  p8  |             p9             |  p8
+//      |     |       p10      |     |
+
+
 { .mfi
-          shladd pow_AD_T1              = pow_GR_index1, 4, pow_AD_tbl1
-          fma.s1 POW_s                  = POW_s1, f1, POW_s2
-          add pow_int_GR_M              = pow_GR_16ones, pow_int_GR_M
+(p0)      cmp.le.unc p8,p9                   = 7, pow_GR_true_exp_Y_Gpr
+          fma.s1 POW_s                       = POW_s1, f1, POW_s2
+          nop.i 999
 }
 { .mfi
-          add pow_AD_T2                 = pow_AD_tbl2, pow_GR_index2
-          fma.s1 POW_f12                = POW_f1, POW_f2,f0
+          nop.m 999
+          fma.s1 POW_f12                     = POW_f1, POW_f2,f0
           nop.i 999
 }
 ;;
 
-{ .mmf
-          ldfe POW_T1                   = [pow_AD_T1]
-          ldfe POW_T2                   = [pow_AD_T2]
-          nop.f 999
-}
-;;
 
 { .mfi
-          setf.exp POW_2M               = pow_int_GR_M
-          fma.s1 POW_e123               = POW_e12, f1, POW_e3
-          and pow_GR_exp_Y_Gpr          = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+          nop.f 999
+(p9)      cmp.le.unc p0,p10                  = 6, pow_GR_true_exp_Y_Gpr
 }
 ;;
 
-{ .mfi
+
+
+{ .mfb
           nop.m 999
-          fma.s1 POW_q                  = POW_Z3sq, POW_q, POW_Z3
-          sub pow_GR_true_exp_Y_Gpr     = pow_GR_exp_Y_Gpr, pow_GR_16ones
+          fma.s1 POW_e123                    = POW_e12, f1, POW_e3
+(p8)      br.cond.spnt L(POW_OVER_UNDER_X_NOT_INF)
 }
 ;;
 
-// p8 TRUE ==> |Y(G + r)| >= 7
 
-// single
-//     -2^7   -2^6             2^6   2^7
-// -----+-----+----+ ... +-----+-----+-----
-//  p8  |             p9             |  p8
-//      |     |       p10      |     |
+{ .mmf
+          fma.s1 POW_q                       = POW_Z3sq, POW_q, POW_Z3
+}
+;;
+
 
-// Form signexp of constants to indicate overflow
 { .mfi
-          mov         pow_GR_big_pos    = 0x1007f
-          fma.s1 POW_ssq                = POW_s, POW_s, f0
-          cmp.le p8,p9                  = 7, pow_GR_true_exp_Y_Gpr
+          nop.m 999 
+          fma.s1 POW_ssq                     = POW_s, POW_s, f0
+          nop.i 999
 }
 { .mfi
-          mov         pow_GR_big_neg    = 0x3007f
-          fma.s1 POW_v4                 = POW_s, POW_Q3, POW_Q2
-          andcm pow_GR_sign_Y_Gpr       = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+          nop.m 999 
+          fma.s1 POW_v4                      = POW_s, POW_Q3, POW_Q2
+          nop.i 999
 }
 ;;
 
-// Form big positive and negative constants to test for possible overflow
 { .mfi
-          setf.exp POW_big_pos          = pow_GR_big_pos
-          fma.s1 POW_v2                 = POW_s, POW_Q1, POW_Q0_half
-(p9)      cmp.le.unc p0,p10             = 6, pow_GR_true_exp_Y_Gpr
+          nop.m 999
+          fma.s1 POW_v2                      = POW_s, POW_Q1, POW_Q0_half
+          nop.i 999
 }
-{ .mfb
-          setf.exp POW_big_neg          = pow_GR_big_neg
-          fma.s1 POW_1ps                = f1,f1,POW_s
-(p8)      br.cond.spnt POW_OVER_UNDER_X_NOT_INF
+{ .mfi
+          nop.m 999
+          fma.s1 POW_1ps                     = f1,f1,POW_s
+          nop.i 999
 }
 ;;
 
-// f123 = f12*(e123+1) = f12*e123+f12
 { .mfi
           nop.m 999
-          fma.s1 POW_f123               = POW_e123,POW_f12,POW_f12
+          fma.s1 POW_f3                      = POW_e123,f1,f1
           nop.i 999
 }
 ;;
 
 { .mfi
           nop.m 999
-          fma.s1 POW_T1T2               = POW_T1, POW_T2, f0
+          fma.s1 POW_T1T2                    = POW_T1, POW_T2, f0
           nop.i 999
 }
+;;
+
 { .mfi
           nop.m 999
-          fma.s1 POW_v3                 = POW_ssq, POW_Q4, POW_v4
-          cmp.ne p12,p13 = pow_GR_xneg_yodd, r0
+          fma.s1 POW_v3                     = POW_ssq, POW_Q4, POW_v4
+          nop.i 999
 }
 ;;
 
 { .mfi
           nop.m 999
-          fma.s1 POW_2Mqp1              = POW_2M, POW_q, POW_2M
+          fma.s1 POW_v21ps                  = POW_ssq, POW_v2, POW_1ps
+          nop.i 999
+}
+{ .mfi
+          nop.m 999
+          fma.s1 POW_s4                     = POW_ssq, POW_ssq, f0
           nop.i 999
 }
 ;;
 
 { .mfi
           nop.m 999
-          fma.s1 POW_v21ps              = POW_ssq, POW_v2, POW_1ps
+          fma.s1 POW_f123                    = POW_f12, POW_f3, f0
           nop.i 999
 }
+;;
+
 { .mfi
           nop.m 999
-          fma.s1 POW_s4                 = POW_ssq, POW_ssq, f0
+          fma.s1 POW_A                      =  POW_2M, POW_T1T2, f0
           nop.i 999
 }
 ;;
 
+
+
 { .mfi
           nop.m 999
-(p12)     fnma.s1 POW_A                 =  POW_T1T2, POW_f123, f0
+(p12)     fmerge.s POW_f123 = f8,POW_f123  // if x neg, y odd int
           nop.i 999
 }
 { .mfi
           nop.m 999
-(p13)     fma.s1 POW_A                  =  POW_T1T2, POW_f123, f0
+//          fma.s1 POW_es                     = POW_ssq,  POW_v3, POW_v2
           nop.i 999
 }
 ;;
 
 { .mfi
           nop.m 999
-          fma.s1 POW_es                 = POW_s4,  POW_v3, POW_v21ps
+          fma.s1 POW_es                     = POW_s4,  POW_v3, POW_v21ps
           nop.i 999
 }
+;;
+
+
 { .mfi
           nop.m 999
-          fma.s1 POW_A                  = POW_A, POW_2Mqp1, f0
+          fma.s1 POW_A                      = POW_A, POW_f123, f0
+          nop.i 999
+}
+{ .mfi
+          nop.m 999
+//          fma.s1 POW_es                     = POW_es, POW_ssq, POW_1ps
           nop.i 999
 }
 ;;
 
-// Dummy op to set inexact
+
 { .mfi
           nop.m 999
-          fma.s0 POW_tmp                = POW_2M, POW_q, POW_2M
+          fma.s1 POW_A                      = POW_A, POW_es,f0
           nop.i 999
 }
 ;;
 
+
+
 { .mfb
           nop.m 999
-          fma.s.s0 f8                   = POW_A, POW_es, f0
-(p10)     br.ret.sptk     b0            // Exit main branch if no over/underflow
+(p10)     fma.s f8                          = POW_A, POW_q, POW_A
+(p10)     br.ret.sptk     b0
 }
 ;;
 
+
+
+
+
 // POSSIBLE_OVER_UNDER
-// p6 = TRUE ==> Y_Gpr negative
-// Result is already computed.  We just need to know if over/underflow occurred.
+// p6 = TRUE ==> Y negative
 
-{ .mfb
-        cmp.eq p0,p6                    = pow_GR_sign_Y_Gpr, r0
-        nop.f 999
-(p6)    br.cond.spnt POW_POSSIBLE_UNDER
+{ .mfi
+        nop.m 999
+        fmerge.s POW_abs_A                = f0, POW_A
+        cmp.eq.unc p0,p6                  = pow_GR_sign_Y, r0
+}
+;;
+
+{ .mib
+        nop.m 999
+        nop.i 999
+(p6)    br.cond.spnt L(POW_POSSIBLE_UNDER) 
 }
 ;;
 
 // POSSIBLE_OVER
-// We got an answer.
+// We got an answer. 
 // overflow is a possibility, not a certainty
 
 
@@ -1554,20 +1692,21 @@ POW_COMMON:
 //                  RN         RN
 //                             RZ
 
+
 // Put in s2 (td set, wre set)
 { .mfi
-        nop.m 999
+        mov           pow_GR_gt_ln                 = 0x1007f 
         fsetc.s2 0x7F,0x42
-        nop.i 999
+        nop.i 999 
 }
 ;;
 
+
 { .mfi
-        nop.m 999
-        fma.s.s2 POW_wre_urm_f8         = POW_A, POW_es, f0
-        nop.i 999
+        setf.exp POW_gt_pln                        = pow_GR_gt_ln
+        fma.s.s2 POW_wre_urm_f8                    = POW_abs_A, POW_q, POW_abs_A
+        nop.i 999 ;;
 }
-;;
 
 // Return s2 to default
 { .mfi
@@ -1577,30 +1716,31 @@ POW_COMMON:
 }
 ;;
 
+
 // p7 = TRUE ==> yes, we have an overflow
 { .mfi
         nop.m 999
-        fcmp.ge.s1 p7, p8               =  POW_wre_urm_f8, POW_big_pos
+        fcmp.ge.unc.s1 p7, p0                    =  POW_wre_urm_f8, POW_gt_pln
         nop.i 999
 }
 ;;
 
-{ .mfi
-        nop.m 999
-(p8)    fcmp.le.s1 p7, p0               =  POW_wre_urm_f8, POW_big_neg
-        nop.i 999
-}
-;;
 
-{ .mbb
-(p7)   mov pow_GR_tag                   = 30
-(p7)   br.cond.spnt __libm_error_region // Branch if overflow
-       br.ret.sptk     b0               // Exit if did not overflow
+
+{ .mfb
+(p7)   mov pow_GR_tag                            = 30
+       fma.s f8                                  = POW_A, POW_q, POW_A
+(p7)   br.cond.spnt __libm_error_region 
+}
+{ .mfb
+       nop.m 999
+       nop.f 999
+(p0)   br.ret.sptk     b0 
 }
 ;;
 
 
-POW_POSSIBLE_UNDER:
+L(POW_POSSIBLE_UNDER):
 // We got an answer. input was < -2^9 but > -2^10 (double)
 // We got an answer. input was < -2^6 but > -2^7  (float)
 // underflow is a possibility, not a certainty
@@ -1623,250 +1763,124 @@ POW_POSSIBLE_UNDER:
 //   0.1...11 2^-3ffe                                   (biased, 1)
 //    largest dn                               smallest normal
 
+
 // Put in s2 (td set, ftz set)
 { .mfi
         nop.m 999
         fsetc.s2 0x7F,0x41
-        nop.i 999
+        nop.i 999 
 }
 ;;
 
+
+
 { .mfi
         nop.m 999
-        fma.s.s2 POW_ftz_urm_f8         = POW_A, POW_es, f0
+        fma.s.s2 POW_ftz_urm_f8                    = POW_A, POW_q, POW_A
         nop.i 999
 }
 ;;
 
+
 // Return s2 to default
 { .mfi
         nop.m 999
         fsetc.s2 0x7F,0x40
-        nop.i 999
+        nop.i 999 
 }
 ;;
 
+
 // p7 = TRUE ==> yes, we have an underflow
 { .mfi
         nop.m 999
-        fcmp.eq.s1 p7, p0               =  POW_ftz_urm_f8, f0
-        nop.i 999
+        fcmp.eq.unc.s1 p7, p0                     =  POW_ftz_urm_f8, f0
+        nop.i 999 
 }
 ;;
 
-{ .mbb
-(p7)    mov pow_GR_tag                  = 31
-(p7)    br.cond.spnt __libm_error_region // Branch if underflow
-        br.ret.sptk     b0               // Exit if did not underflow
-}
-;;
 
-POW_X_DENORM:
-// Here if x unorm. Use the NORM_X for getf instructions, and then back
-// to normal path
-{ .mfi
-        getf.exp      pow_GR_signexp_X  = POW_NORM_X
-        nop.f 999
-        nop.i 999
-}
-;;
 
-{ .mmi
-        getf.sig      pow_GR_sig_X      = POW_NORM_X
-;;
-        and           pow_GR_exp_X      = pow_GR_signexp_X, pow_GR_17ones
-        nop.i 999
-}
-;;
-
-{ .mib
-        sub       pow_GR_true_exp_X     = pow_GR_exp_X, pow_GR_16ones
-        nop.i 999
-        br.cond.sptk    POW_COMMON
-}
-;;
-
-POW_X_0:
-// Here if x=0 and y not nan
-//
-// We have the following cases:
-//  p6  x=0  and  y>0 and is an integer (may be even or odd)
-//  p7  x=0  and  y>0 and is NOT an integer, return +0
-//  p8  x=0  and  y>0 and so big as to always be an even integer, return +0
-//  p9  x=0  and  y>0 and may not be integer
-//  p10 x=0  and  y>0 and is an odd  integer, return x
-//  p11 x=0  and  y>0 and is an even integer, return +0
-//  p12 used in dummy fcmp to set denormal flag if y=unorm
-//  p13 x=0  and  y>0
-//  p14 x=0  and  y=0, branch to code for calling error handling
-//  p15 x=0  and  y<0, branch to code for calling error handling
-//
-{ .mfi
-        getf.sig pow_GR_sig_int_Y = POW_int_Y // Get signif of int_Y
-        fcmp.lt.s1 p15,p13 = f9, f0           // Test for y<0
-        and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
-}
-{ .mfb
-        cmp.ne p14,p0 = pow_GR_y_zero,r0      // Test for y=0
-        fcvt.xf   POW_float_int_Y = POW_int_Y
-(p14)   br.cond.spnt POW_X_0_Y_0              // Branch if x=0 and y=0
-}
-;;
 
-// If x=0 and y>0, test y and flag denormal
 { .mfb
-(p13)   cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033 // Test y +big = even int
-(p13)   fcmp.eq.s0 p12,p0 = f9,f0    // If x=0, y>0 dummy op to flag denormal
-(p15)   br.cond.spnt POW_X_0_Y_NEG // Branch if x=0 and y<0
+(p7)    mov pow_GR_tag                           = 31
+        fma.s f8                                 = POW_A, POW_q, POW_A
+(p7)    br.cond.spnt __libm_error_region 
 }
 ;;
 
-// Here if x=0 and y>0
-{ .mfi
-        nop.m 999
-(p9)    fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y,  POW_NORM_Y // Test y=int
-        nop.i 999
-}
-{ .mfi
-        nop.m 999
-(p8)    fma.s.s0 f8 = f0,f0,f0 // If x=0, y>0 and large even int, return +0
-        nop.i 999
-}
-;;
-
-{ .mfi
-        nop.m 999
-(p7)    fma.s.s0 f8  = f0,f0,f0   // Result +0 if x=0 and y>0 and not integer
-(p6)    tbit.nz.unc p10,p11 = pow_GR_sig_int_Y,0 // If y>0 int, test y even/odd
-}
-;;
 
-// Note if x=0, y>0 and odd integer, just return x
 { .mfb
         nop.m 999
-(p11)   fma.s.s0 f8  = f0,f0,f0   // Result +0 if x=0 and y even integer
-        br.ret.sptk b0            // Exit if x=0 and y>0
-}
-;;
-
-POW_X_0_Y_0:
-// When X is +-0 and Y is +-0, IEEE returns 1.0
-// We call error support with this value
-
-{ .mfb
-        mov pow_GR_tag                  = 32
-        fma.s.s0 f8                     = f1,f1,f0
-        br.cond.sptk __libm_error_region
+        nop.f 999
+        br.ret.sptk     b0 
 }
 ;;
 
-POW_X_0_Y_NEG:
-// When X is +-0 and Y is negative, IEEE returns
-// X     Y           answer
-// +0    -odd int    +inf
-// -0    -odd int    -inf
-
-// +0    !-odd int   +inf
-// -0    !-odd int   +inf
-
-// p6 == Y is a floating point number outside the integer.
-//       Hence it is an integer and is even.
-//       return +inf
-
-// p7 == Y is a floating point number within the integer range.
-//      p9  == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
-//           p11 odd
-//              return (sign_of_x)inf
-//           p12 even
-//              return +inf
-//      p10 == Y is not an integer
-//         return +inf
-//
 
+L(POW_X_DENORM):
+// Here if x unorm. Use the NORM_X for getf instructions, and then back
+// to normal path
 { .mfi
-          nop.m 999
-          nop.f 999
-          cmp.gt  p6,p7                 = pow_GR_exp_Y, pow_GR_10033
+        getf.exp      pow_GR_signexp_X    = POW_NORM_X
+        nop.f 999
+        nop.i 999
 }
 ;;
 
 { .mfi
-          mov pow_GR_tag                = 33
-(p7)      fcmp.eq.unc.s1 p9,p10         = POW_float_int_Y,  POW_NORM_Y
-          nop.i 999
-}
-;;
-
-{ .mfb
-          nop.m 999
-(p6)      frcpa.s0 f8,p13               = f1, f0
-(p6)      br.cond.sptk __libm_error_region   // x=0, y<0, y large neg int
+        getf.sig      pow_GR_sig_X        = POW_NORM_X
+        nop.f 999
+        nop.i 999
 }
 ;;
 
-{ .mfb
-          nop.m 999
-(p10)     frcpa.s0 f8,p13               = f1, f0
-(p10)     br.cond.sptk __libm_error_region   // x=0, y<0, y not int
+{ .mfi
+        and           pow_GR_exp_X        = pow_GR_signexp_X, pow_GR_17ones
+        nop.f 999
 }
 ;;
 
-// x=0, y<0, y an int
 { .mib
-          nop.m 999
-(p9)      tbit.nz.unc p11,p12           = pow_GR_sig_int_Y,0
-          nop.b 999
+        sub       pow_GR_true_exp_X       = pow_GR_exp_X, pow_GR_16ones
+        shl           pow_GR_offset       = pow_GR_sig_X, 1
+        br.cond.sptk    L(POW_COMMON)
 }
 ;;
 
-{ .mfi
-          nop.m 999
-(p12)     frcpa.s0 f8,p13               = f1,f0
-          nop.i 999
-}
-;;
+
+L(POW_X_0_Y_0):
+// When X is +-0 and Y is +-0, IEEE returns 1.0 
+// We call error support with this value 
 
 { .mfb
-          nop.m 999
-(p11)     frcpa.s0 f8,p13               = f1,f8
-          br.cond.sptk __libm_error_region
+         mov pow_GR_tag                     = 32
+         fma.s f8                           = f1,f1,f0
+         br.cond.sptk __libm_error_region
 }
 ;;
 
 
-POW_Y_0:
-// Here for y zero, x anything but zero and nan
-// Set flag if x denormal
-// Result is +1.0
-{ .mfi
-        nop.m 999
-        fcmp.eq.s0 p6,p0 = f8,f0    // Sets flag if x denormal
-        nop.i 999
-}
-{ .mfb
-        nop.m 999
-        fma.s.s0 f8 = f1,f1,f0
-        br.ret.sptk b0
-}
-;;
 
 
-POW_X_INF:
-// Here when X is +-inf
+L(POW_X_INF):
+// When X is +-inf, IEEE returns the following, depending on Y:
 
-// X +inf  Y +inf             +inf
-// X -inf  Y +inf             +inf
+// overflow                       
+// X +inf  Y +inf             +inf  
+// X -inf  Y +inf             +inf 
 
-// X +inf  Y >0               +inf
+// X +inf  Y >0               +inf    
 // X -inf  Y >0, !odd integer +inf     <== (-inf)^0.5 = +inf !!
-// X -inf  Y >0,  odd integer -inf
+// X -inf  Y >0,  odd integer  -inf   
 
-// X +inf  Y -inf             +0
-// X -inf  Y -inf             +0
+// underflow                     
+// X +inf  Y -inf             +0   
+// X -inf  Y -inf             +0  
 
-// X +inf  Y <0               +0
-// X -inf  Y <0, !odd integer +0
-// X -inf  Y <0, odd integer  -0
+// X +inf  Y <0               +0      
+// X -inf  Y <0, !odd integer +0     
+// X -inf  Y <0, odd integer  -0    
 
 // X + inf Y=+0                +1
 // X + inf Y=-0                +1
@@ -1878,30 +1892,32 @@ POW_X_INF:
 
 // p6 == Y is a floating point number outside the integer.
 //       Hence it is an integer and is even.
-//       p13 == (Y negative)
+//       p13 == (Y negative) 
 //          return +inf
 //       p14 == (Y positive)
 //          return +0
 
+
+
 // p7 == Y is a floating point number within the integer range.
 //      p9  == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
 //           p11 odd
-//              p13 == (Y negative)
+//              p13 == (Y negative)    
 //                 return (sign_of_x)inf
-//              p14 == (Y positive)
+//              p14 == (Y positive) 
 //                 return (sign_of_x)0
-//           pxx even
-//              p13 == (Y negative)
-//                 return +inf
+//           pxx even                
+//              p13 == (Y negative) 
+//                 return +inf     
 //              p14 == (Y positive)
-//                 return +0
+//                 return +0     
 
 //      pxx == Y is not an integer
-//           p13 == (Y negative)
+//           p13 == (Y negative) 
 //                 return +inf
 //           p14 == (Y positive)
 //                 return +0
-//
+// 
 
 // If x=inf, test y and flag denormal
 { .mfi
@@ -1913,131 +1929,207 @@ POW_X_INF:
 
 { .mfi
           nop.m 999
-          fcmp.lt.s0 p13,p14            = POW_NORM_Y,f0
-          cmp.gt  p6,p7                 = pow_GR_exp_Y, pow_GR_10033
+          fcmp.lt p13,p14                    = POW_NORM_Y,f0 
+          cmp.gt.unc  p6,p7                  = pow_GR_exp_Y, pow_GR_10033
 }
 { .mfi
           nop.m 999
-          fclass.m p12,p0               = f9, 0x23 //@inf
+          fclass.m p12,p0                    = f9, 0x23
           nop.i 999
 }
 ;;
 
+
 { .mfi
           nop.m 999
-          fclass.m p15,p0               = f9, 0x07 //@zero
+          fclass.m p15,p0                    = f9, 0x07	//@zero
           nop.i 999
 }
 ;;
 
 { .mfb
           nop.m 999
-(p15)     fmerge.s f8 = f1,f1      // Return +1.0 if x=inf, y=0
-(p15)     br.ret.spnt b0           // Exit if x=inf, y=0
+(p15)     fmerge.s f8 = f1,f1
+(p15)     br.ret.spnt b0
 }
 ;;
 
+        
 { .mfi
-          nop.m 999
-(p14)     frcpa.s1 f8,p10 = f1,f0  // If x=inf, y>0, assume result +inf
+(p13)     mov pow_GR_tag                     = 31
+(p14)     frcpa.s1 f8,p10                       = f1,f0
           nop.i 999
 }
 { .mfb
+(p14)     mov pow_GR_tag                     = 30
+(p13)     fma.s1 f8                          = f0,f0,f0
+(p12)     br.ret.spnt b0
+}
+;;
+
+   
+
+{ .mfb
           nop.m 999
-(p13)     fma.s.s0 f8 = f0,f0,f0   // If x=inf, y<0, assume result +0.0
-(p12)     br.ret.spnt b0           // Exit if x=inf, y=inf
+(p7)      fcmp.eq.unc.s1 p9,p0              = POW_float_int_Y,  POW_NORM_Y
+          nop.b 999
 }
 ;;
 
-// Here if x=inf, and 0 < |y| < inf.  Need to correct results if y odd integer.
 { .mfi
           nop.m 999
-(p7)      fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y,  POW_NORM_Y // Is y integer?
-          nop.i 999
+          nop.f 999
+(p9)      tbit.nz.unc p11,p0                 = pow_GR_sig_int_Y,0
+}
+;;
+
+{ .mfb
+          nop.m 999
+(p11)     fmerge.s f8 = POW_NORM_X,f8
+          br.ret.sptk b0 
 }
 ;;
 
+
+
+L(POW_X_0_Y_NEG):
+// When X is +-0 and Y is negative, IEEE returns 
+// X     Y           answer
+// +0    -odd int    +inf
+// -0    -odd int    -inf
+
+// +0    !-odd int   +inf
+// -0    !-odd int   +inf
+
+
+// p6 == Y is a floating point number outside the integer range.
+//       Hence it is an integer and is even.
+//       return +inf
+
+// p7 == Y is a floating point number within the integer range.
+//      p9  == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+//           p11 odd
+//              return (sign_of_x)inf
+//           p12 even
+//              return +inf
+//      p10 == Y is not an integer
+//         return +inf
+// 
+// 
+
 { .mfi
           nop.m 999
           nop.f 999
-(p9)      tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0  // Test for y odd integer
+          cmp.gt.unc  p6,p7                  = pow_GR_exp_Y, pow_GR_10033
+}
+;;
+
+
+{ .mfi
+          mov pow_GR_tag                     = 33
+(p7)      fcmp.eq.unc.s1 p9,p10              = POW_float_int_Y,  POW_NORM_Y
+          nop.i 999
+}
+;;
+
+
+{ .mfb
+          nop.m 999
+(p6)      frcpa.s0 f8,p13                       = f1, f0
+(p6)      br.cond.sptk __libm_error_region
 }
 ;;
 
 { .mfb
           nop.m 999
-(p11)     fmerge.s f8 = POW_NORM_X,f8    // If y odd integer use sign of x
-          br.ret.sptk b0                 // Exit for x=inf, 0 < |y| < inf
+(p10)     frcpa.s0 f8,p13                       = f1, f0
+(p10)     br.cond.sptk __libm_error_region
 }
 ;;
 
 
-POW_X_NEG_Y_NONINT:
-// When X is negative and Y is a non-integer, IEEE
-// returns a qnan indefinite.
-// We call error support with this value
 
-{ .mfb
-         mov pow_GR_tag                 = 34
-         frcpa.s0 f8,p6                 = f0,f0
-         br.cond.sptk __libm_error_region
+{ .mib
+          nop.m 999
+(p9)      tbit.nz.unc p11,p12                = pow_GR_sig_int_Y,0
+          nop.b 999
 }
 ;;
 
-POW_X_NAN:
-// Here if x=nan, y not nan
+
+
 { .mfi
-         nop.m 999
-         fclass.m  p9,p13 = f9, 0x07 // Test y=zero
-         nop.i 999
+          nop.m 999
+(p12)     frcpa.s0 f8,p13                      = f1,f0
+          nop.i 999
+}
+;;
+
+{ .mfb
+          nop.m 999
+(p11)     frcpa f8,p13                      = f1,f8 
+          br.cond.sptk __libm_error_region
 }
 ;;
 
+
+
+
+L(POW_X_NEG_Y_NONINT):
+// When X is negative and Y is a non-integer, IEEE
+// returns a qnan indefinite.
+// We call error support with this value 
+
 { .mfb
-         nop.m 999
-(p13)    fma.s.s0 f8 = f8,f1,f0
-(p13)    br.ret.sptk  b0            // Exit if x nan, y anything but zero or nan
+         mov pow_GR_tag                     = 34
+         frcpa f8,p6                        = f0,f0
+         br.cond.sptk __libm_error_region
 }
 ;;
 
-POW_X_NAN_Y_0:
+
+
+
+L(POW_X_NAN_Y_0):
 // When X is a NAN and Y is zero, IEEE returns 1.
 // We call error support with this value.
+
 { .mfi
-         nop.m 999
-         fcmp.eq.s0 p6,p0 = f8,f0       // Dummy op to set invalid on snan
-         nop.i 999
+         nop.m 0
+         fma.s.s0 f10 = f8,f1,f0 
+         nop.i 0
 }
 { .mfb
-         mov pow_GR_tag                 = 35
-         fma.s.s0 f8 = f0,f0,f1
+         mov pow_GR_tag                     = 35
+         fma.s.s0 f8 = f0,f0,f1 
          br.cond.sptk __libm_error_region
 }
 ;;
 
 
-POW_OVER_UNDER_X_NOT_INF:
+L(POW_OVER_UNDER_X_NOT_INF):
 
 // p8 is TRUE for overflow
 // p9 is TRUE for underflow
 
 // if y is infinity, we should not over/underflow
 
+
 { .mfi
           nop.m 999
-          fcmp.eq.s1     p14, p13       = POW_xsq,f1  // Test |x|=1
-          cmp.eq p8,p9                  = pow_GR_sign_Y_Gpr, r0
+          fcmp.eq.unc.s1     p14, p13        = POW_xsq,f1
+          cmp.eq.unc p8,p9                   = pow_GR_sign_Y_Gpr, r0
 }
 ;;
 
 { .mfi
           nop.m 999
-(p14)     fclass.m.unc       p15, p0    = f9, 0x23 // If |x|=1, test y=inf
+(p14)     fclass.m.unc       p15, p0         = f9, 0x23
           nop.i 999
 }
 { .mfi
           nop.m 999
-(p13)     fclass.m.unc       p11,p0     = f9, 0x23 // If |x| not 1, test y=inf
+(p13)     fclass.m.unc       p11,p0         = f9, 0x23
           nop.i 999
 }
 ;;
@@ -2045,33 +2137,31 @@ POW_OVER_UNDER_X_NOT_INF:
 // p15 = TRUE if |x|=1, y=inf, return +1
 { .mfb
           nop.m 999
-(p15)     fma.s.s0          f8          = f1,f1,f0 // If |x|=1, y=inf, result +1
-(p15)     br.ret.spnt b0                // Exit if |x|=1, y=inf
+(p15)     fma.s              f8              = f1,f1,f0
+(p15)     br.ret.spnt b0
 }
 ;;
 
 .pred.rel "mutex",p8,p9
 {  .mfb
-(p8)      setf.exp           f8 = pow_GR_17ones // If exp(+big), result inf
-(p9)      fmerge.s           f8 = f0,f0         // If exp(-big), result 0
-(p11)     br.ret.sptk b0                // Exit if |x| not 1, y=inf
+(p8)      setf.exp           f8              = pow_GR_17ones
+(p9)      fmerge.s           f8              = f0,f0
+(p11)     br.ret.sptk b0
 }
-;;
 
 { .mfb
           nop.m 999
           nop.f 999
-          br.cond.sptk POW_OVER_UNDER_ERROR // Branch if y not inf
+          br.cond.sptk L(POW_OVER_UNDER_ERROR)
 }
 ;;
 
+L(POW_Y_NAN):
 
-POW_Y_NAN:
-// Here if y=nan, x anything
-// If x = +1 then result is +1, else result is quiet Y
+// If x = +1 then result is +1, else result is quiet Y
 { .mfi
        nop.m 999
-       fcmp.eq.s1         p10,p9        = POW_NORM_X, f1
+       fcmp.eq.s1         p10,p9               = POW_NORM_X, f1 
        nop.i 999
 }
 ;;
@@ -2085,117 +2175,148 @@ POW_Y_NAN:
 
 { .mfi
        nop.m 999
-(p10)  fma.s.s0 f8 = f1,f1,f0
+(p10)  fma.s f8 = f1,f1,f0 
        nop.i 999
 }
 { .mfb
        nop.m 999
-(p9)   fma.s.s0 f8 = f9,f8,f0
-       br.ret.sptk b0             // Exit y=nan
+(p9)   fma.s f8 = f9,f8,f0 
+       br.ret.sptk b0
 }
 ;;
 
 
-POW_OVER_UNDER_ERROR:
-// Here if we have overflow or underflow.
-// Enter with p12 true if x negative and y odd int to force -0 or -inf
+L(POW_OVER_UNDER_ERROR):
 
 { .mfi
-         sub   pow_GR_17ones_m1         = pow_GR_17ones, r0, 1
-         nop.f 999
-         mov pow_GR_one                 = 0x1
+          nop.m 999
+          fmerge.s f10                      = POW_NORM_X,POW_NORM_X
+          nop.i 999
+}
+{ .mfi
+          sub   pow_GR_17ones_m1            = pow_GR_17ones, r0, 1
+          nop.f 999
+          mov pow_GR_one                    = 0x1
 }
 ;;
 
-// overflow, force inf with O flag
+// overflow
 { .mmb
-(p8)     mov pow_GR_tag                 = 30
-(p8)     setf.exp POW_tmp               = pow_GR_17ones_m1
+(p8)     mov pow_GR_tag                     = 30
+(p8)     setf.exp f11                       = pow_GR_17ones_m1
          nop.b 999
 }
 ;;
 
-// underflow, force zero with I, U flags
+        
+// underflow
 { .mmi
-(p9)    mov pow_GR_tag                  = 31
-(p9)    setf.exp POW_tmp                = pow_GR_one
+(p9)    mov pow_GR_tag                     = 31
+(p9)    setf.exp f11                       = pow_GR_one
         nop.i 999
 }
 ;;
 
+
+// p12 x is negative and y is an odd integer 
+
+
 { .mfi
         nop.m 999
-        fma.s.s0 f8                     = POW_tmp, POW_tmp, f0
+        fma.s f8                               = f11, f11, f0
         nop.i 999
 }
 ;;
 
-// p12 x is negative and y is an odd integer, change sign of result
 { .mfi
         nop.m 999
-(p12)   fnma.s.s0 f8                    = POW_tmp, POW_tmp, f0
+(p12)   fmerge.ns f8                           = f8, f8
         nop.i 999
 }
 ;;
 
-GLOBAL_LIBM_END(powf)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp powf
+ASM_SIZE_DIRECTIVE(powf)
+
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
+
+
 
+.proc __libm_error_region
+__libm_error_region:
+
+// Answer is inf for overflow and 0 for underflow.
 .prologue
+// (1)
 { .mfi
-        add   GR_Parameter_Y=-32,sp     // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs         // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                   // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp               // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
 
+
+// (2)
 { .mmi
         stfs [GR_Parameter_Y] = POW_NORM_Y,16 // STORE Parameter 2 on stack
-        add GR_Parameter_X = 16,sp      // Parameter 1 address
+        add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0               // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
 
 .body
+// (3)
 { .mib
-        stfs [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 on stack
+        stfs [GR_Parameter_X] = POW_NORM_X              // STORE Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
-        nop.b 0
+        nop.b 0                                
 }
 { .mib
-        stfs [GR_Parameter_Y] = f8      // STORE Parameter 3 on stack
+        stfs [GR_Parameter_Y] = f8                      // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#           // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 
+// (4)
 { .mmi
-        ldfs  f8 = [GR_Parameter_RESULT] // Get return result off stack
+        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-        add   sp = 64,sp                 // Restore stack pointer
-        mov   b0 = GR_SAVE_B0            // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
-        mov   gp = GR_SAVE_GP            // Restore gp
-        mov   ar.pfs = GR_SAVE_PFS       // Restore ar.pfs
-        br.ret.sptk     b0               // Return
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
diff --git a/sysdeps/ia64/fpu/e_powl.S b/sysdeps/ia64/fpu/e_powl.S
index 0896c19aac..d286e9abad 100644
--- a/sysdeps/ia64/fpu/e_powl.S
+++ b/sysdeps/ia64/fpu/e_powl.S
@@ -1,10 +1,10 @@
 .file "powl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,69 +20,61 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Function:   powl(x,y), where
-//                          y
+//                         y
 //             powl(x,y) = x , for double extended precision x and y values
 //
-//*********************************************************************
+// *********************************************************************
 //
-// History:
-// 02/02/00 (Hand Optimized)
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// History: 
+// 2/02/00  (Hand Optimized)
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 01/22/01 Corrected results for powl(1,inf), powl(1,nan), and
+// 1/22/01  Corrected results for powl(1,inf), powl(1,nan), and
 //          powl(snan,0) to be 1 per C99, not nan.  Fixed many flag settings.
-// 02/06/01 Call __libm_error support if over/underflow when y=2.
-// 04/17/01 Support added for y close to 1 and x a non-special value.
-//          Shared software under/overflow detection for all paths
-// 02/07/02 Corrected sf3 setting to disable traps
-// 05/13/02 Improved performance of all paths
-// 02/10/03 Reordered header: .section, .global, .proc, .align;
-//          used data8 for long double table values
-// 04/17/03 Added missing mutex directive
+// 2/06/01  Call __libm_error support if over/underflow when y=2.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Resources Used:
 //
-//    Floating-Point Registers:
-//                        f8  (Input x and Return Value)
-//                        f9  (Input y)
-//                        f10-f15,f32-f79
+//    Floating-Point Registers: 
+//                        f8  (Input and Return Value)
+//                        f9-f15,f32-f63,f99 
 //
 //    General Purpose Registers:
-//                        Locals r14-24,r32-r65
+//                        Locals r32 - r61
 //                        Parameters to __libm_error_support r62,r63,r64,r65
 //
 //    Predicate Registers: p6-p15
 //
-//*********************************************************************
+// *********************************************************************
 //
 //  Special Cases and IEEE special conditions:
 //
 //    Denormal fault raised on denormal inputs
-//    Overflow exceptions raised when appropriate for pow
-//    Underflow exceptions raised when appropriate for pow
+//    Overflow exceptions raised when appropriate for pow 
+//    Underflow exceptions raised when appropriate for pow 
 //    (Error Handling Routine called for overflow and Underflow)
 //    Inexact raised when appropriate by algorithm
 //
@@ -110,8 +102,8 @@
 //  22. X or Y denorm/unorm and denorm/unorm operand trap is enabled,
 //      generate denorm/unorm fault except if invalid or div_0 raised.
 //
-//*********************************************************************
-//
+// *********************************************************************
+// 
 //  Algorithm
 //  =========
 //
@@ -121,23 +113,23 @@
 //    If Y = 0.5,  return sqrt(X).
 //
 //  Compute log(X) to extra precision.
-//
+//  
 //  ker_log_80( X, logX_hi, logX_lo, Safe );
 //
-//   ...logX_hi + logX_lo approximates log(X) to roughly 80
+//   ...logX_hi + logX_lo approximates log(X) to roughly 80 
 //   ...significant bits of accuracy.
 //
 //  Compute Y*log(X) to extra precision.
 //
 //    P_hi := Y * logX_hi
-//    P_lo := Y * logX_hi - P_hi       ...using FMA
-//    P_lo := Y * logX_lo + P_lo       ...using FMA
+//    P_lo := Y * logX_hi - P_hi	...using FMA
+//    P_lo := Y * logX_lo + P_lo	...using FMA
 //
 //  Compute exp(P_hi + P_lo)
 //
-//    Flag := 2;
+//    Flag := 2; 
 //    Expo_Range := 2; (assuming double-extended power function)
-//    ker_exp_64( P_hi, P_lo, Flag, Expo_Range,
+//    ker_exp_64( P_hi, P_lo, Flag, Expo_Range, 
 //                Z_hi, Z_lo, scale, Safe )
 //
 //    scale := sgn * scale
@@ -146,7 +138,7 @@
 //       return scale*Z_hi + (scale*Z_lo)
 //       quickly
 //    Else
-//       take necessary precaution in computing
+//       take necessary precaution in computing 
 //       scale*Z_hi + (scale*Z_lo)
 //       to set possible exceptions correctly.
 //    End If
@@ -160,8 +152,8 @@
 //   If Y is qNaN, return Y without exception.
 //   If X is qNaN, return X without exception.
 //
-//   At this point, X is real and Y is +-inf.
-//   Thus |X| can only be 1, strictly bigger than 1, or
+//   At this point, X is real and Y is +-inf. 
+//   Thus |X| can only be 1, strictly bigger than 1, or 
 //   strictly less than 1.
 //
 //   If |X| < 1, then
@@ -177,8 +169,8 @@
 //   ...Note that Y is real, finite, non-zero, and not +1.
 //
 //   If X is qNaN, return X without exception.
-//
-//   If X is +-0,
+//    
+//   If X is +-0, 
 //   return ( Y > 0 ? +0 : +inf )
 //
 //   If X is +inf
@@ -188,11 +180,11 @@
 //   return -0 ** -Y
 //   return ( Y > 0 ? +inf : +0 )
 //
-//  Case_Invalid
+//  Case_Invalid 
 //
 //   Return 0 * inf to generate a quiet NaN together
 //   with an invalid exception.
-//
+// 
 //  Implementation
 //  ==============
 //
@@ -201,15 +193,15 @@
 //
 //  STAGE 1
 //  -------
-//   This stage contains two threads.
+//   This stage contains two threads. 
 //
 //   Stage1.Thread1
 //
 //     fclass.m   X_excep,  X_ok   = X, (NatVal or s/qNaN) or
-//                              +-0, +-infinity
+//				  +-0, +-infinity
 //
 //     fclass.nm  X_unsupp, X_supp = X, (NatVal or s/qNaN) or
-//                              +-(0, unnorm, norm, infinity)
+//				  +-(0, unnorm, norm, infinity)
 //
 //     X_norm := fnorm( X ) with traps disabled
 //
@@ -217,26 +209,26 @@
 //     If (X_unsupp) goto Filtering (Step 2)
 //
 //     Stage1.Thread2
-//     ..............
+//     ..............    
 //
 //     fclass.m   Y_excep,  Y_ok   = Y, (NatVal or s/qNaN) or
-//                              +-0, +-infinity
+//				  +-0, +-infinity
 //
 //     fclass.nm  Y_unsupp, Y_supp = Y, (NatVal or s/qNaN) or
-//                              +-(0, unnorm, norm, infinity)
+//				  +-(0, unnorm, norm, infinity)
 //
 //     Y_norm := fnorm( Y ) with traps disabled
 //
 //     If (Y_excep)  goto Filtering (Step 2)
 //     If (Y_unsupp) goto Filtering (Step 2)
 //
-//
+// 
 //  STAGE 2
 //  -------
 //  This stage contains two threads.
 //
-//     Stage2.Thread1
-//     ..............
+//     Stage2.Thread1		
+//     ..............	
 //
 //     Set X_lt_0 if X < 0 (using fcmp)
 //     sgn := +1.0
@@ -253,14 +245,14 @@
 //   This stage contains two threads.
 //
 //
-//   Stage3.Thread1
-//   ..............
+//   Stage3.Thread1		
+//   .............. 	
 //
 //     X := fnorm(X) in prevailing traps
 //
 //
-//     Stage3.Thread2
-//     ..............
+//     Stage3.Thread2		
+//     ..............	
 //
 //     Y := fnorm(Y) in prevailing traps
 //
@@ -270,56 +262,60 @@
 //   Go to Case_Normal.
 //
 
-
-// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
-
-// double-extended 1/ln(2)
-// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
-// 3fff b8aa 3b29 5c17 f0bc
-// For speed the significand will be loaded directly with a movl and setf.sig
-//   and the exponent will be bias+63 instead of bias+0.  Thus subsequent
-//   computations need to scale appropriately.
-// The constant 2^12/ln(2) is needed for the computation of N.  This is also
-//   obtained by scaling the computations.
-//
-// Two shifting constants are loaded directly with movl and setf.d.
-//   1. RSHF_2TO51 = 1.1000..00 * 2^(63-12)
-//        This constant is added to x*1/ln2 to shift the integer part of
-//        x*2^12/ln2 into the rightmost bits of the significand.
-//        The result of this fma is N_signif.
-//   2. RSHF       = 1.1000..00 * 2^(63)
-//        This constant is subtracted from N_signif * 2^(-51) to give
-//        the integer part of N, N_fix, as a floating-point number.
-//        The result of this fms is float_N.
-RODATA
-
-.align 16
-// L_hi, L_lo
-LOCAL_OBJECT_START(Constants_exp_64_Arg)
-data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12
-data8 0xF473DE6AF278ECE6,0x00003FD4 // L_lo = lo part log(2)/2^12
-LOCAL_OBJECT_END(Constants_exp_64_Arg)
-
-LOCAL_OBJECT_START(Constants_exp_64_A)
-// Reversed
-data8 0xAAAAAAABB1B736A0,0x00003FFA
-data8 0xAAAAAAAB90CD6327,0x00003FFC
-data8 0xFFFFFFFFFFFFFFFF,0x00003FFD
-LOCAL_OBJECT_END(Constants_exp_64_A)
-
-LOCAL_OBJECT_START(Constants_exp_64_P)
-// Reversed
-data8 0xD00D6C8143914A8A,0x00003FF2
-data8 0xB60BC4AC30304B30,0x00003FF5
-data8 0x888888887474C518,0x00003FF8
-data8 0xAAAAAAAA8DAE729D,0x00003FFA
-data8 0xAAAAAAAAAAAAAF61,0x00003FFC
-data8 0x80000000000004C7,0x00003FFE
-LOCAL_OBJECT_END(Constants_exp_64_P)
-
-LOCAL_OBJECT_START(Constants_exp_64_T1)
-data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
-data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+// Inv_L, L_hi, L_lo 
+.align 64
+Constants_exp_64_Arg:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
+data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000 
+data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
+data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
+
+.align 64
+Constants_exp_64_Exponents:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
+data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
+data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
+data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
+
+.align 64
+Constants_exp_64_A:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
+// Reversed: coefficients are stored highest-degree first
+data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
+data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
+data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
+
+.align 64
+Constants_exp_64_P:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
+// Reversed: coefficients are stored highest-degree first
+data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
+data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
+data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
+data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
+data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
+data4 0x000004C7,0x80000000,0x00003FFE,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
+
+.align 64
+Constants_exp_64_T1:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 
 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
 data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
 data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
@@ -334,263 +330,274 @@ data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
 data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
 data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
 data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
-LOCAL_OBJECT_END(Constants_exp_64_T1)
-
-LOCAL_OBJECT_START(Constants_exp_64_T2)
-data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
-data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
-data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
-data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
-data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
-data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
-data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
-data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
-data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
-data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
-data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
-data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
-data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
-data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
-data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
+
+.align 64
+Constants_exp_64_T2:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E 
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA 
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A 
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA 
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA 
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE 
 data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
-LOCAL_OBJECT_END(Constants_exp_64_T2)
-
-LOCAL_OBJECT_START(Constants_exp_64_W1)
-data8 0x0000000000000000, 0xBE384454171EC4B4
-data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
-data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
-data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
-data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
-data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
-data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
-data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
-data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
-data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
-data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
-data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
-data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
-data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
-data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
-data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
-data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
-data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
-data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
-data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
-data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
-data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
-data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
-data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
-data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
-data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
-data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
-data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
-data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
-data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
-data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
-data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
-LOCAL_OBJECT_END(Constants_exp_64_W1)
-
-LOCAL_OBJECT_START(Constants_exp_64_W2)
-data8 0x0000000000000000, 0xBE641F2537A3D7A2
-data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
-data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
-data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
-data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
-data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
-data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
-data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
-data8 0xBE56856B49BFF529, 0x3E66DD3300508651
-data8 0x3E51165FC114BC13, 0x3E53333DC453290F
-data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
-data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
-data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
-data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
-data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
-data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
-data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
-data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
-data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
-data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
-data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
-data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
-data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
-data8 0xBE559725ADE45917, 0xBE68C29C042FC476
-data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
-data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
-data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
-data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
-data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
-data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
-data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
-data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
-LOCAL_OBJECT_END(Constants_exp_64_W2)
-
-LOCAL_OBJECT_START(Constants_log_80_P)
-// P_8, P_7, ..., P_1
-data8 0xCCCE8B883B1042BC, 0x0000BFFB // P_8
-data8 0xE38997B7CADC2149, 0x00003FFB // P_7
-data8 0xFFFFFFFEB1ACB090, 0x0000BFFB // P_6
-data8 0x9249249806481C81, 0x00003FFC // P_5
-data8 0x0000000000000000, 0x00000000 // Pad for bank conflicts
-data8 0xAAAAAAAAAAAAB0EF, 0x0000BFFC // P_4
-data8 0xCCCCCCCCCCC91416, 0x00003FFC // P_3
-data8 0x8000000000000000, 0x0000BFFD // P_2
-data8 0xAAAAAAAAAAAAAAAB, 0x00003FFD // P_1
-LOCAL_OBJECT_END(Constants_log_80_P)
-
-LOCAL_OBJECT_START(Constants_log_80_Q)
-// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
-data8 0xB172180000000000,0x00003FFE
-data8 0x82E308654361C4C6,0x0000BFE2
-data8 0x92492453A51BE0AF,0x00003FFC
-data8 0xAAAAAB73A0CFD29F,0x0000BFFC
-data8 0xCCCCCCCCCCCE3872,0x00003FFC
-data8 0xFFFFFFFFFFFFB4FB,0x0000BFFC
-data8 0xAAAAAAAAAAAAAAAB,0x00003FFD
-data8 0x8000000000000000,0x0000BFFE
-LOCAL_OBJECT_END(Constants_log_80_Q)
-
-LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h1)
-// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
+  
+.align 64
+Constants_exp_64_W1:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
+data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
+data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
+data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
+data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
+data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
+data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
+data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
+data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
+data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
+data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
+data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A 
+data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB 
+data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E 
+data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA 
+data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08 
+data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B 
+data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75 
+data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79 
+data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7 
+data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087 
+data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB 
+data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643  
+data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C 
+data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D 
+data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873 
+data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F 
+data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861 
+data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0 
+data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC 
+data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB 
+data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB 
+data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
+
+.align 64
+Constants_exp_64_W2:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
+data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25 
+data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8 
+data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A 
+data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E 
+data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9 
+data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2 
+data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0 
+data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509 
+data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33 
+data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D 
+data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87 
+data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3 
+data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9 
+data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F 
+data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82 
+data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4 
+data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D 
+data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030  
+data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29 
+data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
+data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B 
+data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893 
+data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35 
+data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C 
+data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313 
+data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE 
+data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426 
+data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550 
+data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4 
+data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31 
+data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE 
+data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
+
+.align 64
+Constants_log_80_P:
+ASM_TYPE_DIRECTIVE(Constants_log_80_P,@object)
+// 1/2, P_8, P_7, ..., P_1  
+data4 0x00000000, 0x80000000, 0x00003FFE, 0x00000000
+data4 0x3B1042BC, 0xCCCE8B88, 0x0000BFFB, 0x00000000
+data4 0xCADC2149, 0xE38997B7, 0x00003FFB, 0x00000000
+data4 0xB1ACB090, 0xFFFFFFFE, 0x0000BFFB, 0x00000000
+data4 0x06481C81, 0x92492498, 0x00003FFC, 0x00000000
+data4 0xAAAAB0EF, 0xAAAAAAAA, 0x0000BFFC, 0x00000000
+data4 0xCCC91416, 0xCCCCCCCC, 0x00003FFC, 0x00000000
+data4 0x00000000, 0x80000000, 0x0000BFFD, 0x00000000
+data4 0xAAAAAAAB, 0xAAAAAAAA, 0x00003FFD
+ASM_SIZE_DIRECTIVE(Constants_log_80_P)
+
+.align 64
+Constants_log_80_Q:
+ASM_TYPE_DIRECTIVE(Constants_log_80_Q,@object)
+// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1 
+data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
+data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
+data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000
+data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000
+data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000
+data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000
+data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000
+data4 0x00000000,0x80000000,0x0000BFFE,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_log_80_Q)
+
+.align 64
+Constants_log_80_Z_G_H_h1:
+ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h1,@object)
+// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double extended
 data4 0x00008000,0x3F800000,0x00000000,0x00000000
-data4 0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00000000,0x00000000,0x00000000,0x00000000 
 data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
 data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
 data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
 data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
 data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
-data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
+data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000  
 data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
-data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
-data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
-data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
-data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
+data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000  
+data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000 
+data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000 
+data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000 
 data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
-data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
-data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
-data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
-data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
-data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
-data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
-data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
-data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
-data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
-data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
+data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000 
+data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000 
+data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000 
+data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000 
+data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000 
+data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000 
+data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000 
+data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000 
+data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000 
+data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000 
 data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
-data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
-data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
-data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
-data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
-data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
+data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000    
+data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000 
+data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000  
+data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000 
+data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000 
 data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
-data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
-LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h1)
-
-LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h2)
-// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double
-data4 0x00008000,0x3F800000,0x00000000,0x00000000
-data4 0x00000000,0x00000000,0x00000000,0x00000000
-data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
+data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000 
+ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h1)
+
+.align 64
+Constants_log_80_Z_G_H_h2:
+ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h2,@object)
+// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double extended
+data4 0x00008000,0x3F800000,0x00000000,0x00000000 
+data4 0x00000000,0x00000000,0x00000000,0x00000000 
+data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000 
 data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
-data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
-data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
-data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
-data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
-data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
-data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
-data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
+data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000 
+data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000 
+data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000 
+data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000 
+data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000 
+data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000 
+data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000 
 data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
-data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
-data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
+data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000 
+data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000 
 data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
-data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
-data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
-data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
-data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
-data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
-data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
-data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
-data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
-data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
-data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
-data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
-data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
-data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
-data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
-data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
-data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
+data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000 
+data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000 
+data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000  
+data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000 
+data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000 
+data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000 
+data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000 
+data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000 
+data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000  
+data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000 
+data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000  
+data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000 
+data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000 
+data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000 
+data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000 
+data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000 
 data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
-LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h2)
-
-LOCAL_OBJECT_START(Constants_log_80_h3_G_H)
-// h3 IEEE double extended, H3 and G3 IEEE single
-data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
+ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h2)
+ 
+.align 64
+Constants_log_80_h3_G_H:
+ASM_TYPE_DIRECTIVE(Constants_log_80_h3_G_H,@object)
+// h3 IEEE double extended, H3 and G3 IEEE single   
+data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00 
 data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
-data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
-data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
+data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00 
+data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400 
 data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
-data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
-data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
-data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
-data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
-data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
-data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
+data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400 
+data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08 
+data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408 
+data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10 
+data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410 
+data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18 
 data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
-data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
-data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
-data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
-data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
-data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
-data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
-data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
-data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
-data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
-data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
-data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
+data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20 
+data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428 
+data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30 
+data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438 
+data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40 
+data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448 
+data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50 
+data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458 
+data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68 
+data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470 
+data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78 
 data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
-data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
-data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
-data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
-data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
-data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
-data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
-data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
-data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
-data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here
-data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
+data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90 
+data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0 
+data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8 
+data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8 
+data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8 
+data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8 
+data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0 
+data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0 
+data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here 
+data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D 
 data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
-data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
-data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
-data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
-data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
-data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
-LOCAL_OBJECT_END(Constants_log_80_h3_G_H)
-
-GR_sig_inv_ln2      = r14
-GR_rshf_2to51       = r15
-GR_exp_2tom51       = r16
-GR_rshf             = r17
-GR_exp_half         = r18
-GR_sign_mask        = r19
-GR_exp_square_oflow = r20
-GR_exp_square_uflow = r21
-GR_exp_ynear1_oflow = r22
-GR_exp_ynear1_uflow = r23
-GR_signif_Z         = r24
-
-GR_signexp_x        = r32
-
-GR_exp_x            = r33
-
+data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED 
+data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766 
+data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6 
+data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620 
+data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D 
+ASM_SIZE_DIRECTIVE(Constants_log_80_h3_G_H)
+
+.align 64
+Constant_half:
+ASM_TYPE_DIRECTIVE(Constant_half,@object)
+data4 0x00000000,0x80000000,0x00003FFE
+ASM_SIZE_DIRECTIVE(Constant_half)
+
+GR_Expo_Range       = r32
+GR_Flag             = r33
 GR_Table_Ptr        = r34
 
 GR_Table_Ptr1       = r35
+GR_BIAS             = r35
 
 GR_Index1           = r36
+GR_sign_mask        = r36
 
 GR_Index2           = r37
 GR_Expo_X           = r37
 
+GR_signif_Z         = r38
 GR_M                = r38
 
 GR_X_0              = r39
@@ -613,49 +620,45 @@ GR_k                = r44
 
 GR_Big_Pos_Exp      = r45
 
-GR_exp_pos_max      = r46
 
-GR_exp_bias_p_k     = r47
+GR_BIAS_p_k         = r47
+GR_BIASed_exp_y     = r47
 
+GR_Big_Neg_Exp      = r48
 GR_Index3           = r48
 GR_temp             = r48
 
 GR_vsm_expo         = r49
+GR_y_sign           = r49
 
 GR_T1_ptr           = r50
-GR_P_ptr1           = r50
 GR_T2_ptr           = r51
-GR_P_ptr2           = r51
 GR_N_fix            = r52
 GR_exp_y            = r53
 GR_signif_y         = r54
-GR_signexp_y        = r55
-GR_fraction_y       = r55
+GR_exp_and_sign_y   = r55
 GR_low_order_bit    = r56
-GR_exp_mask         = r57
-GR_exp_bias         = r58
-GR_y_sign           = r59
-GR_table_base       = r60
-GR_ptr_exp_Arg      = r61
-GR_Delta_Exp        = r62
-GR_Special_Exp      = r63
-GR_exp_neg_max      = r64
-GR_Big_Neg_Exp      = r65
-
-//** Registers for unwind support
+GR_get_exp_mask     = r57
+GR_exponent_zero    = r58
+
+// ** Registers for unwind support
 
 GR_SAVE_PFS         = r59
 GR_SAVE_B0          = r60
 GR_SAVE_GP          = r61
-GR_Parameter_X      = r62
-GR_Parameter_Y      = r63
-GR_Parameter_RESULT = r64
-GR_Parameter_TAG    = r65
+GR_Parameter_X      = r62 
+GR_Parameter_Y      = r63 
+GR_Parameter_RESULT = r64 
+GR_Parameter_TAG    = r65 
+
+FR_X      = f8
+FR_Y      = f9
+FR_RESULT = f99
 
-//**
+// **
 
 FR_Input_X          = f8
-FR_Result           = f8
+FR_Output           = f8
 FR_Input_Y          = f9
 
 FR_Neg              = f10
@@ -668,6 +671,7 @@ FR_poly_hi          = f11
 
 FR_Sgn              = f12
 
+FR_Neg_X            = f13
 FR_half_W           = f13
 
 FR_X_cor            = f14
@@ -694,11 +698,13 @@ FR_Scale            = f36
 FR_G_1              = f37
 FR_G                = f37
 FR_Wsq              = f37
+FR_L_Inv            = f37
 FR_temp             = f37
 
 FR_H_1              = f38
 FR_H                = f38
 FR_W4               = f38
+FR_float_N          = f38
 
 FR_h                = f39
 FR_h_1              = f39
@@ -714,7 +720,9 @@ FR_L_lo             = f41
 FR_A_1              = f41
 
 FR_h_2              = f42
+FR_P_6              = f42
 
+FR_abs_W            = f43
 FR_W1               = f43
 
 FR_G_3              = f44
@@ -732,6 +740,7 @@ FR_H_3              = f47
 
 FR_float_N          = f48
 
+FR_P_4              = f49
 FR_A_2              = f49
 
 FR_Q_4              = f50
@@ -759,6 +768,7 @@ FR_Two              = f56
 FR_Big              = f57
 
 FR_neg_2_mK         = f58
+FR_NBig             = f58
 
 FR_r                = f59
 
@@ -767,1253 +777,1652 @@ FR_poly_lo          = f60
 FR_poly             = f61
 
 FR_P_5              = f62
-FR_Result_small     = f62
 
 FR_rsq              = f63
 
-FR_Delta            = f64
-
-FR_save_Input_X     = f65
-FR_norm_X           = f66
-FR_norm_Y           = f67
-FR_Y_lo_2           = f68
-
-FR_P_6              = f69
-FR_Result_big       = f69
-
-FR_RSHF_2TO51       = f70
-FR_INV_LN2_2TO63    = f71
-FR_2TOM51           = f72
-FR_RSHF             = f73
-FR_TMP1             = f74
-FR_TMP2             = f75
-FR_TMP3             = f76
-FR_Tscale           = f77
-FR_P_4              = f78
-FR_NBig             = f79
-
+FR_Result           = f99
+FR_Result_small     = f100
+FR_Result_big       = f101
 
 .section .text
-GLOBAL_LIBM_ENTRY(powl)
-//
-//     Get significand of x.  It is the critical path.
-//
+.proc powl#
+.global powl#
+.align 64 
+
+powl: 
 { .mfi
-      getf.sig GR_signif_Z = FR_Input_X    // Get significand of x
-      fclass.m p11, p12 = FR_Input_X, 0x0b // Test x unorm
-      nop.i 999
+alloc  GR_Expo_Range = ar.pfs,0,30,4,0
+(p0)   fclass.m.unc p7, p13 =  FR_Input_Y, 0x1E7 
+nop.i 0
 }
 { .mfi
-      nop.m 999
-      fnorm.s1 FR_norm_X = FR_Input_X      // Normalize x
-      mov GR_exp_half = 0xffff - 1         // Exponent for 0.5
-}
-;;
-
+(p0)   getf.exp GR_exp_and_sign_y = FR_Input_Y 
+//
+//     Save State
+//
+(p0)   fclass.m.unc p6, p12 =  FR_Input_X, 0x1E7 
+nop.i 0
+};;
 { .mfi
-      alloc  r32 = ar.pfs,0,30,4,0
-      fclass.m p7, p0 =  FR_Input_Y, 0x1E7 // Test y natval, nan, inf, zero
-      mov GR_exp_pos_max = 0x13fff         // Max exponent for pos oflow test
+(p0)   getf.sig GR_signif_y = FR_Input_Y 
+(p0)   fcmp.eq.unc.s1 p12, p13 =  FR_Input_X, f1 
+nop.i 0
 }
 { .mfi
-      addl GR_table_base = @ltoff(Constants_exp_64_Arg#), gp // Ptr to tables
-      fnorm.s1 FR_norm_Y = FR_Input_Y      // Normalize y
-      mov GR_exp_neg_max = 0x33fff         // Max exponent for neg oflow test
+	nop.m 999
+//
+//     Check for y = 1 
+//     Identify EM unsupporteds.
+//     Load FR_half = .5
+//
+(p0)   fadd.s1 FR_Two = f1, f1 
+//
+//     Load 1/2 in GP register
+//
+nop.i 0 
 }
 ;;
 
-{ .mfi
-      getf.exp GR_signexp_y = FR_Input_Y   // Get sign and exp of y
-(p12) fclass.m p11, p0 =  FR_Input_Y, 0x0b // Test y unorm
-      mov GR_sign_mask = 0x20000           // Sign mask
-}
-{ .mfi
-      ld8 GR_table_base = [GR_table_base]  // Get base address for tables
-      fadd.s1 FR_Two = f1, f1              // Form 2.0 for square test
-      mov GR_exp_mask = 0x1FFFF            // Exponent mask
+{ .mmi
+      nop.m 999
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constant_half#), gp
+      nop.i 999
 }
 ;;
 
-{ .mfi
-      getf.sig GR_signif_y = FR_Input_Y    // Get significand of y
-      fclass.m p6, p0 =  FR_Input_X, 0x1E7 // Test x natval, nan, inf, zero
+{ .mmi
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+      nop.m 999
       nop.i 999
 }
 ;;
 
+{ .mlx
+(p0)   ldfe FR_Half =[GR_Table_Ptr],0
+(p0)   movl GR_get_exp_mask = 0x1FFFF ;; 
+}
+
 { .mfi
-      getf.exp GR_signexp_x = FR_Input_X   // Get signexp of x
-      fmerge.s FR_save_Input_X = FR_Input_X, FR_Input_X
-      extr.u GR_Index1 = GR_signif_Z, 59, 4  // Extract upper 4 signif bits of x
+	nop.m 999
+(p0)   fclass.nm.unc p9, p15 =  FR_Input_Y, 0x1FF 
+//
+//     Create FR_Two = 2
+//     Get exp and significand of Y
+//     Create Masks
+//     sgn = 1
+//
+(p0)   and GR_exp_y = GR_get_exp_mask,GR_exp_and_sign_y
 }
-{ .mfb
-      setf.exp FR_Half = GR_exp_half       // Load half
-      nop.f 999
-(p11) br.cond.spnt  POWL_DENORM            // Branch if x or y denorm/unorm
+{ .mlx
+	nop.m 999
+(p0)   movl GR_exponent_zero = 0xFFFF ;; 
 }
-;;
-
-// Return here from POWL_DENORM
-POWL_COMMON:
 { .mfi
-      setf.exp FR_Big = GR_exp_pos_max     // Form big pos value for oflow test
-      fclass.nm p11, p0 = FR_Input_Y, 0x1FF // Test Y unsupported
-      shl GR_Index1 = GR_Index1,5          // Adjust index1 pointer x 32
+	nop.m 999
+(p0)   mov FR_Sgn = f1 
+	nop.i 999
 }
 { .mfi
-      add GR_Table_Ptr = 0x7c0, GR_table_base // Constants_log_80_Z_G_H_h1
-      fma.s1 FR_Sgn = f1,f1,f0             // Assume result positive
-      mov GR_exp_bias = 0xFFFF             // Form exponent bias
+	nop.m 999
+(p0)   fcmp.eq.unc.s1 p10, p11 =  FR_Input_Y, f1 
+	nop.i 999 ;;
 }
-;;
-
+{ .mfb
+	nop.m 999
 //
 //     Identify NatVals, NaNs, Infs, and Zeros.
+//     Load Half
 //
-//
+(p0)   fclass.nm.unc p8, p14 =  FR_Input_X, 0x1FF 
+//      
 //     Remove sign bit from exponent of y.
-//     Check for x = 1
+//     Check for x = 1 
+//
+(p6)   br.cond.spnt L(POWL_64_SPECIAL) ;; 
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p7)   br.cond.spnt L(POWL_64_SPECIAL) ;; 
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p8)   br.cond.spnt L(POWL_64_UNSUPPORT) ;; 
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p9)   br.cond.spnt L(POWL_64_UNSUPPORT) ;; 
+}
+{ .mfi
+(p0)   cmp.lt.unc  p9, p0 = GR_exp_y,GR_exponent_zero 
+(p0)   fcmp.lt.unc.s1 p6, p13  =  FR_Input_X, f0 
+//
 //     Branch on Infs, Nans, Zeros, and Natvals
 //     Check to see that exponent < 0
 //
+(p0)   sub GR_exp_y = GR_exp_y,GR_exponent_zero
+}
+//     x not zero, is y ==2? 
 { .mfi
-      setf.exp FR_NBig = GR_exp_neg_max    // Form big neg value for oflow test
-      fclass.nm p8, p0 =  FR_Input_X, 0x1FF  // Test X unsupported
-      and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent of y
+	nop.m 999
+(p11)  fcmp.eq.unc.s1 p7, p14 =  FR_Input_Y, FR_Two 
+	nop.i 999 ;;
 }
 { .mfb
-      add GR_Index1 = GR_Index1,GR_Table_Ptr
-      nop.f 999
-(p6)  br.cond.spnt POWL_64_SPECIAL         // Branch if x natval, nan, inf, zero
+	nop.m 999
+(p9)   fcmp.lt.unc.s1 p9, p0   =  FR_Input_X, f0 
+(p7)   br.cond.spnt L(POWL_64_SQUARE) ;;   // Branch if x not zero and y=2
 }
-;;
-
-//     load Z_1 from Index1
-
-// There is logic starting here to determine if y is an integer when x < 0.
-// If 0 < |y| < 1 then clearly y is not an integer.
-// If |y| > 1, then the significand of y is shifted left by the size of
-//    the exponent of y.  This preserves the lsb of the integer part + the
-//    fractional bits.  The lsb of the integer can be tested to determine if
-//    the integer is even or odd.  The fractional bits can be tested.  If zero,
-//    then y is an integer.
+{ .mfi
+	nop.m 999
+(p6)   fmerge.ns FR_Neg_X = FR_Input_X, FR_Input_X 
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p10)  fmpy.s0 FR_Result = FR_Input_X, f1 
+//
+//     For y = 1, compute result = x 
+//     For x = 1, compute 1 
+//     When Y is one, return X and possibly raise a
+//     denormal operand exception.
+//     Remove exponent BIAS
 //
+(p6)   shl GR_exp_and_sign_y=  GR_signif_y,GR_exp_y ;; 
+}
 { .mfi
-      ld2 GR_Z_1 =[GR_Index1],4            // Load Z_1
-      fmerge.s FR_Z = f0, FR_norm_X        // Z = |x|
-      extr.u GR_X_0 = GR_signif_Z, 49, 15  // Extract X_0 from significand
+(p9)   or  GR_exp_and_sign_y = 0xF,GR_signif_y 
+(p12)  fma.s0 FR_Result = FR_Input_Y, f0, f1 
+	nop.i 999 ;;
 }
-{ .mfb
-      cmp.lt p9, p0 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1
-      nop.f 999
-(p7)  br.cond.spnt POWL_64_SPECIAL         // Branch if y natval, nan, inf, zero
+{ .mii
+	nop.m 999
+(p6)   extr.u GR_exp_y = GR_exp_and_sign_y,63,1 ;; 
+(p6)   cmp.ne.unc  p9, p0 =  GR_exp_y, r0 
 }
-;;
-
-{ .mfb
-      ldfs  FR_G_1 = [GR_Index1],4         // Load G_1
-      fcmp.eq.s1 p10, p0 =  FR_Input_Y, f1 // Test Y = +1.0
-(p8)  br.cond.spnt POWL_64_UNSUPPORT       // Branch if x unsupported
+{ .mii
+	nop.m 999
+//
+//     Both predicates can be set. 
+//     Don't consider  y's < 1.
+//
+(p6)   shl GR_signif_y=  GR_exp_and_sign_y,1 ;; 
+//
+//     Shift off the integer part of y.
+//     Get y's  even or odd bit.
+//
+(p6)   cmp.ne.unc  p8, p0  =  GR_signif_y, r0 
 }
-;;
-
+{ .mib
+	nop.m 999
+	nop.i 999
 //
-//     X_0  = High order 15 bit of Z
+//     Is the fractional part of the y = 0?
+//     Is the integer even or odd. 
 //
-{ .mfb
-      ldfs  FR_H_1 = [GR_Index1],8             // Load H_1
-(p9)  fcmp.lt.unc.s1 p9, p0 = FR_Input_X, f0   // Test x<0, 0 <|y|<1
-(p11) br.cond.spnt POWL_64_UNSUPPORT           // Branch if y unsupported
+(p10)  br.cond.spnt L(POWL_64_RETURN) ;; 
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p12)  br.cond.spnt L(POWL_64_RETURN) ;; 
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p8)   br.cond.spnt L(POWL_64_XNEG) ;;
 }
-;;
-
 { .mfi
-      ldfe FR_h_1 = [GR_Index1]                // Load h_1
-      fcmp.eq.s1 p7, p0 =  FR_Input_Y, FR_Two  // Test y = 2.0
-      pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15     // X_1 = X_0 * Z_1 (bits 15-30)
-                                               // Wait 4 cycles to use result
+	nop.m 999
+(p9)   fmerge.ns FR_Sgn = FR_Sgn, FR_Sgn 
+	nop.i 999
 }
 { .mfi
-      add GR_Table_Ptr = 0x9c0, GR_table_base  // Constants_log_80_Z_G_H_h2
-      nop.f 999
-      sub GR_exp_y = GR_exp_y,GR_exp_bias      // Get true exponent of y
+	nop.m 999
+(p0)   fcmp.eq.unc.s0 p11, p0 =  FR_Input_Y, FR_Half 
+	nop.i 999 ;;
 }
-;;
-
 //
-//      Branch for (x < 0) and Y not an integer.
+//     Raise possible denormal operand exception for both
+//     X and Y.
 //
 { .mfb
-      nop.m 999
-      fcmp.lt.s1 p6, p0  =  FR_Input_X, f0     // Test x < 0
-(p9)  br.cond.spnt POWL_64_XNEG                // Branch if x < 0, 0 < |y| < 1
+	nop.m 999
+//
+//     Branch for (x < 0) and Y not an integer.
+//
+(p0)   fcmp.eq.unc.s0 p12, p0 =  FR_Input_X, f1 
+//
+//     For x < 0 and y integer, make x positive 
+//     For x < 0 and y odd integer, set sign = -1.
+//
+(p11)  br.cond.spnt L(POWL_64_SQRT) ;; 
+}
+{ .mmf
+(p0)   cmp.eq.unc  p15, p14 =  r0, r0 
+	nop.m 999
+(p13)  fnorm.s1 FR_Z = FR_Input_X ;; 
+}
+{ .mfi
+	nop.m 999
+(p6)   fnorm.s1 FR_Z = FR_Neg_X 
+	nop.i 999
 }
 ;;
 
-{ .mfi
+//
+//     Branch to embedded sqrt(x)
+//
+//
+//     Computes ln( x ) to extra precision 
+//     Input  FR 1: FR_X            
+//     Output FR 2: FR_Y_hi  
+//     Output FR 3: FR_Y_lo  
+//     Output PR 1: PR_Safe  
+//
+
+{ .mmi
       nop.m 999
-      fcmp.eq.s1 p12, p0 =  FR_Input_X, f1     // Test x=+1.0
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_Z_G_H_h1#), gp
       nop.i 999
 }
-{ .mfb
-      nop.m 999
-      fsub.s1 FR_W = FR_Z, f1                  // W = Z - 1
-(p7)  br.cond.spnt POWL_64_SQUARE              // Branch if y=2
-}
 ;;
 
-{ .mfi
+{ .mmi
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
       nop.m 999
-(p10) fmpy.s0 FR_Result = FR_Input_X, f1       // If y=+1.0, result=x
-(p6)  shl GR_fraction_y=  GR_signif_y,GR_exp_y // Get lsb of int + fraction
-                                               // Wait 4 cycles to use result
+      nop.i 999
 }
 ;;
 
+
+{ .mlx
+	nop.m 999
+(p0)   movl GR_BIAS = 0x000000000000FFFF ;; 
+}
 { .mfi
-      nop.m 999
-(p12) fma.s0 FR_Result = FR_Input_Y, f0, f1    // If x=1.0, result=1, chk denorm
-      extr.u GR_Index2 = GR_X_1, 6, 4          // Extract index2
+	nop.m 999
+(p0)   fsub.s1 FR_W = FR_Z, f1 
+	nop.i 999 ;;
 }
-;;
-
-//
-//     N = exponent of Z
-//
-{ .mib
-      getf.exp GR_N =  FR_Z                    // Get exponent of Z (also x)
-      shl GR_Index2=GR_Index2,5                // Index2  x 32 bytes
-(p10) br.ret.spnt  b0                          // Exit if y=+1.0
+//     
+//     Z = Norm(X) - both + and - case 
+//     Set Safe = True
+//     
+{ .mmb
+(p0)   getf.sig GR_signif_Z = FR_Z 
+(p0)   getf.exp GR_N =  FR_Z 
+	nop.b 999 ;;
+}
+{ .mii
+	nop.m 999
+//     
+//     Get significand of Z 
+//     W = Z - 1
+//     
+(p0)   extr.u GR_Index1 = GR_signif_Z, 59, 4 ;;  
+//     
+//     Index1 = High order 4 bits of Z
+//     X_0  = High order 15 bit of Z 
+//
+(p0)   shl GR_Index1 = GR_Index1,5 ;; 
+}
+{ .mfi
+	nop.m 999
+//     
+//     Add offset to Index1 ptr.
+//     
+(p0)   fabs FR_abs_W =  FR_W 
+//     
+//     BIAS = 0x000...FFFF
+//     Adjust Index1 ptr ( x 32) .
+//     
+(p0)   add GR_Index1 = GR_Index1,GR_Table_Ptr  
+}
+{ .mmi
+	nop.m 999 ;;
+(p0)   ld2 GR_Z_1 =[GR_Index1],4
+(p0)   extr.u GR_X_0 = GR_signif_Z, 49, 15  
 }
 ;;
 
-{ .mib
-      add GR_Index2 = GR_Index2, GR_Table_Ptr  // Pointer to table 2
+{ .mmi
+      nop.m 999
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_Z_G_H_h2#), gp
       nop.i 999
-(p12) br.ret.spnt  b0                          // Exit if x=+1.0
 }
 ;;
 
 { .mmi
-      ld2 GR_Z_2 =[GR_Index2],4                // Load Z_2
-;;
-      ldfs  FR_G_2 = [GR_Index2],4             // Load G_2
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+      nop.m 999
       nop.i 999
 }
 ;;
 
-{ .mii
-      ldfs  FR_H_2 = [GR_Index2],8             // Load H_2
-(p6)  tbit.nz.unc p9, p0 = GR_fraction_y, 63   // Test x<0 and y odd integer
-      add GR_Table_Ptr = 0xbcc, GR_table_base  // Constants_log_80_h3_G_H, G_3
-}
-;;
 
+{ .mmi
+(p0)   ldfs  FR_G_1 = [GR_Index1],4 ;; 
+(p0)   ldfs  FR_H_1 = [GR_Index1],8 
+	nop.i 999 ;;
+}
 //
-//      For x < 0 and y odd integer,, set sign = -1.
+//     Adjust Index2 (x 32). 
 //
 { .mfi
-      getf.exp GR_M = FR_W                      // Get signexp of W
-      nop.f 999
-      pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15      // X_2 = X_1 * Z_2 (bits 15-30)
+(p0)   ldfe FR_h_1 = [GR_Index1],0 
+	nop.f 999
+(p0)   pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 ;; 
 }
-{ .mfi
-      ldfe FR_h_2 = [GR_Index2]                // Load h_2
-(p9)  fnma.s1 FR_Sgn = f1, f1, f0          // If x<0, y odd int, result negative
-      sub GR_N = GR_N, GR_exp_bias             // Get true exponent of x = N
+{ .mmi
+	nop.m 999 ;;
+//
+//     load Z_1 from Index1
+//     abs_W = |W|
+//     Point to Table2 
+//
+(p0)   getf.exp GR_M = FR_abs_W 
+//
+//     M = M - BIAS 
+//     Load G_1 
+//     N = exponent of Z 
+//
+       nop.i 999;;
 }
-;;
-
-{ .mfi
-      add GR_Table_Ptr1 = 0xdc0, GR_table_base // Ptr to H_3
-      fcmp.eq.s0 p11, p0 = FR_Input_Y, FR_Half // Test y=0.5, also set denorm
-(p6)  shl GR_fraction_y=  GR_fraction_y, 1     // Shift left 1 to get fraction
+{ .mmi
+       nop.m 999
+       nop.m 999
+       nop.i 999;;
 }
-;;
-
-{ .mmb
-      setf.sig FR_float_N = GR_N
-(p6)  cmp.ne.unc p8, p0 = GR_fraction_y, r0    // Test x<0 and y not integer
-(p8)  br.cond.spnt POWL_64_XNEG                // Branch if x<0 and y not int
+{ .mmi
+       nop.m 999
+       nop.m 999
+       nop.i 999;;
 }
-;;
-
+{ .mmi
+        nop.m 999
+        nop.m 999
+(p0)   extr.u GR_Index2 = GR_X_1, 6, 4 ;;  
+}
+{ .mii
+	nop.m 999
+//     
+//     Extract Index2 
+//     Load H_1
+//     Is -8 > M ?
 //
-//      Raise possible denormal operand exception for both X and Y.
-//      Set pointers in case |x| near 1
-//      Branch to embedded sqrt(x) if y=0.5
+(p0)   shl GR_Index2=GR_Index2,5 ;; 
+(p0)   add GR_Index2 = GR_Index2, GR_Table_Ptr  
+}
 //
-{ .mfi
-      add GR_P_ptr1 = 0x6b0, GR_table_base // Constants_log_80_P, P8, NEAR path
-      fcmp.eq.s0 p12, p0 =  FR_Input_X, FR_Input_Y // Dummy to set denormal
-      add GR_P_ptr2 = 0x700, GR_table_base // Constants_log_80_P, P4, NEAR path
+//     M = exponent of abs_W
+//     X_1 = X_0 * Z_1 
+//     
+{ .mii
+(p0)   sub GR_M = GR_M, GR_BIAS  
+	nop.i 999 ;;
+(p0)   cmp.gt.unc  p7, p14 =  -8, GR_M 
 }
-{ .mfb
-      cmp.eq p15, p14 =  r0, r0            // Assume result safe (no over/under)
-      fsub.s1  FR_Delta = FR_Input_Y,f1    // Delta = y - 1.0
-(p11) br.cond.spnt POWL_64_SQRT            // Branch if y=0.5
+{ .mib
+	nop.m 999
+	nop.i 999
+(p7)   br.cond.spnt L(LOGL80_NEAR) ;; 
 }
-;;
-
 //
-//     Computes ln( x ) to extra precision
-//     Input  FR 1: FR_X
-//     Output FR 2: FR_Y_hi
-//     Output FR 3: FR_Y_lo
-//     Output PR 1: PR_Safe
+//     Load h_1
+//     Possible branch out.  
+//     Add offset of table to Index2 
 //
 { .mfi
-      and GR_M = GR_exp_mask, GR_M            // Mask to get exponent of W
-      nop.f 999
-      extr.u GR_Index3 = GR_X_2, 1, 5         // Get index3
+(p0)   ld2 GR_Z_2 =[GR_Index2],4
+(p0)   fmerge.se FR_S =  f1,FR_Z
+(p0)   sub GR_N = GR_N, GR_BIAS  
 }
 ;;
 
 { .mmi
-      shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1 // Ptr to H_3
-      shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr      // Ptr to G_3
-      sub GR_M = GR_M, GR_exp_bias            // Get true exponent of W
-}
-;;
-
-{ .mib
-      ldfs FR_G_3 = [GR_Index3],-12           // Load G_3
-      cmp.gt  p7, p14 =  -8, GR_M             // Test if |x-1| < 2^-8
-(p7)  br.cond.spnt LOGL80_NEAR                // Branch if |x-1| < 2^-8
-}
-;;
-
-// Here if |x-1| >= 2^-8
-{ .mmf
-      ldfs FR_H_3 = [GR_Table_Ptr1]           // Load H_3
       nop.m 999
-      nop.f 999
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_h3_G_H#), gp
+      nop.i 999
 }
 ;;
 
-{ .mfi
-      ldfe FR_h_3 = [GR_Index3]               // Load h_3
-      fmerge.se FR_S =  f1,FR_Z               // S = merge of 1.0 and signif(Z)
-      nop.i 999
-}
-{ .mfi
-      add GR_Table_Ptr = 0x740, GR_table_base // Constants_log_80_Q
-      fmpy.s1 FR_G = FR_G_1, FR_G_2           // G = G_1 * G_2
+{ .mmi
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+      nop.m 999
       nop.i 999
 }
 ;;
 
+//     
+//     load Z_2 
+//     N - BIAS 
+//     Point to Table 3.
+//     S = merging of Z and 1.0
+//     
+{ .mmi
+(p0)   ldfs  FR_G_2 = [GR_Index2],4 
+(p0)   setf.sig FR_float_N = GR_N 
+(p0)   add GR_Table_Ptr1 = 0x200,GR_Table_Ptr ;;  
+}
 //
-//     Begin Loading Q's -  load log2_hi part
+//     load G_2 
+//     X_2 = X_1 * Z_2 
+//     Add offset to Table 2 ptr.
+//     float_N = significand of N
 //
-{ .mfi
-      ldfe FR_log2_hi = [GR_Table_Ptr],16     // Load log2_hi
-      fadd.s1 FR_H = FR_H_1, FR_H_2           // H = H_1 + H_2
-      nop.i 999
-};;
-
+{ .mmi
+(p0)   ldfs  FR_H_2 = [GR_Index2],8 ;; 
 //
-//     h = h_1 + h_2
+//     load H_2 
+//     G = G * G_2
 //
+(p0)   ldfe FR_h_2 = [GR_Index2],0 
+(p0)   pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;; 
+}
+{ .mmi
+       nop.m 999
+       nop.m 999
+       nop.i 999;;
+}
+{ .mmi
+       nop.m 999
+       nop.m 999
+       nop.i 999;;
+}
+{ .mmi
+        nop.m 999
+        nop.m 999
+        nop.i 999;;
+}
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+(p0)   extr.u GR_Index3 = GR_X_2, 1, 5 ;;  
+}
 { .mfi
-      ldfe FR_log2_lo = [GR_Table_Ptr],16     // Load log2_lo
-      fadd.s1 FR_h = FR_h_1, FR_h_2           // h = h_1 + h_2
-      nop.i 999
+(p0)   shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1 
+	nop.f 999
+//
+//     h = h_1 + h_2  
+//     Adjust Index3 
+//
+(p0)   shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr ;; 
+}
+{ .mmb
+	nop.m 999
+(p0)   ldfe FR_h_3 = [GR_Index3],12 
+	nop.b 999 ;;
+}
+{ .mmf
+(p0)   ldfs FR_H_3 = [GR_Table_Ptr1],0 
+//
+//     float_N = Make N a fp number
+//     Load h_3
+//     Get pointer to Q table.     
+//
+(p0)   ldfs  FR_G_3 = [GR_Index3],0 
+(p0)   fmpy.s1 FR_G = FR_G_1, FR_G_2 
 }
 ;;
 
-{ .mfi
-      ldfe FR_Q_6 = [GR_Table_Ptr],16         // Load Q_6
-      fcvt.xf FR_float_N = FR_float_N
+{ .mmi
+      nop.m 999
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_Q#), gp
       nop.i 999
 }
 ;;
 
-{ .mfi
-      ldfe FR_Q_5 = [GR_Table_Ptr],16         // Load Q_5
-      nop.f 999
+{ .mmi
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+      nop.m 999
       nop.i 999
 }
 ;;
 
+
+
+{ .mfi
+(p0)   ldfe FR_log2_hi = [GR_Table_Ptr],16
+(p0)   fadd.s1 FR_H = FR_H_1, FR_H_2 
+	nop.i 999 ;;
+}
+{ .mmf
+	nop.m 999
 //
-//     G = G_1 * G_2 * G_3
+//     G = G_1 * G_2 * G_3 
 //
-{ .mfi
-      ldfe FR_Q_4 = [GR_Table_Ptr],16         // Load Q_4
-      fmpy.s1 FR_G = FR_G, FR_G_3
-      nop.i 999
+(p0)   ldfe FR_log2_lo = [GR_Table_Ptr],16 
+//
+//     load h_2 
+//     H = H_1 + H_2 
+//     Get Index3
+//
+(p0)   fadd.s1 FR_h = FR_h_1, FR_h_2 ;; 
 }
-;;
-
 //
-//     H = H_1 + H_2 + H_3
+//     Load log2_lo part
+//     r = G*S -1
 //
 { .mfi
-      ldfe FR_Q_3 = [GR_Table_Ptr],16         // Load Q_3
-      fadd.s1 FR_H = FR_H, FR_H_3
-      nop.i 999
+(p0)   ldfe FR_Q_6 = [GR_Table_Ptr],16 
+//
+//     Load H_3
+//
+(p0)   fcvt.xf FR_float_N = FR_float_N 
+	nop.i 999 ;;
 }
-;;
-
 //
-//     Y_lo = poly + Y_lo
+//     Load Q_6
+//
+{ .mmi
+(p0)   ldfe FR_Q_5 = [GR_Table_Ptr],16 ;; 
+(p0)   ldfe FR_Q_4 = [GR_Table_Ptr],16 
+	nop.i 999 ;;
+}
+{ .mmi
+(p0)   ldfe FR_Q_3 = [GR_Table_Ptr],16 ;; 
+(p0)   ldfe FR_Q_2 = [GR_Table_Ptr],16 
+	nop.i 999 ;;
+}
+{ .mmf
+	nop.m 999
+//
+//     poly_lo = Q_5 + r * Q_6
+//     Load Q_2
+//     rsq = r * r 
+//
+(p0)   ldfe FR_Q_1 = [GR_Table_Ptr],16 
 //
-//     h = h_1 + h_2 + h_3
+//     h = h_1 + h_2 + h_3   
+//     H = H_1 + H_2 + H_3 
+//     Load G_3.
+//     Begin Loading Q's -  load log2_hi part
 //
+(p0)   fmpy.s1 FR_G = FR_G, FR_G_3 
+}
 { .mfi
-      ldfe FR_Q_2 = [GR_Table_Ptr],16         // Load Q_2
-      fadd.s1 FR_h = FR_h, FR_h_3
-      nop.i 999
+	nop.m 999
+(p0)   fadd.s1 FR_H = FR_H, FR_H_3 
+	nop.i 999 
 }
 ;;
 
 //
-//     GS_hi = G*S
-//     r = G*S -1
+//     Y_lo = poly + Y_lo 
 //
-{ .mfi
-      ldfe FR_Q_1 = [GR_Table_Ptr],16         // Load Q_1
-      fmpy.s1 FR_GS_hi = FR_G, FR_S
+
+{ .mmi
+      nop.m 999
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_Arg#), gp
       nop.i 999
 }
-{ .mfi
+;;
+
+{ .mmi
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
       nop.m 999
-      fms.s1 FR_r = FR_G, FR_S, f1
       nop.i 999
 }
 ;;
 
+
+{ .mfi
+	nop.m 999
+(p0)   fadd.s1 FR_h = FR_h, FR_h_3 
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
 //
-//     poly_lo = Q_5 + r * Q_6
+//     Load Q_5
 //
+(p0)   fmpy.s1 FR_GS_hi = FR_G, FR_S 
+	nop.i 999
+}
 { .mfi
-      getf.exp GR_Delta_Exp =  FR_Delta     // Get signexp of y-1 for exp calc
-      fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5
-      nop.i 999
+	nop.m 999
+(p0)   fms.s1 FR_r = FR_G, FR_S, f1 
+	nop.i 999 ;;
 }
+{ .mfi
+	nop.m 999
+(p0)   fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5 
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
 //
-//     r_cor = GS_hi -1
+//     GS_hi = G*S
+//     Load Q_4
 //
+(p0)   fsub.s1 FR_r_cor = FR_GS_hi, f1 
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-      fsub.s1 FR_r_cor = FR_GS_hi, f1
-      nop.i 999
+	nop.m 999
+(p0)   fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi 
+	nop.i 999
 }
-;;
-
+{ .mfi
+	nop.m 999
+(p0)   fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1 
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
 //
+//     Load Q_3
+//     r_cor = GS_hi -1
 //     GS_lo  = G*S - GS_hi
 //
+(p0)   fmpy.s1 FR_rsq = FR_r, FR_r 
+	nop.i 999
+}
 { .mfi
-      nop.m 999
-      fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H 
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//     rsq = r * r
+//     poly = poly_hi + rsq * poly_lo 
+//     Tbl = float_N*log2_hi + H
 //
-{ .mfi
-      nop.m 999
-      fmpy.s1 FR_rsq = FR_r, FR_r
-      nop.i 999
+(p0)   fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h 
+	nop.i 999 ;;
 }
+{ .mfi
+	nop.m 999
 //
-//     G = float_N*log2_hi + H
+//     r_cor = r_cor - r
+//     poly_hi = r * Q_2 + Q_1
 //
-{ .mfi
-      nop.m 999
-      fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H
-      nop.i 999
+(p0)   fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4 
+	nop.i 999
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//     Y_lo = float_N*log2_lo + h
+//     Load Q_1
 //
+(p0)   fsub.s1 FR_r_cor = FR_r_cor, FR_r 
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-      fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h
-      nop.i 999
+	nop.m 999
+// 
+//     Y_lo = float_N*log2_lo + h
+// 
+(p0)   fadd.s1 FR_Y_hi = FR_G, FR_r 
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//      poly_lo = Q_4 + r * poly_lo
-//      r_cor = r_cor - r
+//     poly_lo = Q_4 + r * poly_lo;;
+//     r_cor = r_cor + GS_lo;;
 //
+(p0)   fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3 
+	nop.i 999
+}
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4
-      nop.i 999
+	nop.m 999
+(p0)   fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fsub.s1 FR_r_cor = FR_r_cor, FR_r
-      nop.i 999
+	nop.m 999
+(p0)   fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo 
+	nop.i 999
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//      poly_hi = r * Q_2 + Q_1
-//      Y_hi = G + r
+//     poly_lo = Q_3 + r * poly_lo;;
 //
+(p0)   fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly 
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1
-      nop.i 999
+	nop.m 999
+(p0)   fsub.s1 FR_Y_lo = FR_G, FR_Y_hi 
+	nop.i 999
+}
+{ .mmi
+(p0)   ldfe FR_L_Inv = [GR_Table_Ptr],16 ;; 
+(p0)   ldfe FR_L_hi = [GR_Table_Ptr],16 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fadd.s1 FR_Y_hi = FR_G, FR_r
-      nop.i 999
+(p0)   ldfe FR_L_lo = [GR_Table_Ptr],16 
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//      poly_lo = Q_3 + r * poly_lo
-//      r_cor = r_cor + GS_lo
+//     Y_hi = Tbl + r 
+//     r_cor = r_cor + Y_lo 
 //
-{ .mfi
-      nop.m 999
-      fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3
-      nop.i 999
+(p0)   fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor 
+	nop.i 999 ;;
 }
 { .mfi
+	nop.m 999
+//     Y_lo = Tbl - Y_hi 
+//     poly = rsq * poly + r_cor
+//
+(p0)   fadd.s1 FR_Y_lo = FR_Y_lo, FR_r 
+	nop.i 999 ;;
+}
+{ .mfb
+	nop.m 999
+//
+//     Y_lo =  Y_lo + r  
+//
+(p0)   fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly 
+//
+//     Load L_Inv
+//     Load L_hi
+//     Load L_lo
+//     all long before they are needed.
+//     They are used in LOGL_RETURN PATH
+//
+br.cond.sptk L(LOGL_RETURN) ;; 
+}
+L(LOGL80_NEAR): 
+//
+//     Branch LOGL80_NEAR
+//
+
+{ .mmi
       nop.m 999
-      fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_P#), gp
       nop.i 999
 }
 ;;
 
-//
-//      Y_lo = G - Y_hi
-//
-{ .mfi
+{ .mmi
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
       nop.m 999
-      fsub.s1 FR_Y_lo_2 = FR_G, FR_Y_hi
       nop.i 999
 }
 ;;
 
+{ .mfi
+	nop.m 999
+(p0)   fmpy.s1 FR_Wsq = FR_W, FR_W 
+(p0)   add GR_Table_Ptr1 = 0x50,GR_Table_Ptr  
+}
 //
-//      r_cor = r_cor + Y_lo
-//      poly = poly_hi + rsq * poly_lo
+//     Adjust ptr to 1/2 
+//     Adjust Ptr1 to P_4
 //
-{ .mfi
-      add  GR_Table_Ptr   = 0x0, GR_table_base   // Constants_exp_64_Arg
-      fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo
-      nop.i 999
+{ .mmi
+(p0)   ldfe FR_Half = [GR_Table_Ptr],16 ;; 
+(p0)   ldfe FR_P_4 = [GR_Table_Ptr1],16 
+	nop.i 999
 }
-{ .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly
-      nop.i 999
+//
+//     Load 1/2 
+//
+{ .mmi
+(p0)   ldfe FR_P_8 = [GR_Table_Ptr],16 ;; 
+(p0)   ldfe FR_P_3 = [GR_Table_Ptr1],16 
+	nop.i 999
+}
+{ .mmi
+(p0)   ldfe FR_P_7 = [GR_Table_Ptr],16 ;; 
+(p0)   ldfe FR_P_2 = [GR_Table_Ptr1],16 
+	nop.i 999
 }
-;;
-
 //
-//      Load L_hi
-//      Load L_lo
-//      all long before they are needed.
-//      They are used in LOGL_RETURN PATH
+//     Load P_7
+//     half_W = .5 * W
+//     Load P_3
 //
-//      Y_lo =  Y_lo + r
-//      poly = rsq * poly + r_cor
+{ .mmi
+(p0)   ldfe FR_P_6 = [GR_Table_Ptr],16 ;; 
+(p0)   ldfe FR_P_1 = [GR_Table_Ptr1],16 
+	nop.i 999 ;;
+}
+//
+//     Load P_6
+//     Wsq = w * w
+//     poly = w*P_4 + P_3 
+//     Load P_2
 //
 { .mfi
-      ldfe FR_L_hi = [GR_Table_Ptr],16           // Load L_hi
-      fadd.s1 FR_Y_lo = FR_Y_lo_2, FR_r
-      nop.i 999
+(p0)   ldfe FR_P_5 = [GR_Table_Ptr],16 
+//
+//     Load P_5
+//     poly_lo =  w * P_8 + P_7 
+//     Y_hi = w - (1/2)w*w
+//     Load P_1
+//
+(p0)   fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq 
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 FR_W3 = FR_Wsq, FR_W 
+	nop.i 999 
 }
 ;;
 
-{ .mfb
-      ldfe FR_L_lo = [GR_Table_Ptr],16           // Load L_lo
-      fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly
-      br.cond.sptk LOGL_RETURN                   // Branch to common code
-}
-;;
-
-
-LOGL80_NEAR:
-// Here if |x-1| < 2^-8
 //
-//     Branch LOGL80_NEAR
+//     Y_lo = W3 * poly + Y_lo
 //
 
-{ .mmf
-      ldfe FR_P_8 = [GR_P_ptr1],16           // Load P_8
-      ldfe FR_P_4 = [GR_P_ptr2],16           // Load P_4
-      fmpy.s1 FR_Wsq = FR_W, FR_W
-}
-;;
-
 { .mmi
-      ldfe FR_P_7 = [GR_P_ptr1],16           // Load P_7
-      ldfe FR_P_3 = [GR_P_ptr2],16           // Load P_3
+      nop.m 999
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_Arg#), gp
       nop.i 999
 }
 ;;
 
 { .mmi
-      ldfe FR_P_6 = [GR_P_ptr1],16           // Load P_6
-      ldfe FR_P_2 = [GR_P_ptr2],16           // Load P_2
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+      nop.m 999
       nop.i 999
 }
 ;;
 
+
 { .mmi
-      ldfe FR_P_5 = [GR_P_ptr1],16           // Load P_5
-      ldfe FR_P_1 = [GR_P_ptr2],16           // Load P_1
-      nop.i 999
+(p0)   ldfe FR_L_Inv = [GR_Table_Ptr],16 ;; 
+(p0)   ldfe FR_L_hi = [GR_Table_Ptr],16 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      getf.exp GR_Delta_Exp =  FR_Delta      // Get signexp of y-1 for exp calc
-      fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq
-      nop.i 999
+(p0)   ldfe FR_L_lo = [GR_Table_Ptr],16 
+//
+//     Load P_8
+//     Load P_4
+//
+(p0)   fmpy.s1 FR_half_W = FR_Half, FR_W 
+	nop.i 999 ;;
 }
 { .mfi
-      add  GR_Table_Ptr = 0x0, GR_table_base // Constants_exp_64_Arg
-      fmpy.s1 FR_W3 = FR_Wsq, FR_W
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_half_W = FR_Half, FR_W
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      ldfe FR_L_hi = [GR_Table_Ptr],16
-      fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7
-      nop.i 999
+	nop.m 999
+(p0)   fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3
-      nop.i 999
+	nop.m 999
+//
+//     W4 = Wsq * Wsq
+//     poly = w *poly + P_2
+//
+(p0)   fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6 
+	nop.i 999
 }
-;;
-
 { .mfi
-      ldfe FR_L_lo = [GR_Table_Ptr],16
-      fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 FR_poly = FR_W, FR_poly, FR_P_2 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6
-      nop.i 999
+	nop.m 999
+(p0)   fsub.s1 FR_Y_lo = FR_W, FR_Y_hi 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_W, FR_poly, FR_P_2
-      nop.i 999
+	nop.m 999
+//
+//     poly = w * poly + P_1
+//     w3 = wsq * w
+//
+(p0)   fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fsub.s1 FR_Y_lo = FR_W, FR_Y_hi
-      nop.i 999
+	nop.m 999
+//
+//     poly_lo = w * poly_lo + P_6
+//     Y_lo = W - Y_hi
+//
+(p0)   fma.s1 FR_poly = FR_W, FR_poly, FR_P_1 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5
-      nop.i 999
+	nop.m 999
+(p0)   fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_W, FR_poly, FR_P_1
-      nop.i 999
+	nop.m 999
+//
+//     poly_lo = w * poly_lo + P_5
+//     Y_lo = Y_lo - w * (1/2)w
+//
+(p0)   fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo
-      nop.i 999
+	nop.m 999
+//
+//     Y_lo = (W-Y_hi) - w * (1/2)w
+//     poly =  W4* poly_lo + poly 
+//
+(p0)   fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo 
+	nop.i 999 ;;
+}
+L(LOGL_RETURN): 
+{ .mfi
+(p0)   add GR_Expo_Range = 0x2,r0  
+//
+//     Load L_Inv
+//     Load L_hi
+//     Load L_lo
+//     all long before they are needed.
+//
+//
+//     kernel_log_80 computed ln(X)
+//     and return logX_hi and logX_lo as results.
+//     PR_pow_Safe set as well. 
+//
+(p0)   fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo 
+//
+//     Compute Y * (logX_hi + logX_lo) 
+//     P_hi -> X 
+//     P_lo -> X_cor 
+//     (Manipulate names so that inputs are in
+//     the place kernel_exp expects them)
+//     Set GR_Flag to 2 
+//     Set GR_Expo_Range to Double
+// 
+//     This function computes exp( x  + x_cor) 
+//     Input  FR 1: FR_X            
+//     Input  FR 2: FR_X_cor  
+//     Input  GR 1: GR_Flag  
+//     Input  GR 2: GR_Expo_Range  
+//     Output FR 3: FR_Y_hi  
+//     Output FR 4: FR_Y_lo  
+//     Output FR 5: FR_Scale  
+//     Output PR 1: PR_Safe  
+// 
+(p0)   cmp.eq.unc  p15, p0 =  r0, r0 
 }
 ;;
 
-{ .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly
-      nop.i 999
+{ .mmi
+(p0)  addl           GR_W1_ptr   = @ltoff(Constants_exp_64_W1#), gp
+(p0)  addl           GR_W2_ptr   = @ltoff(Constants_exp_64_W2#), gp
+(p0)  add GR_Flag = 0x2,r0  
 }
 ;;
 
-{ .mfi
-      nop.m 999
-      fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo
-      nop.i 999
+{ .mmi
+      ld8 GR_W1_ptr = [GR_W1_ptr]
+      ld8 GR_W2_ptr = [GR_W2_ptr]
+(p0)   cmp.ne.unc  p7, p0 =  0x1, GR_Flag 
 }
 ;;
 
+{ .mlx
+	nop.m 999
+(p0)   movl GR_Mask = 0x1FFFF ;; 
+}
 
-LOGL_RETURN:
-// Common code for completion of both logx paths
 
+{ .mlx
+	nop.m 999
+(p0)   movl GR_BIAS = 0x0FFFF ;; 
+}
+{ .mfi
+	nop.m 999
 //
-//     L_hi, L_lo already loaded.
-//
-//
-//     kernel_log_80 computed ln(X)
-//     and return logX_hi and logX_lo as results.
-//     PR_pow_Safe set as well.
+//     X_lo =  Y * logX_lo
 //
+(p0)   fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo 
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
 //
-//     Compute Y * (logX_hi + logX_lo)
-//     P_hi -> X
-//     P_lo -> X_cor
-//     (Manipulate names so that inputs are in
-//     the place kernel_exp expects them)
+//     Set Safe=True 
+//     Flag is always 2 for this routine
 //
-//     This function computes exp( x  + x_cor)
-//     Input  FR 1: FR_X
-//     Input  FR 2: FR_X_cor
-//     Output FR 3: FR_Y_hi
-//     Output FR 4: FR_Y_lo
-//     Output FR 5: FR_Scale
-//     Output PR 1: PR_Safe
+(p0)   fmpy.s1 FR_float_N = FR_X, FR_L_Inv 
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
 //
-//     P15 is True
+//     X_hi  = Y * logX_hi + X_lo
+//     Set GR_Flag = 2 for exp(x + xcor)
 //
-// Load constants used in computing N using right-shift technique
-{ .mlx
-      mov GR_exp_2tom51 = 0xffff-51
-      movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc  // significand of 1/ln2
+(p0)   fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi 
+	nop.i 999 ;;
 }
-{ .mlx
-      add  GR_Special_Exp = -50,GR_exp_bias
-      movl GR_rshf_2to51 = 0x4718000000000000   // 1.10000 2^(63+51)
+{ .mmi
+	nop.m 999 ;;
+(p0)   getf.exp GR_Expo_X = FR_X 
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+(p0)   and GR_Expo_X = GR_Expo_X, GR_Mask  
 //
+//     Calculate unBIASed exponent of X
 //     Point to Table of W1s
 //     Point to Table of W2s
 //
-{ .mmi
-      add GR_W1_ptr   = 0x2b0, GR_table_base    // Constants_exp_64_W1
-      add GR_W2_ptr   = 0x4b0, GR_table_base    // Constants_exp_64_W2
-      cmp.le p6,p0= GR_Delta_Exp,GR_Special_Exp
-};;
-
-// Form two constants we need
-//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128
-//  1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand
-
+(p0)   fcvt.fx.s1 FR_N = FR_float_N 
+	nop.i 999 ;;
+}
 { .mfi
-      setf.sig  FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63
-      nop.f 999
-      and GR_Delta_Exp=GR_Delta_Exp,GR_exp_mask  // Get exponent of y-1
+	nop.m 999
+(p0)   fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo 
+//
+//     Float_N = X * L_Inv
+//     Create exponent BIAS
+//     Get BIASed exponent of X
+//
+(p0)   sub GR_Expo_X = GR_Expo_X, GR_BIAS ;;  
 }
-{ .mlx
-      setf.d  FR_RSHF_2TO51 = GR_rshf_2to51    // Form const 1.1000 * 2^(63+51)
-      movl GR_rshf = 0x43e8000000000000        // 1.10000 2^63 for right shift
+{ .mib
+(p0)   cmp.gt.unc  p9, p0  =  -6, GR_Expo_X 
+	nop.i 999
+//
+//     N = fcvt.fx(float_N)
+//     If -6 > Expo_X, set P9
+//
+(p9)   br.cond.spnt L(EXPL_SMALL) 
 }
 ;;
 
-{ .mfi
-      nop.m 999
-      fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo // logx_lo is Y_lo
-      cmp.eq  p15, p0=  r0, r0                 // Set p15, assume safe
-};;
-
+//
+//     If expo_X < -6 goto exp_small
+//
 { .mmi
-      setf.exp FR_2TOM51 = GR_exp_2tom51 // Form 2^-51 for scaling float_N
-      setf.d  FR_RSHF = GR_rshf          // Form right shift const 1.1000 * 2^63
-      add GR_Table_Ptr1   = 0x50, GR_table_base // Constants_exp_64_P for
-                                                // EXPL_SMALL path
+      nop.m 999
+(p0)  addl           GR_T1_ptr   = @ltoff(Constants_exp_64_T1#), gp
+(p0)  cmp.lt.unc  p10, p0 =  14, GR_Expo_X 
 }
 ;;
 
 { .mmi
-      ldfe FR_P_6 = [GR_Table_Ptr1],16          // Load P_6 for EXPL_SMALL path
-;;
-      ldfe FR_P_5 = [GR_Table_Ptr1],16          // Load P_5 for EXPL_SMALL path
+      ld8 GR_T1_ptr = [GR_T1_ptr]
+      nop.m 999
       nop.i 999
 }
 ;;
 
-{ .mfi
-      ldfe FR_P_4 = [GR_Table_Ptr1],16          // Load P_4 for EXPL_SMALL path
-      fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo  // logx_hi ix Y_hi
-      nop.i 999
+{ .mib
+	nop.m 999
+	nop.i 999
+//
+//     If 14 < Expo_X, set P10
+//     Create pointer to T1 table
+//  
+(p10)  br.cond.spnt L(EXPL_HUGE) ;;
 }
-;;
+
 
 { .mmi
-      ldfe FR_P_3 = [GR_Table_Ptr1],16          // Load P_3 for EXPL_SMALL path
-;;
-      ldfe FR_P_2 = [GR_Table_Ptr1],16          // Load P_2 for EXPL_SMALL path
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_Exponents#), gp
+(p0)  addl           GR_T2_ptr   = @ltoff(Constants_exp_64_T2#), gp
       nop.i 999
 }
 ;;
 
-// N = X * Inv_log2_by_2^12
-// By adding 1.10...0*2^63 we shift and get round_int(N_signif) in significand.
-// We actually add 1.10...0*2^51 to X * Inv_log2 to do the same thing.
-{ .mfi
-      ldfe FR_P_1 = [GR_Table_Ptr1]             // Load P_1 for EXPL_SMALL path
-      fma.s1 FR_N = FR_X, FR_INV_LN2_2TO63, FR_RSHF_2TO51
-      nop.i 999
-}
-{ .mfb
-      nop.m 999
-      fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi  // P_hi is X
-(p6)  br.cond.spnt POWL_Y_ALMOST_1              // Branch if |y-1| < 2^-50
-}
-;;
-
 { .mmi
-      getf.exp GR_Expo_X = FR_X
-      add GR_T1_ptr   = 0x0b0, GR_table_base    // Constants_exp_64_T1
-      add GR_T2_ptr   = 0x1b0, GR_table_base    // Constants_exp_64_T2
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+      ld8 GR_T2_ptr = [GR_T2_ptr]
+      nop.i 999
 }
 ;;
 
-// float_N = round_int(N)
-// The signficand of N contains the rounded integer part of X * 2^12/ln2,
-// as a twos complement number in the lower bits (that is, it may be negative).
-// That twos complement number (called N) is put into GR_N_fix.
-
-// Since N is scaled by 2^51, it must be multiplied by 2^-51
-// before the shift constant 1.10000 * 2^63 is subtracted to yield float_N.
-// Thus, float_N contains the floating point version of N
-
 
-{ .mfi
-      add  GR_Table_Ptr   = 0x20, GR_table_base    // Constants_exp_64_A
-      fms.s1 FR_float_N = FR_N, FR_2TOM51, FR_RSHF // Form float_N
-      nop.i 999
-}
-//     Create low part of Y(ln(x)_hi + ln(x)_lo) as P_lo
-{ .mfi
-      mov GR_Big_Pos_Exp = 0x3ffe               // 16382, largest safe exponent
-      fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo
-      mov GR_Big_Neg_Exp = -0x3ffd              // -16381 smallest safe exponent
-};;
-
-{ .mfi
-      nop.m 999
-      fmpy.s1 FR_rsq = FR_X, FR_X               // rsq = X*X for EXPL_SMALL path
-      mov GR_vsm_expo = -70                     // Exponent for very small path
+{ .mmi
+(p0)   shladd GR_Table_Ptr = GR_Expo_Range,4,GR_Table_Ptr ;;  
+//
+//     Adjust T1_ptr by x 4 for single-precision values
+//     Adjust T2_ptr by x 4 for single-precision values
+//
+(p0)   ld8 GR_Big_Pos_Exp = [GR_Table_Ptr],8
+	nop.i 999 ;;
 }
+//
+//     Load double W1
+//     Load +max exponent
+//
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly_lo = FR_P_6, FR_X, FR_P_5  // poly_lo for EXPL_SMALL path
-      add GR_temp = 0x1,r0                      // For tiny signif if small path
+(p0)   ld8 GR_Big_Neg_Exp = [GR_Table_Ptr],0
+//
+//     If 14 < Expo_X, goto exp_huge
+//
+(p0)   fcvt.xf FR_float_N = FR_N 
+	nop.i 999 
 }
 ;;
 
 //
-//      If expo_X < -6 goto exp_small
+//     Load double W2
+//     Load -max exponent
+//     Load ptr to A's
 //
+
 { .mmi
-      getf.sig GR_N_fix = FR_N
-      ldfe FR_A_3 = [GR_Table_Ptr],16         // Load A_3
-      and GR_Expo_X = GR_Expo_X, GR_exp_mask  // Get exponent of X
+(p0)  getf.sig GR_N_fix = FR_N 
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_A#), gp
+      nop.i 999
 }
 ;;
 
-{ .mfi
-      ldfe FR_A_2 = [GR_Table_Ptr],16         // Load A_2
-      nop.f 999
-      sub GR_Expo_X = GR_Expo_X, GR_exp_bias  // Get true exponent of X
+{ .mmi
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
 //
-//     If -6 > Expo_X, set P9 and branch
+//     Load single T1
+//     Load single T2
+//     W_1_p1 = W_1 + 1
 //
-{ .mfb
-      cmp.gt  p9, p0  =  -6, GR_Expo_X
-      fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X // r = X - L_hi * float_N
-(p9)  br.cond.spnt EXPL_SMALL                  // Branch if |X| < 2^-6
+{ .mmi
+(p0)   ldfe FR_A_3 = [GR_Table_Ptr],16 ;; 
+//
+//     Load A_3
+//     if k > big_pos_exp, set p14 and Safe=False
+//
+(p0)   ldfe FR_A_2 = [GR_Table_Ptr],16 
+(p0)   extr.u GR_M1 = GR_N_fix, 6, 6  
 }
-;;
-
+{ .mmi
+	nop.m 999 ;;
+(p0)   shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr  
 //
-//     If 14 <= Expo_X, set P10
+//     float_N = fcvt.xf(N) 
+//     N_fix = significand of N
+//     Create pointer to T2 table
 //
-{ .mib
-      cmp.le  p10, p0 =  14, GR_Expo_X
-      nop.i 999
-(p10) br.cond.spnt EXPL_HUGE                   // Branch if |X| >= 2^14
+(p0)   extr.u GR_M2 = GR_N_fix, 0, 6  
 }
-;;
-
 //
-//      Load single T1
-//      Load single T2
-//      W_1_p1 = W_1 + 1
+//     r = r + X_cor
+//     Adjust W1_ptr by x 8 for double-precision values
+//     Adjust W2_ptr by x 8 for double-precision values
+//     Adjust Table_ptr by Expo_Rangex16 
 //
 { .mmi
-      nop.m 999
-      nop.m 999
-      extr.u GR_M1 = GR_N_fix, 6, 6            // Extract index M_1
+(p0)   shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr ;;  
+(p0)   ldfd  FR_W1 = [GR_W1_ptr],0 
+(p0)   shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr  
 }
-;;
-
 //
-//      k = extr.u(N_fix,0,6)
+//     Load ptr to A's
 //
-{ .mmi
-      shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr     // Point to W1
-      shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr     // Point to T1
-      extr.u GR_M2 = GR_N_fix, 0, 6            // Extract index M_2
+{ .mfi
+(p0)   ldfs  FR_T1 = [GR_T1_ptr],0 
+(p0)   fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X 
+(p0)   shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr ;; 
 }
-;;
-
-// N_fix is only correct up to 50 bits because of our right shift technique.
-// Actually in the normal path we will have restricted K to about 14 bits.
-// Somewhat arbitrarily we extract 32 bits.
 { .mmi
-      ldfd  FR_W1 = [GR_W1_ptr]
-      shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr     // Point to W2
-      extr GR_k = GR_N_fix, 12, 32             // Extract k
+(p0)   ldfd  FR_W2 = [GR_W2_ptr],0 
+(p0)   ldfs  FR_T2 = [GR_T2_ptr],0 
+//
+//     r = x - L_hi * float_N
+//     M2 = extr.u(N_fix,0,6)
+//     M1 = extr.u(N_fix,6,6)
+//
+(p0)   extr GR_k = GR_N_fix, 12, 52 ;;  
 }
-;;
-
+//
+//     Load A_1
+//     poly = A_3 * r + A_2
+//     rsq = r*r
+//
+{ .mii
+(p0)   add GR_BIAS_p_k = GR_BIAS, GR_k  
+(p0)   cmp.gt.unc  p14,p15 = GR_k,GR_Big_Pos_Exp ;; 
+(p15)  cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
+}
+//
+//     BIAS_p_K = BIAS + k
+//     T = T1 * T2
+//
 { .mfi
-      ldfs  FR_T1 = [GR_T1_ptr]
-      fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r
-      shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr     // Point to T2
+(p0)   setf.exp FR_Scale = GR_BIAS_p_k  
+	nop.f 999
+	nop.i 999 ;;
 }
 { .mfi
-      add GR_exp_bias_p_k = GR_exp_bias, GR_k
-      nop.f 999
-      cmp.gt  p14,p15 = GR_k,GR_Big_Pos_Exp
+	nop.m 999
+(p0)   fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r 
+	nop.i 999
 }
-;;
-
 //
-//      if k < big_neg_exp, set p14 and Safe=False
+//     W = W_1_p1 * W2 + W1
 //
-{ .mmi
-      ldfs  FR_T2 = [GR_T2_ptr]
-(p15) cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
-      nop.i 999
+{ .mfi
+(p0)   ldfe FR_A_1 = [GR_Table_Ptr],16 
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
-{ .mmi
-      setf.exp FR_Scale = GR_exp_bias_p_k
-      ldfd  FR_W2 = [GR_W2_ptr]
-      nop.i 999
+{ .mfi
+	nop.m 999
+(p0)   fadd.s1 FR_W_1_p1 = FR_W1, f1 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      ldfe FR_A_1 = [GR_Table_Ptr],16
-      fadd.s1 FR_r = FR_r, FR_X_cor
-      nop.i 999
+	nop.m 999
+//
+//     k = extr.u(N_fix,0,6)
+//     r = r - N * L_lo
+//     Load ptr to Table of exponent thresholds.
+//
+(p0)   fadd.s1 FR_r = FR_r, FR_X_cor 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fadd.s1 FR_W_1_p1 = FR_W1, f1
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 FR_T = FR_T1, FR_T2 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2
-      nop.i 999
+	nop.m 999
+//
+//     if k < big_neg_exp, set p14 and Safe=False
+//     Load A_2
+//
+(p0)   fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_rsq = FR_r, FR_r
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_T = FR_T1, FR_T2
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 FR_rsq = FR_r, FR_r 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1
-      nop.i 999
+	nop.m 999
+(p0)   mov FR_Y_hi = FR_T 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_TMP1 = FR_Scale, FR_Sgn, f0
-      nop.i 999
+	nop.m 999
+//
+//     Scale = set_exp(BIAS_p_k)
+//     poly = r * poly + A_1
+//
+(p0)   fadd.s1 FR_Wp1 = FR_W, f1 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_r, FR_poly, FR_A_1
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 FR_poly = FR_r, FR_poly, FR_A_1 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_TMP2 = FR_T, f1, f0            // TMP2 = Y_hi = T
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 FR_poly = FR_rsq, FR_poly,FR_r  
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
+	nop.m 999
+//
+//     Wp1 =  W + 1
+//     poly = rsq * poly + r
+//
+(p0)   fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W 
+	nop.i 999 ;;
+}
+{ .mfb
+	nop.m 999
+//
+//     Y_lo = poly * Wp1 + W 
+//     Y_hi = T
+//
+(p0)   fmpy.s1 FR_Y_lo = FR_Y_lo, FR_T 
+//
+//     Y_lo = T * Y_lo
+//
+(p0)   br.cond.sptk L(EXPL_RETURN) ;; 
+}
+
+L(EXPL_SMALL): 
+
+//
+//     r4 = rsq * rsq
+//
+
+{ .mmi
       nop.m 999
-      fadd.s1 FR_Wp1 = FR_W, f1
+(p0)  addl           GR_Table_Ptr1   = @ltoff(Constants_exp_64_P), gp
       nop.i 999
 }
 ;;
 
-{ .mfi
+{ .mmi
+      ld8 GR_Table_Ptr1 = [GR_Table_Ptr1]
       nop.m 999
-      fma.s1 FR_poly = FR_rsq, FR_poly,FR_r
       nop.i 999
 }
 ;;
 
-{ .mfi
-      nop.m 999
-      fma.s1 FR_Tscale = FR_T, FR_TMP1, f0    // Scale * Sgn * T
-      nop.i 999
+{ .mmf
+	nop.m 999
+(p0)   ldfe FR_P_6 = [GR_Table_Ptr1],16 
+//
+//     Return  
+//
+(p0)   fadd.s1 FR_r = FR_X,f0 ;; 
 }
-{ .mfi
+
+{ .mmi
       nop.m 999
-      fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W
+(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_Exponents#), gp
       nop.i 999
 }
 ;;
 
-{ .mfb
-      nop.m 999
-      fmpy.s1 FR_TMP3 = FR_Y_lo, FR_Tscale
-      br.cond.sptk POWL_64_SHARED
+{ .mmi
+      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+(p0)  ldfe FR_P_5 = [GR_Table_Ptr1],16 
+      nop.i 999
 }
 ;;
 
-
-EXPL_SMALL:
-// Here if |ylogx| < 2^-6
 //
-//     Begin creating lsb to perturb final result
+//     Is input very small? 
+//     Load P_5
+//
+{ .mii
+(p0)   ldfe FR_P_4 = [GR_Table_Ptr1],16 
+(p0)   add GR_Table_Ptr = 0x040,GR_Table_Ptr ;;  
+(p0)   shladd GR_Table_Ptr = GR_Expo_Range,3,GR_Table_Ptr ;;  
+}
+{ .mmb
+(p0)   ldfe FR_P_3 = [GR_Table_Ptr1],16 
 //
+//     Adjust ptr.
+//
+(p0)   ld8  GR_vsm_expo = [GR_Table_Ptr],0
+	nop.b 999 ;;
+}
 { .mfi
-      setf.sig FR_temp = GR_temp
-      fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_4
-      cmp.lt  p12, p0 =  GR_Expo_X, GR_vsm_expo   // Test |ylogx| < 2^-70
+	nop.m 999
+//
+//     r = X (don't seem to need X_Cor) 
+//     Load the threshold exponents
+//
+(p0)   fmpy.s1 FR_rsq = FR_r, FR_r 
+	nop.i 999 ;;
 }
+//
+//     Load the negative integer
+//     Load P_5
+//
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly_hi = FR_P_2, FR_X, FR_P_1
-      nop.i 999
+(p0)   cmp.lt.unc  p12, p0 =  GR_Expo_X, GR_vsm_expo 
+	nop.f 999
+	nop.i 999 ;;
+}
+{ .mfb
+	nop.m 999
+//
+//     rsq = r * r
+//     Offset into exponents
+//
+(p0)   fmpy.s1 FR_r4 = FR_rsq, FR_rsq 
+(p12)  br.cond.spnt L(EXPL_VERY_SMALL) ;; 
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_TMP2 = f1, f1
-      nop.i 999
+(p0)   ldfe FR_P_2 = [GR_Table_Ptr1],16 
+//
+//     Load p4,p3,p2,p1
+//
+(p0)   fma.s1 FR_poly_lo = FR_P_6, FR_r, FR_P_5 
+//
+//     Y_lo = r4 * poly_lo + poly_hi
+//     Scale = 1.0
+//
+(p0)   add GR_temp = 0x1,r0 ;;  
 }
+{ .mmf
+	nop.m 999
+(p0)   ldfe FR_P_1 = [GR_Table_Ptr1],0 
+(p0)   mov FR_Scale = f1 
+}
+//
+//     Begin creating lsb to perturb final result
+//
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_TMP1 = FR_Sgn, f1
-      nop.i 999
+(p0)   setf.sig FR_temp = GR_temp 
+(p0)   mov FR_Y_hi = f1 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_r4 = FR_rsq, FR_rsq
-(p12) cmp.eq  p15, p0 =  r0, r0                   // Set safe if |ylogx| < 2^-70
+	nop.m 999
+//
+//     poly_lo = p_5 + p_6 * r
+//     poly_hi = p_1 + p_2 * r
+//
+(p0)   fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_4 
+	nop.i 999 ;;
 }
-{ .mfb
-      nop.m 999
-(p12) fmpy.s1 FR_TMP3 = FR_Sgn, FR_X
-(p12) br.cond.spnt POWL_64_SHARED                 // Branch if |ylogx| < 2^-70
+{ .mfi
+	nop.m 999
+//
+//     poly_lo = p_4 + poly_lo * r
+//     poly_hi = r + poly_hi * rsq
+//
+(p0)   fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_3 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_3
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 FR_poly_hi = FR_P_2, FR_r, FR_P_1 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_X
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_r 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi
-      nop.i 999
+	nop.m 999
+//  
+//     poly_lo = p_3 + poly_lo * r
+//     Y_hi = 1, always
+//  
+(p0)   fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_TMP3 = FR_Y_lo, FR_TMP1      // Add sign info
-      nop.i 999
+	nop.m 999
+//
+//     Set lsb in fp register
+// 
+(p0)   for FR_temp = FR_Y_lo,FR_temp 
+	nop.i 999 ;;
 }
-;;
-
+{ .mfb
+	nop.m 999
 //
 //     Toggle on last bit of Y_lo
+//
+(p0)   fmerge.se FR_Y_lo = FR_Y_lo,FR_temp 
+//
 //     Set lsb of Y_lo to 1
 //
+(p0)   br.cond.sptk L(EXPL_RETURN) ;; 
+}
+L(EXPL_VERY_SMALL): 
 { .mfi
-      nop.m 999
-      for FR_temp = FR_Y_lo,FR_temp
-      nop.i 999
+	nop.m 999
+(p0)   mov FR_Y_lo = FR_r 
+(p0)   cmp.eq.unc  p15, p0 =  r0, r0 
 }
-;;
-
+{ .mfi
+       nop.m 999
+(p0)   mov FR_Scale = f1
+       nop.i 999
+};;
 { .mfb
-      nop.m 999
-      fmerge.se FR_TMP3 = FR_TMP3,FR_temp
-      br.cond.sptk POWL_64_SHARED
+	nop.m 999
+(p0)   mov FR_Y_hi = f1 
+//
+//     If flag_not_1, 
+//     Y_hi = 1.0 
+//     Y_lo = X + X_cor
+//     PR_Safe = true
+//
+(p0)   br.cond.sptk L(EXPL_RETURN) ;; 
 }
-;;
-
-
-EXPL_HUGE:
-// Here if |ylogx| >= 2^14
+L(EXPL_HUGE): 
 { .mfi
-      mov GR_temp = 0x0A1DC               // If X < 0, exponent -24100
-      fcmp.gt.s1 p12, p13 =  FR_X, f0     // Test X > 0
-      cmp.eq  p14, p15 =  r0, r0          // Set Safe to false
+	nop.m 999
+//
+//     Return for flag=2 
+//
+(p0)   fcmp.gt.unc.s1 p12, p13 =  FR_X, f0 
+(p0)   cmp.eq.unc  p14, p15 =  r0, r0 ;; 
 }
-;;
-
-{ .mmi
-(p12) mov GR_Mask = 0x15DC0               // If X > 0, exponent +24000
-(p13) mov GR_Mask = 0x0A240               // If X < 0, exponent -24000
-      nop.i 999
+{ .mlx
+	nop.m 999
+//
+//     Set Safe to false
+//     Is x > 0
+//
+(p12)  movl GR_Mask = 0x15DC0 ;; 
 }
-;;
-
-{ .mmf
-      setf.exp FR_TMP2 = GR_Mask          // Form Y_hi = TMP2
-(p13) setf.exp FR_Y_lo = GR_temp          // If X < 0, Y_lo = 2^-24100
-(p12) mov FR_Y_lo = f1                    // IF X > 0, Y_lo = 1.0
+{ .mlx
+(p12)  setf.exp FR_Y_hi = GR_Mask 
+(p13)  movl GR_Mask = 0xA240 ;; 
+}
+{ .mlx
+(p13)  setf.exp FR_Y_hi = GR_Mask 
+//     
+//     x > 0: Create mask for Y_hi = 2**(24,000) 
+//     x <= 0: Create mask for Y_hi = 2**(-24,000) 
+//
+(p13)  movl GR_temp = 0xA1DC ;; 
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_TMP1 = FR_TMP2, FR_Sgn   // TMP1 = Y_hi * Sgn
-      nop.i 999
+(p13)  setf.exp FR_Y_lo = GR_temp 
+//
+//     x <= 0: Create mask for 2**(-24,100)
+//     x <= 0: Y_lo = 2**(-24,100)
+//
+(p12)  mov FR_Y_lo = f1 
+	nop.i 999 ;;
 }
-;;
-
-{ .mfb
-      nop.m 999
-      fmpy.s1 FR_TMP3 = FR_Y_lo,FR_TMP1   // TMP3 = Y_lo * (Y_hi * Sgn)
-      br.cond.sptk POWL_64_SHARED
+{ .mfi
+	nop.m 999
+(p12)  mov FR_Scale =  FR_Y_hi 
+	nop.i 999 ;;
 }
-;;
-
-POWL_Y_ALMOST_1:
-// Here if delta = |y-1| < 2^-50
+{ .mfi
+	nop.m 999
 //
-//  x**(1 + delta) = x * e (ln(x)*delta) = x ( 1 + ln(x) * delta)
+//     x > 0: Y_lo = 1.0
+//     x > 0: Scale = 2**(24,000) 
 //
-// Computation will be safe for 2^-16381 <= x < 2^16383
-
+(p13)  mov FR_Scale = FR_Y_hi 
+	nop.i 999 ;;
+}
+L(EXPL_RETURN): 
 { .mfi
-       mov GR_exp_ynear1_oflow = 0xffff + 16383
-       fma.s1 FR_TMP1 = FR_Input_X,FR_Delta,f0
-       and GR_exp_x = GR_exp_mask, GR_signexp_x
+	nop.m 999
+//
+//     Scale = 2**(24,000)
+//
+//
+//     exp(y *ln(x)) almost complete 
+//     FR_Scale is Scale
+//     f34 is Z_hi 
+//     f35 is Z_lo 
+//
+(p0)   fmpy.s1 FR_Sgn = FR_Scale, FR_Sgn  
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-       cmp.lt  p15, p14 =  GR_exp_x, GR_exp_ynear1_oflow
-       fma.s1 FR_TMP2 = FR_logx_hi,f1,FR_X_lo
-       mov GR_exp_ynear1_uflow = 0xffff - 16381
+	nop.m 999
+//
+//     sgn * scale 
+//
+(p0)   fmpy.s1 FR_Y_lo = FR_Y_lo,FR_Sgn   
+	nop.i 999 ;;
 }
-;;
-
 { .mfb
-(p15)  cmp.ge  p15, p14 =  GR_exp_x, GR_exp_ynear1_uflow
-       fma.s1 FR_TMP3 = FR_Input_X,f1,f0
-       br.cond.sptk POWL_64_SHARED
-};;
-
-POWL_64_SQUARE:
+	nop.m 999
+//
+//     Z_lo * (sgn * scale) 
 //
-//      Here if x not zero and y=2.
+(p0)   fma.s0 FR_Result = FR_Y_hi, FR_Sgn, FR_Y_lo  
 //
-//      Setup for multipath code
+//     Z_hi * (sgn * scale)  + Z_lo
 //
+(p15)  br.cond.sptk L(POWL_64_RETURN) ;;
+}
 { .mfi
-      mov GR_exp_square_oflow = 0xffff + 8192   // Exponent where x*x overflows
-      fmerge.se FR_TMP1 = FR_Input_X, FR_Input_X
-      and GR_exp_x = GR_exp_mask, GR_signexp_x  // Get exponent of x
+	nop.m 999
+(p0)   fsetc.s3 0x7F,0x01
+	nop.i 999
+}
+{ .mlx
+	nop.m 999
+//
+//     Z_hi * (sgn * scale)  + Z_lo with wre & td
+//     Z_hi * (sgn * scale)  + Z_lo with fz  & td
+//
+(p0)   movl GR_T1_ptr = 0x00000000013FFF ;;
 }
-;;
-
 { .mfi
-      cmp.lt  p15, p14 =  GR_exp_x, GR_exp_square_oflow // Decide safe/unsafe
-      fmerge.se FR_TMP2 = FR_Input_X, FR_Input_X
-      mov GR_exp_square_uflow = 0xffff - 8191   // Exponent where x*x underflows
+	nop.m 999
+(p0)   fma.s3  FR_Result_small = FR_Y_hi, FR_Sgn, FR_Y_lo  
+	nop.i 999
 }
-;;
-
 { .mfi
-(p15) cmp.ge  p15, p14 =  GR_exp_x, GR_exp_square_uflow // Decide safe/unsafe
-      fma.s1 FR_TMP3 = f0,f0,f0
-      nop.i 999
+	nop.m 999
+(p0)   fsetc.s3 0x7F,0x40
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//      This is the shared path that will set overflow and underflow.
+//     Return if no danger of over or underflow.
 //
-POWL_64_SHARED:
-
+(p0)   fsetc.s2 0x7F,0x42
+	nop.i 999;;
+}
+{ .mfi
+	nop.m 999
 //
-//      Return if no danger of over or underflow.
+//     S0 user supplied status
+//     S2 user supplied status + WRE + TD  (Overflows)
+//     S3 user supplied status + FZ + TD   (Underflows)
 //
-{ .mfb
-      nop.m 999
-      fma.s0 FR_Result = FR_TMP1, FR_TMP2, FR_TMP3
-(p15) br.ret.sptk  b0      // Main path return if certain no over/underflow
+(p0)   fma.s2  FR_Result_big = FR_Y_hi, FR_Sgn, FR_Y_lo  
+	nop.i 999 ;;
 }
-;;
-
 //
-//      S0 user supplied status
-//      S2 user supplied status + WRE + TD  (Overflows)
-//      S2 user supplied status + FZ + TD   (Underflows)
+//     S0 user supplied status
+//     S2 user supplied status + WRE + TD  (Overflows)
+//     S3 user supplied status + FZ + TD   (Underflows)
 //
 //
 //     If (Safe) is true, then
@@ -2021,741 +2430,973 @@ POWL_64_SHARED:
 //        No overflow or underflow here, but perhaps inexact.
 //        Return
 //     Else
-//       Determine if overflow or underflow was raised.
-//       Fetch +/- overflow threshold for IEEE double extended
-
+//       Determine if overflow or underflow  was raised.
+//       Fetch +/- overflow threshold for IEEE single, double,
+//       double extended
+//
 { .mfi
-      nop.m 999
-      fsetc.s2 0x7F,0x41       // For underflow test, set S2=User+TD+FTZ
-      nop.i 999
+(p0)   setf.exp FR_Big = GR_T1_ptr
+(p0)   fsetc.s2 0x7F,0x40
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s2 FR_Result_small = FR_TMP1, FR_TMP2, FR_TMP3
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc   p11, p0 =  FR_Result_small, 0x00F
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fsetc.s2 0x7F,0x42       // For overflow test, set S2=User+TD+WRE
-      nop.i 999
+	nop.m 999
+(p0)   fmerge.ns FR_NBig = FR_Big, FR_Big
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s2 FR_Result_big = FR_TMP1, FR_TMP2,FR_TMP3
-      nop.i 999
+	nop.m 999
+//
+//     Create largest double exponent + 1.
+//     Create smallest double exponent - 1.
+//     Identify denormals
+//
+(p0)   fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
+	nop.i 999 ;;
+}
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//     fcmp:   resultS2 <= - overflow threshold  
+//     fclass: resultS3 is denorm/unorm/0       
+//
+(p8)   mov   GR_Parameter_TAG = 18 ;;
+}
+{ .mfb
+	nop.m 999
+//
+//     fcmp:   resultS2 >= + overflow threshold  
+//
+(p0)   fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
+(p8)   br.cond.spnt __libm_error_region ;;
+}
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+(p9)   mov   GR_Parameter_TAG = 18
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p9)   br.cond.spnt __libm_error_region ;;
+}
+//
+//     Report that pow overflowed - either +Inf, or -Inf
+//
+{ .mmb
+(p11)  mov   GR_Parameter_TAG = 19
+	nop.m 999
+(p11)  br.cond.spnt __libm_error_region ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+//
+//     Report that pow underflowed
+//
+(p0)   br.cond.sptk L(POWL_64_RETURN) ;;
 }
-;;
 
+
+L(POWL_64_SQUARE):
+// Here if x not zero and y=2.
+// Must call __libm_error_support for overflow or underflow
+//
+//     S0 user supplied status
+//     S2 user supplied status + WRE + TD  (Overflows)
+//     S3 user supplied status + FZ + TD   (Underflows)
+//
 { .mfi
-      nop.m 999
-      fsetc.s2 0x7F,0x40       // Reset S2=User
-      nop.i 999
+	nop.m 999
+(p0)   fma.s0 FR_Result = FR_Input_X, FR_Input_X, f0
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fclass.m p11, p0 = FR_Result_small, 0x00F // Test small result unorm/zero
-      nop.i 999
+	nop.m 999
+(p0)   fsetc.s3 0x7F,0x01
+	nop.i 999
+}
+{ .mlx
+	nop.m 999
+(p0)   movl GR_T1_ptr = 0x00000000013FFF ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fcmp.ge.s1 p8, p0 = FR_Result_big , FR_Big // Test >= + oflow threshold
-      nop.i 999
+	nop.m 999
+(p0)   fma.s3 FR_Result_small = FR_Input_X, FR_Input_X, f0
+	nop.i 999
 }
-;;
-
-{ .mfb
-(p11) mov   GR_Parameter_TAG = 19                // Set tag for underflow
-      fcmp.le.s1 p9, p0 = FR_Result_big, FR_NBig // Test <= - oflow threshold
-(p11) br.cond.spnt __libm_error_region           // Branch if pow underflowed
+{ .mfi
+	nop.m 999
+(p0)   fsetc.s3 0x7F,0x40
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+//
+//     Return if no danger of over or underflow.
+//
+(p0)   fsetc.s2 0x7F,0x42
+	nop.i 999;;
+}
+{ .mfi
+	nop.m 999
+(p0)   fma.s2 FR_Result_big = FR_Input_X, FR_Input_X, f0
+	nop.i 999 ;;
+}
+//
+//     S0 user supplied status
+//     S2 user supplied status + WRE + TD  (Overflows)
+//     S3 user supplied status + FZ + TD   (Underflows)
+//
+//
+//     If (Safe) is true, then
+//        Compute result using user supplied status field.
+//        No overflow or underflow here, but perhaps inexact.
+//        Return
+//     Else
+//       Determine if overflow or underflow  was raised.
+//       Fetch +/- overflow threshold for IEEE single, double,
+//       double extended
+//
+{ .mfi
+(p0)   setf.exp FR_Big = GR_T1_ptr
+(p0)   fsetc.s2 0x7F,0x40
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p0)   fclass.m.unc   p11, p0 =  FR_Result_small, 0x00F
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p0)   fmerge.ns FR_NBig = FR_Big, FR_Big
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
+//
+//     Create largest double exponent + 1.
+//     Create smallest double exponent - 1.
+//     Identify denormals
+//
+(p0)   fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
+	nop.i 999 ;;
+}
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//     fcmp:   resultS2 <= - overflow threshold  
+//     fclass: resultS3 is denorm/unorm/0       
+//
+(p8)   mov   GR_Parameter_TAG = 18 ;;
 }
-;;
-
 { .mfb
-(p8)  mov   GR_Parameter_TAG = 18                // Set tag for overflow
-      nop.f 999
-(p8)  br.cond.spnt __libm_error_region           // Branch if pow +overflow
+	nop.m 999
+//
+//     fcmp:   resultS2 >= + overflow threshold  
+//
+(p0)   fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
+(p8)   br.cond.spnt __libm_error_region ;;
 }
-;;
-
-{ .mbb
-(p9)  mov   GR_Parameter_TAG = 18                // Set tag for overflow
-(p9)  br.cond.spnt __libm_error_region           // Branch if pow -overflow
-      br.ret.sptk  b0                            // Branch if result really ok
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+(p9)   mov   GR_Parameter_TAG = 18
 }
-;;
+{ .mib
+	nop.m 999
+	nop.i 999
+(p9)   br.cond.spnt __libm_error_region ;;
+}
+//
+//     Report that pow overflowed - either +Inf, or -Inf
+//
+{ .mmb
+(p11)  mov   GR_Parameter_TAG = 19
+	nop.m 999
+(p11)  br.cond.spnt __libm_error_region ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+//
+//     Report that pow underflowed
+//
+(p0)   br.cond.sptk L(POWL_64_RETURN) ;;
+}
+
 
 
-POWL_64_SPECIAL:
-// Here if x or y is NatVal, nan, inf, or zero
+
+L(POWL_64_SPECIAL): 
 { .mfi
-      nop.m 999
-      fcmp.eq.s1 p15, p0 =  FR_Input_X, f1  // Test x=+1
-      nop.i 999
+	nop.m 999
+(p0)   fcmp.eq.s1 p15, p0 =  FR_Input_X, f1  // Is x=+1
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fclass.m p8, p0 =  FR_Input_X, 0x143  // Test x natval, snan
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p14, p0 =  FR_Input_Y, 0x023 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m 999
-(p15) fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
-      nop.i 999
+	nop.m 999
+(p15)   fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
+	nop.i 999 
 }
 { .mfb
-      nop.m 999
-(p15) fmpy.s0 FR_Result = f1,f1             // If x=1, result=1
-(p15) br.ret.spnt b0                        // Exit if x=1
+	nop.m 999
+(p15)   fmpy.s0 FR_Result = f1,f1        // If x=1, result=1
+(p15)   br.cond.spnt L(POWL_64_RETURN) ;;   // Exit if x=1
 }
-;;
 
 { .mfi
-      nop.m 999
-      fclass.m p6, p0 =  FR_Input_Y, 0x007  // Test y zero
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p13, p0 =  FR_Input_X, 0x023 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fclass.m p9, p0 =  FR_Input_Y, 0x143  // Test y natval, snan
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p8, p0 =  FR_Input_X, 0x143 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fclass.m p10, p0 =  FR_Input_X, 0x083 // Test x qnan
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p9, p0 =  FR_Input_Y, 0x143 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p8)  fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If x=snan, result=qnan
-(p6)  cmp.ne p8,p0 = r0,r0     // Don't exit if x=snan, y=0 ==> result=+1
+	nop.m 999
+(p0)   fclass.m.unc p10, p0 =  FR_Input_X, 0x083 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fclass.m.unc p15, p0 =  FR_Input_X,0x007   // Test x=0, y=0
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p11, p0 =  FR_Input_Y, 0x083 
+	nop.i 999 ;;
 }
-{ .mfb
-      nop.m 999
-(p9)  fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If y=snan, result=qnan
-(p8)  br.ret.spnt b0                             // Exit if x=snan, y not 0,
-                                                 //   result=qnan
+{ .mfi
+	nop.m 999
+(p0)   fclass.m.unc p6, p0 =  FR_Input_Y, 0x007 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fcmp.eq.s1 p7, p0 =  FR_Input_Y, f1        // Test y +1.0
-      nop.i 999
+	nop.m 999
+(p0)   fcmp.eq.unc.s1 p7, p0 =  FR_Input_Y, f1 
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+//
+//     set p13 if x +/- Inf 
+//     set p14 if y +/- Inf 
+//     set p8  if x  Natval or +/-SNaN  
+//     set p9  if y  Natval or +/-SNaN  
+//     set p10 if x QNaN
+//     set p11 if y QNaN
+//     set p6  if y is +/-0 
+//     set p7  if y is 1
+//
+(p8)   fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X 
+(p6)   cmp.ne p8,p0 = r0,r0 ;;  // Don't exit if x=snan, y=0 ==> result=+1
 }
 { .mfb
-      nop.m 999
-(p10) fmpy.s0 FR_Result = FR_Input_X, f0         // If x=qnan, result=qnan
-(p9)  br.ret.spnt b0                             // Exit if y=snan, result=qnan
+	nop.m 999
+(p9)   fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X 
+(p8)   br.cond.spnt L(POWL_64_RETURN) ;; 
+}
+{ .mfb
+	nop.m 999
+(p10)  fmpy.s0 FR_Result = FR_Input_X, f0 
+(p9)   br.cond.spnt L(POWL_64_RETURN) ;; 
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fclass.m.unc p8, p0 =  FR_Input_X,0x0C3    // Test x=nan, y=0
-      nop.i 999
+	nop.m 999
+//
+//     Produce result for SNaN and NatVals and return
+//
+(p6)   fclass.m.unc p15, p0 =  FR_Input_X,0x007 
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
+//
+//     If Y +/- 0, set p15 if x +/- 0
+//
+(p6)   fclass.m.unc p8, p0 =  FR_Input_X,0x0C3 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m 999
-(p6)  fcmp.eq.s0 p9,p0 = FR_Input_X, f0          // If y=0, flag if x denormal
-      nop.i 999
+	nop.m 999
+(p6)   fcmp.eq.s0 p9,p0 = FR_Input_X, f0 // If y=0, flag if x denormal
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p6)  fadd.s0 FR_Result = f1, f0                 // If y=0, result=1
-      nop.i 999
+	nop.m 999
+(p6)   fadd.s0 FR_Result = f1, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fclass.m p11, p0 =  FR_Input_Y, 0x083      // Test y qnan
-      nop.i 999
+	nop.m 999
+// 
+//     Set p8 if y = +/-0 and X is a QNaN/SNaN
+//     If y = +/-0, let result = 1.0
+// 
+(p7)   fmpy.s0 FR_Result = FR_Input_X,f1
+//
+//     If y == 1, result = x * 1 
+//
+(p15)  mov GR_Parameter_TAG = 20 
 }
-{ .mfb
-(p15) mov GR_Parameter_TAG = 20                  // Error tag for x=0, y=0
-(p7)  fmpy.s0 FR_Result = FR_Input_X,f1          // If y=1, result=x
-(p15) br.cond.spnt __libm_error_region           // Branch if x=0, y=0, result=1
+{ .mib
+	nop.m 999
+	nop.i 999
+(p15)  br.cond.spnt __libm_error_region ;;
 }
-;;
-
-{ .mfb
-(p8)  mov GR_Parameter_TAG = 23                  // Error tag for x=nan, y=0
-      fclass.m p14, p0 =  FR_Input_Y, 0x023      // Test y inf
-(p8)  br.cond.spnt __libm_error_region           // Branch if x=snan, y=0,
-                                                 //   result=1
+{ .mib
+	nop.m 999
+//
+//     If x and y are both zero, result = 1.0 and call error
+//     support. 
+//
+(p8)   mov GR_Parameter_TAG = 23 
+(p8)   br.cond.spnt __libm_error_region ;;
 }
-;;
-
-{ .mfb
-      nop.m 999
-      fclass.m p13, p0 =  FR_Input_X, 0x023      // Test x inf
-(p6)  br.ret.spnt b0                             // Exit y=0, x not nan or 0,
-                                                 //   result=1
+{ .mib
+	nop.m 999
+	nop.i 999
+//
+//     If y = +/-0 and x is a QNaN, result = 1.0 and call error
+//     support. 
+//
+(p6)   br.cond.spnt L(POWL_64_RETURN) ;; 
 }
-;;
 
+// If x=0, y=-inf, go to the X_IS_ZERO path
 { .mfb
-      nop.m 999
-(p14) fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0      // Test x not 0, y=inf
-(p7)  br.ret.spnt b0                             // Exit y=1, x not snan,
-                                                 //   result=x
+	nop.m 999
+(p14)  fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0 
+(p7)   br.cond.spnt L(POWL_64_RETURN) ;; 
 }
-;;
 
-{ .mfb
-      nop.m 999
-(p10) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X  // If x=qnan, y not snan,
-                                                 //   result=qnan
-(p10) br.ret.spnt b0                             // Exit x=qnan, y not snan,
-                                                 //   result=qnan
+{ .mfi
+	nop.m 999
+//
+//     Produce all results for x**0 and x**1 
+//     Let all the result x ** 0 == 1 and return
+//     Let all x ** 1 == x and return
+//
+(p10)  fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
+	nop.i 999 ;;
 }
-;;
-
 { .mfb
-      nop.m 999
-(p11) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X  // If y=qnan, x not nan or 1,
-                                                 //   result=qnan
-(p11) br.ret.spnt b0                             // Exit y=qnan, x not nan or 1,
-                                                 //   result=qnan
+	nop.m 999
+(p11)  fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
+(p10)  br.cond.spnt L(POWL_64_RETURN) ;;
 }
-;;
-
-{ .mbb
-      nop.m 999
-(p14) br.cond.spnt POWL_64_Y_IS_INF           // Branch if y=inf, x not 1 or nan
-(p13) br.cond.spnt POWL_64_X_IS_INF           // Branch if x=inf, y not 1 or nan
+{ .mib
+	nop.m 999
+	nop.i 999
+(p11)  br.cond.spnt L(POWL_64_RETURN) ;;
 }
-;;
-
-
-POWL_64_X_IS_ZERO:
-// Here if x=0, y not nan or 1 or inf or 0
-
-// There is logic starting here to determine if y is an integer when x = 0.
-// If 0 < |y| < 1 then clearly y is not an integer.
-// If |y| > 1, then the significand of y is shifted left by the size of
-//    the exponent of y.  This preserves the lsb of the integer part + the
-//    fractional bits.  The lsb of the integer can be tested to determine if
-//    the integer is even or odd.  The fractional bits can be tested.  If zero,
-//    then y is an integer.
+{ .mib
+	nop.m 999
+	nop.i 999
+//
+//     Return result for x or y QNaN input with QNaN result 
+//
+(p14)  br.cond.spnt L(POWL_64_Y_IS_INF) ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p13)  br.cond.spnt L(POWL_64_X_IS_INF) ;;
+}
+L(POWL_64_X_IS_ZERO): 
+{ .mmb
+(p0)   getf.sig GR_signif_y = FR_Input_Y 
+(p0)   getf.exp GR_BIASed_exp_y = FR_Input_Y
+	nop.b 999 ;;
+}
+{ .mlx
+	nop.m 999
+(p0)   movl GR_Mask = 0x1FFFF
+}
+{ .mlx
+	nop.m 999
+(p0)   movl GR_y_sign = 0x20000 ;;
+}
+//
+//     Get BIASed exp and significand of y
 //
 { .mfi
-      and GR_exp_y = GR_exp_mask,GR_signexp_y   // Get biased exponent of y
-      nop.f 999
-      and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
+(p0)   and GR_exp_y = GR_Mask,GR_BIASed_exp_y
+	nop.f 999
+(p0)   and GR_y_sign = GR_y_sign,GR_BIASed_exp_y
 }
-;;
-
+{ .mlx
+	nop.m 999
+(p0)   movl GR_BIAS = 0xFFFF ;;
+}
+{ .mfi
+(p0)   cmp.lt.unc  p9, p8 = GR_exp_y,GR_BIAS
+	nop.f 999
 //
 //     Maybe y is < 1 already, so
 //     can never be an integer.
+//     Remove sign bit from exponent.
+//
+(p0)   sub GR_exp_y = GR_exp_y,GR_BIAS ;;
+}
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//     Remove exponent BIAS
 //
+(p8)   shl GR_exp_y=  GR_signif_y,GR_exp_y ;;
+}
 { .mfi
-      cmp.lt  p9, p8 = GR_exp_y,GR_exp_bias     // Test 0 < |y| < 1
-      nop.f 999
-      sub GR_exp_y = GR_exp_y,GR_exp_bias       // Get true exponent of y
+(p9)   or  GR_exp_y=  0xF,GR_signif_y
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
+{ .mii
+	nop.m 999
 //
 //     Shift significand of y looking for nonzero bits
 //     For y > 1, shift signif_y exp_y bits to the left
-//     For y < 1, turn on 4 low order bits of significand of y
+//     For y < 1, turn on 4 low order bits of significand of y 
 //     so that the fraction will always be non-zero
 //
-{ .mmi
-(p9)  or  GR_exp_y=  0xF,GR_signif_y            // Force nonzero fraction if y<1
-;;
-      nop.m 999
-(p8)  shl GR_exp_y=  GR_signif_y,GR_exp_y       // Get lsb of int + fraction
-                                                // Wait 4 cycles to use result
-}
-;;
-
-{ .mmi
-      nop.m 999
-;;
-      nop.m 999
-      nop.i 999
-}
-;;
-
-{ .mmi
-      nop.m 999
-;;
-      nop.m 999
-      shl GR_fraction_y=  GR_exp_y,1            // Shift left 1 to get fraction
+(p0)   shl GR_signif_y=  GR_exp_y,1 ;;
+(p0)   extr.u GR_low_order_bit = GR_exp_y,63,1
 }
-;;
-
 //
 //     Integer part of y  shifted off.
 //     Get y's low even or odd bit - y might not be an int.
 //
 { .mii
-      cmp.eq  p13,p0  =  GR_fraction_y, r0      // Test for y integer
-      cmp.eq  p8,p0 =  GR_y_sign, r0            // Test for y > 0
-;;
-(p13) tbit.nz.unc p13,p0 = GR_exp_y, 63         // Test if y an odd integer
-}
-;;
-
-{ .mfi
-(p13) cmp.eq.unc p13,p14 =  GR_y_sign, r0   // Test y pos odd integer
-(p8)  fcmp.eq.s0 p12,p0 = FR_Input_Y, f0    // If x=0 and y>0 flag if y denormal
-      nop.i 999
+(p0)   cmp.eq.unc  p13,p0  =  GR_signif_y, r0
+(p0)   cmp.eq.unc  p8,p9 =  GR_y_sign, r0 ;;
+//
+//     Is y an int?
+//     Is y positive
+//
+(p13)  cmp.ne.unc  p13,p0 =  GR_low_order_bit, r0 ;;
 }
-;;
-
 //
-//     Return +/-0 when x=+/-0 and y is positive odd integer
+//     Is y an int and odd?
 //
 { .mfb
-      nop.m 999
-(p13) mov FR_Result = FR_Input_X            // If x=0,  y pos odd int, result=x
-(p13) br.ret.spnt b0                        // Exit x=0, y pos odd int, result=x
+(p13)  cmp.eq.unc  p13,p14 =  GR_y_sign, r0
+(p8)   fcmp.eq.s0 p12,p0 = FR_Input_Y, f0 // If x=0 and y>0 flag if y denormal
+	nop.b 999 ;;
 }
-;;
-
+{ .mfb
+	nop.m 999
 //
-//     Return +/-inf when x=+/-0 and y is negative odd int
+//     Is y an int and odd and positive?
 //
-{ .mfb
-(p14) mov GR_Parameter_TAG = 21
-(p14) frcpa.s0 FR_Result, p0 = f1, FR_Input_X  // Result +-inf, set Z flag
-(p14) br.cond.spnt __libm_error_region
+(p13)  mov FR_Result = FR_Input_X 
+(p13)  br.cond.sptk L(POWL_64_RETURN) ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//     Return +0 when x=+/-0 and y positive and not an odd integer
+//     Return +/-0 when x=+/-0 and y is an odd pos. int
 //
-{ .mfb
-      nop.m 999
-(p8)  mov FR_Result = f0      // If x=0, y>0 and not odd integer, result=+0
-(p8)  br.ret.sptk b0          // Exit x=0, y>0 and not odd integer, result=+0
+(p14)  frcpa.s0 FR_Result, p10 = f1, FR_Input_X
+(p14)  mov GR_Parameter_TAG = 21
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p14)  br.cond.spnt __libm_error_region ;;
 }
-;;
 
+{ .mfb
+	nop.m 999
 //
-//     Return +inf when x=+/-0 and y is negative and not odd int
+//     Return +/-Inf when x=+/-0 and y is an odd neg int
+//     and raise dz exception
 //
-{ .mfb
-      mov GR_Parameter_TAG = 21
-      frcpa.s0 FR_Result, p10 = f1,f0   // Result +inf, raise Z flag
-      br.cond.sptk __libm_error_region
+(p8)   mov FR_Result = f0
+(p8)   br.cond.sptk L(POWL_64_RETURN) ;;
 }
-;;
-
-
-POWL_64_X_IS_INF:
+{ .mfi
+	nop.m 999
 //
-// Here if x=inf, y not 1 or nan
+//     Return +0 when x=+/-0 and y > 0  and not odd.
 //
+(p9)   frcpa.s0 FR_Result, p10 = f1,f0
+(p9)   mov GR_Parameter_TAG = 21
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p9)   br.cond.sptk __libm_error_region ;;
+}
+L(POWL_64_X_IS_INF): 
 { .mfi
-      and GR_exp_y = GR_exp_mask,GR_signexp_y   // Get biased exponent y
-      fclass.m p13, p0 =  FR_Input_X,0x022      // Test x=-inf
-      nop.i 999
+(p0)   getf.exp GR_exp_y = FR_Input_Y
+(p0)   fclass.m.unc p13, p0 =  FR_Input_X,0x022 
+(p0)   mov GR_Mask = 0x1FFFF ;;
 }
-;;
 
 { .mfi
-      and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
-      fcmp.eq.s0 p9,p0 = FR_Input_Y, f0         // Dummy to set flag if y denorm
-      nop.i 999
+(p0)   getf.sig GR_signif_y = FR_Input_Y
+(p0)   fcmp.eq.s0 p9,p0 = FR_Input_Y, f0 // Flag if y denormal
+       nop.i 999 ;;
 }
-;;
 
 //
-//     Maybe y is < 1 already, so
-//     isn't an int.
+//     Get exp and significand of y
+//     Create exponent mask and sign mask
 //
-{ .mfi
-(p13) cmp.lt.unc  p9, p8 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1 if x=-inf
-      fclass.m p11, p0 =  FR_Input_X,0x021      // Test x=+inf
-      sub GR_exp_y = GR_exp_y,GR_exp_bias       // Get true exponent y
+{ .mlx
+(p0)   and GR_low_order_bit = GR_Mask,GR_exp_y
+(p0)   movl GR_BIAS = 0xFFFF
 }
-;;
-
+{ .mmi
+	nop.m 999 ;;
 //
-//     Shift significand of y looking for nonzero bits
-//     For y > 1, shift signif_y exp_y bits to the left
-//     For y < 1, turn on 4 low order bits of significand of y
-//     so that the fraction will always be non-zero
+//     Remove sign bit from exponent.
 //
-{ .mmi
-(p9)  or  GR_exp_y=  0xF,GR_signif_y          // Force nonzero fraction if y<1
-;;
-(p11) cmp.eq.unc  p14,p12 = GR_y_sign, r0     // Test x=+inf, y>0
-(p8)  shl GR_exp_y=  GR_signif_y,GR_exp_y     // Get lsb of int + fraction
-                                              // Wait 4 cycles to use result
-}
-;;
-
+(p0)   cmp.lt.unc  p9, p8 = GR_low_order_bit,GR_BIAS
 //
-//     Return +inf for x=+inf, y > 0
-//     Return +0   for x=+inf, y < 0
+//     Maybe y is < 1 already, so 
+//     isn't an int.
 //
-{ .mfi
-      nop.m 999
-(p12) mov FR_Result = f0                      // If x=+inf, y<0, result=+0
-      nop.i 999
+(p0)   sub GR_low_order_bit = GR_low_order_bit,GR_BIAS
 }
-{ .mfb
-      nop.m 999
-(p14) fma.s0 FR_Result = FR_Input_X,f1,f0     // If x=+inf, y>0, result=+inf
-(p11) br.ret.sptk b0                          // Exit x=+inf
+{ .mlx
+	nop.m 999
+(p0)   movl GR_sign_mask = 0x20000 ;;
 }
-;;
-
+{ .mfi
+(p0)   and GR_sign_mask = GR_sign_mask,GR_exp_y
 //
-// Here only if x=-inf.  Wait until can use result of shl...
+//     Return +Inf when x=+/-0 and y < 0 and not odd and raise
+//     divide-by-zero exception.
 //
+(p0)   fclass.m.unc p11, p0 =  FR_Input_X,0x021 
+	nop.i 999 ;;
+}
 { .mmi
-      nop.m 999
-;;
-      nop.m 999
-      nop.i 999
+	nop.m 999 ;;
+//
+//     Shift off integer part of y.
+//     Get y's even or odd bit - y might not be an int.
+//
+(p11)  cmp.eq.unc  p11,p12 = GR_sign_mask, r0
+//
+//     Remove exponent BIAS
+//
+(p8)   shl GR_exp_y = GR_signif_y,GR_low_order_bit ;;
 }
-;;
-
 { .mfi
-      cmp.eq  p8,p9 = GR_y_sign, r0           // Test y pos
-      nop.f 999
-      shl GR_fraction_y = GR_exp_y,1          // Shift left 1 to get fraction
+(p9)   or  GR_exp_y = 0xF,GR_signif_y
+//
+//     Is y positive or negative when x is +Inf?
+//     Is y an int when x = -Inf
+//
+(p11)  mov FR_Result = FR_Input_X 
+	nop.i 999 ;;
 }
-;;
-
-{ .mmi
-      cmp.eq  p13,p0 = GR_fraction_y, r0      // Test y integer
-;;
-      nop.m 999
-(p13) tbit.nz.unc  p13,p0 = GR_exp_y, 63      // Test y odd integer
+{ .mfi
+	nop.m 999
+(p12)  mov FR_Result = f0
+	nop.i 999 ;;
 }
-;;
-
+{ .mii
+	nop.m 999
 //
-//     Is y even or odd?
+//     Shift significand looking for nonzero bits
+//     For y non-ints, upset the significand.
 //
+(p0)   shl GR_signif_y = GR_exp_y,1 ;;
+(p13)  cmp.eq.unc  p13,p0  = GR_signif_y, r0
+}
 { .mii
-(p13) cmp.eq.unc  p14,p10 = GR_y_sign, r0     // Test x=-inf, y pos odd int
-(p13) cmp.ne.and  p8,p9 = r0,r0               // If y odd int, turn off p8,p9
-      nop.i 999
+	nop.m 999
+(p0)   extr.u GR_low_order_bit = GR_exp_y,63,1 ;;
+(p13)  cmp.ne.unc  p13,p0 = GR_low_order_bit, r0
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p11)  br.cond.sptk L(POWL_64_RETURN) ;; 
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p12)  br.cond.sptk L(POWL_64_RETURN) ;; 
 }
-;;
-
 //
-//     Return -0   for x = -inf and y < 0 and odd int.
-//     Return -Inf for x = -inf and y > 0 and odd int.
+//     Return Inf for y > 0
+//     Return +0  for y < 0
+//     Is y even or odd?
 //
-{ .mfi
-      nop.m 999
-(p10) fmerge.ns FR_Result = f0, f0      // If x=-inf, y neg odd int, result=-0
-      nop.i 999
+{ .mii
+(p13)  cmp.eq.unc  p13,p10 = GR_sign_mask, r0
+(p0)   cmp.eq.unc  p8,p9 = GR_sign_mask, r0 ;;
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p14) fmpy.s0 FR_Result = FR_Input_X,f1 // If x=-inf, y pos odd int, result=-inf
-      nop.i 999
-}
-;;
-
+	nop.m 999
 //
-//     Return Inf for x = -inf and y > 0 not an odd int.
-//     Return +0  for x = -inf and y < 0 not an odd int.
+//     For x = -inf, y is an int, positive
+//     and odd 
+//     Is y positive in general?
 //
-.pred.rel "mutex",p8,p9
-{ .mfi
-      nop.m 999
-(p8)  fmerge.ns FR_Result = FR_Input_X, FR_Input_X // If x=-inf, y>0 not odd int
-                                                   //   result=+inf
-      nop.i 999
+(p13)  mov FR_Result = FR_Input_X
+	nop.i 999 ;;
 }
 { .mfb
-      nop.m 999
-(p9)  fmpy.s0 FR_Result = f0,f0                    // If x=-inf, y<0 not odd int
-                                                   //   result=+0
-      br.ret.sptk b0                               // Exit for x=-inf
+	nop.m 999
+(p10)  fmerge.ns FR_Result = f0, f0 
+(p13)  br.cond.sptk L(POWL_64_RETURN) ;; 
 }
-;;
-
-
-POWL_64_Y_IS_INF:
-// Here if y=inf, x not 1 or nan
+{ .mib
+	nop.m 999
+	nop.i 999
+(p10)  br.cond.sptk L(POWL_64_RETURN) ;; 
+}
+{ .mfi
+	nop.m 999
 //
-//     For y = +Inf and |x| < 1  returns 0
-//     For y = +Inf and |x| > 1  returns Inf
-//     For y = -Inf and |x| < 1  returns Inf
-//     For y = -Inf and |x| > 1  returns 0
-//     For y =  Inf and |x| = 1  returns 1
+//     Return -Inf for x = -inf and y > 0 and odd int.
+//     Return -0   for x = -inf and y < 0 and odd int.
 //
-{ .mfi
-      nop.m 999
-      fclass.m p8, p0 =  FR_Input_Y, 0x021    // Test y=+inf
-      nop.i 999
+(p8)   fmerge.ns FR_Result = FR_Input_X, FR_Input_X 
+	nop.i 999 ;;
 }
-;;
-
-{ .mfi
-      nop.m 999
-      fclass.m p9, p0 =  FR_Input_Y, 0x022    // Test y=-inf
-      nop.i 999
+{ .mfb
+	nop.m 999
+(p9)   mov FR_Result = f0
+(p8)   br.cond.sptk L(POWL_64_RETURN) ;; 
 }
-;;
-
+{ .mib
+	nop.m 999
+	nop.i 999
+(p9)   br.cond.sptk L(POWL_64_RETURN) ;; 
+}
+L(POWL_64_Y_IS_INF): 
 { .mfi
-      nop.m 999
-      fabs FR_X = FR_Input_X                  // Form |x|
-      nop.i 999
+	nop.m 999
+//
+//     Return Inf for x = -inf and y > 0 not an odd int.
+//     Return +0  for x = -inf and y < 0 and not an odd int.
+//
+(p0)   fclass.m.unc p8, p0 =  FR_Input_Y, 0x021
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fcmp.eq.s0 p10,p0 = FR_Input_X, f0      // flag if x denormal
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p9, p0 =  FR_Input_Y, 0x022
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p8)  fcmp.lt.unc.s1 p6, p0  =  FR_X, f1      // Test y=+inf, |x|<1
-      nop.i 999
+	nop.m 999
+(p0)   fabs FR_X = FR_Input_X
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m 999
-(p8)  fcmp.gt.unc.s1 p7, p0  =  FR_X, f1      // Test y=+inf, |x|>1
-      nop.i 999
+	nop.m 999
+(p0)   fcmp.eq.s0 p10,p0 = FR_Input_X, f0 // flag if x denormal
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m 999
-(p9)  fcmp.lt.unc.s1 p12, p0 =  FR_X, f1      // Test y=-inf, |x|<1
-      nop.i 999
+	nop.m 999
+//
+//     Find y = +/- Inf
+//     Compute |x|
+//
+(p8)   fcmp.lt.unc.s1 p6, p0  =  FR_X, f1
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p6)  fmpy.s0 FR_Result = f0,f0               // If y=+inf, |x|<1, result=+0
-      nop.i 999
+	nop.m 999
+(p8)   fcmp.gt.unc.s1 p7, p0  =  FR_X, f1
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fcmp.gt.unc.s1 p13, p0 =  FR_X, f1      // Test y=-inf, |x|>1
-      nop.i 999
+	nop.m 999
+(p9)   fcmp.lt.unc.s1 p12, p0 =  FR_X, f1
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p7)  fmpy.s0 FR_Result = FR_Input_Y, f1      // If y=+inf, |x|>1, result=+inf
-      nop.i 999
+	nop.m 999
+(p9)   fcmp.gt.unc.s1 p13, p0 =  FR_X, f1
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fcmp.eq.s1 p14, p0 =  FR_X, f1          // Test y=inf, |x|=1
-      nop.i 999
+	nop.m 999
+//
+//     For y = +Inf and |x| < 1  returns 0
+//     For y = +Inf and |x| > 1  returns Inf
+//     For y = -Inf and |x| < 1  returns Inf
+//     For y = -Inf and |x| > 1  returns 0
+//
+(p6)   mov FR_Result = f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p12) fnma.s0 FR_Result = FR_Input_Y, f1, f0  // If y=-inf, |x|<1, result=+inf
-      nop.i 999
+	nop.m 999
+(p7)   mov FR_Result = FR_Input_Y 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p13) mov FR_Result = f0                      // If y=-inf, |x|>1, result=+0
-      nop.i 999
+	nop.m 999
+(p12)  fmpy.s0 FR_Result = FR_Input_Y, FR_Input_Y
+	nop.i 999 ;;
 }
-;;
-
 { .mfb
-      nop.m 999
-(p14) fmpy.s0 FR_Result = f1,f1               // If y=inf, |x|=1, result=+1
-      br.ret.sptk b0                          // Common return for y=inf
+	nop.m 999
+(p13)  mov FR_Result = f0
+//
+//     Produce x ** +/- Inf results
+//
+(p6)   br.cond.spnt L(POWL_64_RETURN) ;;
 }
-;;
-
-
-// Here if x or y denorm/unorm
-POWL_DENORM:
-{ .mmi
-      getf.sig GR_signif_Z = FR_norm_X   // Get significand of x
-;;
-      getf.exp GR_signexp_y = FR_norm_Y  // Get sign and exp of y
-      nop.i 999
+{ .mib
+	nop.m 999
+	nop.i 999
+(p7)   br.cond.spnt L(POWL_64_RETURN) ;;
 }
-;;
-
-{ .mfi
-      getf.sig GR_signif_y = FR_norm_Y   // Get significand of y
-      nop.f 999
-      nop.i 999
+{ .mib
+	nop.m 999
+	nop.i 999
+(p12)  br.cond.spnt L(POWL_64_RETURN) ;;
 }
-;;
-
 { .mib
-      getf.exp GR_signexp_x = FR_norm_X  // Get sign and exp of x
-      extr.u GR_Index1 = GR_signif_Z, 59, 4  // Extract upper 4 signif bits of x
-      br.cond.sptk  POWL_COMMON          // Branch back to main path
+	nop.m 999
+	nop.i 999
+(p13)  br.cond.spnt L(POWL_64_RETURN) ;;
 }
-;;
-
-
-POWL_64_UNSUPPORT:
+{ .mfb
+	nop.m 999
 //
-//     Raise exceptions for specific
-//     values - pseudo NaN and
-//     infinities.
-//     Return NaN and raise invalid
+//     +/-1 ** +/-Inf, result is +1
 //
-{ .mfb
-      nop.m 999
-      fmpy.s0 FR_Result = FR_Input_X,f0
-      br.ret.sptk b0
+(p0)   fmpy.s0 FR_Result = f1,f1
+(p0)   br.cond.sptk L(POWL_64_RETURN) ;;
 }
-;;
-
-POWL_64_XNEG:
+L(POWL_64_UNSUPPORT): 
+{ .mfb
+	nop.m 999
 //
-//     Raise invalid for x < 0  and
-//     y not an integer
+//     Return NaN and raise invalid    
 //
+(p0)   fmpy.s0 FR_Result = FR_Input_X,f0
+// 
+//     Raise exceptions for specific
+//     values - pseudo NaN and
+//     infinities.  
+// 
+(p0)   br.cond.sptk L(POWL_64_RETURN) ;; 
+}
+L(POWL_64_XNEG): 
 { .mfi
-      nop.m 999
-      frcpa.s0 FR_Result, p8 =  f0, f0
-      mov GR_Parameter_TAG = 22
+	nop.m 999
+(p0)   frcpa.s0 FR_Result, p8 =  f0, f0
+// 
+//     Raise invalid for x < 0  and
+//     y not an integer
+// 
+(p0)   mov GR_Parameter_TAG = 22
 }
 { .mib
-      nop.m 999
-      nop.i 999
-      br.cond.sptk __libm_error_region
+	nop.m 999
+	nop.i 999
+(p0)   br.cond.sptk __libm_error_region ;; 
 }
-;;
-
-POWL_64_SQRT:
+L(POWL_64_SQRT): 
 { .mfi
-      nop.m 999
-      frsqrta.s0 FR_Result,p10 = FR_save_Input_X
-      nop.i 999 ;;
+	nop.m 999
+(p0)   frsqrta.s0 FR_Result,p10 = FR_Input_X
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   f62=FR_Half,FR_save_Input_X,f0
-      nop.i 999 ;;
+	nop.m 999
+(p10)  fma.s1   f62=FR_Half,FR_Input_X,f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   f63=FR_Result,FR_Result,f0
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (2)
+//     h = 1/2 * a in f9
+//
+(p10)  fma.s1   f63=FR_Result,FR_Result,f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fnma.s1  f32=f63,f62,FR_Half
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (3)
+//     t1 = y0 * y0 in f10
+//
+(p10)  fnma.s1  f32=f63,f62,f11
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   f33=f32,FR_Result,FR_Result
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (4)
+//     t2 = 1/2 - t1 * h in f10
+//
+(p10)  fma.s1   f33=f32,FR_Result,FR_Result
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   f34=f33,f62,f0
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (5)
+//     y1 = y0 + t2 * y0 in f13
+//
+(p10)  fma.s1   f34=f33,f62,f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fnma.s1  f35=f34,f33,FR_Half
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (6)
+//     t3 = y1 * h in f10
+//
+(p10)  fnma.s1  f35=f34,f33,f11
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   f63=f35,f33,f33
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (7)
+//     t4 = 1/2 - t3 * y1 in f10
+//
+(p10)  fma.s1   f63=f35,f33,f33
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   f32=FR_save_Input_X,f63,f0
-      nop.i 999
+	nop.m 999
+//
+//     Step (8)
+//     y2 = y1 + t4 * y1 in f13
+//
+(p10)  fma.s1   f32=FR_Input_X,f63,f0
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   FR_Result=f63,f62,f0
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (9)
+//     S = a * y2 in f10
+//
+(p10)  fma.s1   FR_Result=f63,f62,f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   f33=f11,f63,f0
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (10)
+//     t5 = y2 * h in f9
+//
+(p10)  fma.s1   f33=f11,f63,f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fnma.s1  f34=f32,f32,FR_save_Input_X
-      nop.i 999
+	nop.m 999
+//
+//     Step (11)
+//     H = 1/2 * y2 in f11
+//
+(p10)  fnma.s1  f34=f32,f32,f8
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p10) fnma.s1  f35=FR_Result,f63,FR_Half
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (12)
+//     d = a - S * S in f12
+//
+(p10)  fnma.s1  f35=FR_Result,f63,f11
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   f62=f33,f34,f32
-      nop.i 999
+	nop.m 999
+//
+//     Step (13)
+//     t6 = 1/2 - t5 * y2 in f7
+//
+(p10)  fma.s1   f62=f33,f34,f32
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1   f63=f33,f35,f33
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (14)
+//     S1 = S + d * H in f13
+//
+(p10)  fma.s1   f63=f33,f35,f33
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fnma.s1  f32=f62,f62,FR_save_Input_X
-      nop.i 999 ;;
+	nop.m 999
+//
+//     Step (15)
+//     H1 = H + t6 * h in f7
+//
+(p10)  fnma.s1  f32=f62,f62,FR_Input_X
+	nop.i 999 ;;
 }
 { .mfb
-      nop.m 999
-(p10) fma.s0 FR_Result=f32,f63,f62
-      br.ret.sptk   b0                // Exit for x > 0, y = 0.5
+	nop.m 999
+//
+//     Step (16)
+//     d1 = a - S1 * S1 
+//
+(p10)  fma.s0 FR_Result=f32,f63,f62
+//
+//     Step (17)
+//     R = S1 + d1 * H1 
+//
+(p10)  br.cond.sptk L(POWL_64_RETURN) ;; 
 }
-;;
+{ .mib
+	nop.m 999
+	nop.i 999
+//
+//     Do the Newton-Raphson iteration from the EAS.
+//
+(p0)   br.cond.sptk L(POWL_64_RETURN) ;; 
+}
+//
+//     Take care of the degenerate cases.
+//
 
-GLOBAL_LIBM_END(powl)
+L(POWL_64_RETURN):                  // Shared exit label targeted by the special-case paths
+{ .mfb
+       nop.m 999
+(p0)   mov   FR_Output = FR_Result  // Copy the computed result into the output register
+(p0)   br.ret.sptk   b0 ;;          // Return through branch register b0
+}
+.endp powl
+ASM_SIZE_DIRECTIVE(powl)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -2770,32 +3411,32 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov GR_SAVE_GP=gp                       // Save gp
 };;
 { .mmi
-        stfe [GR_Parameter_Y] = FR_Input_Y,16   // Save Parameter 2 on stack
+        stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
         add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
         mov GR_SAVE_B0=b0                       // Save b0
 };;
 .body
 { .mib
-        stfe [GR_Parameter_X] = FR_save_Input_X // Store Parameter 1 on stack
+        stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y
         nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfe [GR_Parameter_Y] = FR_Result       // Store Parameter 3 on stack
+        stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#   // Call error handling function
+        br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 { .mmi
-        ldfe  f8 = [GR_Parameter_RESULT]        // Get return result off stack
+        ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-        add   sp = 64,sp                        // Restore stack pointer
-        mov   b0 = GR_SAVE_B0                   // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
         mov   gp = GR_SAVE_GP                  // Restore gp
@@ -2803,6 +3444,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-.endp
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_remainder.S b/sysdeps/ia64/fpu/e_remainder.S
index 2f6e90f994..d8a27722de 100644
--- a/sysdeps/ia64/fpu/e_remainder.S
+++ b/sysdeps/ia64/fpu/e_remainder.S
@@ -1,10 +1,10 @@
-.file "remainder.s"
-
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+  .file "remainder.asm"
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, Bob Norin, 
+// Shane Story, and Ping Tak Peter Tang of the Computational Software Lab, 
+// Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,19 +35,17 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //====================================================================
-// 02/02/00 Initial version
-// 03/02/00 New Algorithm
-// 04/04/00 Unwind support added
-// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 3/02/00  New Algorithm
+// 4/04/00  Unwind support added
+// 7/21/00  Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 11/29/00 Set FR_Y to f9
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+//11/29/00  Set FR_Y to f9
 //
 // API
 //====================================================================
@@ -80,12 +78,16 @@
 // a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
 // a=NaN or b=NaN: return NaN
 
+#include "libm_support.h"
+
 // Registers used
 //====================================================================
 // Predicate registers: p6-p14
 // General registers:   r2,r3,r28,r29,r32 (ar.pfs), r33-r39
 // Floating point registers: f6-f15,f32
 
+  .section .text
+
 GR_SAVE_B0                    = r33
 GR_SAVE_PFS                   = r34
 GR_SAVE_GP                    = r35 
@@ -101,9 +103,18 @@ FR_Y             = f9
 FR_RESULT        = f8
 
 
-.section .text
-GLOBAL_IEEE754_ENTRY(remainder)
 
+  .proc  remainder#
+  .align 32
+  .global remainder#
+  .align 32
+
+remainder:
+#ifdef _LIBC
+.global __remainder
+.type __remainder,@function
+__remainder:
+#endif
 // inputs in f8, f9
 // result in f8
 
@@ -128,7 +139,7 @@ GLOBAL_IEEE754_ENTRY(remainder)
 // Y +-NAN, +-inf, +-0?     p11
 { .mfi
 	  setf.exp f32=r28
-      fclass.m.unc  p11,p0 = f9, 0xe7           
+(p0)  fclass.m.unc  p11,p0 = f9, 0xe7           
       nop.i 999
 }
 // qnan snan inf norm     unorm 0 -+
@@ -137,7 +148,7 @@ GLOBAL_IEEE754_ENTRY(remainder)
 // X +-NAN, +-inf, ?        p9
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p0 = f8, 0xe3           
+(p0)  fclass.m.unc  p9,p0 = f8, 0xe3           
       nop.i 999;; 
 }
 
@@ -156,8 +167,8 @@ GLOBAL_IEEE754_ENTRY(remainder)
 } 
 
 {.bbb
-  (p9) br.cond.spnt FREM_X_NAN_INF
-  (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
+  (p9) br.cond.spnt L(FREM_X_NAN_INF)
+  (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
   nop.b 0
 }  {.mfi
    nop.m 0
@@ -167,7 +178,7 @@ GLOBAL_IEEE754_ENTRY(remainder)
 } 
 
 
-remloop24: 
+L(remloop24): 
   { .mfi
   nop.m 0
   // Step (2)
@@ -189,7 +200,7 @@ remloop24:
 {.mfi
   nop.m 0
   // q1=q0*(1+e0)
-  (p6) fma.s1 f15=f12,f7,f12
+  fma.s1 f15=f12,f7,f12
   nop.i 0
 }
 { .mfi
@@ -320,7 +331,7 @@ remloop24:
   //  (p9) set r=r2 (new a, if not last iteration)
   // (p10) new a =r
   (p10) mov f13=f6
-  (p12) br.cond.sptk remloop24;;
+  (p12) br.cond.sptk L(remloop24);;
 } 
 
 // last iteration
@@ -377,7 +388,7 @@ remloop24:
 }
 
 
-FREM_X_NAN_INF: 
+L(FREM_X_NAN_INF): 
 
 // Y zero ?
 {.mfi 
@@ -394,19 +405,19 @@ FREM_X_NAN_INF:
   nop.m 0
   nop.i 0
   // if Y zero
-  (p11) br.cond.spnt FREM_Y_ZERO;;                        
+  (p11) br.cond.spnt L(FREM_Y_ZERO);;                        
 }
 
 // X infinity? Return QNAN indefinite
 { .mfi
       nop.m 999
-      fclass.m.unc  p8,p0 = f8, 0x23 
+(p0)  fclass.m.unc  p8,p0 = f8, 0x23 
       nop.i 999
 }
 // X infinity? Return QNAN indefinite
 { .mfi
       nop.m 999
-      fclass.m.unc  p11,p0 = f8, 0x23 
+(p0)  fclass.m.unc  p11,p0 = f8, 0x23 
       nop.i 999;; 
 }
 // Y NaN ?
@@ -434,14 +445,14 @@ FREM_X_NAN_INF:
 }
 { .mfi
       nop.m 999
-(p8) fma.d.s0 f8=f8,f1,f0                     
+(p8) fma.d f8=f8,f1,f0                     
 	  nop.i 0 ;;                        
 }
 
 { .mfb
       nop.m 999
       frcpa.s0 f8,p7=f8,f9                     
-	  (p11) br.cond.spnt EXP_ERROR_RETURN;;                        
+	  (p11) br.cond.spnt L(EXP_ERROR_RETURN);;                        
 }
 { .mib
 	nop.m 0
@@ -450,35 +461,35 @@ FREM_X_NAN_INF:
 }
 
 
-FREM_Y_NAN_INF_ZERO: 
+L(FREM_Y_NAN_INF_ZERO): 
 
 // Y INF
 { .mfi
       nop.m 999
-      fclass.m.unc  p7,p0 = f9, 0x23           
+(p0)  fclass.m.unc  p7,p0 = f9, 0x23           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p7)  fma.d.s0 f8=f8,f1,f0                     
+(p7)  fma.d f8=f8,f1,f0                     
 (p7)  br.ret.spnt    b0 ;;                        
 }
 
 // Y NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p0 = f9, 0xc3           
+(p0)  fclass.m.unc  p9,p0 = f9, 0xc3           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p9)  fma.d.s0 f8=f9,f1,f0                     
+(p9)  fma.d f8=f9,f1,f0                     
 (p9)  br.ret.spnt    b0 ;;                        
 }
 
-FREM_Y_ZERO:
+L(FREM_Y_ZERO):
 // Y zero? Must be zero at this point
 // because it is the only choice left.
 // Return QNAN indefinite
@@ -486,7 +497,7 @@ FREM_Y_ZERO:
 // X NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p10 = f8, 0xc3           
+(p0)  fclass.m.unc  p9,p10 = f8, 0xc3           
       nop.i 999 ;;
 }
 { .mfi
@@ -497,41 +508,47 @@ FREM_Y_ZERO:
 
 {.mfi
  nop.m 999
- (p9) frcpa.s0 f11,p7=f8,f0
+ (p9) frcpa f11,p7=f8,f0
  nop.i 0;;
 }
 
 { .mfi
       nop.m 999
-(p10)  frcpa.s0         f11,p7 = f0,f0  
+(p10)  frcpa         f11,p7 = f0,f0  
 	  nop.i 999;;         
 }
 
 { .mfi
       nop.m 999
-      fmerge.s      f10 = f8, f8             
+(p0)  fmerge.s      f10 = f8, f8             
       nop.i 999
 }
 
 { .mfi
       nop.m 999
-      fma.d.s0 f8=f11,f1,f0                     
+(p0)  fma.d f8=f11,f1,f0                     
       nop.i 999
 }
 
 
-EXP_ERROR_RETURN: 
+L(EXP_ERROR_RETURN): 
 
 { .mib
-      mov   GR_Parameter_TAG = 124                                 
+(p0)  mov   GR_Parameter_TAG = 124                                 
 	  nop.i 999
-      br.sptk __libm_error_region;; 
+(p0)  br.sptk __libm_error_region;; 
 }
 
-GLOBAL_IEEE754_END(remainder)
+.endp remainder
+ASM_SIZE_DIRECTIVE(remainder)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__remainder)
+#endif
+
 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -579,11 +596,10 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
-
diff --git a/sysdeps/ia64/fpu/e_remainderf.S b/sysdeps/ia64/fpu/e_remainderf.S
index bbb5fd0e0f..40f9b32921 100644
--- a/sysdeps/ia64/fpu/e_remainderf.S
+++ b/sysdeps/ia64/fpu/e_remainderf.S
@@ -1,10 +1,11 @@
-.file "remainderf.s"
-
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+  .file "remainderf.asm"
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, 
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational 
+// Software Lab, 
+// Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,19 +36,17 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //====================================================================
-// 02/02/00 Initial version
-// 03/02/00 New algorithm  
-// 04/04/00 Unwind support added
-// 07/21/00 Fixed quotient=2^{24*m+23} bug
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00 Initial version
+// 3/02/00 New algorithm  
+// 4/04/00 Unwind support added
+// 7/21/00 Fixed quotient=2^{24*m+23} bug
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 11/29/00 Set FR_Y to f9
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+//11/29/00  Set FR_Y to f9
 //
 // API
 //====================================================================
@@ -79,6 +78,9 @@
 //====================================================================
 // a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
 // a=NaN or b=NaN: return NaN
+
+#include "libm_support.h"
+
 //
 // Registers used
 //====================================================================
@@ -87,6 +89,8 @@
 // Floating point registers: f6-f15
 //
 
+.section .text
+
 GR_SAVE_B0                    = r33
 GR_SAVE_PFS                   = r34
 GR_SAVE_GP                    = r35 
@@ -102,9 +106,17 @@ FR_Y             = f9
 FR_RESULT        = f8
 
 
-.section .text
-GLOBAL_IEEE754_ENTRY(remainderf)
+  .proc  remainderf#
+  .align 32
+  .global remainderf#
+  .align 32
 
+remainderf:
+#ifdef _LIBC
+.global __remainderf
+.type __remainderf,@function
+__remainderf:
+#endif
 // inputs in f8, f9
 // result in f8
 
@@ -129,7 +141,7 @@ GLOBAL_IEEE754_ENTRY(remainderf)
 // Y +-NAN, +-inf, +-0?     p11
 { .mfi
       nop.m 999
-      fclass.m.unc  p11,p0 = f9, 0xe7           
+(p0)  fclass.m.unc  p11,p0 = f9, 0xe7           
       nop.i 999
 }
 // qnan snan inf norm     unorm 0 -+
@@ -138,7 +150,7 @@ GLOBAL_IEEE754_ENTRY(remainderf)
 // X +-NAN, +-inf, ?        p9
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p0 = f8, 0xe3           
+(p0)  fclass.m.unc  p9,p0 = f8, 0xe3           
       nop.i 999;; 
 }
 
@@ -156,8 +168,8 @@ GLOBAL_IEEE754_ENTRY(remainderf)
   nop.i 0;;
 } 
 {.bbb
-  (p9) br.cond.spnt FREM_X_NAN_INF
-  (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
+  (p9) br.cond.spnt L(FREM_X_NAN_INF)
+  (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
   nop.b 0
 }  {.mfi
    nop.m 0
@@ -167,7 +179,7 @@ GLOBAL_IEEE754_ENTRY(remainderf)
 } 
 
 .align 32
-remloop24: 
+L(remloop24): 
   { .mfi
   // f12=2^{24}-2
   setf.s f12=r3
@@ -335,7 +347,7 @@ remloop24:
   // (p9) set r=r2 (new a, if not last iteration)
   // (p10) new a =r
   (p10) mov f13=f6
-  (p12) br.cond.sptk remloop24;;
+  (p12) br.cond.sptk L(remloop24);;
 } 
 
 // last iteration
@@ -396,7 +408,7 @@ remloop24:
 }
 
 
-FREM_X_NAN_INF: 
+L(FREM_X_NAN_INF): 
 
 // Y zero ?
 {.mfi 
@@ -413,19 +425,19 @@ FREM_X_NAN_INF:
   nop.m 0
   nop.i 0
   // if Y zero
-  (p11) br.cond.spnt FREM_Y_ZERO;;                        
+  (p11) br.cond.spnt L(FREM_Y_ZERO);;                        
 }
 
 // X infinity? Return QNAN indefinite
 { .mfi
       nop.m 999
-      fclass.m.unc  p8,p0 = f8, 0x23 
+(p0)  fclass.m.unc  p8,p0 = f8, 0x23 
       nop.i 999
 }
 // X infinity? Return QNAN indefinite
 { .mfi
       nop.m 999
-      fclass.m.unc  p11,p0 = f8, 0x23 
+(p0)  fclass.m.unc  p11,p0 = f8, 0x23 
       nop.i 999;; 
 }
 // Y NaN ?
@@ -453,14 +465,14 @@ FREM_X_NAN_INF:
 }
 { .mfi
       nop.m 999
-(p8) fma.s.s0 f8=f8,f1,f0                     
+(p8) fma.s f8=f8,f1,f0                     
 	  nop.i 0 ;;                        
 }
 
 { .mfb
       nop.m 999
       frcpa.s0 f8,p7=f8,f9                     
-	  (p11) br.cond.spnt EXP_ERROR_RETURN;;                        
+	  (p11) br.cond.spnt L(EXP_ERROR_RETURN);;                        
 }
 { .mib
 	nop.m 0
@@ -469,35 +481,35 @@ FREM_X_NAN_INF:
 }
 
 
-FREM_Y_NAN_INF_ZERO: 
+L(FREM_Y_NAN_INF_ZERO): 
 
 // Y INF
 { .mfi
       nop.m 999
-      fclass.m.unc  p7,p0 = f9, 0x23           
+(p0)  fclass.m.unc  p7,p0 = f9, 0x23           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p7)  fma.s.s0 f8=f8,f1,f0                     
+(p7)  fma.s f8=f8,f1,f0                     
 (p7)  br.ret.spnt    b0 ;;                        
 }
 
 // Y NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p0 = f9, 0xc3           
+(p0)  fclass.m.unc  p9,p0 = f9, 0xc3           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p9)  fma.s.s0 f8=f9,f1,f0                     
+(p9)  fma.s f8=f9,f1,f0                     
 (p9)  br.ret.spnt    b0 ;;                        
 }
 
-FREM_Y_ZERO:
+L(FREM_Y_ZERO):
 // Y zero? Must be zero at this point
 // because it is the only choice left.
 // Return QNAN indefinite
@@ -505,7 +517,7 @@ FREM_Y_ZERO:
 // X NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p10 = f8, 0xc3           
+(p0)  fclass.m.unc  p9,p10 = f8, 0xc3           
       nop.i 999 ;;
 }
 { .mfi
@@ -516,41 +528,47 @@ FREM_Y_ZERO:
 
 {.mfi
  nop.m 999
- (p9) frcpa.s0 f11,p7=f8,f0
+ (p9) frcpa f11,p7=f8,f0
  nop.i 0;;
 }
 
 { .mfi
       nop.m 999
-(p10)  frcpa.s0         f11,p7 = f0,f0           
+(p10)  frcpa         f11,p7 = f0,f0           
 nop.i 999;;
 }
 
 { .mfi
       nop.m 999
-      fmerge.s      f10 = f8, f8             
+(p0)  fmerge.s      f10 = f8, f8             
       nop.i 999
 }
 
 { .mfi
       nop.m 999
-      fma.s.s0 f8=f11,f1,f0                     
+(p0)  fma.s f8=f11,f1,f0                     
       nop.i 999
 }
 
 
-EXP_ERROR_RETURN: 
+L(EXP_ERROR_RETURN): 
 
 { .mib
-      mov   GR_Parameter_TAG = 125                                
+(p0)  mov   GR_Parameter_TAG = 125                                
 	  nop.i 999
-      br.sptk __libm_error_region;; 
+(p0)  br.sptk __libm_error_region;; 
 }
 
-GLOBAL_IEEE754_END(remainderf)
+.endp remainderf
+ASM_SIZE_DIRECTIVE(remainderf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__remainderf)
+#endif
+
 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -598,11 +616,9 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
-
-
diff --git a/sysdeps/ia64/fpu/e_remainderl.S b/sysdeps/ia64/fpu/e_remainderl.S
index 1c1a3c3072..5856861442 100644
--- a/sysdeps/ia64/fpu/e_remainderl.S
+++ b/sysdeps/ia64/fpu/e_remainderl.S
@@ -1,10 +1,10 @@
-.file "remainderl.s"
-
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+.file "remainderl.asm"
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, 
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the Computational 
+// Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,19 +35,17 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //====================================================================
-// 02/02/00 Initial version
-// 03/02/00 New algorithm 
-// 04/04/00 Unwind support added
-// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 3/02/00  New algorithm 
+// 4/04/00  Unwind support added
+// 7/21/00  Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 11/29/00 Set FR_Y to f9
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+//11/29/00  Set FR_Y to f9
 //
 // API
 //====================================================================
@@ -79,6 +77,9 @@
 //====================================================================
 // a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
 // a=NaN or b=NaN: return NaN
+
+#include "libm_support.h"
+
 //
 // Registers used
 //====================================================================
@@ -86,6 +87,8 @@
 // General registers:   r2,r3,r28,r29,r32 (ar.pfs), r33-r39
 // Floating point registers: f6-f15,f32
 //
+.section .text
+
 
 GR_SAVE_B0                    = r33
 GR_SAVE_PFS                   = r34
@@ -102,9 +105,19 @@ FR_Y             = f9
 FR_RESULT        = f8
 
 
-.section .text
-GLOBAL_IEEE754_ENTRY(remainderl)
 
+
+  .proc  remainderl#
+  .align 32
+  .global remainderl#
+  .align 32
+
+remainderl:
+#ifdef _LIBC
+.global __remainderl
+.type __remainderl,@function
+__remainderl:
+#endif
 // inputs in f8, f9
 // result in f8
 
@@ -146,7 +159,7 @@ cmp.eq p11,p10=r29,r0;;
 // X +-NAN, +-inf, ?        p9
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p8 = f8, 0xe3           
+(p0)  fclass.m.unc  p9,p8 = f8, 0xe3           
       nop.i 999;; 
 }
 
@@ -183,8 +196,8 @@ cmp.eq p11,p10=r29,r0;;
 }
 
 {.bbb
-  (p9) br.cond.spnt FREM_X_NAN_INF
-  (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
+  (p9) br.cond.spnt L(FREM_X_NAN_INF)
+  (p11) br.cond.spnt L(FREM_Y_NAN_INF_ZERO)
   nop.b 0
 }  {.mfi
    nop.m 0
@@ -193,7 +206,7 @@ cmp.eq p11,p10=r29,r0;;
    nop.i 0;;
 } 
 
-remloop24: 
+L(remloop24): 
   { .mfi
   nop.m 0
   // Step (2)
@@ -215,7 +228,7 @@ remloop24:
 {.mfi
   nop.m 0
   // q1=q0*(1+e0)
-  (p6) fma.s1 f15=f12,f7,f12
+  fma.s1 f15=f12,f7,f12
   nop.i 0
 }
 { .mfi
@@ -345,7 +358,7 @@ remloop24:
   //  (p9) set r=r2 (new a, if not last iteration)
   // (p10) new a =r
   (p10) mov f13=f6
-  (p12) br.cond.sptk remloop24;;
+  (p12) br.cond.sptk L(remloop24);;
 } 
 
 // last iteration
@@ -403,7 +416,7 @@ remloop24:
 
 
 
-FREM_X_NAN_INF: 
+L(FREM_X_NAN_INF): 
 
 // Y zero ?
 {.mfi 
@@ -420,19 +433,19 @@ FREM_X_NAN_INF:
   nop.m 0
   nop.i 0
   // if Y zero
-  (p11) br.cond.spnt FREM_Y_ZERO;;                        
+  (p11) br.cond.spnt L(FREM_Y_ZERO);;                        
 }
 
 // X infinity? Return QNAN indefinite
 { .mfi
       nop.m 999
-      fclass.m.unc  p8,p0 = f8, 0x23 
+(p0)  fclass.m.unc  p8,p0 = f8, 0x23 
       nop.i 999
 }
 // X infinity? Return QNAN indefinite
 { .mfi
       nop.m 999
-      fclass.m.unc  p11,p0 = f8, 0x23 
+(p0)  fclass.m.unc  p11,p0 = f8, 0x23 
       nop.i 999;; 
 }
 // Y NaN ?
@@ -460,14 +473,14 @@ FREM_X_NAN_INF:
 }
 { .mfi
      nop.m 999
-(p8) fma.s0 f8=f8,f1,f0                     
+(p8) fma f8=f8,f1,f0                     
 	 nop.i 0 ;;                        
 }
 
 { .mfb
       nop.m 999
       frcpa.s0 f8,p7=f8,f9                     
-	  (p11) br.cond.spnt EXP_ERROR_RETURN;;                        
+	  (p11) br.cond.spnt L(EXP_ERROR_RETURN);;                        
 }
 { .mib
 	nop.m 0
@@ -476,24 +489,24 @@ FREM_X_NAN_INF:
 }
 
 
-FREM_Y_NAN_INF_ZERO: 
+L(FREM_Y_NAN_INF_ZERO): 
 // Y INF
 { .mfi
       nop.m 999
-      fclass.m.unc  p7,p0 = f9, 0x23           
+(p0)  fclass.m.unc  p7,p0 = f9, 0x23           
       nop.i 999 ;;
 }
 
 { .mfb
       nop.m 999
-(p7)  fma.s0 f8=f8,f1,f0                     
+(p7)  fma f8=f8,f1,f0                     
 (p7)  br.ret.spnt    b0 ;;                        
 }
 
 // Y NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p10 = f9, 0xc3           
+(p0)  fclass.m.unc  p9,p10 = f9, 0xc3           
       nop.i 999 ;;
 }
 { .mfi
@@ -504,11 +517,11 @@ FREM_Y_NAN_INF_ZERO:
 
 { .mfb
       nop.m 999
-(p9)  fma.s0 f8=f9,f1,f0                     
+(p9)  fma f8=f9,f1,f0                     
 (p9)  br.ret.spnt    b0 ;;                        
 }
 
-FREM_Y_ZERO:
+L(FREM_Y_ZERO):
 // Y zero? Must be zero at this point
 // because it is the only choice left.
 // Return QNAN indefinite
@@ -516,7 +529,7 @@ FREM_Y_ZERO:
 // X NAN?
 { .mfi
       nop.m 999
-      fclass.m.unc  p9,p10 = f8, 0xc3           
+(p0)  fclass.m.unc  p9,p10 = f8, 0xc3           
       nop.i 999 ;;
 }
 { .mfi
@@ -527,37 +540,43 @@ FREM_Y_ZERO:
 
 {.mfi
  nop.m 999
- (p9) frcpa.s0 f11,p7=f8,f0
+ (p9) frcpa f11,p7=f8,f0
  nop.i 0;;
 }
 { .mfi
       nop.m 999
-(p10)  frcpa.s0   f11,p7 = f0,f0           
+(p10)  frcpa   f11,p7 = f0,f0           
 	  nop.i 999;;
 }
 
 { .mfi
       nop.m 999
-      fmerge.s      f10 = f8, f8             
+(p0)  fmerge.s      f10 = f8, f8             
       nop.i 999
 }
 
 { .mfi
       nop.m 999
-      fma.s0 f8=f11,f1,f0                     
+(p0)  fma f8=f11,f1,f0                     
       nop.i 999;;
 }
 
-EXP_ERROR_RETURN: 
+L(EXP_ERROR_RETURN): 
 
 { .mib
-      mov   GR_Parameter_TAG = 123                                 
+(p0)  mov   GR_Parameter_TAG = 123                                 
 	  nop.i 999
-      br.sptk __libm_error_region;; 
+(p0)  br.sptk __libm_error_region;; 
 }
 
-GLOBAL_IEEE754_END(remainderl)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp remainderl
+ASM_SIZE_DIRECTIVE(remainderl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__remainderl)
+#endif
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -605,12 +624,9 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
-
-
-
diff --git a/sysdeps/ia64/fpu/e_scalb.S b/sysdeps/ia64/fpu/e_scalb.S
index 82e914e259..7f5b5796de 100644
--- a/sysdeps/ia64/fpu/e_scalb.S
+++ b/sysdeps/ia64/fpu/e_scalb.S
@@ -1,10 +1,10 @@
 .file "scalb.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,14 +35,12 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 01/26/01 Scalb completely reworked and now standalone version 
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 2/02/00  Initial version
+// 1/26/01  Scalb completely reworked and now standalone version 
 //
 // API
 //==============================================================
@@ -55,6 +53,8 @@
 //
 //
 
+#include "libm_support.h"
+
 FR_Floating_X  = f8
 FR_Result      = f8
 FR_Floating_N  = f9
@@ -84,8 +84,19 @@ GR_Parameter_Y      = r36
 GR_Parameter_RESULT = r37
 GR_Tag              = r38
 
+.align 32
+.global scalb
+
 .section .text
-GLOBAL_IEEE754_ENTRY(scalb)
+.proc  scalb
+.align 32
+
+scalb: 
+#ifdef _LIBC
+.global __ieee754_scalb
+.type __ieee754_scalb,@function
+__ieee754_scalb:
+#endif
 
 //
 //   Is x NAN, INF, ZERO, +-?
@@ -129,12 +140,12 @@ GLOBAL_IEEE754_ENTRY(scalb)
 { .mib
      setf.exp      FR_Big = GR_Scratch                  
      nop.i 0 
-(p6) br.cond.spnt  SCALB_NAN_INF_ZERO 
+(p6) br.cond.spnt  L(SCALB_NAN_INF_ZERO) 
 }
 { .mib
      setf.exp      FR_NBig = GR_Scratch1                  
      nop.i 0 
-(p7) br.cond.spnt  SCALB_NAN_INF_ZERO 
+(p7) br.cond.spnt  L(SCALB_NAN_INF_ZERO) 
 };;
 
 //
@@ -201,7 +212,7 @@ GLOBAL_IEEE754_ENTRY(scalb)
 }
 {    .mfb
      nop.m 0
-(p7) frcpa.s0          f8,p11     =    f0,f0
+(p7) frcpa          f8,p11     =    f0,f0
 (p7) br.ret.spnt    b0          
 };;
 
@@ -235,7 +246,7 @@ GLOBAL_IEEE754_ENTRY(scalb)
 }
 { .mlx
      nop.m 999
-     movl GR_Scratch = 0x00000000000303FF 
+(p0) movl GR_Scratch = 0x00000000000303FF 
 };;
 {    .mfi
      nop.m 0
@@ -244,7 +255,7 @@ GLOBAL_IEEE754_ENTRY(scalb)
 }
 {    .mlx
      nop.m 999
-     movl GR_Scratch1= 0x00000000000103FF 
+(p0) movl GR_Scratch1= 0x00000000000103FF 
 };;
 
 //   Set up necessary status fields 
@@ -255,12 +266,12 @@ GLOBAL_IEEE754_ENTRY(scalb)
 //
 {    .mfi
      nop.m 999
-     fsetc.s3 0x7F,0x41
+(p0) fsetc.s3 0x7F,0x41
      nop.i 999
 }
 {    .mfi
      nop.m 999
-     fsetc.s2 0x7F,0x42
+(p0) fsetc.s2 0x7F,0x42
      nop.i 999
 };;
 
@@ -334,7 +345,7 @@ GLOBAL_IEEE754_ENTRY(scalb)
 {    .mfb
 (p6) addl GR_Tag = 54, r0
 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt SCALB_UNDERFLOW 
+(p6) br.cond.spnt L(SCALB_UNDERFLOW) 
 };;
 
 //
@@ -342,8 +353,8 @@ GLOBAL_IEEE754_ENTRY(scalb)
 //
 { .mbb
      nop.m 0
-(p7) br.cond.spnt SCALB_OVERFLOW 
-(p9) br.cond.spnt SCALB_OVERFLOW 
+(p7) br.cond.spnt L(SCALB_OVERFLOW) 
+(p9) br.cond.spnt L(SCALB_OVERFLOW) 
 };;
 
 //
@@ -355,7 +366,7 @@ GLOBAL_IEEE754_ENTRY(scalb)
      br.ret.sptk     b0;;                   
 }
 
-SCALB_NAN_INF_ZERO: 
+L(SCALB_NAN_INF_ZERO): 
 
 //
 //   Convert N to a fp integer
@@ -460,11 +471,16 @@ SCALB_NAN_INF_ZERO:
      br.ret.sptk   b0
 };;
 
-GLOBAL_IEEE754_END(scalb)
+.endp scalb
+ASM_SIZE_DIRECTIVE(scalb)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_scalb)
+#endif
+.proc __libm_error_region
 __libm_error_region:
 
-SCALB_OVERFLOW: 
-SCALB_UNDERFLOW: 
+L(SCALB_OVERFLOW): 
+L(SCALB_UNDERFLOW): 
 
 //
 // Get stack address of N
@@ -541,7 +557,8 @@ SCALB_UNDERFLOW:
    br.ret.sptk     b0                  
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalbf.S b/sysdeps/ia64/fpu/e_scalbf.S
index 07acb3297e..40af080d38 100644
--- a/sysdeps/ia64/fpu/e_scalbf.S
+++ b/sysdeps/ia64/fpu/e_scalbf.S
@@ -1,10 +1,10 @@
 .file "scalbf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,14 +35,12 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 01/26/01 Scalb completely reworked and now standalone version 
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 2/02/00  Initial version
+// 1/26/01  Scalb completely reworked and now standalone version 
 //
 // API
 //==============================================================
@@ -55,6 +53,8 @@
 //
 //
 
+#include "libm_support.h"
+
 FR_Floating_X  = f8
 FR_Result      = f8
 FR_Floating_N  = f9
@@ -84,8 +84,19 @@ GR_Parameter_Y      = r36
 GR_Parameter_RESULT = r37
 GR_Tag              = r38
 
+.align 32
+.global scalbf
+
 .section .text
-GLOBAL_IEEE754_ENTRY(scalbf)
+.proc  scalbf
+.align 32
+
+scalbf: 
+#ifdef _LIBC
+.global __ieee754_scalbf
+.type __ieee754_scalbf,@function
+__ieee754_scalbf:
+#endif
 
 //
 //   Is x NAN, INF, ZERO, +-?
@@ -129,12 +140,12 @@ GLOBAL_IEEE754_ENTRY(scalbf)
 { .mib
      setf.exp      FR_Big = GR_Scratch                  
      nop.i 0 
-(p6) br.cond.spnt  SCALBF_NAN_INF_ZERO 
+(p6) br.cond.spnt  L(SCALBF_NAN_INF_ZERO) 
 }
 { .mib
      setf.exp      FR_NBig = GR_Scratch1                  
      nop.i 0 
-(p7) br.cond.spnt  SCALBF_NAN_INF_ZERO 
+(p7) br.cond.spnt  L(SCALBF_NAN_INF_ZERO) 
 };;
 
 //
@@ -201,7 +212,7 @@ GLOBAL_IEEE754_ENTRY(scalbf)
 }
 {    .mfb
      nop.m 0
-(p7) frcpa.s0          f8,p11     =    f0,f0
+(p7) frcpa          f8,p11     =    f0,f0
 (p7) br.ret.spnt    b0          
 };;
 
@@ -235,7 +246,7 @@ GLOBAL_IEEE754_ENTRY(scalbf)
 }
 { .mlx
      nop.m 999
-     movl GR_Scratch = 0x000000000003007F 
+(p0) movl GR_Scratch = 0x000000000003007F 
 };;
 {    .mfi
      nop.m 0
@@ -244,7 +255,7 @@ GLOBAL_IEEE754_ENTRY(scalbf)
 }
 {    .mlx
      nop.m 999
-     movl GR_Scratch1= 0x000000000001007F 
+(p0) movl GR_Scratch1= 0x000000000001007F 
 };;
 
 //   Set up necessary status fields 
@@ -255,12 +266,12 @@ GLOBAL_IEEE754_ENTRY(scalbf)
 //
 {    .mfi
      nop.m 999
-     fsetc.s3 0x7F,0x41
+(p0) fsetc.s3 0x7F,0x41
      nop.i 999
 }
 {    .mfi
      nop.m 999
-     fsetc.s2 0x7F,0x42
+(p0) fsetc.s2 0x7F,0x42
      nop.i 999
 };;
 
@@ -334,7 +345,7 @@ GLOBAL_IEEE754_ENTRY(scalbf)
 {    .mfb
 (p6) addl GR_Tag = 56, r0
 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt SCALBF_UNDERFLOW 
+(p6) br.cond.spnt L(SCALBF_UNDERFLOW) 
 };;
 
 //
@@ -342,8 +353,8 @@ GLOBAL_IEEE754_ENTRY(scalbf)
 //
 { .mbb
      nop.m 0
-(p7) br.cond.spnt SCALBF_OVERFLOW 
-(p9) br.cond.spnt SCALBF_OVERFLOW 
+(p7) br.cond.spnt L(SCALBF_OVERFLOW) 
+(p9) br.cond.spnt L(SCALBF_OVERFLOW) 
 };;
 
 //
@@ -355,7 +366,7 @@ GLOBAL_IEEE754_ENTRY(scalbf)
      br.ret.sptk     b0;;                   
 }
 
-SCALBF_NAN_INF_ZERO: 
+L(SCALBF_NAN_INF_ZERO): 
 
 //
 //   Convert N to a fp integer
@@ -460,11 +471,16 @@ SCALBF_NAN_INF_ZERO:
      br.ret.sptk   b0
 };;
 
-GLOBAL_IEEE754_END(scalbf)
+.endp scalbf
+ASM_SIZE_DIRECTIVE(scalbf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_scalbf)
+#endif
+.proc __libm_error_region
 __libm_error_region:
 
-SCALBF_OVERFLOW: 
-SCALBF_UNDERFLOW: 
+L(SCALBF_OVERFLOW): 
+L(SCALBF_UNDERFLOW): 
 
 //
 // Get stack address of N
@@ -541,7 +557,8 @@ SCALBF_UNDERFLOW:
    br.ret.sptk     b0                  
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalbl.S b/sysdeps/ia64/fpu/e_scalbl.S
index d22d029155..43eac7a2ad 100644
--- a/sysdeps/ia64/fpu/e_scalbl.S
+++ b/sysdeps/ia64/fpu/e_scalbl.S
@@ -1,10 +1,10 @@
 .file "scalbl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,14 +35,12 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 01/26/01 Scalb completely reworked and now standalone version 
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 2/02/00  Initial version
+// 1/26/01  Scalb completely reworked and now standalone version 
 //
 // API
 //==============================================================
@@ -55,6 +53,8 @@
 //
 //
 
+#include "libm_support.h"
+
 FR_Floating_X  = f8
 FR_Result      = f8
 FR_Floating_N  = f9
@@ -84,8 +84,19 @@ GR_Parameter_Y      = r36
 GR_Parameter_RESULT = r37
 GR_Tag              = r38
 
+.align 32
+.global scalbl
+
 .section .text
-GLOBAL_IEEE754_ENTRY(scalbl)
+.proc  scalbl
+.align 32
+
+scalbl: 
+#ifdef _LIBC
+.global __ieee754_scalbl
+.type __ieee754_scalbl,@function
+__ieee754_scalbl:
+#endif
 
 //
 //   Is x NAN, INF, ZERO, +-?
@@ -129,12 +140,12 @@ GLOBAL_IEEE754_ENTRY(scalbl)
 { .mib
      setf.exp      FR_Big = GR_Scratch                  
      nop.i 0 
-(p6) br.cond.spnt  SCALBL_NAN_INF_ZERO 
+(p6) br.cond.spnt  L(SCALBL_NAN_INF_ZERO) 
 }
 { .mib
      setf.exp      FR_NBig = GR_Scratch1                  
      nop.i 0 
-(p7) br.cond.spnt  SCALBL_NAN_INF_ZERO 
+(p7) br.cond.spnt  L(SCALBL_NAN_INF_ZERO) 
 };;
 
 //
@@ -201,7 +212,7 @@ GLOBAL_IEEE754_ENTRY(scalbl)
 }
 {    .mfb
      nop.m 0
-(p7) frcpa.s0          f8,p11     =    f0,f0
+(p7) frcpa          f8,p11     =    f0,f0
 (p7) br.ret.spnt    b0          
 };;
 
@@ -235,7 +246,7 @@ GLOBAL_IEEE754_ENTRY(scalbl)
 }
 { .mlx
      nop.m 999
-     movl GR_Scratch = 0x0000000000033FFF 
+(p0) movl GR_Scratch = 0x0000000000033FFF 
 };;
 {    .mfi
      nop.m 0
@@ -244,7 +255,7 @@ GLOBAL_IEEE754_ENTRY(scalbl)
 }
 {    .mlx
      nop.m 999
-     movl GR_Scratch1= 0x0000000000013FFF 
+(p0) movl GR_Scratch1= 0x0000000000013FFF 
 };;
 
 //   Set up necessary status fields 
@@ -255,12 +266,12 @@ GLOBAL_IEEE754_ENTRY(scalbl)
 //
 {    .mfi
      nop.m 999
-     fsetc.s3 0x7F,0x41
+(p0) fsetc.s3 0x7F,0x41
      nop.i 999
 }
 {    .mfi
      nop.m 999
-     fsetc.s2 0x7F,0x42
+(p0) fsetc.s2 0x7F,0x42
      nop.i 999
 };;
 
@@ -334,7 +345,7 @@ GLOBAL_IEEE754_ENTRY(scalbl)
 {    .mfb
 (p6) addl GR_Tag = 52, r0
 (p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
-(p6) br.cond.spnt SCALBL_UNDERFLOW 
+(p6) br.cond.spnt L(SCALBL_UNDERFLOW) 
 };;
 
 //
@@ -342,8 +353,8 @@ GLOBAL_IEEE754_ENTRY(scalbl)
 //
 { .mbb
      nop.m 0
-(p7) br.cond.spnt SCALBL_OVERFLOW 
-(p9) br.cond.spnt SCALBL_OVERFLOW 
+(p7) br.cond.spnt L(SCALBL_OVERFLOW) 
+(p9) br.cond.spnt L(SCALBL_OVERFLOW) 
 };;
 
 //
@@ -355,7 +366,7 @@ GLOBAL_IEEE754_ENTRY(scalbl)
      br.ret.sptk     b0;;                   
 }
 
-SCALBL_NAN_INF_ZERO: 
+L(SCALBL_NAN_INF_ZERO): 
 
 //
 //   Convert N to a fp integer
@@ -460,11 +471,16 @@ SCALBL_NAN_INF_ZERO:
      br.ret.sptk   b0
 };;
 
-GLOBAL_IEEE754_END(scalbl)
+.endp scalbl
+ASM_SIZE_DIRECTIVE(scalbl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_scalbl)
+#endif
+.proc __libm_error_region
 __libm_error_region:
 
-SCALBL_OVERFLOW: 
-SCALBL_UNDERFLOW: 
+L(SCALBL_OVERFLOW): 
+L(SCALBL_UNDERFLOW): 
 
 //
 // Get stack address of N
@@ -541,7 +557,8 @@ SCALBL_UNDERFLOW:
    br.ret.sptk     b0                  
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinh.S b/sysdeps/ia64/fpu/e_sinh.S
index 84c312c2b7..4415dc7524 100644
--- a/sysdeps/ia64/fpu/e_sinh.S
+++ b/sysdeps/ia64/fpu/e_sinh.S
@@ -1,10 +1,10 @@
 .file "sinh.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,838 +20,1249 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
 // 10/12/00 Update to set denormal operand and underflow flags
-// 01/22/01 Fixed to set inexact flag for small args.
-// 05/02/01 Reworked to improve speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 11/20/02 Improved speed with new algorithm
-
+// 1/22/01  Fixed to set inexact flag for small args.
+//
 // API
 //==============================================================
-// double sinh(double)
-
+// double = sinh(double)
+// input  floating point f8
+// output floating point f8
+//
+// Registers used
+//==============================================================
+// general registers: 
+// r32 -> r47
+// predicate registers used:
+// p6 p7 p8 p9
+// floating-point registers used:
+// f9 -> f15; f32 -> f45; 
+// f8 has input, then output
+//
 // Overview of operation
 //==============================================================
-// Case 1:  0 < |x| < 2^-60
-//  Result = x, computed by x+sgn(x)*x^2) to handle flags and rounding
+// There are four paths
+// 1. |x| < 0.25        SINH_BY_POLY
+// 2. |x| < 32          SINH_BY_TBL
+// 3. |x| < 2^14        SINH_BY_EXP
+// 4. |x| >= 2^14       SINH_HUGE
+//
+// For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
+//                                           >= 1.0110001.... x 2^13
+//                                           >= 11357.2166
+//
+// But for double we get infinity for x >= 408633ce8fb9f87e
+//                                      >= 1.0110...x 2^9
+//                                      >= +7.10476e+002
+//
+// And for single we get infinity for x >= 42b3a496
+//                                      >= 1.0110... 2^6
+//                                      >= 89.8215
 //
-// Case 2:  2^-60 < |x| < 0.25
-//  Evaluate sinh(x) by a 13th order polynomial
-//  Care is take for the order of multiplication; and A1 is not exactly 1/3!,
-//  A2 is not exactly 1/5!, etc.
-//  sinh(x) = x + (A1*x^3 + A2*x^5 + A3*x^7 + A4*x^9 + A5*x^11 + A6*x^13)
+// SAFE: If there is danger of overflow set SAFE to 0
+//       NOT implemented: if there is danger of underflow, set SAFE to 0
+// SAFE for all paths listed below
 //
-// Case 3:  0.25 < |x| < 710.47586
-//  Algorithm is based on the identity sinh(x) = ( exp(x) - exp(-x) ) / 2.
-//  The algorithm for exp is described as below.  There are a number of
-//  economies from evaluating both exp(x) and exp(-x).  Although we
-//  are evaluating both quantities, only where the quantities diverge do we
-//  duplicate the computations.  The basic algorithm for exp(x) is described
-//  below.
+// 1. SINH_BY_POLY
+// ===============
+// If |x| is less than the tiny threshold, then clear SAFE 
+// For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
+//             register-biased, this is fc01
+// For single, the tiny threshold is -126  = -7e    => -7e  + ffff = ff81
+// If |x| < tiny threshold, set SAFE = 0
 //
-// Take the input x. w is "how many log2/128 in x?"
-//  w = x * 128/log2
-//  n = int(w)
-//  x = n log2/128 + r + delta
+// 2. SINH_BY_TBL
+// =============
+// SAFE: SAFE is always 1 for TBL; 
+//
+// 3. SINH_BY_EXP
+// ==============
+// There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
+// r34 has N-1; 16382 is in register biased form, 0x13ffd
+// There is danger of double overflow if N-1 > 0x3fe
+//                       in register biased form, 0x103fd
+// Analogously, there is danger of single overflow if N-1 > 0x7e
+//                       in register biased form, 0x1007d
+// SAFE: If there is danger of overflow set SAFE to 0
+//
+// 4. SINH_HUGE
+// ============
+// SAFE: SAFE is always 0 for HUGE
 
-//  n = 128M + index_1 + 2^4 index_2
-//  x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
+#include "libm_support.h"
 
-//  exp(x) = 2^M  2^(index_1/128)  2^(index_2/8) exp(r) exp(delta)
-//       Construct 2^M
-//       Get 2^(index_1/128) from table_1;
-//       Get 2^(index_2/8)   from table_2;
-//       Calculate exp(r) by 5th order polynomial
-//          r = x - n (log2/128)_high
-//          delta = - n (log2/128)_low
-//       Calculate exp(delta) as 1 + delta
+//
+// Assembly macros
+//==============================================================
+sinh_FR_X            = f44
+sinh_FR_X2           = f9
+sinh_FR_X4           = f10
+sinh_FR_SGNX         = f40
+sinh_FR_all_ones     = f45
+sinh_FR_tmp          = f42
 
+sinh_FR_Inv_log2by64 = f9
+sinh_FR_log2by64_lo  = f11
+sinh_FR_log2by64_hi  = f10
 
-// Special values
-//==============================================================
-// sinh(+0)    = +0
-// sinh(-0)    = -0
+sinh_FR_A1           = f9
+sinh_FR_A2           = f10
+sinh_FR_A3           = f11
 
-// sinh(+qnan) = +qnan
-// sinh(-qnan) = -qnan
-// sinh(+snan) = +qnan
-// sinh(-snan) = -qnan
+sinh_FR_Rcub         = f12
+sinh_FR_M_temp       = f13
+sinh_FR_R_temp       = f13
+sinh_FR_Rsq          = f13
+sinh_FR_R            = f14
 
-// sinh(-inf)  = -inf
-// sinh(+inf)  = +inf
+sinh_FR_M            = f38
 
-// Overflow and Underflow
-//=======================
-// sinh(x) = largest double normal when
-//     |x| = 710.47586 = 0x408633ce8fb9f87d
-//
-// Underflow is handled as described in case 1 above
+sinh_FR_B1           = f15
+sinh_FR_B2           = f32
+sinh_FR_B3           = f33
 
-// Registers used
-//==============================================================
-// Floating Point registers used:
-// f8, input, output
-// f6 -> f15,  f32 -> f61
+sinh_FR_peven_temp1  = f34
+sinh_FR_peven_temp2  = f35
+sinh_FR_peven        = f36
 
-// General registers used:
-// r14 -> r40
+sinh_FR_podd_temp1   = f34
+sinh_FR_podd_temp2   = f35
+sinh_FR_podd         = f37
 
-// Predicate registers used:
-// p6 -> p15
+sinh_FR_poly_podd_temp1    =  f11 
+sinh_FR_poly_podd_temp2    =  f13
+sinh_FR_poly_peven_temp1   =  f11
+sinh_FR_poly_peven_temp2   =  f13
 
-// Assembly macros
-//==============================================================
+sinh_FR_J_temp       = f9
+sinh_FR_J            = f10
+
+sinh_FR_Mmj          = f39
+
+sinh_FR_N_temp1      = f11
+sinh_FR_N_temp2      = f12
+sinh_FR_N            = f13
+
+sinh_FR_spos         = f14
+sinh_FR_sneg         = f15
+
+sinh_FR_Tjhi         = f32
+sinh_FR_Tjlo         = f33
+sinh_FR_Tmjhi        = f34
+sinh_FR_Tmjlo        = f35
+
+sinh_GR_mJ           = r35
+sinh_GR_J            = r36
+
+sinh_AD_mJ           = r38
+sinh_AD_J            = r39
+sinh_GR_all_ones     = r40
+
+sinh_FR_S_hi         = f9
+sinh_FR_S_hi_temp    = f10
+sinh_FR_S_lo_temp1   = f11 
+sinh_FR_S_lo_temp2   = f12 
+sinh_FR_S_lo_temp3   = f13 
+
+sinh_FR_S_lo         = f38
+sinh_FR_C_hi         = f39
+
+sinh_FR_C_hi_temp1   = f10
+sinh_FR_Y_hi         = f11 
+sinh_FR_Y_lo_temp    = f12 
+sinh_FR_Y_lo         = f13 
+sinh_FR_SINH         = f9
+
+sinh_FR_P1           = f14
+sinh_FR_P2           = f15
+sinh_FR_P3           = f32
+sinh_FR_P4           = f33
+sinh_FR_P5           = f34
+sinh_FR_P6           = f35
+
+sinh_FR_TINY_THRESH  = f9
 
-rRshf                 = r14
-rN_neg                = r14
-rAD_TB1               = r15
-rAD_TB2               = r16
-rAD_P                 = r17
-rN                    = r18
-rIndex_1              = r19
-rIndex_2_16           = r20
-rM                    = r21
-rBiased_M             = r21
-rSig_inv_ln2          = r22
-rIndex_1_neg          = r22
-rExp_bias             = r23
-rExp_bias_minus_1     = r23
-rExp_mask             = r24
-rTmp                  = r24
-rGt_ln                = r24
-rIndex_2_16_neg       = r24
-rM_neg                = r25
-rBiased_M_neg         = r25
-rRshf_2to56           = r26
-rAD_T1_neg            = r26
-rExp_2tom56           = r28
-rAD_T2_neg            = r28
-rAD_T1                = r29
-rAD_T2                = r30
-rSignexp_x            = r31
-rExp_x                = r31
-
-GR_SAVE_B0            = r33
-GR_SAVE_PFS           = r34
-GR_SAVE_GP            = r35
-
-GR_Parameter_X        = r37
-GR_Parameter_Y        = r38
-GR_Parameter_RESULT   = r39
-GR_Parameter_TAG      = r40
-
-
-FR_X                  = f10
-FR_Y                  = f1
-FR_RESULT             = f8
-
-fRSHF_2TO56           = f6
-fINV_LN2_2TO63        = f7
-fW_2TO56_RSH          = f9
-f2TOM56               = f11
-fP5                   = f12
-fP4                   = f13
-fP3                   = f14
-fP2                   = f15
-
-fLn2_by_128_hi        = f33
-fLn2_by_128_lo        = f34
-
-fRSHF                 = f35
-fNfloat               = f36
-fNormX                = f37
-fR                    = f38
-fF                    = f39
-
-fRsq                  = f40
-f2M                   = f41
-fS1                   = f42
-fT1                   = f42
-fS2                   = f43
-fT2                   = f43
-fS                    = f43
-fWre_urm_f8           = f44
-fAbsX                 = f44
-
-fMIN_DBL_OFLOW_ARG    = f45
-fMAX_DBL_NORM_ARG     = f46
-fXsq                  = f47
-fX4                   = f48
-fGt_pln               = f49
-fTmp                  = f49
-
-fP54                  = f50
-fP5432                = f50
-fP32                  = f51
-fP                    = f52
-fP54_neg              = f53
-fP5432_neg            = f53
-fP32_neg              = f54
-fP_neg                = f55
-fF_neg                = f56
-
-f2M_neg               = f57
-fS1_neg               = f58
-fT1_neg               = f58
-fS2_neg               = f59
-fT2_neg               = f59
-fS_neg                = f59
-fExp                  = f60
-fExp_neg              = f61
-
-fA6                   = f50
-fA65                  = f50
-fA6543                = f50
-fA654321              = f50
-fA5                   = f51
-fA4                   = f52
-fA43                  = f52
-fA3                   = f53
-fA2                   = f54
-fA21                  = f54
-fA1                   = f55
-fX3                   = f56
+sinh_FR_SINH_temp    = f10
+sinh_FR_SCALE        = f11 
+
+sinh_FR_signed_hi_lo = f10
+
+
+GR_SAVE_PFS          = r41
+GR_SAVE_B0           = r42
+GR_SAVE_GP           = r43
+
+GR_Parameter_X       = r44
+GR_Parameter_Y       = r45
+GR_Parameter_RESULT  = r46
 
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
 .align 16
+double_sinh_arg_reduction:
+ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
+   data8 0xB8AA3B295C17F0BC, 0x00004005
+   data8 0xB17217F7D1000000, 0x00003FF8
+   data8 0xCF79ABC9E3B39804, 0x00003FD0
+ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
+
+double_sinh_p_table:
+ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
+   data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC
+   data8 0x8888888888888412, 0x00003FF8
+   data8 0xD00D00D00D4D39F2, 0x00003FF2
+   data8 0xB8EF1D28926D8891, 0x00003FEC
+   data8 0xD732377688025BE9, 0x00003FE5
+   data8 0xB08AF9AE78C1239F, 0x00003FDE
+ASM_SIZE_DIRECTIVE(double_sinh_p_table)
+
+double_sinh_ab_table:
+ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
+   data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
+   data8 0x88888888884ECDD5, 0x00003FF8
+   data8 0xD00D0C6DCC26A86B, 0x00003FF2
+   data8 0x8000000000000002, 0x00003FFE
+   data8 0xAAAAAAAAAA402C77, 0x00003FFA
+   data8 0xB60B6CC96BDB144D, 0x00003FF5
+ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
+
+double_sinh_j_table:
+ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
+   data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
+   data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+   data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+   data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+   data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+   data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+   data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+   data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+   data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+   data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+   data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+   data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+   data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+   data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+   data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+   data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+   data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+   data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+   data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+   data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+   data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+   data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+   data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+   data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+   data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+   data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+   data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+   data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+   data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+   data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+   data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+   data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+   data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
+   data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+   data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+   data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+   data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+   data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+   data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+   data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+   data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+   data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+   data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+   data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+   data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+   data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+   data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+   data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+   data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+   data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+   data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+   data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+   data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+   data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+   data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+   data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+   data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+   data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+   data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+   data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+   data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+   data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+   data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+   data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+   data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
+ASM_SIZE_DIRECTIVE(double_sinh_j_table)
+
+.align 32
+.global sinh#
 
-// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+.section .text
+.proc  sinh#
+.align 32
 
-// double-extended 1/ln(2)
-// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
-// 3fff b8aa 3b29 5c17 f0bc
-// For speed the significand will be loaded directly with a movl and setf.sig
-//   and the exponent will be bias+63 instead of bias+0.  Thus subsequent
-//   computations need to scale appropriately.
-// The constant 128/ln(2) is needed for the computation of w.  This is also
-//   obtained by scaling the computations.
-//
-// Two shifting constants are loaded directly with movl and setf.d.
-//   1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
-//        This constant is added to x*1/ln2 to shift the integer part of
-//        x*128/ln2 into the rightmost bits of the significand.
-//        The result of this fma is fW_2TO56_RSH.
-//   2. fRSHF       = 1.1000..00 * 2^(63)
-//        This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
-//        the integer part of w, n, as a floating-point number.
-//        The result of this fms is fNfloat.
-
-
-LOCAL_OBJECT_START(exp_table_1)
-data8 0x408633ce8fb9f87e // smallest dbl overflow arg
-data8 0x408633ce8fb9f87d // largest dbl arg to give normal dbl result
-data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
-data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
-//
-// Table 1 is 2^(index_1/128) where
-// index_1 goes from 0 to 15
-//
-data8 0x8000000000000000 , 0x00003FFF
-data8 0x80B1ED4FD999AB6C , 0x00003FFF
-data8 0x8164D1F3BC030773 , 0x00003FFF
-data8 0x8218AF4373FC25EC , 0x00003FFF
-data8 0x82CD8698AC2BA1D7 , 0x00003FFF
-data8 0x8383594EEFB6EE37 , 0x00003FFF
-data8 0x843A28C3ACDE4046 , 0x00003FFF
-data8 0x84F1F656379C1A29 , 0x00003FFF
-data8 0x85AAC367CC487B15 , 0x00003FFF
-data8 0x8664915B923FBA04 , 0x00003FFF
-data8 0x871F61969E8D1010 , 0x00003FFF
-data8 0x87DB357FF698D792 , 0x00003FFF
-data8 0x88980E8092DA8527 , 0x00003FFF
-data8 0x8955EE03618E5FDD , 0x00003FFF
-data8 0x8A14D575496EFD9A , 0x00003FFF
-data8 0x8AD4C6452C728924 , 0x00003FFF
-LOCAL_OBJECT_END(exp_table_1)
-
-// Table 2 is 2^(index_1/8) where
-// index_2 goes from 0 to 7
-LOCAL_OBJECT_START(exp_table_2)
-data8 0x8000000000000000 , 0x00003FFF
-data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
-data8 0x9837F0518DB8A96F , 0x00003FFF
-data8 0xA5FED6A9B15138EA , 0x00003FFF
-data8 0xB504F333F9DE6484 , 0x00003FFF
-data8 0xC5672A115506DADD , 0x00003FFF
-data8 0xD744FCCAD69D6AF4 , 0x00003FFF
-data8 0xEAC0C6E7DD24392F , 0x00003FFF
-LOCAL_OBJECT_END(exp_table_2)
-
-
-LOCAL_OBJECT_START(exp_p_table)
-data8 0x3f8111116da21757 //P5
-data8 0x3fa55555d787761c //P4
-data8 0x3fc5555555555414 //P3
-data8 0x3fdffffffffffd6a //P2
-LOCAL_OBJECT_END(exp_p_table)
-
-LOCAL_OBJECT_START(sinh_p_table)
-data8 0xB08AF9AE78C1239F, 0x00003FDE  // A6
-data8 0xB8EF1D28926D8891, 0x00003FEC  // A4
-data8 0x8888888888888412, 0x00003FF8  // A2
-data8 0xD732377688025BE9, 0x00003FE5  // A5
-data8 0xD00D00D00D4D39F2, 0x00003FF2  // A3
-data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC  // A1
-LOCAL_OBJECT_END(sinh_p_table)
+sinh: 
+#ifdef _LIBC
+.global __ieee754_sinh
+.type __ieee754_sinh,@function
+__ieee754_sinh:
+#endif
 
+// X infinity or NAN?
+// Take invalid fault if enabled
 
-.section .text
-GLOBAL_IEEE754_ENTRY(sinh)
 
-{ .mlx
-      getf.exp        rSignexp_x = f8  // Must recompute if x unorm
-      movl            rSig_inv_ln2 = 0xb8aa3b295c17f0bc  // significand of 1/ln2
-}
-{ .mlx
-      addl            rAD_TB1    = @ltoff(exp_table_1), gp
-      movl            rRshf_2to56 = 0x4768000000000000   // 1.10000 2^(63+56)
+{ .mfi
+      alloc r32 = ar.pfs,0,12,4,0                  
+(p0)     fclass.m.unc  p6,p0 = f8, 0xe3	//@qnan | @snan | @inf 
+         mov sinh_GR_all_ones = -1
 }
 ;;
 
-{ .mfi
-      ld8             rAD_TB1    = [rAD_TB1]
-      fclass.m        p6,p0 = f8,0x0b  // Test for x=unorm
-      mov             rExp_mask = 0x1ffff
+
+{ .mfb
+         nop.m 999
+(p6)     fma.d.s0   f8 = f8,f1,f8               
+(p6)     br.ret.spnt     b0 ;;                          
 }
+
+// Put 0.25 in f9; the later compare sets p7 true if |x| >= 0.25
+// Make constant that will generate inexact when squared
+{ .mlx
+         setf.sig sinh_FR_all_ones = sinh_GR_all_ones 
+(p0)     movl            r32 = 0x000000000000fffd ;;         
+}
+
 { .mfi
-      mov             rExp_bias = 0xffff
-      fnorm.s1        fNormX   = f8
-      mov             rExp_2tom56 = 0xffff-56
+(p0)     setf.exp        f9 = r32                         
+(p0)     fclass.m.unc  p7,p0 = f8, 0x07	//@zero
+         nop.i 999 ;;
 }
-;;
 
-// Form two constants we need
-//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128
-//  1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
+{ .mfb
+         nop.m 999
+(p0)     fmerge.s      sinh_FR_X    = f0,f8             
+(p7)     br.ret.spnt     b0 ;;                          
+}
 
+// Identify denormal operands.
 { .mfi
-      setf.sig        fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
-      fclass.m        p8,p0 = f8,0x07  // Test for x=0
-      nop.i 999
+         nop.m 999
+         fclass.m.unc  p10,p0 = f8, 0x09        //  + denorm
+         nop.i 999
+};;
+{ .mfi
+         nop.m 999
+         fclass.m.unc  p11,p0 = f8, 0x0a        //  - denorm
+         nop.i 999 
 }
-{ .mlx
-      setf.d          fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
-      movl            rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+
+{ .mfi
+         nop.m 999
+(p0)     fmerge.s      sinh_FR_SGNX = f8,f1             
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      ldfpd           fMIN_DBL_OFLOW_ARG, fMAX_DBL_NORM_ARG = [rAD_TB1],16
-      fclass.m        p10,p0 = f8,0x1e3  // Test for x=inf, nan, NaT
-      nop.i           0
+         nop.m 999
+(p0)     fcmp.lt.unc.s1  p0,p7 = sinh_FR_X,f9             
+         nop.i 999 ;;
 }
+
+{ .mib
+         nop.m 999
+         nop.i 999
+(p7)     br.cond.sptk    L(SINH_BY_TBL) ;;                      
+}
+
+
+L(SINH_BY_POLY): 
+
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Set tiny_SAFE (p7) to 1(0) if answer is not tiny 
+// Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
+// commented out.
+//(p0)     movl            r32            = 0x000000000000fc01           
+//(p0)     setf.exp        f10            = r32                         
+//(p0)     fcmp.lt.unc.s1  p6,p7          = f8,f10                     
+// Here is essentially the algorithm for SINH_BY_POLY. Care is taken with the order
+// of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
+// Note that ax = |x|
+// sinh(x) = sign * (series(e^x) - series(e^-x))/2
+//         = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
+//         = sign * (ax   + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
+//                        + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
+//         = sign * (ax   + ax*p_odd + (ax*p_even))
+//         = sign * (ax   + Y_lo)
+// sinh(x) = sign * (Y_hi + Y_lo)
+// Get the values of P_x from the table
 { .mfb
-      setf.exp        f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
-      nop.f           0
-(p6)  br.cond.spnt    SINH_UNORM            // Branch if x=unorm
+(p0)  addl           r34   = @ltoff(double_sinh_p_table), gp
+(p10) fma.d.s0       f8 =  f8,f8,f8
+(p10) br.ret.spnt    b0
 }
 ;;
 
-SINH_COMMON:
-{ .mfi
-      ldfe            fLn2_by_128_hi  = [rAD_TB1],16
-      nop.f           0
-      nop.i           0
-}
 { .mfb
-      setf.d          fRSHF = rRshf // Form right shift const 1.100 * 2^63
-      nop.f           0
-(p8)  br.ret.spnt     b0                    // Exit for x=0, result=x
+      ld8 r34 = [r34]
+(p11) fnma.d.s0      f8 =  f8,f8,f8
+(p11) br.ret.spnt    b0
 }
 ;;
 
-{ .mfi
-      ldfe            fLn2_by_128_lo  = [rAD_TB1],16
-      nop.f           0
-      nop.i           0
+// Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+         nop.m 999
+(p0)     ldfe            sinh_FR_P1 = [r34],16                 
+(p0)     fma.s1        sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;           
 }
-{ .mfb
-      and             rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
-(p10) fma.d.s0        f8 = f8,f1,f0  // Result if x=inf, nan, NaT
-(p10) br.ret.spnt     b0               // quick exit for x=inf, nan, NaT
+
+{ .mmi
+(p0)     ldfe            sinh_FR_P2 = [r34],16 ;;                 
+(p0)     ldfe            sinh_FR_P3 = [r34],16                 
+         nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe            sinh_FR_P4 = [r34],16 ;;                 
+(p0)     ldfe            sinh_FR_P5 = [r34],16                 
+         nop.i 999 ;;
 }
-;;
 
-// After that last load rAD_TB1 points to the beginning of table 1
 { .mfi
-      nop.m           0
-      fcmp.eq.s0      p6,p0 = f8, f0       // Dummy to set D
-      sub             rExp_x = rExp_x, rExp_bias // True exponent of x
+(p0)     ldfe            sinh_FR_P6 = [r34],16                 
+(p0)     fma.s1        sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0         
+         nop.i 999 ;;
 }
-;;
 
+// Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even 
 { .mfi
-      nop.m           0
-      fmerge.s        fAbsX = f0, fNormX   // Form |x|
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3                
+         nop.i 999 ;;
 }
-{ .mfb
-      cmp.gt          p7, p0 = -2, rExp_x      // Test |x| < 2^(-2)
-      fma.s1          fXsq = fNormX, fNormX, f0  // x*x for small path
-(p7)  br.cond.spnt    SINH_SMALL               // Branch if 0 < |x| < 2^-2
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1   
+         nop.i 999
 }
-;;
 
-// W = X * Inv_log2_by_128
-// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
-// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4               
+         nop.i 999 ;;
+}
 
 { .mfi
-      add             rAD_P = 0x180, rAD_TB1
-      fma.s1          fW_2TO56_RSH  = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
-      add             rAD_TB2 = 0x100, rAD_TB1
+         nop.m 999
+(p0)     fma.s1      sinh_FR_podd       = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0           
+         nop.i 999
 }
-;;
 
-// Divide arguments into the following categories:
-//  Certain Safe                - 0.25 <= |x| <= MAX_DBL_NORM_ARG
-//  Possible Overflow       p14 - MAX_DBL_NORM_ARG < |x| < MIN_DBL_OFLOW_ARG
-//  Certain Overflow        p15 - MIN_DBL_OFLOW_ARG <= |x| < +inf
-//
-// If the input is really a double arg, then there will never be
-// "Possible Overflow" arguments.
-//
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2 
+         nop.i 999 ;;
+}
 
 { .mfi
-      ldfpd           fP5, fP4  = [rAD_P] ,16
-      fcmp.ge.s1      p15,p14 = fAbsX,fMIN_DBL_OFLOW_ARG
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1      sinh_FR_peven       = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0         
+         nop.i 999 ;;
 }
-;;
 
-// Nfloat = round_int(W)
-// The signficand of fW_2TO56_RSH contains the rounded integer part of W,
-// as a twos complement number in the lower bits (that is, it may be negative).
-// That twos complement number (called N) is put into rN.
+// Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_Y_lo_temp    = sinh_FR_X, sinh_FR_peven, f0                    
+         nop.i 999 ;;
+}
 
-// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
-// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
-// Thus, fNfloat contains the floating point version of N
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_Y_lo         = sinh_FR_X, sinh_FR_podd,  sinh_FR_Y_lo_temp          
+         nop.i 999 ;;
+}
 
+// Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
 { .mfi
-      ldfpd           fP3, fP2  = [rAD_P]
-(p14) fcmp.gt.unc.s1  p14,p0 = fAbsX,fMAX_DBL_NORM_ARG
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1      sinh_FR_SINH        = sinh_FR_X, f1, sinh_FR_Y_lo                      
+         nop.i 999 ;;
 }
-{ .mfb
-      nop.m           0
-      fms.s1          fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
-(p15) br.cond.spnt    SINH_CERTAIN_OVERFLOW
+// Dummy multiply to generate inexact
+{ .mfi
+         nop.m 999
+(p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999
 }
-;;
 
-{ .mfi
-      getf.sig        rN        = fW_2TO56_RSH
-      nop.f           0
-      mov             rExp_bias_minus_1 = 0xfffe
+// Calculate f8 = sign * (Y_hi + Y_lo)
+// Go to return
+{ .mfb
+         nop.m 999
+(p0)     fma.d.s0        f8 = sinh_FR_SGNX,sinh_FR_SINH,f0                       
+(p0)     br.ret.sptk     b0 ;;                          
 }
-;;
 
-// rIndex_1 has index_1
-// rIndex_2_16 has index_2 * 16
-// rBiased_M has M
 
-// rM has true M
-// r = x - Nfloat * ln2_by_128_hi
-// f = 1 - Nfloat * ln2_by_128_lo
+L(SINH_BY_TBL): 
+
+// Now that we are at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now. 
+// Double-extended:
+// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
+
+{ .mlx
+         nop.m 999
+(p0)     movl            r32 = 0x0000000000010009 ;;         
+}
+
 { .mfi
-      and             rIndex_1 = 0x0f, rN
-      fnma.s1         fR   = fNfloat, fLn2_by_128_hi, fNormX
-      shr             rM = rN,  0x7
+(p0)     setf.exp        f9 = r32                         
+         nop.f 999
+         nop.i 999 ;;
 }
+
 { .mfi
-      and             rIndex_2_16 = 0x70, rN
-      fnma.s1         fF   = fNfloat, fLn2_by_128_lo, f1
-      sub             rN_neg = r0, rN
+         nop.m 999
+(p0)     fcmp.ge.unc.s1  p6,p7 = sinh_FR_X,f9             
+         nop.i 999 ;;
 }
-;;
+
+{ .mib
+         nop.m 999
+         nop.i 999
+(p6)     br.cond.spnt    L(SINH_HUGE) ;;                        
+}
+
+// r32 = 1
+// r34 = N-1 
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// sinh(x) = sinh(B+R)
+//         = sinh(B)cosh(R) + cosh(B)sinh(R)
+// 
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j 
+//   We will calculate M and get N as (M-j)/64
+//   The division is a shift.
+// exp(B)  = exp(N*log2 + j*log2/64)
+//         = 2^N * 2^(j*log2/64)
+// sinh(B) = 1/2(e^B -e^-B)
+//         = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64)) 
+// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) 
+// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) 
+// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+//        = 1 + p_odd + p_even
+//        where p_odd uses the A coefficients and p_even uses the B coefficients
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+//    cosh(R) = 1 + p_even
+//    sinh(B) = S_hi + S_lo
+//    cosh(B) = C_hi
+// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants. 
+// f9  = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
 
 { .mmi
-      and             rIndex_1_neg = 0x0f, rN_neg
-      add             rBiased_M = rExp_bias_minus_1, rM
-      shr             rM_neg = rN_neg,  0x7
+(p0)  adds                 r32 = 0x1,r0      
+(p0)  addl           r34   = @ltoff(double_sinh_arg_reduction), gp
+      nop.i 999
 }
+;;
+
 { .mmi
-      and             rIndex_2_16_neg = 0x70, rN_neg
-      add             rAD_T2 = rAD_TB2, rIndex_2_16
-      shladd          rAD_T1 = rIndex_1, 4, rAD_TB1
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
-// rAD_T1 has address of T1
-// rAD_T2 has address if T2
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1)  = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+         nop.m 999
+(p0)     movl                r38 = 0x000000000000fffe ;; 
+}
 
 { .mmi
-      setf.exp        f2M = rBiased_M
-      ldfe            fT2  = [rAD_T2]
-      nop.i           0
+(p0)     ldfe            sinh_FR_Inv_log2by64 = [r34],16 ;;       
+(p0)     ldfe            sinh_FR_log2by64_hi  = [r34],16       
+         nop.i 999 ;;
+}
+
+{ .mbb
+(p0)     ldfe            sinh_FR_log2by64_lo  = [r34],16       
+         nop.b 999
+         nop.b 999 ;;
 }
+
+// Get the A coefficients
+// f9  = A_1
+// f10 = A_2
+// f11 = A_3
+
 { .mmi
-      add             rBiased_M_neg = rExp_bias_minus_1, rM_neg
-      add             rAD_T2_neg = rAD_TB2, rIndex_2_16_neg
-      shladd          rAD_T1_neg = rIndex_1_neg, 4, rAD_TB1
+      nop.m 999
+(p0)  addl           r34   = @ltoff(double_sinh_ab_table), gp
+      nop.i 999
 }
 ;;
 
-// Create Scale = 2^M
-// Load T1 and T2
 { .mmi
-      ldfe            fT1  = [rAD_T1]
-      nop.m           0
-      nop.i           0
-}
-{ .mmf
-      setf.exp        f2M_neg = rBiased_M_neg
-      ldfe            fT2_neg  = [rAD_T2_neg]
-      fma.s1          fF_neg   = fNfloat, fLn2_by_128_lo, f1
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+
+// Calculate M and keep it as integer and floating point.
+// f38 = M = round-to-integer(x*Inv_log2by64)
+// sinh_FR_M = M = truncate(ax/(log2/64))
+// Put the significand of M in r35
+//    and the floating point representation of M in sinh_FR_M
+
 { .mfi
-      nop.m           0
-      fma.s1          fRsq = fR, fR, f0
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_M      = sinh_FR_X, sinh_FR_Inv_log2by64, f0 
+         nop.i 999
 }
+
 { .mfi
-      ldfe            fT1_neg  = [rAD_T1_neg]
-      fma.s1          fP54 = fR, fP5, fP4
-      nop.i           0
+(p0)     ldfe            sinh_FR_A1 = [r34],16            
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fP32 = fR, fP3, fP2
-      nop.i           0
+         nop.m 999
+(p0)     fcvt.fx.s1      sinh_FR_M_temp = sinh_FR_M                      
+         nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fnma.s1         fP54_neg = fR, fP5, fP4
-      nop.i           0
+         nop.m 999
+(p0)     fnorm.s1        sinh_FR_M      = sinh_FR_M_temp                 
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fnma.s1         fP32_neg = fR, fP3, fP2
-      nop.i           0
+(p0)     getf.sig        r35       = sinh_FR_M_temp                 
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
-{ .mfi
-      nop.m           0
-      fma.s1          fP5432  = fRsq, fP54, fP32
-      nop.i           0
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It 
+// has a range of -32 thru 31.
+// r35 = M
+// r36 = j 
+
+{ .mii
+         nop.m 999
+         nop.i 999 ;;
+(p0)     and            r36 = 0x3f, r35 ;;   
 }
+
+// Calculate R
+// f13 = f44 - f12*f10 = ax - M*log2by64_hi
+// f14 = f13 - f8*f11  = R = (ax - M*log2by64_hi) - M*log2by64_lo
+
 { .mfi
-      nop.m           0
-      fma.s1          fS2  = fF,fT2,f0
-      nop.i           0
+         nop.m 999
+(p0)     fnma.s1           sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X      
+         nop.i 999
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fS1  = f2M,fT1,f0
-      nop.i           0
+(p0)     ldfe            sinh_FR_A2 = [r34],16            
+         nop.f 999
+         nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fP5432_neg  = fRsq, fP54_neg, fP32_neg
-      nop.i           0
+         nop.m 999
+(p0)     fnma.s1           sinh_FR_R      = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp 
+         nop.i 999
 }
-;;
 
-{ .mfi
-      nop.m           0
-      fma.s1          fS1_neg  = f2M_neg,fT1_neg,f0
-      nop.i           0
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
+
+{ .mmi
+(p0)     ldfe            sinh_FR_A3 = [r34],16 ;;            
+(p0)     ldfe            sinh_FR_B1 = [r34],16            
+         nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe            sinh_FR_B2 = [r34],16 ;;            
+(p0)     ldfe            sinh_FR_B3 = [r34],16            
+         nop.i 999 ;;
 }
+
+{ .mii
+         nop.m 999
+(p0)     shl            r34 = r36,  0x2 ;;   
+(p0)     sxt1           r37 = r34 ;;         
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
+
 { .mfi
-      nop.m           0
-      fma.s1          fS2_neg  = fF_neg,fT2_neg,f0
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1             sinh_FR_Rsq  = sinh_FR_R,   sinh_FR_R, f0  
+(p0)     shr            r36 = r37,  0x2 ;;   
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0)     sub                  r34 = r35, r36    
+         nop.i 999 ;;
+(p0)     shr                  r35 = r34, 0x6 ;;    
+}
+
+{ .mii
+(p0)     sub                 r40 = r38, r35           
+(p0)     adds                 r37 = 0x1, r35    
+(p0)     add                 r39 = r38, r35 ;;           
+}
+
+// Get the address of the J table, add the offset, 
+// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0)  sub                  r34 = r35, r32    
+(p0)  addl           r37   = @ltoff(double_sinh_j_table), gp
+      nop.i 999
 }
 ;;
 
+{ .mmi
+      ld8 r37 = [r37]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
 { .mfi
-      nop.m           0
-      fma.s1          fP     = fRsq, fP5432, fR
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1             sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0  
+         nop.i 999
 }
+
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9; p7 is set when sinh_FR_X < 32 is false
+// Go to EXP if |x| >= 32 
+
+{ .mlx
+         nop.m 999
+(p0)     movl                r32 = 0x0000000000010004 ;;               
+}
+
+// Calculate p_even
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
 { .mfi
-      nop.m           0
-      fma.s1          fS   = fS1,fS2,f0
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3,          sinh_FR_B2  
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fms.s1          fP_neg     = fRsq, fP5432_neg, fR
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1  
+         nop.i 999
 }
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
 { .mfi
-      nop.m           0
-      fma.s1          fS_neg   = fS1_neg,fS2_neg,f0
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_podd_temp1 = sinh_FR_Rsq,        sinh_FR_A3,         sinh_FR_A2  
+         nop.i 999 ;;
 }
-;;
 
-{ .mfb
-      nop.m           0
-      fmpy.s0         fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
-(p14) br.cond.spnt    SINH_POSSIBLE_OVERFLOW
+{ .mfi
+(p0)     setf.exp            sinh_FR_N_temp1 = r39            
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fExp = fS, fP, fS
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_peven       = sinh_FR_Rsq, sinh_FR_peven_temp2, f0     
+         nop.i 999
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fExp_neg = fS_neg, fP_neg, fS_neg
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_podd_temp2 = sinh_FR_Rsq,        sinh_FR_podd_temp1, sinh_FR_A1  
+         nop.i 999 ;;
 }
-;;
 
-{ .mfb
-      nop.m           0
-      fms.d.s0        f8 = fExp, f1, fExp_neg
-      br.ret.sptk     b0                  // Normal path exit
+{ .mfi
+(p0)     setf.exp            f9  = r32                              
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
-// Here if 0 < |x| < 0.25
-SINH_SMALL:
 { .mfi
-      add             rAD_T1 = 0x1a0, rAD_TB1
-      fcmp.lt.s1      p7, p8 = fNormX, f0       // Test sign of x
-      cmp.gt          p6, p0 = -60, rExp_x      // Test |x| < 2^(-60)
+         nop.m 999
+(p0)     fma.s1          sinh_FR_podd       = sinh_FR_podd_temp2, sinh_FR_Rcub,       sinh_FR_R   
+         nop.i 999
+}
+
+// sinh_GR_mj contains the table offset for -j
+// sinh_GR_j  contains the table offset for +j
+// p6 is true when j <= 0
+
+{ .mlx
+(p0)     setf.exp            sinh_FR_N_temp2 = r40            
+(p0)     movl                r40 = 0x0000000000000020 ;;    
 }
+
 { .mfi
-      add             rAD_T2 = 0x1d0, rAD_TB1
-      nop.f           0
-      nop.i           0
+(p0)     sub                 sinh_GR_mJ = r40,  r36           
+(p0)     fmerge.se           sinh_FR_spos    = sinh_FR_N_temp1, f1 
+(p0)     adds                sinh_GR_J  = 0x20, r36 ;;           
 }
-;;
 
-{ .mmb
-      ldfe            fA6 = [rAD_T1],16
-      ldfe            fA5 = [rAD_T2],16
-(p6)  br.cond.spnt    SINH_VERY_SMALL           // Branch if |x| < 2^(-60)
+{ .mii
+         nop.m 999
+(p0)     shl                  sinh_GR_mJ = sinh_GR_mJ, 5 ;;   
+(p0)     add                  sinh_AD_mJ = r37, sinh_GR_mJ ;; 
 }
-;;
 
 { .mmi
-      ldfe            fA4 = [rAD_T1],16
-      ldfe            fA3 = [rAD_T2],16
-      nop.i           0
+         nop.m 999
+(p0)     ldfe                 sinh_FR_Tmjhi = [sinh_AD_mJ],16                 
+(p0)     shl                  sinh_GR_J  = sinh_GR_J, 5 ;;    
+}
+
+{ .mfi
+(p0)     ldfs                 sinh_FR_Tmjlo = [sinh_AD_mJ],16                 
+(p0)     fcmp.lt.unc.s1      p0,p7 = sinh_FR_X,f9                          
+(p0)     add                  sinh_AD_J  = r37, sinh_GR_J ;;  
 }
-;;
 
 { .mmi
-      ldfe            fA2 = [rAD_T1]
-      ldfe            fA1 = [rAD_T2]
-      nop.i           0
+(p0)     ldfe                 sinh_FR_Tjhi  = [sinh_AD_J],16 ;;                  
+(p0)     ldfs                 sinh_FR_Tjlo  = [sinh_AD_J],16                  
+         nop.i 999 ;;
+}
+
+{ .mfb
+         nop.m 999
+(p0)     fmerge.se           sinh_FR_sneg    = sinh_FR_N_temp2, f1 
+(p7)     br.cond.spnt        L(SINH_BY_EXP) ;;                            
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fX3 = fNormX, fXsq, f0
-      nop.i           0
+         nop.m 999
+         nop.f 999
+         nop.i 999 ;;
 }
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate S_hi and S_lo
+// sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjhi)
+
 { .mfi
-      nop.m           0
-      fma.s1          fX4 = fXsq, fXsq, f0
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0   
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fA65 = fXsq, fA6, fA5
-      nop.i           0
+         nop.m 999
+(p0)    fms.s1         sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi_temp              
+         nop.i 999
 }
+
+// Calculate C_hi
+// sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
+
 { .mfi
-      nop.m           0
-      fma.s1          fA43 = fXsq, fA4, fA3
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0                   
+         nop.i 999 ;;
 }
-;;
+
+// sinh_FR_S_lo_temp1 =  sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_S_lo_temp1              )
 
 { .mfi
-      nop.m           0
-      fma.s1          fA21 = fXsq, fA2, fA1
-      nop.i           0
+         nop.m 999
+(p0)    fms.s1         sinh_FR_S_lo_temp1 =  sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi            
+         nop.i 999
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fA6543 = fX4, fA65, fA43
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_C_hi       = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1    
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fA654321 = fX4, fA6543, fA21
-      nop.i           0
+         nop.m 999
+(p0)    fnma.s1        sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1       
+         nop.i 999
 }
-;;
 
-// Dummy multiply to generate inexact
+// sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
+// sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
+
 { .mfi
-      nop.m           0
-      fmpy.s0         fTmp = fA6, fA6
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_S_lo_temp1 =  sinh_FR_sneg, sinh_FR_Tmjlo, f0                  
+         nop.i 999 ;;
 }
-{ .mfb
-      nop.m           0
-      fma.d.s0        f8 = fA654321, fX3, fNormX
-      br.ret.sptk     b0                // Exit if 2^-60 < |x| < 0.25
+
+/////////// BUG FIX fma to fms -TK
+{ .mfi
+         nop.m 999
+(p0)    fms.s1         sinh_FR_S_lo_temp3 =  sinh_FR_spos, sinh_FR_Tjlo,  sinh_FR_S_lo_temp1  
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)    fma.s1         sinh_FR_S_lo       =  sinh_FR_S_lo_temp3, f1,   sinh_FR_S_lo_temp2     
+         nop.i 999 ;;
 }
-;;
 
-SINH_VERY_SMALL:
-// Here if 0 < |x| < 2^-60
-// Compute result by x + sgn(x)*x^2 to get properly rounded result
-.pred.rel "mutex",p7,p8
+// Y_hi = S_hi 
+// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
+// sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
+// sinh_FR_Y_lo      = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
+
+{ .mfi
+         nop.m 999
+(p0)    fma.s1         sinh_FR_Y_lo_temp  = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo           
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)    fma.s1         sinh_FR_Y_lo       =  sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp      
+         nop.i 999 ;;
+}
+
+// sinh_FR_SINH = Y_hi + Y_lo
+// f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
+
+// Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-(p7)  fnma.d.s0       f8 = fNormX, fNormX, fNormX // If x<0 result ~ x-x^2
-      nop.i           0
+         nop.m 999
+(p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999
 }
+{ .mfi
+         nop.m 999
+(p0)    fma.s1         sinh_FR_SINH       =  sinh_FR_S_hi, f1, sinh_FR_Y_lo    
+         nop.i 999 ;;
+}
+
 { .mfb
-      nop.m           0
-(p8)  fma.d.s0        f8 = fNormX, fNormX, fNormX // If x>0 result ~ x+x^2
-      br.ret.sptk     b0                          // Exit if |x| < 2^-60
+         nop.m 999
+(p0)    fma.d.s0       f8 = sinh_FR_SGNX, sinh_FR_SINH,f0                      
+(p0)    br.ret.sptk     b0 ;;                          
 }
-;;
 
 
-SINH_POSSIBLE_OVERFLOW:
+L(SINH_BY_EXP): 
 
-// Here if fMAX_DBL_NORM_ARG < |x| < fMIN_DBL_OFLOW_ARG
-// This cannot happen if input is a double, only if input higher precision.
-// Overflow is a possibility, not a certainty.
+// When p7 is true,  we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) +Tjlo
+// Scale = sign * 2^(N-1)
+// sinh_FR_Y_lo =  sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
+// sinh_FR_Y_lo =  sinh_FR_Tjhi * (sinh_FR_Y_lo_temp      )
 
-// Recompute result using status field 2 with user's rounding mode,
-// and wre set.  If result is larger than largest double, then we have
-// overflow
+{ .mfi
+         nop.m 999
+(p0)   fma.s1            sinh_FR_Y_lo_temp =  sinh_FR_peven, f1, sinh_FR_podd                   
+         nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow            if N-1 > 0x3fe = 1022
+{ .mlx
+         nop.m 999
+(p0)   movl                r32          = 0x00000000000003fe ;;                       
+}
 
 { .mfi
-      mov             rGt_ln  = 0x103ff // Exponent for largest dbl + 1 ulp
-      fsetc.s2        0x7F,0x42         // Get user's round mode, set wre
-      nop.i           0
+(p0)   cmp.gt.unc          p0,p7        = r34, r32                                 
+(p0)   fmerge.s          sinh_FR_SCALE     = sinh_FR_SGNX, sinh_FR_spos                         
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      setf.exp        fGt_pln = rGt_ln  // Create largest double + 1 ulp
-      fma.d.s2        fWre_urm_f8 = fS, fP, fS    // Result with wre set
-      nop.i           0
+         nop.m 999
+(p0)   fma.s1            sinh_FR_Y_lo      =  sinh_FR_Tjhi,  sinh_FR_Y_lo_temp, sinh_FR_Tjlo    
+         nop.i 999 ;;
 }
-;;
 
+// f8 = answer = scale * (Y_hi + Y_lo)
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                   // Turn off wre in sf2
-      nop.i           0
+         nop.m 999
+(p0)   fma.s1            sinh_FR_SINH_temp = sinh_FR_Y_lo,  f1, sinh_FR_Tjhi       
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fcmp.ge.s1      p6, p0 =  fWre_urm_f8, fGt_pln // Test for overflow
-      nop.i           0
+         nop.m 999
+(p0)   fma.d.s0          f44          = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0      
+         nop.i 999 ;;
 }
-;;
 
-{ .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    SINH_CERTAIN_OVERFLOW // Branch if overflow
+// Dummy multiply to generate inexact
+{ .mfi
+         nop.m 999
+(p7)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999 ;;
 }
-;;
 
+// If over_SAFE is set, return
 { .mfb
-      nop.m           0
-      fma.d.s0        f8 = fS, fP, fS
-      br.ret.sptk     b0                     // Exit if really no overflow
+       nop.m 999
+(p7)   fmerge.s            f8 = f44,f44                                            
+(p7)   br.ret.sptk     b0 ;;                          
 }
-;;
 
-SINH_CERTAIN_OVERFLOW:
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD  (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
 { .mfi
-      sub             rTmp = rExp_mask, r0, 1
-      fcmp.lt.s1      p6, p7 = fNormX, f0    // Test for x < 0
-      nop.i           0
+         nop.m 999
+(p0)   fsetc.s2            0x7F,0x42                                               
+         nop.i 999;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)   fma.d.s2            f43  = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0                      
+         nop.i 999 ;;
+}
+
+// 103FF => 103FF -FFFF = 400(true)
+// 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
+// double (7FE). So 0 103FF 8000000000000000  is one ulp more than
+// largest double in register bias
+// Now  set p8 if the answer with WRE is greater than or equal to this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+         nop.m 999
+(p0)   movl                r32     = 0x000000000103FF ;;                              
 }
-;;
 
 { .mmf
-      alloc           r32=ar.pfs,1,4,4,0
-      setf.exp        fTmp = rTmp
-      fmerge.s        FR_X = f8,f8
+         nop.m 999
+(p0)   setf.exp            f41 = r32                                               
+(p0)   fsetc.s2            0x7F,0x40 ;;                                               
 }
-;;
 
 { .mfi
-      mov             GR_Parameter_TAG = 127
-(p6)  fnma.d.s0       FR_RESULT = fTmp, fTmp, f0    // Set I,O and -INF result
-      nop.i           0
+         nop.m 999
+(p0)   fcmp.ge.unc.s1 p8, p0 =  f43, f41                                           
+         nop.i 999
 }
+
+{ .mfi
+         nop.m 999
+(p0)   fmerge.ns           f42 = f41, f41                                          
+         nop.i 999 ;;
+}
+
+// The error tag for overflow is 127
+{ .mii
+         nop.m 999
+         nop.i 999 ;;
+(p8)   mov                 r47 = 127 ;;                                               
+}
+
 { .mfb
-      nop.m           0
-(p7)  fma.d.s0        FR_RESULT = fTmp, fTmp, f0    // Set I,O and +INF result
-      br.cond.sptk    __libm_error_region
+         nop.m 999
+(p0)   fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
+(p8)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+{ .mii
+         nop.m 999
+         nop.i 999 ;;
+(p9)   mov                 r47 = 127                                               
+}
+
+{ .mib
+         nop.m 999
+         nop.i 999
+(p9)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+         nop.m 999
+(p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999 ;;
 }
-;;
 
-// Here if x unorm
-SINH_UNORM:
 { .mfb
-      getf.exp        rSignexp_x = fNormX    // Must recompute if x unorm
-      fcmp.eq.s0      p6, p0 = f8, f0        // Set D flag
-      br.cond.sptk    SINH_COMMON
+         nop.m 999
+(p0)   fmerge.s            f8 = f44,f44                                            
+(p0)   br.ret.sptk     b0 ;;                          
 }
-;;
 
-GLOBAL_IEEE754_END(sinh)
+L(SINH_HUGE): 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+// for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
+
+{ .mlx
+         nop.m 999
+(p0)   movl                r32 = 0x0000000000015dbf ;;                                
+}
+
+{ .mfi
+(p0)   setf.exp            f9  = r32                                               
+         nop.f 999
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)   fma.s1              sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1                       
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)   fma.d.s0            f44 = sinh_FR_signed_hi_lo,  f9, f0                          
+(p0)   mov                 r47 = 127                                               
+}
+.endp sinh
+ASM_SIZE_DIRECTIVE(sinh)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_sinh)
+#endif
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
+
+.proc __libm_error_region
+__libm_error_region:
+L(SINH_ERROR_SUPPORT):
 .prologue
+
+// (1)
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
@@ -860,32 +1271,39 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                           // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                       // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
+
+
+// (2)
 { .mmi
-        stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
-        add GR_Parameter_X = 16,sp              // Parameter 1 address
+        stfd [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
+        add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                       // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
+
 .body
+// (3)
 { .mib
-        stfd [GR_Parameter_X] = FR_X            // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-        nop.b 0
+        stfd [GR_Parameter_X] = f8                     // STORE Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 address
+        nop.b 0                            
 }
 { .mib
-        stfd [GR_Parameter_Y] = FR_RESULT       // STORE Parameter 3 on stack
+        stfd [GR_Parameter_Y] = f44                    // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#   // Call error handling function
+        br.call.sptk b0=__libm_error_support#          // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
+
+// (4)
 { .mmi
         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
@@ -898,6 +1316,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinhf.S b/sysdeps/ia64/fpu/e_sinhf.S
index 4a407b7f3c..d5aa2dca16 100644
--- a/sysdeps/ia64/fpu/e_sinhf.S
+++ b/sysdeps/ia64/fpu/e_sinhf.S
@@ -1,10 +1,10 @@
 .file "sinhf.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,727 +20,1305 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
 // History
-//*********************************************************************
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+//==============================================================
+// 2/02/00  Initial version
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
 // 10/12/00 Update to set denormal operand and underflow flags
-// 01/22/01 Fixed to set inexact flag for small args.
-// 05/02/01 Reworked to improve speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 11/20/02 Improved algorithm based on expf
+// 1/22/01  Fixed to set inexact flag for small args.
 //
 // API
-//*********************************************************************
-// float sinhf(float)
+//==============================================================
+// float = sinhf(float)
+// input  floating point f8
+// output floating point f8
+//
+// Registers used
+//==============================================================
+// general registers: 
+// r32 -> r47
+// predicate registers used:
+// p6 p7 p8 p9
+// floating-point registers used:
+// f9 -> f15; f32 -> f45; 
+// f8 has input, then output
 //
 // Overview of operation
-//*********************************************************************
-// Case 1:  0 < |x| < 2^-60
-//  Result = x, computed by x+sgn(x)*x^2) to handle flags and rounding
+//==============================================================
+// There are four paths
+// 1. |x| < 0.25        SINH_BY_POLY
+// 2. |x| < 32          SINH_BY_TBL
+// 3. |x| < 2^14        SINH_BY_EXP
+// 4. |x| >= 2^14       SINH_HUGE
 //
-// Case 2:  2^-60 < |x| < 0.25
-//  Evaluate sinh(x) by a 9th order polynomial
-//  Care is take for the order of multiplication; and A2 is not exactly 1/5!,
-//  A3 is not exactly 1/7!, etc.
-//  sinh(x) = x + (A1*x^3 + A2*x^5 + A3*x^7 + A4*x^9)
+// For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
+//                                           >= 1.0110001.... x 2^13
+//                                           >= 11357.2166
 //
-// Case 3:  0.25 < |x| < 89.41598
-//  Algorithm is based on the identity sinh(x) = ( exp(x) - exp(-x) ) / 2.
-//  The algorithm for exp is described as below.  There are a number of
-//  economies from evaluating both exp(x) and exp(-x).  Although we
-//  are evaluating both quantities, only where the quantities diverge do we
-//  duplicate the computations.  The basic algorithm for exp(x) is described
-//  below.
+// But for double we get infinity for x >= 408633ce8fb9f87e
+//                                      >= 1.0110...x 2^9
+//                                      >= +7.10476e+002
 //
-// Take the input x. w is "how many log2/128 in x?"
-//  w = x * 64/log2
-//  NJ = int(w)
-//  x = NJ*log2/64 + R
-
-//  NJ = 64*n + j
-//  x = n*log2 + (log2/64)*j + R
+// And for single we get infinity for x >= 42b3a496
+//                                      >= 1.0110... 2^6
+//                                      >= 89.8215
 //
-//  So, exp(x) = 2^n * 2^(j/64)* exp(R)
+// SAFE: If there is danger of overflow set SAFE to 0
+//       NOT implemented: if there is danger of underflow, set SAFE to 0
+// SAFE for all paths listed below
 //
-//  T =  2^n * 2^(j/64)
-//       Construct 2^n
-//       Get 2^(j/64) table
-//           actually all the entries of 2^(j/64) table are stored in DP and
-//           with exponent bits set to 0 -> multiplication on 2^n can be
-//           performed by doing logical "or" operation with bits presenting 2^n
-
-//  exp(R) = 1 + (exp(R) - 1)
-//  P = exp(R) - 1 approximated by Taylor series of 3rd degree
-//      P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
+// 1. SINH_BY_POLY
+// ===============
+// If |x| is less than the tiny threshold, then clear SAFE 
+// For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
+//             register-biased, this is fc01
+// For single, the tiny threshold is -126  = -7e    => -7e  + ffff = ff81
+// If |x| < tiny threshold, set SAFE = 0
+//
+// 2. SINH_BY_TBL
+// =============
+// SAFE: SAFE is always 1 for TBL; 
+//
+// 3. SINH_BY_EXP
+// ==============
+// There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
+// r34 has N-1; 16382 is in register biased form, 0x13ffd
+// There is danger of double overflow if N-1 > 0x3fe
+//                       in register biased form, 0x103fd
+// Analogously, there is danger of single overflow if N-1 > 0x7e
+//                       in register biased form, 0x1007d
+// SAFE: If there is danger of overflow set SAFE to 0
+//
+// 4. SINH_HUGE
+// ============
+// SAFE: SAFE is always 0 for HUGE
 //
 
-//  The final result is reconstructed as follows
-//  exp(x) = T + T*P
+#include "libm_support.h"
 
-// Special values
-//*********************************************************************
-// sinhf(+0)    = +0
-// sinhf(-0)    = -0
+// Assembly macros
+//==============================================================
+sinh_FR_X            = f44
+sinh_FR_X2           = f9
+sinh_FR_X4           = f10
+sinh_FR_SGNX         = f40
+sinh_FR_all_ones     = f45
+sinh_FR_tmp          = f42
 
-// sinhf(+qnan) = +qnan
-// sinhf(-qnan) = -qnan
-// sinhf(+snan) = +qnan
-// sinhf(-snan) = -qnan
+sinh_FR_Inv_log2by64 = f9
+sinh_FR_log2by64_lo  = f11
+sinh_FR_log2by64_hi  = f10
 
-// sinhf(-inf)  = -inf
-// sinhf(+inf)  = +inf
+sinh_FR_A1           = f9
+sinh_FR_A2           = f10
+sinh_FR_A3           = f11
 
-// Overflow and Underflow
-//*********************************************************************
-// sinhf(x) = largest single normal when
-//     x = 89.41598 = 0x42b2d4fc
-//
-// Underflow is handled as described in case 1 above
+sinh_FR_Rcub         = f12
+sinh_FR_M_temp       = f13
+sinh_FR_R_temp       = f13
+sinh_FR_Rsq          = f13
+sinh_FR_R            = f14
 
-// Registers used
-//*********************************************************************
-// Floating Point registers used:
-// f8 input, output
-// f6,f7, f9 -> f15,  f32 -> f45
+sinh_FR_M            = f38
 
-// General registers used:
-// r2, r3, r16 -> r38
+sinh_FR_B1           = f15
+sinh_FR_B2           = f32
+sinh_FR_B3           = f33
 
-// Predicate registers used:
-// p6 -> p15
+sinh_FR_peven_temp1  = f34
+sinh_FR_peven_temp2  = f35
+sinh_FR_peven        = f36
 
-// Assembly macros
-//*********************************************************************
-// integer registers used
-// scratch
-rNJ                   = r2
-rNJ_neg               = r3
-
-rJ_neg                = r16
-rN_neg                = r17
-rSignexp_x            = r18
-rExp_x                = r18
-rExp_mask             = r19
-rExp_bias             = r20
-rAd1                  = r21
-rAd2                  = r22
-rJ                    = r23
-rN                    = r24
-rTblAddr              = r25
-rA3                   = r26
-rExpHalf              = r27
-rLn2Div64             = r28
-rGt_ln                = r29
-r17ones_m1            = r29
-rRightShifter         = r30
-rJ_mask               = r30
-r64DivLn2             = r31
-rN_mask               = r31
-// stacked
-GR_SAVE_PFS           = r32
-GR_SAVE_B0            = r33
-GR_SAVE_GP            = r34
-GR_Parameter_X        = r35
-GR_Parameter_Y        = r36
-GR_Parameter_RESULT   = r37
-GR_Parameter_TAG      = r38
-
-// floating point registers used
-FR_X                  = f10
-FR_Y                  = f1
-FR_RESULT             = f8
-// scratch
-fRightShifter         = f6
-f64DivLn2             = f7
-fNormX                = f9
-fNint                 = f10
-fN                    = f11
-fR                    = f12
-fLn2Div64             = f13
-fA2                   = f14
-fA3                   = f15
-// stacked
-fP                    = f32
-fT                    = f33
-fMIN_SGL_OFLOW_ARG    = f34
-fMAX_SGL_NORM_ARG     = f35
-fRSqr                 = f36
-fA1                   = f37
-fA21                  = f37
-fA4                   = f38
-fA43                  = f38
-fA4321                = f38
-fX4                   = f39
-fTmp                  = f39
-fGt_pln               = f39
-fWre_urm_f8           = f40
-fXsq                  = f40
-fP_neg                = f41
-fX3                   = f41
-fT_neg                = f42
-fExp                  = f43
-fExp_neg              = f44
-fAbsX                 = f45
-
-
-RODATA
-.align 16
+sinh_FR_podd_temp1   = f34
+sinh_FR_podd_temp2   = f35
+sinh_FR_podd         = f37
 
-LOCAL_OBJECT_START(_sinhf_table)
-data4 0x42b2d4fd         // Smallest single arg to overflow single result
-data4 0x42b2d4fc         // Largest single arg to give normal single result
-data4 0x00000000         // pad
-data4 0x00000000         // pad
-//
-// 2^(j/64) table, j goes from 0 to 63
-data8 0x0000000000000000 // 2^(0/64)
-data8 0x00002C9A3E778061 // 2^(1/64)
-data8 0x000059B0D3158574 // 2^(2/64)
-data8 0x0000874518759BC8 // 2^(3/64)
-data8 0x0000B5586CF9890F // 2^(4/64)
-data8 0x0000E3EC32D3D1A2 // 2^(5/64)
-data8 0x00011301D0125B51 // 2^(6/64)
-data8 0x0001429AAEA92DE0 // 2^(7/64)
-data8 0x000172B83C7D517B // 2^(8/64)
-data8 0x0001A35BEB6FCB75 // 2^(9/64)
-data8 0x0001D4873168B9AA // 2^(10/64)
-data8 0x0002063B88628CD6 // 2^(11/64)
-data8 0x0002387A6E756238 // 2^(12/64)
-data8 0x00026B4565E27CDD // 2^(13/64)
-data8 0x00029E9DF51FDEE1 // 2^(14/64)
-data8 0x0002D285A6E4030B // 2^(15/64)
-data8 0x000306FE0A31B715 // 2^(16/64)
-data8 0x00033C08B26416FF // 2^(17/64)
-data8 0x000371A7373AA9CB // 2^(18/64)
-data8 0x0003A7DB34E59FF7 // 2^(19/64)
-data8 0x0003DEA64C123422 // 2^(20/64)
-data8 0x0004160A21F72E2A // 2^(21/64)
-data8 0x00044E086061892D // 2^(22/64)
-data8 0x000486A2B5C13CD0 // 2^(23/64)
-data8 0x0004BFDAD5362A27 // 2^(24/64)
-data8 0x0004F9B2769D2CA7 // 2^(25/64)
-data8 0x0005342B569D4F82 // 2^(26/64)
-data8 0x00056F4736B527DA // 2^(27/64)
-data8 0x0005AB07DD485429 // 2^(28/64)
-data8 0x0005E76F15AD2148 // 2^(29/64)
-data8 0x0006247EB03A5585 // 2^(30/64)
-data8 0x0006623882552225 // 2^(31/64)
-data8 0x0006A09E667F3BCD // 2^(32/64)
-data8 0x0006DFB23C651A2F // 2^(33/64)
-data8 0x00071F75E8EC5F74 // 2^(34/64)
-data8 0x00075FEB564267C9 // 2^(35/64)
-data8 0x0007A11473EB0187 // 2^(36/64)
-data8 0x0007E2F336CF4E62 // 2^(37/64)
-data8 0x00082589994CCE13 // 2^(38/64)
-data8 0x000868D99B4492ED // 2^(39/64)
-data8 0x0008ACE5422AA0DB // 2^(40/64)
-data8 0x0008F1AE99157736 // 2^(41/64)
-data8 0x00093737B0CDC5E5 // 2^(42/64)
-data8 0x00097D829FDE4E50 // 2^(43/64)
-data8 0x0009C49182A3F090 // 2^(44/64)
-data8 0x000A0C667B5DE565 // 2^(45/64)
-data8 0x000A5503B23E255D // 2^(46/64)
-data8 0x000A9E6B5579FDBF // 2^(47/64)
-data8 0x000AE89F995AD3AD // 2^(48/64)
-data8 0x000B33A2B84F15FB // 2^(49/64)
-data8 0x000B7F76F2FB5E47 // 2^(50/64)
-data8 0x000BCC1E904BC1D2 // 2^(51/64)
-data8 0x000C199BDD85529C // 2^(52/64)
-data8 0x000C67F12E57D14B // 2^(53/64)
-data8 0x000CB720DCEF9069 // 2^(54/64)
-data8 0x000D072D4A07897C // 2^(55/64)
-data8 0x000D5818DCFBA487 // 2^(56/64)
-data8 0x000DA9E603DB3285 // 2^(57/64)
-data8 0x000DFC97337B9B5F // 2^(58/64)
-data8 0x000E502EE78B3FF6 // 2^(59/64)
-data8 0x000EA4AFA2A490DA // 2^(60/64)
-data8 0x000EFA1BEE615A27 // 2^(61/64)
-data8 0x000F50765B6E4540 // 2^(62/64)
-data8 0x000FA7C1819E90D8 // 2^(63/64)
-LOCAL_OBJECT_END(_sinhf_table)
-
-LOCAL_OBJECT_START(sinh_p_table)
-data8 0x3ec749d84bc96d7d // A4
-data8 0x3f2a0168d09557cf // A3
-data8 0x3f811111326ed15a // A2
-data8 0x3fc55555552ed1e2 // A1
-LOCAL_OBJECT_END(sinh_p_table)
+sinh_FR_poly_podd_temp1    =  f11 
+sinh_FR_poly_podd_temp2    =  f13
+sinh_FR_poly_peven_temp1   =  f11
+sinh_FR_poly_peven_temp2   =  f13
+
+sinh_FR_J_temp       = f9
+sinh_FR_J            = f10
 
+sinh_FR_Mmj          = f39
+
+sinh_FR_N_temp1      = f11
+sinh_FR_N_temp2      = f12
+sinh_FR_N            = f13
+
+sinh_FR_spos         = f14
+sinh_FR_sneg         = f15
+
+sinh_FR_Tjhi         = f32
+sinh_FR_Tjlo         = f33
+sinh_FR_Tmjhi        = f34
+sinh_FR_Tmjlo        = f35
+
+sinh_GR_mJ           = r35
+sinh_GR_J            = r36
+
+sinh_AD_mJ           = r38
+sinh_AD_J            = r39
+sinh_GR_all_ones     = r40
+
+sinh_FR_S_hi         = f9
+sinh_FR_S_hi_temp    = f10
+sinh_FR_S_lo_temp1   = f11 
+sinh_FR_S_lo_temp2   = f12 
+sinh_FR_S_lo_temp3   = f13 
+
+sinh_FR_S_lo         = f38
+sinh_FR_C_hi         = f39
+
+sinh_FR_C_hi_temp1   = f10
+sinh_FR_Y_hi         = f11 
+sinh_FR_Y_lo_temp    = f12 
+sinh_FR_Y_lo         = f13 
+sinh_FR_SINH         = f9
+
+sinh_FR_P1           = f14
+sinh_FR_P2           = f15
+sinh_FR_P3           = f32
+sinh_FR_P4           = f33
+sinh_FR_P5           = f34
+sinh_FR_P6           = f35
+
+sinh_FR_TINY_THRESH  = f9
+
+sinh_FR_SINH_temp    = f10
+sinh_FR_SCALE        = f11 
+
+sinh_FR_signed_hi_lo = f10
+
+
+GR_SAVE_PFS          = r41
+GR_SAVE_B0           = r42
+GR_SAVE_GP           = r43
+
+GR_Parameter_X       = r44
+GR_Parameter_Y       = r45
+GR_Parameter_RESULT  = r46
+
+// Data tables
+//==============================================================
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 16
+double_sinh_arg_reduction:  // Argument-reduction constants; 16-byte entries read in order with ldfe
+ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
+   data8 0xB8AA3B295C17F0BC, 0x00004005  // Inv_log2by64 ~ 64/log(2)
+   data8 0xB17217F7D1000000, 0x00003FF8  // log2by64_hi: high part of log(2)/64
+   data8 0xCF79ABC9E3B39804, 0x00003FD0  // log2by64_lo: low part of log(2)/64
+ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
+
+double_sinh_p_table:  // SINH_BY_POLY coefficients P1..P6 (close to, but not exactly, 1/(2k+1)!)
+ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
+   data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC  // P1 ~ 1/3!
+   data8 0x8888888888888412, 0x00003FF8  // P2 ~ 1/5!
+   data8 0xD00D00D00D4D39F2, 0x00003FF2  // P3 ~ 1/7!
+   data8 0xB8EF1D28926D8891, 0x00003FEC  // P4 ~ 1/9!
+   data8 0xD732377688025BE9, 0x00003FE5  // P5 ~ 1/11!
+   data8 0xB08AF9AE78C1239F, 0x00003FDE  // P6 ~ 1/13!
+ASM_SIZE_DIRECTIVE(double_sinh_p_table)
+
+double_sinh_ab_table:  // NOTE(review): A1..A3 then B1..B3 inferred from table name and values; confirm against the loads
+ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
+   data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC  // ~ 1/3!  (presumably A1)
+   data8 0x88888888884ECDD5, 0x00003FF8  // ~ 1/5!  (presumably A2)
+   data8 0xD00D0C6DCC26A86B, 0x00003FF2  // ~ 1/7!  (presumably A3)
+   data8 0x8000000000000002, 0x00003FFE  // ~ 1/2!  (presumably B1)
+   data8 0xAAAAAAAAAA402C77, 0x00003FFA  // ~ 1/4!  (presumably B2)
+   data8 0xB60B6CC96BDB144D, 0x00003FF5  // ~ 1/6!  (presumably B3)
+ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
+
+double_sinh_j_table:  // 2^(j/64), j = -32..32; per entry: Tjhi (double-extended, 16 bytes), Tjlo (single bits), pad
+ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
+   data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000  // j = -32
+   data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000  // j = -31
+   data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000  // j = -30
+   data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000  // j = -29
+   data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000  // j = -28
+   data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000  // j = -27
+   data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000  // j = -26
+   data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000  // j = -25
+   data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000  // j = -24
+   data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000  // j = -23
+   data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000  // j = -22
+   data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000  // j = -21
+   data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000  // j = -20
+   data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000  // j = -19
+   data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000  // j = -18
+   data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000  // j = -17
+   data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000  // j = -16
+   data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000  // j = -15
+   data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000  // j = -14
+   data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000  // j = -13
+   data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000  // j = -12
+   data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000  // j = -11
+   data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000  // j = -10
+   data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000  // j = -9
+   data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000  // j = -8
+   data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000  // j = -7
+   data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000  // j = -6
+   data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000  // j = -5
+   data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000  // j = -4
+   data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000  // j = -3
+   data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000  // j = -2
+   data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000  // j = -1
+   data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000  // j = 0
+   data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000  // j = 1
+   data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000  // j = 2
+   data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000  // j = 3
+   data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000  // j = 4
+   data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000  // j = 5
+   data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000  // j = 6
+   data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000  // j = 7
+   data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000  // j = 8
+   data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000  // j = 9
+   data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000  // j = 10
+   data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000  // j = 11
+   data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000  // j = 12
+   data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000  // j = 13
+   data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000  // j = 14
+   data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000  // j = 15
+   data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000  // j = 16
+   data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000  // j = 17
+   data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000  // j = 18
+   data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000  // j = 19
+   data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000  // j = 20
+   data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000  // j = 21
+   data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000  // j = 22
+   data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000  // j = 23
+   data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000  // j = 24
+   data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000  // j = 25
+   data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000  // j = 26
+   data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000  // j = 27
+   data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000  // j = 28
+   data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000  // j = 29
+   data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000  // j = 30
+   data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000  // j = 31
+   data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000  // j = 32
+ASM_SIZE_DIRECTIVE(double_sinh_j_table)
+
+.align 32
+.global sinhf#
 
 .section .text
-GLOBAL_IEEE754_ENTRY(sinhf)
+.proc  sinhf#
+.align 32
 
-{ .mlx
-      getf.exp        rSignexp_x = f8  // Must recompute if x unorm
-      movl            r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
+sinhf: 
+#ifdef _LIBC
+.global __ieee754_sinhf
+.type __ieee754_sinhf,@function
+__ieee754_sinhf:
+#endif
+
+// X infinity or NAN?
+// Take invalid fault if enabled
+
+
+{ .mfi
+      alloc r32 = ar.pfs,0,12,4,0                  
+(p0)     fclass.m.unc  p6,p0 = f8, 0xe3	//@qnan | @snan | @inf 
+         mov sinh_GR_all_ones = -1
+}
+;;
+
+
+{ .mfb
+         nop.m 999
+(p6)     fma.s.s0   f8 = f8,f1,f8               
+(p6)     br.ret.spnt     b0 ;;                          
 }
+
+// Put 0.25 in f9; p7 true if |x| >= 0.25
+// Make constant that will generate inexact when squared
 { .mlx
-      addl            rTblAddr = @ltoff(_sinhf_table),gp
-      movl            rRightShifter = 0x43E8000000000000 // DP Right Shifter
+         setf.sig sinh_FR_all_ones = sinh_GR_all_ones 
+(p0)     movl            r32 = 0x000000000000fffd ;;         
 }
-;;
 
 { .mfi
-      // point to the beginning of the table
-      ld8             rTblAddr = [rTblAddr]
-      fclass.m        p6, p0 = f8, 0x0b   // Test for x=unorm
-      addl            rA3 = 0x3E2AA, r0   // high bits of 1.0/6.0 rounded to SP
+(p0)     setf.exp        f9 = r32                         
+(p0)     fclass.m.unc  p7,p0 = f8, 0x07	//@zero
+         nop.i 999 ;;
 }
-{ .mfi
-      nop.m           0
-      fnorm.s1        fNormX = f8 // normalized x
-      addl            rExpHalf = 0xFFFE, r0 // exponent of 1/2
+
+{ .mfb
+         nop.m 999
+(p0)     fmerge.s      sinh_FR_X    = f0,f8             
+(p7)     br.ret.spnt     b0 ;;                          
 }
-;;
 
+// Identify denormal operands.
+{ .mfi
+         nop.m 999
+         fclass.m.unc  p10,p0 = f8, 0x09        //  + denorm
+         nop.i 999
+};;
 { .mfi
-      setf.d          f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
-      fclass.m        p15, p0 = f8, 0x1e3   // test for NaT,NaN,Inf
-      nop.i           0
+         nop.m 999
+         fclass.m.unc  p11,p0 = f8, 0x0a        //  - denorm
+         nop.i 999 
 }
-{ .mlx
-      // load Right Shifter to FP reg
-      setf.d          fRightShifter = rRightShifter
-      movl            rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
+
+{ .mfi
+         nop.m 999
+(p0)     fmerge.s      sinh_FR_SGNX = f8,f1             
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      mov             rExp_mask = 0x1ffff
-      fcmp.eq.s1      p13, p0 = f0, f8 // test for x = 0.0
-      shl             rA3 = rA3, 12    // 0x3E2AA000, approx to 1.0/6.0 in SP
+         nop.m 999
+(p0)     fcmp.lt.unc.s1  p0,p7 = sinh_FR_X,f9             
+         nop.i 999 ;;
+}
+
+{ .mib
+         nop.m 999
+         nop.i 999
+(p7)     br.cond.sptk    L(SINH_BY_TBL) ;;                      
 }
+
+
+L(SINH_BY_POLY): 
+
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Set tiny_SAFE (p7) to 1(0) if answer is not tiny 
+// Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
+// commented out.
+//(p0)     movl            r32            = 0x000000000000fc01           
+//(p0)     setf.exp        f10            = r32                         
+//(p0)     fcmp.lt.unc.s1  p6,p7          = f8,f10                     
+// Here is essentially the algorithm for SINH_BY_POLY. Care is taken with the order 
+// of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
+// Note that ax = |x|
+// sinh(x) = sign * (series(e^x) - series(e^-x))/2
+//         = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
+//         = sign * (ax   + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
+//                        + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
+//         = sign * (ax   + ax*p_odd + (ax*p_even))
+//         = sign * (ax   + Y_lo)
+// sinh(x) = sign * (Y_hi + Y_lo)
+// Get the values of P_x from the table
 { .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    SINH_UNORM            // Branch if x=unorm
+(p0)  addl           r34   = @ltoff(double_sinh_p_table), gp
+(p10) fma.s.s0       f8 =  f8,f8,f8
+(p10) br.ret.spnt    b0
 }
 ;;
 
-SINH_COMMON:
-{ .mfi
-      setf.exp        fA2 = rExpHalf        // load A2 to FP reg
-      nop.f           0
-      mov             rExp_bias = 0xffff
-}
 { .mfb
-      setf.d          fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
-(p15) fma.s.s0        f8 = f8, f1, f0       // result if x = NaT,NaN,Inf
-(p15) br.ret.spnt     b0                    // exit here if x = NaT,NaN,Inf
+      ld8 r34 = [r34]
+(p11) fnma.s.s0      f8 =  f8,f8,f8
+(p11) br.ret.spnt    b0
 }
 ;;
 
+// Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+         nop.m 999
+(p0)     ldfe            sinh_FR_P1 = [r34],16                 
+(p0)     fma.s1        sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;           
+}
+
+{ .mmi
+(p0)     ldfe            sinh_FR_P2 = [r34],16 ;;                 
+(p0)     ldfe            sinh_FR_P3 = [r34],16                 
+         nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe            sinh_FR_P4 = [r34],16 ;;                 
+(p0)     ldfe            sinh_FR_P5 = [r34],16                 
+         nop.i 999 ;;
+}
+
 { .mfi
-      // min overflow and max normal threshold
-      ldfps           fMIN_SGL_OFLOW_ARG, fMAX_SGL_NORM_ARG = [rTblAddr], 8
-      nop.f           0
-      and             rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+(p0)     ldfe            sinh_FR_P6 = [r34],16                 
+(p0)     fma.s1        sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0         
+         nop.i 999 ;;
 }
-{ .mfb
-      setf.s          fA3 = rA3                  // load A3 to FP reg
-      nop.f           0
-(p13) br.ret.spnt     b0                         // exit here if x=0.0, return x
+
+// Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even 
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3                
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      sub             rExp_x = rExp_x, rExp_bias // True exponent of x
-      fmerge.s        fAbsX = f0, fNormX         // Form |x|
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1   
+         nop.i 999
+}
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4               
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_podd       = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0           
+         nop.i 999
 }
-;;
 
 { .mfi
-      nop.m           0
-      // x*(64/ln(2)) + Right Shifter
-      fma.s1          fNint = fNormX, f64DivLn2, fRightShifter
-      add             rTblAddr = 8, rTblAddr
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2 
+         nop.i 999 ;;
 }
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_peven       = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0         
+         nop.i 999 ;;
+}
+
+// Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_Y_lo_temp    = sinh_FR_X, sinh_FR_peven, f0                    
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_Y_lo         = sinh_FR_X, sinh_FR_podd,  sinh_FR_Y_lo_temp          
+         nop.i 999 ;;
+}
+
+// Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_SINH        = sinh_FR_X, f1, sinh_FR_Y_lo                      
+         nop.i 999 ;;
+}
+// Dummy multiply to generate inexact
+{ .mfi
+         nop.m 999
+(p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999
+}
+
+// Calculate f8 = sign * (Y_hi + Y_lo)
+// Go to return
 { .mfb
-      cmp.gt          p7, p0 = -2, rExp_x        // Test |x| < 2^(-2)
-      fma.s1          fXsq = fNormX, fNormX, f0  // x*x for small path
-(p7)  br.cond.spnt    SINH_SMALL                 // Branch if 0 < |x| < 2^-2
+         nop.m 999
+(p0)     fma.s.s0        f8 = sinh_FR_SGNX,sinh_FR_SINH,f0                       
+(p0)     br.ret.sptk     b0 ;;                          
+}
+
+
+L(SINH_BY_TBL): 
+
+// We are now at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now. 
+// Double-extended:
+// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
+
+{ .mlx
+         nop.m 999
+(p0)     movl            r32 = 0x0000000000010006 ;;         
 }
-;;
 
 { .mfi
-      nop.m           0
-      // check for overflow
-      fcmp.ge.s1      p12, p13 = fAbsX, fMIN_SGL_OFLOW_ARG
-      mov             rJ_mask = 0x3f             // 6-bit mask for J
+(p0)     setf.exp        f9 = r32                         
+         nop.f 999
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)     fcmp.ge.unc.s1  p6,p7 = sinh_FR_X,f9             
+         nop.i 999 ;;
+}
+
+{ .mib
+         nop.m 999
+         nop.i 999
+(p6)     br.cond.spnt    L(SINH_HUGE) ;;                        
+}
+
+// r32 = 1
+// r34 = N-1 
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// sinh(x) = sinh(B+R)
+//         = sinh(B)cosh(R) + cosh(B)sinh(R)
+// 
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j 
+//   We will calculate M and get N as (M-j)/64
+//   The division is a shift.
+// exp(B)  = exp(N*log2 + j*log2/64)
+//         = 2^N * 2^(j/64)
+// sinh(B) = 1/2(e^B -e^-B)
+//         = 1/2(2^N * 2^(j/64) - 2^-N * 2^(-j/64)) 
+// sinh(B) = (2^(N-1) * 2^(j/64) - 2^(-N-1) * 2^(-j/64)) 
+// cosh(B) = (2^(N-1) * 2^(j/64) + 2^(-N-1) * 2^(-j/64)) 
+// 2^(j/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+//        = 1 + p_odd + p_even
+//        where the p_odd uses the A coefficients and the p_even uses the B coefficients
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+//    cosh(R) = 1 + p_even
+//    sinh(B) = S_hi + S_lo
+//    cosh(B) = C_hi
+// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
+// ******************************************************
+// STEP 1 (TBL and EXP)
+// ******************************************************
+// Get the following constants. 
+// f9  = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
+
+{ .mmi
+(p0)  adds                 r32 = 0x1,r0      
+(p0)  addl           r34   = @ltoff(double_sinh_arg_reduction), gp
+      nop.i 999
 }
 ;;
 
-{ .mfb
-      nop.m           0
-      fms.s1          fN = fNint, f1, fRightShifter // n in FP register
-      // branch out if overflow
-(p12) br.cond.spnt    SINH_CERTAIN_OVERFLOW
+{ .mmi
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
-{ .mfi
-      getf.sig        rNJ = fNint                   // bits of n, j
-      // check for possible overflow
-      fcmp.gt.s1      p13, p0 = fAbsX, fMAX_SGL_NORM_ARG
-      nop.i           0
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1)  = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
+
+{ .mlx
+         nop.m 999
+(p0)     movl                r38 = 0x000000000000fffe ;; 
+}
+
+{ .mmi
+(p0)     ldfe            sinh_FR_Inv_log2by64 = [r34],16 ;;       
+(p0)     ldfe            sinh_FR_log2by64_hi  = [r34],16       
+         nop.i 999 ;;
+}
+
+{ .mbb
+(p0)     ldfe            sinh_FR_log2by64_lo  = [r34],16       
+         nop.b 999
+         nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9  = A_1
+// f10 = A_2
+// f11 = A_3
+
+{ .mmi
+      nop.m 999
+(p0)  addl           r34   = @ltoff(double_sinh_ab_table), gp
+      nop.i 999
 }
 ;;
 
+{ .mmi
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
+// Calculate M and keep it as integer and floating point.
+// f38 = M = round-to-integer(x*Inv_log2by64)
+// sinh_FR_M = M = truncate(ax/(log2/64))
+// Put the significand of M in r35
+//    and the floating point representation of M in sinh_FR_M
+
 { .mfi
-      addl            rN = 0xFFBF - 63, rNJ      // biased and shifted n-1,j
-      fnma.s1         fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
-      and             rJ = rJ_mask, rNJ          // bits of j
+         nop.m 999
+(p0)     fma.s1          sinh_FR_M      = sinh_FR_X, sinh_FR_Inv_log2by64, f0 
+         nop.i 999
 }
+
 { .mfi
-      sub             rNJ_neg = r0, rNJ          // bits of n, j for -x
-      nop.f           0
-      andcm           rN_mask = -1, rJ_mask      // 0xff...fc0 to mask N
+(p0)     ldfe            sinh_FR_A1 = [r34],16            
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      shladd          rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
-      nop.f           0
-      and             rN = rN_mask, rN     // biased, shifted n-1
+         nop.m 999
+(p0)     fcvt.fx.s1      sinh_FR_M_temp = sinh_FR_M                      
+         nop.i 999 ;;
 }
+
 { .mfi
-      addl            rN_neg = 0xFFBF - 63, rNJ_neg // -x biased, shifted n-1,j
-      nop.f           0
-      and             rJ_neg = rJ_mask, rNJ_neg     // bits of j for -x
+         nop.m 999
+(p0)     fnorm.s1        sinh_FR_M      = sinh_FR_M_temp                 
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      ld8             rJ = [rJ]                    // Table value
-      nop.f           0
-      shl             rN = rN, 46 // 2^(n-1) bits in DP format
+(p0)     getf.sig        r35       = sinh_FR_M_temp                 
+         nop.f 999
+         nop.i 999 ;;
 }
+
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It 
+// has a range of -32 thru 31.
+// r35 = M
+// r36 = j 
+
+{ .mii
+         nop.m 999
+         nop.i 999 ;;
+(p0)     and            r36 = 0x3f, r35 ;;   
+}
+
+// Calculate R
+// f13 = f44 - f12*f10 = ax - M*log2by64_hi
+// f14 = f13 - f8*f11  = R = (ax - M*log2by64_hi) - M*log2by64_lo
+
 { .mfi
-      shladd          rJ_neg = rJ_neg, 3, rTblAddr // addr in 2^(j/64) table -x
-      nop.f           0
-      and             rN_neg = rN_mask, rN_neg     // biased, shifted n-1 for -x
+         nop.m 999
+(p0)     fnma.s1           sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X      
+         nop.i 999
 }
-;;
 
 { .mfi
-      ld8             rJ_neg = [rJ_neg]            // Table value for -x
-      nop.f           0
-      shl             rN_neg = rN_neg, 46 // 2^(n-1) bits in DP format for -x
+(p0)     ldfe            sinh_FR_A2 = [r34],16            
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      or              rN = rN, rJ // bits of 2^n * 2^(j/64) in DP format
-      nop.f           0
-      nop.i           0
+         nop.m 999
+(p0)     fnma.s1           sinh_FR_R      = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp 
+         nop.i 999
+}
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
+
+{ .mmi
+(p0)     ldfe            sinh_FR_A3 = [r34],16 ;;            
+(p0)     ldfe            sinh_FR_B1 = [r34],16            
+         nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe            sinh_FR_B2 = [r34],16 ;;            
+(p0)     ldfe            sinh_FR_B3 = [r34],16            
+         nop.i 999 ;;
+}
+
+{ .mii
+         nop.m 999
+(p0)     shl            r34 = r36,  0x2 ;;   
+(p0)     sxt1           r37 = r34 ;;         
+}
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1             sinh_FR_Rsq  = sinh_FR_R,   sinh_FR_R, f0  
+(p0)     shr            r36 = r37,  0x2 ;;   
+}
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0)     sub                  r34 = r35, r36    
+         nop.i 999 ;;
+(p0)     shr                  r35 = r34, 0x6 ;;    
+}
+
+{ .mii
+(p0)     sub                 r40 = r38, r35           
+(p0)     adds                 r37 = 0x1, r35    
+(p0)     add                 r39 = r38, r35 ;;           
+}
+
+// Get the address of the J table, add the offset, 
+// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0)  sub                  r34 = r35, r32    
+(p0)  addl           r37   = @ltoff(double_sinh_j_table), gp
+      nop.i 999
 }
 ;;
 
-{ .mmf
-      setf.d          fT = rN            // 2^(n-1) * 2^(j/64)
-      or              rN_neg = rN_neg, rJ_neg // -x bits of 2^n * 2^(j/64) in DP
-      fma.s1          fRSqr = fR, fR, f0 // R^2
+{ .mmi
+      ld8 r37 = [r37]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+
 { .mfi
-      setf.d          fT_neg = rN_neg    // 2^(n-1) * 2^(j/64) for -x
-      fma.s1          fP = fA3, fR, fA2  // A3*R + A2
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1             sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0  
+         nop.i 999
+}
+
+// ******************************************************
+// STEP 3 Now decide if we need to branch to EXP
+// ******************************************************
+// Put 32 in f9; p7 true if x >= 32
+// Go to EXP if |x| >= 32 
+
+{ .mlx
+         nop.m 999
+(p0)     movl                r32 = 0x0000000000010004 ;;               
 }
+
+// Calculate p_even
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
 { .mfi
-      nop.m           0
-      fnma.s1         fP_neg = fA3, fR, fA2  // A3*R + A2 for -x
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3,          sinh_FR_B2  
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fP = fP, fRSqr, fR // P = (A3*R + A2)*R^2 + R
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1  
+         nop.i 999
 }
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
 { .mfi
-      nop.m           0
-      fms.s1          fP_neg = fP_neg, fRSqr, fR // P = (A3*R + A2)*R^2 + R, -x
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_podd_temp1 = sinh_FR_Rsq,        sinh_FR_A3,         sinh_FR_A2  
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fmpy.s0         fTmp = fLn2Div64, fLn2Div64       // Force inexact
-      nop.i           0
+(p0)     setf.exp            sinh_FR_N_temp1 = r39            
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fExp = fP, fT, fT                 // exp(x)/2
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_peven       = sinh_FR_Rsq, sinh_FR_peven_temp2, f0     
+         nop.i 999
 }
-{ .mfb
-      nop.m           0
-      fma.s1          fExp_neg = fP_neg, fT_neg, fT_neg // exp(-x)/2
-      // branch out if possible overflow result
-(p13) br.cond.spnt    SINH_POSSIBLE_OVERFLOW
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1          sinh_FR_podd_temp2 = sinh_FR_Rsq,        sinh_FR_podd_temp1, sinh_FR_A1  
+         nop.i 999 ;;
+}
+
+{ .mfi
+(p0)     setf.exp            f9  = r32                              
+         nop.f 999
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1          sinh_FR_podd       = sinh_FR_podd_temp2, sinh_FR_Rcub,       sinh_FR_R   
+         nop.i 999
+}
+
+// sinh_GR_mj contains the table offset for -j
+// sinh_GR_j  contains the table offset for +j
+// p6 is true when j <= 0
+
+{ .mlx
+(p0)     setf.exp            sinh_FR_N_temp2 = r40            
+(p0)     movl                r40 = 0x0000000000000020 ;;    
+}
+
+{ .mfi
+(p0)     sub                 sinh_GR_mJ = r40,  r36           
+(p0)     fmerge.se           sinh_FR_spos    = sinh_FR_N_temp1, f1 
+(p0)     adds                sinh_GR_J  = 0x20, r36 ;;           
+}
+
+{ .mii
+         nop.m 999
+(p0)     shl                  sinh_GR_mJ = sinh_GR_mJ, 5 ;;   
+(p0)     add                  sinh_AD_mJ = r37, sinh_GR_mJ ;; 
+}
+
+{ .mmi
+         nop.m 999
+(p0)     ldfe                 sinh_FR_Tmjhi = [sinh_AD_mJ],16                 
+(p0)     shl                  sinh_GR_J  = sinh_GR_J, 5 ;;    
+}
+
+{ .mfi
+(p0)     ldfs                 sinh_FR_Tmjlo = [sinh_AD_mJ],16                 
+(p0)     fcmp.lt.unc.s1      p0,p7 = sinh_FR_X,f9                          
+(p0)     add                  sinh_AD_J  = r37, sinh_GR_J ;;  
+}
+
+{ .mmi
+(p0)     ldfe                 sinh_FR_Tjhi  = [sinh_AD_J],16 ;;                  
+(p0)     ldfs                 sinh_FR_Tjlo  = [sinh_AD_J],16                  
+         nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      // final result in the absence of overflow
-      fms.s.s0        f8 = fExp, f1, fExp_neg  // result = (exp(x)-exp(-x))/2
-      // exit here in the absence of overflow
-      br.ret.sptk     b0              // Exit main path, 0.25 <= |x| < 89.41598
+         nop.m 999
+(p0)     fmerge.se           sinh_FR_sneg    = sinh_FR_N_temp2, f1 
+(p7)     br.cond.spnt        L(SINH_BY_EXP) ;;                            
 }
-;;
 
-// Here if 0 < |x| < 0.25.  Evaluate 9th order polynomial.
-SINH_SMALL:
 { .mfi
-      add             rAd1 = 0x200, rTblAddr
-      fcmp.lt.s1      p7, p8 = fNormX, f0       // Test sign of x
-      cmp.gt          p6, p0 = -60, rExp_x      // Test |x| < 2^(-60)
+         nop.m 999
+         nop.f 999
+         nop.i 999 ;;
 }
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate S_hi and S_lo
+// sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjhi)
+
 { .mfi
-      add             rAd2 = 0x210, rTblAddr
-      nop.f           0
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0   
+         nop.i 999 ;;
 }
-;;
 
-{ .mmb
-      ldfpd           fA4, fA3 = [rAd1]
-      ldfpd           fA2, fA1 = [rAd2]
-(p6)  br.cond.spnt    SINH_VERY_SMALL           // Branch if |x| < 2^(-60)
+{ .mfi
+         nop.m 999
+(p0)    fms.s1         sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi_temp              
+         nop.i 999
 }
-;;
+
+// Calculate C_hi
+// sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
 
 { .mfi
-      nop.m           0
-      fma.s1          fX3 = fXsq, fNormX, f0
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0                   
+         nop.i 999 ;;
 }
+
+// sinh_FR_S_lo_temp1 =  sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_S_lo_temp1              )
+
 { .mfi
-      nop.m           0
-      fma.s1          fX4 = fXsq, fXsq, f0
-      nop.i           0
+         nop.m 999
+(p0)    fms.s1         sinh_FR_S_lo_temp1 =  sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi            
+         nop.i 999
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fA43 = fXsq, fA4, fA3
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_C_hi       = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1    
+         nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fA21 = fXsq, fA2, fA1
-      nop.i           0
+         nop.m 999
+(p0)    fnma.s1        sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1       
+         nop.i 999
 }
-;;
+
+// sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
+// sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
 
 { .mfi
-      nop.m           0
-      fma.s1          fA4321 = fX4, fA43, fA21
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_S_lo_temp1 =  sinh_FR_sneg, sinh_FR_Tmjlo, f0                  
+         nop.i 999 ;;
 }
-;;
 
-// Dummy multiply to generate inexact
+/////////// BUG FIX fma to fms -TK
 { .mfi
-      nop.m           0
-      fmpy.s0         fTmp = fA4, fA4
-      nop.i           0
+         nop.m 999
+(p0)    fms.s1         sinh_FR_S_lo_temp3 =  sinh_FR_spos, sinh_FR_Tjlo,  sinh_FR_S_lo_temp1  
+         nop.i 999 ;;
 }
-{ .mfb
-      nop.m           0
-      fma.s.s0        f8 = fA4321, fX3, fNormX
-      br.ret.sptk     b0                // Exit if 2^-60 < |x| < 0.25
+
+{ .mfi
+         nop.m 999
+(p0)    fma.s1         sinh_FR_S_lo       =  sinh_FR_S_lo_temp3, f1,   sinh_FR_S_lo_temp2     
+         nop.i 999 ;;
 }
-;;
 
-SINH_VERY_SMALL:
-// Here if 0 < |x| < 2^-60
-// Compute result by x + sgn(x)*x^2 to get properly rounded result
-.pred.rel "mutex",p7,p8
+// Y_hi = S_hi 
+// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
+// sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
+// sinh_FR_Y_lo      = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
+
+{ .mfi
+         nop.m 999
+(p0)    fma.s1         sinh_FR_Y_lo_temp  = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo           
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)    fma.s1         sinh_FR_Y_lo       =  sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp      
+         nop.i 999 ;;
+}
+
+// sinh_FR_SINH = Y_hi + Y_lo
+// f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
+
+// Dummy multiply to generate inexact
+{ .mfi
+         nop.m 999
+(p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999
+}
 { .mfi
-      nop.m           0
-(p7)  fnma.s.s0       f8 = fNormX, fNormX, fNormX // If x<0 result ~ x-x^2
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_SINH       =  sinh_FR_S_hi, f1, sinh_FR_Y_lo    
+         nop.i 999 ;;
 }
+
 { .mfb
-      nop.m           0
-(p8)  fma.s.s0        f8 = fNormX, fNormX, fNormX // If x>0 result ~ x+x^2
-      br.ret.sptk     b0                          // Exit if |x| < 2^-60
+         nop.m 999
+(p0)    fma.s.s0       f8 = sinh_FR_SGNX, sinh_FR_SINH,f0                      
+(p0)    br.ret.sptk     b0 ;;                          
 }
-;;
 
-SINH_POSSIBLE_OVERFLOW:
 
-// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
-// This cannot happen if input is a single, only if input higher precision.
-// Overflow is a possibility, not a certainty.
+L(SINH_BY_EXP): 
 
-// Recompute result using status field 2 with user's rounding mode,
-// and wre set.  If result is larger than largest single, then we have
-// overflow
+// When p7 is true,  we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) +Tjlo
+// Scale = sign * 2^(N-1)
+// sinh_FR_Y_lo =  sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
+// sinh_FR_Y_lo =  sinh_FR_Tjhi * (sinh_FR_Y_lo_temp      )
 
 { .mfi
-      mov             rGt_ln  = 0x1007f // Exponent for largest single + 1 ulp
-      fsetc.s2        0x7F,0x42         // Get user's round mode, set wre
-      nop.i           0
+         nop.m 999
+(p0)   fma.s1            sinh_FR_Y_lo_temp =  sinh_FR_peven, f1, sinh_FR_podd                   
+         nop.i 999
+}
+
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow            if N-1 > 0x3fe = 1022
+// There is a danger of single overflow            if N-1 > 0x7e = 126
+{ .mlx
+         nop.m 999
+(p0)   movl                r32          = 0x000000000000007e ;;                       
 }
-;;
 
 { .mfi
-      setf.exp        fGt_pln = rGt_ln  // Create largest single + 1 ulp
-      fma.s.s2        fWre_urm_f8 = fP, fT, fT    // Result with wre set
-      nop.i           0
+(p0)   cmp.gt.unc          p0,p7        = r34, r32                                 
+(p0)   fmerge.s          sinh_FR_SCALE     = sinh_FR_SGNX, sinh_FR_spos                         
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                   // Turn off wre in sf2
-      nop.i           0
+         nop.m 999
+(p0)   fma.s1            sinh_FR_Y_lo      =  sinh_FR_Tjhi,  sinh_FR_Y_lo_temp, sinh_FR_Tjlo    
+         nop.i 999 ;;
 }
-;;
 
+// f8 = answer = scale * (Y_hi + Y_lo)
 { .mfi
-      nop.m           0
-      fcmp.ge.s1      p6, p0 =  fWre_urm_f8, fGt_pln // Test for overflow
-      nop.i           0
+         nop.m 999
+(p0)   fma.s1            sinh_FR_SINH_temp = sinh_FR_Y_lo,  f1, sinh_FR_Tjhi       
+         nop.i 999 ;;
 }
-;;
 
-{ .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    SINH_CERTAIN_OVERFLOW // Branch if overflow
+{ .mfi
+         nop.m 999
+(p0)   fma.s.s0          f44          = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0      
+         nop.i 999 ;;
 }
-;;
 
+// Dummy multiply to generate inexact
+{ .mfi
+         nop.m 999
+(p7)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999 ;;
+}
+
+// If over_SAFE is set, return
 { .mfb
-      nop.m           0
-      fma.s.s0        f8 = fP, fT, fT
-      br.ret.sptk     b0                     // Exit if really no overflow
+       nop.m 999
+(p7)   fmerge.s            f8 = f44,f44                                            
+(p7)   br.ret.sptk     b0 ;;                          
 }
-;;
 
-// here if overflow
-SINH_CERTAIN_OVERFLOW:
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD  (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
+
 { .mfi
-      addl            r17ones_m1 = 0x1FFFE, r0
-      fcmp.lt.s1      p6, p7 = fNormX, f0     // Test for x < 0
-      nop.i           0
+         nop.m 999
+(p0)   fsetc.s2            0x7F,0x42                                               
+         nop.i 999;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)   fma.s.s2            f43  = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0                      
+         nop.i 999 ;;
+}
+
+// 1007F => 1007F -FFFF = 80(true)
+// 80 + 7F = FF, which is 1 more than the exponent of the largest
+// single (FE). So 0 1007F 8000000000000000  is one ulp more than
+// largest single in register bias
+// Now  set p8 if the answer with WRE is greater than or equal this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+         nop.m 999
+(p0)   movl                r32     = 0x0000000001007F ;;                              
 }
-;;
 
 { .mmf
-      alloc           r32 = ar.pfs, 0, 3, 4, 0 // get some registers
-      setf.exp        fTmp = r17ones_m1
-      fmerge.s        FR_X = f8,f8
+         nop.m 999
+(p0)   setf.exp            f41 = r32                                               
+(p0)   fsetc.s2            0x7F,0x40 ;;                                               
 }
-;;
 
 { .mfi
-      mov             GR_Parameter_TAG = 128
-(p6)  fnma.s.s0       FR_RESULT = fTmp, fTmp, f0 // Set I,O and -INF result
-      nop.i           0
+         nop.m 999
+(p0)   fcmp.ge.unc.s1 p8, p0 =  f43, f41                                           
+         nop.i 999
+}
+
+{ .mfi
+         nop.m 999
+(p0)   fmerge.ns           f42 = f41, f41                                          
+         nop.i 999 ;;
+}
+
+// The error tag for overflow is 128
+{ .mii
+         nop.m 999
+         nop.i 999 ;;
+(p8)   mov                 r47 = 128 ;;                                               
 }
+
 { .mfb
-      nop.m           0
-(p7)  fma.s.s0        FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
-      br.cond.sptk    __libm_error_region
+         nop.m 999
+(p0)   fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
+(p8)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+{ .mii
+         nop.m 999
+         nop.i 999 ;;
+(p9)   mov                 r47 = 128                                               
+}
+
+{ .mib
+         nop.m 999
+         nop.i 999
+(p9)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+// Dummy multiply to generate inexact
+{ .mfi
+         nop.m 999
+(p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999 ;;
 }
-;;
 
-// Here if x unorm
-SINH_UNORM:
 { .mfb
-      getf.exp        rSignexp_x = fNormX    // Must recompute if x unorm
-      fcmp.eq.s0      p6, p0 = f8, f0        // Set D flag
-      br.cond.sptk    SINH_COMMON            // Return to main path
+         nop.m 999
+(p0)   fmerge.s            f8 = f44,f44                                            
+(p0)   br.ret.sptk     b0 ;;                          
 }
-;;
 
-GLOBAL_IEEE754_END(sinhf)
+L(SINH_HUGE): 
+
+// for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+{ .mlx
+         nop.m 999
+(p0)   movl                r32 = 0x0000000000015dbf ;;                                
+}
+
+{ .mfi
+(p0)   setf.exp            f9  = r32                                               
+         nop.f 999
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)   fma.s1              sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1                       
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)   fma.s.s0            f44 = sinh_FR_signed_hi_lo,  f9, f0                          
+(p0)   mov                 r47 = 128                                               
+}
+.endp sinhf
+ASM_SIZE_DIRECTIVE(sinhf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_sinhf)
+#endif
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
+
+.proc __libm_error_region
+__libm_error_region:
+L(SINH_ERROR_SUPPORT):
 .prologue
+
+// (1) Compute the Parameter 2 address, save ar.pfs and gp, open a 64-byte frame
 { .mfi
-      add   GR_Parameter_Y=-32,sp             // Parameter 2 value
-      nop.f 0
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
+        nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-      mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-      add sp=-64,sp                           // Create new stack
-      nop.f 0
-      mov GR_SAVE_GP=gp                       // Save gp
+        add sp=-64,sp                          // Create new stack
+        nop.f 0
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
+
+
+// (2) Store Parameter 2 (f0), compute the Parameter 1 address, save b0
 { .mmi
-      stfs [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 on stack
-      add GR_Parameter_X = 16,sp              // Parameter 1 address
+        stfs [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
+        add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0                       // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
+
 .body
-{ .mfi
-      stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
-      nop.f 0
-      add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+// (3) Store Parameter 1 (f8) and the result (f44), then call __libm_error_support
+{ .mib
+        stfs [GR_Parameter_X] = f8                     // STORE Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 address
+        nop.b 0                            
 }
 { .mib
-      stfs [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
-      add   GR_Parameter_Y = -16,GR_Parameter_Y
-      br.call.sptk b0=__libm_error_support#   // Call error handling function
+        stfs [GR_Parameter_Y] = f44                    // STORE Parameter 3 on stack
+        add   GR_Parameter_Y = -16,GR_Parameter_Y
+        br.call.sptk b0=__libm_error_support#          // Call error handling function
 };;
-
 { .mmi
-      add   GR_Parameter_RESULT = 48,sp
-      nop.m 0
-      nop.i 0
+        nop.m 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp      // Recompute the result address after the call
 };;
 
+// (4) Reload the result into f8, restore sp, b0, gp and ar.pfs, then return
 { .mmi
-      ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
+        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-      add   sp = 64,sp                       // Restore stack pointer
-      mov   b0 = GR_SAVE_B0                  // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-      mov   gp = GR_SAVE_GP                  // Restore gp
-      mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-      br.ret.sptk     b0                     // Return
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinhl.S b/sysdeps/ia64/fpu/e_sinhl.S
index ccc996a8cc..b880b95b64 100644
--- a/sysdeps/ia64/fpu/e_sinhl.S
+++ b/sysdeps/ia64/fpu/e_sinhl.S
@@ -1,10 +1,10 @@
 .file "sinhl.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,20 +35,17 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
 // 10/12/00 Update to set denormal operand and underflow flags
-// 01/22/01 Fixed to set inexact flag for small args.  Fixed incorrect 
+// 1/22/01  Fixed to set inexact flag for small args.  Fixed incorrect 
 //          call to __libm_error_support for 710.476 < x < 11357.2166.
-// 05/02/01 Reworked to improve speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 12/04/02 Improved performance
 //
 // API
 //==============================================================
@@ -59,1058 +56,1269 @@
 // Registers used
 //==============================================================
 // general registers: 
-// r14 -> r40
+// r32 -> r47
 // predicate registers used:
-// p6 -> p11
+// p6 p7 p8 p9
 // floating-point registers used:
-// f9 -> f15; f32 -> f90; 
+// f9 -> f15; f32 -> f45; 
 // f8 has input, then output
 //
 // Overview of operation
 //==============================================================
-// There are seven paths
-// 1. 0 < |x| < 0.25          SINH_BY_POLY
-// 2. 0.25 <=|x| < 32         SINH_BY_TBL
-// 3. 32 <= |x| < 11357.21655 SINH_BY_EXP (merged path with SINH_BY_TBL)
-// 4. |x| >= 11357.21655      SINH_HUGE
-// 5. x=0                     Done with early exit
-// 6. x=inf,nan               Done with early exit
-// 7. x=denormal              SINH_DENORM
+// There are four paths
+// 1. |x| < 0.25        SINH_BY_POLY
+// 2. |x| < 32          SINH_BY_TBL
+// 3. |x| < 2^14        SINH_BY_EXP
+// 4. |x| >= 2^14       SINH_HUGE
 //
-// For double extended we get overflow for x >= 400c b174 ddc0 31ae c0ea
-//                                           >= 11357.21655
+// For double extended we get infinity for x >= 400c b174 ddc0 31ae c0ea
+//                                           >= 1.0110001.... x 2^13
+//                                           >= 11357.2166
 //
+// But for double we get infinity for x >= 408633ce8fb9f87e
+//                                      >= 1.0110...x 2^9
+//                                      >= +7.10476e+002
 //
-// 1. SINH_BY_POLY   0 < |x| < 0.25
-// ===============
-// Evaluate sinh(x) by a 13th order polynomial
-// Care is take for the order of multiplication; and P_1 is not exactly 1/3!, 
-// P_2 is not exactly 1/5!, etc.
-// sinh(x) = sign * (series(e^x) - series(e^-x))/2
-//         = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11!
-//                        + ax^13/13!)
-//         = sign * (ax   + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
-//                        + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ))
-//         = sign * (ax   + ax*p_odd + (ax*p_even))
-//         = sign * (ax   + Y_lo)
-// sinh(x) = sign * (Y_hi + Y_lo)
-// Note that ax = |x|
+// And for single we get infinity for x >= 42b3a496
+//                                      >= 1.0110... 2^6
+//                                      >= 89.8215
 //
-// 2. SINH_BY_TBL   0.25 <= |x| < 32.0
-// =============
-// sinh(x) = sinh(B+R)
-//         = sinh(B)cosh(R) + cosh(B)sinh(R)
-// 
-// ax = |x| = M*log2/64 + R
-// B = M*log2/64
-// M = 64*N + j 
-//   We will calculate M and get N as (M-j)/64
-//   The division is a shift.
-// exp(B)  = exp(N*log2 + j*log2/64)
-//         = 2^N * 2^(j*log2/64)
-// sinh(B) = 1/2(e^B -e^-B)
-//         = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64)) 
-// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) 
-// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) 
-// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
-// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+// SAFE: If there is danger of overflow set SAFE to 0
+//       NOT implemented: if there is danger of underflow, set SAFE to 0
+// SAFE for all paths listed below
 //
-// R = ax - M*log2/64
-// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
-// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
-//        = 1 + p_odd + p_even
-//        where the p_even uses the A coefficients and the p_even uses 
-//        the B coefficients
+// 1. SINH_BY_POLY
+// ===============
+// If |x| is less than the tiny threshold, then clear SAFE 
+// For double, the tiny threshold is -1022 = -0x3fe => -3fe + ffff = fc01
+//             register-biased, this is fc01
+// For single, the tiny threshold is -126  = -7e    => -7e  + ffff = ff81
+// If |x| < tiny threshold, set SAFE = 0
 //
-// So sinh(R) = 1 + p_odd + p_even -(1 -p_odd -p_even)/2 = p_odd
-//    cosh(R) = 1 + p_even
-//    sinh(B) = S_hi + S_lo
-//    cosh(B) = C_hi
-// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
+// 2. SINH_BY_TBL
+// =============
+// SAFE: SAFE is always 1 for TBL; 
 //
-// 3. SINH_BY_EXP   32.0 <= |x| < 11357.21655  ( 400c b174 ddc0 31ae c0ea )
+// 3. SINH_BY_EXP
 // ==============
-// Can approximate result by exp(x)/2 in this region.
-// Y_hi = Tjhi
-// Y_lo = Tjhi * (p_odd + p_even) + Tjlo
-// sinh(x) = Y_hi + Y_lo
+// There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
+// r34 has N-1; 16382 is in register biased form, 0x13ffd
+// There is danger of double overflow if N-1 > 0x3fe
+//                       in register biased form, 0x103fd
+// Analogously, there is danger of single overflow if N-1 > 0x7e
+//                       in register biased form, 0x1007d
+// SAFE: If there is danger of overflow set SAFE to 0
 //
-// 4. SINH_HUGE     |x| >= 11357.21655  ( 400c b174 ddc0 31ae c0ea )
+// 4. SINH_HUGE
 // ============
-// Set error tag and call error support
-//
+// SAFE: SAFE is always 0 for HUGE
 //
+
+#include "libm_support.h"
+
 // Assembly macros
 //==============================================================
-r_ad5                 = r14
-r_rshf_2to57          = r15
-r_exp_denorm          = r15
-r_ad_mJ_lo            = r15
-r_ad_J_lo             = r16
-r_2Nm1                = r17
-r_2mNm1               = r18
-r_exp_x               = r18
-r_ad_J_hi             = r19
-r_ad2o                = r19
-r_ad_mJ_hi            = r20
-r_mj                  = r21
-r_ad2e                = r22
-r_ad3                 = r23
-r_ad1                 = r24
-r_Mmj                 = r24
-r_rshf                = r25
-r_M                   = r25
-r_N                   = r25
-r_jshf                = r26
-r_exp_2tom57          = r26
-r_j                   = r26
-r_exp_mask            = r27
-r_signexp_x           = r28
-r_signexp_sgnx_0_5    = r28
-r_exp_0_25            = r29
-r_sig_inv_ln2         = r30
-r_exp_32              = r30
-r_exp_huge            = r30
-r_ad4                 = r31
-
-GR_SAVE_PFS           = r34
-GR_SAVE_B0            = r35
-GR_SAVE_GP            = r36
-
-GR_Parameter_X        = r37
-GR_Parameter_Y        = r38
-GR_Parameter_RESULT   = r39
-GR_Parameter_TAG      = r40
-
-
-f_ABS_X               = f9 
-f_X2                  = f10
-f_X4                  = f11
-f_tmp                 = f14
-f_RSHF                = f15
-
-f_Inv_log2by64        = f32
-f_log2by64_lo         = f33
-f_log2by64_hi         = f34
-f_A1                  = f35
-
-f_A2                  = f36
-f_A3                  = f37
-f_Rcub                = f38
-f_M_temp              = f39
-f_R_temp              = f40
-
-f_Rsq                 = f41
-f_R                   = f42
-f_M                   = f43
-f_B1                  = f44
-f_B2                  = f45
-
-f_B3                  = f46
-f_peven_temp1         = f47
-f_peven_temp2         = f48
-f_peven               = f49
-f_podd_temp1          = f50
-
-f_podd_temp2          = f51
-f_podd                = f52
-f_poly65              = f53
-f_poly6543            = f53
-f_poly6to1            = f53
-f_poly43              = f54
-f_poly21              = f55
-
-f_X3                  = f56
-f_INV_LN2_2TO63       = f57
-f_RSHF_2TO57          = f58
-f_2TOM57              = f59
-f_smlst_oflow_input   = f60
-
-f_pre_result          = f61
-f_huge                = f62
-f_spos                = f63
-f_sneg                = f64
-f_Tjhi                = f65
-
-f_Tjlo                = f66
-f_Tmjhi               = f67
-f_Tmjlo               = f68
-f_S_hi                = f69
-f_SC_hi_temp          = f70
-
-f_S_lo_temp1          = f71 
-f_S_lo_temp2          = f72 
-f_S_lo_temp3          = f73 
-f_S_lo_temp4          = f73 
-f_S_lo                = f74
-f_C_hi                = f75
-
-f_Y_hi                = f77 
-f_Y_lo_temp           = f78 
-f_Y_lo                = f79 
-f_NORM_X              = f80
-
-f_P1                  = f81
-f_P2                  = f82
-f_P3                  = f83
-f_P4                  = f84
-f_P5                  = f85
-
-f_P6                  = f86
-f_Tjhi_spos           = f87
-f_Tjlo_spos           = f88
-f_huge                = f89
-f_signed_hi_lo        = f90
+sinh_FR_X            = f44
+sinh_FR_X2           = f9
+sinh_FR_X4           = f10
+sinh_FR_SGNX         = f40
+sinh_FR_all_ones     = f45
+sinh_FR_tmp          = f42
+
+sinh_FR_Inv_log2by64 = f9
+sinh_FR_log2by64_lo  = f11
+sinh_FR_log2by64_hi  = f10
+
+sinh_FR_A1           = f9
+sinh_FR_A2           = f10
+sinh_FR_A3           = f11
+
+sinh_FR_Rcub         = f12
+sinh_FR_M_temp       = f13
+sinh_FR_R_temp       = f13
+sinh_FR_Rsq          = f13
+sinh_FR_R            = f14
+
+sinh_FR_M            = f38
+
+sinh_FR_B1           = f15
+sinh_FR_B2           = f32
+sinh_FR_B3           = f33
 
+sinh_FR_peven_temp1  = f34
+sinh_FR_peven_temp2  = f35
+sinh_FR_peven        = f36
+
+sinh_FR_podd_temp1   = f34
+sinh_FR_podd_temp2   = f35
+sinh_FR_podd         = f37
+
+sinh_FR_poly_podd_temp1    =  f11 
+sinh_FR_poly_podd_temp2    =  f13
+sinh_FR_poly_peven_temp1   =  f11
+sinh_FR_poly_peven_temp2   =  f13
+
+sinh_FR_J_temp       = f9
+sinh_FR_J            = f10
+
+sinh_FR_Mmj          = f39
+
+sinh_FR_N_temp1      = f11
+sinh_FR_N_temp2      = f12
+sinh_FR_N            = f13
+
+sinh_FR_spos         = f14
+sinh_FR_sneg         = f15
+
+sinh_FR_Tjhi         = f32
+sinh_FR_Tjlo         = f33
+sinh_FR_Tmjhi        = f34
+sinh_FR_Tmjlo        = f35
+
+sinh_GR_mJ           = r35
+sinh_GR_J            = r36
+
+sinh_AD_mJ           = r38
+sinh_AD_J            = r39
+sinh_GR_all_ones     = r40
+
+sinh_FR_S_hi         = f9
+sinh_FR_S_hi_temp    = f10
+sinh_FR_S_lo_temp1   = f11 
+sinh_FR_S_lo_temp2   = f12 
+sinh_FR_S_lo_temp3   = f13 
+
+sinh_FR_S_lo         = f38
+sinh_FR_C_hi         = f39
+
+sinh_FR_C_hi_temp1   = f10
+sinh_FR_Y_hi         = f11 
+sinh_FR_Y_lo_temp    = f12 
+sinh_FR_Y_lo         = f13 
+sinh_FR_SINH         = f9
+
+sinh_FR_P1           = f14
+sinh_FR_P2           = f15
+sinh_FR_P3           = f32
+sinh_FR_P4           = f33
+sinh_FR_P5           = f34
+sinh_FR_P6           = f35
+
+sinh_FR_TINY_THRESH  = f9
+
+sinh_FR_SINH_temp    = f10
+sinh_FR_SCALE        = f11 
+
+sinh_FR_signed_hi_lo = f10
+
+
+GR_SAVE_PFS          = r41
+GR_SAVE_B0           = r42
+GR_SAVE_GP           = r43
+
+GR_Parameter_X       = r44
+GR_Parameter_Y       = r45
+GR_Parameter_RESULT  = r46
 
 // Data tables
 //==============================================================
 
-// DO NOT CHANGE ORDER OF THESE TABLES
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
-LOCAL_OBJECT_START(sinh_arg_reduction)
-//   data8 0xB8AA3B295C17F0BC, 0x00004005  // 64/log2 -- signif loaded with setf
-   data8 0xB17217F7D1000000, 0x00003FF8  // log2/64 high part
-   data8 0xCF79ABC9E3B39804, 0x00003FD0  // log2/64 low part
-   data8 0xb174ddc031aec0ea, 0x0000400c  // Smallest x to overflow (11357.21655)
-LOCAL_OBJECT_END(sinh_arg_reduction)
-
-LOCAL_OBJECT_START(sinh_p_table)
-   data8 0xB08AF9AE78C1239F, 0x00003FDE  // P6
-   data8 0xB8EF1D28926D8891, 0x00003FEC  // P4
-   data8 0x8888888888888412, 0x00003FF8  // P2
-   data8 0xD732377688025BE9, 0x00003FE5  // P5
-   data8 0xD00D00D00D4D39F2, 0x00003FF2  // P3
-   data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC  // P1
-LOCAL_OBJECT_END(sinh_p_table)
-
-LOCAL_OBJECT_START(sinh_ab_table)
-   data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC  // A1
-   data8 0x88888888884ECDD5, 0x00003FF8  // A2
-   data8 0xD00D0C6DCC26A86B, 0x00003FF2  // A3
-   data8 0x8000000000000002, 0x00003FFE  // B1
-   data8 0xAAAAAAAAAA402C77, 0x00003FFA  // B2
-   data8 0xB60B6CC96BDB144D, 0x00003FF5  // B3
-LOCAL_OBJECT_END(sinh_ab_table)
-
-LOCAL_OBJECT_START(sinh_j_hi_table)
-   data8 0xB504F333F9DE6484, 0x00003FFE
-   data8 0xB6FD91E328D17791, 0x00003FFE
-   data8 0xB8FBAF4762FB9EE9, 0x00003FFE
-   data8 0xBAFF5AB2133E45FB, 0x00003FFE
-   data8 0xBD08A39F580C36BF, 0x00003FFE
-   data8 0xBF1799B67A731083, 0x00003FFE
-   data8 0xC12C4CCA66709456, 0x00003FFE
-   data8 0xC346CCDA24976407, 0x00003FFE
-   data8 0xC5672A115506DADD, 0x00003FFE
-   data8 0xC78D74C8ABB9B15D, 0x00003FFE
-   data8 0xC9B9BD866E2F27A3, 0x00003FFE
-   data8 0xCBEC14FEF2727C5D, 0x00003FFE
-   data8 0xCE248C151F8480E4, 0x00003FFE
-   data8 0xD06333DAEF2B2595, 0x00003FFE
-   data8 0xD2A81D91F12AE45A, 0x00003FFE
-   data8 0xD4F35AABCFEDFA1F, 0x00003FFE
-   data8 0xD744FCCAD69D6AF4, 0x00003FFE
-   data8 0xD99D15C278AFD7B6, 0x00003FFE
-   data8 0xDBFBB797DAF23755, 0x00003FFE
-   data8 0xDE60F4825E0E9124, 0x00003FFE
-   data8 0xE0CCDEEC2A94E111, 0x00003FFE
-   data8 0xE33F8972BE8A5A51, 0x00003FFE
-   data8 0xE5B906E77C8348A8, 0x00003FFE
-   data8 0xE8396A503C4BDC68, 0x00003FFE
-   data8 0xEAC0C6E7DD24392F, 0x00003FFE
-   data8 0xED4F301ED9942B84, 0x00003FFE
-   data8 0xEFE4B99BDCDAF5CB, 0x00003FFE
-   data8 0xF281773C59FFB13A, 0x00003FFE
-   data8 0xF5257D152486CC2C, 0x00003FFE
-   data8 0xF7D0DF730AD13BB9, 0x00003FFE
-   data8 0xFA83B2DB722A033A, 0x00003FFE
-   data8 0xFD3E0C0CF486C175, 0x00003FFE
-   data8 0x8000000000000000, 0x00003FFF // Center of table
-   data8 0x8164D1F3BC030773, 0x00003FFF
-   data8 0x82CD8698AC2BA1D7, 0x00003FFF
-   data8 0x843A28C3ACDE4046, 0x00003FFF
-   data8 0x85AAC367CC487B15, 0x00003FFF
-   data8 0x871F61969E8D1010, 0x00003FFF
-   data8 0x88980E8092DA8527, 0x00003FFF
-   data8 0x8A14D575496EFD9A, 0x00003FFF
-   data8 0x8B95C1E3EA8BD6E7, 0x00003FFF
-   data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF
-   data8 0x8EA4398B45CD53C0, 0x00003FFF
-   data8 0x9031DC431466B1DC, 0x00003FFF
-   data8 0x91C3D373AB11C336, 0x00003FFF
-   data8 0x935A2B2F13E6E92C, 0x00003FFF
-   data8 0x94F4EFA8FEF70961, 0x00003FFF
-   data8 0x96942D3720185A00, 0x00003FFF
-   data8 0x9837F0518DB8A96F, 0x00003FFF
-   data8 0x99E0459320B7FA65, 0x00003FFF
-   data8 0x9B8D39B9D54E5539, 0x00003FFF
-   data8 0x9D3ED9A72CFFB751, 0x00003FFF
-   data8 0x9EF5326091A111AE, 0x00003FFF
-   data8 0xA0B0510FB9714FC2, 0x00003FFF
-   data8 0xA27043030C496819, 0x00003FFF
-   data8 0xA43515AE09E6809E, 0x00003FFF
-   data8 0xA5FED6A9B15138EA, 0x00003FFF
-   data8 0xA7CD93B4E965356A, 0x00003FFF
-   data8 0xA9A15AB4EA7C0EF8, 0x00003FFF
-   data8 0xAB7A39B5A93ED337, 0x00003FFF
-   data8 0xAD583EEA42A14AC6, 0x00003FFF
-   data8 0xAF3B78AD690A4375, 0x00003FFF
-   data8 0xB123F581D2AC2590, 0x00003FFF
-   data8 0xB311C412A9112489, 0x00003FFF
-   data8 0xB504F333F9DE6484, 0x00003FFF
-LOCAL_OBJECT_END(sinh_j_hi_table)
-
-LOCAL_OBJECT_START(sinh_j_lo_table)
-   data4 0x1EB2FB13
-   data4 0x1CE2CBE2
-   data4 0x1DDC3CBC
-   data4 0x1EE9AA34
-   data4 0x9EAEFDC1
-   data4 0x9DBF517B
-   data4 0x1EF88AFB
-   data4 0x1E03B216
-   data4 0x1E78AB43
-   data4 0x9E7B1747
-   data4 0x9EFE3C0E
-   data4 0x9D36F837
-   data4 0x9DEE53E4
-   data4 0x9E24AE8E
-   data4 0x1D912473
-   data4 0x1EB243BE
-   data4 0x1E669A2F
-   data4 0x9BBC610A
-   data4 0x1E761035
-   data4 0x9E0BE175
-   data4 0x1CCB12A1
-   data4 0x1D1BFE90
-   data4 0x1DF2F47A
-   data4 0x1EF22F22
-   data4 0x9E3F4A29
-   data4 0x1EC01A5B
-   data4 0x1E8CAC3A
-   data4 0x9DBB3FAB
-   data4 0x1EF73A19
-   data4 0x9BB795B5
-   data4 0x1EF84B76
-   data4 0x9EF5818B
-   data4 0x00000000 // Center of table
-   data4 0x1F77CACA
-   data4 0x1EF8A91D
-   data4 0x1E57C976
-   data4 0x9EE8DA92
-   data4 0x1EE85C9F
-   data4 0x1F3BF1AF
-   data4 0x1D80CA1E
-   data4 0x9D0373AF
-   data4 0x9F167097
-   data4 0x1EB70051
-   data4 0x1F6EB029
-   data4 0x1DFD6D8E
-   data4 0x9EB319B0
-   data4 0x1EBA2BEB
-   data4 0x1F11D537
-   data4 0x1F0D5A46
-   data4 0x9E5E7BCA
-   data4 0x9F3AAFD1
-   data4 0x9E86DACC
-   data4 0x9F3EDDC2
-   data4 0x1E496E3D
-   data4 0x9F490BF6
-   data4 0x1DD1DB48
-   data4 0x1E65EBFB
-   data4 0x9F427496
-   data4 0x1F283C4A
-   data4 0x1F4B0047
-   data4 0x1F130152
-   data4 0x9E8367C0
-   data4 0x9F705F90
-   data4 0x1EFB3C53
-   data4 0x1F32FB13
-LOCAL_OBJECT_END(sinh_j_lo_table)
-
+double_sinh_arg_reduction:
+ASM_TYPE_DIRECTIVE(double_sinh_arg_reduction,@object)
+   data8 0xB8AA3B295C17F0BC, 0x00004005
+   data8 0xB17217F7D1000000, 0x00003FF8
+   data8 0xCF79ABC9E3B39804, 0x00003FD0
+ASM_SIZE_DIRECTIVE(double_sinh_arg_reduction)
+
+double_sinh_p_table:
+ASM_TYPE_DIRECTIVE(double_sinh_p_table,@object)
+   data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC
+   data8 0x8888888888888412, 0x00003FF8
+   data8 0xD00D00D00D4D39F2, 0x00003FF2
+   data8 0xB8EF1D28926D8891, 0x00003FEC
+   data8 0xD732377688025BE9, 0x00003FE5
+   data8 0xB08AF9AE78C1239F, 0x00003FDE
+ASM_SIZE_DIRECTIVE(double_sinh_p_table)
+
+double_sinh_ab_table:
+ASM_TYPE_DIRECTIVE(double_sinh_ab_table,@object)
+   data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC
+   data8 0x88888888884ECDD5, 0x00003FF8
+   data8 0xD00D0C6DCC26A86B, 0x00003FF2
+   data8 0x8000000000000002, 0x00003FFE
+   data8 0xAAAAAAAAAA402C77, 0x00003FFA
+   data8 0xB60B6CC96BDB144D, 0x00003FF5
+ASM_SIZE_DIRECTIVE(double_sinh_ab_table)
+
+double_sinh_j_table:
+ASM_TYPE_DIRECTIVE(double_sinh_j_table,@object)
+   data8 0xB504F333F9DE6484, 0x00003FFE, 0x1EB2FB13, 0x00000000
+   data8 0xB6FD91E328D17791, 0x00003FFE, 0x1CE2CBE2, 0x00000000
+   data8 0xB8FBAF4762FB9EE9, 0x00003FFE, 0x1DDC3CBC, 0x00000000
+   data8 0xBAFF5AB2133E45FB, 0x00003FFE, 0x1EE9AA34, 0x00000000
+   data8 0xBD08A39F580C36BF, 0x00003FFE, 0x9EAEFDC1, 0x00000000
+   data8 0xBF1799B67A731083, 0x00003FFE, 0x9DBF517B, 0x00000000
+   data8 0xC12C4CCA66709456, 0x00003FFE, 0x1EF88AFB, 0x00000000
+   data8 0xC346CCDA24976407, 0x00003FFE, 0x1E03B216, 0x00000000
+   data8 0xC5672A115506DADD, 0x00003FFE, 0x1E78AB43, 0x00000000
+   data8 0xC78D74C8ABB9B15D, 0x00003FFE, 0x9E7B1747, 0x00000000
+   data8 0xC9B9BD866E2F27A3, 0x00003FFE, 0x9EFE3C0E, 0x00000000
+   data8 0xCBEC14FEF2727C5D, 0x00003FFE, 0x9D36F837, 0x00000000
+   data8 0xCE248C151F8480E4, 0x00003FFE, 0x9DEE53E4, 0x00000000
+   data8 0xD06333DAEF2B2595, 0x00003FFE, 0x9E24AE8E, 0x00000000
+   data8 0xD2A81D91F12AE45A, 0x00003FFE, 0x1D912473, 0x00000000
+   data8 0xD4F35AABCFEDFA1F, 0x00003FFE, 0x1EB243BE, 0x00000000
+   data8 0xD744FCCAD69D6AF4, 0x00003FFE, 0x1E669A2F, 0x00000000
+   data8 0xD99D15C278AFD7B6, 0x00003FFE, 0x9BBC610A, 0x00000000
+   data8 0xDBFBB797DAF23755, 0x00003FFE, 0x1E761035, 0x00000000
+   data8 0xDE60F4825E0E9124, 0x00003FFE, 0x9E0BE175, 0x00000000
+   data8 0xE0CCDEEC2A94E111, 0x00003FFE, 0x1CCB12A1, 0x00000000
+   data8 0xE33F8972BE8A5A51, 0x00003FFE, 0x1D1BFE90, 0x00000000
+   data8 0xE5B906E77C8348A8, 0x00003FFE, 0x1DF2F47A, 0x00000000
+   data8 0xE8396A503C4BDC68, 0x00003FFE, 0x1EF22F22, 0x00000000
+   data8 0xEAC0C6E7DD24392F, 0x00003FFE, 0x9E3F4A29, 0x00000000
+   data8 0xED4F301ED9942B84, 0x00003FFE, 0x1EC01A5B, 0x00000000
+   data8 0xEFE4B99BDCDAF5CB, 0x00003FFE, 0x1E8CAC3A, 0x00000000
+   data8 0xF281773C59FFB13A, 0x00003FFE, 0x9DBB3FAB, 0x00000000
+   data8 0xF5257D152486CC2C, 0x00003FFE, 0x1EF73A19, 0x00000000
+   data8 0xF7D0DF730AD13BB9, 0x00003FFE, 0x9BB795B5, 0x00000000
+   data8 0xFA83B2DB722A033A, 0x00003FFE, 0x1EF84B76, 0x00000000
+   data8 0xFD3E0C0CF486C175, 0x00003FFE, 0x9EF5818B, 0x00000000
+   data8 0x8000000000000000, 0x00003FFF, 0x00000000, 0x00000000
+   data8 0x8164D1F3BC030773, 0x00003FFF, 0x1F77CACA, 0x00000000
+   data8 0x82CD8698AC2BA1D7, 0x00003FFF, 0x1EF8A91D, 0x00000000
+   data8 0x843A28C3ACDE4046, 0x00003FFF, 0x1E57C976, 0x00000000
+   data8 0x85AAC367CC487B15, 0x00003FFF, 0x9EE8DA92, 0x00000000
+   data8 0x871F61969E8D1010, 0x00003FFF, 0x1EE85C9F, 0x00000000
+   data8 0x88980E8092DA8527, 0x00003FFF, 0x1F3BF1AF, 0x00000000
+   data8 0x8A14D575496EFD9A, 0x00003FFF, 0x1D80CA1E, 0x00000000
+   data8 0x8B95C1E3EA8BD6E7, 0x00003FFF, 0x9D0373AF, 0x00000000
+   data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF, 0x9F167097, 0x00000000
+   data8 0x8EA4398B45CD53C0, 0x00003FFF, 0x1EB70051, 0x00000000
+   data8 0x9031DC431466B1DC, 0x00003FFF, 0x1F6EB029, 0x00000000
+   data8 0x91C3D373AB11C336, 0x00003FFF, 0x1DFD6D8E, 0x00000000
+   data8 0x935A2B2F13E6E92C, 0x00003FFF, 0x9EB319B0, 0x00000000
+   data8 0x94F4EFA8FEF70961, 0x00003FFF, 0x1EBA2BEB, 0x00000000
+   data8 0x96942D3720185A00, 0x00003FFF, 0x1F11D537, 0x00000000
+   data8 0x9837F0518DB8A96F, 0x00003FFF, 0x1F0D5A46, 0x00000000
+   data8 0x99E0459320B7FA65, 0x00003FFF, 0x9E5E7BCA, 0x00000000
+   data8 0x9B8D39B9D54E5539, 0x00003FFF, 0x9F3AAFD1, 0x00000000
+   data8 0x9D3ED9A72CFFB751, 0x00003FFF, 0x9E86DACC, 0x00000000
+   data8 0x9EF5326091A111AE, 0x00003FFF, 0x9F3EDDC2, 0x00000000
+   data8 0xA0B0510FB9714FC2, 0x00003FFF, 0x1E496E3D, 0x00000000
+   data8 0xA27043030C496819, 0x00003FFF, 0x9F490BF6, 0x00000000
+   data8 0xA43515AE09E6809E, 0x00003FFF, 0x1DD1DB48, 0x00000000
+   data8 0xA5FED6A9B15138EA, 0x00003FFF, 0x1E65EBFB, 0x00000000
+   data8 0xA7CD93B4E965356A, 0x00003FFF, 0x9F427496, 0x00000000
+   data8 0xA9A15AB4EA7C0EF8, 0x00003FFF, 0x1F283C4A, 0x00000000
+   data8 0xAB7A39B5A93ED337, 0x00003FFF, 0x1F4B0047, 0x00000000
+   data8 0xAD583EEA42A14AC6, 0x00003FFF, 0x1F130152, 0x00000000
+   data8 0xAF3B78AD690A4375, 0x00003FFF, 0x9E8367C0, 0x00000000
+   data8 0xB123F581D2AC2590, 0x00003FFF, 0x9F705F90, 0x00000000
+   data8 0xB311C412A9112489, 0x00003FFF, 0x1EFB3C53, 0x00000000
+   data8 0xB504F333F9DE6484, 0x00003FFF, 0x1F32FB13, 0x00000000
+ASM_SIZE_DIRECTIVE(double_sinh_j_table)
+
+.align 32
+.global sinhl#
 
 .section .text
-GLOBAL_IEEE754_ENTRY(sinhl)
+.proc  sinhl#
+.align 32
 
-{ .mlx
-      getf.exp        r_signexp_x = f8   // Get signexp of x, must redo if unorm
-      movl            r_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+sinhl:
+#ifdef _LIBC
+.global __ieee754_sinhl
+.type __ieee754_sinhl,@function
+__ieee754_sinhl:
+#endif
+
+// X infinity or NAN?
+// Take invalid fault if enabled
+
+
+{ .mfi
+      alloc r32 = ar.pfs,0,12,4,0                  
+(p0)     fclass.m.unc  p6,p0 = f8, 0xe3	//@qnan | @snan | @inf 
+         mov sinh_GR_all_ones = -1
+}
+;;
+
+
+{ .mfb
+         nop.m 999
+(p6)     fma.s0   f8 = f8,f1,f8               
+(p6)     br.ret.spnt     b0 ;;                          
 }
+
+// Put 0.25 in f9; p7 true if |x| >= 0.25 (branch to SINH_BY_TBL)
+// Make constant that will generate inexact when squared
 { .mlx
-      addl            r_ad1 = @ltoff(sinh_arg_reduction), gp
-      movl            r_rshf_2to57 = 0x4778000000000000 // 1.10000 2^(63+57)
+         setf.sig sinh_FR_all_ones = sinh_GR_all_ones 
+(p0)     movl            r32 = 0x000000000000fffd ;;         
+}
+
+{ .mfi
+(p0)     setf.exp        f9 = r32                         
+(p0)     fclass.m.unc  p7,p0 = f8, 0x07	//@zero
+         nop.i 999 ;;
+}
+
+{ .mfb
+         nop.m 999
+(p0)     fmerge.s      sinh_FR_X    = f0,f8             
+(p7)     br.ret.spnt     b0 ;;                          
 }
-;;
 
+// Identify denormal operands.
 { .mfi
-      ld8             r_ad1 = [r_ad1]
-      fmerge.s        f_ABS_X    = f0,f8
-      mov             r_exp_0_25 = 0x0fffd    // Form exponent for 0.25
+         nop.m 999
+         fclass.m.unc  p10,p0 = f8, 0x09        //  + denorm
+         nop.i 999
+};;
+{ .mfi
+         nop.m 999
+         fclass.m.unc  p11,p0 = f8, 0x0a        //  - denorm
+         nop.i 999 
 }
+
 { .mfi
-      nop.m           0
-      fnorm.s1        f_NORM_X = f8      
-      mov             r_exp_2tom57 = 0xffff-57
+         nop.m 999
+(p0)     fmerge.s      sinh_FR_SGNX = f8,f1             
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      setf.d          f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120
-      fclass.m        p10,p0 = f8, 0x0b           // Test for denorm
-      mov             r_exp_mask = 0x1ffff 
+         nop.m 999
+(p0)     fcmp.lt.unc.s1  p0,p7 = sinh_FR_X,f9             
+         nop.i 999 ;;
 }
-{ .mlx
-      setf.sig        f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63
-      movl            r_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
+
+{ .mib
+         nop.m 999
+         nop.i 999
+(p7)     br.cond.sptk    L(SINH_BY_TBL) ;;                      
+}
+
+
+L(SINH_BY_POLY): 
+
+// POLY cannot overflow so there is no need to call __libm_error_support
+// Set tiny_SAFE (p7) to 1(0) if answer is not tiny 
+// Currently we do not use tiny_SAFE. So the setting of tiny_SAFE is
+// commented out.
+//(p0)     movl            r32            = 0x000000000000fc01           
+//(p0)     setf.exp        f10            = r32                         
+//(p0)     fcmp.lt.unc.s1  p6,p7          = f8,f10                     
+// Here is essentially the algorithm for SINH_BY_POLY. Care is taken for the order
+// of multiplication; and P_1 is not exactly 1/3!, P_2 is not exactly 1/5!, etc.
+// Note that ax = |x|
+// sinh(x) = sign * (series(e^x) - series(e^-x))/2
+//         = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11! + ax^13/13!)
+//         = sign * (ax   + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
+//                        + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ) )
+//         = sign * (ax   + ax*p_odd + (ax*p_even))
+//         = sign * (ax   + Y_lo)
+// sinh(x) = sign * (Y_hi + Y_lo)
+// Get the values of P_x from the table
+{ .mfb
+(p0)  addl           r34   = @ltoff(double_sinh_p_table), gp
+(p10) fma.s0       f8 =  f8,f8,f8
+(p10) br.ret.spnt    b0
+}
+;;
+
+{ .mfb
+      ld8 r34 = [r34]
+(p11) fnma.s0      f8 =  f8,f8,f8
+(p11) br.ret.spnt    b0
 }
 ;;
 
+// Calculate sinh_FR_X2 = ax*ax and sinh_FR_X4 = ax*ax*ax*ax
+{ .mmf
+         nop.m 999
+(p0)     ldfe            sinh_FR_P1 = [r34],16                 
+(p0)     fma.s1        sinh_FR_X2 = sinh_FR_X, sinh_FR_X, f0 ;;           
+}
+
+{ .mmi
+(p0)     ldfe            sinh_FR_P2 = [r34],16 ;;                 
+(p0)     ldfe            sinh_FR_P3 = [r34],16                 
+         nop.i 999 ;;
+}
+
+{ .mmi
+(p0)     ldfe            sinh_FR_P4 = [r34],16 ;;                 
+(p0)     ldfe            sinh_FR_P5 = [r34],16                 
+         nop.i 999 ;;
+}
+
 { .mfi
-      nop.m           0
-      fclass.m        p7,p0 = f8, 0x07  // Test if x=0
-      nop.i           0
+(p0)     ldfe            sinh_FR_P6 = [r34],16                 
+(p0)     fma.s1        sinh_FR_X4 = sinh_FR_X2, sinh_FR_X2, f0         
+         nop.i 999 ;;
 }
+
+// Calculate sinh_FR_podd = p_odd and sinh_FR_peven = p_even 
 { .mfi
-      setf.exp        f_2TOM57 = r_exp_2tom57 // Form 2^-57 for scaling
-      nop.f           0
-      add             r_ad3 = 0x90, r_ad1  // Point to ab_table
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_podd_temp1 = sinh_FR_X4, sinh_FR_P5, sinh_FR_P3                
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      setf.d          f_RSHF = r_rshf     // Form right shift const 1.100 * 2^63
-      fclass.m        p6,p0 = f8, 0xe3     // Test if x nan, inf
-      add             r_ad4 = 0x2f0, r_ad1 // Point to j_hi_table midpoint
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_podd_temp2 = sinh_FR_X4, sinh_FR_poly_podd_temp1, sinh_FR_P1   
+         nop.i 999
 }
-{ .mib
-      add             r_ad2e = 0x20, r_ad1 // Point to p_table
-      nop.i           0
-(p10) br.cond.spnt    SINH_DENORM          // Branch if x denorm
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_peven_temp1 = sinh_FR_X4, sinh_FR_P6, sinh_FR_P4               
+         nop.i 999 ;;
 }
-;;
 
-// Common path -- return here from SINH_DENORM if x is unnorm
-SINH_COMMON:
 { .mfi
-      ldfe            f_smlst_oflow_input = [r_ad2e],16
-      nop.f           0
-      add             r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint
+         nop.m 999
+(p0)     fma.s1      sinh_FR_podd       = sinh_FR_X2, sinh_FR_poly_podd_temp2, f0           
+         nop.i 999
 }
-{ .mib
-      ldfe            f_log2by64_hi  = [r_ad1],16       
-      and             r_exp_x = r_exp_mask, r_signexp_x
-(p7)  br.ret.spnt     b0                  // Exit if x=0
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_poly_peven_temp2 = sinh_FR_X4, sinh_FR_poly_peven_temp1, sinh_FR_P2 
+         nop.i 999 ;;
 }
-;;
 
-// Get the A coefficients for SINH_BY_TBL
 { .mfi
-      ldfe            f_A1 = [r_ad3],16            
-      fcmp.lt.s1      p8,p9 = f8,f0           // Test for x<0
-      cmp.lt          p7,p0 = r_exp_x, r_exp_0_25  // Test x < 0.25
+         nop.m 999
+(p0)     fma.s1      sinh_FR_peven       = sinh_FR_X4, sinh_FR_poly_peven_temp2, f0         
+         nop.i 999 ;;
 }
-{ .mfb
-      add             r_ad2o = 0x30, r_ad2e  // Point to p_table odd coeffs
-(p6)  fma.s0          f8 = f8,f1,f0          // Result for x nan, inf          
-(p6)  br.ret.spnt     b0                     // Exit for x nan, inf
+
+// Calculate sinh_FR_Y_lo = ax*p_odd + (ax*p_even)
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_Y_lo_temp    = sinh_FR_X, sinh_FR_peven, f0                    
+         nop.i 999 ;;
 }
-;;
 
-// Calculate X2 = ax*ax for SINH_BY_POLY
 { .mfi
-      ldfe            f_log2by64_lo  = [r_ad1],16       
-      nop.f           0
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1      sinh_FR_Y_lo         = sinh_FR_X, sinh_FR_podd,  sinh_FR_Y_lo_temp          
+         nop.i 999 ;;
 }
+
+// Calculate sinh_FR_SINH = Y_hi + Y_lo. Note that ax = Y_hi
+{ .mfi
+         nop.m 999
+(p0)     fma.s1      sinh_FR_SINH        = sinh_FR_X, f1, sinh_FR_Y_lo                      
+         nop.i 999 ;;
+}
+// Dummy multiply to generate inexact
+{ .mfi
+         nop.m 999
+(p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999
+}
+
+// Calculate f8 = sign * (Y_hi + Y_lo)
+// Go to return
 { .mfb
-      ldfe            f_A2 = [r_ad3],16            
-      fma.s1          f_X2 = f_NORM_X, f_NORM_X, f0
-(p7)  br.cond.spnt    SINH_BY_POLY
+         nop.m 999
+(p0)     fma.s0        f8 = sinh_FR_SGNX,sinh_FR_SINH,f0                       
+(p0)     br.ret.sptk     b0 ;;                          
 }
-;;
 
-// Here if |x| >= 0.25
-SINH_BY_TBL: 
+
+L(SINH_BY_TBL): 
+
+// Now that we are at TBL; so far all we know is that |x| >= 0.25.
+// The first two steps are the same for TBL and EXP, but if we are HUGE
+// we want to leave now. 
+// Double-extended:
+// Go to HUGE if |x| >= 2^14, 1000d (register-biased) is e = 14 (true)
+// Double
+// Go to HUGE if |x| >= 2^10, 10009 (register-biased) is e = 10 (true)
+// Single
+// Go to HUGE if |x| >= 2^7,  10006 (register-biased) is e =  7 (true)
+
+{ .mlx
+         nop.m 999
+(p0)     movl            r32 = 0x000000000001000d ;;         
+}
+
+{ .mfi
+(p0)     setf.exp        f9 = r32                         
+         nop.f 999
+         nop.i 999 ;;
+}
+
+{ .mfi
+         nop.m 999
+(p0)     fcmp.ge.unc.s1  p6,p7 = sinh_FR_X,f9             
+         nop.i 999 ;;
+}
+
+{ .mib
+         nop.m 999
+         nop.i 999
+(p6)     br.cond.spnt    L(SINH_HUGE) ;;                        
+}
+
+// r32 = 1
+// r34 = N-1 
+// r35 = N
+// r36 = j
+// r37 = N+1
+
+// TBL can never overflow
+// sinh(x) = sinh(B+R)
+//         = sinh(B)cosh(R) + cosh(B)sinh(R)
+// 
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j 
+//   We will calculate M and get N as (M-j)/64
+//   The division is a shift.
+// exp(B)  = exp(N*log2 + j*log2/64)
+//         = 2^N * 2^(j*log2/64)
+// sinh(B) = 1/2(e^B -e^-B)
+//         = 1/2(2^N * 2^(j*log2/64) - 2^-N * 2^(-j*log2/64)) 
+// sinh(B) = (2^(N-1) * 2^(j*log2/64) - 2^(-N-1) * 2^(-j*log2/64)) 
+// cosh(B) = (2^(N-1) * 2^(j*log2/64) + 2^(-N-1) * 2^(-j*log2/64)) 
+// 2^(j*log2/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+//        = 1 + p_odd + p_even
+//        where p_odd uses the A coefficients and p_even uses the B coefficients
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+//    cosh(R) = 1 + p_even
+//    sinh(B) = S_hi + S_lo
+//    cosh(B) = C_hi
+// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
 // ******************************************************
-// STEP 1 (TBL and EXP) - Argument reduction
+// STEP 1 (TBL and EXP)
 // ******************************************************
 // Get the following constants. 
-// Inv_log2by64
-// log2by64_hi
-// log2by64_lo
+// f9  = Inv_log2by64
+// f10 = log2by64_hi
+// f11 = log2by64_lo
+
+{ .mmi
+(p0)  adds                 r32 = 0x1,r0      
+(p0)  addl           r34   = @ltoff(double_sinh_arg_reduction), gp
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
+}
+;;
 
 
 // We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
 // put them in an exponent.
-// f_spos = 2^(N-1) and f_sneg = 2^(-N-1)
-// 0xffff + (N-1)  = 0xffff +N -1
-// 0xffff - (N +1) = 0xffff -N -1
+// sinh_FR_spos = 2^(N-1) and sinh_FR_sneg = 2^(-N-1)
+// r39 = 0xffff + (N-1)  = 0xffff +N -1
+// r40 = 0xffff - (N +1) = 0xffff -N -1
 
+{ .mlx
+         nop.m 999
+(p0)     movl                r38 = 0x000000000000fffe ;; 
+}
 
-// Calculate M and keep it as integer and floating point.
-// M = round-to-integer(x*Inv_log2by64)
-// f_M = M = truncate(ax/(log2/64))
-// Put the integer representation of M in r_M
-//    and the floating point representation of M in f_M
+{ .mmi
+(p0)     ldfe            sinh_FR_Inv_log2by64 = [r34],16 ;;       
+(p0)     ldfe            sinh_FR_log2by64_hi  = [r34],16       
+         nop.i 999 ;;
+}
+
+{ .mbb
+(p0)     ldfe            sinh_FR_log2by64_lo  = [r34],16       
+         nop.b 999
+         nop.b 999 ;;
+}
+
+// Get the A coefficients
+// f9  = A_1
+// f10 = A_2
+// f11 = A_3
 
-// Get the remaining A,B coefficients
 { .mmi
-      ldfe            f_A3 = [r_ad3],16
-      nop.m           0
-      nop.i           0
+      nop.m 999
+(p0)  addl           r34   = @ltoff(double_sinh_ab_table), gp
+      nop.i 999
 }
 ;;
 
-.pred.rel "mutex",p8,p9
-// Use constant (1.100*2^(63-6)) to get rounded M into rightmost significand
-// |x| * 64 * 1/ln2 * 2^(63-6) + 1.1000 * 2^(63+(63-6))
-{ .mfi
-(p8)  mov             r_signexp_sgnx_0_5 = 0x2fffe // signexp of -0.5
-      fma.s1          f_M_temp = f_ABS_X, f_INV_LN2_2TO63, f_RSHF_2TO57
-(p9)  mov             r_signexp_sgnx_0_5 = 0x0fffe // signexp of +0.5
+{ .mmi
+      ld8 r34 = [r34]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
-// Test for |x| >= overflow limit
+
+// Calculate M and keep it as integer and floating point.
+// f38 = M = round-to-integer(x*Inv_log2by64)
+// sinh_FR_M = M = truncate(ax/(log2/64))
+// Put the significand of M in r35
+//    and the floating point representation of M in sinh_FR_M
+
 { .mfi
-      ldfe            f_B1 = [r_ad3],16
-      fcmp.ge.s1      p6,p0 = f_ABS_X, f_smlst_oflow_input
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_M      = sinh_FR_X, sinh_FR_Inv_log2by64, f0 
+         nop.i 999
 }
-;;
 
 { .mfi
-      ldfe            f_B2 = [r_ad3],16
-      nop.f           0
-      mov             r_exp_32 = 0x10004
+(p0)     ldfe            sinh_FR_A1 = [r34],16            
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
-// Subtract RSHF constant to get rounded M as a floating point value
-// M_temp * 2^(63-6) - 2^63
-{ .mfb
-      ldfe            f_B3 = [r_ad3],16            
-      fms.s1          f_M = f_M_temp, f_2TOM57, f_RSHF
-(p6)  br.cond.spnt    SINH_HUGE  // Branch if result will overflow
+{ .mfi
+         nop.m 999
+(p0)     fcvt.fx.s1      sinh_FR_M_temp = sinh_FR_M                      
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      getf.sig        r_M = f_M_temp                 
-      nop.f           0
-      cmp.ge          p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32
+         nop.m 999
+(p0)     fnorm.s1        sinh_FR_M      = sinh_FR_M_temp                 
+         nop.i 999 ;;
+}
+
+{ .mfi
+(p0)     getf.sig        r35       = sinh_FR_M_temp                 
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
-// Calculate j. j is the signed extension of the six lsb of M. It 
+// M is still in r35. Calculate j. j is the signed extension of the six lsb of M. It 
 // has a range of -32 thru 31.
+// r35 = M
+// r36 = j 
+
+{ .mii
+         nop.m 999
+         nop.i 999 ;;
+(p0)     and            r36 = 0x3f, r35 ;;   
+}
 
 // Calculate R
-// ax - M*log2by64_hi
-// R = (ax - M*log2by64_hi) - M*log2by64_lo
+// f13 = f44 - f12*f10 = ax - M*log2by64_hi
+// f14 = f13 - f8*f11  = R = (ax - M*log2by64_hi) - M*log2by64_lo
 
 { .mfi
-      nop.m           0
-      fnma.s1         f_R_temp = f_M, f_log2by64_hi, f_ABS_X
-      and             r_j = 0x3f, r_M
+         nop.m 999
+(p0)     fnma.s1           sinh_FR_R_temp = sinh_FR_M, sinh_FR_log2by64_hi, sinh_FR_X      
+         nop.i 999
 }
-;;
 
-{ .mii
-      nop.m           0
-      shl             r_jshf = r_j, 0x2 // Shift j so can sign extend it
-;;
-      sxt1            r_jshf = r_jshf
+{ .mfi
+(p0)     ldfe            sinh_FR_A2 = [r34],16            
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
-{ .mii
-      nop.m           0
-      shr             r_j = r_jshf, 0x2    // Now j has range -32 to 31
-      nop.i           0
+{ .mfi
+         nop.m 999
+(p0)     fnma.s1           sinh_FR_R      = sinh_FR_M, sinh_FR_log2by64_lo, sinh_FR_R_temp 
+         nop.i 999
 }
-;;
+
+// Get the B coefficients
+// f15 = B_1
+// f32 = B_2
+// f33 = B_3
 
 { .mmi
-      shladd          r_ad_J_hi = r_j, 4, r_ad4 // pointer to Tjhi
-      sub             r_Mmj = r_M, r_j          // M-j
-      sub             r_mj = r0, r_j            // Form -j
+(p0)     ldfe            sinh_FR_A3 = [r34],16 ;;            
+(p0)     ldfe            sinh_FR_B1 = [r34],16            
+         nop.i 999 ;;
 }
-;;
 
-// The TBL and EXP branches are merged and predicated
-// If TBL, p6 true, 0.25 <= |x| < 32
-// If EXP, p7 true, 32 <= |x| < overflow_limit
-//
-// N = (M-j)/64
-{ .mfi
-      ldfe            f_Tjhi = [r_ad_J_hi]
-      fnma.s1         f_R = f_M, f_log2by64_lo, f_R_temp 
-      shr             r_N = r_Mmj, 0x6            // N = (M-j)/64 
+{ .mmi
+(p0)     ldfe            sinh_FR_B2 = [r34],16 ;;            
+(p0)     ldfe            sinh_FR_B3 = [r34],16            
+         nop.i 999 ;;
 }
-{ .mfi
-      shladd          r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
-      nop.f           0
-      shladd          r_ad_mJ_lo = r_mj, 2, r_ad5 // pointer to Tmjlo
+
+{ .mii
+         nop.m 999
+(p0)     shl            r34 = r36,  0x2 ;;   
+(p0)     sxt1           r37 = r34 ;;         
 }
-;;
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// f12 = R*R*R
+// f13 = R*R
+// f14 = R <== from above
 
 { .mfi
-      sub             r_2mNm1 = r_signexp_sgnx_0_5, r_N // signexp sgnx*2^(-N-1)
-      nop.f           0
-      shladd          r_ad_J_lo = r_j, 2, r_ad5   // pointer to Tjlo
+         nop.m 999
+(p0)     fma.s1             sinh_FR_Rsq  = sinh_FR_R,   sinh_FR_R, f0  
+(p0)     shr            r36 = r37,  0x2 ;;   
 }
-{ .mfi
-      ldfe            f_Tmjhi = [r_ad_mJ_hi]
-      nop.f           0
-      add             r_2Nm1 = r_signexp_sgnx_0_5, r_N // signexp sgnx*2^(N-1)
+
+// r34 = M-j = r35 - r36
+// r35 = N = (M-j)/64
+
+{ .mii
+(p0)     sub                  r34 = r35, r36    
+         nop.i 999 ;;
+(p0)     shr                  r35 = r34, 0x6 ;;    
 }
-;;
 
-{ .mmf
-      ldfs            f_Tmjlo = [r_ad_mJ_lo]
-      setf.exp        f_sneg = r_2mNm1            // Form sgnx * 2^(-N-1)
-      nop.f           0
+{ .mii
+(p0)     sub                 r40 = r38, r35           
+(p0)     adds                 r37 = 0x1, r35    
+(p0)     add                 r39 = r38, r35 ;;           
+}
+
+// Get the address of the J table, add the offset, 
+// addresses are sinh_AD_mJ and sinh_AD_J, get the T value
+// f32 = T(j)_hi
+// f33 = T(j)_lo
+// f34 = T(-j)_hi
+// f35 = T(-j)_lo
+
+{ .mmi
+(p0)  sub                  r34 = r35, r32    
+(p0)  addl           r37   = @ltoff(double_sinh_j_table), gp
+      nop.i 999
 }
 ;;
 
-{ .mmf
-      ldfs            f_Tjlo  = [r_ad_J_lo]
-      setf.exp        f_spos = r_2Nm1             // Form sgnx * 2^(N-1)
-      nop.f           0
+{ .mmi
+      ld8 r37 = [r37]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+
+{ .mfi
+         nop.m 999
+(p0)     fma.s1             sinh_FR_Rcub = sinh_FR_Rsq, sinh_FR_R, f0  
+         nop.i 999
+}
+
 // ******************************************************
-// STEP 2 (TBL and EXP)
+// STEP 3 Now decide if we need to branch to EXP
 // ******************************************************
-// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+// Put 32 in f9; p7 true if x is not less than 32
+// Go to EXP if |x| >= 32 
 
-{ .mmf
-      nop.m           0
-      nop.m           0
-      fma.s1          f_Rsq  = f_R, f_R, f0
+{ .mlx
+         nop.m 999
+(p0)     movl                r32 = 0x0000000000010004 ;;               
 }
-;;
-
 
 // Calculate p_even
-// B_2 + Rsq *B_3
-// B_1 + Rsq * (B_2 + Rsq *B_3)
-// p_even = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+// f34 = B_2 + Rsq *B_3
+// f35 = B_1 + Rsq*f34      = B_1 + Rsq * (B_2 + Rsq *B_3)
+// f36 = p_even = Rsq * f35 = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+
 { .mfi
-      nop.m           0
-      fma.s1          f_peven_temp1 = f_Rsq, f_B3, f_B2
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_peven_temp1 = sinh_FR_Rsq, sinh_FR_B3,          sinh_FR_B2  
+         nop.i 999 ;;
 }
-// Calculate p_odd
-// A_2 + Rsq *A_3
-// A_1 + Rsq * (A_2 + Rsq *A_3)
-// podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+
 { .mfi
-      nop.m           0
-      fma.s1          f_podd_temp1 = f_Rsq, f_A3, f_A2
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_peven_temp2 = sinh_FR_Rsq, sinh_FR_peven_temp1, sinh_FR_B1  
+         nop.i 999
 }
-;;
+
+// Calculate p_odd
+// f34 = A_2 + Rsq *A_3
+// f35 = A_1 + Rsq * (A_2 + Rsq *A_3)
+// f37 = podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
 
 { .mfi
-      nop.m           0
-      fma.s1          f_Rcub = f_Rsq, f_R, f0
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_podd_temp1 = sinh_FR_Rsq,        sinh_FR_A3,         sinh_FR_A2  
+         nop.i 999 ;;
 }
-;;
 
-// 
-// If TBL, 
-// Calculate S_hi and S_lo, and C_hi
-// SC_hi_temp = sneg * Tmjhi
-// S_hi = spos * Tjhi - SC_hi_temp
-// S_hi = spos * Tjhi - (sneg * Tmjhi)
-// C_hi = spos * Tjhi + SC_hi_temp
-// C_hi = spos * Tjhi + (sneg * Tmjhi)
+{ .mfi
+(p0)     setf.exp            sinh_FR_N_temp1 = r39            
+         nop.f 999
+         nop.i 999 ;;
+}
 
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_SC_hi_temp = f_sneg, f_Tmjhi, f0   
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_peven       = sinh_FR_Rsq, sinh_FR_peven_temp2, f0     
+         nop.i 999
 }
-;;
 
-// If TBL, 
-// S_lo_temp3 = sneg * Tmjlo
-// S_lo_temp4 = spos * Tjlo - S_lo_temp3
-// S_lo_temp4 = spos * Tjlo -(sneg * Tmjlo)
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_S_lo_temp3 =  f_sneg, f_Tmjlo, f0
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_podd_temp2 = sinh_FR_Rsq,        sinh_FR_podd_temp1, sinh_FR_A1  
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          f_peven_temp2 = f_Rsq, f_peven_temp1, f_B1
-      nop.i           0
+(p0)     setf.exp            f9  = r32                              
+         nop.f 999
+         nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          f_podd_temp2 = f_Rsq, f_podd_temp1, f_A1
-      nop.i           0
+         nop.m 999
+(p0)     fma.s1          sinh_FR_podd       = sinh_FR_podd_temp2, sinh_FR_Rcub,       sinh_FR_R   
+         nop.i 999
+}
+
+// sinh_GR_mj contains the table offset for -j
+// sinh_GR_j  contains the table offset for +j
+// p6 is true when j <= 0
+
+{ .mlx
+(p0)     setf.exp            sinh_FR_N_temp2 = r40            
+(p0)     movl                r40 = 0x0000000000000020 ;;    
 }
-;;
 
-// If EXP, 
-// Compute sgnx * 2^(N-1) * Tjhi and sgnx * 2^(N-1) * Tjlo
 { .mfi
-      nop.m           0
-(p7)  fma.s1          f_Tjhi_spos = f_Tjhi, f_spos, f0
-      nop.i           0
+(p0)     sub                 sinh_GR_mJ = r40,  r36           
+(p0)     fmerge.se           sinh_FR_spos    = sinh_FR_N_temp1, f1 
+(p0)     adds                sinh_GR_J  = 0x20, r36 ;;           
 }
+
+{ .mii
+         nop.m 999
+(p0)     shl                  sinh_GR_mJ = sinh_GR_mJ, 5 ;;   
+(p0)     add                  sinh_AD_mJ = r37, sinh_GR_mJ ;; 
+}
+
+{ .mmi
+         nop.m 999
+(p0)     ldfe                 sinh_FR_Tmjhi = [sinh_AD_mJ],16                 
+(p0)     shl                  sinh_GR_J  = sinh_GR_J, 5 ;;    
+}
+
 { .mfi
-      nop.m           0
-(p7)  fma.s1          f_Tjlo_spos = f_Tjlo, f_spos, f0
-      nop.i           0
+(p0)     ldfs                 sinh_FR_Tmjlo = [sinh_AD_mJ],16                 
+(p0)     fcmp.lt.unc.s1      p0,p7 = sinh_FR_X,f9                          
+(p0)     add                  sinh_AD_J  = r37, sinh_GR_J ;;  
+}
+
+{ .mmi
+(p0)     ldfe                 sinh_FR_Tjhi  = [sinh_AD_J],16 ;;                  
+(p0)     ldfs                 sinh_FR_Tjlo  = [sinh_AD_J],16                  
+         nop.i 999 ;;
+}
+
+{ .mfb
+         nop.m 999
+(p0)     fmerge.se           sinh_FR_sneg    = sinh_FR_N_temp2, f1 
+(p7)     br.cond.spnt        L(SINH_BY_EXP) ;;                            
 }
-;;
 
 { .mfi
-      nop.m           0
-(p6)  fms.s1          f_S_hi = f_spos, f_Tjhi, f_SC_hi_temp
-      nop.i           0
+         nop.m 999
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
+
+// ******************************************************
+// If NOT branch to EXP
+// ******************************************************
+// Calculate S_hi and S_lo
+// sinh_FR_S_hi_temp = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi_temp
+// sinh_FR_S_hi = sinh_FR_spos * sinh_FR_Tjhi - (sinh_FR_sneg * sinh_FR_Tmjhi)
 
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_C_hi = f_spos, f_Tjhi, f_SC_hi_temp
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_S_hi_temp = sinh_FR_sneg, sinh_FR_Tmjhi, f0   
+         nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-(p6)  fms.s1          f_S_lo_temp4 = f_spos, f_Tjlo, f_S_lo_temp3
-      nop.i           0
+         nop.m 999
+(p0)    fms.s1         sinh_FR_S_hi = sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi_temp              
+         nop.i 999
 }
-;;
+
+// Calculate C_hi
+// sinh_FR_C_hi_temp1 = sinh_FR_sneg * sinh_FR_Tmjhi
+// sinh_FR_C_hi = sinh_FR_spos * sinh_FR_Tjhi + sinh_FR_C_hi_temp1
 
 { .mfi
-      nop.m           0
-      fma.s1          f_peven = f_Rsq, f_peven_temp2, f0
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_C_hi_temp1 = sinh_FR_sneg, sinh_FR_Tmjhi, f0                   
+         nop.i 999 ;;
 }
+
+// sinh_FR_S_lo_temp1 =  sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_spos * sinh_FR_Tjhi - sinh_FR_S_hi)
+// sinh_FR_S_lo_temp2 = -sinh_FR_sneg * sinh_FR_Tmjhi + (sinh_FR_S_lo_temp1              )
+
 { .mfi
-      nop.m           0
-      fma.s1          f_podd = f_podd_temp2, f_Rcub, f_R
-      nop.i           0
+         nop.m 999
+(p0)    fms.s1         sinh_FR_S_lo_temp1 =  sinh_FR_spos, sinh_FR_Tjhi,  sinh_FR_S_hi            
+         nop.i 999
 }
-;;
 
-// If TBL,
-// S_lo_temp1 =  spos * Tjhi - S_hi
-// S_lo_temp2 = -sneg * Tmjlo + S_lo_temp1
-// S_lo_temp2 = -sneg * Tmjlo + (spos * Tjhi - S_hi)
+{ .mfi
+         nop.m 999
+(p0)    fma.s1         sinh_FR_C_hi       = sinh_FR_spos, sinh_FR_Tjhi, sinh_FR_C_hi_temp1    
+         nop.i 999 ;;
+}
 
 { .mfi
-      nop.m           0
-(p6)  fms.s1          f_S_lo_temp1 =  f_spos, f_Tjhi,  f_S_hi
-      nop.i           0
+         nop.m 999
+(p0)    fnma.s1        sinh_FR_S_lo_temp2 = sinh_FR_sneg, sinh_FR_Tmjhi, sinh_FR_S_lo_temp1       
+         nop.i 999
 }
-;;
+
+// sinh_FR_S_lo_temp1 = sinh_FR_sneg * sinh_FR_Tmjlo
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo - sinh_FR_S_lo_temp1
+// sinh_FR_S_lo_temp3 = sinh_FR_spos * sinh_FR_Tjlo -(sinh_FR_sneg * sinh_FR_Tmjlo)
+// sinh_FR_S_lo = sinh_FR_S_lo_temp3 + sinh_FR_S_lo_temp2
 
 { .mfi
-      nop.m           0
-(p6)  fnma.s1         f_S_lo_temp2 = f_sneg, f_Tmjhi, f_S_lo_temp1       
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_S_lo_temp1 =  sinh_FR_sneg, sinh_FR_Tmjlo, f0                  
+         nop.i 999 ;;
 }
-;;
 
-// If EXP,
-// Y_hi = sgnx * 2^(N-1) * Tjhi
-// Y_lo = sgnx * 2^(N-1) * Tjhi * (p_odd + p_even) + sgnx * 2^(N-1) * Tjlo
+/////////// BUG FIX fma to fms -TK
 { .mfi
-      nop.m           0
-(p7)  fma.s1          f_Y_lo_temp =  f_peven, f1, f_podd
-      nop.i           0
+         nop.m 999
+(p0)    fms.s1         sinh_FR_S_lo_temp3 =  sinh_FR_spos, sinh_FR_Tjlo,  sinh_FR_S_lo_temp1  
+         nop.i 999 ;;
 }
-;;
 
-// If TBL,
-// S_lo = S_lo_temp4 + S_lo_temp2
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_S_lo = f_S_lo_temp4, f1, f_S_lo_temp2
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_S_lo       =  sinh_FR_S_lo_temp3, f1,   sinh_FR_S_lo_temp2     
+         nop.i 999 ;;
 }
-;;
 
-// If TBL,
 // Y_hi = S_hi 
 // Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
+// sinh_FR_Y_lo_temp = sinh_FR_S_hi * sinh_FR_peven + sinh_FR_S_lo
+// sinh_FR_Y_lo      = sinh_FR_C_hi * sinh_FR_podd + sinh_FR_Y_lo_temp
+
 { .mfi
-      nop.m           0
-(p6)  fma.s1          f_Y_lo_temp = f_S_hi, f_peven, f_S_lo
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_Y_lo_temp  = sinh_FR_S_hi, sinh_FR_peven, sinh_FR_S_lo           
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-(p7)  fma.s1          f_Y_lo = f_Tjhi_spos, f_Y_lo_temp, f_Tjlo_spos
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_Y_lo       =  sinh_FR_C_hi, sinh_FR_podd, sinh_FR_Y_lo_temp      
+         nop.i 999 ;;
 }
-;;
+
+// sinh_FR_SINH = Y_hi + Y_lo
+// f8 = answer = sinh_FR_SGNX * sinh_FR_SINH
 
 // Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-      fmpy.s0         f_tmp = f_B2, f_B2
-      nop.i           0
-}
-{ .mfi
-      nop.m           0
-(p6)  fma.s1          f_Y_lo = f_C_hi, f_podd, f_Y_lo_temp
-      nop.i           0
+         nop.m 999
+(p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999
 }
-;;
-
-// f8 = answer = Y_hi + Y_lo
 { .mfi
-      nop.m           0
-(p7)  fma.s0          f8 = f_Y_lo,  f1, f_Tjhi_spos
-      nop.i           0
+         nop.m 999
+(p0)    fma.s1         sinh_FR_SINH       =  sinh_FR_S_hi, f1, sinh_FR_Y_lo    
+         nop.i 999 ;;
 }
-;;
 
-// f8 = answer = Y_hi + Y_lo
 { .mfb
-      nop.m           0
-(p6)  fma.s0          f8 = f_Y_lo, f1, f_S_hi
-      br.ret.sptk     b0      // Exit for SINH_BY_TBL and SINH_BY_EXP
+         nop.m 999
+(p0)    fma.s0       f8 = sinh_FR_SGNX, sinh_FR_SINH,f0                      
+(p0)    br.ret.sptk     b0 ;;                          
 }
-;;
 
 
-// Here if 0 < |x| < 0.25
-SINH_BY_POLY: 
-{ .mmf
-      ldfe            f_P6 = [r_ad2e],16
-      ldfe            f_P5 = [r_ad2o],16
-      nop.f           0
-}
-;;
+L(SINH_BY_EXP): 
 
-{ .mmi
-      ldfe            f_P4 = [r_ad2e],16
-      ldfe            f_P3 = [r_ad2o],16
-      nop.i           0
+// When p7 is true,  we know that an overflow is not going to happen
+// When p7 is false, we must check for possible overflow
+// p7 is the over_SAFE flag
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) +Tjlo
+// Scale = sign * 2^(N-1)
+// sinh_FR_Y_lo =  sinh_FR_Tjhi * (sinh_FR_peven + sinh_FR_podd)
+// sinh_FR_Y_lo =  sinh_FR_Tjhi * (sinh_FR_Y_lo_temp      )
+
+{ .mfi
+         nop.m 999
+(p0)   fma.s1            sinh_FR_Y_lo_temp =  sinh_FR_peven, f1, sinh_FR_podd                   
+         nop.i 999
 }
-;;
 
-{ .mmi
-      ldfe            f_P2 = [r_ad2e],16
-      ldfe            f_P1 = [r_ad2o],16                 
-      nop.i           0
+// Now we are in EXP. This is the only path where an overflow is possible
+// but not for certain. So this is the only path where over_SAFE has any use.
+// r34 still has N-1
+// There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
+// There is a danger of double overflow            if N-1 > 0x3fe = 1022
+{ .mlx
+         nop.m 999
+(p0)   movl                r32          = 0x0000000000003ffe ;;                       
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          f_X3 = f_NORM_X, f_X2, f0
-      nop.i           0
+(p0)   cmp.gt.unc          p0,p7        = r34, r32                                 
+(p0)   fmerge.s          sinh_FR_SCALE     = sinh_FR_SGNX, sinh_FR_spos                         
+         nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          f_X4 = f_X2, f_X2, f0
-      nop.i           0
+         nop.m 999
+(p0)   fma.s1            sinh_FR_Y_lo      =  sinh_FR_Tjhi,  sinh_FR_Y_lo_temp, sinh_FR_Tjlo    
+         nop.i 999 ;;
 }
-;;
 
+// f8 = answer = scale * (Y_hi + Y_lo)
 { .mfi
-      nop.m           0
-      fma.s1          f_poly65 = f_X2, f_P6, f_P5
-      nop.i           0
+         nop.m 999
+(p0)   fma.s1            sinh_FR_SINH_temp = sinh_FR_Y_lo,  f1, sinh_FR_Tjhi       
+         nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          f_poly43 = f_X2, f_P4, f_P3
-      nop.i           0
+         nop.m 999
+(p0)   fma.s0          f44          = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0      
+         nop.i 999 ;;
 }
-;;
 
+// Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-      fma.s1          f_poly21 = f_X2, f_P2, f_P1
-      nop.i           0
+         nop.m 999
+(p7)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999 ;;
 }
-;;
 
-{ .mfi
-      nop.m           0
-      fma.s1          f_poly6543 = f_X4, f_poly65, f_poly43
-      nop.i           0
+// If over_SAFE is set, return
+{ .mfb
+       nop.m 999
+(p7)   fmerge.s            f8 = f44,f44                                            
+(p7)   br.ret.sptk     b0 ;;                          
 }
-;;
+
+// Else see if we overflowed
+// S0 user supplied status
+// S2 user supplied status + WRE + TD  (Overflows)
+// If WRE is set then an overflow will not occur in EXP.
+// The input value that would cause a register (WRE) value to overflow is about 2^15
+// and this input would go into the HUGE path.
+// Answer with WRE is in f43.
 
 { .mfi
-      nop.m           0
-      fma.s1          f_poly6to1 = f_X4, f_poly6543, f_poly21
-      nop.i           0
+         nop.m 999
+(p0)   fsetc.s2            0x7F,0x42                                               
+         nop.i 999;;
 }
-;;
 
-// Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-      fmpy.s0         f_tmp = f_P6, f_P6
-      nop.i           0
-}
-{ .mfb
-      nop.m           0
-      fma.s0          f8 = f_poly6to1, f_X3, f_NORM_X
-      br.ret.sptk     b0                // Exit SINH_BY_POLY
+         nop.m 999
+(p0)   fma.s2            f43  = sinh_FR_SCALE,  sinh_FR_SINH_temp, f0                      
+         nop.i 999 ;;
 }
-;;
 
+// 13FFF => 13FFF -FFFF = 4000(true)
+// 4000 + 3FFF = 7FFF, which is 1 more than the exponent of the largest
+// long double (7FFE). So 0 13FFF 8000000000000000  is one ulp more than
+// largest long double in register bias
+// Now set p8 if the answer with WRE is greater than or equal to this value
+// Also set p9 if the answer with WRE is less than or equal to negative this value
+
+{ .mlx
+         nop.m 999
+(p0)   movl                r32     = 0x00000000013FFF ;;                              
+}
 
-// Here if x denorm or unorm
-SINH_DENORM:
-// Determine if x really a denorm and not a unorm
 { .mmf
-      getf.exp        r_signexp_x = f_NORM_X
-      mov             r_exp_denorm = 0x0c001   // Real denorms have exp < this
-      fmerge.s        f_ABS_X = f0, f_NORM_X
+         nop.m 999
+(p0)   setf.exp            f41 = r32                                               
+(p0)   fsetc.s2            0x7F,0x40 ;;                                               
 }
-;;
 
 { .mfi
-      nop.m           0
-      fcmp.eq.s0      p10,p0 = f8, f0  // Set denorm flag
-      nop.i           0
+         nop.m 999
+(p0)   fcmp.ge.unc.s1 p8, p0 =  f43, f41                                           
+         nop.i 999
 }
-;;
 
-// Set p8 if really a denorm
-{ .mmi
-      and             r_exp_x = r_exp_mask, r_signexp_x
-;;
-      cmp.lt          p8,p9 = r_exp_x, r_exp_denorm
-      nop.i           0
+{ .mfi
+         nop.m 999
+(p0)   fmerge.ns           f42 = f41, f41                                          
+         nop.i 999 ;;
+}
+
+// The error tag for overflow is 126
+{ .mii
+         nop.m 999
+         nop.i 999 ;;
+(p8)   mov                 r47 = 126 ;;                                               
 }
-;;
 
-// Identify denormal operands.
 { .mfb
-      nop.m           0
-(p8)  fcmp.ge.unc.s1  p6,p7 = f8, f0   // Test sign of denorm
-(p9)  br.cond.sptk    SINH_COMMON    // Return to main path if x unorm
+         nop.m 999
+(p0)   fcmp.le.unc.s1      p9, p0 =  f43, f42                                      
+(p8)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+{ .mii
+         nop.m 999
+         nop.i 999 ;;
+(p9)   mov                 r47 = 126                                               
 }
-;;
 
+{ .mib
+         nop.m 999
+         nop.i 999
+(p9)   br.cond.spnt L(SINH_ERROR_SUPPORT) ;;
+}
+
+// Dummy multiply to generate inexact
 { .mfi
-      nop.m           0
-(p6)  fma.s0          f8 =  f8,f8,f8  // If x +denorm, result=x+x^2
-      nop.i           0 
+         nop.m 999
+(p0)     fmpy.s0      sinh_FR_tmp = sinh_FR_all_ones, sinh_FR_all_ones
+         nop.i 999 ;;
 }
+
 { .mfb
-      nop.m           0
-(p7)  fnma.s0         f8 =  f8,f8,f8  // If x -denorm, result=x-x^2
-      br.ret.sptk     b0            // Exit if x denorm
+         nop.m 999
+(p0)   fmerge.s            f8 = f44,f44                                            
+(p0)   br.ret.sptk     b0 ;;                          
 }
-;;
 
+L(SINH_HUGE): 
 
-// Here if |x| >= overflow limit
-SINH_HUGE: 
-// for SINH_HUGE, put 24000 in exponent; take sign from input
-{ .mmi
-      mov             r_exp_huge = 0x15dbf
-;;
-      setf.exp        f_huge  = r_exp_huge
-      nop.i           0
-}
-;;
+// for SINH_HUGE, put 24000 in exponent; take sign from input; add 1
+// SAFE: SAFE is always 0 for HUGE
 
-.pred.rel "mutex",p8,p9
-{ .mfi
-      alloc           r32 = ar.pfs,0,5,4,0                  
-(p8)  fnma.s1         f_signed_hi_lo = f_huge, f1, f1
-      nop.i           0
+{ .mlx
+         nop.m 999
+(p0)   movl                r32 = 0x0000000000015dbf ;;                                
 }
+
 { .mfi
-      nop.m           0
-(p9)  fma.s1          f_signed_hi_lo = f_huge, f1, f1
-      nop.i           0
+(p0)   setf.exp            f9  = r32                                               
+         nop.f 999
+         nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s0          f_pre_result = f_signed_hi_lo, f_huge, f0
-      mov             GR_Parameter_TAG = 126
+         nop.m 999
+(p0)   fma.s1              sinh_FR_signed_hi_lo = sinh_FR_SGNX, f9, f1                       
+         nop.i 999 ;;
 }
-;;
 
-GLOBAL_IEEE754_END(sinhl)
-
-LOCAL_LIBM_ENTRY(__libm_error_region)
+{ .mfi
+         nop.m 999
+(p0)   fma.s0            f44 = sinh_FR_signed_hi_lo,  f9, f0                          
+(p0)   mov                 r47 = 126                                               
+}
+.endp sinhl
+ASM_SIZE_DIRECTIVE(sinhl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__ieee754_sinhl)
+#endif
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
+
+.proc __libm_error_region
+__libm_error_region:
+L(SINH_ERROR_SUPPORT):
 .prologue
 
+// (1) Point GR_Parameter_Y at sp-32, save ar.pfs and gp, and move sp down by 64
 { .mfi
-        add   GR_Parameter_Y=-32,sp              // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs                  // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                            // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                        // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
 
+
+// (2) Store f0 as parameter 2, form the parameter 1 address (sp+16), save b0
 { .mmi
-        stfe [GR_Parameter_Y] = f0,16            // STORE Parameter 2 on stack
-        add GR_Parameter_X = 16,sp               // Parameter 1 address
+        stfe [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
+        add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                        // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
 
 .body
+// (3) Store f8 (parameter 1) and f44 (parameter 3), then call __libm_error_support
 { .mib
-        stfe [GR_Parameter_X] = f8               // STORE Parameter 1 on stack
+        stfe [GR_Parameter_X] = f8                     // STORE Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 address
         nop.b 0                            
 }
 { .mib
-        stfe [GR_Parameter_Y] = f_pre_result     // STORE Parameter 3 on stack
+        stfe [GR_Parameter_Y] = f44                    // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#    // Call error handling function
+        br.call.sptk b0=__libm_error_support#          // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 
+// (4) Reload f8 from sp+48, then restore sp, b0, gp and ar.pfs and return
 { .mmi
-        ldfe  f8 = [GR_Parameter_RESULT]         // Get return result off stack
+        ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-        add   sp = 64,sp                         // Restore stack pointer
-        mov   b0 = GR_SAVE_B0                    // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
-        mov   gp = GR_SAVE_GP                    // Restore gp
-        mov   ar.pfs = GR_SAVE_PFS               // Restore ar.pfs
-        br.ret.sptk     b0                       // Return
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sqrt.S b/sysdeps/ia64/fpu/e_sqrt.S
index 0e208b3de1..dd057f58ee 100644
--- a/sysdeps/ia64/fpu/e_sqrt.S
+++ b/sysdeps/ia64/fpu/e_sqrt.S
@@ -1,11 +1,11 @@
 .file "sqrt.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 // 
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
-// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,28 +35,27 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
-//********************************************************************
+// ********************************************************************
 // History
-//********************************************************************
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// ********************************************************************
+// 2/02/00  Initial version
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 02/10/03 Reordered header: .section, .global, .proc, .align
 //
-//********************************************************************
+// ********************************************************************
 //
 // Function:   Combined sqrt(x), where
 //                        _
 //             sqrt(x) = |x, for double precision x values
 //
-//********************************************************************
+// ********************************************************************
 //
 // Accuracy:       Correctly Rounded
 //
-//********************************************************************
+// ********************************************************************
 //
 // Resources Used:
 //
@@ -69,7 +68,7 @@
 //
 //    Predicate Registers:      p6, p7, p8
 //
-//*********************************************************************
+// *********************************************************************
 //
 // IEEE Special Conditions:
 //
@@ -79,13 +78,15 @@
 //    sqrt(+/-0) = +/-0
 //    sqrt(negative) = QNaN and error handling is called
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Implementation:
 //
 //  Modified Newton-Raphson Algorithm
 //
-//*********************************************************************
+// *********************************************************************
+
+#include "libm_support.h"
 
 GR_SAVE_PFS          = r33
 GR_SAVE_B0           = r34
@@ -97,7 +98,19 @@ GR_Parameter_RESULT  = r39
 
 
 .section .text
-GLOBAL_IEEE754_ENTRY(sqrt)
+.proc sqrt#
+.global sqrt#
+.align 64 
+
+sqrt:
+#ifdef _LIBC
+.global __sqrt
+.type __sqrt,@function
+__sqrt:
+.global __ieee754_sqrt
+.type __ieee754_sqrt,@function
+__ieee754_sqrt:
+#endif
 { .mfi
   alloc r32= ar.pfs,0,5,4,0
   frsqrta.s0 f7,p6=f8
@@ -242,7 +255,7 @@ GLOBAL_IEEE754_ENTRY(sqrt)
 
 { .mfb
   nop.m 0
-       mov   f8 = f7
+  (p0) mov   f8 = f7
   (p8) br.ret.sptk b0 ;;
 }
 { .mfb
@@ -251,7 +264,13 @@ GLOBAL_IEEE754_ENTRY(sqrt)
   (p7) br.cond.sptk __libm_error_region ;;
 }
 // END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
-GLOBAL_IEEE754_END(sqrt)
+.endp sqrt#
+ASM_SIZE_DIRECTIVE(sqrt)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__sqrt)
+ASM_SIZE_DIRECTIVE(__ieee754_sqrt)
+#endif
+
 // Stack operations when calling error support.
 //       (1)               (2)                          (3) (call)              (4)
 //   sp   -> +          psp -> +                     psp -> +                   sp -> +
@@ -267,7 +286,8 @@ GLOBAL_IEEE754_END(sqrt)
 //    save gp                                                                    restore ar.pfs
 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 
 //
 // This branch includes all those special values that are not negative,
@@ -332,9 +352,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 
 .type   __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/e_sqrtf.S b/sysdeps/ia64/fpu/e_sqrtf.S
index bee0df7414..1799845d6d 100644
--- a/sysdeps/ia64/fpu/e_sqrtf.S
+++ b/sysdeps/ia64/fpu/e_sqrtf.S
@@ -1,10 +1,10 @@
 .file "sqrtf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,29 +35,27 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
-//*********************************************************************
+// *********************************************************************
 // History:
 //
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Function:   Combined sqrtf(x), where
 //                         _
 //             sqrtf(x) = |x, for single precision x values
 //
-//********************************************************************
+// ********************************************************************
 //
 // Accuracy:       Correctly Rounded 
 //
-//********************************************************************
+// ********************************************************************
 //
 // Resources Used:
 //
@@ -70,7 +68,7 @@
 //
 //    Predicate Registers:      p6, p7, p8
 //
-//********************************************************************
+// ********************************************************************
 //
 // IEEE Special Conditions:
 //
@@ -80,14 +78,15 @@
 //    sqrtf(+/-0) = +/-0 
 //    sqrtf(negative) = QNaN and error handling is called
 //
-//********************************************************************
+// ********************************************************************
 //
 // Implementation:
 //
 //  Modified Newton-Raphson Algorithm
 //
-//********************************************************************
+// ********************************************************************
 
+#include "libm_support.h"
 
 GR_SAVE_B0                    = r34
 GR_SAVE_PFS                   = r33
@@ -103,8 +102,21 @@ FR_Y             = f0
 FR_RESULT        = f8
 
 
+
 .section .text
-GLOBAL_IEEE754_ENTRY(sqrtf)
+.proc sqrtf#
+.global sqrtf#
+.align 64 
+
+sqrtf: 
+#ifdef _LIBC
+.global __sqrtf
+.type __sqrtf,@function
+__sqrtf:
+.global __ieee754_sqrtf
+.type __ieee754_sqrtf,@function
+__ieee754_sqrtf:
+#endif
 { .mlx
   // BEGIN SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
   alloc r32= ar.pfs,0,5,4,0
@@ -185,7 +197,7 @@ GLOBAL_IEEE754_ENTRY(sqrtf)
   // Step (10)
   // d1 = a - S1 * S1 in f9
   (p6) fnma.s1 f9=f7,f7,f8
-  nop.i 0;;
+  nop.i 0;;;
 } { .mfb
   nop.m 0
   // Step (11)
@@ -195,20 +207,27 @@ GLOBAL_IEEE754_ENTRY(sqrtf)
 // END SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
 } { .mfb
   nop.m 0
-       mov   f8 = f7
+  (p0) mov   f8 = f7
   (p8) br.ret.sptk b0 ;;
 }
 //
 // This branch includes all those special values that are not negative,
 // with the result equal to frcpa(x)
 //
-GLOBAL_IEEE754_END(sqrtf)
+.endp sqrtf
+ASM_SIZE_DIRECTIVE(sqrtf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__sqrtf)
+ASM_SIZE_DIRECTIVE(__ieee754_sqrtf)
+#endif
+
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mii
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
-        mov   GR_Parameter_TAG = 50                   
+(p0)    mov   GR_Parameter_TAG = 50                   
 .save   ar.pfs,GR_SAVE_PFS
         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
@@ -252,7 +271,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 
 .type   __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/e_sqrtl.S b/sysdeps/ia64/fpu/e_sqrtl.S
index ec1475626d..e41148243a 100644
--- a/sysdeps/ia64/fpu/e_sqrtl.S
+++ b/sysdeps/ia64/fpu/e_sqrtl.S
@@ -1,10 +1,10 @@
 .file "sqrtl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,25 +35,23 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
-//********************************************************************
+// ********************************************************************
 //
 // History:
-// 02/02/00 (hand-optimized)
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00 (hand-optimized)
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
 //
-//********************************************************************
+// ********************************************************************
 //
 // Function:   Combined sqrtl(x), where
 //                         _
 //             sqrtl(x) = |x, for double-extended precision x values
 //
-//********************************************************************
+// ********************************************************************
 //
 // Resources Used:
 //
@@ -66,7 +64,7 @@
 //
 //    Predicate Registers:      p6, p7, p8
 //
-//********************************************************************
+// ********************************************************************
 //
 // IEEE Special Conditions:
 //
@@ -76,13 +74,15 @@
 //    sqrtl(+/-0) = +/-0
 //    sqrtl(negative) = QNaN and error handling is called
 //
-//********************************************************************
+// ********************************************************************
 //
 // Implementation:
 //
 //  Modified Newton-Raphson Algorithm
 //
-//********************************************************************
+// ********************************************************************
+
+#include "libm_support.h"
 
 GR_SAVE_PFS         = r33
 GR_SAVE_B0          = r34
@@ -97,7 +97,19 @@ FR_Y                = f0
 FR_RESULT           = f8
 
 .section .text
-GLOBAL_IEEE754_ENTRY(sqrtl)
+.proc sqrtl#
+.global sqrtl#
+.align 64
+
+sqrtl:
+#ifdef _LIBC
+.global __sqrtl
+.type __sqrtl,@function
+__sqrtl:
+.global __ieee754_sqrtl
+.type __ieee754_sqrtl,@function
+__ieee754_sqrtl:
+#endif
 { .mlx
 alloc r32= ar.pfs,0,5,4,0
   // exponent of +1/2 in r2
@@ -139,7 +151,7 @@ alloc r32= ar.pfs,0,5,4,0
 } 
 { .mfi
   nop.m 0
-       mov f15=f8
+  (p0) mov f15=f8
   nop.i 0;;
 } { .mfi
   nop.m 0
@@ -209,8 +221,8 @@ alloc r32= ar.pfs,0,5,4,0
   (p6) br.ret.sptk b0 ;;
 }
 { .mfb
-       mov GR_Parameter_TAG    = 48
-       mov   f8 = f7
+  (p0) mov GR_Parameter_TAG    = 48
+  (p0) mov   f8 = f7
   (p8) br.ret.sptk b0 ;;
 }
 //
@@ -220,8 +232,15 @@ alloc r32= ar.pfs,0,5,4,0
 
 
 // END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
-GLOBAL_IEEE754_END(sqrtl)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.endp sqrtl#
+ASM_SIZE_DIRECTIVE(sqrtl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__sqrtl)
+ASM_SIZE_DIRECTIVE(__ieee754_sqrtl)
+#endif
+
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -269,6 +288,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region#)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/libm_atan2_reg.S b/sysdeps/ia64/fpu/libm_atan2_reg.S
new file mode 100644
index 0000000000..5649670d19
--- /dev/null
+++ b/sysdeps/ia64/fpu/libm_atan2_reg.S
@@ -0,0 +1,1234 @@
+.file "libm_atan2_reg.s"
+
+// Copyright (C) 2000, 2001, Intel Corporation
+// All rights reserved.
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00: Initial version
+// 4/04/00  Unwind support added
+
+#include "libm_support.h"
+
+.data
+
+.align 64
+ASM_TYPE_DIRECTIVE(Constants_atan#,@object)
+Constants_atan:
+data4   0x54442D18, 0x3FF921FB, 0x248D3132, 0x3E000000
+// Row above: pi/2 as a double, low part of pi/2 as a single, 2**(-3) as a single
+data4   0xAAAAAAA3, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // P_1
+data4   0xCCCC54B2, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // P_2
+data4   0x47E4D0C2, 0x92492492, 0x0000BFFC, 0x00000000 // P_3
+data4   0x58870889, 0xE38E38E0, 0x00003FFB, 0x00000000 // P_4
+data4   0x290149F8, 0xBA2E895B, 0x0000BFFB, 0x00000000 // P_5
+data4   0x250F733D, 0x9D88E6D4, 0x00003FFB, 0x00000000 // P_6
+data4   0xFB8745A0, 0x884E51FF, 0x0000BFFB, 0x00000000 // P_7
+data4   0x394396BD, 0xE1C7412B, 0x00003FFA, 0x00000000 // P_8
+data4   0xAAAAA52F, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // Q_1
+data4   0xC75B60D3, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // Q_2
+data4   0x011F1940, 0x924923AD, 0x0000BFFC, 0x00000000 // Q_3
+data4   0x2A5F89BD, 0xE36F716D, 0x00003FFB, 0x00000000 // Q_4
+//      Single table entry for B = 1+Index/16+1/32, Index = 0:
+//      Tbl_hi (double precision) in the first two words,
+//      Tbl_lo (single precision) in the third word,
+//      fourth word is zero.
+data4   0xA935BD8E, 0x3FE9A000, 0x23ACA08F, 0x00000000
+// 16 entries, Index = 0,1,...,15, for B = 2^(-1)*(1+Index/16+1/32).
+// Each row: Tbl_hi (double precision) in the first two words,
+// Tbl_lo (single precision) in the third word,
+// fourth word is zero.
+data4   0x7F175A34, 0x3FDE77EB, 0x238729EE, 0x00000000
+data4   0x73C1A40B, 0x3FE0039C, 0x249334DB, 0x00000000
+data4   0x5B5B43DA, 0x3FE0C614, 0x22CBA7D1, 0x00000000
+data4   0x88BE7C13, 0x3FE1835A, 0x246310E7, 0x00000000
+data4   0xE2CC9E6A, 0x3FE23B71, 0x236210E5, 0x00000000
+data4   0x8406CBCA, 0x3FE2EE62, 0x2462EAF5, 0x00000000
+data4   0x1CD41719, 0x3FE39C39, 0x24B73EF3, 0x00000000
+data4   0x5B795B55, 0x3FE44506, 0x24C11260, 0x00000000
+data4   0x5BB6EC04, 0x3FE4E8DE, 0x242519EE, 0x00000000
+data4   0x1F732FBA, 0x3FE587D8, 0x24D4346C, 0x00000000
+data4   0x115D7B8D, 0x3FE6220D, 0x24ED487B, 0x00000000
+data4   0x920B3D98, 0x3FE6B798, 0x2495FF1E, 0x00000000
+data4   0x8FBA8E0F, 0x3FE74897, 0x223D9531, 0x00000000
+data4   0x289FA093, 0x3FE7D528, 0x242B0411, 0x00000000
+data4   0x576CC2C5, 0x3FE85D69, 0x2335B374, 0x00000000
+data4   0xA99CC05D, 0x3FE8E17A, 0x24C27CFB, 0x00000000
+//
+//      16 entries, Index = 0,1,...,15, for B = 2^(-2)*(1+Index/16+1/32).
+//      Each row: Tbl_hi (double precision) in the first two words,
+//      Tbl_lo (single precision) in the third word,
+//      fourth word is zero.
+//
+data4   0x510665B5, 0x3FD025FA, 0x24263482, 0x00000000
+data4   0x362431C9, 0x3FD1151A, 0x242C8DC9, 0x00000000
+data4   0x67E47C95, 0x3FD20255, 0x245CF9BA, 0x00000000
+data4   0x7A823CFE, 0x3FD2ED98, 0x235C892C, 0x00000000
+data4   0x29271134, 0x3FD3D6D1, 0x2389BE52, 0x00000000
+data4   0x586890E6, 0x3FD4BDEE, 0x24436471, 0x00000000
+data4   0x175E0F4E, 0x3FD5A2E0, 0x2389DBD4, 0x00000000
+data4   0x9F5FA6FD, 0x3FD68597, 0x2476D43F, 0x00000000
+data4   0x52817501, 0x3FD76607, 0x24711774, 0x00000000
+data4   0xB8DF95D7, 0x3FD84422, 0x23EBB501, 0x00000000
+data4   0x7CD0C662, 0x3FD91FDE, 0x23883A0C, 0x00000000
+data4   0x66168001, 0x3FD9F930, 0x240DF63F, 0x00000000
+data4   0x5422058B, 0x3FDAD00F, 0x23FE261A, 0x00000000
+data4   0x378624A5, 0x3FDBA473, 0x23A8CD0E, 0x00000000
+data4   0x0AAD71F8, 0x3FDC7655, 0x2422D1D0, 0x00000000
+data4   0xC9EC862B, 0x3FDD45AE, 0x2344A109, 0x00000000
+//
+//      16 entries, Index = 0,1,...,15, for B = 2^(-3)*(1+Index/16+1/32).
+//      Each row: Tbl_hi (double precision) in the first two words,
+//      Tbl_lo (single precision) in the third word,
+//      fourth word is zero.
+//
+data4   0x84212B3D, 0x3FC068D5, 0x239874B6, 0x00000000
+data4   0x41060850, 0x3FC16465, 0x2335E774, 0x00000000
+data4   0x171A535C, 0x3FC25F6E, 0x233E36BE, 0x00000000
+data4   0xEDEB99A3, 0x3FC359E8, 0x239680A3, 0x00000000
+data4   0xC6092A9E, 0x3FC453CE, 0x230FB29E, 0x00000000
+data4   0xBA11570A, 0x3FC54D18, 0x230C1418, 0x00000000
+data4   0xFFB3AA73, 0x3FC645BF, 0x23F0564A, 0x00000000
+data4   0xE8A7D201, 0x3FC73DBD, 0x23D4A5E1, 0x00000000
+data4   0xE398EBC7, 0x3FC8350B, 0x23D4ADDA, 0x00000000
+data4   0x7D050271, 0x3FC92BA3, 0x23BCB085, 0x00000000
+data4   0x601081A5, 0x3FCA217E, 0x23BC841D, 0x00000000
+data4   0x574D780B, 0x3FCB1696, 0x23CF4A8E, 0x00000000
+data4   0x4D768466, 0x3FCC0AE5, 0x23BECC90, 0x00000000
+data4   0x4E1D5395, 0x3FCCFE65, 0x2323DCD2, 0x00000000
+data4   0x864C9D9D, 0x3FCDF110, 0x23F53F3A, 0x00000000
+data4   0x451D980C, 0x3FCEE2E1, 0x23CCB11F, 0x00000000
+data4   0x54442D18, 0x400921FB, 0x33145C07, 0x3CA1A626 // pi as two doubles
+data4   0x54442D18, 0x3FF921FB, 0x33145C07, 0x3C91A626 // pi/2 as two doubles
+data4   0x54442D18, 0x3FE921FB, 0x33145C07, 0x3C81A626 // pi/4 as two doubles
+data4   0x7F3321D2, 0x4002D97C, 0x4C9E8A0A, 0x3C9A7939 // 3*pi/4 as two doubles
+ASM_SIZE_DIRECTIVE(Constants_atan#)
+.section .text
+
+.proc __libm_atan2_reg#
+.global __libm_atan2_reg#
+.align 64
+__libm_atan2_reg:
+
+
+{ .mfi
+       alloc  r32 = ar.pfs,0,20,4,0
+(p0)   mov f32 = f8
+       nop.i 0
+} 
+{ .mmi
+      nop.m 0
+(p0)  addl           r39   = @ltoff(Constants_atan#), gp
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 r39 = [r39]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+{ .mfi
+       nop 999	// EMbo added ...
+(p0)   mov f33 = f9
+ nop.i 0
+ } { .mfi
+       nop 999	// EMbo added ...
+(p0)   fclass.nm.unc p9,p0 = f32 ,0x1FF
+       nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.nm.unc p8,p0 = f33 ,0x1FF
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.m.unc  p6,p0 = f33 ,0x103
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.m.unc  p7,p0 = f32 ,0x103
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.m.unc p12,p0 = f33 ,0x0C3
+	nop 999;;	// EMbo added ...
+ } { .mfb
+	nop 999	// EMbo added ...
+//
+//     Check for NatVals.
+//     Check for EM Unsupporteds
+//     Check for NaNs.
+//
+(p0)   fclass.m.unc p13,p0 = f32 ,0x0C3
+(p6)   br.cond.sptk L(ATAN_NATVAL);;
+ } { .mbb
+	nop 999	// EMbo added ...
+(p7)   br.cond.sptk L(ATAN_NATVAL)
+(p8)   br.cond.sptk L(ATAN_UNSUPPORTED);;
+ } { .mib
+(p0)   add r40 = 96, r39
+	nop 999	// EMbo added ...
+(p9)   br.cond.sptk L(ATAN_UNSUPPORTED);;
+ } { .mib
+(p0)   ldfd  f50 = [r39],8
+	nop 999	// EMbo added ...
+(p12)  br.cond.sptk L(ATAN_NAN);;
+ } { .mfb
+	nop 999	// EMbo added ...
+(p0)   fnorm.s1 f33 = f33
+(p13)  br.cond.sptk L(ATAN_NAN);;
+ } { .mfi
+(p0)   ldfs  f51 = [r39],4
+//
+//     Remove sign bits from exponents
+//     Load 2**(-3)
+//     Normalize the input argument.
+//
+(p0)   fnorm.s1 f32 = f32
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   mov f82 = f1
+	nop 999;;	// EMbo added ...
+ } { .mmi
+	nop 999;;	// EMbo added ...
+(p0)   ldfs  f78 = [r39],180
+	nop 999;;	// EMbo added ...
+ } { .mmi
+(p0)   getf.exp r36 = f33;;
+//
+//     Get exp and sign of ArgX
+//     Get exp and sign of ArgY
+//     Load 2**(-3) and increment ptr to Q_4.
+//
+(p0)   getf.exp r37 = f32
+(p0)   shr.u r36 = r36,17;;
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmerge.s f84 =  f1,f32
+(p0)   shr.u r37 = r37,17;;
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     ArgX_abs = |ArgX|
+//     ArgY_abs = |ArgY|
+//     sign_X is sign bit of ArgX
+//     sign_Y is sign bit of ArgY
+//
+(p0)   fmerge.s f83 =  f1,f33
+(p0)   cmp.eq.unc  p8,p9 = 0x00000, r37;;
+ } { .mfi
+	nop 999	// EMbo added ...
+(p8)   fadd.s1 f34 = f0, f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p9)   fsub.s1 f34 = f0, f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmin.s1 f36 = f83, f84
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmax.s1 f35 = f83, f84
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     Is ArgX_abs >= ArgY_abs
+//     Is sign_Y == 0?
+//
+(p0)   fcmp.ge.s1  p6,p7 = f83,f84
+	nop 999;;	// EMbo added ...
+ } { .mii
+(p6)   cmp.eq.unc  p10, p11 =  0x00000, r36
+(p6)   add r38 = r0, r0;;
+//
+//     U = max(ArgX_abs,ArgY_abs)
+//     V = min(ArgX_abs,ArgY_abs)
+//     if p6, swap = 0
+//     if p7, swap = 1
+//
+//
+//     Let M = 1.0
+//     if p8, s_Y = 1.0
+//     if p9, s_Y = -1.0
+//
+(p7)   add r38 = 1,r0;;
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   frcpa.s1 f37, p6 =  f36, f35
+	nop 999;;	// EMbo added ...
+ } { .mfb
+	nop 999	// EMbo added ...
+//
+//     E = frcpa(V,U)
+//
+(p10)  fsub.s1 f82 = f82, f1
+(p6)   br.cond.sptk L(ATAN_STEP2);;
+ } { .mib
+	nop 999	// EMbo added ...
+	nop 999	// EMbo added ...
+// /**************************************************/
+// /********************* STEP2 **********************/
+// /**************************************************/
+(p0)   br.cond.spnt L(ATAN_SPECIAL_HANDLING);;
+ }
+L(ATAN_STEP2):
+ { .mlx
+	nop 999	// EMbo added ...
+(p0)   movl r47 =  0x8400000000000000
+ } { .mlx
+	nop 999	// EMbo added ...
+(p0)   movl r48 =  0x0000000000000100;;
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmpy.s1 f38 = f37, f36
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fcmp.lt.unc.s0  p0,p9 = f9,f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fcmp.lt.unc.s0  p0,p8 = f8,f1
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     Q = E * V
+//
+(p11)  fadd.s1 f82 = f82, f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+(p0)   getf.sig r46 = f38
+(p0)   fcmp.lt.unc p6,p7 = f38,f78
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmpy.s1 f38 = f37, f36
+(p0)   extr.u r42 = r46, 59, 4;;
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmpy.s1 f50 = f82, f50
+(p0)   dep r47 = r42, r47, 59, 4
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmpy.s1 f51 = f82, f51
+	nop 999;;	// EMbo added ...
+ } { .mmi
+	nop 999;;	// EMbo added ...
+//
+//     Is Q < 2**(-3)?
+//
+//
+//     Do fcmp to raise any denormal operand
+//     exceptions.
+//
+(p0)   getf.exp r45 = f38
+	nop 999;;	// EMbo added ...
+ } { .mib
+//
+//     lookup = b_1 b_2 b_3 b_4
+//
+//
+//     Generate 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
+//
+(p0)   andcm r41 = 0x0003, r45
+	nop 999	// EMbo added ...
+//
+//     We waited a few extra cycles so P_lo and P_hi could be calculated.
+//     Load the constant 256 for loading up table entries.
+//
+//    /**************************************************/
+//    /********************* STEP3 **********************/
+//    /**************************************************/
+(p6)   br.cond.spnt L(ATAN_POLY);;
+ } { .mii
+(p0)   setf.sig f39 = r47
+(p0)   cmp.eq.unc  p8, p9 =  0x0000, r41
+//
+//     z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
+//     point to beginning of Tbl_hi entries - k = 0.
+//
+(p0)   add r40 = 16, r39
+ } { .mmi
+(p0)   ldfe f73 = [r39],-16;;
+(p9)   sub r41 = r41,r0,1
+(p9)   add r40 = 16,r40
+ } { .mfi
+(p8)   ldfd  f48 = [r40],8
+(p0)   fmpy.s1 f50 = f34, f50
+(p0)   xor r38 = r36,r38;;
+ } { .mmi
+(p0)   ldfe f71 = [r39],-16;;
+(p8)   ldfs  f49 = [r40],8
+(p9)   pmpy2.r r41 = r41,r48;;
+ } { .mfi
+(p0)   ldfe f69 = [r39],-16
+//
+//     Let z_hi have exponent and sign of original Q
+//     Load the Tbl_hi(0) else, increment pointer.
+//
+(p0)   fmerge.se f39 =  f38,f39
+(p9)   shladd r42 = r42,0x0004,r41;;
+ } { .mmi
+(p9)   add r40 = r40, r42;;
+(p9)   ldfd  f48 = [r40],8
+	nop 999;;	// EMbo added ...
+ } { .mmi
+(p0)   ldfe f67 = [r39],-16;;
+(p9)   ldfs  f49 = [r40],8
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     U_prime_hi = U + V * z_hi
+//     Load the Tbl_lo(0)
+//
+(p0)   fma.s1 f40 = f36, f39, f35
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fnma.s1 f42 = f35, f39, f36
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   mov f52 = f48
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   frcpa.s1 f43, p6 =  f1, f40
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     U_prime_lo = U - U_prime_hi
+//     k = k * 256 - result can be 0, 256, or 512.
+//
+(p0)   fsub.s1 f41 = f35, f40
+(p0)   cmp.eq.unc  p7, p6 =  0x00000, r38
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmpy.s1 f52 = f34, f52
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p7)   fadd.s1 f54 = f0, f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p6)   fsub.s1 f54 = f0, f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fnma.s1 f80 = f43, f40, f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fadd.s1 f79 = f41, f40
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f41 = f36, f39, f41
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f56 = f54, f52, f50
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f43 = f80, f43, f43
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     U_prime_lo =  U - U_hold
+//     lookup -> lookup * 16 + k
+//
+//
+//     V_prime =  V - U * z_hi
+//     U_prime_lo =  V * z_hi + U_prime_lo
+//
+(p0)   fsub.s1 f79 = f35, f79
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fnma.s1 f80 = f43, f40, f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     C_hi = frcpa(1,U_prime_hi)
+//     U_prime_lo = U_prime_lo + U_hold
+//
+//
+//     C_hi_hold = 1 - C_hi * U_prime_hi (1)
+//
+//
+//     C_hi = C_hi + C_hi * C_hi_hold    (1)
+//
+//
+//     C_hi_hold = 1 - C_hi * U_prime_hi (2)
+//
+(p0)   fadd.s1 f41 = f41, f79
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     C_hi = C_hi + C_hi * C_hi_hold    (2)
+//
+(p0)   fma.s1 f43 = f80, f43, f43
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     C_hi_hold = 1 - C_hi * U_prime_hi (3)
+//
+(p0)   fnma.s1 f80 = f43, f40, f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     C_hi = C_hi + C_hi * C_hi_hold    (3)
+//
+(p0)   fma.s1 f43 = f80, f43, f43
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     w_hi = V_prime * C_hi
+//
+(p0)   fmpy.s1 f44 = f42, f43
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmpy.s1 f46 = f44, f44
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     wsq = w_hi * w_hi
+//     w_lo = V_prime - w_hi * U_prime_hi
+//
+(p0)   fnma.s1 f45 = f44, f40, f42
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f47 = f46, f73, f71
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     poly = Q_3 + wsq * Q_4
+//     w_lo = w_lo - w_hi * U_prime_lo
+//
+(p0)   fnma.s1 f45 = f44, f41, f45
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f47 = f46, f47, f69
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     poly = Q_2 + wsq * poly
+//     w_lo = w_lo * C_hi
+//
+(p0)   fmpy.s1 f45 = f43, f45
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f47 = f46, f47, f67
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     poly = Q_1 + wsq * poly
+//     A_lo = Tbl_lo + w_lo
+//     swap = xor(swap,sign_X)
+//
+(p0)   fadd.s1 f53 = f49, f45
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     Is (swap) != 0 ?
+//     poly = wsq * poly
+//     A_hi = Tbl_hi
+//
+(p0)   fmpy.s1 f47 = f46, f47
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     poly = wsq * poly
+//
+//
+//     if (p6) sigma = -1.0
+//     if (p7) sigma =  1.0
+//
+(p0)   fmpy.s1 f47 = f44, f47
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     P_hi = s_Y * P_hi
+//     A_lo = A_lo + poly
+//
+(p0)   fadd.s1 f53 = f53, f47
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     A_lo = A_lo + w_hi
+//     A_hi = s_Y * A_hi
+//
+(p0)   fadd.s1 f53 = f53, f44
+	nop 999;;	// EMbo added ...
+ } { .mfb
+	nop 999	// EMbo added ...
+//
+//     result_hi = P_hi + sigma * A_hi
+//     result_lo = P_lo + sigma * A_lo
+//
+(p0)   fma.s1 f55 = f54, f53, f51
+(p0)   br.cond.sptk L(RETURN_ATAN);;
+} 
+//
+//     result  =  result_hi + result_lo * s_Y  (User Supplied Rounding Mode)
+//
+//     (p0)   fma.d.s0 f57 = f55, f34, f56
+//
+// /**************************************************/
+// /********************* STEP4 **********************/
+// /**************************************************/
+//
+L(ATAN_POLY):
+{ .mmi
+(p0)   xor r38 = r36,r38
+(p0)  addl           r39   = @ltoff(Constants_atan#), gp
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 r39 = [r39]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
+{ .mlx
+	nop 999	// EMbo added ...
+(p0)   movl r47 =  0x24005;;
+ } { .mfi
+(p0)   add r39 = 128, r39
+(p0)   fnma.s1 f81 = f37, f35, f1
+(p0)   cmp.eq.unc  p7, p6 =  0x00000, r38;;
+ } { .mmf
+	nop 999	// EMbo added ...
+(p0)   ldfe f77 = [r39],-16
+//
+//     Iterate 3 times E = E + E*(1.0 - E*U)
+//     Also load P_8, P_7, P_6, P_5, P_4
+//     E_hold = 1.0 - E * U     (1)
+//     A_temp = Q
+//
+(p0)   mov f85 = f38;;
+ } { .mmf
+	nop 999	// EMbo added ...
+(p0)   ldfe f76 = [r39],-16
+(p6)   fsub.s1 f54 = f0, f1;;
+ } { .mmf
+	nop 999	// EMbo added ...
+(p0)   ldfe f75 = [r39],-16
+//
+//     E = E + E_hold*E         (1)
+//     Point to P_8.
+//
+(p0)   fma.s1 f37 = f37, f81, f37;;
+ } { .mmf
+	nop 999	// EMbo added ...
+(p0)   ldfe f74 = [r39],-16
+(p0)   fnma.s1 f64 = f85, f35, f36;;
+ } { .mmf
+	nop 999	// EMbo added ...
+(p0)   ldfe f72 = [r39],-16
+(p7)   fadd.s1 f54 = f0, f1;;
+ } { .mmf
+	nop 999	// EMbo added ...
+(p0)   ldfe f70 = [r39],-16
+//
+//     E_hold = 1.0 - E * U     (2)
+//
+(p0)   fnma.s1 f81 = f37, f35, f1;;
+ } { .mmf
+	nop 999	// EMbo added ...
+(p0)   ldfe f68 = [r39],-16
+(p0)   fmpy.s1 f50 = f34, f50;;
+ } { .mmf
+	nop 999	// EMbo added ...
+(p0)   ldfe f66 = [r39],-16
+(p0)   fmpy.d.s0 f67 = f67, f67
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     E = E + E_hold*E         (2)
+//
+(p0)   fma.s1 f37 = f37, f81, f37
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     E_hold = 1.0 - E * U     (3)
+//
+(p0)   fnma.s1 f81 = f37, f35, f1
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     E = E + E_hold*E         (3)
+//     At this point E approximates 1/U to roughly working precision
+//     z = V*E approximates V/U
+//
+(p0)   fma.s1 f37 = f37, f81, f37
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     z =   V * E
+//
+(p0)   fmpy.s1 f59 = f36, f37
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmpy.s1 f64 = f64, f37
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     zsq = z * z
+//     Also load P_3
+//
+(p0)   fmpy.s1 f60 = f59, f59
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fadd.s1 f52 = f85, f64
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f62 = f60, f77, f76
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f63 = f60, f70, f68
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     z8 = zsq * zsq
+//     Also load P_2
+//
+(p0)   fmpy.s1 f61 = f60, f60
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fsub.s1 f85 = f85, f52
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmerge.s  f65 = f52,f52
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f62 = f60, f62, f75
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f63 = f60, f63, f66
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     z8 = z8 * z8
+//     Also load P_1
+//     poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
+//     poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
+//
+//
+//     poly1 = P_7 + zsq * P_8
+//     poly2 = P_2 + zsq * P_3
+//     poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*poly1))
+//     poly2 = zsq*(P_1 + zsq*poly2)
+//
+//
+//     poly1 = P_6 + zsq * poly1
+//     poly2 = P_1 + zsq * poly2
+//     poly1 = P_4 + zsq*(P_5 + zsq*poly1)
+//     poly2 = zsq*poly2
+//
+(p0)   fmpy.s1 f61 = f61, f61
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fadd.s1 f64 = f85, f64
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f62 = f60, f62, f74
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     poly1 = P_5 + zsq * poly1
+//     poly2 = zsq * poly2
+//     poly1 = P_4 + zsq*poly1
+//
+(p0)   fmpy.s1 f63 = f63, f60
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     poly1 = P_4 + zsq * poly1
+//     swap = xor(swap,sign_X)
+//
+(p0)   fma.s1 f62 = f60, f62, f72
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     poly = z8*poly1 + poly2  (Typo in writeup)
+//     Is (swap) != 0 ?
+//
+//
+//     z_lo = V - A_temp * U
+//     if (p7) sigma =  1.0
+//     Writeup shows A_temp as A_hi
+//
+//
+//     z_lo = z_lo * E
+//     if (p6) sigma = -1.0
+//     z_lo = (V - A_temp * U) *E
+//
+//
+//     Fixup added to force inexact later -
+//     A_hi = A_temp + z_lo
+//     z_lo = (A_temp - A_hi) + z_lo
+//     z_lo = A_hi - z_lo -A_hi + z_lo = about 0
+//
+(p0)   fma.s1 f47 = f61, f62, f63
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     A_lo = z * poly + z_lo
+//
+(p0)   fma.s1 f53 = f59, f47, f64
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fadd.s1  f52 = f65, f53
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fsub.s1  f65 = f65, f52
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fmpy.s1 f52 = f34, f52
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fadd.s1  f53 = f65, f53
+	nop 999	// EMbo added ...
+ } { .mfi
+(p0)   setf.exp f65 = r47
+(p0)   fma.s1 f56 = f54, f52, f50
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.m.unc  p6,p0 = f53,0x007
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     P_hi = s_Y * P_hi
+//     A_hi = s_Y * A_hi
+//
+//
+//     result_hi = P_hi + sigma * A_hi
+//
+(p6)   mov f53 = f65
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     tmp = P_hi - result_hi
+//
+(p0)   fsub.s1 f65 = f50, f56
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fma.s1 f65 = f52, f54, f65
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     tmp   = sigma * A_hi  + tmp
+//     sigma = A_lo * sigma  + P_lo
+//
+(p0)   fma.s1 f54 = f53, f54, f51
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     result_lo = s_Y * sigma + tmp
+//
+(p0)   fma.s1 f55 = f34, f54, f65
+	nop 999;;	// EMbo added ...
+ } { .mfb
+       nop.m 0
+       mov f34 = f1
+(p0)   br.cond.sptk L(RETURN_ATAN);;
+}
+//
+//     result  =  result_hi + result_lo  (User Supplied Rounding Mode)
+//
+//     (p0)   fadd.d.s0 f57 = f55, f56
+L(ATAN_UNSUPPORTED):
+L(ATAN_NATVAL):
+ { .mfb
+	nop 999	// EMbo added ...
+//
+//     Deal with the NatVal and unsupported cases.
+//     Raise invalid if warranted.
+//
+(p0)   fmpy.d.s0 f57 = f8, f9
+br.cond.sptk L(RETURN_ATAN);;
+ }
+L(ATAN_NAN):
+ { .mfb
+	nop 999	// EMbo added ...
+//
+//     If only one NaN, then generate the resulting
+//     NaN and return - may raise invalid.
+//
+(p0)   fmpy.d.s0 f57 = f8, f9
+(p0)   br.cond.sptk L(RETURN_ATAN);;
+ }
+L(ATAN_SPECIAL_HANDLING):
+
+ { .mmf
+(p0)   addl           r39   = @ltoff(Constants_atan#), gp
+       nop.m 999
+(p0)   fcmp.lt.s0 p0,p7 = f8,f1
+ } 
+;;
+
+//
+//     Raise denormal operand faults if necessary
+//
+
+{ .mfi
+       ld8 r39 = [r39]
+(p0)   fcmp.lt.s0 p0,p6 = f9,f1
+	nop 999;;	// EMbo added ...
+} 
+;;
+
+
+
+{ .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.m.unc p6,p7 = f32,0x007
+	nop 999;;	// EMbo added ...
+ } { .mlx
+	nop 999	// EMbo added ...
+(p0)   movl r47 = 992;;
+ } { .mib
+(p0)   add r39 = r39, r47
+	nop 999	// EMbo added ...
+(p7)   br.cond.sptk L(ATAN_ArgY_Not_ZERO);;
+ } { .mfi
+	nop 999	// EMbo added ...
+(p6)   fclass.m.unc p14,p0 = f33,0x035
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p6)   fclass.m.unc p15,p0 = f33,0x036
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p6)   fclass.m.unc p13,p0 = f33,0x007
+	nop 999	// EMbo added ...
+ } { .mfi
+(p0)   ldfd  f56 = [r39],8
+	nop 999	// EMbo added ...
+	nop 999;;	// EMbo added ...
+ } { .mfi
+(p0)   ldfd  f55 = [r39],-8
+(p14)  fmerge.s f56 =  f32,f0
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     Return sign_Y * 0 when Y = +/-0 and X > 0
+//
+(p14)  fmerge.s f55 =  f32,f0
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p15)  fmerge.s f56 =  f32,f56
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     Return sign_Y * PI when X <  -0
+//
+//
+(p15)  fmerge.s f55 =  f32,f55
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fadd.d.s0 f57 = f56,f55
+      nop.i 0
+ } { .bbb
+//
+//     Call error support function for atan(0,0)
+//     - expected value already computed.
+//
+    nop.b 0
+    nop.b 0
+(p0)  br.cond.sptk L(RETURN_ATAN)
+ }
+L(ATAN_ArgY_Not_ZERO):
+ { .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.m.unc p9,p10 = f32,0x023
+	nop 999;;	// EMbo added ...
+ } { .mfb
+	nop 999	// EMbo added ...
+(p9)   fclass.m.unc p6,p0 = f33,0x017
+(p10)  br.cond.sptk L(ATAN_ArgY_Not_INF);;
+ } { .mfi
+(p6)   add r39 = 16,r39
+(p9)   fclass.m.unc p7,p0 = f33,0x021
+	nop 999;;	// EMbo added ...
+ } { .mmf
+	nop 999	// EMbo added ...
+(p0)   ldfd  f56 = [r39],8
+(p9)   fclass.m.unc p8,p0 = f33,0x022;;
+ } { .mbb
+(p0)   ldfd  f55 = [r39],-8
+	nop 999	// EMbo added ...
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p6)   fmerge.s f56 =  f32,f56
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p6)   fmerge.s f55 =  f32,f55
+	nop 999;;	// EMbo added ...
+ } { .mfb
+	nop 999	// EMbo added ...
+//
+//     Load PI/2 and adjust its sign.
+//     Return +PI/2 when ArgY = +Inf and ArgX = +/-0,normal
+//     Return -PI/2 when ArgY = -Inf and ArgX = +/-0,normal
+//
+(p6)   fadd.d.s0  f57 =   f56,  f55
+(p6)   br.cond.sptk L(RETURN_ATAN);;
+ } { .mmi
+(p7)   add r39 = 32,r39;;
+(p7)   ldfd  f56 = [r39],8
+	nop 999;;	// EMbo added ...
+ } { .mmi
+	nop 999;;	// EMbo added ...
+(p7)   ldfd  f55 = [r39],-8
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p7)   fmerge.s f56 =  f32,f56
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p7)   fmerge.s f55 =  f32,f55
+	nop 999;;	// EMbo added ...
+ } { .mfb
+	nop 999	// EMbo added ...
+//
+//     Load PI/4 and adjust its sign.
+//     Return +PI/4 when ArgY = +Inf and ArgX = +Inf
+//     Return -PI/4 when ArgY = -Inf and ArgX = +Inf
+//
+(p7)   fadd.d.s0  f57 =   f56,  f55
+(p7)   br.cond.sptk L(RETURN_ATAN);;
+ } { .mmi
+(p8)   add r39 = 48,r39;;
+(p8)   ldfd f56 =[r39],8
+	nop 999;;	// EMbo added ...
+ } { .mmi
+	nop 999;;	// EMbo added ...
+(p8)   ldfd f55 =[r39],-8
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p8)   fmerge.s f56 =  f32,f56
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p8)   fmerge.s f55 =  f32,f55
+	nop 999;;	// EMbo added ...
+ } { .mfb
+	nop 999	// EMbo added ...
+//
+//     Load 3PI/4 and adjust its sign.
+//     Return +3PI/4 when ArgY = +Inf and ArgX = -Inf
+//     Return -3PI/4 when ArgY = -Inf and ArgX = -Inf
+//
+(p8)   fadd.d.s0  f57 =   f56,  f55
+(p8)   br.cond.sptk L(RETURN_ATAN);;
+ }
+L(ATAN_ArgY_Not_INF):
+ { .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.m.unc  p6,p0 = f33,0x007
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.m.unc  p7,p0 = f33,0x021
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p0)   fclass.m.unc  p8,p0 = f33,0x022
+(p6)   add r39 = 16,r39;;
+ } { .mfi
+(p6)   ldfd f56 =[r39],8
+	nop 999	// EMbo added ...
+	nop 999;;	// EMbo added ...
+ } { .mmi
+	nop 999;;	// EMbo added ...
+(p6)   ldfd f55 =[r39],-8
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p6)   fmerge.s f56 =  f32,f56
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p6)   fmerge.s f55 =  f32,f55
+	nop 999;;	// EMbo added ...
+ } { .mfb
+	nop 999	// EMbo added ...
+//
+//     return = sign_Y * PI/2 when ArgX = +/-0
+//
+(p6)   fadd.d.s0 f57 = f56, f55
+(p6)   br.cond.sptk L(RETURN_ATAN);;
+ } { .mfi
+	nop 999	// EMbo added ...
+(p7)   fmerge.s f56 =  f32,f0
+	nop 999	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p7)   fmerge.s f55 =  f32,f0
+	nop 999;;	// EMbo added ...
+ } { .mfb
+	nop 999	// EMbo added ...
+//
+//     return = sign_Y * 0 when ArgX = Inf
+//
+(p7)   fadd.d.s0 f57 = f56, f55
+(p7)   br.cond.sptk L(RETURN_ATAN);;
+ } { .mfi
+(p8)   ldfd f56 = [r39],8
+	nop 999	// EMbo added ...
+	nop 999;;	// EMbo added ...
+ } { .mmi
+	nop 999;;	// EMbo added ...
+(p8)   ldfd f55 = [r39],-8
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p8)   fmerge.s f56 =  f32,f56
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+(p8)   fmerge.s f55 =  f32,f55
+	nop 999;;	// EMbo added ...
+ } { .mfi
+	nop 999	// EMbo added ...
+//
+//     return = sign_Y * PI when ArgX = -Inf
+//
+(p8)   fadd.d.s0  f57 =   f56,  f55
+	nop 999	// EMbo added ...
+ };;
+L(RETURN_ATAN):
+// mov    f8     = f57 ;;
+// The answer is in f57.
+// But Z_hi is f56
+//     Z_lo is f55
+//     s_Y  is f34
+//     W is in f9 and untouched
+
+{ .mfi
+	nop 999	
+mov    f8     = f56
+        nop.i 0
+};;
+
+{ .mfi
+	nop 999	
+mov    f10    = f55
+        nop.i 999
+}
+{ .mfb
+	nop 999	
+mov    f11    = f34
+br.ret.sptk   b0
+};;
+
+.endp __libm_atan2_reg
+ASM_SIZE_DIRECTIVE(__libm_atan2_reg)
diff --git a/sysdeps/ia64/fpu/libm_error.c b/sysdeps/ia64/fpu/libm_error.c
index 42ca36d98f..ebbaad02ad 100644
--- a/sysdeps/ia64/fpu/libm_error.c
+++ b/sysdeps/ia64/fpu/libm_error.c
@@ -1,10 +1,9 @@
-/* file: libm_error.c */
-
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+//
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, James
+// Edwards, and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,15 +19,14 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -36,39 +34,19 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
 //  2/02/00: Initial version
-//  3/22/00: Updated to support flexible and dynamic error handling. 
-//  8/16/00: Changed all matherr function-calls to use the pmatherr 
+//  3/22/00: Updated to support flexible and dynamic error handling.
+//  8/16/00: Changed all matherr function-calls to use the pmatherr
 //           function-pointers.
 // 10/03/00: Corrected a scalb type.
 // 11/28/00: Changed INPUT_XL to INPUT_XD for scalb_underflow case.
 // 12/07/00: Added code to make scalbn error support equivalent to ldexp.
 //  2/07/01: Added __declspec(align(16)) to long double constants to correct
 //           alignment problem.
-//  4/23/01: Added code for remquo
-//  6/07/01: Added code for fdim, lrint, lround, llrint, llround
-//           Deleted code for remquo
-//  8/15/01: Added code for scalbln, nexttoward
-// 12/10/01: Added code for erfc
-// 12/27/01: Added code for degree argument functions
-// 01/02/02: Added code for tand, cotd
-// 01/15/02: Corrected SVID/XOPEN code for log1p, pow, and acosh
-// 01/25/02: Corrected ISOC for lgamma and gamma to return EDOM for neg ints
-// 01/28/02: Corrected SVID/XOPEN stderr message for log2
-// 05/20/02: Added code for cot
-// 07/01/02: Added code for sinhcosh
-// 10/04/02: Underflow detection in ISOC path redefined to 
-//           be zero rather than tiny and inexact  
-// 12/06/02: Added code for annuity and compound
-// 01/30/03: Corrected test for underflow in ISOC path to not set denormal
-// 04/10/03: Corrected ISOC branch for gamma/lgamma to return ERANGE for neg ints.
-//           Added code for tgamma
-// 04/11/03: Corrected POSIX/SVID/XOPEN branches for gamma/lgamma 
-//           to return EDOM for neg ints.
 //
 
 #include <errno.h>
@@ -76,41 +54,38 @@
 #include <stdlib.h>
 #include "libm_support.h"
 
-#ifdef _LIBC
-# define pmatherr matherr
-# define pmatherrf matherrf
-# define pmatherrl matherrl
-#else
+#ifndef _LIBC
 _LIB_VERSION_TYPE
 #if defined( __POSIX__ )
-_LIB_VERSIONIMF = _POSIX_;
+_LIB_VERSION = _POSIX_;
 #elif defined( __XOPEN__ )
-_LIB_VERSIONIMF = _XOPEN_;
+_LIB_VERSION = _XOPEN_;
 #elif defined( __SVID__ )
-_LIB_VERSIONIMF = _SVID_;
+_LIB_VERSION = _SVID_;
 #elif defined( __IEEE__ )
-_LIB_VERSIONIMF = _IEEE_;
+_LIB_VERSION = _IEEE_;
 #else
-_LIB_VERSIONIMF = _ISOC_;
+_LIB_VERSION = _ISOC_;
+#endif
 #endif
 
 /************************************************************/
 /* matherrX function pointers and setusermatherrX functions */
 /************************************************************/
+#if 0
 int (*pmatherrf)(struct exceptionf*) = MATHERR_F;
 int (*pmatherr)(struct EXC_DECL_D*) = MATHERR_D;
 int (*pmatherrl)(struct exceptionl*) = matherrl;
 
 void __libm_setusermatherrf( int(*user_merrf)(struct exceptionf*) )
-{	pmatherrf = ( (user_merrf==NULL)? (MATHERR_F) : (user_merrf) );	}
+{      pmatherrf = ( (user_merrf==NULL)? (MATHERR_F) : (user_merrf) ); }
 
 void __libm_setusermatherr( int(*user_merr)(struct EXC_DECL_D*) )
-{	pmatherr = ( (user_merr==NULL)? (MATHERR_D) : (user_merr) );	}
+{      pmatherr = ( (user_merr==NULL)? (MATHERR_D) : (user_merr) );    }
 
 void __libm_setusermatherrl( int(*user_merrl)(struct exceptionl*) )
-{	pmatherrl = ( (user_merrl==NULL)? (matherrl) : (user_merrl) );	}
-
-#endif /* !_LIBC */
+{      pmatherrl = ( (user_merrl==NULL)? (matherrl) : (user_merrl) );  }
+#endif
 
 /***********************************************/
 /* error-handling function, libm_error_support */
@@ -118,27 +93,22 @@ void __libm_setusermatherrl( int(*user_merrl)(struct exceptionl*) )
 void __libm_error_support(void *arg1,void *arg2,void *retval,error_types input_tag)
 {
 
+
 # ifdef __cplusplus
 struct __exception exc;
-# else 
+# else
 struct exception  exc;
-# endif 
+# endif
 
 struct exceptionf excf;
 struct exceptionl excl;
 
-# if defined(__GNUC__)
-#define ALIGNIT __attribute__ ((__aligned__ (16)))
-# elif defined opensource
+# if defined opensource || defined _LIBC
 #define ALIGNIT
+#define ALIGNATTR __attribute__ ((__aligned__ (16)))
 # else
 #define ALIGNIT __declspec(align(16))
-# endif
-
-# ifdef SIZE_LONG_INT_64
-#define __INT_64__ signed long
-# else 
-#define __INT_64__ __int64
+#define ALIGNATTR
 # endif
 
 const char float_inf[4] = {0x00,0x00,0x80,0x7F};
@@ -148,74 +118,66 @@ const char float_neg_inf[4] = {0x00,0x00,0x80,0xFF};
 const char float_neg_huge[4] = {0xFF,0xFF,0x7F,0xFF};
 const char float_neg_zero[4] = {0x00,0x00,0x00,0x80};
 ALIGNIT
-const char double_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F}; 
-#if 0 /* unused */
+const char double_inf[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0x7F};
 ALIGNIT
-const char double_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
-#endif
+//const char double_huge[8] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0x7F};
 ALIGNIT
-const char double_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+const char double_zero[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char double_neg_inf[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF}; 
-#if 0 /* unused */
+const char double_neg_inf[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF};
 ALIGNIT
-const char double_neg_huge[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
-#endif
+//const char double_neg_huge[8] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xEF,0xFF};
 ALIGNIT
-const char double_neg_zero[8] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
+const char double_neg_zero[8] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80};
 ALIGNIT
-const char long_double_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00}; 
-#if 0 /* unused */
+const char long_double_inf[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char long_double_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
-#endif
+//const char long_double_huge[16] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0x7F,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char long_double_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
+const char long_double_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char long_double_neg_inf[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00}; 
-#if 0 /* unused */
+const char long_double_neg_inf[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xFF,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char long_double_neg_huge[16] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
-#endif
+//const char long_double_neg_huge[16] ALIGNATTR = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF, 0xFE,0xFF,0x00,0x00,0x00,0x00,0x00,0x00};
 ALIGNIT
-const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
+const char long_double_neg_zero[16] ALIGNATTR = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x80,0x00,0x00,0x00,0x00,0x00,0x00};
 
-#define RETVAL_HUGE_VALL *(long double *)retval =  *(long double *)long_double_inf 
-#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf 
-#define RETVAL_HUGEL *(long double *)retval = (long double)*(float *)float_huge 
-#define RETVAL_NEG_HUGEL *(long double *)retval =(long double)*(float*)float_neg_huge 
+#define RETVAL_HUGE_VALL *(long double *)retval =  *(long double *)long_double_inf
+#define RETVAL_NEG_HUGE_VALL *(long double *)retval = *(long double *)long_double_neg_inf
+#define RETVAL_HUGEL *(long double *)retval = (long double)*(float *)float_huge
+#define RETVAL_NEG_HUGEL *(long double *)retval =(long double)*(float*)float_neg_huge
 
 #define RETVAL_HUGE_VALD *(double *)retval = *(double *) double_inf
 #define RETVAL_NEG_HUGE_VALD *(double *)retval = *(double *) double_neg_inf
 #define RETVAL_HUGED *(double *)retval = (double) *(float *)float_huge
-#define RETVAL_NEG_HUGED *(double *)retval = (double) *(float *) float_neg_huge 
+#define RETVAL_NEG_HUGED *(double *)retval = (double) *(float *) float_neg_huge
 
 #define RETVAL_HUGE_VALF *(float *)retval =  *(float *) float_inf
 #define RETVAL_NEG_HUGE_VALF *(float *)retval = *(float *) float_neg_inf
 #define RETVAL_HUGEF *(float *)retval = *(float *) float_huge
-#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge 
+#define RETVAL_NEG_HUGEF *(float *)retval = *(float *) float_neg_huge
 
-#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero 
-#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero 
-#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero 
+#define RETVAL_ZEROL *(long double *)retval = *(long double *)long_double_zero
+#define RETVAL_ZEROD *(double *)retval = *(double *)double_zero
+#define RETVAL_ZEROF *(float *)retval = *(float *)float_zero
 
-#define RETVAL_NEG_ZEROL *(long double *)retval = *(long double *)long_double_neg_zero 
-#define RETVAL_NEG_ZEROD *(double *)retval = *(double *)double_neg_zero 
-#define RETVAL_NEG_ZEROF *(float *)retval = *(float *)float_neg_zero 
+#define RETVAL_NEG_ZEROL *(long double *)retval = *(long double *)long_double_neg_zero
+#define RETVAL_NEG_ZEROD *(double *)retval = *(double *)double_neg_zero
+#define RETVAL_NEG_ZEROF *(float *)retval = *(float *)float_neg_zero
 
-#define RETVAL_ONEL *(long double *)retval = (long double) 1.0 
-#define RETVAL_ONED *(double *)retval = 1.0 
-#define RETVAL_ONEF *(float *)retval = 1.0f 
+#define RETVAL_ONEL *(long double *)retval = (long double) 1.0
+#define RETVAL_ONED *(double *)retval = 1.0
+#define RETVAL_ONEF *(float *)retval = 1.0f
 
-#define NOT_MATHERRL excl.arg1=*(long double *)arg1;excl.arg2=*(long double *)arg2;excl.retval=*(long double *)retval;if(!pmatherrl(&excl))
-#define NOT_MATHERRD exc.arg1=*(double *)arg1;exc.arg2=*(double *)arg2;exc.retval=*(double *)retval;if(!pmatherr(&exc))
-#define NOT_MATHERRF excf.arg1=*(float *)arg1;excf.arg2=*(float *)arg2;excf.retval=*(float *)retval;if(!pmatherrf(&excf))
+#define NOT_MATHERRL excl.arg1=*(long double *)arg1;excl.arg2=*(long double *)arg2;excl.retval=*(long double *)retval;if(!matherrl(&excl))
+#define NOT_MATHERRD exc.arg1=*(double *)arg1;exc.arg2=*(double *)arg2;exc.retval=*(double *)retval;if(!MATHERR_D(&exc))
+#define NOT_MATHERRF excf.arg1=*(float *)arg1;excf.arg2=*(float *)arg2;excf.retval=*(float *)retval;if(!MATHERR_F(&excf))
 
-#define ifSVID if(_LIB_VERSIONIMF==_SVID_)
+#define ifSVID if(_LIB_VERSION==_SVID_)
 
-#define NAMEL excl.name  
-#define NAMED exc.name  
-#define NAMEF excf.name  
+#define NAMEL excl.name
+#define NAMED exc.name
+#define NAMEF excf.name
 
 //
 // These should work OK for MS because they are ints -
@@ -230,28 +192,28 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
 #define PLOSS           6
 
 #define SINGL excl.type = SING
-#define DOMAINL excl.type = DOMAIN 
-#define OVERFLOWL excl.type = OVERFLOW 
-#define UNDERFLOWL excl.type = UNDERFLOW 
-#define TLOSSL excl.type = TLOSS 
+#define DOMAINL excl.type = DOMAIN
+#define OVERFLOWL excl.type = OVERFLOW
+#define UNDERFLOWL excl.type = UNDERFLOW
+#define TLOSSL excl.type = TLOSS
 #define SINGD exc.type = SING
-#define DOMAIND exc.type = DOMAIN 
-#define OVERFLOWD exc.type = OVERFLOW 
-#define UNDERFLOWD exc.type = UNDERFLOW 
-#define TLOSSD exc.type = TLOSS 
+#define DOMAIND exc.type = DOMAIN
+#define OVERFLOWD exc.type = OVERFLOW
+#define UNDERFLOWD exc.type = UNDERFLOW
+#define TLOSSD exc.type = TLOSS
 #define SINGF excf.type = SING
-#define DOMAINF excf.type = DOMAIN 
-#define OVERFLOWF excf.type = OVERFLOW 
-#define UNDERFLOWF excf.type = UNDERFLOW 
-#define TLOSSF excf.type = TLOSS 
+#define DOMAINF excf.type = DOMAIN
+#define OVERFLOWF excf.type = OVERFLOW
+#define UNDERFLOWF excf.type = UNDERFLOW
+#define TLOSSF excf.type = TLOSS
 
 #define INPUT_XL (excl.arg1=*(long double*)arg1)
 #define INPUT_XD (exc.arg1=*(double*)arg1)
 #define INPUT_XF (excf.arg1=*(float*)arg1)
-#define INPUT_YL (excl.arg2=*(long double*)arg2)
-#define INPUT_YD (exc.arg2=*(double*)arg2)
-#define INPUT_YF (excf.arg2=*(float*)arg2)
-#define INPUT_RESL (*(long double *)retval) 
+#define INPUT_YL (excl.arg1=*(long double*)arg2)
+#define INPUT_YD (exc.arg1=*(double*)arg2)
+#define INPUT_YF (excf.arg1=*(float*)arg2)
+#define INPUT_RESL (*(long double *)retval)
 #define INPUT_RESD (*(double *)retval)
 #define INPUT_RESF (*(float *)retval)
 
@@ -286,17 +248,11 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
 #define WRITED_LOG1P_NEGATIVE fputs("log1p: DOMAIN error\n",stderr)
 #define WRITEF_LOG1P_NEGATIVE fputs("log1pf: DOMAIN error\n",stderr)
 #define WRITEL_LOG10_ZERO fputs("log10l: SING error\n",stderr)
-#define WRITED_LOG10_ZERO fputs("log10: SING error\n",stderr) 
+#define WRITED_LOG10_ZERO fputs("log10: SING error\n",stderr)
 #define WRITEF_LOG10_ZERO fputs("log10f: SING error\n",stderr)
 #define WRITEL_LOG10_NEGATIVE fputs("log10l: DOMAIN error\n",stderr)
 #define WRITED_LOG10_NEGATIVE fputs("log10: DOMAIN error\n",stderr)
 #define WRITEF_LOG10_NEGATIVE fputs("log10f: DOMAIN error\n",stderr)
-#define WRITEL_LOG2_ZERO fputs("log2l: SING error\n",stderr)
-#define WRITED_LOG2_ZERO fputs("log2: SING error\n",stderr) 
-#define WRITEF_LOG2_ZERO fputs("log2f: SING error\n",stderr)
-#define WRITEL_LOG2_NEGATIVE fputs("log2l: DOMAIN error\n",stderr)
-#define WRITED_LOG2_NEGATIVE fputs("log2: DOMAIN error\n",stderr)
-#define WRITEF_LOG2_NEGATIVE fputs("log2f: DOMAIN error\n",stderr)
 #define WRITEL_POW_ZERO_TO_ZERO fputs("powl(0,0): DOMAIN error\n",stderr)
 #define WRITED_POW_ZERO_TO_ZERO fputs("pow(0,0): DOMAIN error\n",stderr)
 #define WRITEF_POW_ZERO_TO_ZERO fputs("powf(0,0): DOMAIN error\n",stderr)
@@ -339,9 +295,6 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
 #define WRITEL_GAMMA_NEGATIVE fputs("gammal: SING error\n",stderr)
 #define WRITED_GAMMA_NEGATIVE fputs("gamma: SING error\n",stderr)
 #define WRITEF_GAMMA_NEGATIVE fputs("gammaf: SING error\n",stderr)
-#define WRITEL_TGAMMA_NEGATIVE fputs("tgammal: DOMAIN error\n",stderr)
-#define WRITED_TGAMMA_NEGATIVE fputs("tgamma: DOMAIN error\n",stderr)
-#define WRITEF_TGAMMA_NEGATIVE fputs("tgammaf: DOMAIN error\n",stderr)
 #define WRITEL_J0_TLOSS  fputs("j0l: TLOSS error\n",stderr)
 #define WRITEL_Y0_TLOSS  fputs("y0l: TLOSS error\n",stderr)
 #define WRITEL_J1_TLOSS  fputs("j1l: TLOSS error\n",stderr)
@@ -360,26 +313,16 @@ const char long_double_neg_zero[16] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0
 #define WRITEF_Y1_TLOSS  fputs("y1f: TLOSS error\n",stderr)
 #define WRITEF_JN_TLOSS  fputs("jnf: TLOSS error\n",stderr)
 #define WRITEF_YN_TLOSS  fputs("ynf: TLOSS error\n",stderr)
-#define WRITEL_ACOSD fputs("acosdl: DOMAIN error\n",stderr)
-#define WRITED_ACOSD fputs("acosd: DOMAIN error\n",stderr)
-#define WRITEF_ACOSD fputs("acosdf: DOMAIN error\n",stderr)
-#define WRITEL_ASIND fputs("asindl: DOMAIN error\n",stderr)
-#define WRITED_ASIND fputs("asind: DOMAIN error\n",stderr)
-#define WRITEF_ASIND fputs("asindf: DOMAIN error\n",stderr)
-#define WRITEL_ATAN2D_ZERO_BY_ZERO fputs("atan2dl: DOMAIN error\n",stderr)
-#define WRITED_ATAN2D_ZERO_BY_ZERO fputs("atan2d: DOMAIN error\n",stderr)
-#define WRITEF_ATAN2D_ZERO_BY_ZERO fputs("atan2df: DOMAIN error\n",stderr)
-
 
 /***********************/
 /* IEEE Path           */
 /***********************/
-if(_LIB_VERSIONIMF==_IEEE_) return;
+if(_LIB_VERSION==_IEEE_) return;
 
 /***********************/
 /* C9X Path           */
 /***********************/
-else if(_LIB_VERSIONIMF==_ISOC_) 
+else if(_LIB_VERSION==_ISOC_)
 {
   switch(input_tag)
   {
@@ -396,146 +339,80 @@ else if(_LIB_VERSIONIMF==_ISOC_)
     case log1p_zero:
     case log1pf_zero:
     case powl_overflow:
-    case pow_overflow:  
-    case powf_overflow: 
+    case pow_overflow:
+    case powf_overflow:
+    case powl_underflow:
+    case pow_underflow:
+    case powf_underflow:
     case expl_overflow:
-    case exp_overflow:  
-    case expf_overflow: 
+    case exp_overflow:
+    case expf_overflow:
+    case expl_underflow:
+    case exp_underflow:
+    case expf_underflow:
     case exp2l_overflow:
-    case exp2_overflow:  
-    case exp2f_overflow: 
+    case exp2_overflow:
+    case exp2f_overflow:
+    case exp2l_underflow:
+    case exp2_underflow:
+    case exp2f_underflow:
     case exp10l_overflow:
-    case exp10_overflow:  
-    case exp10f_overflow: 
+    case exp10_overflow:
+    case exp10f_overflow:
     case expm1l_overflow:
-    case expm1_overflow:  
-    case expm1f_overflow: 
+    case expm1_overflow:
+    case expm1f_overflow:
     case hypotl_overflow:
     case hypot_overflow:
     case hypotf_overflow:
-    case sinhl_overflow: 
-    case sinh_overflow: 
-    case sinhf_overflow: 
-    case atanhl_eq_one:  
-    case atanh_eq_one:  
-    case atanhf_eq_one:  
+    case sinhl_overflow:
+    case sinh_overflow:
+    case sinhf_overflow:
+    case atanhl_eq_one:
+    case atanh_eq_one:
+    case atanhf_eq_one:
     case scalbl_overflow:
     case scalb_overflow:
     case scalbf_overflow:
+    case scalbl_underflow:
+    case scalb_underflow:
+    case scalbf_underflow:
     case coshl_overflow:
     case cosh_overflow:
     case coshf_overflow:
     case nextafterl_overflow:
     case nextafter_overflow:
     case nextafterf_overflow:
-    case nexttowardl_overflow:
-    case nexttoward_overflow:
-    case nexttowardf_overflow:
     case scalbnl_overflow:
     case scalbn_overflow:
     case scalbnf_overflow:
-    case scalblnl_overflow:
-    case scalbln_overflow:
-    case scalblnf_overflow:
+    case scalbnl_underflow:
+    case scalbn_underflow:
+    case scalbnf_underflow:
     case ldexpl_overflow:
     case ldexp_overflow:
     case ldexpf_overflow:
+    case ldexpl_underflow:
+    case ldexp_underflow:
+    case ldexpf_underflow:
     case lgammal_overflow:
     case lgamma_overflow:
     case lgammaf_overflow:
-    case gammal_overflow:
-    case gamma_overflow:
-    case gammaf_overflow:
     case lgammal_negative:
     case lgamma_negative:
     case lgammaf_negative:
+    case gammal_overflow:
+    case gamma_overflow:
+    case gammaf_overflow:
     case gammal_negative:
     case gamma_negative:
     case gammaf_negative:
     case ilogbl_zero:
-    case ilogb_zero: 
+    case ilogb_zero:
     case ilogbf_zero:
-    case fdiml_overflow:
-    case fdim_overflow: 
-    case fdimf_overflow:
-    case llrintl_large:
-    case llrint_large: 
-    case llrintf_large:
-    case llroundl_large:
-    case llround_large: 
-    case llroundf_large:
-    case lrintl_large:
-    case lrint_large: 
-    case lrintf_large:
-    case lroundl_large:
-    case lround_large: 
-    case lroundf_large:
-    case tandl_overflow:
-    case tand_overflow: 
-    case tandf_overflow:
-    case cotdl_overflow:
-    case cotd_overflow: 
-    case cotdf_overflow:
-    case cotl_overflow:
-    case cot_overflow: 
-    case cotf_overflow:
-    case sinhcoshl_overflow: 
-    case sinhcosh_overflow: 
-    case sinhcoshf_overflow: 
-    case annuityl_overflow:
-    case annuity_overflow:
-    case annuityf_overflow:
-    case compoundl_overflow:
-    case compound_overflow:
-    case compoundf_overflow:
-    case tgammal_overflow:
-    case tgamma_overflow:
-    case tgammaf_overflow:
     {
          ERRNO_RANGE; break;
     }
-    case powl_underflow:
-    case expl_underflow:
-    case exp2l_underflow:
-    case scalbl_underflow:
-    case scalbnl_underflow:
-    case scalblnl_underflow:
-    case ldexpl_underflow:
-    case erfcl_underflow:
-    case annuityl_underflow:
-    case compoundl_underflow:
-    {
-       if ( *(__INT_64__*)retval == 0 ) ERRNO_RANGE; 
-       break;
-    }
-    case pow_underflow:  
-    case exp_underflow:  
-    case exp2_underflow:  
-    case scalb_underflow:
-    case scalbn_underflow:
-    case scalbln_underflow:
-    case ldexp_underflow:
-    case erfc_underflow:  
-    case annuity_underflow:
-    case compound_underflow:
-    {
-       if ( ((*(__INT_64__*)retval)<<1) == 0 ) ERRNO_RANGE; 
-       break;
-    }
-    case powf_underflow: 
-    case expf_underflow: 
-    case exp2f_underflow: 
-    case scalbf_underflow:
-    case scalbnf_underflow:
-    case scalblnf_underflow:
-    case ldexpf_underflow:
-    case erfcf_underflow: 
-    case annuityf_underflow:
-    case compoundf_underflow:
-    {
-       if ( ((*(__INT_64__*)retval)<<33) == 0 ) ERRNO_RANGE; 
-       break;
-    }
     case logl_negative:
     case log_negative:
     case logf_negative:
@@ -563,17 +440,17 @@ else if(_LIB_VERSIONIMF==_ISOC_)
     case fmodl_by_zero:
     case fmod_by_zero:
     case fmodf_by_zero:
-    case atanhl_gt_one:  
-    case atanh_gt_one:  
-    case atanhf_gt_one:  
-    case acosl_gt_one: 
-    case acos_gt_one: 
-    case acosf_gt_one: 
-    case asinl_gt_one: 
-    case asin_gt_one: 
-    case asinf_gt_one: 
+    case atanhl_gt_one:
+    case atanh_gt_one:
+    case atanhf_gt_one:
+    case acosl_gt_one:
+    case acos_gt_one:
+    case acosf_gt_one:
+    case asinl_gt_one:
+    case asin_gt_one:
+    case asinf_gt_one:
     case logbl_zero:
-    case logb_zero: 
+    case logb_zero:
     case logbf_zero:
     case acoshl_lt_one:
     case acosh_lt_one:
@@ -596,30 +473,6 @@ else if(_LIB_VERSIONIMF==_ISOC_)
     case ynl_negative:
     case yn_negative:
     case ynf_negative:
-    case acosdl_gt_one: 
-    case acosd_gt_one: 
-    case acosdf_gt_one: 
-    case asindl_gt_one: 
-    case asind_gt_one: 
-    case asindf_gt_one: 
-    case atan2dl_zero:
-    case atan2d_zero:
-    case atan2df_zero:
-    case annuityl_by_zero:
-    case annuity_by_zero:
-    case annuityf_by_zero:
-    case annuityl_less_m1:
-    case annuity_less_m1:
-    case annuityf_less_m1:
-    case compoundl_by_zero:
-    case compound_by_zero:
-    case compoundf_by_zero:
-    case compoundl_less_m1:
-    case compound_less_m1:
-    case compoundf_less_m1:
-    case tgammal_negative:
-    case tgamma_negative:
-    case tgammaf_negative:
     {
          ERRNO_DOMAIN; break;
     }
@@ -633,37 +486,31 @@ else if(_LIB_VERSIONIMF==_ISOC_)
 /* _POSIX_ Path        */
 /***********************/
 
-else if(_LIB_VERSIONIMF==_POSIX_)
+else if(_LIB_VERSION==_POSIX_)
 {
 switch(input_tag)
   {
   case gammal_overflow:
   case lgammal_overflow:
-  case tgammal_overflow:
   {
        RETVAL_HUGE_VALL; ERRNO_RANGE; break;
   }
   case gamma_overflow:
   case lgamma_overflow:
-  case tgamma_overflow:
   {
        RETVAL_HUGE_VALD; ERRNO_RANGE; break;
   }
   case gammaf_overflow:
   case lgammaf_overflow:
-  case tgammaf_overflow:
   {
        RETVAL_HUGE_VALF; ERRNO_RANGE; break;
   }
   case gammal_negative:
-  case lgammal_negative:
   case gamma_negative:
-  case lgamma_negative:
   case gammaf_negative:
+  case lgammal_negative:
+  case lgamma_negative:
   case lgammaf_negative:
-  case tgammal_negative:
-  case tgamma_negative:
-  case tgammaf_negative:
   {
        ERRNO_DOMAIN; break;
   }
@@ -679,56 +526,38 @@ switch(input_tag)
   case scalbn_underflow:
   case scalbnf_overflow:
   case scalbnf_underflow:
-  case scalblnl_overflow:
-  case scalblnl_underflow:
-  case scalbln_overflow:
-  case scalbln_underflow:
-  case scalblnf_overflow:
-  case scalblnf_underflow:
-  case tandl_overflow:
-  case tand_overflow: 
-  case tandf_overflow:
-  case cotdl_overflow:
-  case cotd_overflow: 
-  case cotdf_overflow:
-  case cotl_overflow:
-  case cot_overflow: 
-  case cotf_overflow:
-  case sinhcoshl_overflow: 
-  case sinhcosh_overflow: 
-  case sinhcoshf_overflow: 
   {
        ERRNO_RANGE; break;
   }
-  case atanhl_gt_one: 
-  case atanhl_eq_one: 
+  case atanhl_gt_one:
+  case atanhl_eq_one:
     /* atanhl(|x| >= 1) */
     {
        ERRNO_DOMAIN; break;
     }
-  case atanh_gt_one: 
-  case atanh_eq_one: 
+  case atanh_gt_one:
+  case atanh_eq_one:
     /* atanh(|x| >= 1) */
     {
        ERRNO_DOMAIN; break;
     }
-  case atanhf_gt_one: 
-  case atanhf_eq_one: 
+  case atanhf_gt_one:
+  case atanhf_eq_one:
     /* atanhf(|x| >= 1) */
     {
        ERRNO_DOMAIN; break;
     }
-  case sqrtl_negative: 
+  case sqrtl_negative:
     /* sqrtl(x < 0) */
     {
        ERRNO_DOMAIN; break;
     }
-  case sqrt_negative: 
+  case sqrt_negative:
     /* sqrt(x < 0) */
     {
        ERRNO_DOMAIN; break;
     }
-  case sqrtf_negative: 
+  case sqrtf_negative:
     /* sqrtf(x < 0) */
     {
        ERRNO_DOMAIN; break;
@@ -777,7 +606,7 @@ switch(input_tag)
     /* yn(x < 0) */
     {
        RETVAL_NEG_HUGE_VALD; ERRNO_DOMAIN; break;
-    } 
+    }
   case y0f_negative:
   case y1f_negative:
   case ynf_negative:
@@ -786,11 +615,10 @@ switch(input_tag)
     /* ynf(x < 0) */
     {
        RETVAL_NEG_HUGE_VALF; ERRNO_DOMAIN; break;
-    } 
+    }
   case logl_zero:
   case log1pl_zero:
   case log10l_zero:
-  case log2l_zero:
     /* logl(0) */
     /* log1pl(0) */
     /* log10l(0) */
@@ -800,7 +628,7 @@ switch(input_tag)
   case log_zero:
   case log1p_zero:
   case log10_zero:
-  case log2_zero:
+  case log2_zero: /* double tag, matching log_zero/log1p_zero/log10_zero in this group */
    /* log(0) */
    /* log1p(0) */
    /* log10(0) */
@@ -810,7 +638,6 @@ switch(input_tag)
   case logf_zero:
   case log1pf_zero:
   case log10f_zero:
-  case log2f_zero:
     /* logf(0) */
     /* log1pf(0) */
     /* log10f(0) */
@@ -825,9 +652,6 @@ switch(input_tag)
     /* log1pl(x < 0) */
     /* log10l(x < 0) */
     {
-#ifndef _LIBC
-       RETVAL_NEG_HUGE_VALL;
-#endif
        ERRNO_DOMAIN; break;
     }
   case log_negative:
@@ -838,11 +662,8 @@ switch(input_tag)
     /* log1p(x < 0) */
     /* log10(x < 0) */
     {
-#ifndef _LIBC
-       RETVAL_NEG_HUGE_VALD;
-#endif
        ERRNO_DOMAIN; break;
-    } 
+    }
   case logf_negative:
   case log1pf_negative:
   case log10f_negative:
@@ -851,46 +672,34 @@ switch(input_tag)
     /* log1pf(x < 0) */
     /* log10f(x < 0) */
     {
-#ifndef _LIBC
-       RETVAL_NEG_HUGE_VALF;
-#endif
        ERRNO_DOMAIN; break;
-    } 
+    }
   case expl_overflow:
-  case exp2l_overflow:
-  case exp10l_overflow:
     /* expl overflow */
     {
        RETVAL_HUGE_VALL; ERRNO_RANGE; break;
     }
   case exp_overflow:
-  case exp2_overflow:
-  case exp10_overflow:
     /* exp overflow */
     {
        RETVAL_HUGE_VALD; ERRNO_RANGE; break;
     }
   case expf_overflow:
-  case exp2f_overflow:
-  case exp10f_overflow:
     /* expf overflow */
     {
        RETVAL_HUGE_VALF; ERRNO_RANGE; break;
     }
   case expl_underflow:
-  case exp2l_underflow:
     /* expl underflow */
     {
        RETVAL_ZEROL; ERRNO_RANGE; break;
     }
   case exp_underflow:
-  case exp2_underflow:
     /* exp underflow */
     {
        RETVAL_ZEROD; ERRNO_RANGE; break;
     }
   case expf_underflow:
-  case exp2f_underflow:
     /* expf underflow */
     {
        RETVAL_ZEROF; ERRNO_RANGE; break;
@@ -941,17 +750,13 @@ switch(input_tag)
        break;
     }
   case powl_overflow:
-  case annuityl_overflow:
-  case compoundl_overflow:
     /* powl(x,y) overflow */
     {
        if (INPUT_RESL < 0) RETVAL_NEG_HUGE_VALL;
        else RETVAL_HUGE_VALL;
-       ERRNO_RANGE; break; 
+       ERRNO_RANGE; break;
     }
   case pow_overflow:
-  case annuity_overflow:
-  case compound_overflow:
     /* pow(x,y) overflow */
     {
        if (INPUT_RESD < 0) RETVAL_NEG_HUGE_VALD;
@@ -959,8 +764,6 @@ switch(input_tag)
        ERRNO_RANGE; break;
     }
   case powf_overflow:
-  case annuityf_overflow:
-  case compoundf_overflow:
     /* powf(x,y) overflow */
     {
        if (INPUT_RESF < 0) RETVAL_NEG_HUGE_VALF;
@@ -968,41 +771,20 @@ switch(input_tag)
        ERRNO_RANGE; break;
     }
   case powl_underflow:
-  case annuityl_underflow:
-  case compoundl_underflow:
     /* powl(x,y) underflow */
     {
        RETVAL_ZEROL; ERRNO_RANGE; break;
     }
   case pow_underflow:
-  case annuity_underflow:
-  case compound_underflow:
     /* pow(x,y) underflow */
     {
        RETVAL_ZEROD; ERRNO_RANGE; break;
     }
-  case powf_underflow:
-  case annuityf_underflow:
-  case compoundf_underflow:
+  case  powf_underflow:
     /* powf(x,y) underflow */
     {
        RETVAL_ZEROF; ERRNO_RANGE; break;
     }
-  case annuityl_by_zero:
-  case annuityl_less_m1:
-  case compoundl_by_zero:
-  case compoundl_less_m1:
-  case annuity_by_zero:
-  case annuity_less_m1:
-  case compound_by_zero:
-  case compound_less_m1:
-  case annuityf_by_zero:
-  case annuityf_less_m1:
-  case compoundf_by_zero:
-  case compoundf_less_m1:
-    {
-       ERRNO_DOMAIN; break;
-    }
   case powl_zero_to_negative:
     /* 0**neg */
     {
@@ -1038,7 +820,7 @@ switch(input_tag)
     /* Special Error */
     {
        break;
-    } 
+    }
   case  pow_nan_to_zero:
     /* pow(NaN,0.0) */
     {
@@ -1050,51 +832,36 @@ switch(input_tag)
        break;
     }
   case atan2l_zero:
-  case atan2dl_zero:
-    /* atan2dl(0,0) */
+    /* atan2l(0,0) */
     {
-#ifndef _LIBC
-       RETVAL_ZEROL;
-#else
-       /* XXX  arg1 and arg2 are switched!!!!  */
+      /* XXX  arg1 and arg2 are switched!!!!  */
        if (signbit (*(long double *) arg1))
 	 /* y == -0 */
-	 *(long double *) retval = __libm_copysignl (M_PIl, *(long double *) arg2);
+	 *(long double *) retval = copysignl (M_PIl, *(long double *) arg2);
        else
 	 *(long double *) retval = *(long double *) arg2;
-#endif
        ERRNO_DOMAIN; break;
     }
   case atan2_zero:
-  case atan2d_zero:
-    /* atan2d(0,0) */
+    /* atan2(0,0) */
     {
-#ifndef _LIBC
-       RETVAL_ZEROD;
-#else
-       /* XXX  arg1 and arg2 are switched!!!!  */
+      /* XXX  arg1 and arg2 are switched!!!!  */
        if (signbit (*(double *) arg1))
 	 /* y == -0 */
-	 *(double *) retval = __libm_copysign (M_PI, *(double *) arg2);
+	 *(double *) retval = copysign (M_PI, *(double *) arg2);
        else
 	 *(double *) retval = *(double *) arg2;
-#endif
        ERRNO_DOMAIN; break;
     }
-  case atan2f_zero:
-  case atan2df_zero:
+  case
+    atan2f_zero:
     /* atan2f(0,0) */
-    /* atan2df(0,0) */
     {
-#ifndef _LIBC
-       RETVAL_ZEROF;
-#else
        if (signbit (*(float *) arg2))
 	 /* y == -0 */
-	 *(float *) retval = __libm_copysignf (M_PI, *(float *) arg1);
+	 *(float *) retval = copysignf (M_PI, *(float *) arg1);
        else
 	 *(float *) retval = *(float *) arg1;
-#endif
        ERRNO_DOMAIN; break;
     }
   case expm1l_overflow:
@@ -1145,42 +912,42 @@ switch(input_tag)
   case scalbl_underflow:
     /* scalbl underflow */
     {
-       if (INPUT_XL < 0) RETVAL_NEG_ZEROL; 
+       if (INPUT_XL < 0) RETVAL_NEG_ZEROL;
        else RETVAL_ZEROL;
        ERRNO_RANGE; break;
     }
   case scalb_underflow:
     /* scalb underflow */
     {
-       if (INPUT_XD < 0) RETVAL_NEG_ZEROD; 
+       if (INPUT_XD < 0) RETVAL_NEG_ZEROD;
        else RETVAL_ZEROD;
        ERRNO_RANGE; break;
     }
   case scalbf_underflow:
     /* scalbf underflow */
     {
-       if (INPUT_XF < 0) RETVAL_NEG_ZEROF; 
+       if (INPUT_XF < 0) RETVAL_NEG_ZEROF;
        else RETVAL_ZEROF;
        ERRNO_RANGE; break;
     }
   case scalbl_overflow:
     /* scalbl overflow */
     {
-       if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL; 
+       if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
        else RETVAL_HUGE_VALL;
        ERRNO_RANGE; break;
     }
   case scalb_overflow:
     /* scalb overflow */
     {
-       if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD; 
+       if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
        else RETVAL_HUGE_VALD;
        ERRNO_RANGE; break;
     }
   case scalbf_overflow:
     /* scalbf overflow */
     {
-       if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF; 
+       if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
        else RETVAL_HUGE_VALF;
        ERRNO_RANGE; break;
     }
@@ -1200,62 +967,33 @@ switch(input_tag)
         ERRNO_DOMAIN; break;
     }
   case acosl_gt_one:
-  case acosdl_gt_one:
     /* acosl(x > 1) */
-    /* acosdl(x > 1) */
     {
-#ifndef _LIBC
-       RETVAL_ZEROL;
-#endif
        ERRNO_DOMAIN; break;
     }
   case acos_gt_one:
-  case acosd_gt_one:
     /* acos(x > 1) */
-    /* acosd(x > 1) */
     {
-#ifndef _LIBC
-       RETVAL_ZEROD;
-#endif
-       ERRNO_DOMAIN; break;
+      ERRNO_DOMAIN; break;
     }
   case acosf_gt_one:
-  case acosdf_gt_one:
     /* acosf(x > 1) */
-    /* acosdf(x > 1) */
     {
-#ifndef _LIBC
-       RETVAL_ZEROF;
-#endif
-       ERRNO_DOMAIN; break;
+      ERRNO_DOMAIN; break;
     }
   case asinl_gt_one:
-  case asindl_gt_one:
     /* asinl(x > 1) */
-    /* asindl(x > 1) */
     {
-#ifndef _LIBC
-       RETVAL_ZEROL;
-#endif
        ERRNO_DOMAIN; break;
     }
   case asin_gt_one:
-  case asind_gt_one:
     /* asin(x > 1) */
-    /* asind(x > 1) */
     {
-#ifndef _LIBC
-       RETVAL_ZEROD;
-#endif
        ERRNO_DOMAIN; break;
     }
   case asinf_gt_one:
-  case asindf_gt_one:
-    /* asindf(x > 1) */
+    /* asinf(x > 1) */
     {
-#ifndef _LIBC
-       RETVAL_ZEROF;
-#endif
        ERRNO_DOMAIN; break;
     }
   case remainderl_by_zero:
@@ -1291,15 +1029,6 @@ switch(input_tag)
     {
        RETVAL_HUGE_VALF; ERRNO_RANGE; break;
     }
-  case nextafterl_overflow:
-  case nextafter_overflow:
-  case nextafterf_overflow:
-  case nexttowardl_overflow:
-  case nexttoward_overflow:
-  case nexttowardf_overflow:
-    {
-      ERRNO_RANGE; break;
-    }
   case sinhl_overflow:
     /* sinhl overflows */
     {
@@ -1361,7 +1090,7 @@ return;
 /*******************************/
 /* __SVID__ and __XOPEN__ Path */
 /*******************************/
-else 
+else
 {
   switch(input_tag)
   {
@@ -1377,57 +1106,15 @@ else
   case scalbn_underflow:
   case scalbnf_overflow:
   case scalbnf_underflow:
-  case scalblnl_overflow:
-  case scalblnl_underflow:
-  case scalbln_overflow:
-  case scalbln_underflow:
-  case scalblnf_overflow:
-  case scalblnf_underflow:
-  case tandl_overflow:
-  case tand_overflow: 
-  case tandf_overflow:
-  case cotdl_overflow:
-  case cotd_overflow: 
-  case cotdf_overflow:
-  case cotl_overflow:
-  case cot_overflow: 
-  case cotf_overflow:
-  case annuityl_overflow:
-  case annuityl_underflow:
-  case annuity_overflow:
-  case annuity_underflow:
-  case annuityf_overflow:
-  case annuityf_underflow:
-  case compoundl_overflow:
-  case compoundl_underflow:
-  case compound_overflow:
-  case compound_underflow:
-  case compoundf_overflow:
-  case compoundf_underflow:
   {
        ERRNO_RANGE; break;
   }
-  case annuityl_by_zero:
-  case annuityl_less_m1:
-  case annuity_by_zero:
-  case annuity_less_m1:
-  case annuityf_by_zero:
-  case annuityf_less_m1:
-  case compoundl_by_zero:
-  case compoundl_less_m1:
-  case compound_by_zero:
-  case compound_less_m1:
-  case compoundf_by_zero:
-  case compoundf_less_m1:
-  {
-       ERRNO_DOMAIN; break;
-  }
-  case sqrtl_negative: 
+  case sqrtl_negative:
     /* sqrtl(x < 0) */
     {
        DOMAINL; NAMEL = (char *) "sqrtl";
-       ifSVID 
-       { 
+       ifSVID
+       {
           RETVAL_ZEROL;
           NOT_MATHERRL
           {
@@ -1435,22 +1122,22 @@ else
             ERRNO_DOMAIN;
           }
        }
-       else 
+       else
        { /* NaN already computed */
           NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
-  case sqrt_negative: 
+  case sqrt_negative:
     /* sqrt(x < 0) */
     {
        DOMAIND; NAMED = (char *) "sqrt";
-       ifSVID 
+       ifSVID
        {
-         
+
          RETVAL_ZEROD;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_SQRT;
            ERRNO_DOMAIN;
@@ -1459,18 +1146,18 @@ else
        else
        { /* NaN already computed */
          NOT_MATHERRD {ERRNO_DOMAIN;}
-       } 
-       *(double *)retval = exc.retval;	
+       }
+       *(double *)retval = exc.retval;
        break;
     }
-  case sqrtf_negative: 
+  case sqrtf_negative:
     /* sqrtf(x < 0) */
     {
        DOMAINF; NAMEF = (char *) "sqrtf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_ZEROF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_SQRT;
            ERRNO_DOMAIN;
@@ -1479,59 +1166,62 @@ else
        else
        {
          NOT_MATHERRF {ERRNO_DOMAIN;}
-       } 
-       *(float *)retval = excf.retval;	
+       }
+       *(float *)retval = excf.retval;
        break;
     }
   case logl_zero:
+  case log2l_zero:
     /* logl(0) */
     {
        SINGL; NAMEL = (char *) "logl";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_LOG_ZERO;
            ERRNO_DOMAIN;
-         } 
+         }
        }
        else
        {
          RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
-       } 
-       *(long double *)retval = excl.retval;	
+       }
+       *(long double *)retval = excl.retval;
        break;
     }
   case log_zero:
+  case log2_zero:
     /* log(0) */
     {
        SINGD; NAMED = (char *) "log";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_LOG_ZERO;
            ERRNO_DOMAIN;
-         }  
+         }
        }
        else
        {
          RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case logf_zero:
+  case log2f_zero:
     /* logf(0) */
     {
        SINGF; NAMEF = (char *) "logf";
-       ifSVID 
+       ifSVID
        {
-         RETVAL_NEG_HUGEF; 
+         RETVAL_NEG_HUGEF;
          NOT_MATHERRF
          {
             WRITEF_LOG_ZERO;
@@ -1540,21 +1230,22 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALF; 
+         RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
 
   case logl_negative:
+  case log2l_negative:
     /* logl(x < 0) */
     {
        DOMAINL; NAMEL = (char *) "logl";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_LOG_NEGATIVE;
            ERRNO_DOMAIN;
@@ -1562,20 +1253,21 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALL; 
+         RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case log_negative:
+  case log2_negative:
     /* log(x < 0) */
     {
        DOMAIND; NAMED = (char *) "log";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_LOG_NEGATIVE;
            ERRNO_DOMAIN;
@@ -1583,38 +1275,39 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALD; 
+         RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
-    } 
+    }
   case logf_negative:
+  case log2f_negative:
     /* logf(x < 0) */
     {
        DOMAINF; NAMEF = (char *) "logf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_LOG_NEGATIVE;
            ERRNO_DOMAIN;
          }
-       }  
+       }
        else
        {
-         RETVAL_NEG_HUGE_VALF; 
+         RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF{ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case log1pl_zero:
     /* log1pl(-1) */
     {
        SINGL; NAMEL = (char *) "log1pl";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
          NOT_MATHERRL
@@ -1635,7 +1328,7 @@ else
     /* log1p(-1) */
     {
        SINGD; NAMED = (char *) "log1p";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
          NOT_MATHERRD
@@ -1656,7 +1349,7 @@ else
     /* log1pf(-1) */
     {
        SINGF; NAMEF = (char *) "log1pf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
          NOT_MATHERRF
@@ -1668,11 +1361,11 @@ else
        else
        {
          RETVAL_NEG_HUGE_VALF;
-         NOT_MATHERRF {ERRNO_DOMAIN;}
+         NOT_MATHERRF {ERRNO_DOMAIN;} /* errno update belongs inside the NOT_MATHERRF body, as in the sibling cases */
        }
        *(float *)retval = excf.retval;
        break;
-    } 
+    }
  case log1pl_negative:
    /* log1pl(x < -1) */
    {
@@ -1686,7 +1379,7 @@ else
           ERRNO_DOMAIN;
         }
       }
-      else 
+      else
       {
         RETVAL_NEG_HUGE_VALL;
         NOT_MATHERRL {ERRNO_DOMAIN;}
@@ -1707,7 +1400,7 @@ else
           ERRNO_DOMAIN;
         }
       }
-      else 
+      else
       {
         RETVAL_NEG_HUGE_VALD;
         NOT_MATHERRD {ERRNO_DOMAIN;}
@@ -1728,7 +1421,7 @@ else
           ERRNO_DOMAIN;
         }
       }
-      else 
+      else
       {
         RETVAL_NEG_HUGE_VALF;
         NOT_MATHERRF {ERRNO_DOMAIN;}
@@ -1740,7 +1433,7 @@ else
     /* log10l(0) */
     {
        SINGL; NAMEL = (char *) "log10l";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
          NOT_MATHERRL
@@ -1754,14 +1447,14 @@ else
          RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case log10_zero:
     /* log10(0) */
     {
        SINGD; NAMED = (char *) "log10";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
          NOT_MATHERRD
@@ -1775,14 +1468,14 @@ else
          RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case log10f_zero:
     /* log10f(0) */
     {
        SINGF; NAMEF = (char *) "log10f";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
          NOT_MATHERRF
@@ -1796,17 +1489,17 @@ else
          RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case log10l_negative:
     /* log10l(x < 0) */
     {
        DOMAINL; NAMEL = (char *) "log10l";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_LOG10_NEGATIVE;
            ERRNO_DOMAIN;
@@ -1817,145 +1510,40 @@ else
          RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case log10_negative:
     /* log10(x < 0) */
     {
        DOMAIND; NAMED = (char *) "log10";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_LOG10_NEGATIVE;
            ERRNO_DOMAIN;
          }
-       }  
+       }
        else
        {
          RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case log10f_negative:
     /* log10f(x < 0) */
     {
        DOMAINF; NAMEF = (char *) "log10f";
-       ifSVID 
-       {
-         RETVAL_NEG_HUGEF;
-         NOT_MATHERRF 
-         {
-           WRITEF_LOG10_NEGATIVE;
-           ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         RETVAL_NEG_HUGE_VALF;
-         NOT_MATHERRF {ERRNO_DOMAIN;}
-       }
-       *(float *)retval = excf.retval;	
-       break;
-    }
-  case log2_zero:
-    /* log2(0) */
-    {
-       SINGD; NAMED = (char *) "log2";
-       ifSVID 
-       {
-         RETVAL_NEG_HUGED;
-         NOT_MATHERRD
-         {
-           WRITED_LOG2_ZERO;
-           ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         RETVAL_NEG_HUGE_VALD;
-         NOT_MATHERRD {ERRNO_DOMAIN;}
-       }
-       *(double *)retval = exc.retval;	
-       break;
-    }
-  case log2f_zero:
-    /* log2f(0) */
-    {
-       SINGF; NAMEF = (char *) "log2f";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
          NOT_MATHERRF
          {
-          WRITEF_LOG2_ZERO;
-          ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         RETVAL_NEG_HUGE_VALF;
-         NOT_MATHERRF {ERRNO_DOMAIN;}
-       }
-       *(float *)retval = excf.retval;	
-       break;
-    }
-  case log2l_negative:
-    /* log2l(x < 0) */
-    {
-       DOMAINL; NAMEL = (char *) "log2l";
-       ifSVID 
-       {
-         RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
-         {
-           WRITEL_LOG2_NEGATIVE;
-           ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         RETVAL_NEG_HUGE_VALL;
-         NOT_MATHERRL {ERRNO_DOMAIN;}
-       }
-       *(long double *)retval = excl.retval;	
-       break;
-    }
-  case log2_negative:
-    /* log2(x < 0) */
-    {
-       DOMAIND; NAMED = (char *) "log2";
-       ifSVID 
-       {
-         RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
-         {
-           WRITED_LOG2_NEGATIVE;
-           ERRNO_DOMAIN;
-         }
-       }  
-       else
-       {
-         RETVAL_NEG_HUGE_VALD;
-         NOT_MATHERRD {ERRNO_DOMAIN;}
-       }
-       *(double *)retval = exc.retval;	
-       break;
-    }
-  case log2f_negative:
-    /* log2f(x < 0) */
-    {
-       DOMAINF; NAMEF = (char *) "log2f";
-       ifSVID 
-       {
-         RETVAL_NEG_HUGEF;
-         NOT_MATHERRF 
-         {
-           WRITEF_LOG2_NEGATIVE;
+           WRITEF_LOG10_NEGATIVE;
            ERRNO_DOMAIN;
          }
        }
@@ -1964,14 +1552,14 @@ else
          RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case expl_overflow:
     /* expl overflow */
     {
        OVERFLOWL; NAMEL = (char *) "expl";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGEL;
        }
@@ -1980,14 +1568,14 @@ else
        RETVAL_HUGE_VALL;
        }
        NOT_MATHERRL {ERRNO_RANGE;}
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case exp_overflow:
     /* exp overflow */
     {
        OVERFLOWD; NAMED = (char *) "exp";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGED;
        }
@@ -1996,14 +1584,14 @@ else
          RETVAL_HUGE_VALD;
        }
        NOT_MATHERRD {ERRNO_RANGE;}
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case expf_overflow:
     /* expf overflow */
     {
        OVERFLOWF; NAMEF = (char *) "expf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGEF;
        }
@@ -2012,7 +1600,7 @@ else
          RETVAL_HUGE_VALF;
        }
        NOT_MATHERRF {ERRNO_RANGE;}
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case expl_underflow:
@@ -2020,7 +1608,7 @@ else
     {
        UNDERFLOWL; NAMEL = (char *) "expl"; RETVAL_ZEROL;
        NOT_MATHERRL {ERRNO_RANGE;}
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case exp_underflow:
@@ -2028,7 +1616,7 @@ else
     {
        UNDERFLOWD; NAMED = (char *) "exp"; RETVAL_ZEROD;
        NOT_MATHERRD {ERRNO_RANGE;}
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case expf_underflow:
@@ -2036,22 +1624,22 @@ else
     {
        UNDERFLOWF; NAMEF = (char *) "expf"; RETVAL_ZEROF;
        NOT_MATHERRF {ERRNO_RANGE;}
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case powl_zero_to_zero:
     /* powl 0**0 */
     {
        DOMAINL; NAMEL = (char *) "powl";
-       ifSVID 
+       ifSVID
        {
          RETVAL_ZEROL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
             WRITEL_POW_ZERO_TO_ZERO;
-            ERRNO_DOMAIN;
+            ERRNO_RANGE;
          }
-         *(long double *)retval = excl.retval;	
+         *(long double *)retval = excl.retval;
        }
        else RETVAL_ONEL;
        break;
@@ -2060,15 +1648,15 @@ else
     /* pow 0**0 */
     {
        DOMAIND; NAMED = (char *) "pow";
-       ifSVID 
+       ifSVID
        {
          RETVAL_ZEROD;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
             WRITED_POW_ZERO_TO_ZERO;
-            ERRNO_DOMAIN;
+            ERRNO_RANGE;
          }
-         *(double *)retval = exc.retval;	
+         *(double *)retval = exc.retval;
        }
        else RETVAL_ONED;
        break;
@@ -2077,15 +1665,15 @@ else
     /* powf 0**0 */
     {
        DOMAINF; NAMEF = (char *) "powf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_ZEROF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
           WRITEF_POW_ZERO_TO_ZERO;
-          ERRNO_DOMAIN;
+          ERRNO_RANGE;
          }
-         *(float *)retval = excf.retval;	
+         *(float *)retval = excf.retval;
        }
        else RETVAL_ONEF;
        break;
@@ -2094,54 +1682,54 @@ else
     /* powl(x,y) overflow */
     {
        OVERFLOWL; NAMEL = (char *) "powl";
-       ifSVID 
+       ifSVID
        {
          if (INPUT_XL < 0)  RETVAL_NEG_HUGEL;
          else RETVAL_HUGEL;
        }
        else
-       { 
+       {
          if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
          else RETVAL_HUGE_VALL;
        }
        NOT_MATHERRL {ERRNO_RANGE;}
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case pow_overflow:
     /* pow(x,y) overflow */
     {
        OVERFLOWD; NAMED = (char *) "pow";
-       ifSVID 
+       ifSVID
        {
          if (INPUT_XD < 0) RETVAL_NEG_HUGED;
          else RETVAL_HUGED;
        }
        else
-       { 
+       {
          if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
          else RETVAL_HUGE_VALD;
        }
        NOT_MATHERRD {ERRNO_RANGE;}
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case powf_overflow:
     /* powf(x,y) overflow */
     {
        OVERFLOWF; NAMEF = (char *) "powf";
-       ifSVID 
+       ifSVID
        {
          if (INPUT_XF < 0) RETVAL_NEG_HUGEF;
-         else RETVAL_HUGEF; 
+         else RETVAL_HUGEF;
        }
        else
-       { 
+       {
          if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
          else RETVAL_HUGE_VALF;
        }
        NOT_MATHERRF {ERRNO_RANGE;}
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case powl_underflow:
@@ -2149,7 +1737,7 @@ else
     {
        UNDERFLOWL; NAMEL = (char *) "powl"; RETVAL_ZEROL;
        NOT_MATHERRL {ERRNO_RANGE;}
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case pow_underflow:
@@ -2157,7 +1745,7 @@ else
     {
        UNDERFLOWD; NAMED = (char *) "pow"; RETVAL_ZEROD;
        NOT_MATHERRD {ERRNO_RANGE;}
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case powf_underflow:
@@ -2165,17 +1753,17 @@ else
     {
        UNDERFLOWF; NAMEF = (char *) "powf"; RETVAL_ZEROF;
        NOT_MATHERRF {ERRNO_RANGE;}
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case powl_zero_to_negative:
     /* 0 to neg */
     {
        DOMAINL; NAMEL = (char *) "powl";
-       ifSVID 
-       { 
+       ifSVID
+       {
          RETVAL_ZEROL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_POW_ZERO_TO_NEGATIVE;
            ERRNO_DOMAIN;
@@ -2186,17 +1774,17 @@ else
          RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case pow_zero_to_negative:
     /* 0**neg */
     {
        DOMAIND; NAMED = (char *) "pow";
-       ifSVID 
-       { 
+       ifSVID
+       {
          RETVAL_ZEROD;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_POW_ZERO_TO_NEGATIVE;
            ERRNO_DOMAIN;
@@ -2207,7 +1795,7 @@ else
          RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case powf_zero_to_negative:
@@ -2215,10 +1803,10 @@ else
     {
        DOMAINF; NAMEF = (char *) "powf";
        RETVAL_NEG_HUGE_VALF;
-       ifSVID 
-       { 
+       ifSVID
+       {
          RETVAL_ZEROF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
             WRITEF_POW_ZERO_TO_NEGATIVE;
             ERRNO_DOMAIN;
@@ -2229,17 +1817,17 @@ else
          RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case powl_neg_to_non_integer:
     /* neg**non_integral */
     {
        DOMAINL; NAMEL = (char *) "powl";
-       ifSVID 
-       { 
+       ifSVID
+       {
          RETVAL_ZEROF;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_POW_NEG_TO_NON_INTEGER;
            ERRNO_DOMAIN;
@@ -2249,17 +1837,17 @@ else
        {
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case pow_neg_to_non_integer:
     /* neg**non_integral */
     {
        DOMAIND; NAMED = (char *) "pow";
-       ifSVID 
-       { 
+       ifSVID
+       {
          RETVAL_ZEROD;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
             WRITED_POW_NEG_TO_NON_INTEGER;
             ERRNO_DOMAIN;
@@ -2269,17 +1857,17 @@ else
        {
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case powf_neg_to_non_integer:
     /* neg**non-integral */
     {
        DOMAINF; NAMEF = (char *) "powf";
-       ifSVID 
-       { 
+       ifSVID
+       {
          RETVAL_ZEROF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
             WRITEF_POW_NEG_TO_NON_INTEGER;
             ERRNO_DOMAIN;
@@ -2289,37 +1877,37 @@ else
        {
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case powl_nan_to_zero:
     /* pow(NaN,0.0) */
     /* Special Error */
     {
-       DOMAINL; NAMEL = (char *) "powl";
-       *(long double *)retval = *(long double *)arg1;
+       DOMAINL; NAMEL = (char *) "powl"; INPUT_XL; INPUT_YL;
+       excl.retval = *(long double *)arg1;
        NOT_MATHERRL {ERRNO_DOMAIN;}
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
-    } 
+    }
   case pow_nan_to_zero:
     /* pow(NaN,0.0) */
     /* Special Error */
     {
-       DOMAIND; NAMED = (char *) "pow";
-       *(double *)retval = *(double *)arg1;
+       DOMAIND; NAMED = (char *) "pow"; INPUT_XD; INPUT_YD;
+       exc.retval = *(double *)arg1;
        NOT_MATHERRD {ERRNO_DOMAIN;}
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case powf_nan_to_zero:
     /* powf(NaN,0.0) */
     /* Special Error */
     {
-       DOMAINF; NAMEF = (char *) "powf";
-       *(float *)retval = *(float *)arg1;
+       DOMAINF; NAMEF = (char *) "powf"; INPUT_XF; INPUT_YF;
+       excf.retval = *(float *)arg1;
        NOT_MATHERRF {ERRNO_DOMAIN;}
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case atan2l_zero:
@@ -2327,15 +1915,15 @@ else
     {
        DOMAINL; NAMEL = (char *) "atan2l";
        RETVAL_ZEROL;
-       NOT_MATHERRL 
+       NOT_MATHERRL
        {
-         ifSVID 
+         ifSVID
          {
             WRITEL_ATAN2_ZERO_BY_ZERO;
          }
          ERRNO_DOMAIN;
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case atan2_zero:
@@ -2343,15 +1931,15 @@ else
     {
        DOMAIND; NAMED = (char *) "atan2";
        RETVAL_ZEROD;
-       NOT_MATHERRD 
+       NOT_MATHERRD
        {
-         ifSVID 
-         { 
+         ifSVID
+         {
             WRITED_ATAN2_ZERO_BY_ZERO;
          }
          ERRNO_DOMAIN;
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case atan2f_zero:
@@ -2359,59 +1947,13 @@ else
     {
        DOMAINF; NAMEF = (char *) "atan2f";
        RETVAL_ZEROF;
-       NOT_MATHERRF 
-         ifSVID  
+       NOT_MATHERRF
+         ifSVID
          {
             WRITEF_ATAN2_ZERO_BY_ZERO;
          }
        ERRNO_DOMAIN;
-       *(float *)retval = excf.retval;	
-       break;
-    }
-  case atan2dl_zero:
-    /* atan2dl(0.0,0.0) */
-    {
-       DOMAINL; NAMEL = (char *) "atan2dl";
-       RETVAL_ZEROL;
-       NOT_MATHERRL 
-       {
-         ifSVID 
-         {
-            WRITEL_ATAN2D_ZERO_BY_ZERO;
-         }
-         ERRNO_DOMAIN;
-       }
-       *(long double *)retval = excl.retval;	
-       break;
-    }
-  case atan2d_zero:
-    /* atan2d(0.0,0.0) */
-    {
-       DOMAIND; NAMED = (char *) "atan2d";
-       RETVAL_ZEROD;
-       NOT_MATHERRD 
-       {
-         ifSVID 
-         { 
-            WRITED_ATAN2D_ZERO_BY_ZERO;
-         }
-         ERRNO_DOMAIN;
-       }
-       *(double *)retval = exc.retval;	
-       break;
-    }
-  case atan2df_zero:
-    /* atan2df(0.0,0.0) */
-    {
-       DOMAINF; NAMEF = (char *) "atan2df";
-       RETVAL_ZEROF;
-       NOT_MATHERRF 
-         ifSVID  
-         {
-            WRITEF_ATAN2D_ZERO_BY_ZERO;
-         }
-       ERRNO_DOMAIN;
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case expm1_overflow:
@@ -2448,8 +1990,8 @@ else
        UNDERFLOWL; NAMEL = (char *) "scalbl";
        if (INPUT_XL < 0.0L) RETVAL_NEG_ZEROL;
        else  RETVAL_ZEROL;
-       NOT_MATHERRL {ERRNO_RANGE;} 
-       *(long double *)retval = excl.retval;	
+       NOT_MATHERRL {ERRNO_RANGE;}
+       *(long double *)retval = excf.retval;
        break;
     }
   case scalb_underflow:
@@ -2458,8 +2000,8 @@ else
        UNDERFLOWD; NAMED = (char *) "scalb";
        if (INPUT_XD < 0.0) RETVAL_NEG_ZEROD;
        else  RETVAL_ZEROD;
-       NOT_MATHERRD {ERRNO_RANGE;} 
-       *(double *)retval = exc.retval;	
+       NOT_MATHERRD {ERRNO_RANGE;}
+       *(double *)retval = exc.retval;
        break;
     }
   case scalbf_underflow:
@@ -2468,8 +2010,8 @@ else
        UNDERFLOWF; NAMEF = (char *) "scalbf";
        if (INPUT_XF < 0.0) RETVAL_NEG_ZEROF;
        else  RETVAL_ZEROF;
-       NOT_MATHERRF {ERRNO_RANGE;} 
-       *(float *)retval = excf.retval;	
+       NOT_MATHERRF {ERRNO_RANGE;}
+       *(float *)retval = excf.retval;
        break;
     }
   case scalbl_overflow:
@@ -2478,8 +2020,8 @@ else
        OVERFLOWL; NAMEL = (char *) "scalbl";
        if (INPUT_XL < 0) RETVAL_NEG_HUGE_VALL;
        else RETVAL_HUGE_VALL;
-       NOT_MATHERRL {ERRNO_RANGE;} 
-       *(long double *)retval = excl.retval;	
+       NOT_MATHERRL {ERRNO_RANGE;}
+       *(long double *)retval = excl.retval;
        break;
     }
   case scalb_overflow:
@@ -2488,8 +2030,8 @@ else
        OVERFLOWD; NAMED = (char *) "scalb";
        if (INPUT_XD < 0) RETVAL_NEG_HUGE_VALD;
        else RETVAL_HUGE_VALD;
-       NOT_MATHERRD {ERRNO_RANGE;} 
-       *(double *)retval = exc.retval;	
+       NOT_MATHERRD {ERRNO_RANGE;}
+       *(double *)retval = exc.retval;
        break;
     }
   case scalbf_overflow:
@@ -2498,8 +2040,8 @@ else
        OVERFLOWF; NAMEF = (char *) "scalbf";
        if (INPUT_XF < 0) RETVAL_NEG_HUGE_VALF;
        else RETVAL_HUGE_VALF;
-       NOT_MATHERRF {ERRNO_RANGE;} 
-       *(float *)retval = excf.retval;	
+       NOT_MATHERRF {ERRNO_RANGE;}
+       *(float *)retval = excf.retval;
        break;
     }
   case hypotl_overflow:
@@ -2507,7 +2049,7 @@ else
     {
        OVERFLOWL; NAMEL = (char *) "hypotl";
        ifSVID
-       { 
+       {
          RETVAL_HUGEL;
        }
        else
@@ -2515,7 +2057,7 @@ else
          RETVAL_HUGE_VALL;
        }
        NOT_MATHERRL {ERRNO_RANGE;}
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case hypot_overflow:
@@ -2523,7 +2065,7 @@ else
     {
        OVERFLOWD; NAMED = (char *) "hypot";
        ifSVID
-       { 
+       {
          RETVAL_HUGED;
        }
        else
@@ -2531,14 +2073,14 @@ else
          RETVAL_HUGE_VALD;
        }
        NOT_MATHERRD {ERRNO_RANGE;}
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case hypotf_overflow:
     /* hypotf overflow */
-    { 
+    {
        OVERFLOWF; NAMEF = (char *) "hypotf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGEF;
        }
@@ -2547,7 +2089,7 @@ else
          RETVAL_HUGE_VALF;
        }
        NOT_MATHERRF {ERRNO_RANGE;}
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case acosl_gt_one:
@@ -2555,7 +2097,7 @@ else
     {
        DOMAINL; NAMEL = (char *) "acosl";
        RETVAL_ZEROL;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -2575,7 +2117,7 @@ else
     {
        DOMAIND; NAMED = (char *) "acos";
        RETVAL_ZEROD;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -2595,9 +2137,9 @@ else
     {
        DOMAINF; NAMEF = (char *) "acosf";
        RETVAL_ZEROF;
-       ifSVID 
+       ifSVID
        {
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_ACOS;
            ERRNO_DOMAIN;
@@ -2606,8 +2148,8 @@ else
        else
        {
          NOT_MATHERRF {ERRNO_DOMAIN;}
-       } 
-       *(float *)retval = excf.retval;	
+       }
+       *(float *)retval = excf.retval;
        break;
     }
   case asinl_gt_one:
@@ -2615,7 +2157,7 @@ else
     {
        DOMAINL; NAMEL = (char *) "asinl";
        RETVAL_ZEROL;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -2635,7 +2177,7 @@ else
     {
        DOMAIND; NAMED = (char *) "asin";
        RETVAL_ZEROD;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -2655,9 +2197,9 @@ else
     {
        DOMAINF; NAMEF = (char *) "asinf";
        RETVAL_ZEROF;
-       ifSVID 
+       ifSVID
        {
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
             WRITEF_ASIN;
             ERRNO_DOMAIN;
@@ -2666,128 +2208,8 @@ else
        else
        {
          NOT_MATHERRF {ERRNO_DOMAIN;}
-       } 
-       *(float *)retval = excf.retval;	
-       break;
-    }
-  case acosdl_gt_one:
-    /* acosdl(x > 1) */
-    {
-       DOMAINL; NAMEL = (char *) "acosdl";
-       RETVAL_ZEROL;
-       ifSVID 
-       {
-         NOT_MATHERRL
-         {
-           WRITEL_ACOSD;
-           ERRNO_DOMAIN;
-         }
        }
-       else
-       {
-         NOT_MATHERRL {ERRNO_DOMAIN;}
-       }
-       *(long double *)retval = excl.retval;
-       break;
-    }
-  case acosd_gt_one:
-    /* acosd(x > 1) */
-    {
-       DOMAIND; NAMED = (char *) "acosd";
-       RETVAL_ZEROD;
-       ifSVID 
-       {
-         NOT_MATHERRD
-         {
-           WRITED_ACOSD;
-           ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         NOT_MATHERRD {ERRNO_DOMAIN;}
-       }
-       *(double *)retval = exc.retval;
-       break;
-    }
-  case acosdf_gt_one:
-    /* acosdf(x > 1) */
-    {
-       DOMAINF; NAMEF = (char *) "acosdf";
-       RETVAL_ZEROF;
-       ifSVID 
-       {
-         NOT_MATHERRF 
-         {
-           WRITEF_ACOSD;
-           ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         NOT_MATHERRF {ERRNO_DOMAIN;}
-       } 
-       *(float *)retval = excf.retval;	
-       break;
-    }
-  case asindl_gt_one:
-    /* asindl(x > 1) */
-    {
-       DOMAINL; NAMEL = (char *) "asindl";
-       RETVAL_ZEROL;
-       ifSVID 
-       {
-         NOT_MATHERRL
-         {
-           WRITEL_ASIND;
-           ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         NOT_MATHERRL {ERRNO_DOMAIN;}
-       }
-       *(long double *)retval = excl.retval;
-       break;
-    }
-  case asind_gt_one:
-    /* asind(x > 1) */
-    {
-       DOMAIND; NAMED = (char *) "asind";
-       RETVAL_ZEROD;
-       ifSVID 
-       {
-         NOT_MATHERRD
-         {
-           WRITED_ASIND;
-           ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         NOT_MATHERRD {ERRNO_DOMAIN;}
-       }
-       *(double *)retval = exc.retval;
-       break;
-    }
-  case asindf_gt_one:
-    /* asindf(x > 1) */
-    {
-       DOMAINF; NAMEF = (char *) "asindf";
-       RETVAL_ZEROF;
-       ifSVID 
-       {
-         NOT_MATHERRF 
-         {
-            WRITEF_ASIND;
-            ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         NOT_MATHERRF {ERRNO_DOMAIN;}
-       } 
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
  case coshl_overflow:
@@ -2798,7 +2220,7 @@ else
       {
         RETVAL_HUGEL;
       }
-      else 
+      else
       {
         RETVAL_HUGE_VALL;
       }
@@ -2814,7 +2236,7 @@ else
       {
         RETVAL_HUGED;
       }
-      else 
+      else
       {
         RETVAL_HUGE_VALD;
       }
@@ -2830,7 +2252,7 @@ else
       {
         RETVAL_HUGEF;
       }
-      else 
+      else
       {
         RETVAL_HUGE_VALF;
       }
@@ -2847,7 +2269,7 @@ else
         if (INPUT_XL > 0.0) RETVAL_HUGEL;
         else RETVAL_NEG_HUGEL;
       }
-      else 
+      else
       {
         if (INPUT_XL > 0.0) RETVAL_HUGE_VALL;
         else RETVAL_NEG_HUGE_VALL;
@@ -2865,7 +2287,7 @@ else
         if (INPUT_XD > 0.0) RETVAL_HUGED;
         else RETVAL_NEG_HUGED;
       }
-      else 
+      else
       {
         if (INPUT_XD > 0.0) RETVAL_HUGE_VALD;
         else RETVAL_NEG_HUGE_VALD;
@@ -2883,7 +2305,7 @@ else
         if( INPUT_XF > 0.0) RETVAL_HUGEF;
         else RETVAL_NEG_HUGEF;
       }
-      else 
+      else
       {
         if (INPUT_XF > 0.0) RETVAL_HUGE_VALF;
         else RETVAL_NEG_HUGE_VALF;
@@ -2896,7 +2318,7 @@ else
     /* acoshl(x < 1) */
     {
        DOMAINL; NAMEL = (char *) "acoshl";
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -2904,10 +2326,7 @@ else
            ERRNO_DOMAIN;
          }
        }
-       else 
-       {
-           NOT_MATHERRL {ERRNO_DOMAIN;}
-       }
+       else NOT_MATHERRL {ERRNO_DOMAIN;}
        *(long double *)retval = excl.retval;
        break;
     }
@@ -2915,7 +2334,7 @@ else
     /* acosh(x < 1) */
     {
        DOMAIND; NAMED = (char *) "acosh";
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -2923,10 +2342,7 @@ else
           ERRNO_DOMAIN;
          }
        }
-       else 
-       {
-          NOT_MATHERRD {ERRNO_DOMAIN;}
-       }
+       else NOT_MATHERRD {ERRNO_DOMAIN;}
        *(double *)retval = exc.retval;
        break;
     }
@@ -2934,7 +2350,7 @@ else
     /* acoshf(x < 1) */
     {
        DOMAINF; NAMEF = (char *) "acoshf";
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRF
          {
@@ -2953,7 +2369,7 @@ else
     /* atanhl(|x| > 1) */
     {
        DOMAINL; NAMEL = (char *) "atanhl";
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -2971,7 +2387,7 @@ else
     /* atanh(|x| > 1) */
     {
        DOMAIND; NAMED = (char *) "atanh";
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -2989,7 +2405,7 @@ else
     /* atanhf(|x| > 1) */
     {
        DOMAINF; NAMEF = (char *) "atanhf";
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRF
          {
@@ -3006,8 +2422,8 @@ else
   case atanhl_eq_one:
     /* atanhl(|x| == 1) */
     {
-       SINGL; NAMEL = (char *) "atanhl";
-       ifSVID 
+       SINGL; NAMEL = (char *)"atanhl";
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -3025,7 +2441,7 @@ else
     /* atanh(|x| == 1) */
     {
        SINGD; NAMED = (char *) "atanh";
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -3043,7 +2459,7 @@ else
     /* atanhf(|x| == 1) */
     {
        SINGF; NAMEF = (char *) "atanhf";
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRF
          {
@@ -3061,7 +2477,7 @@ else
     /* gammal overflow */
     {
        OVERFLOWL; NAMEL = (char *) "gammal";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGEL;
        }
@@ -3069,15 +2485,15 @@ else
        {
          RETVAL_HUGE_VALL;
        }
-       NOT_MATHERRL{ERRNO_RANGE;}
-       *(long double*)retval = excl.retval;
+       NOT_MATHERRL {ERRNO_RANGE;}
+       *(long double *)retval = excl.retval;
        break;
     }
   case gamma_overflow:
     /* gamma overflow */
     {
        OVERFLOWD; NAMED = (char *) "gamma";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGED;
        }
@@ -3085,94 +2501,31 @@ else
        {
          RETVAL_HUGE_VALD;
        }
-       NOT_MATHERRD{ERRNO_RANGE;}
-       *(double*)retval = exc.retval;
+       NOT_MATHERRD {ERRNO_RANGE;}
+       *(double *)retval = exc.retval;
        break;
     }
   case gammaf_overflow:
     /* gammaf overflow */
     {
        OVERFLOWF; NAMEF = (char *) "gammaf";
-       ifSVID 
-       {
-         RETVAL_HUGEF;
-       }
-       else
-       {
-         RETVAL_HUGE_VALF;
-       }
-       NOT_MATHERRF{ERRNO_RANGE;}
-       *(float*)retval = excf.retval;
-       break;
-    }
-  case gammal_negative:
-    /* gammal -int or 0 */
-    {
-       SINGL; NAMEL = (char *) "gammal";
        ifSVID
        {
-         RETVAL_HUGEL;
-         NOT_MATHERRL
-         {
-            WRITEL_GAMMA_NEGATIVE;
-            ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         RETVAL_HUGE_VALL;
-         NOT_MATHERRL{ERRNO_DOMAIN;}
-       }
-       *(long double*)retval = excl.retval;	
-       break;
-    }
-  case gamma_negative:
-    /* gamma -int or 0 */
-    {
-       SINGD; NAMED = (char *) "gamma";
-       ifSVID 
-       {
-         RETVAL_HUGED;
-         NOT_MATHERRD
-         {
-            WRITED_GAMMA_NEGATIVE;
-            ERRNO_DOMAIN;
-         }
-       }
-       else
-       {
-         RETVAL_HUGE_VALD;
-         NOT_MATHERRD{ERRNO_DOMAIN;}
-       }
-       *(double*)retval = exc.retval;	
-       break;
-    }
-  case gammaf_negative:
-    /* gammaf -int or 0 */
-    {
-       SINGF; NAMEF = (char *) "gammaf";
-       ifSVID 
-       {
          RETVAL_HUGEF;
-         NOT_MATHERRF
-         {
-            WRITEF_GAMMA_NEGATIVE;
-            ERRNO_DOMAIN;
-         }
        }
        else
        {
          RETVAL_HUGE_VALF;
-         NOT_MATHERRF{ERRNO_DOMAIN;}
        }
-       *(float*)retval = excf.retval;	
+       NOT_MATHERRF {ERRNO_RANGE;}
+       *(float *)retval = excf.retval;
        break;
     }
   case lgammal_overflow:
     /* lgammal overflow */
     {
        OVERFLOWL; NAMEL = (char *) "lgammal";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGEL;
        }
@@ -3180,15 +2533,15 @@ else
        {
          RETVAL_HUGE_VALL;
        }
-       NOT_MATHERRL{ERRNO_RANGE;}
-       *(long double*)retval = excl.retval;
+       NOT_MATHERRL {ERRNO_RANGE;}
+       *(long double *)retval = excl.retval;
        break;
     }
   case lgamma_overflow:
     /* lgamma overflow */
     {
        OVERFLOWD; NAMED = (char *) "lgamma";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGED;
        }
@@ -3196,15 +2549,15 @@ else
        {
          RETVAL_HUGE_VALD;
        }
-       NOT_MATHERRD{ERRNO_RANGE;}
-       *(double*)retval = exc.retval;
+       NOT_MATHERRD {ERRNO_RANGE;}
+       *(double *)retval = exc.retval;
        break;
     }
   case lgammaf_overflow:
     /* lgammaf overflow */
     {
        OVERFLOWF; NAMEF = (char *) "lgammaf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGEF;
        }
@@ -3212,8 +2565,8 @@ else
        {
          RETVAL_HUGE_VALF;
        }
-       NOT_MATHERRF{ERRNO_RANGE;}
-       *(float*)retval = excf.retval;
+       NOT_MATHERRF {ERRNO_RANGE;}
+       *(float *)retval = excf.retval;
        break;
     }
   case lgammal_negative:
@@ -3225,16 +2578,16 @@ else
          RETVAL_HUGEL;
          NOT_MATHERRL
          {
-            WRITEL_GAMMA_NEGATIVE;
-            ERRNO_DOMAIN;
+           WRITEL_LGAMMA_NEGATIVE;
+           ERRNO_DOMAIN;
          }
        }
        else
        {
          RETVAL_HUGE_VALL;
-         NOT_MATHERRL{ERRNO_DOMAIN;}
+         NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double*)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case lgamma_negative:
@@ -3253,16 +2606,16 @@ else
        else
        {
          RETVAL_HUGE_VALD;
-         NOT_MATHERRD{ERRNO_DOMAIN;}
+         NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double*)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case lgammaf_negative:
     /* lgammaf -int or 0 */
     {
        SINGF; NAMEF = (char *) "lgammaf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_HUGEF;
          NOT_MATHERRF
@@ -3274,114 +2627,72 @@ else
        else
        {
          RETVAL_HUGE_VALF;
-         NOT_MATHERRF{ERRNO_DOMAIN;}
+         NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float*)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
-  case tgammal_overflow:
-    /* tgammal overflow */
+  case gammal_negative:
+    /* gammal -int or 0 */
     {
-       OVERFLOWL; NAMEL = (char *) "tgammal";
-       ifSVID 
+       SINGL; NAMEL = (char *) "gammal";
+       ifSVID
        {
          RETVAL_HUGEL;
-       }
-       else
-       {
-         RETVAL_HUGE_VALL;
-       }
-       NOT_MATHERRL{ERRNO_RANGE;}
-       *(long double*)retval = excl.retval;
-       break;
-    }
-  case tgamma_overflow:
-    /* tgamma overflow */
-    {
-       OVERFLOWD; NAMED = (char *) "tgamma";
-       ifSVID 
-       {
-         RETVAL_HUGED;
-       }
-       else
-       {
-         RETVAL_HUGE_VALD;
-       }
-       NOT_MATHERRD{ERRNO_RANGE;}
-       *(double*)retval = exc.retval;
-       break;
-    }
-  case tgammaf_overflow:
-    /* tgammaf overflow */
-    {
-       OVERFLOWF; NAMEF = (char *) "tgammaf";
-       ifSVID 
-       {
-         RETVAL_HUGEF;
-       }
-       else
-       {
-         RETVAL_HUGE_VALF;
-       }
-       NOT_MATHERRF{ERRNO_RANGE;}
-       *(float*)retval = excf.retval;
-       break;
-    }
-  case tgammal_negative:
-    /* tgammal -int or 0 */
-    {
-       SINGL; NAMEL = (char *) "tgammal";
-       ifSVID 
-       {
          NOT_MATHERRL
          {
-           WRITEL_TGAMMA_NEGATIVE;
-           ERRNO_DOMAIN;
+            WRITEL_GAMMA_NEGATIVE;
+            ERRNO_DOMAIN;
          }
        }
        else
        {
-         NOT_MATHERRL{ERRNO_DOMAIN;}
+         RETVAL_HUGE_VALL;
+         NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double*)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
-  case tgamma_negative:
-    /* tgamma -int or 0 */
+  case gamma_negative:
+    /* gamma -int or 0 */
     {
-       SINGD; NAMED = (char *) "tgamma";
-       ifSVID 
+       SINGD; NAMED = (char *) "gamma";
+       ifSVID
        {
+         RETVAL_HUGED;
          NOT_MATHERRD
          {
-           WRITED_TGAMMA_NEGATIVE;
-           ERRNO_DOMAIN;
+            WRITED_GAMMA_NEGATIVE;
+            ERRNO_DOMAIN;
          }
        }
        else
        {
-         NOT_MATHERRD{ERRNO_DOMAIN;}
+         RETVAL_HUGE_VALD;
+         NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double*)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
-  case tgammaf_negative:
-    /* tgammaf -int or 0 */
+  case gammaf_negative:
+    /* gammaf -int or 0 */
     {
-       SINGF; NAMEF = (char *) "tgammaf";
-       ifSVID 
+       SINGF; NAMEF = (char *) "gammaf";
+       ifSVID
        {
+         RETVAL_HUGEF;
          NOT_MATHERRF
          {
-           WRITEF_TGAMMA_NEGATIVE;
-           ERRNO_DOMAIN;
+            WRITEF_GAMMA_NEGATIVE;
+            ERRNO_DOMAIN;
          }
        }
        else
        {
-         NOT_MATHERRF{ERRNO_DOMAIN;}
+         RETVAL_HUGE_VALF;
+         NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float*)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case j0l_gt_loss:
@@ -3389,7 +2700,7 @@ else
     {
        TLOSSL; NAMEL = (char *) "j0l";
        RETVAL_ZEROL;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -3401,7 +2712,7 @@ else
        {
          NOT_MATHERRL {ERRNO_RANGE;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case j0_gt_loss:
@@ -3409,7 +2720,7 @@ else
     {
        TLOSSD; NAMED = (char *) "j0";
        RETVAL_ZEROD;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -3421,7 +2732,7 @@ else
        {
          NOT_MATHERRD {ERRNO_RANGE;}
        }
-       *(double*)retval = exc.retval;	
+       *(double*)retval = exc.retval;
        break;
     }
   case j0f_gt_loss:
@@ -3429,7 +2740,7 @@ else
     {
        TLOSSF; NAMEF = (char *) "j0f";
        RETVAL_ZEROF;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRF
          {
@@ -3449,7 +2760,7 @@ else
     {
        TLOSSL; NAMEL = (char *) "j1l";
        RETVAL_ZEROL;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -3461,7 +2772,7 @@ else
        {
          NOT_MATHERRL {ERRNO_RANGE;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case j1_gt_loss:
@@ -3469,7 +2780,7 @@ else
     {
        TLOSSD; NAMED = (char *) "j1";
        RETVAL_ZEROD;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -3481,7 +2792,7 @@ else
        {
          NOT_MATHERRD {ERRNO_RANGE;}
        }
-       *(double*)retval = exc.retval;	
+       *(double*)retval = exc.retval;
        break;
     }
   case j1f_gt_loss:
@@ -3489,7 +2800,7 @@ else
     {
        TLOSSF; NAMEF = (char *) "j1f";
        RETVAL_ZEROF;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRF
          {
@@ -3509,7 +2820,7 @@ else
     {
        TLOSSL; NAMEL = (char *) "jnl";
        RETVAL_ZEROL;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -3521,7 +2832,7 @@ else
        {
          NOT_MATHERRL {ERRNO_RANGE;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case jn_gt_loss:
@@ -3529,7 +2840,7 @@ else
     {
        TLOSSD; NAMED = (char *) "jn";
        RETVAL_ZEROD;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -3541,7 +2852,7 @@ else
        {
          NOT_MATHERRD {ERRNO_RANGE;}
        }
-       *(double*)retval = exc.retval;	
+       *(double*)retval = exc.retval;
        break;
     }
   case jnf_gt_loss:
@@ -3549,7 +2860,7 @@ else
     {
        TLOSSF; NAMEF = (char *) "jnf";
        RETVAL_ZEROF;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRF
          {
@@ -3569,7 +2880,7 @@ else
     {
        TLOSSL; NAMEL = (char *) "y0l";
        RETVAL_ZEROL;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -3589,7 +2900,7 @@ else
     {
        TLOSSD; NAMED = (char *) "y0";
        RETVAL_ZEROD;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -3609,7 +2920,7 @@ else
     {
        TLOSSF; NAMEF = (char *) "y0f";
        RETVAL_ZEROF;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRF
          {
@@ -3628,10 +2939,10 @@ else
     /* y0l(0) */
     {
        DOMAINL; NAMEL = (char *) "y0l";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_Y0_ZERO;
            ERRNO_DOMAIN;
@@ -3639,20 +2950,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALL; 
+         RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case y0_zero:
     /* y0(0) */
     {
        DOMAIND; NAMED = (char *) "y0";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_Y0_ZERO;
            ERRNO_DOMAIN;
@@ -3660,20 +2971,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALD; 
+         RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case y0f_zero:
     /* y0f(0) */
     {
        DOMAINF; NAMEF = (char *) "y0f";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_Y0_ZERO;
            ERRNO_DOMAIN;
@@ -3681,10 +2992,10 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALF; 
+         RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case y1l_gt_loss:
@@ -3692,7 +3003,7 @@ else
     {
        TLOSSL; NAMEL = (char *) "y1l";
        RETVAL_ZEROL;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -3712,7 +3023,7 @@ else
     {
        TLOSSD; NAMED = (char *) "y1";
        RETVAL_ZEROD;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -3732,7 +3043,7 @@ else
     {
        TLOSSF; NAMEF = (char *) "y1f";
        RETVAL_ZEROF;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRF
          {
@@ -3751,10 +3062,10 @@ else
     /* y1l(0) */
     {
        DOMAINL; NAMEL = (char *) "y1l";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_Y1_ZERO;
            ERRNO_DOMAIN;
@@ -3762,20 +3073,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALL; 
+         RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case y1_zero:
     /* y1(0) */
     {
        DOMAIND; NAMED = (char *) "y1";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_Y1_ZERO;
            ERRNO_DOMAIN;
@@ -3783,30 +3094,30 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALD; 
+         RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case y1f_zero:
     /* y1f(0) */
     {
        DOMAINF; NAMEF = (char *) "y1f";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_Y1_ZERO;
            ERRNO_DOMAIN;
          }
        }else
        {
-         RETVAL_NEG_HUGE_VALF; 
+         RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case ynl_gt_loss:
@@ -3814,7 +3125,7 @@ else
     {
        TLOSSL; NAMEL = (char *) "ynl";
        RETVAL_ZEROL;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRL
          {
@@ -3834,7 +3145,7 @@ else
     {
        TLOSSD; NAMED = (char *) "yn";
        RETVAL_ZEROD;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRD
          {
@@ -3854,7 +3165,7 @@ else
     {
        TLOSSF; NAMEF = (char *) "ynf";
        RETVAL_ZEROF;
-       ifSVID 
+       ifSVID
        {
          NOT_MATHERRF
          {
@@ -3873,10 +3184,10 @@ else
     /* ynl(0) */
     {
        DOMAINL; NAMEL = (char *) "ynl";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_YN_ZERO;
            ERRNO_DOMAIN;
@@ -3884,20 +3195,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALL; 
+         RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case yn_zero:
     /* yn(0) */
     {
        DOMAIND; NAMED = (char *) "yn";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_YN_ZERO;
            ERRNO_DOMAIN;
@@ -3905,20 +3216,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALD; 
+         RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case ynf_zero:
     /* ynf(0) */
     {
        DOMAINF; NAMEF = (char *) "ynf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_YN_ZERO;
            ERRNO_DOMAIN;
@@ -3926,20 +3237,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALF; 
+         RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case y0l_negative:
     /* y0l(x<0) */
     {
        DOMAINL; NAMEL = (char *) "y0l";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_Y0_NEGATIVE;
            ERRNO_DOMAIN;
@@ -3947,20 +3258,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALL; 
+         RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case y0_negative:
     /* y0(x<0) */
     {
        DOMAIND; NAMED = (char *) "y0";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_Y0_NEGATIVE;
            ERRNO_DOMAIN;
@@ -3968,20 +3279,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALD; 
+         RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case y0f_negative:
     /* y0f(x<0) */
     {
        DOMAINF; NAMEF = (char *) "y0f";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_Y0_NEGATIVE;
            ERRNO_DOMAIN;
@@ -3989,20 +3300,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALF; 
+         RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case y1l_negative:
     /* y1l(x<0) */
     {
        DOMAINL; NAMEL = (char *) "y1l";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
            WRITEL_Y1_NEGATIVE;
            ERRNO_DOMAIN;
@@ -4010,20 +3321,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALL; 
+         RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case y1_negative:
     /* y1(x<0) */
     {
        DOMAIND; NAMED = (char *) "y1";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
           WRITED_Y1_NEGATIVE;
            ERRNO_DOMAIN;
@@ -4031,20 +3342,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALD; 
+         RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case y1f_negative:
     /* y1f(x<0) */
     {
        DOMAINF; NAMEF = (char *) "y1f";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_Y1_NEGATIVE;
            ERRNO_DOMAIN;
@@ -4052,20 +3363,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALF; 
+         RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
   case ynl_negative:
     /* ynl(x<0) */
     {
        DOMAINL; NAMEL = (char *) "ynl";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEL;
-         NOT_MATHERRL 
+         NOT_MATHERRL
          {
           WRITEL_YN_NEGATIVE;
           ERRNO_DOMAIN;
@@ -4073,20 +3384,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALL; 
+         RETVAL_NEG_HUGE_VALL;
          NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
   case yn_negative:
     /* yn(x<0) */
     {
        DOMAIND; NAMED = (char *) "yn";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGED;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_YN_NEGATIVE;
            ERRNO_DOMAIN;
@@ -4094,20 +3405,20 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALD; 
+         RETVAL_NEG_HUGE_VALD;
          NOT_MATHERRD {ERRNO_DOMAIN;}
        }
-       *(double *)retval = exc.retval;	
+       *(double *)retval = exc.retval;
        break;
     }
   case ynf_negative:
     /* ynf(x<0) */
     {
        DOMAINF; NAMEF = (char *) "ynf";
-       ifSVID 
+       ifSVID
        {
          RETVAL_NEG_HUGEF;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_YN_NEGATIVE;
            ERRNO_DOMAIN;
@@ -4115,18 +3426,18 @@ else
        }
        else
        {
-         RETVAL_NEG_HUGE_VALF; 
+         RETVAL_NEG_HUGE_VALF;
          NOT_MATHERRF {ERRNO_DOMAIN;}
        }
-       *(float *)retval = excf.retval;	
+       *(float *)retval = excf.retval;
        break;
     }
-  case fmodl_by_zero: 
+  case fmodl_by_zero:
     /* fmodl(x,0) */
     {
        DOMAINL; NAMEL = (char *) "fmodl";
-       ifSVID 
-       { 
+       ifSVID
+       {
             *(long double *)retval = *(long double *)arg1;
             NOT_MATHERRL
             {
@@ -4134,21 +3445,21 @@ else
               ERRNO_DOMAIN;
             }
        }
-       else 
+       else
        { /* NaN already computed */
             NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
-  case fmod_by_zero: 
+  case fmod_by_zero:
     /* fmod(x,0) */
     {
        DOMAIND; NAMED = (char *) "fmod";
-       ifSVID 
+       ifSVID
        {
          *(double *)retval = *(double *)arg1;
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_FMOD;
            ERRNO_DOMAIN;
@@ -4157,18 +3468,18 @@ else
        else
        { /* NaN already computed */
          NOT_MATHERRD {ERRNO_DOMAIN;}
-       } 
-       *(double *)retval = exc.retval;	
+       }
+       *(double *)retval = exc.retval;
        break;
     }
-  case fmodf_by_zero: 
+  case fmodf_by_zero:
     /* fmodf(x,0) */
     {
        DOMAINF; NAMEF = (char *) "fmodf";
-       ifSVID 
+       ifSVID
        {
          *(float *)retval = *(float *)arg1;
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_FMOD;
            ERRNO_DOMAIN;
@@ -4177,36 +3488,36 @@ else
        else
        {
          NOT_MATHERRF {ERRNO_DOMAIN;}
-       } 
-       *(float *)retval = excf.retval;	
+       }
+       *(float *)retval = excf.retval;
        break;
     }
-  case remainderl_by_zero: 
+  case remainderl_by_zero:
     /* remainderl(x,0) */
     {
        DOMAINL; NAMEL = (char *) "remainderl";
-       ifSVID 
-       { 
+       ifSVID
+       {
           NOT_MATHERRL
           {
             WRITEL_REM;
             ERRNO_DOMAIN;
           }
        }
-       else 
+       else
        { /* NaN already computed */
             NOT_MATHERRL {ERRNO_DOMAIN;}
        }
-       *(long double *)retval = excl.retval;	
+       *(long double *)retval = excl.retval;
        break;
     }
-  case remainder_by_zero: 
+  case remainder_by_zero:
     /* remainder(x,0) */
     {
        DOMAIND; NAMED = (char *) "remainder";
-       ifSVID 
+       ifSVID
        {
-         NOT_MATHERRD 
+         NOT_MATHERRD
          {
            WRITED_REM;
            ERRNO_DOMAIN;
@@ -4215,17 +3526,17 @@ else
        else
        { /* NaN already computed */
          NOT_MATHERRD {ERRNO_DOMAIN;}
-       } 
-       *(double *)retval = exc.retval;	
+       }
+       *(double *)retval = exc.retval;
        break;
     }
-  case remainderf_by_zero: 
+  case remainderf_by_zero:
     /* remainderf(x,0) */
     {
        DOMAINF; NAMEF = (char *) "remainderf";
-       ifSVID 
+       ifSVID
        {
-         NOT_MATHERRF 
+         NOT_MATHERRF
          {
            WRITEF_REM;
            ERRNO_DOMAIN;
@@ -4234,14 +3545,12 @@ else
        else
        {
          NOT_MATHERRF {ERRNO_DOMAIN;}
-       } 
-       *(float *)retval = excf.retval;	
+       }
+       *(float *)retval = excf.retval;
        break;
     }
   default:
-    /* We don't want to abort () since SVID doesn't cover all math
-       library functions.  */
-    break;
+    abort();
    }
    return;
    }
diff --git a/sysdeps/ia64/fpu/libm_reduce.S b/sysdeps/ia64/fpu/libm_reduce.S
index 8bdf91d6de..1c7f4e1e88 100644
--- a/sysdeps/ia64/fpu/libm_reduce.S
+++ b/sysdeps/ia64/fpu/libm_reduce.S
@@ -1,10 +1,10 @@
 .file "libm_reduce.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,310 +20,304 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
-// History:
-// 02/02/00 Initial Version
-// 05/13/02 Rescheduled for speed, changed interface to pass
-//          parameters in fp registers
-// 02/10/03 Reordered header: .section, .global, .proc, .align;
-//          used data8 for long double data storage
+// History:  02/02/00 Initial Version
 //
-//*********************************************************************
-//*********************************************************************
+// *********************************************************************
+// *********************************************************************
 //
 // Function:   __libm_pi_by_two_reduce(x) return r, c, and N where
 //             x = N * pi/4 + (r+c) , where |r+c| <= pi/4.
 //             This function is not designed to be used by the
 //             general user.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Accuracy:       Returns double-precision values
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Resources Used:
 //
-//    Floating-Point Registers:
-//      f8  = Input x, return value r
-//      f9  = return value c
-//      f32-f70
+//    Floating-Point Registers: f32-f70
 //
 //    General Purpose Registers:
 //      r8  = return value N
+//      r32 = Address of x
+//      r33 = Address of where to place r and then c 
 //      r34-r64
 //
 //    Predicate Registers:      p6-p14
 //
-//*********************************************************************
+// *********************************************************************
 //
 // IEEE Special Conditions:
 //
-//    No condions should be raised.
+//    No condions should be raised. 
 //
-//*********************************************************************
+// *********************************************************************
 //
 // I. Introduction
 // ===============
 //
 // For the forward trigonometric functions sin, cos, sincos, and
-// tan, the original algorithms for IA 64 handle arguments up to
+// tan, the original algorithms for IA 64 handle arguments up to 
 // 1 ulp less than 2^63 in magnitude. For double-extended arguments x,
-// |x| >= 2^63, this routine returns N and r_hi, r_lo where
-//
+// |x| >= 2^63, this routine returns CASE, N and r_hi, r_lo where
+// 
 //    x  is accurately approximated by
 //    2*K*pi  +  N * pi/2  +  r_hi + r_lo,  |r_hi+r_lo| <= pi/4.
 //    CASE = 1 or 2.
 //    CASE is 1 unless |r_hi + r_lo| < 2^(-33).
-//
+// 
 // The exact value of K is not determined, but that information is
 // not required in trigonometric function computations.
-//
-// We first assume the argument x in question satisfies x >= 2^(63).
+// 
+// We first assume the argument x in question satisfies x >= 2^(63). 
 // In particular, it is positive. Negative x can be handled by symmetry:
-//
+// 
 //   -x  is accurately approximated by
 //         -2*K*pi  +  (-N) * pi/2  -  (r_hi + r_lo),  |r_hi+r_lo| <= pi/4.
-//
+// 
 // The idea of the reduction is that
-//
-//       x  *  2/pi   =   N_big  +  N  +  f,      |f| <= 1/2
-//
+// 
+// 	x  *  2/pi   =   N_big  +  N  +  f,	|f| <= 1/2
+// 
 // Moreover, for double extended x, |f| >= 2^(-75). (This is an
 // non-obvious fact found by enumeration using a special algorithm
-// involving continued fraction.) The algorithm described below
+// involving continued fraction.) The algorithm described below 
 // calculates N and an accurate approximation of f.
-//
-// Roughly speaking, an appropriate 256-bit (4 X 64) portion of
+// 
+// Roughly speaking, an appropriate 256-bit (4 X 64) portion of 
 // 2/pi is multiplied with x to give the desired information.
-//
+// 
 // II. Representation of 2/PI
 // ==========================
-//
+// 
 // The value of 2/pi in binary fixed-point is
-//
+// 
 //            .101000101111100110......
-//
+// 
 // We store 2/pi in a table, starting at the position corresponding
-// to bit position 63
-//
+// to bit position 63 
+// 
 //   bit position  63 62 ... 0   -1 -2 -3 -4 -5 -6 -7  ....  -16576
-//
-//              0  0  ... 0  . 1  0  1  0  1  0  1  ....    X
-//
+// 
+// 	 	0  0  ... 0  . 1  0  1  0  1  0  1  ....    X
+//                 
 //                              ^
-//                               |__ implied binary pt
-//
+// 	     	             |__ implied binary pt 
+// 
 // III. Algorithm
 // ==============
-//
+// 
 // This describes the algorithm in the most natural way using
-// unsigned interger multiplication. The implementation section
+// unsigned interger multiplication. The implementation section 
 // describes how the integer arithmetic is simulated.
-//
+// 
 // STEP 0. Initialization
 // ----------------------
-//
-// Let the input argument x be
-//
+// 
+// Let the input argument x be 
+// 
 //     x = 2^m * ( 1. b_1 b_2 b_3 ... b_63 ),  63 <= m <= 16383.
-//
-// The first crucial step is to fetch four 64-bit portions of 2/pi.
+// 
+// The first crucial step is to fetch four 64-bit portions of 2/pi. 
 // To fulfill this goal, we calculate the bit position L of the
 // beginning of these 256-bit quantity by
-//
+// 
 //     L :=  62 - m.
-//
-// Note that -16321 <= L <= -1 because 63 <= m <= 16383; and that
+// 
+// Note that -16321 <= L <= -1 because 63 <= m <= 16383; and that 
 // the storage of 2/pi is adequate.
-//
+// 
 // Fetch P_1, P_2, P_3, P_4 beginning at bit position L thus:
-//
+// 
 //      bit position  L  L-1  L-2    ...  L-63
-//
+// 
 //      P_1    =      b   b    b     ...    b
-//
+// 
 // each b can be 0 or 1. Also, let P_0 be the two bits correspoding to
 // bit positions L+2 and L+1. So, when each of the P_j is interpreted
 // with appropriate scaling, we have
 //
 //      2/pi  =  P_big  + P_0 + (P_1 + P_2 + P_3 + P_4)  +  P_small
-//
+// 
 // Note that P_big and P_small can be ignored. The reasons are as follow.
 // First, consider P_big. If P_big = 0, we can certainly ignore it.
-// Otherwise, P_big >= 2^(L+3). Now,
-//
+// Otherwise, P_big >= 2^(L+3). Now, 
+// 
 //        P_big * ulp(x) >=  2^(L+3) * 2^(m-63)
-//                   >=  2^(65-m  +  m-63 )
-//                   >=  2^2
-//
+// 		      >=  2^(65-m  +  m-63 )
+// 		      >=  2^2
+// 
 // Thus, P_big * x is an integer of the form 4*K. So
-//
-//       x = 4*K * (pi/2) + x*(P_0 + P_1 + P_2 + P_3 + P_4)*(pi/2)
+// 
+// 	x = 4*K * (pi/2) + x*(P_0 + P_1 + P_2 + P_3 + P_4)*(pi/2)
 //                + x*P_small*(pi/2).
-//
+// 
 // Hence, P_big*x corresponds to information that can be ignored for
 // trigonometic function evaluation.
-//
+// 
 // Next, we must estimate the effect of ignoring P_small. The absolute
 // error made by ignoring P_small is bounded by
-//
+// 
 //       |P_small * x|  <=  ulp(P_4) * x
-//                  <=  2^(L-255) * 2^(m+1)
-//                  <=  2^(62-m-255 + m + 1)
-//                  <=  2^(-192)
-//
-// Since for double-extended precision, x * 2/pi = integer + f,
+// 		     <=  2^(L-255) * 2^(m+1)
+// 		     <=  2^(62-m-255 + m + 1)
+// 		     <=  2^(-192)
+// 
+// Since for double-extended precision, x * 2/pi = integer + f, 
 // 0.5 >= |f| >= 2^(-75), the relative error introduced by ignoring
 // P_small is bounded by 2^(-192+75) <= 2^(-117), which is acceptable.
-//
+// 
 // Further note that if x is split into x_hi + x_lo where x_lo is the
 // two bits corresponding to bit positions 2^(m-62) and 2^(m-63); then
-//
-//       P_0 * x_hi
-//
+// 
+// 	P_0 * x_hi 
+// 
 // is also an integer of the form 4*K; and thus can also be ignored.
 // Let M := P_0 * x_lo which is a small integer. The main part of the
 // calculation is really the multiplication of x with the four pieces
 // P_1, P_2, P_3, and P_4.
-//
+// 
 // Unless the reduced argument is extremely small in magnitude, it
 // suffices to carry out the multiplication of x with P_1, P_2, and
-// P_3. x*P_4 will be carried out and added on as a correction only
+// P_3. x*P_4 will be carried out and added on as a correction only 
 // when it is found to be needed. Note also that x*P_4 need not be
 // computed exactly. A straightforward multiplication suffices since
 // the rounding error thus produced would be bounded by 2^(-3*64),
 // that is 2^(-192) which is small enough as the reduced argument
 // is bounded from below by 2^(-75).
-//
+// 
 // Now that we have four 64-bit data representing 2/pi and a
 // 64-bit x. We first need to calculate a highly accurate product
 // of x and P_1, P_2, P_3. This is best understood as integer
 // multiplication.
-//
-//
+// 
+// 
 // STEP 1. Multiplication
 // ----------------------
-//
-//
+// 
+// 
 //                     ---------   ---------   ---------
-//                    |  P_1  |   |  P_2  |   |  P_3  |
-//                    ---------   ---------   ---------
-//
-//                                            ---------
-//             X                              |   X   |
+// 	             |  P_1  |   |  P_2  |   |  P_3  |
+// 	             ---------   ---------   ---------
+// 
 //                                            ---------
+// 	      X                              |   X   |
+// 	                                     ---------
 //      ----------------------------------------------------
 //
 //                                 ---------   ---------
-//                               |  A_hi |   |  A_lo |
-//                               ---------   ---------
+//	                         |  A_hi |   |  A_lo |
+//	                         ---------   ---------
 //
 //
 //                    ---------   ---------
-//                   |  B_hi |   |  B_lo |
-//                   ---------   ---------
+//	             |  B_hi |   |  B_lo |
+//	             ---------   ---------
 //
 //
-//        ---------   ---------
-//       |  C_hi |   |  C_lo |
-//       ---------   ---------
+//        ---------   ---------  
+//	 |  C_hi |   |  C_lo |  
+//	 ---------   ---------  
 //
 //      ====================================================
 //       ---------   ---------   ---------   ---------
-//       |  S_0  |   |  S_1  |   |  S_2  |   |  S_3  |
-//       ---------   ---------   ---------   ---------
+//	 |  S_0  |   |  S_1  |   |  S_2  |   |  S_3  |
+//	 ---------   ---------   ---------   ---------
 //
 //
 //
 // STEP 2. Get N and f
 // -------------------
-//
+// 
 // Conceptually, after the individual pieces S_0, S_1, ..., are obtained,
 // we have to sum them and obtain an integer part, N, and a fraction, f.
 // Here, |f| <= 1/2, and N is an integer. Note also that N need only to
 // be known to module 2^k, k >= 2. In the case when |f| is small enough,
 // we would need to add in the value x*P_4.
-//
-//
+// 
+// 
 // STEP 3. Get reduced argument
 // ----------------------------
-//
+// 
 // The value f is not yet the reduced argument that we seek. The
 // equation
-//
-//       x * 2/pi = 4K  + N  + f
-//
+// 
+// 	x * 2/pi = 4K  + N  + f
+// 
 // says that
-//
+// 
 //         x   =  2*K*pi  + N * pi/2  +  f * (pi/2).
-//
+// 
 // Thus, the reduced argument is given by
-//
-//       reduced argument =  f * pi/2.
-//
+// 
+// 	reduced argument =  f * pi/2.
+// 
 // This multiplication must be performed to extra precision.
-//
+// 
 // IV. Implementation
 // ==================
-//
+// 
 // Step 0. Initialization
 // ----------------------
-//
+// 
 // Set sgn_x := sign(x); x := |x|; x_lo := 2 lsb of x.
-//
+// 
 // In memory, 2/pi is stored contigously as
-//
+// 
 //  0x00000000 0x00000000 0xA2F....
 //                       ^
 //                       |__ implied binary bit
-//
+// 
 // Given x = 2^m * 1.xxxx...xxx; we calculate L := 62 - m. Thus
 // -1 <= L <= -16321. We fetch from memory 5 integer pieces of data.
-//
+// 
 // P_0 is the two bits corresponding to bit positions L+2 and L+1
 // P_1 is the 64-bit starting at bit position  L
 // P_2 is the 64-bit starting at bit position  L-64
 // P_3 is the 64-bit starting at bit position  L-128
 // P_4 is the 64-bit starting at bit position  L-192
-//
+// 
 // For example, if m = 63, P_0 would be 0 and P_1 would look like
 // 0xA2F...
-//
+// 
 // If m = 65, P_0 would be the two msb of 0xA, thus, P_0 is 10 in binary.
-// P_1 in binary would be  1 0 0 0 1 0 1 1 1 1 ....
-//
+// P_1 in binary would be  1 0 0 0 1 0 1 1 1 1 .... 
+//  
 // Step 1. Multiplication
 // ----------------------
-//
+// 
 // At this point, P_1, P_2, P_3, P_4 are integers. They are
 // supposed to be interpreted as
-//
+// 
 //  2^(L-63)     * P_1;
 //  2^(L-63-64)  * P_2;
 //  2^(L-63-128) * P_3;
 // 2^(L-63-192) * P_4;
-//
+// 
 // Since each of them need to be multiplied to x, we would scale
 // both x and the P_j's by some convenient factors: scale each
 // of P_j's up by 2^(63-L), and scale x down by 2^(L-63).
-//
+// 
 //   p_1 := fcvt.xf ( P_1 )
 //   p_2 := fcvt.xf ( P_2 ) * 2^(-64)
 //   p_3 := fcvt.xf ( P_3 ) * 2^(-128)
@@ -331,30 +325,30 @@
 //   x   := replace exponent of x by -1
 //          because 2^m    * 1.xxxx...xxx  * 2^(L-63)
 //          is      2^(-1) * 1.xxxx...xxx
-//
+// 
 // We are now faced with the task of computing the following
-//
+// 
 //                     ---------   ---------   ---------
-//                    |  P_1  |   |  P_2  |   |  P_3  |
-//                    ---------   ---------   ---------
-//
+// 	             |  P_1  |   |  P_2  |   |  P_3  |
+// 	             ---------   ---------   ---------
+// 
 //                                             ---------
-//             X                              |   X   |
-//                                            ---------
+// 	      X                              |   X   |
+// 	                                     ---------
 //       ----------------------------------------------------
-//
+// 
 //                                 ---------   ---------
-//                                |  A_hi |   |  A_lo |
-//                                ---------   ---------
-//
+// 	                         |  A_hi |   |  A_lo |
+// 	                         ---------   ---------
+// 
 //                     ---------   ---------
-//                    |  B_hi |   |  B_lo |
-//                    ---------   ---------
-//
-//         ---------   ---------
-//        |  C_hi |   |  C_lo |
-//        ---------   ---------
-//
+// 	             |  B_hi |   |  B_lo |
+// 	             ---------   ---------
+// 
+//         ---------   ---------  
+// 	 |  C_hi |   |  C_lo |  
+// 	 ---------   ---------  
+// 
 //      ====================================================
 //       -----------   ---------   ---------   ---------
 //       |    S_0  |   |  S_1  |   |  S_2  |   |  S_3  |
@@ -363,108 +357,108 @@
 //        |          |___ binary point
 //        |
 //        |___ possibly one more bit
-//
+// 
 // Let FPSR3 be set to round towards zero with widest precision
-// and exponent range. Unless an explicit FPSR is given,
+// and exponent range. Unless an explicit FPSR is given, 
 // round-to-nearest with widest precision and exponent range is
 // used.
-//
+// 
 // Define sigma_C := 2^63; sigma_B := 2^(-1); sigma_A := 2^(-65).
-//
+// 
 // Tmp_C := fmpy.fpsr3( x, p_1 );
 // If Tmp_C >= sigma_C then
 //    C_hi := Tmp_C;
 //    C_lo := x*p_1 - C_hi ...fma, exact
 // Else
 //    C_hi := fadd.fpsr3(sigma_C, Tmp_C) - sigma_C
-//                   ...subtraction is exact, regardless
-//                   ...of rounding direction
+// 			...subtraction is exact, regardless
+// 			...of rounding direction
 //    C_lo := x*p_1 - C_hi ...fma, exact
 // End If
-//
+// 
 // Tmp_B := fmpy.fpsr3( x, p_2 );
 // If Tmp_B >= sigma_B then
 //    B_hi := Tmp_B;
 //    B_lo := x*p_2 - B_hi ...fma, exact
 // Else
 //    B_hi := fadd.fpsr3(sigma_B, Tmp_B) - sigma_B
-//                   ...subtraction is exact, regardless
-//                   ...of rounding direction
+// 			...subtraction is exact, regardless
+// 			...of rounding direction
 //    B_lo := x*p_2 - B_hi ...fma, exact
 // End If
-//
+// 
 // Tmp_A := fmpy.fpsr3( x, p_3 );
 // If Tmp_A >= sigma_A then
 //    A_hi := Tmp_A;
 //    A_lo := x*p_3 - A_hi ...fma, exact
 // Else
 //    A_hi := fadd.fpsr3(sigma_A, Tmp_A) - sigma_A
-//                   ...subtraction is exact, regardless
-//                   ...of rounding direction
+// 			...subtraction is exact, regardless
+// 			...of rounding direction
 //    A_lo := x*p_3 - A_hi ...fma, exact
 // End If
-//
+// 
 // ...Note that C_hi is of integer value. We need only the
-// ...last few bits. Thus we can ensure C_hi is never a big
+// ...last few bits. Thus we can ensure C_hi is never a big 
 // ...integer, freeing us from overflow worry.
-//
+// 
 // Tmp_C := fadd.fpsr3( C_hi, 2^(70) ) - 2^(70);
 // ...Tmp_C is the upper portion of C_hi
 // C_hi := C_hi - Tmp_C
 // ...0 <= C_hi < 2^7
-//
+// 
 // Step 2. Get N and f
 // -------------------
-//
-// At this point, we have all the components to obtain
+// 
+// At this point, we have all the components to obtain 
 // S_0, S_1, S_2, S_3 and thus N and f. We start by adding
 // C_lo and B_hi. This sum together with C_hi gives a good
-// estimation of N and f.
-//
+// estimation of N and f. 
+// 
 // A := fadd.fpsr3( B_hi, C_lo )
 // B := max( B_hi, C_lo )
 // b := min( B_hi, C_lo )
-//
-// a := (B - A) + b      ...exact. Note that a is either 0
-//                   ...or 2^(-64).
-//
+// 
+// a := (B - A) + b	...exact. Note that a is either 0
+// 			...or 2^(-64).
+// 
 // N := round_to_nearest_integer_value( A );
-// f := A - N;            ...exact because lsb(A) >= 2^(-64)
-//                   ...and |f| <= 1/2.
-//
-// f := f + a            ...exact because a is 0 or 2^(-64);
-//                   ...the msb of the sum is <= 1/2
-//                   ...lsb >= 2^(-64).
-//
+// f := A - N;		...exact because lsb(A) >= 2^(-64)
+// 			...and |f| <= 1/2.
+// 
+// f := f + a		...exact because a is 0 or 2^(-64);
+// 			...the msb of the sum is <= 1/2
+// 			...lsb >= 2^(-64).
+// 
 // N := convert to integer format( C_hi + N );
 // M := P_0 * x_lo;
 // N := N + M;
-//
+// 
 // If sgn_x == 1 (that is original x was negative)
 // N := 2^10 - N
 // ...this maintains N to be non-negative, but still
 // ...equivalent to the (negated N) mod 4.
 // End If
-//
+// 
 // If |f| >= 2^(-33)
-//
+// 
 // ...Case 1
 // CASE := 1
 // g := A_hi + B_lo;
 // s_hi := f + g;
 // s_lo := (f - s_hi) + g;
-//
+// 
 // Else
-//
+// 
 // ...Case 2
 // CASE := 2
 // A := fadd.fpsr3( A_hi, B_lo )
 // B := max( A_hi, B_lo )
 // b := min( A_hi, B_lo )
-//
-// a := (B - A) + b      ...exact. Note that a is either 0
-//                   ...or 2^(-128).
-//
+// 
+// a := (B - A) + b	...exact. Note that a is either 0
+// 			...or 2^(-128).
+// 
 // f_hi := A + f;
 // f_lo := (f - f_hi) + A;
 // ...this is exact.
@@ -474,9 +468,9 @@
 // ...If f = 2^(-64), f-f_hi involves cancellation and is
 // ...exact. If f = -2^(-64), then A + f is exact. Hence
 // ...f-f_hi is -A exactly, giving f_lo = 0.
-//
+// 
 // f_lo := f_lo + a;
-//
+// 
 // If |f| >= 2^(-50) then
 //    s_hi := f_hi;
 //    s_lo := f_lo;
@@ -485,111 +479,117 @@
 //    s_hi := f_hi + f_lo
 //    s_lo := (f_hi - s_hi) + f_lo
 // End If
-//
+// 
 // End If
-//
+// 
 // Step 3. Get reduced argument
 // ----------------------------
-//
+// 
 // If sgn_x == 0 (that is original x is positive)
-//
+// 
 // D_hi := Pi_by_2_hi
 // D_lo := Pi_by_2_lo
 // ...load from table
-//
+// 
 // Else
-//
+// 
 // D_hi := neg_Pi_by_2_hi
 // D_lo := neg_Pi_by_2_lo
 // ...load from table
 // End If
-//
+// 
 // r_hi :=  s_hi*D_hi
-// r_lo :=  s_hi*D_hi - r_hi         ...fma
+// r_lo :=  s_hi*D_hi - r_hi   	...fma
 // r_lo := (s_hi*D_lo + r_lo) + s_lo*D_hi
-//
-// Return  N, r_hi, r_lo
-//
-FR_input_X = f8
-FR_r_hi    = f8
-FR_r_lo    = f9
-
-FR_X       = f32
-FR_N       = f33
-FR_p_1     = f34
-FR_TWOM33  = f35
-FR_TWOM50  = f36
-FR_g       = f37
-FR_p_2     = f38
-FR_f       = f39
-FR_s_lo    = f40
-FR_p_3     = f41
-FR_f_abs   = f42
-FR_D_lo    = f43
-FR_p_4     = f44
-FR_D_hi    = f45
-FR_Tmp2_C  = f46
-FR_s_hi    = f47
-FR_sigma_A = f48
-FR_A       = f49
-FR_sigma_B = f50
-FR_B       = f51
-FR_sigma_C = f52
-FR_b       = f53
-FR_ScaleP2 = f54
-FR_ScaleP3 = f55
-FR_ScaleP4 = f56
-FR_Tmp_A   = f57
-FR_Tmp_B   = f58
-FR_Tmp_C   = f59
-FR_A_hi    = f60
-FR_f_hi    = f61
-FR_RSHF    = f62
-FR_A_lo    = f63
-FR_B_hi    = f64
-FR_a       = f65
-FR_B_lo    = f66
+// 
+// Return  CASE, N, r_hi, r_lo
+//
+
+#include "libm_support.h"
+
+FR_X       = f32 
+FR_N       = f33 
+FR_p_1     = f34 
+FR_TWOM33  = f35 
+FR_TWOM50  = f36 
+FR_g       = f37 
+FR_p_2     = f38 
+FR_f       = f39 
+FR_s_lo    = f40 
+FR_p_3     = f41 
+FR_f_abs   = f42 
+FR_D_lo    = f43 
+FR_p_4     = f44 
+FR_D_hi    = f45 
+FR_Tmp2_C  = f46 
+FR_s_hi    = f47 
+FR_sigma_A = f48 
+FR_A       = f49 
+FR_sigma_B = f50 
+FR_B       = f51 
+FR_sigma_C = f52 
+FR_b       = f53 
+FR_ScaleP2 = f54 
+FR_ScaleP3 = f55 
+FR_ScaleP4 = f56 
+FR_Tmp_A   = f57 
+FR_Tmp_B   = f58 
+FR_Tmp_C   = f59 
+FR_A_hi    = f60 
+FR_f_hi    = f61 
+FR_r_hi    = f62 
+FR_A_lo    = f63 
+FR_B_hi    = f64 
+FR_a       = f65 
+FR_B_lo    = f66 
 FR_f_lo    = f67
-FR_N_fix   = f68
-FR_C_hi    = f69
-FR_C_lo    = f70
+FR_r_lo    = f68 
+FR_C_hi    = f69 
+FR_C_lo    = f70 
 
 GR_N       = r8
-GR_Exp_x   = r36
-GR_Temp    = r37
-GR_BIASL63 = r38
+GR_Address_of_Input  = r32 
+GR_Address_of_Outputs = r33 
+GR_Exp_x   = r36 
+GR_Temp    = r37 
+GR_BIASL63 = r38 
 GR_CASE    = r39
-GR_x_lo    = r40
-GR_sgn_x   = r41
+GR_x_lo    = r40 
+GR_sgn_x   = r41 
 GR_M       = r42
 GR_BASE    = r43
 GR_LENGTH1 = r44
 GR_LENGTH2 = r45
 GR_ASUB    = r46
 GR_P_0     = r47
-GR_P_1     = r48
-GR_P_2     = r49
-GR_P_3     = r50
-GR_P_4     = r51
+GR_P_1     = r48 
+GR_P_2     = r49 
+GR_P_3     = r50 
+GR_P_4     = r51 
 GR_START   = r52
 GR_SEGMENT = r53
 GR_A       = r54
-GR_B       = r55
+GR_B       = r55 
 GR_C       = r56
 GR_D       = r57
 GR_E       = r58
-GR_TEMP1   = r59
-GR_TEMP2   = r60
-GR_TEMP3   = r61
-GR_TEMP4   = r62
+GR_TEMP1   = r59 
+GR_TEMP2   = r60 
+GR_TEMP3   = r61 
+GR_TEMP4   = r62 
 GR_TEMP5   = r63
 GR_TEMP6   = r64
-GR_rshf    = r64
 
-RODATA
 .align 64
 
-LOCAL_OBJECT_START(Constants_Bits_of_2_by_pi)
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+Constants_Bits_of_2_by_pi:
+ASM_TYPE_DIRECTIVE(Constants_Bits_of_2_by_pi,@object)
 data8 0x0000000000000000,0xA2F9836E4E441529
 data8 0xFC2757D1F534DDC0,0xDB6295993C439041
 data8 0xFE5163ABDEBBC561,0xB7246E3A424DD2E0
@@ -721,33 +721,34 @@ data8 0xB5D6DF8261DD9602,0x36169F3AC4A1A283
 data8 0x6DED727A8D39A9B8,0x825C326B5B2746ED
 data8 0x34007700D255F4FC,0x4D59018071E0E13F
 data8 0x89B295F364A8F1AE,0xA74B38FC4CEAB2BB
-LOCAL_OBJECT_END(Constants_Bits_of_2_by_pi)
+ASM_SIZE_DIRECTIVE(Constants_Bits_of_2_by_pi)
 
-LOCAL_OBJECT_START(Constants_Bits_of_pi_by_2)
-data8 0xC90FDAA22168C234,0x00003FFF
-data8 0xC4C6628B80DC1CD1,0x00003FBF
-LOCAL_OBJECT_END(Constants_Bits_of_pi_by_2)
+Constants_Bits_of_pi_by_2:
+ASM_TYPE_DIRECTIVE(Constants_Bits_of_pi_by_2,@object)
+data4 0x2168C234,0xC90FDAA2,0x00003FFF,0x00000000
+data4 0x80DC1CD1,0xC4C6628B,0x00003FBF,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_Bits_of_pi_by_2)
 
 .section .text
-.global __libm_pi_by_2_reduce#
 .proc __libm_pi_by_2_reduce#
-.align 32
+.global __libm_pi_by_2_reduce#
+.align 64 
 
-__libm_pi_by_2_reduce:
+__libm_pi_by_2_reduce: 
 
-//    X is in f8
-//    Place the two-piece result r (r_hi) in f8 and c (r_lo) in f9
-//    N is returned in r8
+//    X is at the address in Address_of_Input
+//    Place the two-piece result at the address in Address_of_Outputs
+//    r followed by c
+//    N is returned
 
-{ .mfi
-      alloc  r34 = ar.pfs,2,34,0,0
-      fsetc.s3 0x00,0x7F     // Set sf3 to round to zero, 82-bit prec, td, ftz
-      nop.i 999
+{ .mmf
+alloc  r34 = ar.pfs,2,34,0,0
+(p0)  ldfe  FR_X = [GR_Address_of_Input]
+(p0)  fsetc.s3 0x00,0x7F ;;
 }
-{ .mfi
-      addl           GR_BASE   = @ltoff(Constants_Bits_of_2_by_pi#), gp
-      nop.f 999
-      mov GR_BIASL63 = 0x1003E
+{ .mlx
+	nop.m 999
+(p0)  movl GR_BIASL63 = 0x1003E
 }
 ;;
 
@@ -764,61 +765,73 @@ __libm_pi_by_2_reduce:
 //    Address_BASE = shladd(SEGMENT,3) + BASE
 
 
+
 { .mmi
-      getf.exp GR_Exp_x = FR_input_X
-      ld8 GR_BASE = [GR_BASE]
-      mov GR_TEMP5 = 0x0FFFE
+      nop.m 999
+(p0)  addl           GR_BASE   = @ltoff(Constants_Bits_of_2_by_pi#), gp
+      nop.i 999
 }
 ;;
 
-//    Define sigma_C := 2^63; sigma_B := 2^(-1); sigma_A := 2^(-65).
 { .mmi
-      getf.sig GR_x_lo = FR_input_X
-      mov GR_TEMP6 = 0x0FFBE
+      ld8 GR_BASE = [GR_BASE]
+      nop.m 999
       nop.i 999
 }
 ;;
 
-//    Special Code for testing DE arguments
-//          movl GR_BIASL63 = 0x0000000000013FFE
-//          movl GR_x_lo = 0xFFFFFFFFFFFFFFFF
-//          setf.exp FR_X = GR_BIASL63
-//          setf.sig FR_ScaleP3 = GR_x_lo
-//          fmerge.se FR_X = FR_X,FR_ScaleP3
+
+{ .mlx
+	nop.m 999
+(p0)  movl GR_TEMP5 = 0x000000000000FFFE
+}
+{ .mmi
+	nop.m 999 ;;
+(p0)  setf.exp FR_sigma_B = GR_TEMP5
+	nop.i 999
+}
+{ .mlx
+	nop.m 999
+(p0)  movl GR_TEMP6 = 0x000000000000FFBE ;;
+}
+//    Define sigma_C := 2^63; sigma_B := 2^(-1); sigma_A := 2^(-65).
+{ .mfi
+(p0)  setf.exp FR_sigma_A = GR_TEMP6
+	nop.f 999
+	nop.i 999 ;;
+}
+//    Special Code for testing DE arguments 
+//    (p0)  movl GR_BIASL63 = 0x0000000000013FFE
+//    (p0)  movl GR_x_lo = 0xFFFFFFFFFFFFFFFF
+//    (p0)  setf.exp FR_X = GR_BIASL63
+//    (p0)  setf.sig FR_ScaleP3 = GR_x_lo
+//    (p0)  fmerge.se FR_X = FR_X,FR_ScaleP3
 //    Set sgn_x := sign(x); x := |x|; x_lo := 2 lsb of x.
 //    2/pi is stored contiguously as
 //    0x00000000 0x00000000.0xA2F....
 //    M = EXP - BIAS  ( M >= 63)
 //    Given x = 2^m * 1.xxxx...xxx; we calculate L := 62 - m.
 //    Thus -16321 <= L <= -1.
-{ .mmi
-      setf.exp FR_sigma_B = GR_TEMP5
-      setf.exp FR_sigma_A = GR_TEMP6
-      extr.u GR_M = GR_Exp_x,0,17
+{ .mmf
+(p0)  getf.exp GR_Exp_x = FR_X
+(p0)  getf.sig GR_x_lo = FR_X
+(p0)  fabs FR_X = FR_X ;;
 }
-;;
-
 { .mii
-      and  GR_x_lo = 0x03,GR_x_lo
-      sub  GR_START = GR_M,GR_BIASL63
-      add  GR_BASE = 8,GR_BASE           // To effectively add 1 to SEGMENT
+(p0)  and  GR_x_lo = 0x03,GR_x_lo
+(p0)  extr.u GR_M = GR_Exp_x,0,17 ;;
+(p0)  sub  GR_START = GR_M,GR_BIASL63
 }
-;;
-
-{ .mii
-      and  GR_LENGTH1 = 0x3F,GR_START
-      shr.u  GR_SEGMENT = GR_START,6
-      nop.i 999
+{ .mmi
+	nop.m 999 ;;
+(p0)  and  GR_LENGTH1 = 0x3F,GR_START
+(p0)  shr.u  GR_SEGMENT = GR_START,6
 }
-;;
-
 { .mmi
-      shladd GR_BASE = GR_SEGMENT,3,GR_BASE
-      sub  GR_LENGTH2 = 0x40,GR_LENGTH1
-      cmp.le p6,p7 = 0x2,GR_LENGTH1
+	nop.m 999 ;;
+(p0)  add  GR_SEGMENT = 0x1,GR_SEGMENT
+(p0)  sub  GR_LENGTH2 = 0x40,GR_LENGTH1
 }
-;;
-
 //    P_0 is the two bits corresponding to bit positions L+2 and L+1
 //    P_1 is the 64-bit starting at bit position  L
 //    P_2 is the 64-bit starting at bit position  L-64
@@ -836,13 +849,13 @@ __libm_pi_by_2_reduce:
 //    P_4 is made up of Clo and Dhi
 //    P_4 = deposit Dlo, position 0, length2  into P_4, position length1
 //          deposit Ehi, position length2, length1 into P_4, position 0
-{ .mfi
-      ld8 GR_A = [GR_BASE],8
-      fabs FR_X = FR_input_X
-(p7)  cmp.eq.unc p8,p9 = 0x1,GR_LENGTH1
+{ .mmi
+(p0)  cmp.le.unc p6,p7 = 0x2,GR_LENGTH1 ;;
+(p0)  shladd GR_BASE = GR_SEGMENT,3,GR_BASE
+(p7)  cmp.eq.unc p8,p9 = 0x1,GR_LENGTH1 ;;
 }
-;;
-
+{ .mmi
+	nop.m 999
 //    ld_64 A at Base and increment Base by 8
 //    ld_64 B at Base and increment Base by 8
 //    ld_64 C at Base and increment Base by 8
@@ -853,35 +866,31 @@ __libm_pi_by_2_reduce:
 //    A, B, C, D, and E look like    | length1 | length2   |
 //                                    ---------------------
 //                                       hi        lo
-{ .mlx
-      ld8 GR_B = [GR_BASE],8
-      movl GR_rshf = 0x43e8000000000000   // 1.10000 2^63 for right shift N_fix
+(p0)  ld8 GR_A = [GR_BASE],8
+(p0)  extr.u GR_sgn_x = GR_Exp_x,17,1 ;;
 }
-;;
-
-{ .mmi
-      ld8 GR_C = [GR_BASE],8
-      nop.m 999
-(p8)  extr.u GR_Temp = GR_A,63,1
+{ .mmf
+	nop.m 999
+(p0)  ld8 GR_B = [GR_BASE],8
+(p0)  fmerge.se FR_X = FR_sigma_B,FR_X ;;
 }
-;;
-
+{ .mii
+(p0)  ld8 GR_C = [GR_BASE],8
+(p8)  extr.u GR_Temp = GR_A,63,1 ;;
+(p0)  shl GR_TEMP1 = GR_A,GR_LENGTH1
+}
+{ .mii
+(p0)  ld8 GR_D = [GR_BASE],8
 //    If length1 >= 2,
 //       P_0 = deposit Ahi, position length2, 2 bit into P_0 at position 0.
-{ .mii
-      ld8 GR_D = [GR_BASE],8
-      shl GR_TEMP1 = GR_A,GR_LENGTH1   // MM instruction
-(p6)  shr.u GR_P_0 = GR_A,GR_LENGTH2   // MM instruction
+(p6)     shr.u GR_P_0 = GR_A,GR_LENGTH2 ;;
+(p0)  shl GR_TEMP2 = GR_B,GR_LENGTH1
 }
-;;
-
 { .mii
-      ld8 GR_E = [GR_BASE],-40
-      shl GR_TEMP2 = GR_B,GR_LENGTH1   // MM instruction
-      shr.u GR_P_1 = GR_B,GR_LENGTH2   // MM instruction
+(p0)  ld8 GR_E = [GR_BASE],-40
+(p0)  shr.u GR_P_1 = GR_B,GR_LENGTH2 ;;
+(p0)  shr.u GR_P_2 = GR_C,GR_LENGTH2
 }
-;;
-
 //    Else
 //       Load 16 bit of ASUB from (Base_Address_of_A - 2)
 //       P_0 = ASUB & 0x3
@@ -891,56 +900,43 @@ __libm_pi_by_2_reduce:
 //          Deposit element 63 from Ahi and place in element 0 of P_0.
 //       Endif
 //    Endif
-
 { .mii
 (p7)  ld2 GR_ASUB = [GR_BASE],8
-      shl GR_TEMP3 = GR_C,GR_LENGTH1   // MM instruction
-      shr.u GR_P_2 = GR_C,GR_LENGTH2   // MM instruction
+(p0)  shl GR_TEMP3 = GR_C,GR_LENGTH1 ;;
+(p0)  shl GR_TEMP4 = GR_D,GR_LENGTH1
 }
-;;
-
 { .mii
-      setf.d FR_RSHF = GR_rshf         // Form right shift const 1.100 * 2^63
-      shl GR_TEMP4 = GR_D,GR_LENGTH1   // MM instruction
-      shr.u GR_P_3 = GR_D,GR_LENGTH2   // MM instruction
+	nop.m 999
+(p0)  shr.u GR_P_3 = GR_D,GR_LENGTH2 ;;
+(p0)  shr.u GR_P_4 = GR_E,GR_LENGTH2
 }
-;;
-
-{ .mmi
+{ .mii
 (p7)  and GR_P_0 = 0x03,GR_ASUB
-(p6)  and GR_P_0 = 0x03,GR_P_0
-      shr.u GR_P_4 = GR_E,GR_LENGTH2   // MM instruction
+(p6)     and GR_P_0 = 0x03,GR_P_0 ;;
+(p0)  or GR_P_1 = GR_P_1,GR_TEMP1
 }
-;;
-
 { .mmi
-      nop.m 999
-      or GR_P_1 = GR_P_1,GR_TEMP1
-(p8)  and GR_P_0 = 0x1,GR_P_0
+(p8)  and GR_P_0 = 0x1,GR_P_0 ;;
+(p0)  or GR_P_2 = GR_P_2,GR_TEMP2
+(p8)  shl GR_P_0 = GR_P_0,0x1 ;;
 }
-;;
-
-{ .mmi
-      setf.sig FR_p_1 = GR_P_1
-      or GR_P_2 = GR_P_2,GR_TEMP2
-(p8)  shladd GR_P_0 = GR_P_0,1,GR_Temp
+{ .mii
+	nop.m 999
+(p0)  or GR_P_3 = GR_P_3,GR_TEMP3
+(p8)  or GR_P_0 = GR_P_0,GR_Temp
 }
-;;
-
-{ .mmf
-      setf.sig FR_p_2 = GR_P_2
-      or GR_P_3 = GR_P_3,GR_TEMP3
-      fmerge.se FR_X = FR_sigma_B,FR_X
+{ .mmi
+(p0)  setf.sig FR_p_1 = GR_P_1 ;;
+(p0)  setf.sig FR_p_2 = GR_P_2
+(p0)  or GR_P_4 = GR_P_4,GR_TEMP4 ;;
 }
-;;
-
 { .mmi
-      setf.sig FR_p_3 = GR_P_3
-      or GR_P_4 = GR_P_4,GR_TEMP4
-      pmpy2.r GR_M = GR_P_0,GR_x_lo
+	nop.m 999 ;;
+(p0)  setf.sig FR_p_3 = GR_P_3
+(p0)  pmpy2.r GR_M = GR_P_0,GR_x_lo
 }
-;;
-
+{ .mlx
+(p0)  setf.sig FR_p_4 = GR_P_4
 //    P_1, P_2, P_3, P_4 are integers. They should be
 //    2^(L-63)     * P_1;
 //    2^(L-63-64)  * P_2;
@@ -958,18 +954,18 @@ __libm_pi_by_2_reduce:
 //             |  P_1  |   |  P_2  |   |  P_3  |
 //             ---------   ---------   ---------
 //                                           ---------
-//            X                              |   X   |
-//                                           ---------
+//	      X                              |   X   |
+//	                                     ---------
 //      ----------------------------------------------------
 //                               ---------   ---------
-//                               |  A_hi |   |  A_lo |
-//                               ---------   ---------
+//	                         |  A_hi |   |  A_lo |
+//	                         ---------   ---------
 //                   ---------   ---------
-//                   |  B_hi |   |  B_lo |
-//                   ---------   ---------
-//       ---------   ---------
-//       |  C_hi |   |  C_lo |
+//	             |  B_hi |   |  B_lo |
+//	             ---------   ---------
 //       ---------   ---------
+//	 |  C_hi |   |  C_lo |
+//	 ---------   ---------
 //     ====================================================
 //    -----------   ---------   ---------   ---------
 //    |    S_0  |   |  S_1  |   |  S_2  |   |  S_3  |
@@ -981,55 +977,52 @@ __libm_pi_by_2_reduce:
 //    and exponent range. Unless an explicit FPSR is given,
 //    round-to-nearest with widest precision and exponent range is
 //    used.
-{ .mmi
-      setf.sig FR_p_4 = GR_P_4
-      mov GR_TEMP1 = 0x0FFBF
-      nop.i 999
+(p0)  movl GR_TEMP1 = 0x000000000000FFBF
 }
-;;
-
 { .mmi
-      setf.exp FR_ScaleP2 = GR_TEMP1
-      mov GR_TEMP2 = 0x0FF7F
-      nop.i 999
+	nop.m 999 ;;
+(p0)  setf.exp FR_ScaleP2 = GR_TEMP1
+	nop.i 999
+}
+{ .mlx
+	nop.m 999
+(p0)  movl GR_TEMP4 = 0x000000000001003E
 }
-;;
-
 { .mmi
-      setf.exp FR_ScaleP3 = GR_TEMP2
-      mov GR_TEMP4 = 0x1003E
-      nop.i 999
+	nop.m 999 ;;
+(p0)  setf.exp FR_sigma_C = GR_TEMP4
+	nop.i 999
+}
+{ .mlx
+	nop.m 999
+(p0)  movl GR_TEMP2 = 0x000000000000FF7F ;;
 }
-;;
-
 { .mmf
-      setf.exp FR_sigma_C = GR_TEMP4
-      mov GR_Temp = 0x0FFDE
-      fcvt.xuf.s1 FR_p_1 = FR_p_1
+	nop.m 999
+(p0)  setf.exp FR_ScaleP3 = GR_TEMP2
+(p0)  fcvt.xuf.s1 FR_p_1 = FR_p_1 ;;
 }
-;;
-
 { .mfi
-      setf.exp FR_TWOM33 = GR_Temp
-      fcvt.xuf.s1 FR_p_2 = FR_p_2
-      nop.i 999
+	nop.m 999
+(p0)  fcvt.xuf.s1 FR_p_2 = FR_p_2
+	nop.i 999
 }
-;;
-
-{ .mfi
-      nop.m 999
-      fcvt.xuf.s1 FR_p_3 = FR_p_3
-      nop.i 999
+{ .mlx
+	nop.m 999
+(p0)  movl GR_Temp = 0x000000000000FFDE ;;
+}
+{ .mmf
+	nop.m 999
+(p0)  setf.exp FR_TWOM33 = GR_Temp
+(p0)  fcvt.xuf.s1 FR_p_3 = FR_p_3 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fcvt.xuf.s1 FR_p_4 = FR_p_4
-      nop.i 999
+	nop.m 999
+(p0)  fcvt.xuf.s1 FR_p_4 = FR_p_4
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //    Tmp_C := fmpy.fpsr3( x, p_1 );
 //    Tmp_B := fmpy.fpsr3( x, p_2 );
 //    Tmp_A := fmpy.fpsr3( x, p_3 );
@@ -1055,62 +1048,55 @@ __libm_pi_by_2_reduce:
 //      Exact, regardless ...of rounding direction
 //      A_lo := x*p_3 - A_hi ...fma, exact
 //    Endif
-{ .mfi
-      nop.m 999
-      fmpy.s3 FR_Tmp_C = FR_X,FR_p_1
-      nop.i 999
+(p0)  fmpy.s3 FR_Tmp_C = FR_X,FR_p_1
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      mov GR_TEMP3 = 0x0FF3F
-      fmpy.s1 FR_p_2 = FR_p_2,FR_ScaleP2
-      nop.i 999
+	nop.m 999
+(p0)  fmpy.s1 FR_p_2 = FR_p_2,FR_ScaleP2
+	nop.i 999
+}
+{ .mlx
+	nop.m 999
+(p0)  movl GR_Temp = 0x0000000000000400
+}
+{ .mlx
+	nop.m 999
+(p0)  movl GR_TEMP3 = 0x000000000000FF3F ;;
 }
-;;
-
 { .mmf
-      setf.exp FR_ScaleP4 = GR_TEMP3
-      mov GR_TEMP4 = 0x10045
-      fmpy.s1 FR_p_3 = FR_p_3,FR_ScaleP3
+	nop.m 999
+(p0)  setf.exp FR_ScaleP4 = GR_TEMP3
+(p0)  fmpy.s1 FR_p_3 = FR_p_3,FR_ScaleP3 ;;
 }
-;;
-
-{ .mfi
-      nop.m 999
-      fadd.s3 FR_C_hi = FR_sigma_C,FR_Tmp_C   // For Tmp_C < sigma_C case
-      nop.i 999
+{ .mlx
+	nop.m 999
+(p0)  movl GR_TEMP4 = 0x0000000000010045 ;;
 }
-;;
-
 { .mmf
-      setf.exp FR_Tmp2_C = GR_TEMP4
-      nop.m 999
-      fmpy.s3 FR_Tmp_B = FR_X,FR_p_2
+	nop.m 999
+(p0)  setf.exp FR_Tmp2_C = GR_TEMP4
+(p0)  fmpy.s3 FR_Tmp_B = FR_X,FR_p_2 ;;
 }
-;;
-
 { .mfi
-      addl           GR_BASE   = @ltoff(Constants_Bits_of_pi_by_2#), gp
-      fcmp.ge.s1 p12,  p9 = FR_Tmp_C,FR_sigma_C
-      nop.i 999
+	nop.m 999
+(p0)  fcmp.ge.unc.s1 p12,  p9 = FR_Tmp_C,FR_sigma_C
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fmpy.s3 FR_Tmp_A = FR_X,FR_p_3
-      nop.i 99
+	nop.m 999
+(p0)  fmpy.s3 FR_Tmp_A = FR_X,FR_p_3
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      ld8 GR_BASE = [GR_BASE]
+	nop.m 999
 (p12) mov FR_C_hi = FR_Tmp_C
-      nop.i 999
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p9)  fsub.s1 FR_C_hi = FR_C_hi,FR_sigma_C
-      nop.i 999
+(p0)  addl           GR_BASE   = @ltoff(Constants_Bits_of_pi_by_2#), gp
+(p9)  fadd.s3 FR_C_hi = FR_sigma_C,FR_Tmp_C
+	nop.i 999
 }
 ;;
 
@@ -1128,106 +1114,97 @@ __libm_pi_by_2_reduce:
 //      Load from table
 //   End If
 
-{ .mfi
-      nop.m 999
-      fmpy.s1 FR_p_4 = FR_p_4,FR_ScaleP4
-      nop.i 999
-}
-{ .mfi
+
+{ .mmi
+      ld8 GR_BASE = [GR_BASE]
       nop.m 999
-      fadd.s3 FR_B_hi = FR_sigma_B,FR_Tmp_B     // For Tmp_B < sigma_B case
       nop.i 999
 }
 ;;
 
+
 { .mfi
-      nop.m 999
-      fadd.s3 FR_A_hi = FR_sigma_A,FR_Tmp_A     // For Tmp_A < sigma_A case
-      nop.i 999
+(p0) ldfe FR_D_hi = [GR_BASE],16
+(p0)  fmpy.s1 FR_p_4 = FR_p_4,FR_ScaleP4
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fcmp.ge.s1 p13, p10 = FR_Tmp_B,FR_sigma_B
-      nop.i 999
+(p0) ldfe FR_D_lo = [GR_BASE],0
+(p0)  fcmp.ge.unc.s1 p13, p10 = FR_Tmp_B,FR_sigma_B
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi
-      nop.i 999
+	nop.m 999
+(p13) mov FR_B_hi = FR_Tmp_B
+	nop.i 999
 }
-;;
-
 { .mfi
-      ldfe FR_D_hi = [GR_BASE],16
-      fcmp.ge.s1 p14, p11 = FR_Tmp_A,FR_sigma_A
-      nop.i 999
+	nop.m 999
+(p12) fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      ldfe FR_D_lo = [GR_BASE]
-(p13) mov FR_B_hi = FR_Tmp_B
-      nop.i 999
+	nop.m 999
+(p10) fadd.s3 FR_B_hi = FR_sigma_B,FR_Tmp_B
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p10) fsub.s1 FR_B_hi = FR_B_hi,FR_sigma_B
-      nop.i 999
+	nop.m 999
+(p9)  fsub.s1 FR_C_hi = FR_C_hi,FR_sigma_C
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
+	nop.m 999
+(p0)  fcmp.ge.unc.s1 p14, p11 = FR_Tmp_A,FR_sigma_A
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
 (p14) mov FR_A_hi = FR_Tmp_A
-      nop.i 999
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p11) fsub.s1 FR_A_hi = FR_A_hi,FR_sigma_A
-      nop.i 999
+	nop.m 999
+(p11) fadd.s3 FR_A_hi = FR_sigma_A,FR_Tmp_A
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
+(p9)  fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi
+(p0)  cmp.eq.unc p12,p9 = 0x1,GR_sgn_x
+}
+{ .mfi
+	nop.m 999
+(p13) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p10) fsub.s1 FR_B_hi = FR_B_hi,FR_sigma_B
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
 //    Note that C_hi is of integer value. We need only the
 //    last few bits. Thus we can ensure C_hi is never a big
 //    integer, freeing us from overflow worry.
 //    Tmp_C := fadd.fpsr3( C_hi, 2^(70) ) - 2^(70);
 //    Tmp_C is the upper portion of C_hi
-{ .mfi
-      nop.m 999
-      fadd.s3 FR_Tmp_C = FR_C_hi,FR_Tmp2_C
-      tbit.z p12,p9 = GR_Exp_x, 17
-}
-;;
-
-{ .mfi
-      nop.m 999
-      fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi
-      nop.i 999
+(p0)  fadd.s3 FR_Tmp_C = FR_C_hi,FR_Tmp2_C
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fadd.s3 FR_A = FR_B_hi,FR_C_lo
-      nop.i 999
+	nop.m 999
+(p14) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi
-      nop.i 999
+	nop.m 999
+(p11) fsub.s1 FR_A_hi = FR_A_hi,FR_sigma_A
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fsub.s1 FR_Tmp_C = FR_Tmp_C,FR_Tmp2_C
-      nop.i 999
-}
-;;
-
+	nop.m 999
 //    *******************
 //    Step 2. Get N and f
 //    *******************
@@ -1238,213 +1215,168 @@ __libm_pi_by_2_reduce:
 //    A := fadd.fpsr3( B_hi, C_lo )
 //    B := max( B_hi, C_lo )
 //    b := min( B_hi, C_lo )
-{ .mfi
-      nop.m 999
-      fmax.s1 FR_B = FR_B_hi,FR_C_lo
-      nop.i 999
+(p0)  fadd.s3 FR_A = FR_B_hi,FR_C_lo
+	nop.i 999
 }
-;;
-
-// We use a right-shift trick to get the integer part of A into the rightmost
-// bits of the significand by adding 1.1000..00 * 2^63.  This operation is good
-// if |A| < 2^61, which it is in this case.  We are doing this to save a few
-// cycles over using fcvt.fx followed by fnorm.  The second step of the trick
-// is to subtract the same constant to float the rounded integer into a fp reg.
-
 { .mfi
-      nop.m 999
-//    N := round_to_nearest_integer_value( A );
-      fma.s1 FR_N_fix = FR_A, f1, FR_RSHF
-      nop.i 999
+	nop.m 999
+(p10) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmin.s1 FR_b = FR_B_hi,FR_C_lo
-      nop.i 999
+	nop.m 999
+(p0)  fsub.s1 FR_Tmp_C = FR_Tmp_C,FR_Tmp2_C
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-//    C_hi := C_hi - Tmp_C ...0 <= C_hi < 2^7
-      fsub.s1 FR_C_hi = FR_C_hi,FR_Tmp_C
-      nop.i 999
+	nop.m 999
+(p0)  fmax.s1 FR_B = FR_B_hi,FR_C_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-//    a := (B - A) + b: Exact - note that a is either 0 or 2^(-64).
-      fsub.s1 FR_a = FR_B,FR_A
-      nop.i 999
+	nop.m 999
+(p0)  fmin.s1 FR_b = FR_B_hi,FR_C_lo
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fms.s1 FR_N = FR_N_fix, f1, FR_RSHF
-      nop.i 999
+	nop.m 999
+(p11) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fadd.s1 FR_a = FR_a,FR_b
-      nop.i 999
+	nop.m 999
+//    N := round_to_nearest_integer_value( A );
+(p0)  fcvt.fx.s1 FR_N = FR_A
+	nop.i 999 ;;
 }
-;;
-
-//    f := A - N; Exact because lsb(A) >= 2^(-64) and |f| <= 1/2.
-//    N := convert to integer format( C_hi + N );
-//    M := P_0 * x_lo;
-//    N := N + M;
 { .mfi
-      nop.m 999
-      fsub.s1 FR_f = FR_A,FR_N
-      nop.i 999
+	nop.m 999
+//    C_hi := C_hi - Tmp_C ...0 <= C_hi < 2^7
+(p0)  fsub.s1 FR_C_hi = FR_C_hi,FR_Tmp_C
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fadd.s1 FR_N = FR_N,FR_C_hi
-      nop.i 999
+	nop.m 999
+//    a := (B - A) + b: Exact - note that a is either 0 or 2^(-64).
+(p0)  fsub.s1 FR_a = FR_B,FR_A
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fsub.s1 FR_D_hi = f0, FR_D_hi
-      nop.i 999
+	nop.m 999
+//    f := A - N; Exact because lsb(A) >= 2^(-64) and |f| <= 1/2.
+(p0)  fnorm.s1 FR_N = FR_N
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p9)  fsub.s1 FR_D_lo = f0, FR_D_lo
-      nop.i 999
+	nop.m 999
+(p0)  fadd.s1 FR_a = FR_a,FR_b
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fadd.s1 FR_g = FR_A_hi,FR_B_lo          // For Case 1, g=A_hi+B_lo
-      nop.i 999
+	nop.m 999
+(p0)  fsub.s1 FR_f = FR_A,FR_N
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fadd.s3 FR_A = FR_A_hi,FR_B_lo          // For Case 2, A=A_hi+B_lo w/ sf3
-      nop.i 999
+	nop.m 999
+//    N := convert to integer format( C_hi + N );
+//    M := P_0 * x_lo;
+//    N := N + M;
+(p0)  fadd.s1 FR_N = FR_N,FR_C_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      mov GR_Temp = 0x0FFCD                   // For Case 2, exponent of 2^-50
-      fmax.s1 FR_B = FR_A_hi,FR_B_lo          // For Case 2, B=max(A_hi,B_lo)
-      nop.i 999
-}
-;;
-
-//    f = f + a      Exact because a is 0 or 2^(-64);
+	nop.m 999
+//    f = f + a	Exact because a is 0 or 2^(-64);
 //    the msb of the sum is <= 1/2 and lsb >= 2^(-64).
-{ .mfi
-      setf.exp FR_TWOM50 = GR_Temp            // For Case 2, form 2^-50
-      fcvt.fx.s1 FR_N = FR_N
-      nop.i 999
+(p0)  fadd.s1 FR_f = FR_f,FR_a
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fadd.s1 FR_f = FR_f,FR_a
-      nop.i 999
+	nop.m 999
+//
+//    Create 2**(-33)
+//
+(p0)  fcvt.fx.s1 FR_N = FR_N
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmin.s1 FR_b = FR_A_hi,FR_B_lo          // For Case 2, b=min(A_hi,B_lo)
-      nop.i 999
+	nop.m 999
+(p0)  fabs FR_f_abs = FR_f
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fsub.s1 FR_a = FR_B,FR_A                // For Case 2, a=B-A
-      nop.i 999
+(p0)  getf.sig GR_N = FR_N
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
-{ .mfi
-      nop.m 999
-      fadd.s1 FR_s_hi = FR_f,FR_g             // For Case 1, s_hi=f+g
-      nop.i 999
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+(p0)  add GR_N = GR_N,GR_M ;;
 }
-{ .mfi
-      nop.m 999
-      fadd.s1 FR_f_hi = FR_A,FR_f             // For Case 2, f_hi=A+f
-      nop.i 999
+//    If sgn_x == 1 (that is original x was negative)
+//       N := 2^10 - N
+//       this maintains N to be non-negative, but still
+//       equivalent to the (negated N) mod 4.
+//    End If
+{ .mii
+(p12) sub GR_N = GR_Temp,GR_N
+(p0) cmp.eq.unc p12,p9 = 0x0,GR_sgn_x ;;
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fabs FR_f_abs = FR_f
-      nop.i 999
+	nop.m 999
+(p0)  fcmp.ge.unc.s1 p13, p10 = FR_f_abs,FR_TWOM33
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      getf.sig GR_N = FR_N
-      fsetc.s3 0x7F,0x40                 // Reset sf3 to user settings + td
-      nop.i 999
+	nop.m 999
+(p9) fsub.s1 FR_D_hi = f0, FR_D_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fsub.s1 FR_s_lo = FR_f,FR_s_hi          // For Case 1, s_lo=f-s_hi
-      nop.i 999
+	nop.m 999
+(p10)    fadd.s3 FR_A = FR_A_hi,FR_B_lo
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fsub.s1 FR_f_lo = FR_f,FR_f_hi          // For Case 2, f_lo=f-f_hi
-      nop.i 999
+	nop.m 999
+(p13)    fadd.s1 FR_g = FR_A_hi,FR_B_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi       // For Case 1, r_hi=s_hi*D_hi
-      nop.i 999
+	nop.m 999
+(p10)    fmax.s1 FR_B = FR_A_hi,FR_B_lo
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fadd.s1 FR_a = FR_a,FR_b                // For Case 2, a=a+b
-      nop.i 999
+	nop.m 999
+(p9) fsub.s1 FR_D_lo = f0, FR_D_lo
+	nop.i 999 ;;
 }
-;;
-
-
-//    If sgn_x == 1 (that is original x was negative)
-//       N := 2^10 - N
-//       this maintains N to be non-negative, but still
-//       equivalent to the (negated N) mod 4.
-//    End If
 { .mfi
-      add GR_N = GR_N,GR_M
-      fcmp.ge.s1 p13, p10 = FR_f_abs,FR_TWOM33
-      mov GR_Temp = 0x00400
+	nop.m 999
+(p10)    fmin.s1 FR_b = FR_A_hi,FR_B_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p9)  sub GR_N = GR_Temp,GR_N
-      fadd.s1 FR_s_lo = FR_s_lo,FR_g           // For Case 1, s_lo=s_lo+g
-      nop.i 999
+	nop.m 999
+(p0) fsetc.s3 0x7F,0x40
+	nop.i 999
 }
-{ .mfi
-      nop.m 999
-      fadd.s1 FR_f_lo = FR_f_lo,FR_A           // For Case 2, f_lo=f_lo+A
-      nop.i 999
+{ .mlx
+	nop.m 999
+(p10)    movl GR_Temp = 0x000000000000FFCD ;;
 }
-;;
-
-//       a := (B - A) + b      Exact.
+{ .mmf
+	nop.m 999
+(p10)    setf.exp FR_TWOM50 = GR_Temp
+(p10)    fadd.s1 FR_f_hi = FR_A,FR_f ;;
+}
+{ .mfi
+	nop.m 999
+//       a := (B - A) + b	Exact.
 //       Note that a is either 0 or 2^(-128).
 //       f_hi := A + f;
 //       f_lo := (f - f_hi) + A
@@ -1455,32 +1387,68 @@ __libm_pi_by_2_reduce:
 //       exact. If f = -2^(-64), then A + f is exact. Hence
 //       f-f_hi is -A exactly, giving f_lo = 0.
 //       f_lo := f_lo + a;
-
+(p10)    fsub.s1 FR_a = FR_B,FR_A
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
+(p13)    fadd.s1 FR_s_hi = FR_f,FR_g
+	nop.i 999 ;;
+}
+{ .mlx
+	nop.m 999
 //    If |f| >= 2^(-33)
 //       Case 1
 //       CASE := 1
 //       g := A_hi + B_lo;
 //       s_hi := f + g;
 //       s_lo := (f - s_hi) + g;
+(p13)    movl GR_CASE = 0x1 ;;
+}
+{ .mlx
+	nop.m 999
 //   Else
 //       Case 2
 //       CASE := 2
 //       A := fadd.fpsr3( A_hi, B_lo )
 //       B := max( A_hi, B_lo )
 //       b := min( A_hi, B_lo )
-
+(p10)    movl GR_CASE = 0x2
+}
 { .mfi
-      nop.m 999
-(p10) fcmp.ge.unc.s1 p14, p11 = FR_f_abs,FR_TWOM50
-      nop.i 999
+	nop.m 999
+(p10)    fsub.s1 FR_f_lo = FR_f,FR_f_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p13) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi //For Case 1, r_lo=s_hi*D_hi+r_hi
-      nop.i 999
+	nop.m 999
+(p10)    fadd.s1 FR_a = FR_a,FR_b
+	nop.i 999
 }
-;;
-
+{ .mfi
+	nop.m 999
+(p13)    fsub.s1 FR_s_lo = FR_f,FR_s_hi
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p13)    fadd.s1 FR_s_lo = FR_s_lo,FR_g
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p10)    fcmp.ge.unc.s1 p14, p11 = FR_f_abs,FR_TWOM50
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+//
+//       Create 2**(-50)
+(p10)    fadd.s1 FR_f_lo = FR_f_lo,FR_A
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
 //       If |f| >= 2^(-50) then
 //          s_hi := f_hi;
 //          s_lo := f_lo;
@@ -1489,90 +1457,84 @@ __libm_pi_by_2_reduce:
 //          s_hi := f_hi + f_lo
 //          s_lo := (f_hi - s_hi) + f_lo
 //       End If
-{ .mfi
-      nop.m 999
-(p14) mov FR_s_hi = FR_f_hi
-      nop.i 999
+(p14)  mov FR_s_hi = FR_f_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fadd.s1 FR_f_lo = FR_f_lo,FR_a
-      nop.i 999
+	nop.m 999
+(p10)    fadd.s1 FR_f_lo = FR_f_lo,FR_a
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p14) mov FR_s_lo = FR_f_lo
-      nop.i 999
+	nop.m 999
+(p14)  mov FR_s_lo = FR_f_lo
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p11) fadd.s1 FR_f_lo = FR_f_lo,FR_A_lo
-      nop.i 999
+	nop.m 999
+(p11)  fadd.s1 FR_f_lo = FR_f_lo,FR_A_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p11) fma.s1 FR_f_lo = FR_X,FR_p_4,FR_f_lo
-      nop.i 999
+	nop.m 999
+(p11)  fma.s1 FR_f_lo = FR_X,FR_p_4,FR_f_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p13) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo //For Case 1, r_lo=s_hi*D_lo+r_lo
-      nop.i 999
+	nop.m 999
+(p11)  fadd.s1 FR_s_hi = FR_f_hi,FR_f_lo
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p11) fadd.s1 FR_s_hi = FR_f_hi,FR_f_lo
-      nop.i 999
-}
-;;
-
+	nop.m 999
 //   r_hi :=  s_hi*D_hi
 //   r_lo :=  s_hi*D_hi - r_hi  with fma
 //   r_lo := (s_hi*D_lo + r_lo) + s_lo*D_hi
+(p0) fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi
+	nop.i 999
+}
 { .mfi
-      nop.m 999
-(p10) fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi
-      nop.i 999
+	nop.m 999
+(p11)  fsub.s1 FR_s_lo = FR_f_hi,FR_s_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p11) fsub.s1 FR_s_lo = FR_f_hi,FR_s_hi
-      nop.i 999
+	nop.m 999
+(p0) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p10) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi
-      nop.i 999
+	nop.m 999
+(p11)  fadd.s1 FR_s_lo = FR_s_lo,FR_f_lo
+	nop.i 999 ;;
+}
+{ .mmi
+	nop.m 999 ;;
+//   Return  N, r_hi, r_lo
+//   We do not return CASE
+(p0) stfe [GR_Address_of_Outputs] = FR_r_hi,16
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p11) fadd.s1 FR_s_lo = FR_s_lo,FR_f_lo
-      nop.i 999
+	nop.m 999
+(p0) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo
-      nop.i 999
+	nop.m 999
+(p0) fma.s1 FR_r_lo = FR_s_lo,FR_D_hi,FR_r_lo
+	nop.i 999 ;;
 }
-;;
-
-//   Return  N, r_hi, r_lo
-//   We do not return CASE
-{ .mfb
-      nop.m 999
-      fma.s1 FR_r_lo = FR_s_lo,FR_D_hi,FR_r_lo
-      br.ret.sptk   b0
+{ .mmi
+	nop.m 999 ;;
+(p0) stfe [GR_Address_of_Outputs] = FR_r_lo,-16
+	nop.i 999
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p0) br.ret.sptk   b0 ;;
 }
-;;
 
-.endp __libm_pi_by_2_reduce#
+.endp __libm_pi_by_2_reduce
+ASM_SIZE_DIRECTIVE(__libm_pi_by_2_reduce)
diff --git a/sysdeps/ia64/fpu/libm_support.h b/sysdeps/ia64/fpu/libm_support.h
index 50dac33133..5d3498dfc9 100644
--- a/sysdeps/ia64/fpu/libm_support.h
+++ b/sysdeps/ia64/fpu/libm_support.h
@@ -1,10 +1,9 @@
-/* file: libm_support.h */
-
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+//
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,14 +19,14 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -35,51 +34,45 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 
-// History: 02/02/2000 Initial version 
+// History: 02/02/2000 Initial version
 //          2/28/2000 added tags for logb and nextafter
-//          3/22/2000 Changes to support _LIB_VERSIONIMF variable
-//                    and filled some enum gaps. Added support for C99.  
+//          3/22/2000 Changes to support _LIB_VERSION variable
+//                    and filled some enum gaps. Added support for C99.
 //          5/31/2000 added prototypes for __libm_frexp_4l/8l
-//          8/10/2000 Changed declaration of _LIB_VERSIONIMF to work for library
+//          8/10/2000 Changed declaration of _LIB_VERSION to work for library
 //                    builds and other application builds (precompiler directives).
 //          8/11/2000 Added pointers-to-matherr-functions declarations to allow
 //                    for user-defined matherr functions in the dll build.
 //         12/07/2000 Added scalbn error_types values.
-//          5/01/2001 Added error_types values for C99 nearest integer 
-//                    functions.
-//          6/07/2001 Added error_types values for fdim.
-//          6/18/2001 Added include of complex_support.h.
-//          8/03/2001 Added error_types values for nexttoward, scalbln.
-//          8/23/2001 Corrected tag numbers from 186 and higher.
-//          8/27/2001 Added check for long int and long long int definitions.
-//         12/10/2001 Added error_types for erfc.
-//         12/27/2001 Added error_types for degree argument functions.
-//         01/02/2002 Added error_types for tand, cotd.
-//         01/04/2002 Delete include of complex_support.h
-//         01/23/2002 Deleted prototypes for __libm_frexp*.  Added check for
-//                    multiple int, long int, and long long int definitions.
-//         05/20/2002 Added error_types for cot.
-//         06/27/2002 Added error_types for sinhcosh.
-//         12/05/2002 Added error_types for annuity and compound
-//         04/10/2003 Added error_types for tgammal/tgamma/tgammaf
 //
 
+#ifndef __ASSEMBLER__
+#include <math.h>
+
+float __libm_frexp_4f( float x, int*  exp);
+float _GI___libm_frexp_4f( float x, int*  exp);
+float __libm_frexp_8f( float x, int*  exp);
+double __libm_frexp_4( double x, int*  exp);
+double _GI___libm_frexp_4( double x, int*  exp);
+double __libm_frexp_8( double x, int*  exp);
+long double __libm_frexp_4l( long double x, int*  exp);
+long double _GI___libm_frexp_4l( long double x, int*  exp);
+long double __libm_frexp_8l( long double x, int*  exp);
 void __libm_sincos_pi4(double,double*,double*,int);
 void __libm_y0y1(double , double *, double *);
 void __libm_j0j1(double , double *, double *);
+double __libm_lgamma_kernel(double,int*,int,int);
 double __libm_j0(double);
 double __libm_j1(double);
 double __libm_jn(int,double);
 double __libm_y0(double);
 double __libm_y1(double);
 double __libm_yn(int,double);
-double __libm_copysign (double, double);
-float __libm_copysignf (float, float);
-long double __libm_copysignl (long double, long double);
 
+extern double rint(double);
 extern double sqrt(double);
 extern double fabs(double);
 extern double log(double);
@@ -119,31 +112,24 @@ extern long double log1pl(long double);
 extern long double logl(long double);
 extern long double sqrtl(long double);
 extern long double expl(long double);
-extern long double fabsl(long double);
 
-#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64))
-#error integer size not established; define SIZE_INT_32 or SIZE_INT_64
-#endif
-
-#if (defined(SIZE_INT_32) && defined(SIZE_INT_64))
-#error multiple integer size definitions; define SIZE_INT_32 or SIZE_INT_64
-#endif
-
-#if !(defined(SIZE_LONG_INT_32) || defined(SIZE_LONG_INT_64))
-#error long int size not established; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64
-#endif
+extern long lround(double);
+extern long lroundf(float);
+extern long lroundl(long double);
 
-#if (defined(SIZE_LONG_INT_32) && defined(SIZE_LONG_INT_64))
-#error multiple long int size definitions; define SIZE_LONG_INT_32 or SIZE_LONG_INT_64
+#if !(defined(SIZE_INT_32) || defined(SIZE_INT_64))
+    #error integer size not established; define SIZE_INT_32 or SIZE_INT_64
 #endif
 
-#if !(defined(SIZE_LONG_LONG_INT_32) || defined(SIZE_LONG_LONG_INT_64))
-#error long long int size not established; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64
-#endif
+struct fp64 { /*/ sign:1 exponent:11 significand:52 (implied leading 1)*/
+  unsigned lo_significand:32;
+  unsigned hi_significand:20;
+  unsigned exponent:11;
+  unsigned sign:1;
+};
 
-#if (defined(SIZE_LONG_LONG_INT_32) && defined(SIZE_LONG_LONG_INT_64))
-#error multiple long long int size definitions; define SIZE_LONG_LONG_INT_32 or SIZE_LONG_LONG_INT_64
-#endif
+#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI)
+#define f64abs(x) ((x) < 0.0 ? -(x) : (x))
 
 typedef enum
 {
@@ -162,14 +148,14 @@ typedef enum
   powl_neg_to_non_integer,                       /* 22     */
   powl_nan_to_zero,                              /* 23     */
   pow_overflow,  pow_underflow,                  /* 24, 25 */
-  pow_zero_to_zero,                              /* 26     */ 
+  pow_zero_to_zero,                              /* 26     */
   pow_zero_to_negative,                          /* 27     */
   pow_neg_to_non_integer,                        /* 28     */
   pow_nan_to_zero,                               /* 29     */
   powf_overflow, powf_underflow,                 /* 30, 31 */
   powf_zero_to_zero,                             /* 32     */
-  powf_zero_to_negative,                         /* 33     */ 
-  powf_neg_to_non_integer,                       /* 34     */ 
+  powf_zero_to_negative,                         /* 33     */
+  powf_neg_to_non_integer,                       /* 34     */
   powf_nan_to_zero,                              /* 35     */
   atan2l_zero,                                   /* 36     */
   atan2_zero,                                    /* 37     */
@@ -195,13 +181,13 @@ typedef enum
   y0l_zero, y0l_negative,y0l_gt_loss,            /* 66, 67, 68 */
   y0_zero, y0_negative,y0_gt_loss,               /* 69, 70, 71 */
   y0f_zero, y0f_negative,y0f_gt_loss,            /* 72, 73, 74 */
-  y1l_zero, y1l_negative,y1l_gt_loss,            /* 75, 76, 77 */ 
-  y1_zero, y1_negative,y1_gt_loss,               /* 78, 79, 80 */ 
-  y1f_zero, y1f_negative,y1f_gt_loss,            /* 81, 82, 83 */ 
+  y1l_zero, y1l_negative,y1l_gt_loss,            /* 75, 76, 77 */
+  y1_zero, y1_negative,y1_gt_loss,               /* 78, 79, 80 */
+  y1f_zero, y1f_negative,y1f_gt_loss,            /* 81, 82, 83 */
   ynl_zero, ynl_negative,ynl_gt_loss,            /* 84, 85, 86 */
   yn_zero, yn_negative,yn_gt_loss,               /* 87, 88, 89 */
   ynf_zero, ynf_negative,ynf_gt_loss,            /* 90, 91, 92 */
-  j0l_gt_loss,                                   /* 93 */ 
+  j0l_gt_loss,                                   /* 93 */
   j0_gt_loss,                                    /* 94 */
   j0f_gt_loss,                                   /* 95 */
   j1l_gt_loss,                                   /* 96 */
@@ -215,7 +201,7 @@ typedef enum
   lgammaf_overflow, lgammaf_negative, lgammaf_reserve,/* 108, 109, 110 */
   gammal_overflow,gammal_negative, gammal_reserve,    /* 111, 112, 113 */
   gamma_overflow, gamma_negative, gamma_reserve,      /* 114, 115, 116 */
-  gammaf_overflow,gammaf_negative,gammaf_reserve,     /* 117, 118, 119 */   
+  gammaf_overflow,gammaf_negative,gammaf_reserve,     /* 117, 118, 119 */
   fmodl_by_zero,                                 /* 120 */
   fmod_by_zero,                                  /* 121 */
   fmodf_by_zero,                                 /* 122 */
@@ -236,7 +222,7 @@ typedef enum
   ldexp_overflow,    ldexp_underflow,            /* 146, 147 */
   ldexpf_overflow,   ldexpf_underflow,           /* 148, 149 */
   logbl_zero,   logb_zero, logbf_zero,           /* 150, 151, 152 */
-  nextafterl_overflow,   nextafter_overflow,  
+  nextafterl_overflow,   nextafter_overflow,
   nextafterf_overflow,                           /* 153, 154, 155 */
   ilogbl_zero,  ilogb_zero, ilogbf_zero,         /* 156, 157, 158 */
   exp2l_overflow, exp2l_underflow,               /* 159, 160 */
@@ -249,406 +235,18 @@ typedef enum
   log2f_zero,    log2f_negative,                 /* 172, 173 */
   scalbnl_overflow, scalbnl_underflow,           /* 174, 175 */
   scalbn_overflow,  scalbn_underflow,            /* 176, 177 */
-  scalbnf_overflow, scalbnf_underflow,           /* 178, 179 */
-  remquol_by_zero,                               /* 180 */
-  remquo_by_zero,                                /* 181 */
-  remquof_by_zero,                               /* 182 */
-  lrintl_large, lrint_large, lrintf_large,       /* 183, 184, 185 */
-  llrintl_large, llrint_large, llrintf_large,    /* 186, 187, 188 */
-  lroundl_large, lround_large, lroundf_large,    /* 189, 190, 191 */
-  llroundl_large, llround_large, llroundf_large, /* 192, 193, 194 */
-  fdiml_overflow, fdim_overflow, fdimf_overflow, /* 195, 196, 197 */
-  nexttowardl_overflow,   nexttoward_overflow,   
-  nexttowardf_overflow,                          /* 198, 199, 200 */
-  scalblnl_overflow, scalblnl_underflow,         /* 201, 202 */
-  scalbln_overflow,  scalbln_underflow,          /* 203, 204 */
-  scalblnf_overflow, scalblnf_underflow,         /* 205, 206 */
-  erfcl_underflow, erfc_underflow, erfcf_underflow, /* 207, 208, 209 */
-  acosdl_gt_one, acosd_gt_one, acosdf_gt_one,    /* 210, 211, 212 */
-  asindl_gt_one, asind_gt_one, asindf_gt_one,    /* 213, 214, 215 */
-  atan2dl_zero, atan2d_zero, atan2df_zero,       /* 216, 217, 218 */
-  tandl_overflow, tand_overflow, tandf_overflow, /* 219, 220, 221 */
-  cotdl_overflow, cotd_overflow, cotdf_overflow, /* 222, 223, 224 */
-  cotl_overflow, cot_overflow, cotf_overflow,    /* 225, 226, 227 */
-  sinhcoshl_overflow, sinhcosh_overflow, sinhcoshf_overflow, /* 228, 229, 230 */
-  annuityl_by_zero, annuity_by_zero, annuityf_by_zero, /* 231, 232, 233 */
-  annuityl_less_m1, annuity_less_m1, annuityf_less_m1, /* 234, 235, 236 */
-  annuityl_overflow, annuity_overflow, annuityf_overflow, /* 237, 238, 239 */
-  annuityl_underflow, annuity_underflow, annuityf_underflow, /* 240, 241, 242 */
-  compoundl_by_zero, compound_by_zero, compoundf_by_zero, /* 243, 244, 245 */
-  compoundl_less_m1, compound_less_m1, compoundf_less_m1, /* 246, 247, 248 */
-  compoundl_overflow, compound_overflow, compoundf_overflow, /* 249, 250, 251 */
-  compoundl_underflow, compound_underflow, compoundf_underflow, /* 252, 253, 254 */
-  tgammal_overflow, tgammal_negative, tgammal_reserve, /* 255, 256, 257 */
-  tgamma_overflow, tgamma_negative, tgamma_reserve, /* 258, 259, 260 */
-  tgammaf_overflow, tgammaf_negative, tgammaf_reserve, /* 261, 262, 263 */
+  scalbnf_overflow, scalbnf_underflow            /* 178, 179 */
 } error_types;
 
 void __libm_error_support(void*,void*,void*,error_types);
-#ifdef _LIBC
 libc_hidden_proto(__libm_error_support)
-#endif
-
-#define HI_SIGNIFICAND_LESS(X, HI) ((X)->hi_significand < 0x ## HI)
-#define f64abs(x) ((x) < 0.0 ? -(x) : (x))
-
-#if !defined(__USE_EXTERNAL_FPMEMTYP_H__)
-
-#define BIAS_32  0x007F
-#define BIAS_64  0x03FF
-#define BIAS_80  0x3FFF
-
-#define MAXEXP_32  0x00FE
-#define MAXEXP_64  0x07FE
-#define MAXEXP_80  0x7FFE
-
-#define EXPINF_32  0x00FF
-#define EXPINF_64  0x07FF
-#define EXPINF_80  0x7FFF
-
-struct fp32 { /*// sign:1 exponent:8 significand:23 (implied leading 1)*/
-#if defined(SIZE_INT_32)
-    unsigned significand:23;
-    unsigned exponent:8;
-    unsigned sign:1;
-#elif defined(SIZE_INT_64)
-    unsigned significand:23;
-    unsigned exponent:8;
-    unsigned sign:1;
-#endif
-};
-
-struct fp64 { /*/ sign:1 exponent:11 significand:52 (implied leading 1)*/
-#if defined(SIZE_INT_32)
-    unsigned lo_significand:32;
-    unsigned hi_significand:20;
-    unsigned exponent:11;
-    unsigned sign:1;
-#elif defined(SIZE_INT_64)
-    unsigned significand:52;
-    unsigned exponent:11;
-    unsigned sign:1;
-#endif
-};
-
-struct fp80 { /*/ sign:1 exponent:15 significand:64 (NO implied bits) */
-#if defined(SIZE_INT_32)
-    unsigned         lo_significand;
-    unsigned         hi_significand;
-    unsigned         exponent:15;
-    unsigned         sign:1;
-#elif defined(SIZE_INT_64)
-    unsigned         significand;
-    unsigned         exponent:15;
-    unsigned         sign:1;
-#endif
-};
-
-#endif /*__USE_EXTERNAL_FPMEMTYP_H__*/
-
-/* macros to form a double value in hex representation (unsigned int type) */
-
-#define DOUBLE_HEX(hi,lo) 0x##lo,0x##hi /*LITTLE_ENDIAN*/
-
-/* macros to form a long double value in hex representation (unsigned short type) */
-
-#if defined(_WIN32) || defined(_WIN64)
-#define LDOUBLE_ALIGN 16
-#else
-#define LDOUBLE_ALIGN 12
-#endif
-
-#if (LDOUBLE_ALIGN == 16)
-#define _XPD_ ,0x0000,0x0000,0x0000
-#else /*12*/
-#define _XPD_ ,0x0000
-#endif
-
-#define LDOUBLE_HEX(w4,w3,w2,w1,w0) 0x##w0,0x##w1,0x##w2,0x##w3,0x##w4 _XPD_ /*LITTLE_ENDIAN*/
-
-/* macros to sign-expand low 'num' bits of 'val' to native integer */
 
-#if defined(SIZE_INT_32)
-# define SIGN_EXPAND(val,num)  ((int)(val) << (32-(num))) >> (32-(num)) /* sign expand of 'num' LSBs */
-#elif defined(SIZE_INT_64)
-# define SIGN_EXPAND(val,num)  ((int)(val) << (64-(num))) >> (64-(num)) /* sign expand of 'num' LSBs */
-#endif
-
-/* macros to form pointers to FP number on-the-fly */
-
-#define FP32(f)  ((struct fp32 *)&f)
-#define FP64(d)  ((struct fp64 *)&d)
-#define FP80(ld) ((struct fp80 *)&ld)
-
-/* macros to extract signed low and high doubleword of long double */
-
-#if defined(SIZE_INT_32)
-# define HI_DWORD_80(ld) ((((FP80(ld)->sign << 15) | FP80(ld)->exponent) << 16) | \
-                          ((FP80(ld)->hi_significand >> 16) & 0xFFFF))
-# define LO_DWORD_80(ld) SIGN_EXPAND(FP80(ld)->lo_significand, 32)
-#elif defined(SIZE_INT_64)
-# define HI_DWORD_80(ld) ((((FP80(ld)->sign << 15) | FP80(ld)->exponent) << 16) | \
-                          ((FP80(ld)->significand >> 48) & 0xFFFF))
-# define LO_DWORD_80(ld) SIGN_EXPAND(FP80(ld)->significand, 32)
-#endif
-
-/* macros to extract hi bits of significand.
- * note that explicit high bit do not count (returns as is)
- */
-
-#if defined(SIZE_INT_32)
-# define HI_SIGNIFICAND_80(X,NBITS) ((X)->hi_significand >> (31 - (NBITS)))
-#elif defined(SIZE_INT_64)
-# define HI_SIGNIFICAND_80(X,NBITS) ((X)->significand >> (63 - (NBITS)))
-#endif
-
-/* macros to check, whether a significand bits are all zero, or some of them are non-zero.
- * note that SIGNIFICAND_ZERO_80 tests high bit also, but SIGNIFICAND_NONZERO_80 does not
- */
-
-#define SIGNIFICAND_ZERO_32(X)     ((X)->significand == 0)
-#define SIGNIFICAND_NONZERO_32(X)  ((X)->significand != 0)
+#define BIAS_64  1023
+#define EXPINF_64  2047
 
-#if defined(SIZE_INT_32)
-# define SIGNIFICAND_ZERO_64(X)    (((X)->hi_significand == 0) && ((X)->lo_significand == 0))
-# define SIGNIFICAND_NONZERO_64(X) (((X)->hi_significand != 0) || ((X)->lo_significand != 0))
-#elif defined(SIZE_INT_64)
-# define SIGNIFICAND_ZERO_64(X)    ((X)->significand == 0)
-# define SIGNIFICAND_NONZERO_64(X) ((X)->significand != 0)
-#endif
-
-#if defined(SIZE_INT_32)
-# define SIGNIFICAND_ZERO_80(X)    (((X)->hi_significand == 0x00000000) && ((X)->lo_significand == 0))
-# define SIGNIFICAND_NONZERO_80(X) (((X)->hi_significand != 0x80000000) || ((X)->lo_significand != 0))
-#elif defined(SIZE_INT_64)
-# define SIGNIFICAND_ZERO_80(X)    ((X)->significand == 0x0000000000000000)
-# define SIGNIFICAND_NONZERO_80(X) ((X)->significand != 0x8000000000000000)
-#endif
-
-/* macros to compare long double with constant value, represented as hex */
-
-#define SIGNIFICAND_EQ_HEX_32(X,BITS) ((X)->significand == 0x ## BITS)
-#define SIGNIFICAND_GT_HEX_32(X,BITS) ((X)->significand >  0x ## BITS)
-#define SIGNIFICAND_GE_HEX_32(X,BITS) ((X)->significand >= 0x ## BITS)
-#define SIGNIFICAND_LT_HEX_32(X,BITS) ((X)->significand <  0x ## BITS)
-#define SIGNIFICAND_LE_HEX_32(X,BITS) ((X)->significand <= 0x ## BITS)
-
-#if defined(SIZE_INT_32)
-# define SIGNIFICAND_EQ_HEX_64(X,HI,LO) \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand == 0x ## LO))
-# define SIGNIFICAND_GT_HEX_64(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand >  0x ## LO)))
-# define SIGNIFICAND_GE_HEX_64(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand >= 0x ## LO)))
-# define SIGNIFICAND_LT_HEX_64(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand <  0x ## LO)))
-# define SIGNIFICAND_LE_HEX_64(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand <= 0x ## LO)))
-#elif defined(SIZE_INT_64)
-# define SIGNIFICAND_EQ_HEX_64(X,HI,LO) ((X)->significand == 0x ## HI ## LO)
-# define SIGNIFICAND_GT_HEX_64(X,HI,LO) ((X)->significand >  0x ## HI ## LO)
-# define SIGNIFICAND_GE_HEX_64(X,HI,LO) ((X)->significand >= 0x ## HI ## LO)
-# define SIGNIFICAND_LT_HEX_64(X,HI,LO) ((X)->significand <  0x ## HI ## LO)
-# define SIGNIFICAND_LE_HEX_64(X,HI,LO) ((X)->significand <= 0x ## HI ## LO)
-#endif
-	
-#if defined(SIZE_INT_32)
-# define SIGNIFICAND_EQ_HEX_80(X,HI,LO) \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand == 0x ## LO))
-# define SIGNIFICAND_GT_HEX_80(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand >  0x ## LO)))
-# define SIGNIFICAND_GE_HEX_80(X,HI,LO) (((X)->hi_significand > 0x ## HI) || \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand >= 0x ## LO)))
-# define SIGNIFICAND_LT_HEX_80(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand <  0x ## LO)))
-# define SIGNIFICAND_LE_HEX_80(X,HI,LO) (((X)->hi_significand < 0x ## HI) || \
-    (((X)->hi_significand == 0x ## HI) && ((X)->lo_significand <= 0x ## LO)))
-#elif defined(SIZE_INT_64)
-# define SIGNIFICAND_EQ_HEX_80(X,HI,LO) ((X)->significand == 0x ## HI ## LO)
-# define SIGNIFICAND_GT_HEX_80(X,HI,LO) ((X)->significand >  0x ## HI ## LO)
-# define SIGNIFICAND_GE_HEX_80(X,HI,LO) ((X)->significand >= 0x ## HI ## LO)
-# define SIGNIFICAND_LT_HEX_80(X,HI,LO) ((X)->significand <  0x ## HI ## LO)
-# define SIGNIFICAND_LE_HEX_80(X,HI,LO) ((X)->significand <= 0x ## HI ## LO)
-#endif
-
-#define VALUE_EQ_HEX_32(X,EXP,BITS) \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_EQ_HEX_32(X, BITS)))
-#define VALUE_GT_HEX_32(X,EXP,BITS) (((X)->exponent > (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_GT_HEX_32(X, BITS))))
-#define VALUE_GE_HEX_32(X,EXP,BITS) (((X)->exponent > (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_GE_HEX_32(X, BITS))))
-#define VALUE_LT_HEX_32(X,EXP,BITS) (((X)->exponent < (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_LT_HEX_32(X, BITS))))
-#define VALUE_LE_HEX_32(X,EXP,BITS) (((X)->exponent < (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_LE_HEX_32(X, BITS))))
-
-#define VALUE_EQ_HEX_64(X,EXP,HI,LO) \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_EQ_HEX_64(X, HI, LO)))
-#define VALUE_GT_HEX_64(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_GT_HEX_64(X, HI, LO))))
-#define VALUE_GE_HEX_64(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_GE_HEX_64(X, HI, LO))))
-#define VALUE_LT_HEX_64(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_LT_HEX_64(X, HI, LO))))
-#define VALUE_LE_HEX_64(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_LE_HEX_64(X, HI, LO))))
-
-#define VALUE_EQ_HEX_80(X,EXP,HI,LO) \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_EQ_HEX_80(X, HI, LO)))
-#define VALUE_GT_HEX_80(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_GT_HEX_80(X, HI, LO))))
-#define VALUE_GE_HEX_80(X,EXP,HI,LO) (((X)->exponent > (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_GE_HEX_80(X, HI, LO))))
-#define VALUE_LT_HEX_80(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_LT_HEX_80(X, HI, LO))))
-#define VALUE_LE_HEX_80(X,EXP,HI,LO) (((X)->exponent < (EXP)) || \
-   (((X)->exponent == (EXP)) && (SIGNIFICAND_LE_HEX_80(X, HI, LO))))
-
-/* macros to compare two long doubles */
-
-#define SIGNIFICAND_EQ_32(X,Y) ((X)->significand == (Y)->significand)
-#define SIGNIFICAND_GT_32(X,Y) ((X)->significand > (Y)->significand)
-#define SIGNIFICAND_GE_32(X,Y) ((X)->significand >= (Y)->significand)
-#define SIGNIFICAND_LT_32(X,Y) ((X)->significand < (Y)->significand)
-#define SIGNIFICAND_LE_32(X,Y) ((X)->significand <= (Y)->significand)
-
-#if defined(SIZE_INT_32)
-# define SIGNIFICAND_EQ_64(X,Y) \
-	(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand))
-# define SIGNIFICAND_GT_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
-	(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >  (Y)->lo_significand)))
-# define SIGNIFICAND_GE_64(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
-	(((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand)))
-# define SIGNIFICAND_LT_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
-    (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <  (Y)->lo_significand)))
-# define SIGNIFICAND_LE_64(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
-    (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <= (Y)->lo_significand)))
-#elif defined(SIZE_INT_64)
-# define SIGNIFICAND_EQ_64(X,Y) ((X)->significand == (Y)->significand)
-# define SIGNIFICAND_GT_64(X,Y) ((X)->significand >  (Y)->significand)
-# define SIGNIFICAND_GE_64(X,Y) ((X)->significand >= (Y)->significand)
-# define SIGNIFICAND_LT_64(X,Y) ((X)->significand <  (Y)->significand)
-# define SIGNIFICAND_LE_64(X,Y) ((X)->significand <= (Y)->significand)
-#endif
-
-#if defined(SIZE_INT_32)
-# define SIGNIFICAND_EQ_80(X,Y) \
-    (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand == (Y)->lo_significand))
-# define SIGNIFICAND_GT_80(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
-    (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >  (Y)->lo_significand)))
-# define SIGNIFICAND_GE_80(X,Y) (((X)->hi_significand > (Y)->hi_significand) || \
-    (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand >= (Y)->lo_significand)))
-# define SIGNIFICAND_LT_80(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
-    (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <  (Y)->lo_significand)))
-# define SIGNIFICAND_LE_80(X,Y) (((X)->hi_significand < (Y)->hi_significand) || \
-    (((X)->hi_significand == (Y)->hi_significand) && ((X)->lo_significand <= (Y)->lo_significand)))
-#elif defined(SIZE_INT_64)
-# define SIGNIFICAND_EQ_80(X,Y) ((X)->significand == (Y)->significand)
-# define SIGNIFICAND_GT_80(X,Y) ((X)->significand >  (Y)->significand)
-# define SIGNIFICAND_GE_80(X,Y) ((X)->significand >= (Y)->significand)
-# define SIGNIFICAND_LT_80(X,Y) ((X)->significand <  (Y)->significand)
-# define SIGNIFICAND_LE_80(X,Y) ((X)->significand <= (Y)->significand)
-#endif
-
-#define VALUE_EQ_32(X,Y) \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_32(X, Y)))
-#define VALUE_GT_32(X,Y) (((X)->exponent > (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GT_32(X, Y))))
-#define VALUE_GE_32(X,Y) (((X)->exponent > (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GE_32(X, Y))))
-#define VALUE_LT_32(X,Y) (((X)->exponent < (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_32(X, Y))))
-#define VALUE_LE_32(X,Y) (((X)->exponent < (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_32(X, Y))))
-   
-#define VALUE_EQ_64(X,Y) \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_64(X, Y)))
-#define VALUE_GT_64(X,Y) (((X)->exponent > (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GT_64(X, Y))))
-#define VALUE_GE_64(X,Y) (((X)->exponent > (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GE_64(X, Y))))
-#define VALUE_LT_64(X,Y) (((X)->exponent < (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_64(X, Y))))
-#define VALUE_LE_64(X,Y) (((X)->exponent < (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_64(X, Y))))
-   
-#define VALUE_EQ_80(X,Y) \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_EQ_80(X, Y)))
-#define VALUE_GT_80(X,Y) (((X)->exponent > (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GT_80(X, Y))))
-#define VALUE_GE_80(X,Y) (((X)->exponent > (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_GE_80(X, Y))))
-#define VALUE_LT_80(X,Y) (((X)->exponent < (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LT_80(X, Y))))
-#define VALUE_LE_80(X,Y) (((X)->exponent < (Y)->exponent) || \
-   (((X)->exponent == (Y)->exponent) && (SIGNIFICAND_LE_80(X, Y))))
-
-/* add/subtract 1 ulp macros */
-
-#if defined(SIZE_INT_32)
-# define ADD_ULP_80(X) \
-    if ((++(X)->lo_significand == 0) && \
-        (++(X)->hi_significand == (((X)->exponent == 0) ? 0x80000000 : 0))) \
-    { \
-        (X)->hi_significand |= 0x80000000; \
-        ++(X)->exponent; \
-    }
-# define SUB_ULP_80(X) \
-    if (--(X)->lo_significand == 0xFFFFFFFF) { \
-        --(X)->hi_significand; \
-        if (((X)->exponent != 0) && \
-            ((X)->hi_significand == 0x7FFFFFFF) && \
-            (--(X)->exponent != 0)) \
-        { \
-            (X)->hi_significand |= 0x80000000; \
-        } \
-    }
-#elif defined(SIZE_INT_64)
-# define ADD_ULP_80(X) \
-    if (++(X)->significand == (((X)->exponent == 0) ? 0x8000000000000000 : 0))) { \
-        (X)->significand |= 0x8000000000000000; \
-        ++(X)->exponent; \
-    }
-# define SUB_ULP_80(X) \
-    { \
-        --(X)->significand; \
-        if (((X)->exponent != 0) && \
-            ((X)->significand == 0x7FFFFFFFFFFFFFFF) && \
-            (--(X)->exponent != 0)) \
-        { \
-            (X)->significand |= 0x8000000000000000; \
-        } \
-    }
-#endif
-
-
-
-#if (defined(_WIN32) && !defined(_WIN64))
-
-#define FP80_DECLARE()
-#define _FPC_64    0x0300
-static unsigned short __wControlWord, __wNewControlWord;
-#define FP80_SET() { \
-        __asm { fnstcw   word ptr [__wControlWord] }   \
-        __wNewControlWord = __wControlWord | _FPC_64;  \
-        __asm { fldcw   word ptr [__wNewControlWord] } \
-    }
-#define FP80_RESET() { \
-        __asm { fldcw   word ptr [__wControlWord] } \
-    }
-#else /* defined(_WIN32) && !defined(_WIN64) */
-
-#define FP80_DECLARE()
-#define FP80_SET()
-#define FP80_RESET()
-
-#endif  /* defined(_WIN32) && !defined(_WIN64) */
-
-
-#ifdef _LIBC
-# include <math.h>
-#else
+#define DOUBLE_HEX(HI, LO) 0x ## LO, 0x ## HI
 
+#if 0
 static const unsigned INF[] = {
     DOUBLE_HEX(7ff00000, 00000000),
     DOUBLE_HEX(fff00000, 00000000)
@@ -657,12 +255,12 @@ static const unsigned INF[] = {
 static const double _zeroo = 0.0;
 static const double _bigg = 1.0e300;
 static const double _ponee = 1.0;
-static const double _nonee = -1.0; 
+static const double _nonee = -1.0;
 
 #define INVALID    (_zeroo * *((double*)&INF[0]))
-#define PINF       *((double*)&INF[0]) 
-#define NINF       -PINF 
-#define PINF_DZ    (_ponee/_zeroo) 
+#define PINF       *((double*)&INF[0])
+#define NINF       -PINF
+#define PINF_DZ    (_ponee/_zeroo)
 #define X_TLOSS    1.41484755040568800000e+16
 #endif
 
@@ -680,7 +278,7 @@ struct __exception
   char *name;
   double arg1, arg2, retval;
 };
-# else 
+# else
 
 #  ifndef _LIBC
 struct exception
@@ -702,18 +300,18 @@ struct exceptionl
 };
 
 #ifdef _MS_
-#define	MATHERR_F	_matherrf
-#define	MATHERR_D	_matherr
+#define        MATHERR_F       _matherrf
+#define        MATHERR_D       _matherr
 #else
-#define	MATHERR_F	matherrf
-#define	MATHERR_D	matherr
+#define        MATHERR_F       matherrf
+#define        MATHERR_D       matherr
 #endif
 
 # ifdef __cplusplus
-#define	EXC_DECL_D	__exception
+#define        EXC_DECL_D      __exception
 #else
 // exception is a reserved name in C++
-#define	EXC_DECL_D	exception
+#define        EXC_DECL_D      exception
 #endif
 
 extern int MATHERR_F(struct exceptionf*);
@@ -726,7 +324,7 @@ extern int matherrl(struct exceptionl*);
 #define ERRNO_DOMAIN errno = EDOM
 
 
-// Add code to support _LIB_VERSIONIMF
+// Add code to support _LIB_VERSION
 #ifndef _LIBC
 typedef enum
 {
@@ -737,19 +335,29 @@ typedef enum
     _ISOC_       // ISO C9X
 } _LIB_VERSION_TYPE;
 
+extern _LIB_VERSION_TYPE _LIB_VERSION;
+#endif
 
-#if !defined( LIBM_BUILD )
-#if defined( _DLL )
-extern _LIB_VERSION_TYPE __declspec(dllimport) _LIB_VERSIONIMF;
-#else
-extern _LIB_VERSION_TYPE _LIB_VERSIONIMF;
-#endif	/* _DLL */
-#else
-extern int (*pmatherrf)(struct exceptionf*);
-extern int (*pmatherr)(struct EXC_DECL_D*);
-extern int (*pmatherrl)(struct exceptionl*);
-#endif	/* LIBM_BUILD */
-
-// This is a run-time variable and may affect
+// This is a run-time variable and may affect
 // floating point behavior of the libm functions
+
+#elif defined _LIBC
+
+# if !defined NOT_IN_libc && defined SHARED && defined DO_VERSIONING \
+     && !defined HAVE_BROKEN_ALIAS_ATTRIBUTE && !defined NO_HIDDEN
+#  define __libm_error_support  __GI___libm_error_support
+# endif
+
+#endif	/* __ASSEMBLER__ */
+
+/* Support for compatible assembler handling.  */
+#if !defined L && defined _LIBC
+#define L(name) .L##name
+#endif
+#ifdef __ELF__
+#define ASM_SIZE_DIRECTIVE(name) .size name,.-name
+#define ASM_TYPE_DIRECTIVE(name,T) .type name,T
+#else
+#define ASM_SIZE_DIRECTIVE(name)
+#define ASM_TYPE_DIRECTIVE(name,T)
 #endif
diff --git a/sysdeps/ia64/fpu/s_atan.S b/sysdeps/ia64/fpu/s_atan.S
index 720ecad28a..c0daabd3d7 100644
--- a/sysdeps/ia64/fpu/s_atan.S
+++ b/sysdeps/ia64/fpu/s_atan.S
@@ -1,10 +1,10 @@
 .file "atan.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,734 +20,947 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00  Initial version
-// 04/13/00  Improved speed
-// 04/19/00  Removed the qualifying predicate from the fmerge.s that
-//           takes the absolute value.
-// 06/16/00  Reassigned FP registers to eliminate stalls on loads
-// 08/30/00  Saved 5 cycles in main path by rearranging large argument logic
-//           and delaying use of result of fcmp in load by 1 group
-// 05/20/02  Cleaned up namespace and sf0 syntax
-// 08/20/02  Use atan2 algorithm with x=1 for better accuracy
-// 02/06/03  Reordered header: .section, .global, .proc, .align
+// 2/02/00: Initial version
+// 4/13/00: Improved speed
+// 4/19/00: Removed the qualifying predicate from the fmerge.s that
+//          takes the absolute value.
+// 6/16/00: Reassigned FP registers to eliminate stalls on loads
+// 8/30/00: Saved 5 cycles in main path by rearranging large argument logic
+//          and delaying use of result of fcmp in load by 1 group
 //
 // API
 //==============================================================
-// double atan(double Y)
+// double atan( double x);
 //
 // Overview of operation
 //==============================================================
+// atan(x) = sign(X)pi/2 - atan(1/x)
 //
-// The atan function returns values in the interval [-pi/2,+pi/2].
-//
-// The algorithm used is the atan2(Y,X) algorithm where we fix X=1.0.
-//
-// There are two basic paths: swap true and swap false.
-// atan2(Y,X) ==> atan2(V/U) where U >= V. If Y > X, we must swap.
-//
-// p6  swap True    |Y| > |X|
-// p7  swap False   |Y| <= |X|
-//
-//
-// Simple trigonometric identities show
-//   Region 1
-//         |Y|<=1.0, V=Y, U=1.0     atan2(Y,X) = sgnY * (0 + atan(V/U))
+// We have two paths: |x| > 1 and |x| <= 1
 //
-//   Region 2
-//         |Y|>1.0, V=1.0, U=Y      atan2(Y,X) = sgnY * (pi/2 - atan(V/U))
+// |x| >  1
+// ==========================================
 //
+// c  = frcpa(x) which is approximately 1/x
 //
-// We compute atan(V/U) from the identity
-//      atan(z) + atan([(V/U)-z] / [1+(V/U)z])
-//      where z is a limited precision approximation (16 bits) to V/U
+// xc = 1- B
+// B  = 1-xc
 //
-// z is calculated with the assistance of the frcpa instruction.
+// Approximate 1/(1-B)^k by a polynomial in B, poly(B)
+// k is 45.
 //
-// atan(z) is calculated by a polynomial z + z^3 * p(w),  w=z^2
-// where p(w) = P0+P1*w+...+P22*w^22
+// poly(B)     = 1 + r1 B + r2 B^2 + ...+ r10 B^10
 //
-// Let d = [(V/U)-z] / [1+(V/U)z]) = (V-U*z)/(U+V*z)
-//
-// Approximate atan(d) by d + P0*d^3
-// Let F = 1/(U+V*z) * (1-a), where |a|< 2^-8.8.
-// Compute q(a) = 1 + a + ... + a^5.
-// Then F*q(a) approximates the reciprocal to more than 50 bits.
+// c^k         = (1-B)^k/x^k
+// c^k/(1-B)^k = 1/x^k 
+// c^k poly(B) = 1/x^k
+
+// poly(x)     = series(atan(1/x)) = 1/x - 1/(3x^3) + 1/(5x^5) - 1/(7x^7) .... + 1/(45x^45)
+//                                 = 1/x^45 ( x^44 - x^42/3 + x^40/5 - x^38/7 ... +1)
+//                                 = 1/x^45 ( y^22 - y^21/3 + y^20/5 - y^19/7 ... +1)
+//        
+//                                 = c^45 poly(B) poly(x)
+//                                 = c^45 r(B) q(y)
+
+// q(y) = q0 + q1 y + q2 y^2 + ... + q22 y^22
+// where q22 is 1.0
+
+// atan(x) = sign(X)pi/2 -  c^45 r(B) q(y)
+
+// |x| <=  1
+// ==========================================
+// poly(x)    = series(atan(x))   = x - x^3/3 + x^5/5 + .....
+// poly(x)    = series(atan(x))   = x  + x^3(- 1/3 + x^2/5 + ..... + x^44/47)
+// poly(x)    = series(atan(x))   = x  + x^3(p0 + x^2/5 + ..... + x^44/47)
+// poly(x)    = series(atan(x))   = x  + x^3(p0 + y/5 + ..... + y^22/47)
+   
+// where p0 is about -1/3.
 
-// Special values
+// atan(x) = poly(x)
+
+#include "libm_support.h"
+
+// Special Values
 //==============================================================
 // atan(QNAN)  = QNAN
 // atan(SNAN)  = quieted SNAN
-// atan(+-inf) = +- pi/2
+// atan(+-inf) = +- pi/2     
 // atan(+-0)   = +-0
 
+
+
 // Registers used
 //==============================================================
 
-// predicate registers used:
-// p6 -> p15
+// predicate registers used: 
+// p6 -> p11
 
-// floating-point registers used:
-// f8, input
-// f32 -> f116
+// floating-point registers used:  
+// f32 -> f127
 
 // general registers used
-// r14 -> r16
+// r14 -> r20
 
 // Assembly macros
 //==============================================================
+atan_Pi_by_2                  = f32
+atan_S_PI                     = f33
+atan_ABS_f8                   = f34
+
+atan_R0                       = f35
+atan_R1                       = f36
+atan_R2                       = f37
+atan_R3                       = f38 
+atan_R4                       = f39 
+atan_R5                       = f40 
+atan_R6                       = f41
+atan_R7                       = f42
+atan_R8                       = f43 
+atan_R9                       = f44 
+atan_R10                      = f45 
+
+atan_Q0                       = f46
+
+atan_Q1                       = f47 
+atan_Q2                       = f48
+atan_Q3                       = f49
+atan_Q4                       = f50
+atan_Q5                       = f51 
+atan_Q6                       = f52 
+atan_Q7                       = f53 
+atan_Q8                       = f54 
+atan_Q9                       = f55 
+atan_Q10                      = f56 
+
+atan_Q11                      = f57 
+atan_Q12                      = f58
+atan_Q13                      = f59
+atan_Q14                      = f60
+atan_Q15                      = f61 
+atan_Q16                      = f62 
+atan_Q17                      = f63 
+atan_Q18                      = f64 
+atan_Q19                      = f65 
+atan_Q20                      = f66 
+atan_Q21                      = f67 
+atan_Q22                      = f68 
+
+// P and Q constants are mutually exclusive 
+// so they can share macro definitions
+atan_P0                       = f46
+
+atan_P1                       = f47 
+atan_P2                       = f48
+atan_P3                       = f49
+atan_P4                       = f10
+atan_P5                       = f11 
+atan_P6                       = f12 
+atan_P7                       = f13 
+atan_P10                      = f103 
+
+atan_P11                      = f114 
+atan_P12                      = f58
+atan_P13                      = f59
+atan_P14                      = f60
+atan_P15                      = f61 
+atan_P16                      = f62 
+atan_P17                      = f63 
+atan_P18                      = f64 
+atan_P19                      = f65 
+atan_P20                      = f14 
+atan_P21                      = f99 
+atan_P22                      = f68 
+// end of P constant macros
+
+atan_C                        = f69
+atan_Y                        = f70
+atan_B                        = f71
+atan_Z                        = f72
+atan_V11                      = f73
+atan_V12                      = f74
+
+atan_V7                       = f75
+atan_V8                       = f76
+
+atan_W13                      = f77
+atan_W11                      = f78
+
+atan_V3                       = f79
+atan_V4                       = f80
+
+atan_G11                      = f81
+atan_G12                      = f82
+atan_G7                       = f83
+atan_G8                       = f84
+
+atan_Z1                       = f85
+atan_W7                       = f86
+
+atan_G3                       = f87
+atan_W8                       = f88
+atan_V9                       = f89
+atan_V10                      = f90
+
+atan_G10                      = f91
+atan_W3                       = f92
+atan_G4                       = f93
+atan_G9                       = f94
+                                
+atan_G6                       = f95
+atan_W4                       = f96
+atan_Z2                       = f97
+atan_V6                       = f98
+                              
+atan_V2                       = f99
+atan_W6                       = f100
+atan_W10                      = f101
+atan_Y3                       = f102
+                              
+atan_G2                       = f103
+
+atan_Y8                       = f104
+                              
+atan_G5                       = f105
+atan_Z3                       = f106
+atan_Z4                       = f107
+atan_W2                       = f108
+atan_V5                       = f109
+                            
+atan_W5                       = f110
+atan_G1                       = f111
+atan_Y11                      = f112
+                             
+atan_Z5                       = f113
+atan_Z6                       = f114
+atan_V1                       = f115
+atan_W1                       = f116
+                              
+atan_Z7                       = f117
+atan_Q                        = f118
+atan_Z                        = f119
+atan_abs_f8                   = f120                            
+
+atan_V13                      = f121
+atan_Xcub                     = f122
+atan_Y12                      = f123
+atan_P                        = f124
+
+atan_NORM_f8                  = f125
+
+atan_P8                       = f126
+atan_P9                       = f127
+
+
+
+
+atan_GR_AD_R                 = r14
+atan_GR_AD_Q                 = r15
+atan_GR_AD_P                 = r16
+atan_GR_10172                = r17 
+atan_GR_exp_f8               = r18 
+atan_GR_signexp_f8           = r19
+atan_GR_exp_mask             = r20
+
+
 
-EXP_AD_P1                    = r14
-EXP_AD_P2                    = r15
-rsig_near_one                = r16
-
-atan2_Y                      = f8
-atan2_X                      = f1
-
-atan2_u1_X                   = f32
-atan2_u1_Y                   = f33
-atan2_z2_X                   = f34
-
-atan2_two                    = f36
-atan2_B1sq_Y                 = f37
-atan2_z1_X                   = f38
-atan2_B1X                    = f40
-
-atan2_B1Y                    = f41
-atan2_wp_X                   = f42
-atan2_B1sq_X                 = f43
-atan2_z                      = f44
-atan2_w                      = f45
-
-atan2_P0                     = f46
-atan2_P1                     = f47
-atan2_P2                     = f48
-atan2_P3                     = f49
-atan2_P4                     = f50
-
-atan2_P5                     = f51
-atan2_P6                     = f52
-atan2_P7                     = f53
-atan2_P8                     = f54
-atan2_P9                     = f55
-
-atan2_P10                    = f56
-atan2_P11                    = f57
-atan2_P12                    = f58
-atan2_P13                    = f59
-atan2_P14                    = f60
-
-atan2_P15                    = f61
-atan2_P16                    = f62
-atan2_P17                    = f63
-atan2_P18                    = f64
-atan2_P19                    = f65
-
-atan2_P20                    = f66
-atan2_P21                    = f67
-atan2_P22                    = f68
-atan2_pi_by_2                = f69
-atan2_sgn_pi_by_2            = f69
-atan2_V13                    = f70
-
-atan2_W11                    = f71
-atan2_E                      = f72
-atan2_wp_Y                   = f73
-atan2_V11                    = f74
-atan2_V12                    = f75
-
-atan2_V7                     = f76
-atan2_V8                     = f77
-atan2_W7                     = f78
-atan2_W8                     = f79
-atan2_W3                     = f80
-
-atan2_W4                     = f81
-atan2_V3                     = f82
-atan2_V4                     = f83
-atan2_F                      = f84
-atan2_gV                     = f85
-
-atan2_V10                    = f86
-atan2_zcub                   = f87
-atan2_V6                     = f88
-atan2_V9                     = f89
-atan2_W10                    = f90
-
-atan2_W6                     = f91
-atan2_W2                     = f92
-atan2_V2                     = f93
-atan2_alpha                  = f94
-atan2_alpha_1                = f95
-
-atan2_gVF                    = f96
-atan2_V5                     = f97
-atan2_W12                    = f98
-atan2_W5                     = f99
-atan2_alpha_sq               = f100
-
-atan2_Cp                     = f101
-atan2_V1                     = f102
-atan2_ysq                    = f103
-atan2_W1                     = f104
-atan2_alpha_cub              = f105
-
-atan2_C                      = f106
-atan2_d                      = f108
-atan2_A_hi                   = f109
-atan2_dsq                    = f110
-
-atan2_pd                     = f111
-atan2_A_lo                   = f112
-atan2_A                      = f113
-atan2_Pp                     = f114
-atan2_sgnY                   = f115
-
-atan2_sig_near_one           = f116
-atan2_near_one               = f116
 
 /////////////////////////////////////////////////////////////
 
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(atan2_tb1)
-data8 0xA21922DC45605EA1 ,  0x00003FFA // P11
-data8 0xB199DD6D2675C40F ,  0x0000BFFA // P10
-data8 0xC2F01E5DDD100DBE ,  0x00003FFA // P9
-data8 0xD78F28FC2A592781 ,  0x0000BFFA // P8
-data8 0xF0F03ADB3FC930D3 ,  0x00003FFA // P7
-data8 0x88887EBB209E3543 ,  0x0000BFFB // P6
-data8 0x9D89D7D55C3287A5 ,  0x00003FFB // P5
-data8 0xBA2E8B9793955C77 ,  0x0000BFFB // P4
-data8 0xE38E38E320A8A098 ,  0x00003FFB // P3
-data8 0x9249249247E37913 ,  0x0000BFFC // P2
-data8 0xCCCCCCCCCCC906CD ,  0x00003FFC // P1
-data8 0xAAAAAAAAAAAAA8A9 ,  0x0000BFFD // P0
-data8 0x0000000000000000 ,  0x00000000 // pad to avoid bank conflict
-LOCAL_OBJECT_END(atan2_tb1)
-
-LOCAL_OBJECT_START(atan2_tb2)
-data8 0xCE585A259BD8374C ,  0x00003FF0 // P21
-data8 0x9F90FB984D8E39D0 ,  0x0000BFF3 // P20
-data8 0x9D3436AABE218776 ,  0x00003FF5 // P19
-data8 0xDEC343E068A6D2A8 ,  0x0000BFF6 // P18
-data8 0xF396268151CFB11C ,  0x00003FF7 // P17
-data8 0xD818B4BB43D84BF2 ,  0x0000BFF8 // P16
-data8 0xA2270D30A90AA220 ,  0x00003FF9 // P15
-data8 0xD5F4F2182E7A8725 ,  0x0000BFF9 // P14
-data8 0x80D601879218B53A ,  0x00003FFA // P13
-data8 0x9297B23CCFFB291F ,  0x0000BFFA // P12
-data8 0xFE7E52D2A89995B3 ,  0x0000BFEC // P22
-data8 0xC90FDAA22168C235 ,  0x00003FFF // pi/2
-LOCAL_OBJECT_END(atan2_tb2)
-
+double_atan_constants_R:
+ASM_TYPE_DIRECTIVE(double_atan_constants_R,@object)
+   data8 0xB36B46B9C5443CED, 0x0000401C  //R8
+   data8 0x842633E0D126261F, 0x0000401F  //R9
+   data8 0xBE04FFFFFFFF46E0, 0x00004010  //R4
+   data8 0xE8C62000244D66E2, 0x00004013  //R5
+   data8 0xF2790C001E3789B3, 0x00004016  //R6
+   data8 0xDCD2CCF97D7C764F, 0x00004019  //R7
+   data8 0xB40000000000000B, 0x00004004  //R1
+   data8 0xB265F3D38F5EE28F, 0x00004021  //R10
+   data8 0x8160000000000001, 0x00004009  //R2
+   data8 0xFD5BFFFFFFFE55CD, 0x0000400C  //R3
+   data8 0xC90FDAA22168C235, 0x00003FFF  // pi/2
+ASM_SIZE_DIRECTIVE(double_atan_constants_R)
+
+double_atan_constants_Q:
+ASM_TYPE_DIRECTIVE(double_atan_constants_Q,@object)
+   data8 0xEBD602FA7761BC33, 0x00003FF9  //Q8
+   data8 0x8CB1CABD6A91913C, 0x0000BFFA  //Q9
+   data8 0x84C665C37D623CD2, 0x00003FF7  //Q4
+   data8 0x8DE0D1673DAEA9BC, 0x0000BFF8  //Q5
+   data8 0xF658ADBE2C6E6FCC, 0x00003FF8  //Q6
+
+   data8 0xB56307BE1DD3FFB6, 0x0000BFF9  //Q7
+   data8 0xAAAAAAAAAAAA8000, 0x0000BFFD  //Q21
+   data8 0x8000000000000000, 0x00003FFF  //Q22
+   data8 0x924924923A9D710C, 0x0000BFFC  //Q19
+   data8 0xCCCCCCCCCC9380E7, 0x00003FFC  //Q20
+ 
+   data8 0xA644DC250EFA2800, 0x00003FED  //Q0
+   data8 0x83DEAE24EEBF5E44, 0x0000BFF1  //Q1
+   data8 0xC758CCC64793D4EC, 0x00003FF3  //Q2
+   data8 0xBFDC0B54E7C89DCE, 0x0000BFF5  //Q3
+   data8 0x888855199D1290AF, 0x0000BFFB  //Q15
+
+   data8 0x9D89D3BE514B0178, 0x00003FFB  //Q16
+   data8 0xBA2E8B4DEC70282A, 0x0000BFFB  //Q17
+   data8 0xE38E38DF9E9FC83B, 0x00003FFB  //Q18
+   data8 0x9F8781CC990029D9, 0x00003FFA  //Q10
+   data8 0xB0B39472DEBA3C79, 0x0000BFFA  //Q11
+
+   data8 0xC2AFAEF8C85B0BC6, 0x00003FFA  //Q12
+   data8 0xD780E539797525DD, 0x0000BFFA  //Q13
+   data8 0xF0EDC449AC786DF9, 0x00003FFA  //Q14
+ASM_SIZE_DIRECTIVE(double_atan_constants_Q)
+
+
+
+double_atan_constants_P:
+ASM_TYPE_DIRECTIVE(double_atan_constants_P,@object)
+   data8 0xB1899EC590CDB8DF, 0x0000BFFA //P10
+   data8 0xA1E79850A67D59B0, 0x00003FFA //P11
+   data8 0x911D8B30C2A96E6D, 0x0000BFF3 //P20
+   data8 0xB87233C68A640706, 0x00003FF0 //P21
+   data8 0xD78E4B82F3C29D7A, 0x0000BFFA //P8
+
+   data8 0xC2EBE37AF932C14F, 0x00003FFA //P9
+   data8 0xBA2E8B94AA104DD6, 0x0000BFFB //P4
+   data8 0x9D89D7A640B71D38, 0x00003FFB //P5
+   data8 0x88887CA2CE9B2A40, 0x0000BFFB //P6
+   data8 0xF0F017D57A919C1E, 0x00003FFA //P7
+
+   data8 0xD0D635F230C80E06, 0x0000BFF8 //P16
+   data8 0xE847BECA7209B479, 0x00003FF7 //P17
+   data8 0xD14C6A2AAE0D5B07, 0x0000BFF6 //P18
+   data8 0x915F612A5C469117, 0x00003FF5 //P19
+   data8 0x921EDE5FD0DBBBE2, 0x0000BFFA //P12
+
+   data8 0xFFD303C2C8535445, 0x00003FF9 //P13
+   data8 0xD30DF50E295386F7, 0x0000BFF9 //P14
+   data8 0x9E81F2B1BBD210A8, 0x00003FF9 //P15
+   data8 0xAAAAAAAAAAAAA800, 0x0000BFFD //P0
+   data8 0xCCCCCCCCCCC7D476, 0x00003FFC //P1
+
+   data8 0x9249249247838066, 0x0000BFFC //P2
+   data8 0xE38E38E302290D68, 0x00003FFB //P3
+   data8 0xDF7F0A816F7E5025, 0x0000BFEC //P22
+ASM_SIZE_DIRECTIVE(double_atan_constants_P)
+
+
+.align 32
+.global atan#
+
+////////////////////////////////////////////////////////
 
 
 
 .section .text
-GLOBAL_LIBM_ENTRY(atan)
+.proc  atan#
+.align 32
 
-{ .mfi
-           nop.m 999
-           frcpa.s1     atan2_u1_Y,p7 = f1,atan2_Y
-           nop.i 999
+atan: 
+
+{ .mmf
+(p0)  addl      atan_GR_AD_P   = @ltoff(double_atan_constants_P), gp
+(p0)  addl      atan_GR_AD_Q   = @ltoff(double_atan_constants_Q), gp
+(p0)  fmerge.s  atan_ABS_f8  = f0,f8                       
 }
-{ .mfi
-           addl         EXP_AD_P1   = @ltoff(atan2_tb1), gp
-           fma.s1       atan2_two  = f1,f1,f1
-           nop.i 999
 ;;
+
+{ .mmf
+      ld8 atan_GR_AD_P = [atan_GR_AD_P]
+      ld8 atan_GR_AD_Q = [atan_GR_AD_Q]
+(p0)  frcpa.s1     atan_C,p8 = f1,f8                                  
 }
+;;
 
-{ .mfi
-           ld8  EXP_AD_P1 = [EXP_AD_P1]
-           frcpa.s1     atan2_u1_X,p6 = f1,atan2_X
-           nop.i 999
+{ .mmf
+(p0)  addl      atan_GR_AD_R   = @ltoff(double_atan_constants_R), gp
+(p0)  addl      atan_GR_exp_mask = 0x1ffff, r0
+(p0)  fma.s1       atan_Y = f8,f8,f0                                  
 }
-{ .mfi
-           nop.m 999
-           fma.s1       atan2_ysq  = atan2_Y,atan2_Y,f0
-           nop.i 999
+;;
+
+// This fnorm takes faults or sets fault flags
+{ .mmf
+(p0)  mov       atan_GR_10172 = 0x10172
+      ld8 atan_GR_AD_R = [atan_GR_AD_R]
+(p0)  fnorm     atan_NORM_f8  = f8
 }
 ;;
 
-{ .mfi
-           add  EXP_AD_P2 = 0xd0,EXP_AD_P1
-           fmerge.s     atan2_sgnY = atan2_Y,f1
-           nop.i 999
+
+// qnan snan inf norm     unorm 0 -+
+// 1    1    0   0        0     1 11
+// c                      7
+
+// p9 set if we have a NAN or +-0
+
+{ .mmf
+(p0)  ldfe      atan_Q8  = [atan_GR_AD_Q],16 
+(p0)  ldfe      atan_P10    = [atan_GR_AD_P],16                   
+(p0)  fclass.m.unc p9, p0   = f8, 0xc7                         
 }
 ;;
 
 
+{ .mmi
+(p0)  ldfe      atan_Q9  = [atan_GR_AD_Q],16 
+(p0)  ldfe      atan_P11    = [atan_GR_AD_P],16                   
+      nop.i 999   
+}
+;;
+
+        
+{ .mmf
+(p0)  ldfe      atan_Q4      = [atan_GR_AD_Q],16 
+(p0)  ldfe      atan_P20     = [atan_GR_AD_P],16                   
+(p9)  fma.d.s0  f8          = f8,f1,f0
+;;
+}
+
+// Exit if we have a NAN or +-0
+{ .mmb
+(p0)  ldfe      atan_Q5     = [atan_GR_AD_Q],16 
+(p0)  ldfe      atan_P21    = [atan_GR_AD_P],16                   
+(p9)  br.ret.spnt b0
+;;
+}
+
+
+// p6 is TRUE if |x| <= 1
+// p7 is TRUE if |x| >  1
+{ .mmf
+(p0)  ldfe      atan_Q6  = [atan_GR_AD_Q],16 
+(p0)  ldfe      atan_P8  = [atan_GR_AD_P],16                   
+(p0)  fcmp.le.unc  p6,p7 = atan_ABS_f8, f1 
+;;
+}
+
+
 { .mfi
-           ldfe         atan2_P11  = [EXP_AD_P1],16
-           fclass.m p10,p0 = atan2_Y, 0xc3     // Test for y=nan
-           nop.i 999
+(p0)  ldfe         atan_Q7  = [atan_GR_AD_Q],16 
+(p0)  fma.s1       atan_Z   = atan_C, atan_C, f0                        
+      nop.i 999   
 }
 { .mfi
-           ldfe         atan2_P21  = [EXP_AD_P2],16
-           nop.f 999
-           nop.i 999
-;;
+(p0)  ldfe         atan_P9  = [atan_GR_AD_P],16                   
+(p0)  fnma.s1      atan_B   = atan_C,f8, f1                             
+      nop.i 999    ;;
 }
 
 
 { .mfi
-           ldfe         atan2_P10  = [EXP_AD_P1],16
-           fnma.s1      atan2_B1Y  = atan2_u1_Y, atan2_Y, atan2_two
-           nop.i 999
+(p0)  ldfe         atan_Q21  = [atan_GR_AD_Q],16 
+(p0)  fma.s1       atan_V12  = atan_Y, atan_Y, f0                      
+      nop.i 999   
 }
 { .mfi
-           ldfe         atan2_P20  = [EXP_AD_P2],16
-           fma.s1       atan2_wp_Y   = atan2_u1_Y, atan2_u1_Y, f0
-           nop.i 999
+(p0)  ldfe         atan_P4    = [atan_GR_AD_P],16                   
+(p0)  fma.s1       atan_Xcub  = f8, atan_Y  , f0                        
+      nop.i 999   
+;;
+}
+
+
+{ .mmi
+(p7)  ldfe      atan_Q22         = [atan_GR_AD_Q],16 
+(p6)  ldfe      atan_P5          = [atan_GR_AD_P],16                   
+(p6)  cmp.eq.unc  p8,p0 = r0,r0
+;;
+}
+
+
+{ .mmi
+(p7)  ldfe      atan_Q19   = [atan_GR_AD_Q],16 
+(p6)  ldfe      atan_P6    = [atan_GR_AD_P],16                   
+(p7)  cmp.eq.unc  p9,p0 = r0,r0
+;;
+}
+
+
+{ .mmi
+(p7)  ldfe      atan_Q20  = [atan_GR_AD_Q],16 
+(p6)  ldfe      atan_P7    = [atan_GR_AD_P],16                   
+      nop.i 999   
 ;;
 }
 
 { .mfi
-           ldfe         atan2_P9   = [EXP_AD_P1],16
-           fma.s1       atan2_z1_X = atan2_u1_X, atan2_Y, f0
-           nop.i 999
+(p7)  ldfe      atan_Q0  = [atan_GR_AD_Q],16 
+(p6)  fma.s1       atan_V13 = atan_Y, atan_P11, atan_P10              
+      nop.i 999   
 }
 { .mfi
-           ldfe         atan2_P19  = [EXP_AD_P2],16
-           fnma.s1      atan2_B1X  = atan2_u1_X, atan2_X, atan2_two
-           nop.i 999
+(p6)  ldfe      atan_P16    = [atan_GR_AD_P],16                   
+(p7)  fma.s1       atan_V11 = atan_Y, atan_Q9, atan_Q8                
+      nop.i 999    ;;
 }
-;;
+
 
 { .mfi
-           ldfe         atan2_P8   = [EXP_AD_P1],16
-           fma.s1       atan2_z2_X = atan2_u1_X, atan2_ysq, f0
-           nop.i 999
+(p7)  ldfe      atan_Q1  = [atan_GR_AD_Q],16 
+(p7)  fma.s1       atan_G12 = atan_B, atan_B, f0                      
+      nop.i 999   
 }
-{ .mfb
-           ldfe         atan2_P18  = [EXP_AD_P2],16
-(p10)      fma.d.s0 f8 = atan2_Y,atan2_X,f0   // If y=nan, result quietized y
-(p10)      br.ret.spnt b0        // Exit if y=nan
+{ .mfi
+(p6)  ldfe      atan_P17    = [atan_GR_AD_P],16                   
+(p0)  fma.s1       atan_V9  = atan_V12, atan_V12, f0                  
+      nop.i 999    ;;
 }
-;;
 
-// p6 true if swap,    means |y| >  1.0    or ysq > 1.0
-// p7 true if no swap, means 1.0 >= |y|    or 1.0 >= ysq
+
 { .mfi
-           ldfe         atan2_P7   = [EXP_AD_P1],16
-           fcmp.ge.s1 p7,p6    = f1, atan2_ysq
-           nop.i 999
+(p7)  ldfe      atan_Q2  = [atan_GR_AD_Q],16 
+(p6)  fma.s1       atan_W11 = atan_Y, atan_P21, atan_P20              
+      nop.i 999   
 }
-{ .mmf
-           ldfe         atan2_P17  = [EXP_AD_P2],16
-           nop.m 999
-           nop.f 999
+{ .mfi
+(p6)  ldfe      atan_P18    = [atan_GR_AD_P],16                   
+(p7)  fma.s1       atan_V7  = atan_Y, atan_Q5, atan_Q4                
+      nop.i 999    ;;
 }
-;;
 
 { .mfi
-           ldfe         atan2_P6   = [EXP_AD_P1],16
-           fma.s1       atan2_E         = atan2_u1_Y, atan2_B1Y, atan2_Y
-           nop.i 999
+(p7)  ldfe      atan_Q3  = [atan_GR_AD_Q],16 
+(p7)  fma.s1    atan_Z1  = atan_Z, atan_Z, f0                      
+      nop.i 999   
 }
 { .mfi
-           ldfe         atan2_P16  = [EXP_AD_P2],16
-           fma.s1       atan2_B1sq_Y = atan2_B1Y, atan2_B1Y, f0
-           nop.i 999
-;;
+(p6)  ldfe      atan_P19    = [atan_GR_AD_P],16                   
+(p7)  fma.s1       atan_Y3  = atan_Y  , atan_V12, f0                  
+      nop.i 999    ;;
 }
 
 { .mfi
-           ldfe         atan2_P5   = [EXP_AD_P1],16
-(p7)       fma.s1       atan2_wp_X   = atan2_z1_X, atan2_z1_X, f0
-           nop.i 999
+(p7)  ldfe      atan_R8  = [atan_GR_AD_R],16 
+(p6)  fma.s1       atan_V11 = atan_Y, atan_P9, atan_P8                
+      nop.i 999   
 }
 { .mfi
-           ldfe         atan2_P15       = [EXP_AD_P2],16
-(p7)       fma.s1       atan2_B1sq_X = atan2_B1X, atan2_B1X, f0
-           nop.i 999
+(p6)  ldfe      atan_P12    = [atan_GR_AD_P],16                   
+(p7)  fma.s1       atan_V8  = atan_Y, atan_Q7, atan_Q6                
+      nop.i 999    ;;
+}
+
+{ .mmi
+(p7)  ldfe      atan_R9     = [atan_GR_AD_R],16 
+(p6)  ldfe      atan_P13    = [atan_GR_AD_P],16                   
+      nop.i 999   
 ;;
 }
 
 { .mfi
-           ldfe         atan2_P4   = [EXP_AD_P1],16
-(p6)       fma.s1       atan2_z         = atan2_u1_Y, atan2_B1Y, f0
-           nop.i 999
+(p7)  ldfe      atan_R4  = [atan_GR_AD_R],16 
+(p6)  fma.s1    atan_V7  = atan_Y, atan_P5, atan_P4                 
+      nop.i 999   
 }
 { .mfi
-           ldfe         atan2_P14  = [EXP_AD_P2],16
-(p7)       fma.s1       atan2_E         = atan2_z2_X, atan2_B1X, atan2_X
-           nop.i 999
-;;
+(p6)  ldfe      atan_P14    = [atan_GR_AD_P],16                   
+(p7)  fma.s1       atan_W13 = atan_Y, atan_Q22, atan_Q21              
+      nop.i 999    ;;
 }
 
 
 { .mfi
-           ldfe         atan2_P3        = [EXP_AD_P1],16
-           fcmp.eq.s0  p14,p15=atan2_X,atan2_Y  // Dummy for denorm and invalid
-           nop.i 999
+(p7)  ldfe      atan_R5  = [atan_GR_AD_R],16 
+(p6)  fma.s1    atan_Y12 = atan_V9 , atan_V9 , f0                   
+      nop.i 999   
 }
-{ .mmf
-           ldfe         atan2_P13  = [EXP_AD_P2],16
-           nop.m 999
-(p7)       fma.s1       atan2_z         = atan2_z1_X, atan2_B1X, f0
-;;
+{ .mfi
+(p6)  ldfe      atan_P15    = [atan_GR_AD_P],16                   
+(p7)  fma.s1    atan_Y8 = atan_V9 , atan_V9 , f0                   
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           ldfe         atan2_P2        = [EXP_AD_P1],16
-(p6)       fma.s1       atan2_w         = atan2_wp_Y, atan2_B1sq_Y,f0
-           nop.i 999
+(p7)  ldfe      atan_R6  = [atan_GR_AD_R],16 
+(p6)  fma.s1    atan_V8  = atan_Y, atan_P7, atan_P6                 
+      nop.i 999   
 }
-{ .mlx
-           ldfe         atan2_P12  = [EXP_AD_P2],16
-           movl         rsig_near_one = 0x8000000000000001 // signif near 1.0
-;;
+{ .mfi
+(p6)  ldfe      atan_P0     = [atan_GR_AD_P],16                   
+(p7)  fma.s1       atan_W11 = atan_Y, atan_Q20, atan_Q19              
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           ldfe         atan2_P1        = [EXP_AD_P1],16
-           fclass.m p9,p0 = atan2_Y, 0x23  // test if y inf
-           nop.i 999
+(p7)  ldfe      atan_R7  = [atan_GR_AD_R],16 
+(p7)  fma.s1    atan_Z2  = atan_Z1 , atan_Z1,  f0                  
+      nop.i 999   
 }
 { .mfi
-           ldfe         atan2_P22       = [EXP_AD_P2],16
-(p7)       fma.s1       atan2_w         = atan2_wp_X, atan2_B1sq_X,f0
-           nop.i 999
-;;
+(p6)  ldfe      atan_P1     = [atan_GR_AD_P],16                   
+(p6)  fma.s1       atan_V10 = atan_V12, atan_V13, atan_V11             
+      nop.i 999    ;;
 }
 
 { .mfi
-           ldfe         atan2_P0        = [EXP_AD_P1],16
-           frcpa.s1     atan2_F,p0     = f1, atan2_E
-           nop.i 999
+(p7)  ldfe      atan_Q15 = [atan_GR_AD_Q],16 
+(p6)  fma.s1    atan_W7  = atan_Y, atan_P17, atan_P16               
+      nop.i 999   
 }
 { .mfi
-           ldfe         atan2_pi_by_2   = [EXP_AD_P2],16
-(p6)       fnma.s1       atan2_gV        = atan2_Y, atan2_z, atan2_X
-           nop.i 999
-;;
+(p6)  ldfe      atan_P2     = [atan_GR_AD_P],16                   
+(p7)  fma.s1       atan_V3  = atan_Y, atan_Q1 , atan_Q0               
+      nop.i 999    ;;
 }
 
 { .mfi
-           setf.sig      atan2_sig_near_one = rsig_near_one
-(p7)       fnma.s1       atan2_gV        = atan2_X, atan2_z, atan2_Y
-           nop.i 999
+(p7)  ldfe      atan_Q16 = [atan_GR_AD_Q],16 
+(p7)  fma.s1    atan_G9  = atan_G12, atan_G12, f0                  
+      nop.i 999   
 }
-{ .mfb
-           nop.m 999
-(p9)       fma.d.s0  f8 = atan2_sgnY, atan2_pi_by_2, f0  // +-pi/2 if y inf
-(p9)       br.ret.spnt b0      // exit if y inf, result is +-pi/2
-;;
+{ .mfi
+(p6)  ldfe      atan_P3     = [atan_GR_AD_P],16                   
+(p7)  fma.s1       atan_V6  = atan_V12, atan_V8,  atan_V7             
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V13       = atan2_w, atan2_P11, atan2_P10
-           nop.i 999
+(p7)  ldfe      atan_R1     = [atan_GR_AD_R],16 
+(p6)  fma.s1       atan_W8  = atan_Y, atan_P19, atan_P18               
+      nop.i 999   
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W11       = atan2_w, atan2_P21, atan2_P20
-           nop.i 999
-;;
+(p6)  ldfe      atan_P22    = [atan_GR_AD_P],16                   
+(p7)  fma.s1       atan_V4  = atan_Y, atan_Q3 , atan_Q2               
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V11       = atan2_w, atan2_P9, atan2_P8
-           nop.i 999
+      getf.exp     atan_GR_signexp_f8  = atan_NORM_f8
+(p7)  fma.s1       atan_Y11 = atan_Y3 , atan_Y8 , f0                  
+      nop.i 999   
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V12       = atan2_w, atan2_w, f0
-           nop.i 999
-;;
+(p7)  ldfe      atan_Q17    = [atan_GR_AD_Q],16 
+(p6)  fma.s1       atan_V6  = atan_V12, atan_V8,  atan_V7             
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V8        = atan2_w, atan2_P7 , atan2_P6
-           nop.i 999
+(p7)  ldfe      atan_Q18    = [atan_GR_AD_Q],16 
+(p6)  fma.s1       atan_W3  = atan_Y, atan_P13, atan_P12               
+      nop.i 999   
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W8        = atan2_w, atan2_P19, atan2_P18
-           nop.i 999
-;;
+(p7)  ldfe      atan_R10    = [atan_GR_AD_R],16 
+(p7)  fma.s1       atan_G11 = atan_B, atan_R9 , atan_R8               
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fnma.s1      atan2_alpha     = atan2_E, atan2_F, f1
-           nop.i 999
+(p7)  ldfe      atan_Q10    = [atan_GR_AD_Q],16 
+(p7)  fma.s1    atan_Z3     = atan_Z1 , atan_Z2 , f0                  
+(p0)  and       atan_GR_exp_f8 = atan_GR_signexp_f8,atan_GR_exp_mask
 }
 { .mfi
-           nop.m 999
-           fnma.s1      atan2_alpha_1   = atan2_E, atan2_F, atan2_two
-           nop.i 999
-;;
+(p7)  ldfe      atan_R2     = [atan_GR_AD_R],16 
+(p7)  fma.s1       atan_Z4  = atan_Z2 , atan_Z2 , f0                  
+      nop.i 999    ;;
 }
 
 
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V7        = atan2_w, atan2_P5 , atan2_P4
-           nop.i 999
+(p7)  ldfe      atan_Q11 = [atan_GR_AD_Q],16 
+(p6)  fma.s1    atan_W4  = atan_Y, atan_P15, atan_P14               
+      nop.i 999
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W7        = atan2_w, atan2_P17, atan2_P16
-           nop.i 999
+(p7)  ldfe      atan_R3     = [atan_GR_AD_R],16 
+(p7)  fma.s1       atan_G7  = atan_B, atan_R5 , atan_R4               
+(p0)  cmp.le.unc   p11,p0  = atan_GR_10172,atan_GR_exp_f8 
+;;
+}
+
+
+{ .mmf
+(p9)  ldfe      atan_Q12 = [atan_GR_AD_Q],16 
+(p0)  ldfe      atan_S_PI     = [atan_GR_AD_R],16 
+(p8)  fma.s1       atan_W6  = atan_V12, atan_W8,  atan_W7             
 ;;
 }
 
+
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V4        = atan2_w, atan2_P3 , atan2_P2
-           nop.i 999
+(p9)  ldfe      atan_Q13 = [atan_GR_AD_Q],16 
+(p8)  fma.s1       atan_V3  = atan_Y, atan_P1 , atan_P0                
+(p11) cmp.ne.and p6,p7 = r0,r0
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W4        = atan2_w, atan2_P15, atan2_P14
-           nop.i 999
-;;
+      nop.m 999
+(p8)  fma.s1       atan_V5  = atan_V9 , atan_V10, atan_V6             
+      nop.i 999    ;;
 }
 
+
+.pred.rel "mutex",p6,p7,p11
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V3        = atan2_w, atan2_P1 , atan2_P0
-           nop.i 999
+(p7)  ldfe      atan_Q14 = [atan_GR_AD_Q],16 
+(p6)  fma.s1       atan_Y12 = atan_V9 , atan_Y12, f0                   
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W3        = atan2_w, atan2_P13, atan2_P12
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1    atan_G8  = atan_B, atan_R7 , atan_R6               
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V10       = atan2_V12, atan2_V13, atan2_V11
-           nop.i 999
+      nop.m 999
+(p6)  fma.s1       atan_V4  = atan_Y, atan_P3 , atan_P2                
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_gVF       = atan2_gV, atan2_F, f0
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_W7  = atan_Y, atan_Q16, atan_Q15              
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_alpha_sq  = atan2_alpha, atan2_alpha, f0
-           nop.i 999
+      nop.m 999
+(p6)  fma.s1       atan_W10 = atan_V12, atan_P22, atan_W11            
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_Cp        = atan2_alpha, atan2_alpha_1, f1
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_G3  = atan_B, atan_R1 , f1                    
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V9        = atan2_V12, atan2_V12, f0
-           nop.i 999
+      nop.m 999
+(p6)  fma.s1    atan_W2  = atan_V12, atan_W4 , atan_W3             
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W10       = atan2_V12, atan2_P22 , atan2_W11
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_V2  = atan_V12, atan_V4 , atan_V3              
+      nop.i 999    ;;
 }
 
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V6        = atan2_V12, atan2_V8 , atan2_V7
-           nop.i 999
+      nop.m 999
+(p7)  fma.s1       atan_W8  = atan_Y, atan_Q18, atan_Q17              
+      nop.i 999   
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W6        = atan2_V12, atan2_W8 , atan2_W7
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_G10 = atan_G12, atan_R10, atan_G11            
+      nop.i 999    ;;
 }
 
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V2        = atan2_V12, atan2_V4 , atan2_V3
-           nop.i 999
+      nop.m 999
+(p7)  fma.s1       atan_V10 = atan_V12, atan_Q10, atan_V11            
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W2        = atan2_V12, atan2_W4  , atan2_W3
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_G6  = atan_G12, atan_G8 , atan_G7             
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0
-           nop.i 999
+      nop.m 999
+(p6)  fma.s1       atan_V2  = atan_V12, atan_V4,  atan_V3             
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_C         = atan2_gVF, atan2_Cp, f0
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_G4  = atan_B  , atan_R3 , atan_R2             
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W12       = atan2_V9, atan2_V9, f0
-           nop.i 999
-;;
+      nop.m 999
+(p6)  fma.s1       atan_W5  = atan_V9 , atan_W10, atan_W6             
+      nop.i 999    
+}
+{ .mfi
+      nop.m 999
+(p7)  fma.s1       atan_W3  = atan_Y  , atan_Q12, atan_Q11            
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V5        = atan2_V9, atan2_V10, atan2_V6
-           nop.i 999
+      nop.m 999
+(p7)  fma.s1       atan_Z5  = atan_Z3 , atan_Z4 , f0                  
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W5        = atan2_V9, atan2_W10, atan2_W6
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_W10 = atan_V12, atan_W13, atan_W11            
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fclass.m p8,p0 = atan2_Y, 0x07  // Test for y=0
-           nop.i 999
+      nop.m 999
+(p7)  fma.s1       atan_W4  = atan_Y  , atan_Q14, atan_Q13            
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_d         = atan2_alpha_cub, atan2_C, atan2_C
-           nop.i 999
+      nop.m 999
+(p7)  fma.s1       atan_W6  = atan_V12, atan_W8,  atan_W7             
+      nop.i 999    ;;
 }
-;;
 
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W12       = atan2_V9, atan2_W12, f0
-           nop.i 999
+      nop.m 999
+(p7)  fma.s1       atan_V5  = atan_V9 , atan_V10, atan_V6             
+      nop.i 999    
+}
+{ .mfi
+      nop.m 999
+(p7)  fma.s1       atan_G5  = atan_G9 , atan_G10, atan_G6             
+      nop.i 999    ;;
 }
-;;
+
 
 { .mfi
-           nop.m 999
-           fma.s1       atan2_V1        = atan2_V9, atan2_V5, atan2_V2
-           nop.i 999
+      nop.m 999
+(p6)  fma.s1       atan_V1  = atan_V9 , atan_V5 , atan_V2             
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_W1        = atan2_V9, atan2_W5, atan2_W2
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_G2  = atan_G12, atan_G4 , atan_G3             
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-(p8)       fmerge.s     f8              = atan2_sgnY, f0  // +-0 if y=0
-           nop.i 999
+      nop.m 999
+(p6)  fma.s1       atan_W1  = atan_V9 , atan_W5 , atan_W2             
+      nop.i 999    
 }
-{ .mfb
-           nop.m 999
-           fma.s1       atan2_zcub      = atan2_z, atan2_w, f0
-(p8)       br.ret.spnt b0      // Exit if y=0
-;;
+{ .mfi
+      nop.m 999
+(p7)  fma.s1       atan_Z6  = atan_Z4 , atan_C  , f0                  
+      nop.i 999    ;;
 }
 
 { .mfi
-           nop.m 999
-           fma.s1       atan2_pd        = atan2_P0, atan2_d, f0
-           nop.i 999
+      nop.m 999
+(p0)  fmerge.s    atan_S_PI = f8, atan_S_PI
+      nop.i 999     ;;
 }
+
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_dsq       = atan2_d, atan2_d, f0
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_W5  = atan_V9 , atan_W10, atan_W6             
+      nop.i 999    
+}
+{ .mfi
+      nop.m 999
+(p7)  fma.s1       atan_W2  = atan_V12, atan_W4 , atan_W3             
+      nop.i 999    ;;
 }
 
 
 { .mfi
-           nop.m 999
-           fmerge.se    atan2_near_one = f1, atan2_sig_near_one // Const ~1.0
-           nop.i 999
+      nop.m 999
+(p7)  fma.s1       atan_G1  = atan_G9 , atan_G5 , atan_G2             
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_Pp        = atan2_W12, atan2_W1, atan2_V1
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_V1  = atan_V9 , atan_V5 , atan_V2             
+      nop.i 999    ;;
 }
 
+
 { .mfi
-           nop.m 999
-           fma.s1       atan2_sgn_pi_by_2 = atan2_pi_by_2, atan2_sgnY, f0
-           nop.i 999
+      nop.m 999
+(p6)  fma.s1       atan_P   = atan_Y12, atan_W1 , atan_V1              
+      nop.i 999    
 }
 { .mfi
-           nop.m 999
-           fma.s1       atan2_A_lo      = atan2_pd, atan2_dsq, atan2_d
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_Z7  = atan_Z5 , atan_Z6 , f0                  
+      nop.i 999    ;;
 }
 
 
 { .mfi
-           nop.m 999
-           fma.s1       atan2_A_hi      = atan2_zcub, atan2_Pp, atan2_z
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_W1  = atan_V9 , atan_W5 , atan_W2             
+      nop.i 999    ;;
 }
 
 
 { .mfi
-           nop.m 999
-(p6)       fma.s1       atan2_A         = atan2_A_hi, f1, atan2_A_lo
-           nop.i 999
+      nop.m 999
+(p11) fma.d.s0  f8 = atan_S_PI,f1,f0
+      nop.i 999
 }
-// For |Y| <= |X| and X > 0, result is A_hi + A_lo
 { .mfi
-           nop.m 999
-(p7)       fma.d.s0       f8         = atan2_A_hi, f1, atan2_A_lo
-           nop.i 999
-;;
+      nop.m 999
+(p7)  fma.s1       atan_Z   = atan_G1 , atan_Z7 , f0                  
+      nop.i 999    ;;
+}
+
+
+{ .mfi
+      nop.m 999
+(p7)  fma.s1       atan_Q   = atan_Y11, atan_W1 , atan_V1             
+      nop.i 999    ;;
 }
 
-// For |Y| > |X|, result is  +- pi/2 - (A_hi + A_lo)
-// We perturb A by multiplying by 1.0+1ulp as we produce the result
-// in order to get symmetrically rounded results in directed rounding modes.
-// If we don't do this, there are a few cases where the trailing 11 bits of
-// the significand of the result, before converting to double, are zero.  These
-// cases do not round symmetrically in round to +infinity or round to -infinity.
+
+{ .mfi
+      nop.m 999
+(p6)  fma.d.s0    f8       = atan_P  , atan_Xcub  , f8               
+      nop.i 999    
+}
 { .mfb
-           nop.m 999
-(p6)       fnma.d.s0      f8        = atan2_A, atan2_near_one, atan2_sgn_pi_by_2
-           br.ret.sptk  b0
-;;
+      nop.m 999
+(p7)  fnma.d.s0    f8       = atan_Z  , atan_Q  , atan_S_PI           
+(p0)  br.ret.sptk    b0    ;;
 }
 
-GLOBAL_LIBM_END(atan)
+.endp atan
+ASM_SIZE_DIRECTIVE(atan)
diff --git a/sysdeps/ia64/fpu/s_atanf.S b/sysdeps/ia64/fpu/s_atanf.S
index fb7f4a307e..b0a68737aa 100644
--- a/sysdeps/ia64/fpu/s_atanf.S
+++ b/sysdeps/ia64/fpu/s_atanf.S
@@ -1,10 +1,12 @@
 .file "atanf.s"
 
+// THIS IS NOT OPTIMIZED AND NOT OFFICIAL
 
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +22,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,18 +37,16 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 
 
 // History
 //==============================================================
-// 02/20/00 Initial version
-// 08/17/00 Changed predicate register macro-usage to direct predicate
+// ?/??/00  Initial revision 
+// 8/17/00  Changed predicate register macro-usage to direct predicate
 //          names due to an assembler bug.
-// 02/06/02 Corrected .section statement
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/06/03 Reordered header: .section, .global, .proc, .align;
-//          added missing bundling
+
+#include "libm_support.h"
 
 //
 // Assembly macros
@@ -140,11 +140,16 @@ atanf_answer       = f8
 //atanf_pred_GT1     = p7
 
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(atanf_coeff_1_table)
+atanf_coeff_1_table:
+ASM_TYPE_DIRECTIVE(atanf_coeff_1_table,@object)
 data8 0x40c4c241be751ff2  // r4
 data8 0x40e9f300c2f3070b  // r5
 data8 0x409babffef772075  // r3
@@ -159,11 +164,12 @@ data8 0xbfc2473c5145ee38  // p3
 data8 0x3fbc4f512b1865f5  // p4
 data8 0x3fc9997e7afbff4e  // p2
 data8 0x3ff921fb54442d18  // pi/2
-LOCAL_OBJECT_END(atanf_coeff_1_table)
+ASM_SIZE_DIRECTIVE(atanf_coeff_1_table)
 
 
 
-LOCAL_OBJECT_START(atanf_coeff_2_table)
+atanf_coeff_2_table:
+ASM_TYPE_DIRECTIVE(atanf_coeff_2_table,@object)
 data8 0x4035000000004284  // r1
 data8 0x406cdffff336a59b  // r2
 data8 0x3fbc4f512b1865f5  // p4 = q6
@@ -176,12 +182,18 @@ data8 0xbfa6e10ba401393f  // p7
 data8 0x3f97105b4160f86b  // p8
 data8 0xbf7deaadaa336451  // p9
 data8 0x3f522e5d33bc9baa  // p10
-LOCAL_OBJECT_END(atanf_coeff_2_table)
+ASM_SIZE_DIRECTIVE(atanf_coeff_2_table)
+
+
 
+.global atanf 
 
+.text
+.proc  atanf
+
+.align 32
+atanf:
 
-.section .text
-GLOBAL_LIBM_ENTRY(atanf)
 
 {     .mfi 
      alloc      r32                  = ar.pfs,1,2,0,0
@@ -313,7 +325,7 @@ GLOBAL_LIBM_ENTRY(atanf)
 {     .mfb 
      nop.m                 999
      fma.s1    atanf_x5  =    atanf_t,atanf_xcub,f0
-(p8)  br.cond.spnt   ATANF_X_INF_NAN_ZERO
+(p8)  br.cond.spnt   L(ATANF_X_INF_NAN_ZERO)
 } 
 ;;
  
@@ -475,7 +487,7 @@ GLOBAL_LIBM_ENTRY(atanf)
  
 {     .mfi 
      nop.m                           999
-     fma.s0       atanf_sgnx_piby2    =    atanf_sgn_x,atanf_piby2,f0
+     fma       atanf_sgnx_piby2    =    atanf_sgn_x,atanf_piby2,f0
      nop.i                           999
 } 
 {     .mfi 
@@ -518,38 +530,27 @@ GLOBAL_LIBM_ENTRY(atanf)
 {     .mfi 
                     nop.m                      999
 //(atanf_pred_GT1)    fnma.s    atanf_answer   =    atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
-(p7)    fnma.s.s0    atanf_answer   =    atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
+(p7)    fnma.s    atanf_answer   =    atanf_poly_q,atanf_z21_poly_r,atanf_sgnx_piby2
                     nop.i                      999;;
 } 
  
 {     .mfb 
                     nop.m                      999
 //(atanf_pred_LE1)    fma.s     atanf_answer   =    atanf_x11,atanf_poly_p1,atanf_poly_p4
-(p6)    fma.s.s0     atanf_answer   =    atanf_x11,atanf_poly_p1,atanf_poly_p4
+(p6)    fma.s     atanf_answer   =    atanf_x11,atanf_poly_p1,atanf_poly_p4
                     br.ret.sptk b0
 } 
 
 
 
-ATANF_X_INF_NAN_ZERO:
+L(ATANF_X_INF_NAN_ZERO):
 
-{ .mfi
-      nop.m 0
-      fclass.m p8,p9 = f8,0x23  // @inf
-      nop.i 0
-}
+      fclass.m p8,p9 = f8,0x23	// @inf
 ;;
-{ .mfi
-      nop.m 0
 (p8)  fmerge.s f8 = f8, atanf_piby2
-      nop.i 0
-}
 ;;
-{ .mfb
-      nop.m 0
-      fnorm.s.s0 f8 = f8
+      fnorm.s f8 = f8
       br.ret.sptk b0
-}
-;;
 
-GLOBAL_LIBM_END(atanf)
+.endp atanf
+ASM_SIZE_DIRECTIVE(atanf)
diff --git a/sysdeps/ia64/fpu/s_atanl.S b/sysdeps/ia64/fpu/s_atanl.S
index bfd9f458f4..28d44c1850 100644
--- a/sysdeps/ia64/fpu/s_atanl.S
+++ b/sysdeps/ia64/fpu/s_atanl.S
@@ -1,10 +1,10 @@
 .file "atanl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,52 +35,41 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 //
-//*********************************************************************
+// *********************************************************************
 //
 // History
-// 02/02/00 (hand-optimized)
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  (hand-optimized)
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 03/13/01 Fixed flags when denormal raised on intermediate result
-// 01/08/02 Improved speed.
-// 02/06/02 Corrected .section statement
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align;
-//          used data8 for long double table values
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Function:   atanl(x) = inverse tangent(x), for double extended x values
-// Function:   atan2l(y,x) = atan(y/x), for double extended y, x values
-//
-// API
-//
-//  long double atanl  (long double x)
-//  long double atan2l (long double y, long double x)
+// Function:   atan2l(y,x) = atan(y/x), for double extended y, x values
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Resources Used:
 //
 //    Floating-Point Registers: f8 (Input and Return Value)
-//                              f9 (Input for atan2l)
-//                              f10-f15, f32-f83
+//                              f9-f15
+//                              f32-f79
 //
 //    General Purpose Registers:
-//      r32-r51
-//      r49-r52 (Arguments to error support for 0,0 case)
+//      r32-r48
+//      r49,r50,r51,r52 (Arguments to error support for 0,0 case)
 //
 //    Predicate Registers:      p6-p15
 //
-//*********************************************************************
+// *********************************************************************
 //
 // IEEE Special Conditions:
 //
-//    Denormal fault raised on denormal inputs
+//    Denormal  fault raised on denormal inputs
 //    Underflow exceptions may occur 
 //    Special error handling for the y=0 and x=0 case
 //    Inexact raised when appropriate by algorithm
@@ -103,7 +92,7 @@
 //    atan2l(+/-Inf, Inf) = +/-pi/4
 //    atan2l(+/-Inf, -Inf) = +/-3pi/4
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Mathematical Description
 // ---------------------------
@@ -119,16 +108,16 @@
 //
 //
 //        (Arg_X, Arg_Y) x
-//                        \
-//                \
-//                 \
-//                  \
+//                        \ 
+//                \ 
+//                 \ 
+//                  \ 
 //                   \ angle between is ATANL(Arg_Y,Arg_X)
 
 
 
 
-//                    \
+//                    \ 
 //                     ------------------> X-axis
 
 //                   Origin
@@ -243,14 +232,14 @@
 //       z_hi = 2^k * 1.b_1 b_2 b_3 b_4 1
 //
 // then
-//                                            /                \
+//                                            /                \ 
 //                                            |  (V/U) - z_hi  |
 
 //      arctan(V/U) = arctan(z_hi) + acrtan| -------------- |
 //                                            | 1 + (V/U)*z_hi |
 //                                            \                /
 //
-//                                            /                \
+//                                            /                \ 
 //                                            |   V - z_hi*U   |
 
 //                  = arctan(z_hi) + acrtan| -------------- |
@@ -306,7 +295,7 @@
 //    U      := max( |Arg_X|, |Arg_Y| )
 //    V      := min( |Arg_X|, |Arg_Y| )
 //
-//    execute: frcpa E, pred, V, U
+//    execute: frcpa E, pred, V, U
 //    If pred is 0, go to Step 5 for special cases handling.
 //
 // Step 2. Decide on branch.
@@ -410,7 +399,7 @@
 //
 //    z := V * E     ...z approximates V/U to roughly working precision
 //    zsq := z * z
-//    z4 := zsq * zsq; z8 := z4 * z4
+//    z8 := zsq * zsq; z8 := z8 * z8
 //
 //    poly1 := P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
 //    poly2 := zsq*(P_1 + zsq*(P_2 + zsq*P_3))
@@ -449,11 +438,12 @@
 //
 // Step 5. Special Cases
 //
-//    These are detected early in the function by fclass instructions.
+//    If pred is 0 where pred is obtained in
+//        frcpa E, pred, V, U
 //
-//    We are in one of those special cases when X or Y is 0,+-inf or NaN
+//    we are in one of those special cases of 0,+-inf or NaN
 //
-//    If one of X and Y is NaN, return X+Y (which will generate
+//    If one of U and V is NaN, return U+V (which will generate
 //    invalid in case one is a signaling NaN). Otherwise,
 //    return the Result as described in the table
 //
@@ -479,6 +469,8 @@
 //
 //
 
+#include "libm_support.h"
+
 ArgY_orig   =   f8
 Result      =   f8
 FR_RESULT   =   f8
@@ -512,7 +504,6 @@ Res_hi      =   f49
 Res_lo      =   f50
 Z           =   f52
 zsq         =   f53
-z4          =   f54
 z8          =   f54
 poly1       =   f55
 poly2       =   f56
@@ -530,8 +521,8 @@ P_5         =   f67
 P_6         =   f68
 P_7         =   f69
 P_8         =   f70
-U_hold      =   f71
-TWO_TO_NEG3 =   f72
+TWO_TO_NEG3 =   f71
+U_hold      =   f72
 C_hi_hold   =   f73
 E_hold      =   f74
 M           =   f75
@@ -539,11 +530,6 @@ ArgX_abs    =   f76
 ArgY_abs    =   f77
 Result_lo   =   f78
 A_temp      =   f79
-FR_temp     =   f80
-Xsq         =   f81
-Ysq         =   f82
-tmp_small   =   f83
-
 GR_SAVE_PFS   = r33
 GR_SAVE_B0    = r34
 GR_SAVE_GP    = r35
@@ -559,1399 +545,1415 @@ exp_ArgY      = r44
 exponent_Q    = r45 
 significand_Q = r46 
 special       = r47 
-sp_exp_Q      = r48 
-sp_exp_4sig_Q = r49 
-table_base    = r50 
-int_temp      = r51
-
+special1      = r48 
 GR_Parameter_X      = r49
 GR_Parameter_Y      = r50
 GR_Parameter_RESULT = r51
 GR_Parameter_TAG    = r52
-GR_temp             = r52
-
-RODATA
-.align 16 
-
-LOCAL_OBJECT_START(Constants_atan)
-//       double pi/2
-data8 0x3FF921FB54442D18
-//       single lo_pi/2, two**(-3)
-data4 0x248D3132, 0x3E000000
-data8 0xAAAAAAAAAAAAAAA3, 0xBFFD // P_1
-data8 0xCCCCCCCCCCCC54B2, 0x3FFC // P_2
-data8 0x9249249247E4D0C2, 0xBFFC // P_3
-data8 0xE38E38E058870889, 0x3FFB // P_4
-data8 0xBA2E895B290149F8, 0xBFFB // P_5
-data8 0x9D88E6D4250F733D, 0x3FFB // P_6
-data8 0x884E51FFFB8745A0, 0xBFFB // P_7
-data8 0xE1C7412B394396BD, 0x3FFA // P_8
-data8 0xAAAAAAAAAAAAA52F, 0xBFFD // Q_1
-data8 0xCCCCCCCCC75B60D3, 0x3FFC // Q_2
-data8 0x924923AD011F1940, 0xBFFC // Q_3
-data8 0xE36F716D2A5F89BD, 0x3FFB // Q_4
+int_temp            = r52
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+.align 64 
+
+Constants_atan:
+ASM_TYPE_DIRECTIVE(Constants_atan,@object)
+data4    0x54442D18, 0x3FF921FB, 0x248D3132, 0x3E000000
+//       double pi/2, single lo_pi/2, two**(-3)
+data4    0xAAAAAAA3, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // P_1
+data4    0xCCCC54B2, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // P_2
+data4    0x47E4D0C2, 0x92492492, 0x0000BFFC, 0x00000000 // P_3
+data4    0x58870889, 0xE38E38E0, 0x00003FFB, 0x00000000 // P_4
+data4    0x290149F8, 0xBA2E895B, 0x0000BFFB, 0x00000000 // P_5
+data4    0x250F733D, 0x9D88E6D4, 0x00003FFB, 0x00000000 // P_6
+data4    0xFB8745A0, 0x884E51FF, 0x0000BFFB, 0x00000000 // P_7
+data4    0x394396BD, 0xE1C7412B, 0x00003FFA, 0x00000000 // P_8
+data4    0xAAAAA52F, 0xAAAAAAAA, 0x0000BFFD, 0x00000000 // Q_1
+data4    0xC75B60D3, 0xCCCCCCCC, 0x00003FFC, 0x00000000 // Q_2
+data4    0x011F1940, 0x924923AD, 0x0000BFFC, 0x00000000 // Q_3
+data4    0x2A5F89BD, 0xE36F716D, 0x00003FFB, 0x00000000 // Q_4
 //
 //    Entries Tbl_hi  (double precision)
 //    B = 1+Index/16+1/32  Index = 0
 //    Entries Tbl_lo (single precision)
 //    B = 1+Index/16+1/32  Index = 0
 //
-data8 0x3FE9A000A935BD8E 
-data4 0x23ACA08F, 0x00000000
+data4   0xA935BD8E, 0x3FE9A000, 0x23ACA08F, 0x00000000
 //
 //    Entries Tbl_hi  (double precision) Index = 0,1,...,15
 //    B = 2^(-1)*(1+Index/16+1/32)
 //    Entries Tbl_lo (single precision)
 //    Index = 0,1,...,15  B = 2^(-1)*(1+Index/16+1/32)
 //
-data8 0x3FDE77EB7F175A34 
-data4 0x238729EE, 0x00000000
-data8 0x3FE0039C73C1A40B 
-data4 0x249334DB, 0x00000000
-data8 0x3FE0C6145B5B43DA 
-data4 0x22CBA7D1, 0x00000000
-data8 0x3FE1835A88BE7C13 
-data4 0x246310E7, 0x00000000
-data8 0x3FE23B71E2CC9E6A 
-data4 0x236210E5, 0x00000000
-data8 0x3FE2EE628406CBCA 
-data4 0x2462EAF5, 0x00000000
-data8 0x3FE39C391CD41719 
-data4 0x24B73EF3, 0x00000000
-data8 0x3FE445065B795B55 
-data4 0x24C11260, 0x00000000
-data8 0x3FE4E8DE5BB6EC04 
-data4 0x242519EE, 0x00000000
-data8 0x3FE587D81F732FBA 
-data4 0x24D4346C, 0x00000000
-data8 0x3FE6220D115D7B8D 
-data4 0x24ED487B, 0x00000000
-data8 0x3FE6B798920B3D98 
-data4 0x2495FF1E, 0x00000000
-data8 0x3FE748978FBA8E0F 
-data4 0x223D9531, 0x00000000
-data8 0x3FE7D528289FA093 
-data4 0x242B0411, 0x00000000
-data8 0x3FE85D69576CC2C5 
-data4 0x2335B374, 0x00000000
-data8 0x3FE8E17AA99CC05D 
-data4 0x24C27CFB, 0x00000000
+data4   0x7F175A34, 0x3FDE77EB, 0x238729EE, 0x00000000
+data4   0x73C1A40B, 0x3FE0039C, 0x249334DB, 0x00000000
+data4   0x5B5B43DA, 0x3FE0C614, 0x22CBA7D1, 0x00000000
+data4   0x88BE7C13, 0x3FE1835A, 0x246310E7, 0x00000000
+data4   0xE2CC9E6A, 0x3FE23B71, 0x236210E5, 0x00000000
+data4   0x8406CBCA, 0x3FE2EE62, 0x2462EAF5, 0x00000000
+data4   0x1CD41719, 0x3FE39C39, 0x24B73EF3, 0x00000000
+data4   0x5B795B55, 0x3FE44506, 0x24C11260, 0x00000000
+data4   0x5BB6EC04, 0x3FE4E8DE, 0x242519EE, 0x00000000
+data4   0x1F732FBA, 0x3FE587D8, 0x24D4346C, 0x00000000
+data4   0x115D7B8D, 0x3FE6220D, 0x24ED487B, 0x00000000
+data4   0x920B3D98, 0x3FE6B798, 0x2495FF1E, 0x00000000
+data4   0x8FBA8E0F, 0x3FE74897, 0x223D9531, 0x00000000
+data4   0x289FA093, 0x3FE7D528, 0x242B0411, 0x00000000
+data4   0x576CC2C5, 0x3FE85D69, 0x2335B374, 0x00000000
+data4   0xA99CC05D, 0x3FE8E17A, 0x24C27CFB, 0x00000000
 //
 //    Entries Tbl_hi  (double precision) Index = 0,1,...,15
 //    B = 2^(-2)*(1+Index/16+1/32)
 //    Entries Tbl_lo (single precision)
 //    Index = 0,1,...,15  B = 2^(-2)*(1+Index/16+1/32)
 //
-data8 0x3FD025FA510665B5 
-data4 0x24263482, 0x00000000
-data8 0x3FD1151A362431C9
-data4 0x242C8DC9, 0x00000000
-data8 0x3FD2025567E47C95
-data4 0x245CF9BA, 0x00000000
-data8 0x3FD2ED987A823CFE
-data4 0x235C892C, 0x00000000
-data8 0x3FD3D6D129271134
-data4 0x2389BE52, 0x00000000
-data8 0x3FD4BDEE586890E6
-data4 0x24436471, 0x00000000
-data8 0x3FD5A2E0175E0F4E
-data4 0x2389DBD4, 0x00000000
-data8 0x3FD685979F5FA6FD
-data4 0x2476D43F, 0x00000000
-data8 0x3FD7660752817501
-data4 0x24711774, 0x00000000
-data8 0x3FD84422B8DF95D7
-data4 0x23EBB501, 0x00000000
-data8 0x3FD91FDE7CD0C662
-data4 0x23883A0C, 0x00000000
-data8 0x3FD9F93066168001
-data4 0x240DF63F, 0x00000000
-data8 0x3FDAD00F5422058B
-data4 0x23FE261A, 0x00000000
-data8 0x3FDBA473378624A5
-data4 0x23A8CD0E, 0x00000000
-data8 0x3FDC76550AAD71F8
-data4 0x2422D1D0, 0x00000000
-data8 0x3FDD45AEC9EC862B
-data4 0x2344A109, 0x00000000
+data4    0x510665B5, 0x3FD025FA, 0x24263482, 0x00000000
+data4    0x362431C9, 0x3FD1151A, 0x242C8DC9, 0x00000000
+data4    0x67E47C95, 0x3FD20255, 0x245CF9BA, 0x00000000
+data4    0x7A823CFE, 0x3FD2ED98, 0x235C892C, 0x00000000
+data4    0x29271134, 0x3FD3D6D1, 0x2389BE52, 0x00000000
+data4    0x586890E6, 0x3FD4BDEE, 0x24436471, 0x00000000
+data4    0x175E0F4E, 0x3FD5A2E0, 0x2389DBD4, 0x00000000
+data4    0x9F5FA6FD, 0x3FD68597, 0x2476D43F, 0x00000000
+data4    0x52817501, 0x3FD76607, 0x24711774, 0x00000000
+data4    0xB8DF95D7, 0x3FD84422, 0x23EBB501, 0x00000000
+data4    0x7CD0C662, 0x3FD91FDE, 0x23883A0C, 0x00000000
+data4    0x66168001, 0x3FD9F930, 0x240DF63F, 0x00000000
+data4    0x5422058B, 0x3FDAD00F, 0x23FE261A, 0x00000000
+data4    0x378624A5, 0x3FDBA473, 0x23A8CD0E, 0x00000000
+data4    0x0AAD71F8, 0x3FDC7655, 0x2422D1D0, 0x00000000
+data4    0xC9EC862B, 0x3FDD45AE, 0x2344A109, 0x00000000
 //
 //    Entries Tbl_hi  (double precision) Index = 0,1,...,15
 //    B = 2^(-3)*(1+Index/16+1/32)
 //    Entries Tbl_lo (single precision)
 //    Index = 0,1,...,15  B = 2^(-3)*(1+Index/16+1/32)
 //
-data8 0x3FC068D584212B3D
-data4 0x239874B6, 0x00000000
-data8 0x3FC1646541060850
-data4 0x2335E774, 0x00000000
-data8 0x3FC25F6E171A535C
-data4 0x233E36BE, 0x00000000
-data8 0x3FC359E8EDEB99A3
-data4 0x239680A3, 0x00000000
-data8 0x3FC453CEC6092A9E
-data4 0x230FB29E, 0x00000000
-data8 0x3FC54D18BA11570A
-data4 0x230C1418, 0x00000000
-data8 0x3FC645BFFFB3AA73
-data4 0x23F0564A, 0x00000000
-data8 0x3FC73DBDE8A7D201
-data4 0x23D4A5E1, 0x00000000
-data8 0x3FC8350BE398EBC7
-data4 0x23D4ADDA, 0x00000000
-data8 0x3FC92BA37D050271
-data4 0x23BCB085, 0x00000000
-data8 0x3FCA217E601081A5
-data4 0x23BC841D, 0x00000000
-data8 0x3FCB1696574D780B
-data4 0x23CF4A8E, 0x00000000
-data8 0x3FCC0AE54D768466
-data4 0x23BECC90, 0x00000000
-data8 0x3FCCFE654E1D5395
-data4 0x2323DCD2, 0x00000000
-data8 0x3FCDF110864C9D9D
-data4 0x23F53F3A, 0x00000000
-data8 0x3FCEE2E1451D980C
-data4 0x23CCB11F, 0x00000000
-//
-data8 0x400921FB54442D18, 0x3CA1A62633145C07 // PI two doubles
-data8 0x3FF921FB54442D18, 0x3C91A62633145C07 // PI_by_2 two dbles
-data8 0x3FE921FB54442D18, 0x3C81A62633145C07 // PI_by_4 two dbles
-data8 0x4002D97C7F3321D2, 0x3C9A79394C9E8A0A // 3PI_by_4 two dbles
-LOCAL_OBJECT_END(Constants_atan)
-
-
-.section .text
-GLOBAL_IEEE754_ENTRY(atanl)
-
-// Use common code with atan2l after setting x=1.0
-{ .mfi
-      alloc r32 = ar.pfs, 0, 17, 4, 0
-      fma.s1 Ysq = ArgY_orig, ArgY_orig, f0          // Form y*y
-      nop.i 999
-}
-{ .mfi
-      addl table_ptr1 = @ltoff(Constants_atan#), gp  // Address of table pointer
-      fma.s1 Xsq = f1, f1, f0                        // Form x*x
-      nop.i 999
-}
-;;
-
+data4    0x84212B3D, 0x3FC068D5, 0x239874B6, 0x00000000
+data4    0x41060850, 0x3FC16465, 0x2335E774, 0x00000000
+data4    0x171A535C, 0x3FC25F6E, 0x233E36BE, 0x00000000
+data4    0xEDEB99A3, 0x3FC359E8, 0x239680A3, 0x00000000
+data4    0xC6092A9E, 0x3FC453CE, 0x230FB29E, 0x00000000
+data4    0xBA11570A, 0x3FC54D18, 0x230C1418, 0x00000000
+data4    0xFFB3AA73, 0x3FC645BF, 0x23F0564A, 0x00000000
+data4    0xE8A7D201, 0x3FC73DBD, 0x23D4A5E1, 0x00000000
+data4    0xE398EBC7, 0x3FC8350B, 0x23D4ADDA, 0x00000000
+data4    0x7D050271, 0x3FC92BA3, 0x23BCB085, 0x00000000
+data4    0x601081A5, 0x3FCA217E, 0x23BC841D, 0x00000000
+data4    0x574D780B, 0x3FCB1696, 0x23CF4A8E, 0x00000000
+data4    0x4D768466, 0x3FCC0AE5, 0x23BECC90, 0x00000000
+data4    0x4E1D5395, 0x3FCCFE65, 0x2323DCD2, 0x00000000
+data4    0x864C9D9D, 0x3FCDF110, 0x23F53F3A, 0x00000000
+data4    0x451D980C, 0x3FCEE2E1, 0x23CCB11F, 0x00000000
+
+data4    0x54442D18, 0x400921FB, 0x33145C07, 0x3CA1A626 // PI two doubles
+data4    0x54442D18, 0x3FF921FB, 0x33145C07, 0x3C91A626 // PI_by_2 two dbles
+data4    0x54442D18, 0x3FE921FB, 0x33145C07, 0x3C81A626 // PI_by_4 two dbles
+data4    0x7F3321D2, 0x4002D97C, 0x4C9E8A0A, 0x3C9A7939 // 3PI_by_4 two dbles
+ASM_SIZE_DIRECTIVE(Constants_atan)
+
+
+.text
+.proc atanl#
+.global atanl#
+.align 64
+
+atanl:     // single-argument entry; reuses the atan2l code path
+{ .mfb
+	nop.m 999
+(p0)   mov ArgX_orig = f1     // supply x = 1.0 (f1) as the second argument
+(p0)   br.cond.sptk atan2l ;;   // plain branch, not a call: atan2l's br.ret b0 returns to our caller
+}
+.endp atanl
+ASM_SIZE_DIRECTIVE(atanl)
+
+.text
+.proc atan2l#
+.global atan2l#
+#ifdef _LIBC
+.proc __atan2l#
+.global __atan2l#
+.proc __ieee754_atan2l#
+.global __ieee754_atan2l#
+#endif
+.align 64 
+
+
+atan2l:
+#ifdef _LIBC
+__atan2l:
+__ieee754_atan2l:
+#endif
+{ .mfi
+alloc r32 = ar.pfs, 0, 17 , 4, 0
+(p0)  mov   ArgY = ArgY_orig
+}
+{ .mfi
+	nop.m 999
+(p0)  mov   ArgX = ArgX_orig
+	nop.i 999
+};;
 { .mfi
-      ld8 table_ptr1 = [table_ptr1]                  // Get table pointer
-      fnorm.s1 ArgY = ArgY_orig
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p7,p0 = ArgY_orig, 0x103
+	nop.i 999 
 }
 { .mfi
-      nop.m 999
-      fnorm.s1 ArgX = f1
-      nop.i 999
-}
-;;
-
+	nop.m 999
+//
+//
+//  Save original input args and load table ptr.
+//
+(p0)   fclass.m.unc p6,p0 = ArgX_orig, 0x103
+	nop.i 999
+};;
 { .mfi
-      getf.exp sign_X = f1               // Get signexp of x
-      fmerge.s ArgX_abs = f0, f1         // Form |x|
-      nop.i 999
+(p0)   addl      table_ptr1   = @ltoff(Constants_atan#), gp
+(p0)   fclass.m.unc p0,p9 = ArgY_orig, 0x1FF
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fnorm.s1 ArgX_orig = f1
-      nop.i 999
+       ld8 table_ptr1 = [table_ptr1]
+(p0)   fclass.m.unc p0,p8 = ArgX_orig, 0x1FF
+	nop.i 999
 }
-;;
-
 { .mfi
-      getf.exp sign_Y = ArgY_orig        // Get signexp of y
-      fmerge.s ArgY_abs = f0, ArgY_orig  // Form |y|
-      mov table_base = table_ptr1        // Save base pointer to tables
+	nop.m 999
+(p0)   fclass.m.unc p13,p0 = ArgY_orig, 0x0C3
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      ldfd P_hi = [table_ptr1],8         // Load double precision hi part of pi
-      fclass.m p8,p0 = ArgY_orig, 0x1e7  // Test y natval, nan, inf, zero
-      nop.i 999 
+(p0)   fclass.m.unc p12,p0 = ArgX_orig, 0x0C3
+	nop.i 999
 }
-;;
 
-{ .mfi
-      ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
-      nop.f 999 
-      nop.i 999 
-}
-{ .mfi
-      nop.m 999
-      fma.s1 M = f1, f1, f0              // Set M = 1.0
-      nop.i 999 
-}
-;;
 
 //
+//     Check for NatVals.
 //     Check for everything - if false, then must be pseudo-zero
 //     or pseudo-nan (IA unsupporteds).
 //
-{ .mfb
-      nop.m 999
-      fclass.m p0,p12 = f1, 0x1FF        // Test x unsupported
-(p8)  br.cond.spnt ATANL_Y_SPECIAL       // Branch if y natval, nan, inf, zero
+{ .mib
+	nop.m 999
+	nop.i 999
+(p6)   br.cond.spnt L(ATANL_NATVAL) ;;
 }
-;;
 
-//     U = max(ArgX_abs,ArgY_abs)
-//     V = min(ArgX_abs,ArgY_abs)
-{ .mfi
-      nop.m 999
-      fcmp.ge.s1 p6,p7 = Xsq, Ysq        // Test for |x| >= |y| using squares
-      nop.i 999 
+{ .mib
+	nop.m 999
+	nop.i 999
+(p7)   br.cond.spnt L(ATANL_NATVAL) ;;
 }
-{ .mfb
-      nop.m 999
-      fma.s1 V = ArgX_abs, f1, f0        // Set V assuming |x| < |y|
-      br.cond.sptk ATANL_COMMON          // Branch to common code
+{ .mib
+(p0)   ldfd P_hi = [table_ptr1],8
+	nop.i 999
+(p8)   br.cond.spnt L(ATANL_UNSUPPORTED) ;;
 }
-;;
-
-GLOBAL_IEEE754_END(atanl)
-GLOBAL_IEEE754_ENTRY(atan2l)
-
-{ .mfi
-      alloc r32 = ar.pfs, 0, 17, 4, 0
-      fma.s1 Ysq = ArgY_orig, ArgY_orig, f0          // Form y*y
-      nop.i 999
+{ .mbb
+(p0)   add table_ptr2 = 96, table_ptr1
+(p9)   br.cond.spnt L(ATANL_UNSUPPORTED)
+//
+//     Load double precision high-order part of pi
+//
+(p12)  br.cond.spnt L(ATANL_NAN) ;;
 }
-{ .mfi
-      addl table_ptr1 = @ltoff(Constants_atan#), gp  // Address of table pointer
-      fma.s1 Xsq = ArgX_orig, ArgX_orig, f0          // Form x*x
-      nop.i 999
+{ .mfb
+	nop.m 999
+(p0)   fnorm.s1 ArgX = ArgX
+(p13)  br.cond.spnt L(ATANL_NAN) ;;
 }
-;;
-
-{ .mfi
-      ld8 table_ptr1 = [table_ptr1]                  // Get table pointer
-      fnorm.s1 ArgY = ArgY_orig
-      nop.i 999
+//
+//     Normalize the input argument.
+//     Branch out if NaN inputs
+//
+{ .mmf
+(p0)   ldfs P_lo = [table_ptr1], 4
+	nop.m 999
+(p0)   fnorm.s1 ArgY = ArgY ;;
 }
-{ .mfi
-      nop.m 999
-      fnorm.s1 ArgX = ArgX_orig
-      nop.i 999
+{ .mmf
+	nop.m 999
+(p0)   ldfs TWO_TO_NEG3 = [table_ptr1], 180
+//
+//     U = max(ArgX_abs,ArgY_abs)
+//     V = min(ArgX_abs,ArgY_abs)
+//     if PR1, swap = 0
+//     if PR2, swap = 1
+//
+(p0)   mov M = f1 ;;
 }
-;;
-
 { .mfi
-      getf.exp sign_X = ArgX_orig        // Get signexp of x
-      fmerge.s ArgX_abs = f0, ArgX_orig  // Form |x|
-      nop.i 999
+	nop.m 999
+//
+//     Get exp and sign of ArgX
+//     Get exp and sign of ArgY
+//     Load 2**(-3) and increment ptr to Q_4.
+//
+(p0)   fmerge.s ArgX_abs = f1, ArgX
+	nop.i 999 ;;
 }
-;;
-
+//
+//     load single precision low-order part of pi = P_lo
+//
 { .mfi
-      getf.exp sign_Y = ArgY_orig        // Get signexp of y
-      fmerge.s ArgY_abs = f0, ArgY_orig  // Form |y|
-      mov table_base = table_ptr1        // Save base pointer to tables
+(p0)   getf.exp sign_X = ArgX
+(p0)   fmerge.s ArgY_abs = f1, ArgY
+	nop.i 999 ;;
 }
-;;
-
-{ .mfi
-      ldfd P_hi = [table_ptr1],8         // Load double precision hi part of pi
-      fclass.m p8,p0 = ArgY_orig, 0x1e7  // Test y natval, nan, inf, zero
-      nop.i 999 
+{ .mii
+(p0)   getf.exp sign_Y = ArgY
+	nop.i 999 ;;
+(p0)   shr sign_X = sign_X, 17 ;;
 }
-;;
-
-{ .mfi
-      ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
-      fclass.m p9,p0 = ArgX_orig, 0x1e7  // Test x natval, nan, inf, zero
-      nop.i 999 
+{ .mii
+	nop.m 999
+(p0)   shr sign_Y = sign_Y, 17 ;;
+(p0)   cmp.eq.unc p8, p9 = 0x00000, sign_Y ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 M = f1, f1, f0              // Set M = 1.0
-      nop.i 999 
-}
-;;
-
+	nop.m 999
 //
-//     Check for everything - if false, then must be pseudo-zero
-//     or pseudo-nan (IA unsupporteds).
+//     Is ArgX_abs >= ArgY_abs
+//     Is sign_Y == 0?
 //
-{ .mfb
-      nop.m 999
-      fclass.m p0,p12 = ArgX_orig, 0x1FF // Test x unsupported
-(p8)  br.cond.spnt ATANL_Y_SPECIAL       // Branch if y natval, nan, inf, zero
+(p0)   fmax.s1 U = ArgX_abs, ArgY_abs
+	nop.i 999
 }
-;;
-
-//     U = max(ArgX_abs,ArgY_abs)
-//     V = min(ArgX_abs,ArgY_abs)
 { .mfi
-      nop.m 999
-      fcmp.ge.s1 p6,p7 = Xsq, Ysq        // Test for |x| >= |y| using squares
-      nop.i 999 
-}
-{ .mfb
-      nop.m 999
-      fma.s1 V = ArgX_abs, f1, f0        // Set V assuming |x| < |y|
-(p9)  br.cond.spnt ATANL_X_SPECIAL       // Branch if x natval, nan, inf, zero
-}
-;;
-
-// Now common code for atanl and atan2l
-ATANL_COMMON:
-{ .mfi
-      nop.m 999
-      fclass.m p0,p13 = ArgY_orig, 0x1FF // Test y unsupported
-      shr sign_X = sign_X, 17            // Get sign bit of x
+	nop.m 999
+//
+//     ArgX_abs = |ArgX|
+//     ArgY_abs = |ArgY|
+//     sign_X is sign bit of ArgX
+//     sign_Y is sign bit of ArgY
+//
+(p0)   fcmp.ge.s1 p6, p7 = ArgX_abs, ArgY_abs
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 U = ArgY_abs, f1, f0        // Set U assuming |x| < |y|
-      adds table_ptr1 = 176, table_ptr1  // Point to Q4
+	nop.m 999
+(p0)   fmin.s1 V = ArgX_abs, ArgY_abs
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p6)  add swap = r0, r0                  // Set swap=0 if |x| >= |y|
-(p6)  frcpa.s1 E, p0 = ArgY_abs, ArgX_abs // Compute E if |x| >= |y|
-      shr sign_Y = sign_Y, 17            // Get sign bit of y
+	nop.m 999
+(p8)   fadd.s1 s_Y = f0, f1
+(p6)   cmp.eq.unc p10, p11 = 0x00000, sign_X
 }
-{ .mfb
-      nop.m 999
-(p6)  fma.s1 V = ArgY_abs, f1, f0        // Set V if |x| >= |y|
-(p12) br.cond.spnt ATANL_UNSUPPORTED     // Branch if x unsupported
+{ .mii
+(p6)   add swap = r0, r0
+	nop.i 999 ;;
+(p7)   add swap = 1, r0
 }
-;;
-
-// Set p8 if y >=0
-// Set p9 if y < 0
-// Set p10 if |x| >= |y| and x >=0
-// Set p11 if |x| >= |y| and x < 0
 { .mfi
-      cmp.eq p8, p9 = 0, sign_Y          // Test for y >= 0
-(p7)  frcpa.s1 E, p0 = ArgX_abs, ArgY_abs // Compute E if |x| < |y|
-(p7)  add swap = 1, r0                   // Set swap=1 if |x| < |y|
-}
-{ .mfb
-(p6)  cmp.eq.unc p10, p11 = 0, sign_X    // If |x| >= |y|, test for x >= 0
-(p6)  fma.s1 U = ArgX_abs, f1, f0        // Set U if |x| >= |y|
-(p13) br.cond.spnt ATANL_UNSUPPORTED     // Branch if y unsupported
-}
-;;
-
+	nop.m 999
 //
+//     Let M = 1.0
 //     if p8, s_Y = 1.0
 //     if p9, s_Y = -1.0
 //
-.pred.rel "mutex",p8,p9
+(p10)  fsub.s1 M = M, f1
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-(p8)  fadd.s1 s_Y = f0, f1               // If y >= 0 set s_Y = 1.0
-      nop.i 999
+	nop.m 999
+(p9)   fsub.s1 s_Y = f0, f1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p9)  fsub.s1 s_Y = f0, f1               // If y < 0 set s_Y = -1.0
-      nop.i 999
+	nop.m 999
+(p0)   frcpa.s1 E, p6 = V, U
+	nop.i 999 ;;
 }
-;;
-
-.pred.rel "mutex",p10,p11
+{ .mbb
+	nop.m 999
+//
+//     E = frcpa(V,U)
+//
+(p6)   br.cond.sptk L(ATANL_STEP2)
+(p0)   br.cond.spnt L(ATANL_SPECIAL_HANDLING) ;;
+}
+L(ATANL_STEP2): 
 { .mfi
-      nop.m 999
-(p10) fsub.s1 M = M, f1                  // If |x| >= |y| and x >=0, set M=0
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 Q = E, V
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p11) fadd.s1 M = M, f1                  // If |x| >= |y| and x < 0, set M=2.0
-      nop.i 999
+	nop.m 999
+(p0)   fcmp.eq.s0     p0, p9 = f1, ArgY_orig
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig // Dummy to set denormal flag
-      nop.i 999
+	nop.m 999
+//
+//     Is Q < 2**(-3)?
+//
+(p0)   fcmp.eq.s0     p0, p8 = f1, ArgX_orig
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
+(p11)  fadd.s1 M = M, f1
+	nop.i 999 ;;
 }
+{ .mlx
+	nop.m 999
 // *************************************************
 // ********************* STEP2 *********************
 // *************************************************
+(p0)   movl special = 0x8400000000000000
+}
+{ .mlx
+	nop.m 999
 //
-//     Q = E * V
+//     lookup = b_1 b_2 b_3 b_4
 //
-{ .mfi
-      nop.m 999
-      fmpy.s1 Q = E, V
-      nop.i 999
+(p0)   movl special1 = 0x0000000000000100 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fnma.s1 E_hold = E, U, f1           // E_hold = 1.0 - E*U (1) if POLY path
-      nop.i 999
-}
-;;
-
-// Create a single precision representation of the signexp of Q with the 
-// 4 most significant bits of the significand followed by a 1 and then 18 0's
-{ .mfi
-      nop.m 999
-      fmpy.s1 P_hi = M, P_hi
-      dep.z special = 0x1, 18, 1           // Form 0x0000000000040000
-}
-{ .mfi
-      nop.m 999
-      fmpy.s1 P_lo = M, P_lo
-      add table_ptr2 = 32, table_ptr1
+	nop.m 999
+//
+//     Do fnorms to raise any denormal operand
+//     exceptions.
+//
+(p0)   fmpy.s1 P_hi = M, P_hi
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 A_temp = Q, f1, f0            // Set A_temp if POLY path
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 P_lo = M, P_lo
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 E = E, E_hold, E              // E = E + E*E_hold (1) if POLY path
-      nop.i 999
-}
-;;
-
+	nop.m 999
 //
-//     Is Q < 2**(-3)?
-//     swap = xor(swap,sign_X)
+//     Q = E * V
 //
-{ .mfi
-      nop.m 999
-      fcmp.lt.s1 p9, p0 = Q, TWO_TO_NEG3    // Test Q < 2^-3
-      xor swap = sign_X, swap
+(p0)   fcmp.lt.unc.s1 p6, p7 = Q, TWO_TO_NEG3
+	nop.i 999 ;;
 }
-;;
-
-//     P_hi = s_Y * P_hi
-{ .mmf
-      getf.exp exponent_Q =  Q              // Get signexp of Q
-      cmp.eq.unc p7, p6 = 0x00000, swap
-      fmpy.s1 P_hi = s_Y, P_hi
+{ .mmb
+(p0)   getf.sig significand_Q = Q
+(p0)   getf.exp exponent_Q =  Q
+	nop.b 999 ;;
 }
-;;
-
+{ .mmi
+	nop.m 999 ;;
+(p0)   andcm k = 0x0003, exponent_Q
+(p0)   extr.u lookup = significand_Q, 59, 4 ;;
+}
+{ .mib
+	nop.m 999
+(p0)   dep special = lookup, special, 59, 4
 //
-//     if (PR_1) sigma = -1.0
-//     if (PR_2) sigma =  1.0
+//     Generate 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
 //
-{ .mfi
-      getf.sig significand_Q = Q            // Get significand of Q
-(p6)  fsub.s1 sigma = f0, f1
-      nop.i 999
-}
-{ .mfb
-(p9)  add table_ptr1 = 128, table_base      // Point to P8 if POLY path
-(p7)  fadd.s1 sigma = f0, f1
-(p9)  br.cond.spnt ATANL_POLY               // Branch to POLY if 0 < Q < 2^-3
+(p6)   br.cond.spnt L(ATANL_POLY) ;;
 }
-;;
-
+{ .mfi
+(p0)   cmp.eq.unc p8, p9 = 0x0000, k
+(p0)   fmpy.s1 P_hi = s_Y, P_hi
+//
+//     We waited a few extra cycles so P_lo and P_hi could be calculated.
+//     Load the constant 256 for loading up table entries.
 //
 // *************************************************
 // ******************** STEP3 **********************
 // *************************************************
+(p0)   add table_ptr2 = 16, table_ptr1
+}
 //
-//     lookup = b_1 b_2 b_3 B_4
+//     Let z_hi have exponent and sign of original Q
+//     Load the Tbl_hi(0) else, increment pointer.
 //
+{ .mii
+(p0)   ldfe Q_4 = [table_ptr1], -16
+(p0)   xor swap = sign_X, swap ;;
+(p9)   sub k = k, r0, 1
+}
 { .mmi
-      nop.m 999
-      nop.m 999
-      andcm k = 0x0003, exponent_Q  // k=0,1,2,3 for exp_Q=0,-1,-2,-3
+(p0)   setf.sig z_hi = special
+(p0)   ldfe Q_3 = [table_ptr1], -16
+(p9)   add table_ptr2 = 16, table_ptr2 ;;
 }
-;;
-
 //
-//  Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0  in single precision 
-//  representation.  Note sign of Q is always 0.
+//     U_hold = U - U_prime_hi
+//     k = k * 256 - Result can be 0, 256, or 512.
 //
-{ .mfi
-      cmp.eq p8, p9 = 0x0000, k             // Test k=0
-      nop.f 999
-      extr.u lookup = significand_Q, 59, 4  // Extract b_1 b_2 b_3 b_4 for index
+{ .mmb
+(p0)   ldfe Q_2 = [table_ptr1], -16
+(p8)   ldfd Tbl_hi = [table_ptr2], 8
+	nop.b 999 ;;
 }
-{ .mfi
-      sub sp_exp_Q = 0x7f, k                // Form single prec biased exp of Q
-      nop.f 999
-      sub k = k, r0, 1                      // Decrement k
+//
+//     U_prime_lo =  U_hold + V * z_hi
+//     lookup -> lookup * 16 + k
+//
+{ .mmi
+(p0)   ldfe Q_1 = [table_ptr1], -16 ;;
+(p8)   ldfs Tbl_lo = [table_ptr2], 8
+//
+//     U_prime_hi = U + V * z_hi
+//     Load the Tbl_lo(0)
+//
+(p9)   pmpy2.r k = k, special1 ;;
 }
-;;
-
-//     Form pointer to B index table
-{ .mfi
-      ldfe Q_4 = [table_ptr1], -16          // Load Q_4
-      nop.f 999
-(p9)  shl k = k, 8                          // k = 0, 256, or 512
+{ .mii
+	nop.m 999
+	nop.i 999 
+	nop.i 999 ;;
 }
-{ .mfi
-(p9)  shladd table_ptr2 = lookup, 4, table_ptr2
-      nop.f 999
-      shladd sp_exp_4sig_Q = sp_exp_Q, 4, lookup // Shift and add in 4 high bits
+{ .mii
+	nop.m 999
+	nop.i 999 
+	nop.i 999 ;;
 }
-;;
-
-{ .mmi
-(p8)  add table_ptr2 = -16, table_ptr2      // Pointer if original k was 0
-(p9)  add table_ptr2 = k, table_ptr2        // Pointer if k was 1, 2, 3
-      dep special = sp_exp_4sig_Q, special, 19, 13 // Form z_hi as single prec
+{ .mii
+	nop.m 999
+	nop.i 999 
+	nop.i 999 ;;
 }
-;;
-
-//     z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
-{ .mmi
-      ldfd Tbl_hi = [table_ptr2], 8         // Load Tbl_hi from index table
-;;
-      setf.s z_hi = special                 // Form z_hi
-      nop.i 999
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+(p9)   shladd lookup = lookup, 0x0004, k ;;
 }
 { .mmi
-      ldfs Tbl_lo = [table_ptr2], 8         // Load Tbl_lo from index table
-;;
-      ldfe Q_3 = [table_ptr1], -16          // Load Q_3
-      nop.i 999
-}
-;;
-
-{ .mmi
-      ldfe Q_2 = [table_ptr1], -16          // Load Q_2
-      nop.m 999
-      nop.i 999
+(p9)   add table_ptr2 = table_ptr2, lookup ;;
+//
+//     V_prime =  V - U * z_hi
+//
+(p9)   ldfd Tbl_hi = [table_ptr2], 8
+	nop.i 999 ;;
 }
-;;
-
 { .mmf
-      ldfe Q_1 = [table_ptr1], -16          // Load Q_1
-      nop.m 999
-      nop.f 999
+	nop.m 999
+//
+//     C_hi = frcpa(1,U_prime_hi)
+//
+(p9)   ldfs Tbl_lo = [table_ptr2], 8
+//
+//     z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
+//     Point to beginning of Tbl_hi entries - k = 0.
+//
+(p0)   fmerge.se z_hi = Q, z_hi ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 U_prime_hi = V, z_hi, U        // U_prime_hi = U + V * z_hi
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 U_prime_hi = V, z_hi, U
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fnma.s1 V_prime = U, z_hi, V          // V_prime =  V - U * z_hi
-      nop.i 999
+	nop.m 999
+(p0)   fnma.s1 V_prime = U, z_hi, V
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      mov A_hi = Tbl_hi                     // Start with A_hi = Tbl_hi
-      nop.i 999
+	nop.m 999
+(p0)   mov A_hi = Tbl_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fsub.s1 U_hold = U, U_prime_hi        // U_hold = U - U_prime_hi
-      nop.i 999
+	nop.m 999
+(p0)   fsub.s1 U_hold = U, U_prime_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      frcpa.s1 C_hi, p0 = f1, U_prime_hi    // C_hi = frcpa(1,U_prime_hi)
-      nop.i 999
+	nop.m 999
+(p0)   frcpa.s1 C_hi, p6 = f1, U_prime_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 A_hi = s_Y, A_hi              // A_hi = s_Y * A_hi
-      nop.i 999
+(p0)   cmp.eq.unc p7, p6 = 0x00000, swap
+(p0)   fmpy.s1 A_hi = s_Y, A_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 U_prime_lo = z_hi, V, U_hold   // U_prime_lo =  U_hold + V * z_hi
-      nop.i 999
+	nop.m 999
+//
+//     poly = wsq * poly
+//
+(p7)   fadd.s1 sigma = f0, f1
+	nop.i 999 ;;
 }
-;;
-
-//     C_hi_hold = 1 - C_hi * U_prime_hi (1)
 { .mfi
-      nop.m 999
-      fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1 
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 U_prime_lo = z_hi, V, U_hold
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 Res_hi = sigma, A_hi, P_hi   // Res_hi = P_hi + sigma * A_hi
-      nop.i 999
+	nop.m 999
+(p6)   fsub.s1 sigma = f0, f1
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (1)
-      nop.i 999
+	nop.m 999
+(p0)   fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
+	nop.i 999 ;;
 }
-;;
-
-//     C_hi_hold = 1 - C_hi * U_prime_hi (2)
 { .mfi
-      nop.m 999
-      fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
-      nop.i 999
+	nop.m 999
+//
+//     A_lo = A_lo + w_hi
+//     A_hi = s_Y * A_hi
+//
+(p0)   fma.s1 Res_hi = sigma, A_hi, P_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (2)
-      nop.i 999
+	nop.m 999
+//
+//     C_hi_hold = 1 - C_hi * U_prime_hi (1)
+//
+(p0)   fma.s1 C_hi = C_hi_hold, C_hi, C_hi
+	nop.i 999 ;;
 }
-;;
-
-//     C_hi_hold = 1 - C_hi * U_prime_hi (3)
 { .mfi
-      nop.m 999
-      fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1 
-      nop.i 999
+	nop.m 999
+//
+//     C_hi = C_hi + C_hi * C_hi_hold    (1)
+//
+(p0)   fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (3)
-      nop.i 999
+	nop.m 999
+//
+//     C_hi_hold = 1 - C_hi * U_prime_hi (2)
+//
+(p0)   fma.s1 C_hi = C_hi_hold, C_hi, C_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 w_hi = V_prime, C_hi           // w_hi = V_prime * C_hi
-      nop.i 999
+	nop.m 999
+//
+//     C_hi = C_hi + C_hi * C_hi_hold    (2)
+//
+(p0)   fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 wsq = w_hi, w_hi               // wsq = w_hi * w_hi
-      nop.i 999
+	nop.m 999
+//
+//     C_hi_hold = 1 - C_hi * U_prime_hi (3)
+//
+(p0)   fma.s1 C_hi = C_hi_hold, C_hi, C_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fnma.s1 w_lo = w_hi, U_prime_hi, V_prime // w_lo = V_prime-w_hi*U_prime_hi
-      nop.i 999
+	nop.m 999
+//
+//     C_hi = C_hi + C_hi * C_hi_hold    (3)
+//
+(p0)   fmpy.s1 w_hi = V_prime, C_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 poly =  wsq, Q_4, Q_3           // poly = Q_3 + wsq * Q_4
-      nop.i 999
+	nop.m 999
+//
+//     w_hi = V_prime * C_hi
+//
+(p0)   fmpy.s1 wsq = w_hi, w_hi
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fnma.s1 w_lo = w_hi, U_prime_lo, w_lo  // w_lo = w_lo - w_hi * U_prime_lo
-      nop.i 999
+	nop.m 999
+(p0)   fnma.s1 w_lo = w_hi, U_prime_hi, V_prime
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 poly = wsq, poly, Q_2           // poly = Q_2 + wsq * poly
-      nop.i 999
+	nop.m 999
+//
+//     wsq = w_hi * w_hi
+//     w_lo = V_prime - w_hi * U_prime_hi
+//
+(p0)   fma.s1 poly =  wsq, Q_4, Q_3
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 w_lo = C_hi, w_lo              // w_lo =  = w_lo * C_hi
-      nop.i 999
+	nop.m 999
+(p0)   fnma.s1 w_lo = w_hi, U_prime_lo, w_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 poly = wsq, poly, Q_1           // poly = Q_1 + wsq * poly
-      nop.i 999
+	nop.m 999
+//
+//     poly = Q_3 + wsq * Q_4
+//     w_lo = w_lo - w_hi * U_prime_lo
+//
+(p0)   fma.s1 poly = wsq, poly, Q_2
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fadd.s1 A_lo = Tbl_lo, w_lo            // A_lo = Tbl_lo + w_lo
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 w_lo = C_hi, w_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s0 Q_1 =  Q_1, Q_1                // Dummy operation to raise inexact
-      nop.i 999
+	nop.m 999
+//
+//     poly = Q_2 + wsq * poly
+//     w_lo = w_lo * C_hi
+//
+(p0)   fma.s1 poly = wsq, poly, Q_1
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 poly = wsq, poly               // poly = wsq * poly
-      nop.i 999
+	nop.m 999
+(p0)   fadd.s1 A_lo = Tbl_lo, w_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 poly = w_hi, poly              // poly = w_hi * poly
-      nop.i 999
+	nop.m 999
+//
+//     Result  =  Res_hi + Res_lo * s_Y  (User Supplied Rounding Mode)
+//
+(p0)   fmpy.s0 Q_1 =  Q_1, Q_1
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fadd.s1 A_lo = A_lo, poly              // A_lo = A_lo + poly
-      nop.i 999
+	nop.m 999
+//
+//     poly = Q_1 + wsq * poly
+//     A_lo = Tbl_lo + w_lo
+//     swap = xor(swap,sign_X)
+//
+(p0)   fmpy.s1 poly = wsq, poly
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fadd.s1 A_lo = A_lo, w_hi              // A_lo = A_lo + w_hi
-      nop.i 999
+	nop.m 999
+//
+//     Is (swap) != 0 ?
+//     poly = wsq * poly
+//     A_hi = Tbl_hi
+//
+(p0)   fmpy.s1 poly = w_hi, poly
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 Res_lo = sigma, A_lo, P_lo      // Res_lo = P_lo + sigma * A_lo
-      nop.i 999
+	nop.m 999
+//
+//     if (PR_1) sigma = -1.0
+//     if (PR_2) sigma =  1.0
+//
+(p0)   fadd.s1 A_lo = A_lo, poly
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//     Result  =  Res_hi + Res_lo * s_Y  (User Supplied Rounding Mode)
+//     P_hi = s_Y * P_hi
+//     A_lo = A_lo + poly
 //
+(p0)   fadd.s1 A_lo = A_lo, w_hi
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p0)   fma.s1 Res_lo = sigma, A_lo, P_lo
+	nop.i 999 ;;
+}
 { .mfb
-      nop.m 999
-      fma.s0 Result = Res_lo, s_Y, Res_hi
-      br.ret.sptk   b0                        // Exit table path 2^-3 <= V/U < 1
+	nop.m 999
+//
+//     Res_hi = P_hi + sigma * A_hi
+//     Res_lo = P_lo + sigma * A_lo
+//
+(p0)   fma.s0 Result = Res_lo, s_Y, Res_hi
+//
+//     Raise inexact.
+//
+br.ret.sptk   b0 ;;
 }
-;;
-
-
-ATANL_POLY: 
-// Here if 0 < V/U < 2^-3
 //
-// ***********************************************
-// ******************** STEP4 ********************
-// ***********************************************
-
+//     poly1 = P_5 + zsq * poly1
+//     poly2 = zsq * poly2
 //
-//     Following:
-//     Iterate 3 times E = E + E*(1.0 - E*U)
-//     Also load P_8, P_7, P_6, P_5, P_4
+L(ATANL_POLY): 
+{ .mmf
+(p0)   xor swap = sign_X, swap
+	nop.m 999
+(p0)   fnma.s1 E_hold = E, U, f1 ;;
+}
+{ .mfi
+	nop.m 999
+(p0)   mov A_temp = Q
+//
+//     poly1 = P_4 + zsq * poly1
+//     swap = xor(swap,sign_X)
+//
+//     sign_X            gr_002
+//     swap              gr_004
+//     poly1 = poly1 <== Done with poly1
+//     poly1 = P_4 + zsq * poly1
+//     swap = xor(swap,sign_X)
 //
+(p0)   cmp.eq.unc p7, p6 = 0x00000, swap
+}
 { .mfi
-      ldfe P_8 = [table_ptr1], -16            // Load P_8
-      fnma.s1 z_lo = A_temp, U, V             // z_lo = V - A_temp * U
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 P_hi = s_Y, P_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fnma.s1 E_hold = E, U, f1               // E_hold = 1.0 - E*U (2)
-      nop.i 999
+	nop.m 999
+(p6)   fsub.s1 sigma = f0, f1
+	nop.i 999
 }
-;;
-
-{ .mmi
-      ldfe P_7 = [table_ptr1], -16            // Load P_7
-;;
-      ldfe P_6 = [table_ptr1], -16            // Load P_6
-      nop.i 999
+{ .mfi
+	nop.m 999
+(p7)   fadd.s1 sigma = f0, f1
+	nop.i 999 ;;
 }
-;;
 
-{ .mfi
-      ldfe P_5 = [table_ptr1], -16            // Load P_5
-      fma.s1 E = E, E_hold, E                 // E = E + E_hold*E (2)
+// ***********************************************
+// ******************** STEP4 ********************
+// ***********************************************
+
+{ .mmi
+      nop.m 999
+(p0)  addl           table_ptr1   = @ltoff(Constants_atan#), gp
       nop.i 999
 }
 ;;
 
 { .mmi
-      ldfe P_4 = [table_ptr1], -16            // Load P_4
-;;
-      ldfe P_3 = [table_ptr1], -16            // Load P_3
+      ld8 table_ptr1 = [table_ptr1]
+      nop.m 999
       nop.i 999
 }
 ;;
 
+
 { .mfi
-      ldfe P_2 = [table_ptr1], -16            // Load P_2
-      fnma.s1 E_hold = E, U, f1               // E_hold = 1.0 - E*U (3)
-      nop.i 999
-}
-{ .mlx
-      nop.m 999
-      movl         int_temp = 0x24005         // Signexp for small neg number
+	nop.m 999
+(p0)   fma.s1 E = E, E_hold, E
+//
+//     Following:
+//     Iterate 3 times E = E + E*(1.0 - E*U)
+//     Also load P_8, P_7, P_6, P_5, P_4
+//     E_hold = 1.0 - E * U     (1)
+//     A_temp = Q
+//
+(p0)   add table_ptr1 = 128, table_ptr1 ;;
 }
-;;
-
 { .mmf
-      ldfe P_1 = [table_ptr1], -16            // Load P_1
-      setf.exp     tmp_small = int_temp       // Form small neg number
-      fma.s1 E = E, E_hold, E                 // E = E + E_hold*E (3)
+	nop.m 999
+//
+//     E = E + E_hold*E         (1)
+//     Point to P_8.
+//
+(p0)   ldfe P_8 = [table_ptr1], -16
+//
+//     poly = z8*poly1 + poly2  (Typo in writeup)
+//     Is (swap) != 0 ?
+//
+(p0)   fnma.s1 z_lo = A_temp, U, V ;;
 }
-;;
-
+{ .mmb
+	nop.m 999
+//
+//     E_hold = 1.0 - E * U     (2)
+//
+(p0)   ldfe P_7 = [table_ptr1], -16
+	nop.b 999 ;;
+}
+{ .mmb
+	nop.m 999
+//
+//     E = E + E_hold*E         (2)
+//
+(p0)   ldfe P_6 = [table_ptr1], -16
+	nop.b 999 ;;
+}
+{ .mmb
+	nop.m 999
+//
+//     E_hold = 1.0 - E * U     (3)
+//
+(p0)   ldfe P_5 = [table_ptr1], -16
+	nop.b 999 ;;
+}
+{ .mmf
+	nop.m 999
+//
+//     E = E + E_hold*E         (3)
 //
 //
 // At this point E approximates 1/U to roughly working precision
-// Z = V*E approximates V/U
+// z = V*E approximates V/U
 //
-{ .mfi
-      nop.m 999
-      fmpy.s1 Z = V, E                         // Z = V * E
-      nop.i 999
+(p0)   ldfe P_4 = [table_ptr1], -16
+(p0)   fnma.s1 E_hold = E, U, f1 ;;
 }
-{ .mfi
-      nop.m 999
-      fmpy.s1 z_lo = z_lo, E                   // z_lo = z_lo * E
-      nop.i 999
+{ .mmb
+	nop.m 999
+//
+//     Z =   V * E
+//
+(p0)   ldfe P_3 = [table_ptr1], -16
+	nop.b 999 ;;
 }
-;;
-
+{ .mmb
+	nop.m 999
 //
-//     Now what we want to do is
-//     poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
-//     poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
+//     zsq = Z * Z
 //
+(p0)   ldfe P_2 = [table_ptr1], -16
+	nop.b 999 ;;
+}
+{ .mmb
+	nop.m 999
 //
-//     Fixup added to force inexact later -
-//     A_hi = A_temp + z_lo
-//     z_lo = (A_temp - A_hi) + z_lo
+//     z8 = zsq * zsq
 //
-{ .mfi
-      nop.m 999
-      fmpy.s1 zsq = Z, Z                        // zsq = Z * Z
-      nop.i 999
+(p0)   ldfe P_1 = [table_ptr1], -16
+	nop.b 999 ;;
+}
+{ .mlx
+	nop.m 999
+(p0)   movl         int_temp = 0x24005
 }
 { .mfi
-      nop.m 999
-      fadd.s1 A_hi = A_temp, z_lo               // A_hi = A_temp + z_lo
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 E = E, E_hold, E
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 poly1 = zsq, P_8, P_7              // poly1 = P_7 + zsq * P_8
-      nop.i 999
+	nop.m 999
+(p0)   fnma.s1 E_hold = E, U, f1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 poly2 = zsq, P_3, P_2              // poly2 = P_2 + zsq * P_3
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 E = E, E_hold, E
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 z4 = zsq, zsq                     // z4 = zsq * zsq
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 Z = V, E
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fsub.s1 A_temp = A_temp, A_hi             // A_temp = A_temp - A_hi
-      nop.i 999
+	nop.m 999
+//
+//     z_lo = V - A_temp * U
+//     if (PR_2) sigma =  1.0
+//
+(p0)   fmpy.s1 z_lo = z_lo, E
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmerge.s     tmp = A_hi, A_hi             // Copy tmp = A_hi
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 zsq = Z, Z
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 poly1 = zsq, poly1, P_6            // poly1 = P_6 + zsq * poly1
-      nop.i 999
+	nop.m 999
+//
+//     z_lo = z_lo * E
+//     if (PR_1) sigma = -1.0
+//
+(p0)   fadd.s1 A_hi = A_temp, z_lo
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 poly2 = zsq, poly2, P_1            // poly2 = P_2 + zsq * poly2
-      nop.i 999
+	nop.m 999
+//
+//     z8 = z8 * z8
+//
+//
+//     Now what we want to do is
+//     poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
+//     poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
+//
+(p0)   fma.s1 poly1 = zsq, P_8, P_7
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 z8 = z4, z4                       // z8 = z4 * z4
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 poly2 = zsq, P_3, P_2
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fadd.s1 z_lo = A_temp, z_lo               // z_lo = (A_temp - A_hi) + z_lo
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 z8 = zsq, zsq
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 poly1 = zsq, poly1, P_5            // poly1 = P_5 + zsq * poly1
-      nop.i 999
+	nop.m 999
+(p0)   fsub.s1 A_temp = A_temp, A_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 poly2 = poly2, zsq                // poly2 = zsq * poly2
-      nop.i 999
+	nop.m 999
+//
+//     A_lo = Z * poly + z_lo
+//
+(p0)   fmerge.s     tmp = A_hi, A_hi
+	nop.i 999 ;;
 }
-;;
-
-//     Create small GR double in case need to raise underflow
 { .mfi
-      nop.m 999
-      fma.s1 poly1 = zsq, poly1, P_4            // poly1 = P_4 + zsq * poly1
-      dep GR_temp = -1,r0,0,53
+	nop.m 999
+//
+//     poly1 = P_7 + zsq * P_8
+//     poly2 = P_2 + zsq * P_3
+//
+(p0)   fma.s1 poly1 = zsq, poly1, P_6
+	nop.i 999
 }
-;;
-
-//     Create small double in case need to raise underflow
 { .mfi
-      setf.d FR_temp = GR_temp	
-      fma.s1 poly = z8, poly1, poly2            // poly = poly2 + z8 * poly1
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 poly2 = zsq, poly2, P_1
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 A_lo = Z, poly, z_lo               // A_lo = z_lo + Z * poly
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 z8 = z8, z8
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fadd.s1      A_hi = tmp, A_lo             // A_hi = tmp + A_lo
-      nop.i 999
+	nop.m 999
+(p0)   fadd.s1 z_lo = A_temp, z_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fsub.s1      tmp = tmp, A_hi              // tmp = tmp - A_hi
-      nop.i 999
+	nop.m 999
+//
+//     poly1 = P_6 + zsq * poly1
+//     poly2 = P_2 + zsq * poly2
+//
+(p0)   fma.s1 poly1 = zsq, poly1, P_5
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 A_hi = s_Y, A_hi                  // A_hi = s_Y * A_hi
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 poly2 = poly2, zsq
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fadd.s1      A_lo = tmp, A_lo             // A_lo = tmp + A_lo
-      nop.i 999
+	nop.m 999
+//
+//     Result  =  Res_hi + Res_lo  (User Supplied Rounding Mode)
+//
+(p0)   fmpy.s1 P_5 = P_5, P_5
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 Res_hi = sigma, A_hi, P_hi         // Res_hi = P_hi + sigma * A_hi
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 poly1 = zsq, poly1, P_4
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fsub.s1 tmp =  P_hi, Res_hi               // tmp = P_hi - Res_hi
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 poly = z8, poly1, poly2
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//     Test if A_lo is zero
+//     Fixup added to force inexact later -
+//     A_hi = A_temp + z_lo
+//     z_lo = (A_temp - A_hi) + z_lo
 //
-{ .mfi
-      nop.m 999
-      fclass.m p6,p0 = A_lo, 0x007              // Test A_lo = 0
-      nop.i 999
+(p0)   fma.s1 A_lo = Z, poly, z_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  mov          A_lo = tmp_small             // If A_lo zero, make very small
-      nop.i 999
+	nop.m 999
+(p0)   fadd.s1      A_hi = tmp, A_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 tmp = A_hi, sigma, tmp             // tmp = sigma * A_hi  + tmp
-      nop.i 999
+	nop.m 999
+(p0)   fsub.s1      tmp = tmp, A_hi
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fma.s1 sigma =  A_lo, sigma, P_lo         // sigma = A_lo * sigma  + P_lo
-      nop.i 999
+	nop.m 999
+(p0)   fmpy.s1 A_hi = s_Y, A_hi
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 Res_lo = s_Y, sigma, tmp           // Res_lo = s_Y * sigma + tmp
-      nop.i 999
+	nop.m 999
+(p0)   fadd.s1      A_lo = tmp, A_lo
+	nop.i 999
 }
-;;
-
+{ .mfi
+(p0)   setf.exp     tmp = int_temp
 //
-//     Test if Res_lo is denormal
+//     P_hi = s_Y * P_hi
+//     A_hi = s_Y * A_hi
 //
+(p0)   fma.s1 Res_hi = sigma, A_hi, P_hi
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-      fclass.m p14, p15 = Res_lo, 0x0b
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p6,p0 = A_lo, 0x007
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
+(p6)   mov          A_lo = tmp
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
 //
-//     Compute Result = Res_lo + Res_hi.  Use s3 if Res_lo is denormal.
+//     Res_hi = P_hi + sigma * A_hi
 //
-{ .mfi
-      nop.m 999
-(p14) fadd.s3 Result = Res_lo, Res_hi     // Result for Res_lo denormal
-      nop.i 999
+(p0)   fsub.s1 tmp =  P_hi, Res_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p15) fadd.s0 Result = Res_lo, Res_hi     // Result for Res_lo normal
-      nop.i 999
+	nop.m 999
+//
+//     tmp = P_hi - Res_hi
+//
+(p0)   fma.s1 tmp = A_hi, sigma, tmp
+	nop.i 999
 }
-;;
-
-//	
-//     If Res_lo is denormal test if Result equals zero
-//	
 { .mfi
-      nop.m 999
-(p14) fclass.m.unc p14, p0 = Result, 0x07
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1 sigma =  A_lo, sigma, P_lo
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//     If Res_lo is denormal and Result equals zero, raise inexact, underflow
-//     by squaring small double
+//     tmp   = sigma * A_hi  + tmp
+//     sigma = A_lo * sigma  + P_lo
 //
+(p0)   fma.s1 Res_lo = s_Y, sigma, tmp
+	nop.i 999 ;;
+}
 { .mfb
-      nop.m 999
-(p14) fmpy.d.s0 FR_temp = FR_temp, FR_temp
-      br.ret.sptk   b0                     // Exit POLY path, 0 < Q < 2^-3
+	nop.m 999
+//
+//     Res_lo = s_Y * sigma + tmp
+//
+(p0)   fadd.s0 Result = Res_lo, Res_hi
+br.ret.sptk   b0 ;;
 }
-;;
-
-
-ATANL_UNSUPPORTED: 
+L(ATANL_NATVAL): 
+L(ATANL_UNSUPPORTED): 
+L(ATANL_NAN): 
 { .mfb
-      nop.m 999
-      fmpy.s0 Result = ArgX,ArgY 
-      br.ret.sptk   b0
+	nop.m 999
+(p0)   fmpy.s0 Result = ArgX,ArgY 
+(p0)   br.ret.sptk   b0 ;;
 }
-;;
-
-// Here if y natval, nan, inf, zero
-ATANL_Y_SPECIAL:
-// Here if x natval, nan, inf, zero
-ATANL_X_SPECIAL:
+L(ATANL_SPECIAL_HANDLING): 
 { .mfi
-      nop.m 999
-      fclass.m p13,p12 = ArgY_orig, 0x0c3  // Test y nan
-      nop.i 999
+	nop.m 999
+(p0)   fcmp.eq.s0     p0, p6 = f1, ArgY_orig
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fclass.m p15,p14 = ArgY_orig, 0x103  // Test y natval
-      nop.i 999
+	nop.m 999
+(p0)   fcmp.eq.s0     p0, p5 = f1, ArgX_orig
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p12) fclass.m p13,p0 = ArgX_orig, 0x0c3  // Test x nan
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p6, p7 = ArgY, 0x007
+	nop.i 999
 }
-;;
-
-{ .mfi
-      nop.m 999
-(p14) fclass.m p15,p0 = ArgX_orig, 0x103  // Test x natval
-      nop.i 999
+{ .mlx
+	nop.m 999
+(p0)   movl special = 992
 }
 ;;
 
-{ .mfb
-      nop.m 999
-(p13) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result nan if x or y nan
-(p13) br.ret.spnt b0                      // Exit if x or y nan
-}
-;;
 
-{ .mfb
+{ .mmi
       nop.m 999
-(p15) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result natval if x or y natval
-(p15) br.ret.spnt b0                      // Exit if x or y natval
+(p0)  addl           table_ptr1   = @ltoff(Constants_atan#), gp
+      nop.i 999
 }
 ;;
 
-
-// Here if x or y inf or zero
-ATANL_SPECIAL_HANDLING: 
-{ .mfi
+{ .mmi
+      ld8 table_ptr1 = [table_ptr1]
       nop.m 999
-      fclass.m p6, p7 = ArgY_orig, 0x007        // Test y zero
-      mov special = 992                         // Offset to table
+      nop.i 999
 }
 ;;
 
-{ .mfb
-      add table_ptr1 = table_base, special      // Point to 3pi/4
-      fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig  // Dummy to set denormal flag
-(p7)  br.cond.spnt ATANL_ArgY_Not_ZERO          // Branch if y not zero
-}
-;;
 
-// Here if y zero
+{ .mib
+(p0)   add table_ptr1 = table_ptr1, special
+	nop.i 999
+(p7)   br.cond.spnt L(ATANL_ArgY_Not_ZERO) ;;
+}
 { .mmf
-      ldfd  Result = [table_ptr1], 8            // Get pi high
-      nop.m 999
-      fclass.m p14, p0 = ArgX, 0x035            // Test for x>=+0
+(p0)   ldfd  Result = [table_ptr1], 8
+	nop.m 999
+(p6)   fclass.m.unc p14, p0 = ArgX, 0x035 ;;
 }
-;;
-
 { .mmf
-      nop.m 999
-      ldfd  Result_lo = [table_ptr1], -8        // Get pi lo
-      fclass.m p15, p0 = ArgX, 0x036            // Test for x<=-0
+	nop.m 999
+(p0)   ldfd  Result_lo = [table_ptr1], -8
+(p6)   fclass.m.unc p15, p0 = ArgX, 0x036 ;;
 }
-;;
-
-//
-//     Return sign_Y * 0 when  ArgX > +0
-//
 { .mfi
-      nop.m 999
-(p14) fmerge.s Result = ArgY, f0               // If x>=+0, y=0, hi sgn(y)*0
-      nop.i 999
+	nop.m 999
+(p14)  fmerge.s Result = ArgY, f0
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fclass.m p13, p0 = ArgX, 0x007           // Test for x=0
-      nop.i 999
+	nop.m 999
+(p6)   fclass.m.unc p13, p0 = ArgX, 0x007
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p14) fmerge.s Result_lo = ArgY, f0            // If x>=+0, y=0, lo sgn(y)*0
-      nop.i 999
+	nop.m 999
+(p14)  fmerge.s Result_lo = ArgY, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p13) mov GR_Parameter_TAG = 36                // Error tag for x=0, y=0
-      nop.f 999
-      nop.i 999
+(p13)  mov GR_Parameter_TAG = 36 
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//     Return sign_Y * pi when  ArgX < -0
+//     Return sign_Y * 0 when  ArgX > +0
 //
-{ .mfi
-      nop.m 999
-(p15) fmerge.s Result = ArgY, Result           // If x<0, y=0, hi=sgn(y)*pi
-      nop.i 999
+(p15)  fmerge.s Result = ArgY, Result
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p15) fmerge.s Result_lo = ArgY, Result_lo     // If x<0, y=0, lo=sgn(y)*pi
-      nop.i 999
+	nop.m 999
+(p15)  fmerge.s Result_lo = ArgY, Result_lo
+	nop.i 999 ;;
 }
-;;
-
+{ .mfb
+	nop.m 999
 //
-//     Call error support function for atan(0,0)
+//     Return sign_Y * pi when  ArgX < -0
 //
-{ .mfb
-      nop.m 999
-      fadd.s0 Result = Result, Result_lo
-(p13) br.cond.spnt __libm_error_region         // Branch if atan(0,0)
+(p0)   fadd.s0 Result = Result, Result_lo
+(p13)  br.cond.spnt __libm_error_region ;;
 }
-;;
-
 { .mib
-      nop.m 999
-      nop.i 999
-      br.ret.sptk   b0                         // Exit for y=0, x not 0
+	nop.m 999
+	nop.i 999
+//
+//     Call error support function for atan(0,0)
+//
+(p0)    br.ret.sptk   b0 ;;
 }
-;;
-
-// Here if y not zero
-ATANL_ArgY_Not_ZERO: 
+L(ATANL_ArgY_Not_ZERO): 
 { .mfi
-      nop.m 999
-      fclass.m p0, p10 = ArgY, 0x023           // Test y inf
-      nop.i 999
+	nop.m 999
+(p0)   fclass.m.unc p9, p10 = ArgY, 0x023
+	nop.i 999 ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p10)  br.cond.spnt  L(ATANL_ArgY_Not_INF) ;;
+}
+{ .mfi
+	nop.m 999
+(p9)   fclass.m.unc p6, p0 = ArgX, 0x017
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
+(p9)   fclass.m.unc p7, p0 = ArgX, 0x021
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p9)   fclass.m.unc p8, p0 = ArgX, 0x022
+	nop.i 999 ;;
+}
+{ .mmi
+(p6)   add table_ptr1 =  16, table_ptr1 ;;
+(p0)   ldfd Result = [table_ptr1], 8
+	nop.i 999 ;;
+}
+{ .mfi
+(p0)   ldfd Result_lo = [table_ptr1], -8
+	nop.f 999
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p6)   fmerge.s Result = ArgY, Result
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p6)   fmerge.s Result_lo = ArgY, Result_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfb
-      nop.m 999
-      fclass.m p6, p0 = ArgX, 0x017            // Test for 0 <= |x| < inf
-(p10) br.cond.spnt  ATANL_ArgY_Not_INF         // Branch if 0 < |y| < inf
+	nop.m 999
+(p6)    fadd.s0 Result = Result, Result_lo
+(p6)    br.ret.sptk   b0 ;;
 }
-;;
-
-// Here if y=inf
 //
+//     Load PI/2 and adjust its sign.
 //     Return +PI/2 when ArgY = +Inf and ArgX = +/-0 or normal
 //     Return -PI/2 when ArgY = -Inf and ArgX = +/-0 or normal
-//     Return +PI/4 when ArgY = +Inf and ArgX = +Inf
-//     Return -PI/4 when ArgY = -Inf and ArgX = +Inf
-//     Return +3PI/4 when ArgY = +Inf and ArgX = -Inf
-//     Return -3PI/4 when ArgY = -Inf and ArgX = -Inf
 //
+{ .mmi
+(p7)   add table_ptr1 = 32, table_ptr1 ;;
+(p7)   ldfd Result = [table_ptr1], 8
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-      fclass.m p7, p0 = ArgX, 0x021            // Test for x=+inf
-      nop.i 999
+(p7)   ldfd Result_lo = [table_ptr1], -8
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p6)  add table_ptr1 =  16, table_ptr1         // Point to pi/2, if x finite 
-      fclass.m p8, p0 = ArgX, 0x022            // Test for x=-inf
-      nop.i 999
+	nop.m 999
+(p7)   fmerge.s Result = ArgY, Result
+	nop.i 999 ;;
 }
-;;
-
-{ .mmi
-(p7)  add table_ptr1 =  32, table_ptr1         // Point to pi/4 if x=+inf
-;;
-(p8)  add table_ptr1 =  48, table_ptr1         // Point to 3pi/4 if x=-inf
-
-      nop.i 999
+{ .mfi
+	nop.m 999
+(p7)   fmerge.s Result_lo = ArgY, Result_lo
+	nop.i 999 ;;
 }
-;;
-
+{ .mfb
+	nop.m 999
+(p7)    fadd.s0 Result = Result, Result_lo
+(p7)    br.ret.sptk   b0 ;;
+}
+//
+//     Load PI/4 and adjust its sign.
+//     Return +PI/4 when ArgY = +Inf and ArgX = +Inf
+//     Return -PI/4 when ArgY = -Inf and ArgX = +Inf
+//
 { .mmi
-      ldfd Result = [table_ptr1], 8            // Load pi/2, pi/4, or 3pi/4 hi
-;;
-      ldfd Result_lo = [table_ptr1], -8        // Load pi/2, pi/4, or 3pi/4 lo
-      nop.i 999
+(p8)   add table_ptr1 = 48, table_ptr1 ;;
+(p8)   ldfd Result = [table_ptr1], 8
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmerge.s Result = ArgY, Result           // Merge sgn(y) in hi
-      nop.i 999
+(p8)   ldfd Result_lo = [table_ptr1], -8
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmerge.s Result_lo = ArgY, Result_lo     // Merge sgn(y) in lo
-      nop.i 999
+	nop.m 999
+(p8)   fmerge.s Result = ArgY, Result
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p8)   fmerge.s Result_lo = ArgY, Result_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfb
-      nop.m 999
-      fadd.s0 Result = Result, Result_lo       // Compute complete result
-      br.ret.sptk   b0                         // Exit for y=inf
+	nop.m 999
+(p8)   fadd.s0 Result = Result, Result_lo
+(p8)   br.ret.sptk   b0 ;; 
 }
-;;
-
-// Here if y not INF, and x=0 or INF
-ATANL_ArgY_Not_INF: 
+L(ATANL_ArgY_Not_INF): 
+{ .mfi
+	nop.m 999
 //
-//     Return +PI/2 when ArgY NOT Inf, ArgY > 0 and ArgX = +/-0
-//     Return -PI/2 when ArgY NOT Inf, ArgY < 0 and ArgX = +/-0
-//     Return +0    when ArgY NOT Inf, ArgY > 0 and ArgX = +Inf
-//     Return -0    when ArgY NOT Inf, ArgY > 0 and ArgX = +Inf
-//     Return +PI   when ArgY NOT Inf, ArgY > 0 and ArgX = -Inf
-//     Return -PI   when ArgY NOT Inf, ArgY > 0 and ArgX = -Inf
+//     Load 3PI/4 and adjust its sign.
+//     Return +3PI/4 when ArgY = +Inf and ArgX = -Inf
+//     Return -3PI/4 when ArgY = -Inf and ArgX = -Inf
 //
+(p0)  fclass.m.unc p6, p0 = ArgX, 0x007
+	nop.i 999
+}
 { .mfi
-      nop.m 999
-      fclass.m p7, p9 = ArgX, 0x021            // Test for x=+inf
-      nop.i 999
+	nop.m 999
+(p0)  fclass.m.unc p7, p0 = ArgX, 0x021
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fclass.m p6, p0 = ArgX, 0x007            // Test for x=0
-      nop.i 999
+	nop.m 999
+(p0)  fclass.m.unc p8, p0 = ArgX, 0x022
+	nop.i 999 ;;
+}
+{ .mmi
+(p6)  add table_ptr1 = 16, table_ptr1 ;;
+(p6)  ldfd Result = [table_ptr1], 8
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p6)  add table_ptr1 = 16, table_ptr1          // Point to pi/2
-      fclass.m p8, p0 = ArgX, 0x022            // Test for x=-inf
-      nop.i 999
+(p6)  ldfd Result_lo = [table_ptr1], -8
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
-.pred.rel "mutex",p7,p9
 { .mfi
-(p9)  ldfd Result = [table_ptr1], 8           // Load pi or pi/2 hi
-(p7)  fmerge.s Result = ArgY, f0              // If y not inf, x=+inf, sgn(y)*0
-      nop.i 999
+	nop.m 999
+(p6)  fmerge.s Result = ArgY, Result
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p9)  ldfd Result_lo = [table_ptr1], -8       // Load pi or pi/2 lo
-(p7)  fnorm.s0 Result = Result                // If y not inf, x=+inf normalize
-      nop.i 999
+	nop.m 999
+(p6)  fmerge.s Result_lo = ArgY, Result_lo
+	nop.i 999 ;;
+}
+{ .mfb
+	nop.m 999
+(p6)  fadd.s0 Result = Result, Result_lo
+(p6)  br.ret.spnt   b0 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fmerge.s Result = ArgY, Result          // Merge sgn(y) in hi
-      nop.i 999
+	nop.m 999
+//
+//    return = sign_Y * PI/2 when ArgX = 0
+//
+(p7)  fmerge.s Result = ArgY, f0
+	nop.i 999 ;;
+}
+{ .mfb
+	nop.m 999
+(p7)  fnorm.s0 Result = Result
+(p7)  br.ret.spnt   b0 ;;
+}
+//
+//    return = sign_Y * 0 when ArgX = +Inf
+//
+{ .mmi
+(p8)  ldfd Result = [table_ptr1], 8 ;;
+(p8)  ldfd Result_lo = [table_ptr1], -8
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fmerge.s Result_lo = ArgY, Result_lo    // Merge sgn(y) in lo
-      nop.i 999
+	nop.m 999
+(p8)  fmerge.s Result = ArgY, Result
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p8)  fmerge.s Result_lo = ArgY, Result_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfb
-      nop.m 999
-(p9)  fadd.s0 Result = Result, Result_lo      // Compute complete result
-      br.ret.spnt   b0                        // Exit for y not inf, x=0,inf
+	nop.m 999
+(p8)  fadd.s0 Result = Result, Result_lo
+(p8)  br.ret.sptk   b0 ;;
 }
-;;
-
-GLOBAL_IEEE754_END(atan2l)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+//
+//    return = sign_Y * PI when ArgX = -Inf
+//
+.endp atan2l
+ASM_SIZE_DIRECTIVE(atan2l)
+ASM_SIZE_DIRECTIVE(__atan2l)
+ASM_SIZE_DIRECTIVE(__ieee754_atan2l)
+ 
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -1999,6 +2001,7 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region#)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region) 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_cbrt.S b/sysdeps/ia64/fpu/s_cbrt.S
index b7a827d1da..1e23b6024d 100644
--- a/sysdeps/ia64/fpu/s_cbrt.S
+++ b/sysdeps/ia64/fpu/s_cbrt.S
@@ -1,10 +1,11 @@
-.file "cbrt.s"
+.file "cbrt.asm"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, 
+// Bob Norin, Shane Story, and Ping Tak Peter Tang 
+// of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,30 +21,27 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 05/19/00 New version (modified algorithm)
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/28/03 Updated polynomial coefficients (changed to Remez coefficients),
-//          to slightly improve accuracy
+// 2/02/00: Initial version 
+// 5/19/00: New version (modified algorithm)
 //
 // API
 //==============================================================
@@ -55,713 +53,637 @@
 //
 // Implementation
 //
-// Let y= frcpa(a), where a is the argument
-//
-// cbrt(a)= cbrt(a*y)/cbrt(y) = cbrt(1 - (1-a*y)) * (1/cbrt(y))
-//
-// For all values of y, the 3 possible significands of 1/cbrt(y)
-// are stored in a table (T0) to 64 bits of accuracy. (There are
-// 3 possible significands because the exponent of y modulo 3
-// can be 0, 1, or 2.)
+//   cbrt(a) = cbrt(a y) / cbrt(y)
+//          = cbrt(1 - (1 - a y)) * 1/cbrt(y)
 //
+// where y = frcpa(a). 
 //
-// * cbrt(1 - (1-a*y)) is approximated by a degree-5 polynomial ~
-//
-// ~ 1 - (1/3)*r - (1/9)*r^2 - (5/81)*r^3 - (10/243)*r^4 - (22/729)*r^5
-//
-// in r = 1-a*y.
+//  * cbrt(1 - (1 - a y)) is approximated by a degree-5 polynomial 
+//   
+//  1 - (1/3)*r - (1/9)*r^2 - (5/81)*r^3 - (10/243)*r^4 - (22/729)*r^5
+// 
+//  in r = 1 - a y.
 //
+//  * The values 1/cbrt(y) are stored in a table of constants T0
+//   to 64 bits of accuracy
 //
 // The table values are stored for three exponent values and are
 // then multiplied by e/3 where e is the exponent of the input number.
 // This computation is carried out in parallel with the polynomial
 // evaluation:
 //
-// T= 2^(e/3) * T0
+//      T = 2^(e/3) * T0
 
 
 
 
 
 //===============
-// input= x
-// C= frcpa(x)
-// r= 1 - C * x
+// input = x
+// C = frcpa(x)
+// r = 1 - C * x
 //
-// Special values
+// Special values 
 //==============================================================
 
 
 
 // Registers used
 //==============================================================
-// f6-f15
-// GR_GP, r23-r26, r28-r30
-// p6, p7, p8, p12
-
-       FR_R        = f6
-       FR_COEFF1   = f7
-       FR_COEFF2   = f9
-       FR_COEFF3   = f10
-       FR_COEFF4   = f11
-       FR_COEFF5   = f12
-       FR_R2       = f13
-       FR_ARG      = f14
-       FR_P23      = f15
-       FR_P25      = f32
-       FR_P15      = f33
-       FR_P1       = f34
-       FR_P45      = f35
-       FR_2EXP     = f36
-       FR_TMP63    = f37
-
-       GR_GP       = r2
-       GR_ADDR     = r2
-       GR_CONST1   = r3
-       GR_I1       = r8
-       GR_EXP      = r9
-       GR_ADDR2    = r10
-       GR_IT1      = r11
-       GR_TMP2     = r11
-       GR_EXPON    = r15
-       GR_TMP1     = r16
-       GR_TMP6     = r16
-       GR_ITB1     = r17
-       GR_TMP3     = r18
-       GR_TMP4     = r19
-       GR_TMP63    = r19
-       GR_TMP5     = r20
-       GR_EXP_BY_3 = r20
-       GR_CONST4   = r21
-       GR_TMP6     = r22
-       GR_INDEX    = r23
-       GR_EBIAS    = r24
-       GR_SIGNIF   = r25
-       GR_SIGNIF2  = r25
-       GR_TEST     = r25
-       GR_ARGEXP   = r26
-       GR_CONST2   = r27
-       GR_SIGN     = r28
-       GR_REM      = r29
-       GR_CONST3   = r30
-       GR_SEXP     = r31
-
-
-
+//   f6-f15
+//   r2, r23-r26, r28-r30
+//   p6,p7,p8,p12
 
+#include "libm_support.h"
 
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(poly_coeffs)
-
-       data8 0xaaaaaaaaaaaaaab4, 0x0000bffd // ~ 1/3
-       data8 0xbfbc71c71c718e45, 0xbfaf9add3c0bbb43
-       data8 0xbfa511edb93dc98d, 0xbf9ee71c45f0dfbc
-LOCAL_OBJECT_END(poly_coeffs)
-
-
-//   For every entry B in the frcpa table, this table contains
-// the significands of cbrt(1/B), cbrt(2/B), cbrt(4/B).
-// The index to this table is the same as the frcpa index.
-
-LOCAL_OBJECT_START(T_table)
-
-
-       data8 0x80155c748c374836, 0xa160019ed37fb4ae
-       data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
-       data8 0xa1960b5966da4608, 0xcb95f333968ad59b
-       data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
-       data8 0xcbda64292d3ffd97, 0x8096b586974669b1
-       data8 0xa202f97995b69c0d, 0xcc1f3184af961596
-       data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
-       data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
-       data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
-       data8 0x81149add67c2d208, 0xa2a197e5d10465cb
-       data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
-       data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
-       data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
-       data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
-       data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
-       data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
-       data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
-       data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
-       data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
-       data8 0xce6e0be0cd551a61, 0x823880f78e70b805
-       data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
-       data8 0x826097a62a8e5200, 0xa443df0e53df577a
-       data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
-       data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
-       data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
-       data8 0xcf763c47ee869f00, 0x82da06a527b18937
-       data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
-       data8 0x8302e60b635ab394, 0xa5105d46152c938a
-       data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
-       data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
-       data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
-       data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
-       data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
-       data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
-       data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
-       data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
-       data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
-       data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
-       data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
-       data8 0x844510461ff14209, 0xa6a6444aa0243c0b
-       data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
-       data8 0xa6dc094d10f25792, 0xd23ad555f773f059
-       data8 0x84947e18234f3294, 0xa70a574cc02bba69
-       data8 0xd2752c7039a5bf73, 0x84bf92755825045a
-       data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
-       data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
-       data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
-       data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
-       data8 0x85359d5d91768427, 0xa7d5579ae5164b85
-       data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
-       data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
-       data8 0x858104f0c415f79a, 0xa8345895e5250a5a
-       data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
-       data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
-       data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
-       data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
-       data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
-       data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
-       data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
-       data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
-       data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
-       data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
-       data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
-       data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
-       data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
-       data8 0xa9ea8686f556f645, 0xd614b539c6194104
-       data8 0x870453c845acf90f, 0xaa1c52d17906bb19
-       data8 0xd6537310e224283f, 0x872c089a1e90342c
-       data8 0xaa4e59b046dab887, 0xd6927ab62244c917
-       data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
-       data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
-       data8 0xaab319102f3f9b33, 0xd71169cea98fdded
-       data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
-       data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
-       data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
-       data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
-       data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
-       data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
-       data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
-       data8 0xd83e38838648d815, 0x885bc559e5e1c081
-       data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
-       data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
-       data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
-       data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
-       data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
-       data8 0xd92432bd5a173685, 0x88f4356166bd590e
-       data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
-       data8 0x89173a0acf5ce026, 0xacb93703ff51571e
-       data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
-       data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
-       data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
-       data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
-       data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
-       data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
-       data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
-       data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
-       data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
-       data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
-       data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
-       data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
-       data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
-       data8 0xae5794122b638df9, 0xdba843ded7151ea1
-       data8 0x8a849aba14274764, 0xae858fda8137ae0a
-       data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
-       data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
-       data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
-       data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
-       data8 0xaf10a899d3235fe7, 0xdc917398f2797814
-       data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
-       data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
-       data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
-       data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
-       data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
-       data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
-       data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
-       data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
-       data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
-       data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
-       data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
-       data8 0xb078f3ab1d701c65, 0xde576480262399bc
-       data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
-       data8 0xde943789645933c8, 0x8c5dc4c4f7706032
-       data8 0xb0d9b624d62ec856, 0xded14d58139a28af
-       data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
-       data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
-       data8 0xb131821882f5540a, 0xdf3feb44d723a713
-       data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
-       data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
-       data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
-       data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
-       data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
-       data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
-       data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
-       data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
-       data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
-       data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
-       data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
-       data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
-       data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
-       data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
-       data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
-       data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
-       data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
-       data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
-       data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
-       data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
-       data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
-       data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
-       data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
-       data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
-       data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
-       data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
-       data8 0xb43da8e9d163e1af, 0xe316d93615862714
-       data8 0x8f385c95d696b817, 0xb47233773b84d425
-       data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
-       data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
-       data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
-       data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
-       data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
-       data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
-       data8 0xe42eeca17c62886c, 0x8fe117499e356095
-       data8 0xb546c9616087ab9c, 0xe464e32943446305
-       data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
-       data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
-       data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
-       data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
-       data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
-       data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
-       data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
-       data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
-       data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
-       data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
-       data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
-       data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
-       data8 0x9110021e7b516f0a, 0xb6c47044075b4142
-       data8 0xe645bd1544c7ea51, 0x912a708a39be9075
-       data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
-       data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
-       data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
-       data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
-       data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
-       data8 0xe70a9136a7403039, 0x91afbc299ed0295d
-       data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
-       data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
-       data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
-       data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
-       data8 0x9212b5fcac537c19, 0xb80a6226904045e2
-       data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
-       data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
-       data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
-       data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
-       data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
-       data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
-       data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
-       data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
-       data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
-       data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
-       data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
-       data8 0x931379a403be5c16, 0xb94de2d841a184c2
-       data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
-       data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
-       data8 0x9354c71412c69486, 0xb9a0297f172665e3
-       data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
-       data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
-       data8 0x93968919f6e7975d, 0xb9f3030951267208
-       data8 0xea480963fd394197, 0x93bc516fdd4680c9
-       data8 0xba229d6a618e7c59, 0xea84034425f27484
-       data8 0x93d8c123d9be59b2, 0xba467144459f9855
-       data8 0xeab12713138dd1cc, 0x93f546c955e60076
-       data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
-       data8 0x941b70a65879079f, 0xba9a76056b67ee7a
-       data8 0xeb1b0268343b121b, 0x943829f337410591
-       data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
-       data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
-       data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
-       data8 0xbb1385a23be24e57, 0xebb389645f222f62
-       data8 0x94988aeb23470f86, 0xbb3814975e17c680
-       data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
-       data8 0xbb5cc031009bf467, 0xec0fcc9321024509
-       data8 0x94d2d7a9170d8b42, 0xbb81889680024764
-       data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
-       data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
-       data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
-       data8 0xecaad5278824e453, 0x9534cefa625fcb3a
-       data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
-       data8 0x955265405c491a25, 0xbc223d88cfc88eee
-       data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
-       data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
-       data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
-       data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
-       data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
-       data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
-       data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
-       data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
-       data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
-       data8 0xee357ead791fc670, 0x962e350575b409c5
-       data8 0xbd372f8598620f19, 0xee658cb3c134a463
-       data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
-       data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
-       data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
-       data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
-       data8 0xeef6a0da64a014ac, 0x96a8426705198795
-       data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
-       data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
-       data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
-       data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
-       data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
-       data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
-       data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
-       data8 0x97430782be323831, 0xbe93f5b41d047cf7
-       data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
-       data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
-       data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
-       data8 0xf0805c944d827454, 0x97a117ffd0f48e46
-       data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
-       data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
-       data8 0xf0e46442e76f6569, 0x97e0505a8637a036
-       data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
-       data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
-       data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
-       data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
-       data8 0x98354085054fd204, 0xbfc52428bec6e72f
-       data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
-       data8 0xbfed838fddab024b, 0xf1d0593311db1757
-       data8 0x987571fffb7f94f6, 0xc016050c0420981a
-       data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
-       data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
-       data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
-       data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
-       data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
-       data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
-       data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
-       data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
-       data8 0x9922b8218160967a, 0xc0f054ca33eb3437
-       data8 0xf31670135ab9cc0f, 0x99438d686f75779d
-       data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
-       data8 0x99647eea131fa20b, 0xc1433453de2033ff
-       data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
-       data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
-       data8 0x999ba5f14f8add02, 0xc188b130431d80e6
-       data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
-       data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
-       data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
-       data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
-       data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
-       data8 0x9a16154eb445c873, 0xc222f35a87b415ba
-       data8 0xf498c1076015faf8, 0x9a2c822ec198d667
-       data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
-       data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
-       data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
-       data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
-       data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
-       data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
-       data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
-       data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
-       data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
-       data8 0xc323938449a2587e, 0xf5dc1501f324a812
-       data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
-       data8 0xf6006bee86b5589e, 0x9b1b19033be35730
-       data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
-       data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
-       data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
-       data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
-       data8 0x9b77854e6c661200, 0xc3e0410243b97383
-       data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
-       data8 0xc3fd890709833d37, 0xf6eeb177472cedae
-       data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
-       data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
-       data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
-       data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
-       data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
-       data8 0xc490f9a94695ba14, 0xf7a874b97927af44
-       data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
-       data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
-       data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
-       data8 0x9c568656c0423def, 0xc4f938aec206291a
-       data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
-       data8 0xc52629e899dfd622, 0xf8646bf0defb759e
-       data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
-       data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
-       data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
-       data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
-       data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
-       data8 0xc5adf561b91e110a, 0xf90f832c2700c160
-       data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
-       data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
-       data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
-       data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
-       data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
-       data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
-       data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
-       data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
-       data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
-       data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
-       data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
-       data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
-       data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
-       data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
-       data8 0xc70fc0117c641630, 0xfacd431644ce0e40
-       data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
-       data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
-       data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
-       data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
-       data8 0xfb576c5762024805, 0x9e6ed27594550d2e
-       data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
-       data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
-       data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
-       data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
-       data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
-       data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
-       data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
-       data8 0x9ef976db07288d04, 0xc84b978847a06b87
-       data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
-       data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
-       data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
-       data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
-       data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
-       data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
-       data8 0xfd118595143ee273, 0x9f860593d42fd7f3
-       data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
-       data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
-       data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
-       data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
-       data8 0x9fd383731ca51db9, 0xc95e5112e721582a
-       data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
-       data8 0xc97f06bb49787677, 0xfdde8a67d2613531
-       data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
-       data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
-       data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
-       data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
-       data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
-       data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
-       data8 0xa07d73ba65e680af, 0xca346d07b045a876
-       data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
-       data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
-       data8 0xa0b24fe89e02602f, 0xca77068257be9bab
-       data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
-       data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
-       data8 0xa0e77200215909e6, 0xcab9f8122c99a101
-       data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
-       data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
-       data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
-       data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
-       data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
-LOCAL_OBJECT_END(T_table)
-
-
-
-
-
-
+poly_coeffs:
+ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
+data8 0xaaaaaaaaaaaaaaab, 0x00003ffd  // 1/3
+data8 0x3fbc71c71c71c71d, 0x3faf9add3c0ca459
+data8 0x3fa511e8d2b3183b, 0x3f9ee7113506ac13
+ASM_SIZE_DIRECTIVE(poly_coeffs)
+
+T_table:
+ASM_TYPE_DIRECTIVE(T_table,@object)
+
+data8 0x80155c748c374836, 0xa160019ed37fb4ae
+data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
+data8 0xa1960b5966da4608, 0xcb95f333968ad59b
+data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
+data8 0xcbda64292d3ffd97, 0x8096b586974669b1
+data8 0xa202f97995b69c0d, 0xcc1f3184af961596
+data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
+data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
+data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
+data8 0x81149add67c2d208, 0xa2a197e5d10465cb
+data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
+data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
+data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
+data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
+data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
+data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
+data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
+data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
+data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
+data8 0xce6e0be0cd551a61, 0x823880f78e70b805
+data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
+data8 0x826097a62a8e5200, 0xa443df0e53df577a
+data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
+data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
+data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
+data8 0xcf763c47ee869f00, 0x82da06a527b18937
+data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
+data8 0x8302e60b635ab394, 0xa5105d46152c938a
+data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
+data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
+data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
+data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
+data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
+data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
+data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
+data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
+data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
+data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
+data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
+data8 0x844510461ff14209, 0xa6a6444aa0243c0b
+data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
+data8 0xa6dc094d10f25792, 0xd23ad555f773f059
+data8 0x84947e18234f3294, 0xa70a574cc02bba69
+data8 0xd2752c7039a5bf73, 0x84bf92755825045a
+data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
+data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
+data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
+data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
+data8 0x85359d5d91768427, 0xa7d5579ae5164b85
+data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
+data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
+data8 0x858104f0c415f79a, 0xa8345895e5250a5a
+data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
+data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
+data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
+data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
+data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
+data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
+data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
+data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
+data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
+data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
+data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
+data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
+data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
+data8 0xa9ea8686f556f645, 0xd614b539c6194104
+data8 0x870453c845acf90f, 0xaa1c52d17906bb19
+data8 0xd6537310e224283f, 0x872c089a1e90342c
+data8 0xaa4e59b046dab887, 0xd6927ab62244c917
+data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
+data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
+data8 0xaab319102f3f9b33, 0xd71169cea98fdded
+data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
+data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
+data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
+data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
+data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
+data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
+data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
+data8 0xd83e38838648d815, 0x885bc559e5e1c081
+data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
+data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
+data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
+data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
+data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
+data8 0xd92432bd5a173685, 0x88f4356166bd590e
+data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
+data8 0x89173a0acf5ce026, 0xacb93703ff51571e
+data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
+data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
+data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
+data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
+data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
+data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
+data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
+data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
+data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
+data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
+data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
+data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
+data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
+data8 0xae5794122b638df9, 0xdba843ded7151ea1
+data8 0x8a849aba14274764, 0xae858fda8137ae0a
+data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
+data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
+data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
+data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
+data8 0xaf10a899d3235fe7, 0xdc917398f2797814
+data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
+data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
+data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
+data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
+data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
+data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
+data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
+data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
+data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
+data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
+data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
+data8 0xb078f3ab1d701c65, 0xde576480262399bc
+data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
+data8 0xde943789645933c8, 0x8c5dc4c4f7706032
+data8 0xb0d9b624d62ec856, 0xded14d58139a28af
+data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
+data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
+data8 0xb131821882f5540a, 0xdf3feb44d723a713
+data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
+data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
+data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
+data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
+data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
+data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
+data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
+data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
+data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
+data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
+data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
+data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
+data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
+data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
+data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
+data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
+data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
+data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
+data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
+data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
+data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
+data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
+data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
+data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
+data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
+data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
+data8 0xb43da8e9d163e1af, 0xe316d93615862714
+data8 0x8f385c95d696b817, 0xb47233773b84d425
+data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
+data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
+data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
+data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
+data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
+data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
+data8 0xe42eeca17c62886c, 0x8fe117499e356095
+data8 0xb546c9616087ab9c, 0xe464e32943446305
+data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
+data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
+data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
+data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
+data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
+data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
+data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
+data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
+data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
+data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
+data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
+data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
+data8 0x9110021e7b516f0a, 0xb6c47044075b4142
+data8 0xe645bd1544c7ea51, 0x912a708a39be9075
+data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
+data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
+data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
+data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
+data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
+data8 0xe70a9136a7403039, 0x91afbc299ed0295d
+data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
+data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
+data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
+data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
+data8 0x9212b5fcac537c19, 0xb80a6226904045e2
+data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
+data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
+data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
+data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
+data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
+data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
+data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
+data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
+data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
+data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
+data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
+data8 0x931379a403be5c16, 0xb94de2d841a184c2
+data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
+data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
+data8 0x9354c71412c69486, 0xb9a0297f172665e3
+data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
+data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
+data8 0x93968919f6e7975d, 0xb9f3030951267208
+data8 0xea480963fd394197, 0x93bc516fdd4680c9
+data8 0xba229d6a618e7c59, 0xea84034425f27484
+data8 0x93d8c123d9be59b2, 0xba467144459f9855
+data8 0xeab12713138dd1cc, 0x93f546c955e60076
+data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
+data8 0x941b70a65879079f, 0xba9a76056b67ee7a
+data8 0xeb1b0268343b121b, 0x943829f337410591
+data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
+data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
+data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
+data8 0xbb1385a23be24e57, 0xebb389645f222f62
+data8 0x94988aeb23470f86, 0xbb3814975e17c680
+data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
+data8 0xbb5cc031009bf467, 0xec0fcc9321024509
+data8 0x94d2d7a9170d8b42, 0xbb81889680024764
+data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
+data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
+data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
+data8 0xecaad5278824e453, 0x9534cefa625fcb3a
+data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
+data8 0x955265405c491a25, 0xbc223d88cfc88eee
+data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
+data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
+data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
+data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
+data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
+data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
+data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
+data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
+data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
+data8 0xee357ead791fc670, 0x962e350575b409c5
+data8 0xbd372f8598620f19, 0xee658cb3c134a463
+data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
+data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
+data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
+data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
+data8 0xeef6a0da64a014ac, 0x96a8426705198795
+data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
+data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
+data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
+data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
+data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
+data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
+data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
+data8 0x97430782be323831, 0xbe93f5b41d047cf7
+data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
+data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
+data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
+data8 0xf0805c944d827454, 0x97a117ffd0f48e46
+data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
+data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
+data8 0xf0e46442e76f6569, 0x97e0505a8637a036
+data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
+data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
+data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
+data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
+data8 0x98354085054fd204, 0xbfc52428bec6e72f
+data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
+data8 0xbfed838fddab024b, 0xf1d0593311db1757
+data8 0x987571fffb7f94f6, 0xc016050c0420981a
+data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
+data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
+data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
+data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
+data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
+data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
+data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
+data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
+data8 0x9922b8218160967a, 0xc0f054ca33eb3437
+data8 0xf31670135ab9cc0f, 0x99438d686f75779d
+data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
+data8 0x99647eea131fa20b, 0xc1433453de2033ff
+data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
+data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
+data8 0x999ba5f14f8add02, 0xc188b130431d80e6
+data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
+data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
+data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
+data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
+data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
+data8 0x9a16154eb445c873, 0xc222f35a87b415ba
+data8 0xf498c1076015faf8, 0x9a2c822ec198d667
+data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
+data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
+data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
+data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
+data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
+data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
+data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
+data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
+data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
+data8 0xc323938449a2587e, 0xf5dc1501f324a812
+data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
+data8 0xf6006bee86b5589e, 0x9b1b19033be35730
+data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
+data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
+data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
+data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
+data8 0x9b77854e6c661200, 0xc3e0410243b97383
+data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
+data8 0xc3fd890709833d37, 0xf6eeb177472cedae
+data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
+data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
+data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
+data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
+data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
+data8 0xc490f9a94695ba14, 0xf7a874b97927af44
+data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
+data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
+data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
+data8 0x9c568656c0423def, 0xc4f938aec206291a
+data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
+data8 0xc52629e899dfd622, 0xf8646bf0defb759e
+data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
+data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
+data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
+data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
+data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
+data8 0xc5adf561b91e110a, 0xf90f832c2700c160
+data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
+data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
+data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
+data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
+data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
+data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
+data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
+data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
+data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
+data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
+data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
+data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
+data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
+data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
+data8 0xc70fc0117c641630, 0xfacd431644ce0e40
+data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
+data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
+data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
+data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
+data8 0xfb576c5762024805, 0x9e6ed27594550d2e
+data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
+data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
+data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
+data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
+data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
+data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
+data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
+data8 0x9ef976db07288d04, 0xc84b978847a06b87
+data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
+data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
+data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
+data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
+data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
+data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
+data8 0xfd118595143ee273, 0x9f860593d42fd7f3
+data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
+data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
+data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
+data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
+data8 0x9fd383731ca51db9, 0xc95e5112e721582a
+data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
+data8 0xc97f06bb49787677, 0xfdde8a67d2613531
+data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
+data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
+data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
+data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
+data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
+data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
+data8 0xa07d73ba65e680af, 0xca346d07b045a876
+data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
+data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
+data8 0xa0b24fe89e02602f, 0xca77068257be9bab
+data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
+data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
+data8 0xa0e77200215909e6, 0xcab9f8122c99a101
+data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
+data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
+data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
+data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
+data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
+ASM_SIZE_DIRECTIVE(T_table)
+
+
+
+
+
+
+.align 32
+.global cbrt#
 
 .section .text
-GLOBAL_LIBM_ENTRY(cbrt)
-
-
-{.mfi
-       // get significand
-       getf.sig GR_SIGNIF = f8
-       // normalize a
-       fma.s1 FR_ARG = f8, f1, f0
-       // GR_GP = pointer to C_1,..., C_5 followed by T_table
-       addl GR_GP = @ltoff(poly_coeffs), gp ;;
+.proc  cbrt#
+.align 32
+cbrt: 
+
+
+{ .mfi
+  // get significand
+  getf.sig r23=f8
+  // will continue only for normal/denormal numbers          
+  (p0)  fclass.nm.unc p12,p0 = f8, 0x1b           
+  // r2 = pointer to C_1,...,C_5 followed by T_table
+  addl r2 = @ltoff(poly_coeffs), gp
 }
-
 {.mfi
-       // get exponent
-       getf.exp GR_ARGEXP = f8
-       // will continue only for normal/denormal numbers
-       fclass.m.unc p12, p13 = f8, 0x1e7
-       // GR_CONST4 = bias-((2^{12}-1)/3)-63 = 0xffff-0x555-0x3f = 0xfa6b
-       mov GR_CONST4 = 0xfa6b ;;
+  // get exponent
+  getf.exp r24=f8   
+  // normalize a
+  fma.s1 f14=f8,f1,f0
+  // r29=bias-((2^{12}-1)/3) -63=0xffff-0x555-0x3f=0xfa6b
+  mov r29=0xfa6b;;    
 }
-
 {.mlx
-       mov GR_CONST2 = 0x20000
-       // GR_CONST3 = 2^52
-       movl GR_CONST3 = 0x8000000000000000 ;;
-}
-
-.pred.rel "mutex", p12, p13
-{.mfi
-       // load start address for C_1,..., C_5 followed by T_table
-       ld8 GR_ADDR = [ GR_GP ]
-       // y = frcpa(a)
- (p13) frcpa.s0 f8, p0 = f1, f8
-       // p7 = 1 if denormal input
-       cmp.gtu p7, p0 = GR_CONST3, GR_SIGNIF
+  mov r25=0x20000
+  // r28=2^63 (significand value with only the explicit leading bit set)
+  movl r28=0x8000000000000000;;
 }
 {.mfb
-       nop.m 0
-       // if argument is 0, +/-Infinity, NaN, or NaTVal, then return
- (p12) fma.d.s0 f8 = f8, f1, f0
- (p12) br.ret.spnt b0 ;;
+  // load start address for C_1,...,C_5 followed by T_table
+  ld8 r3=[r2]
+  (p12) fma.d.s0 f8=f8,f1,f0
+  (p12) br.ret.spnt b0
+}
+{.mfi
+  nop.m 0
+  // y=frcpa(a)
+  frcpa.s0 f8,p6=f1,f8
+  // p7=1 if denormal input
+  cmp.gtu p7,p0=r28,r23;;
 }
-
 {.mmi
-       // get exponent (for denormal input)
- (p7) getf.exp GR_ARGEXP = FR_ARG
-       // get normalized significand (for denormal input)
- (p7) getf.sig GR_SIGNIF = FR_ARG
-       // GR_CONST1 = bias-(2^{12}-1)
-       mov GR_CONST1 = 0xf000 ;;
+  // get exponent of the normalized argument f14 (denormal input only)
+  (p7) getf.exp r24=f14
+  // get normalized significand from f14 (denormal input only)
+  (p7) getf.sig r23=f14
+  // r28=bias-(2^{12}-1)
+  mov r28=0xf000;;
 }
-
 {.mii
-       // get GR_SIGN = sign
-       and GR_SIGN = GR_ARGEXP, GR_CONST2
-       // eliminate leading 1 from GR_I1 = 1st table index
-       shl GR_I1 = GR_SIGNIF, 1
-       // eliminate sign from exponent
-       andcm GR_EXP = GR_ARGEXP, GR_CONST2 ;;
+  // get r26=sign
+  and r26=r24,r25
+  // eliminate leading 1 from r23=1st table index
+  shl r23=r23,1
+  // eliminate sign from exponent (r25)
+  andcm r25=r24,r25;;
 }
-
 {.mib
-       add GR_ADDR2 = 32, GR_ADDR
-       // GR_IT1 = 1st table index (y_index, 8 bits)
-       shr.u GR_IT1 = GR_I1, 56
-       nop.b 0
+  add r2=32,r3
+  // r23=1st table index (y_index,8 bits)
+  shr.u r23=r23,56
+  nop.b 0
 }
 {.mib
-       // load C_1
-       ldfe FR_COEFF1 = [ GR_ADDR ], 16
-       // subtract bias from GR_EXPON = exponent
-       sub GR_EXPON = GR_EXP, GR_CONST1
-       nop.b 0 ;;
+  // load C_1
+  ldfe f7=[r3],16
+  // subtract bias from r25=exponent
+  sub r25=r25,r28
+  nop.b 0;;
 }
-
 {.mib
-       // load C_2, C_3
-       ldfpd FR_COEFF2, FR_COEFF3 = [ GR_ADDR ]
-       // 1: exponent* = 5; // (2^{16}-1)/3 = 0x5555
-       shladd GR_TMP1 = GR_EXPON, 2, GR_EXPON
-       nop.b 0
+  // load C_2, C_3
+  ldfpd f9,f10=[r3]
+  // 1: exponent*=5;  // (2^{16}-1)/3=0x5555
+  shladd r24=r25,2,r25
+  nop.b 0
 }
 {.mib
-       // load C_4, C_5
-       ldfpd FR_COEFF4, FR_COEFF5 = [ GR_ADDR2 ], 16
-       // GR_TMP2 = 3*y_index
-       shladd GR_TMP2 = GR_IT1, 1, GR_IT1
-       nop.b 0 ;;
+  // load C_4, C_5
+  ldfpd f11,f12=[r2],16
+  // r23=3*y_index
+  shladd r23=r23,1,r23
+  nop.b 0;;
 }
 
 {.mfi
-       // GR_TMP6 = (5*expon)*16+5*expon = (0x55)*expon
-       shladd GR_TMP6 = GR_TMP1, 4, GR_TMP1
-       // r = 1-a*y
-       fnma.s1 FR_R = f8, FR_ARG, f1
-       // adjust T_table pointer by 1st index
-       shladd GR_ITB1 = GR_TMP2, 3, GR_ADDR2 ;;
+  // r30=(5*expon)*16+5*expon=(0x55)*expon
+  shladd r30=r24,4,r24
+  // r=1-a*y
+  (p6) fnma.s1 f6=f8,f14,f1
+  // adjust T_table pointer by 1st index
+  shladd r2=r23,3,r2;;
 }
 
 {.mii
-       // eliminate leading 1 from significand
-       add GR_SIGNIF2 = GR_SIGNIF, GR_SIGNIF
-       // GR_TMP3 = (0x5500)*expon
-       shl GR_TMP3 = GR_TMP6, 8 ;;
-       // GR_TMP4 = (0x5555)*expon
-       add GR_TMP4 = GR_TMP3, GR_TMP6 ;;
+  nop.m 0
+  // r24=(0x5500)*expon
+  shl r24=r30,8;;
+  // r24=(0x5555)*expon
+  add r24=r24,r30;;
 }
-
 {.mii
-       // GR_TMP5 = (0x5556)*expon // 0x5556 = (2^{16}+2)/3
-       add GR_TMP5 = GR_TMP4, GR_EXPON
-       nop.i 0 ;;
-       // GR_EXP_BY_3 = floor(expon/3)
-       shr GR_EXP_BY_3 = GR_TMP5, 16 ;;
+  // r24=(0x5556)*expon  // 0x5556=(2^{16}+2)/3
+  add r24=r24,r25
+  nop.i 0;;
+  // r24=floor(expon/3)
+  shr r24=r24,16;;
 }
-
 {.mfi
-       // GR_TMP6 = 3*exponent
-       shladd GR_TMP6 = GR_EXP_BY_3, 1, GR_EXP_BY_3
-       // r*r
-       fma.s1 FR_R2 = FR_R, FR_R, f0
-       // bias exponent
-       add GR_EBIAS = GR_CONST4, GR_EXP_BY_3 ;;
+  // r28=3*floor(expon/3)
+  shladd r28=r24,1,r24
+  // f13=r*r (r squared; not general register r2)
+  (p6) fma.s1 f13=f6,f6,f0
+  // bias exponent
+  add r24=r29,r24;;
 }
-
 {.mfi
-       // get remainder of exponent/3
-       sub GR_REM = GR_EXPON, GR_TMP6
-       // c2+c3*r
-       fma.s1 FR_P23 = FR_COEFF3, FR_R, FR_COEFF2
-       nop.i 0
+  // get remainder of exponent/3 : r25-r28
+  sub r25=r25,r28
+  // c2+c3*r
+  (p6) fma.s1 f9=f10,f6,f9
+  // add sign to exponent
+  or r24=r24,r26
 }
 {.mfi
-       // add sign to exponent
-       or GR_SEXP = GR_EBIAS, GR_SIGN
-       // c4+c5*r
-       fma.s1 FR_P45 = FR_COEFF5, FR_R, FR_COEFF4
-       mov GR_TMP63 = 63+0xffff ;;
+  nop.m 0
+  // c4+c5*r
+  (p6) fma.s1 f11=f12,f6,f11
+  nop.i 0;;
 }
-
 {.mmi
-       // FR_2EXP = sign*2^{exponent/3}
-       setf.exp FR_2EXP = GR_SEXP
-       // adjust T_table pointer by 2nd index
-       shladd GR_INDEX = GR_REM, 3, GR_ITB1
-       // is the argument of the form 2^(3*k) ?
-       // get (significand - leading 1) | (exponent mod 3)
-       or GR_TEST = GR_REM, GR_SIGNIF2 ;;
+  // f14=sign*2^{exponent/3}
+  (p6) setf.exp f14=r24
+  // adjust T_table pointer by 2nd index
+  shladd r2=r25,3,r2
+  nop.i 0;;
 }
-
 {.mmi
-       // 2^63
-       setf.exp FR_TMP63 = GR_TMP63
-       // load T
-       ldf8 f8 = [ GR_INDEX ]
-       // is the argument of the form 2^(3*k) ?
-       cmp.eq p14, p0 = GR_TEST, r0 ;;
+  // load T
+  (p6) ldf8 f8=[r2]
+  nop.m 0
+  nop.i 0;;
 }
 
 {.mfi
-       nop.m 0
-       // (c2+c3*r)+r^2*(c4+c5*r)
-       fma.s1 FR_P25 = FR_P45, FR_R2, FR_P23
-       nop.i 0
+  nop.m 0
+  // (c2+c3*r)+r^2*(c4+c5*r)
+  (p6) fma.s1 f9=f11,f13,f9
+  nop.i 0
 }
 {.mfi
-       nop.m 0
-       // c1*r
-       fma.s1 FR_P1 = FR_COEFF1, FR_R, f0
-       nop.i 0 ;;
-}
-
-{.mfb
-       nop.m 0
- (p14) fma.d.s0 f8 = FR_2EXP, FR_TMP63, f0
- (p14) br.ret.spnt b0 ;;
+  nop.m 0
+  // c1*r
+  (p6) fma.s1 f7=f7,f6,f0
+  nop.i 0;;
 }
 
 {.mfi
-       nop.m 0
-       // P = c1*r+r^2* [ (c2+c3*r)+r^2*(c4+c5*r) ]
-       fma.s1 FR_P15 = FR_P25, FR_R2, FR_P1
-       nop.i 0
+  nop.m 0
+  // P=c1*r+r^2*[(c2+c3*r)+r^2*(c4+c5*r)]
+  (p6) fma.s1 f9=f9,f13,f7
+  nop.i 0
 }
 {.mfi
-       nop.m 0
-       // T' = T*(2^exp)
-       fma.s1 f8 = f8, FR_2EXP, f0
-       nop.i 0 ;;
+  nop.m 0
+  // T'=T*(2^exp)
+  (p6) fma.s1 f8=f8,f14,f0
+  nop.i 0;;
 }
-
 {.mfb
-       nop.m 0
-       // result = T'+T'*P
-       fma.d.s0 f8 = f8, FR_P15, f8
-       br.ret.sptk b0 ;;
+  nop.m 0
+  // result = T'-T'*P
+  (p6) fnma.d.s0 f8=f8,f9,f8
+  br.ret.sptk b0;;
 }
-
-
-GLOBAL_LIBM_END(cbrt)
+.endp cbrt
+ASM_SIZE_DIRECTIVE(cbrt)
diff --git a/sysdeps/ia64/fpu/s_cbrtf.S b/sysdeps/ia64/fpu/s_cbrtf.S
index c8c6500b25..20167797b8 100644
--- a/sysdeps/ia64/fpu/s_cbrtf.S
+++ b/sysdeps/ia64/fpu/s_cbrtf.S
@@ -1,10 +1,11 @@
-.file "cbrtf.s"
+.file "cbrtf.asm"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, 
+// Bob Norin, Shane Story, and Ping Tak Peter Tang 
+// of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,30 +21,27 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http: //www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 05/18/00 New version (modified algorithm)
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/28/03 Rescheduled some instructions for better performance
-//          on Itanium 2, and reformatted
+// 2/02/00: Initial version 
+// 5/18/00: New version (modified algorithm)
 //
 // API
 //==============================================================
@@ -55,710 +53,616 @@
 //
 // Implementation
 //
-// Let y= frcpa(a), where a is the argument
+//   cbrt(a) = cbrt(a y) / cbrt(y)
+//          = cbrt(1 - (1 - a y)) * 1/cbrt(y)
 //
-// cbrt(a)= cbrt(a*y)/cbrt(y) = cbrt(1 - (1-a*y)) * (1/cbrt(y))
+// where y = frcpa(a). 
 //
-// For all values of y, the 3 possible significands of 1/cbrt(y)
-// are stored in a table (T0) to 64 bits of accuracy. (There are
-// 3 possible significands because the exponent of y modulo 3
-// can be 0, 1, or 2.)
+//  * cbrt(1 - (1 - a y)) is approximated by a degree-2 polynomial 
+//   
+//  1 - (1/3)*r - (1/9)*r^2
+// 
+//  in r = 1 - a y.
 //
-//
-// * cbrt(1 - (1-a*y)) is approximated by a degree-2 polynomial
-//
-// 1 - (1/3)*r - (1/9)*r^2
-//
-// in r = 1-a*y.
+//  * The values 1/cbrt(y) are stored in a table of constants T0
+//   to 64 bits of accuracy
 //
 // The table values are stored for three exponent values and are
-// then multiplied by 2^(e/3) where e is the exponent of the input number.
+// then multiplied by 2^(e/3) where e is the exponent of the input number.
 // This computation is carried out in parallel with the polynomial
 // evaluation:
 //
-// T= 2^(e/3) * T0
+//      T = 2^(e/3) * T0
 
 
 
 
 
 //===============
-// input= x
-// C= frcpa(x)
-// r= 1 - C * x
+// input = x
+// C = frcpa(x)
+// r = 1 - C * x
 //
-// Special values
+// Special values 
 //==============================================================
 
 
 
 // Registers used
 //==============================================================
-// p6, p7, p8, p12
-
-       FR_R      = f6
-       FR_COEFF1 = f7
-       FR_COEFF2 = f9
-       FR_T0     = f10
-       FR_T1     = f11
-       FR_T2     = f12
-       FR_2M63   = f13
-       FR_ARG    = f14
-       FR_Y      = f15
-
-       GR_GP     = r2
-       GR_ADDR   = r2
-       GR_TMP5   = r3
-       GR_CONST  = r8
-       GR_TMP63  = r8
-       GR_SIGN   = r9
-       GR_CT2    = r10
-       GR_CT3    = r11
-       GR_TMP4   = r14
-       GR_EBIAS3 = r15
-       GR_REM    = r16
-       GR_SEXP   = r17
-       GR_2P63   = r18
-       GR_SIGNIF = r19
-       GR_I1     = r20
-       GR_EBIAS  = r21
-       GR_EXP    = r22
-       GR_IT1    = r23
-       GR_E5     = r24
-       GR_IT1_3  = r25
-       GR_TP1    = r26
-       GR_TMP    = r27
-       GR_TMP2   = r28
-       GR_TMP3   = r29
-       GR_EXP3   = r30
-       GR_ARGEXP = r31
-
-
+//   f6-f15
+//   r2, r23-r26, r28-r30
+//   p6,p7,p8,p12
 
+#include "libm_support.h"
 
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(poly_coeffs)
-
-       data8 0xaaaab19b7e1f5ef9, 0x00003ffd // ~ 1/3
-       data8 0xe38e5192a5a8e56c, 0x00003ffb // ~ 1/9
-LOCAL_OBJECT_END(poly_coeffs)
-
-//   For every entry B in the frcpa table, this table contains
-// the significands of cbrt(1/B), cbrt(2/B), cbrt(4/B).
-// The index to this table is the same as the frcpa index.
-
-LOCAL_OBJECT_START(T_table)
-
-       data8 0x80155c748c374836, 0xa160019ed37fb4ae
-       data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
-       data8 0xa1960b5966da4608, 0xcb95f333968ad59b
-       data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
-       data8 0xcbda64292d3ffd97, 0x8096b586974669b1
-       data8 0xa202f97995b69c0d, 0xcc1f3184af961596
-       data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
-       data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
-       data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
-       data8 0x81149add67c2d208, 0xa2a197e5d10465cb
-       data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
-       data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
-       data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
-       data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
-       data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
-       data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
-       data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
-       data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
-       data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
-       data8 0xce6e0be0cd551a61, 0x823880f78e70b805
-       data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
-       data8 0x826097a62a8e5200, 0xa443df0e53df577a
-       data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
-       data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
-       data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
-       data8 0xcf763c47ee869f00, 0x82da06a527b18937
-       data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
-       data8 0x8302e60b635ab394, 0xa5105d46152c938a
-       data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
-       data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
-       data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
-       data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
-       data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
-       data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
-       data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
-       data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
-       data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
-       data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
-       data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
-       data8 0x844510461ff14209, 0xa6a6444aa0243c0b
-       data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
-       data8 0xa6dc094d10f25792, 0xd23ad555f773f059
-       data8 0x84947e18234f3294, 0xa70a574cc02bba69
-       data8 0xd2752c7039a5bf73, 0x84bf92755825045a
-       data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
-       data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
-       data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
-       data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
-       data8 0x85359d5d91768427, 0xa7d5579ae5164b85
-       data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
-       data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
-       data8 0x858104f0c415f79a, 0xa8345895e5250a5a
-       data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
-       data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
-       data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
-       data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
-       data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
-       data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
-       data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
-       data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
-       data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
-       data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
-       data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
-       data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
-       data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
-       data8 0xa9ea8686f556f645, 0xd614b539c6194104
-       data8 0x870453c845acf90f, 0xaa1c52d17906bb19
-       data8 0xd6537310e224283f, 0x872c089a1e90342c
-       data8 0xaa4e59b046dab887, 0xd6927ab62244c917
-       data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
-       data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
-       data8 0xaab319102f3f9b33, 0xd71169cea98fdded
-       data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
-       data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
-       data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
-       data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
-       data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
-       data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
-       data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
-       data8 0xd83e38838648d815, 0x885bc559e5e1c081
-       data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
-       data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
-       data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
-       data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
-       data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
-       data8 0xd92432bd5a173685, 0x88f4356166bd590e
-       data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
-       data8 0x89173a0acf5ce026, 0xacb93703ff51571e
-       data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
-       data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
-       data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
-       data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
-       data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
-       data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
-       data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
-       data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
-       data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
-       data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
-       data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
-       data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
-       data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
-       data8 0xae5794122b638df9, 0xdba843ded7151ea1
-       data8 0x8a849aba14274764, 0xae858fda8137ae0a
-       data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
-       data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
-       data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
-       data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
-       data8 0xaf10a899d3235fe7, 0xdc917398f2797814
-       data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
-       data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
-       data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
-       data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
-       data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
-       data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
-       data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
-       data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
-       data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
-       data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
-       data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
-       data8 0xb078f3ab1d701c65, 0xde576480262399bc
-       data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
-       data8 0xde943789645933c8, 0x8c5dc4c4f7706032
-       data8 0xb0d9b624d62ec856, 0xded14d58139a28af
-       data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
-       data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
-       data8 0xb131821882f5540a, 0xdf3feb44d723a713
-       data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
-       data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
-       data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
-       data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
-       data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
-       data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
-       data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
-       data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
-       data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
-       data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
-       data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
-       data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
-       data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
-       data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
-       data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
-       data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
-       data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
-       data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
-       data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
-       data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
-       data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
-       data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
-       data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
-       data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
-       data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
-       data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
-       data8 0xb43da8e9d163e1af, 0xe316d93615862714
-       data8 0x8f385c95d696b817, 0xb47233773b84d425
-       data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
-       data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
-       data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
-       data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
-       data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
-       data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
-       data8 0xe42eeca17c62886c, 0x8fe117499e356095
-       data8 0xb546c9616087ab9c, 0xe464e32943446305
-       data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
-       data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
-       data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
-       data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
-       data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
-       data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
-       data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
-       data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
-       data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
-       data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
-       data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
-       data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
-       data8 0x9110021e7b516f0a, 0xb6c47044075b4142
-       data8 0xe645bd1544c7ea51, 0x912a708a39be9075
-       data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
-       data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
-       data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
-       data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
-       data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
-       data8 0xe70a9136a7403039, 0x91afbc299ed0295d
-       data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
-       data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
-       data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
-       data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
-       data8 0x9212b5fcac537c19, 0xb80a6226904045e2
-       data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
-       data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
-       data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
-       data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
-       data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
-       data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
-       data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
-       data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
-       data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
-       data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
-       data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
-       data8 0x931379a403be5c16, 0xb94de2d841a184c2
-       data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
-       data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
-       data8 0x9354c71412c69486, 0xb9a0297f172665e3
-       data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
-       data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
-       data8 0x93968919f6e7975d, 0xb9f3030951267208
-       data8 0xea480963fd394197, 0x93bc516fdd4680c9
-       data8 0xba229d6a618e7c59, 0xea84034425f27484
-       data8 0x93d8c123d9be59b2, 0xba467144459f9855
-       data8 0xeab12713138dd1cc, 0x93f546c955e60076
-       data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
-       data8 0x941b70a65879079f, 0xba9a76056b67ee7a
-       data8 0xeb1b0268343b121b, 0x943829f337410591
-       data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
-       data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
-       data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
-       data8 0xbb1385a23be24e57, 0xebb389645f222f62
-       data8 0x94988aeb23470f86, 0xbb3814975e17c680
-       data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
-       data8 0xbb5cc031009bf467, 0xec0fcc9321024509
-       data8 0x94d2d7a9170d8b42, 0xbb81889680024764
-       data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
-       data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
-       data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
-       data8 0xecaad5278824e453, 0x9534cefa625fcb3a
-       data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
-       data8 0x955265405c491a25, 0xbc223d88cfc88eee
-       data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
-       data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
-       data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
-       data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
-       data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
-       data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
-       data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
-       data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
-       data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
-       data8 0xee357ead791fc670, 0x962e350575b409c5
-       data8 0xbd372f8598620f19, 0xee658cb3c134a463
-       data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
-       data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
-       data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
-       data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
-       data8 0xeef6a0da64a014ac, 0x96a8426705198795
-       data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
-       data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
-       data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
-       data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
-       data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
-       data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
-       data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
-       data8 0x97430782be323831, 0xbe93f5b41d047cf7
-       data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
-       data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
-       data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
-       data8 0xf0805c944d827454, 0x97a117ffd0f48e46
-       data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
-       data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
-       data8 0xf0e46442e76f6569, 0x97e0505a8637a036
-       data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
-       data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
-       data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
-       data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
-       data8 0x98354085054fd204, 0xbfc52428bec6e72f
-       data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
-       data8 0xbfed838fddab024b, 0xf1d0593311db1757
-       data8 0x987571fffb7f94f6, 0xc016050c0420981a
-       data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
-       data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
-       data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
-       data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
-       data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
-       data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
-       data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
-       data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
-       data8 0x9922b8218160967a, 0xc0f054ca33eb3437
-       data8 0xf31670135ab9cc0f, 0x99438d686f75779d
-       data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
-       data8 0x99647eea131fa20b, 0xc1433453de2033ff
-       data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
-       data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
-       data8 0x999ba5f14f8add02, 0xc188b130431d80e6
-       data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
-       data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
-       data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
-       data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
-       data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
-       data8 0x9a16154eb445c873, 0xc222f35a87b415ba
-       data8 0xf498c1076015faf8, 0x9a2c822ec198d667
-       data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
-       data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
-       data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
-       data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
-       data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
-       data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
-       data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
-       data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
-       data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
-       data8 0xc323938449a2587e, 0xf5dc1501f324a812
-       data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
-       data8 0xf6006bee86b5589e, 0x9b1b19033be35730
-       data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
-       data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
-       data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
-       data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
-       data8 0x9b77854e6c661200, 0xc3e0410243b97383
-       data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
-       data8 0xc3fd890709833d37, 0xf6eeb177472cedae
-       data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
-       data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
-       data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
-       data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
-       data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
-       data8 0xc490f9a94695ba14, 0xf7a874b97927af44
-       data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
-       data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
-       data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
-       data8 0x9c568656c0423def, 0xc4f938aec206291a
-       data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
-       data8 0xc52629e899dfd622, 0xf8646bf0defb759e
-       data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
-       data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
-       data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
-       data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
-       data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
-       data8 0xc5adf561b91e110a, 0xf90f832c2700c160
-       data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
-       data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
-       data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
-       data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
-       data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
-       data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
-       data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
-       data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
-       data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
-       data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
-       data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
-       data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
-       data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
-       data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
-       data8 0xc70fc0117c641630, 0xfacd431644ce0e40
-       data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
-       data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
-       data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
-       data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
-       data8 0xfb576c5762024805, 0x9e6ed27594550d2e
-       data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
-       data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
-       data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
-       data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
-       data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
-       data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
-       data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
-       data8 0x9ef976db07288d04, 0xc84b978847a06b87
-       data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
-       data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
-       data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
-       data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
-       data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
-       data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
-       data8 0xfd118595143ee273, 0x9f860593d42fd7f3
-       data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
-       data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
-       data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
-       data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
-       data8 0x9fd383731ca51db9, 0xc95e5112e721582a
-       data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
-       data8 0xc97f06bb49787677, 0xfdde8a67d2613531
-       data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
-       data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
-       data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
-       data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
-       data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
-       data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
-       data8 0xa07d73ba65e680af, 0xca346d07b045a876
-       data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
-       data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
-       data8 0xa0b24fe89e02602f, 0xca77068257be9bab
-       data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
-       data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
-       data8 0xa0e77200215909e6, 0xcab9f8122c99a101
-       data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
-       data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
-       data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
-       data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
-       data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
-LOCAL_OBJECT_END(T_table)
-
-
-
-
-
-
+poly_coeffs:
+ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
+data8 0xaaaaaaaaaaaaaaab, 0x00003ffd // 1/3
+data8 0xe38e38e38e38e38e, 0x00003ffb // 1/9
+ASM_SIZE_DIRECTIVE(poly_coeffs)
+
+
+T_table:
+ASM_TYPE_DIRECTIVE(T_table,@object)
+
+data8 0x80155c748c374836, 0xa160019ed37fb4ae
+data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9
+data8 0xa1960b5966da4608, 0xcb95f333968ad59b
+data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4
+data8 0xcbda64292d3ffd97, 0x8096b586974669b1
+data8 0xa202f97995b69c0d, 0xcc1f3184af961596
+data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d
+data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3
+data8 0xa26a2582012f6e17, 0xcca12e9831fc6402
+data8 0x81149add67c2d208, 0xa2a197e5d10465cb
+data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a
+data8 0xa2d25a532efefbc8, 0xcd24794726477ea5
+data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8
+data8 0xcd6b096a0b70ee87, 0x818ed973b811135e
+data8 0xa33b9c9b59879e24, 0xcda9177738b15a90
+data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21
+data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a
+data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906
+data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574
+data8 0xce6e0be0cd551a61, 0x823880f78e70b805
+data8 0xa4115ce30548bc15, 0xceb666b2c347d1de
+data8 0x826097a62a8e5200, 0xa443df0e53df577a
+data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf
+data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765
+data8 0x82b15a10c5371624, 0xa4a99f303bc7def5
+data8 0xcf763c47ee869f00, 0x82da06a527b18937
+data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785
+data8 0x8302e60b635ab394, 0xa5105d46152c938a
+data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e
+data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6
+data8 0x83553f0ce00e276b, 0xa5781dad3e54d899
+data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a
+data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21
+data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc
+data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca
+data8 0xa60e1e1a2de14745, 0xd1376458e34b037e
+data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658
+data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8
+data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715
+data8 0x844510461ff14209, 0xa6a6444aa0243c0b
+data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2
+data8 0xa6dc094d10f25792, 0xd23ad555f773f059
+data8 0x84947e18234f3294, 0xa70a574cc02bba69
+data8 0xd2752c7039a5bf73, 0x84bf92755825045a
+data8 0xa7409e2af9549084, 0xd2b98ee008c06b59
+data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b
+data8 0xd2f4735ffd700280, 0x8509ef44b86f20be
+data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1
+data8 0x85359d5d91768427, 0xa7d5579ae5164b85
+data8 0xd374f0666c75d51c, 0x855b3bd5b7384357
+data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1
+data8 0x858104f0c415f79a, 0xa8345895e5250a5a
+data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864
+data8 0xa8642a122b44ef0b, 0xd428e23874f13a17
+data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b
+data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3
+data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420
+data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e
+data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852
+data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3
+data8 0x866dca21754096b5, 0xa95ea86b75cc2c20
+data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37
+data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13
+data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba
+data8 0xd5e0a45015350a7e, 0x86dccd74fce79610
+data8 0xa9ea8686f556f645, 0xd614b539c6194104
+data8 0x870453c845acf90f, 0xaa1c52d17906bb19
+data8 0xd6537310e224283f, 0x872c089a1e90342c
+data8 0xaa4e59b046dab887, 0xd6927ab62244c917
+data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b
+data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4
+data8 0xaab319102f3f9b33, 0xd71169cea98fdded
+data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274
+data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a
+data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317
+data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e
+data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc
+data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1
+data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e
+data8 0xd83e38838648d815, 0x885bc559e5e1c081
+data8 0xabcd090db7ef4c3f, 0xd874a1db598b8951
+data8 0x887e2ee392bb7a93, 0xabf864602d7c323d
+data8 0xd8ab42205b80edaf, 0x88a7a8587e404257
+data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965
+data8 0x88ca5eda67594784, 0xac5861d4aa441f0f
+data8 0xd92432bd5a173685, 0x88f4356166bd590e
+data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e
+data8 0x89173a0acf5ce026, 0xacb93703ff51571e
+data8 0xd99e3327cf89574e, 0x893a62a098b6a57b
+data8 0xace5830ad0c3f14b, 0xd9d602b19b100466
+data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2
+data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5
+data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce
+data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb
+data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0
+data8 0xada184a47e9c7613, 0xdac2e230b91c3f84
+data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff
+data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29
+data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced
+data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a
+data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835
+data8 0xae5794122b638df9, 0xdba843ded7151ea1
+data8 0x8a849aba14274764, 0xae858fda8137ae0a
+data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b
+data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68
+data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920
+data8 0xdc56cacda82d0cd5, 0x8af301688ab33558
+data8 0xaf10a899d3235fe7, 0xdc917398f2797814
+data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4
+data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c
+data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2
+data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b
+data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de
+data8 0xafc35ce063eb3787, 0xdd729ad01c69114d
+data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d
+data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335
+data8 0xb022923b148e05c5, 0xddea8f50a51c69b1
+data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b
+data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9
+data8 0xb078f3ab1d701c65, 0xde576480262399bc
+data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31
+data8 0xde943789645933c8, 0x8c5dc4c4f7706032
+data8 0xb0d9b624d62ec856, 0xded14d58139a28af
+data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1
+data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716
+data8 0xb131821882f5540a, 0xdf3feb44d723a713
+data8 0x8cc29907fb951294, 0xb158bf8e4cb04055
+data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8
+data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8
+data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4
+data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee
+data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52
+data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec
+data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515
+data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac
+data8 0x8d97af6352739cb7, 0xb26538b2db8420dc
+data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f
+data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d
+data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16
+data8 0xe1362890eb663139, 0x8e00197e1e7c88fe
+data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa
+data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f
+data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2
+data8 0xb33a7d6268109ebe, 0xe1d050901c531e85
+data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55
+data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e
+data8 0xb3971e9b39264023, 0xe2450559b4d80b6d
+data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a
+data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad
+data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b
+data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d
+data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff
+data8 0xb43da8e9d163e1af, 0xe316d93615862714
+data8 0x8f385c95d696b817, 0xb47233773b84d425
+data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3
+data8 0xb49c6825430fe730, 0xe38e38e38e38e38e
+data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf
+data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38
+data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e
+data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168
+data8 0xe42eeca17c62886c, 0x8fe117499e356095
+data8 0xb546c9616087ab9c, 0xe464e32943446305
+data8 0x90033624aa685f8d, 0xb571c69bdffd9a70
+data8 0xe49b0ce15747a8a2, 0x9025757495f36b86
+data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4
+data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7
+data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab
+data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3
+data8 0x90844ca7211032a7, 0xb6146a9a1bc47819
+data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d
+data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a
+data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2
+data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4
+data8 0xb6982f048c999a56, 0xe60dfb2005c192e9
+data8 0x9110021e7b516f0a, 0xb6c47044075b4142
+data8 0xe645bd1544c7ea51, 0x912a708a39be9075
+data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0
+data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2
+data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5
+data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4
+data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7
+data8 0xe70a9136a7403039, 0x91afbc299ed0295d
+data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589
+data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02
+data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92
+data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a
+data8 0x9212b5fcac537c19, 0xb80a6226904045e2
+data8 0xe7e067453317ed2b, 0x9236f6b256923fcf
+data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5
+data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8
+data8 0xe8454236bfaeca14, 0x9276bef031e6eb79
+data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e
+data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d
+data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3
+data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7
+data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a
+data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f
+data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3
+data8 0x931379a403be5c16, 0xb94de2d841a184c2
+data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34
+data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e
+data8 0x9354c71412c69486, 0xb9a0297f172665e3
+data8 0xe9dfa70b745ac1b4, 0x937a4c273907e262
+data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38
+data8 0x93968919f6e7975d, 0xb9f3030951267208
+data8 0xea480963fd394197, 0x93bc516fdd4680c9
+data8 0xba229d6a618e7c59, 0xea84034425f27484
+data8 0x93d8c123d9be59b2, 0xba467144459f9855
+data8 0xeab12713138dd1cc, 0x93f546c955e60076
+data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b
+data8 0x941b70a65879079f, 0xba9a76056b67ee7a
+data8 0xeb1b0268343b121b, 0x943829f337410591
+data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14
+data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b
+data8 0xeb765721e85f03d0, 0x947b86b57f5842ed
+data8 0xbb1385a23be24e57, 0xebb389645f222f62
+data8 0x94988aeb23470f86, 0xbb3814975e17c680
+data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a
+data8 0xbb5cc031009bf467, 0xec0fcc9321024509
+data8 0x94d2d7a9170d8b42, 0xbb81889680024764
+data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019
+data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7
+data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463
+data8 0xecaad5278824e453, 0x9534cefa625fcb3a
+data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77
+data8 0x955265405c491a25, 0xbc223d88cfc88eee
+data8 0xed089ed5dcd99446, 0x9570130c1f9bb857
+data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c
+data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a
+data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c
+data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6
+data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d
+data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684
+data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903
+data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306
+data8 0xee357ead791fc670, 0x962e350575b409c5
+data8 0xbd372f8598620f19, 0xee658cb3c134a463
+data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e
+data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d
+data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f
+data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d
+data8 0xeef6a0da64a014ac, 0x96a8426705198795
+data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811
+data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15
+data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d
+data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6
+data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371
+data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0
+data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607
+data8 0x97430782be323831, 0xbe93f5b41d047cf7
+data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf
+data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d
+data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c
+data8 0xf0805c944d827454, 0x97a117ffd0f48e46
+data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb
+data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c
+data8 0xf0e46442e76f6569, 0x97e0505a8637a036
+data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896
+data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4
+data8 0xf1383fa9e9b5b381, 0x9815503365914a9d
+data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b
+data8 0x98354085054fd204, 0xbfc52428bec6e72f
+data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902
+data8 0xbfed838fddab024b, 0xf1d0593311db1757
+data8 0x987571fffb7f94f6, 0xc016050c0420981a
+data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23
+data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f
+data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce
+data8 0xf258d095e465cc35, 0x98cbb2d196bd713d
+data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34
+data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4
+data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344
+data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e
+data8 0x9922b8218160967a, 0xc0f054ca33eb3437
+data8 0xf31670135ab9cc0f, 0x99438d686f75779d
+data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb
+data8 0x99647eea131fa20b, 0xc1433453de2033ff
+data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0
+data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6
+data8 0x999ba5f14f8add02, 0xc188b130431d80e6
+data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae
+data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a
+data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734
+data8 0xf42e718b90c8bc16, 0x99f48a669c74c09e
+data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00
+data8 0x9a16154eb445c873, 0xc222f35a87b415ba
+data8 0xf498c1076015faf8, 0x9a2c822ec198d667
+data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5
+data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01
+data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e
+data8 0xc2945aac24daaf6e, 0xf527a232cf6be334
+data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66
+data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958
+data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4
+data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff
+data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d
+data8 0xc323938449a2587e, 0xf5dc1501f324a812
+data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20
+data8 0xf6006bee86b5589e, 0x9b1b19033be35730
+data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4
+data8 0x9b3da7daf04c2892, 0xc397593adf2ba366
+data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b
+data8 0xc3b475b6206155d5, 0xf6929fb98225deb1
+data8 0x9b77854e6c661200, 0xc3e0410243b97383
+data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f
+data8 0xc3fd890709833d37, 0xf6eeb177472cedae
+data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06
+data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4
+data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1
+data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1
+data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503
+data8 0xc490f9a94695ba14, 0xf7a874b97927af44
+data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390
+data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02
+data8 0xc4db5941007aa853, 0xf806291bacb7f7a9
+data8 0x9c568656c0423def, 0xc4f938aec206291a
+data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60
+data8 0xc52629e899dfd622, 0xf8646bf0defb759e
+data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965
+data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c
+data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f
+data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c
+data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902
+data8 0xc5adf561b91e110a, 0xf90f832c2700c160
+data8 0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa
+data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96
+data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873
+data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862
+data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768
+data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41
+data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35
+data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c
+data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5
+data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e
+data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb
+data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4
+data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b
+data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f
+data8 0xc70fc0117c641630, 0xfacd431644ce0e40
+data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be
+data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075
+data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5
+data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c
+data8 0xfb576c5762024805, 0x9e6ed27594550d2e
+data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040
+data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d
+data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055
+data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893
+data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f
+data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154
+data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f
+data8 0x9ef976db07288d04, 0xc84b978847a06b87
+data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25
+data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08
+data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4
+data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca
+data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e
+data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232
+data8 0xfd118595143ee273, 0x9f860593d42fd7f3
+data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a
+data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663
+data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037
+data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb
+data8 0x9fd383731ca51db9, 0xc95e5112e721582a
+data8 0xfdb5544205095a53, 0x9fed79a04fbf9423
+data8 0xc97f06bb49787677, 0xfdde8a67d2613531
+data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
+data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
+data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
+data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
+data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
+data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
+data8 0xa07d73ba65e680af, 0xca346d07b045a876
+data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
+data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
+data8 0xa0b24fe89e02602f, 0xca77068257be9bab
+data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
+data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
+data8 0xa0e77200215909e6, 0xcab9f8122c99a101
+data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
+data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
+data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
+data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
+data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
+ASM_SIZE_DIRECTIVE(T_table)
+
+
+
+
+
+
+.align 32
+.global cbrtf#
 
 .section .text
-GLOBAL_LIBM_ENTRY(cbrtf)
+.proc  cbrtf#
+.align 32
+cbrtf: 
 
 
-{.mfi
-       getf.sig GR_SIGNIF = f8
-       // will continue only for normal/denormal numbers
-       fclass.nm.unc p12, p7 = f8, 0x1b
-       // GR_GP = pointer to C_1, C_2 followed by T_table
-       nop.i 0
+{ .mfi
+  getf.sig r28=f8
+     // will continue only for normal/denormal numbers          
+(p0)  fclass.nm.unc p12,p7 = f8, 0x1b           
+  // r2 = pointer to C_1,C_2 followed by T_table
+  addl r2 = @ltoff(poly_coeffs), gp
 }
 {.mfi
-       addl GR_GP = @ltoff(poly_coeffs), gp
-       // normalize a
-       fma.s1 FR_ARG = f8, f1, f0
-       // GR_CT3 = bias-((2^8-1)/3) -63 = 0xffff-0x55-0x3f = 0xff6b
-       mov GR_CT3 = 0xff6b ;;
-}
-
-{.mmi
-       // get exponent
-       getf.exp GR_ARGEXP = f8
-       // load start address for C_1, C_2 followed by T_table
-       ld8 GR_ADDR = [ GR_GP ]
-       nop.i 0 ;;
-}
-
-{.mlx
-       // check if input significand is 0
- (p7)  cmp.eq p12, p7 = GR_SIGNIF, r0
-       // GR_2P63 = 2^63
-       movl GR_2P63 = 0x8000000000000000 ;;
+  // r29=bias-((2^8-1)/3) -63=0xffff-0x55-0x3f=0xff6b
+  mov r29=0xff6b    
+  // normalize a
+  fma.s1 f14=f8,f1,f0
+  nop.i 0;;
 }
-
-{.mfi
-       nop.m 0
-       // y = frcpa(a)
-       // p7 = 1 for normal and denormal (but non-zero) arguments
- (p7)  frcpa.s0 FR_Y, p0 = f1, f8
-       // p9 = 1 if denormal input
-       cmp.gtu p9, p0 = GR_2P63, GR_SIGNIF
+{.mib
+  nop.m 0
+  (p7) cmp.eq p12,p0=r28,r0
+  nop.b 0;;
 }
 {.mfb
-       // load C_1
-       ldfe FR_COEFF1 = [ GR_ADDR ], 16
-       // if argument is 0, +/-Infinity, or NaN, return
- (p12) fma.s.s0 f8 = f8, f1, f0
- (p12) br.ret.spnt b0 ;;
+  // load start address for C_1,C_2 followed by T_table
+  ld8 r2=[r2]
+  (p12) fma.s.s0 f8=f8,f1,f0
+  (p12) br.ret.spnt b0;;
+}
+{.mmf 
+  // load C_1
+  ldfe f7=[r2],16
+  nop.m 0
+  // y=frcpa(a)
+  frcpa.s0 f8,p6=f1,f8;;
 }
-
 {.mmi
-       // get normalized significand (for denormal inputs only)
- (p9)  getf.sig GR_SIGNIF = FR_ARG
-       // load C_2
-       ldfe FR_COEFF2 = [ GR_ADDR ], 16
-       // GR_CT2 = bias-(2^8-1)
-       mov GR_CT2 = 0xff00
+  // load C_2
+  ldfe f9=[r2],16
+  // r28=bias-(2^8-1)
+  mov r28=0xff00
+  nop.i 0;;
 }
-
-{.mii
-       // get exponent (for denormal inputs only)
- (p9)  getf.exp GR_ARGEXP = FR_ARG
-       nop.i 0
-       mov GR_CONST = 0x20000 ;;
+{.mmi
+  // get normalized significand
+  getf.sig r23=f14
+  // get exponent
+  getf.exp r24=f14
+  mov r25=0x20000;;
 }
-
-
 {.mii
-       // get GR_SIGN = sign
-       and GR_SIGN = GR_ARGEXP, GR_CONST
-       // eliminate leading 1 from GR_I1 = 1st table index
-       shl GR_I1 = GR_SIGNIF, 1
-       // eliminate sign from exponent
-       andcm GR_EBIAS = GR_ARGEXP, GR_CONST ;;
+  // get r26=sign
+  and r26=r24,r25
+  // eliminate leading 1 from r23=1st table index
+  shl r23=r23,1
+  // eliminate sign from exponent (r25)
+  andcm r25=r24,r25;;
 }
-
-
 {.mfi
-       // subtract bias from GR_EXP = exponent
-       sub GR_EXP = GR_EBIAS, GR_CT2
-       // r = 1-a*y
-       fnma.s1 FR_R = FR_Y, FR_ARG, f1
-       // GR_IT1 = 1st table index (y_index8 bits)
-       shr.u GR_IT1 = GR_I1, 56 ;;
+  // subtract bias from r25=exponent
+  sub r25=r25,r28
+  // r=1-a*y
+  (p6) fnma.s1 f6=f8,f14,f1
+  // r23=1st table index (y_index8 bits)
+  shr.u r23=r23,56;;
 }
-
-
 {.mii
-       // 1: exponent* = 5; // (2^{16}-1)/3 = 0x5555
-       shladd GR_E5 = GR_EXP, 2, GR_EXP
-       // GR_IT1_3 = 3*y_index
-       shladd GR_IT1_3 = GR_IT1, 1, GR_IT1
-       nop.i 0 ;;
+  // 1: exponent*=5;  // (2^{16}-1)/3=0x5555
+  shladd r24=r25,2,r25
+  // r23=3*y_index
+  shladd r23=r23,1,r23;;
+  // r30=(5*expon)*16+5*expon=(0x55)*expon
+  shladd r30=r24,4,r24;;
 }
-
-
-{.mmi
-       // GR_TMP5 = (5*expon)*16+5*expon = (0x55)*expon
-       shladd GR_TMP5 = GR_E5, 4, GR_E5
-       // adjust T_table pointer by 1st index
-       shladd GR_TP1 = GR_IT1_3, 3, GR_ADDR
-       nop.i 0 ;;
-}
-
-
 {.mmi
-       // FR_T0 = T [ 0 ] [ y ]
-       ldf8 FR_T0 = [ GR_TP1 ], 8
-       // get 2^{-63}
-       mov GR_TMP63 = 0xffff + 63
-       // GR_TMP = (0x5500)*expon
-       shl GR_TMP = GR_TMP5, 8 ;;
+  // adjust T_table pointer by 1st index
+  shladd r2=r23,3,r2;;
+  // f10=T[0][y]
+  (p6) ldf8 f10=[r2],8
+  // r24=(0x5500)*expon
+  shl r24=r30,8;;
 }
-
-
 {.mfi
-       // FR_T1 = T [ 1 ] [ y ]
-       ldf8 FR_T1 = [ GR_TP1 ], 8
-       // P_1 = C_1+C_2*r
-       fma.s1 FR_COEFF1 = FR_COEFF2, FR_R, FR_COEFF1
-       // GR_TMP2 = (0x5555)*expon
-       add GR_TMP2 = GR_TMP, GR_TMP5 ;;
+  // f11=T[1][y]
+  (p6) ldf8 f11=[r2],8
+  // P_1=C_1+C_2*r
+  (p6) fma.s1 f7=f9,f6,f7
+  // r24=(0x5555)*expon
+  add r24=r24,r30;;
 }
-
-
 {.mmi
-       // GR_TMP3 = (0x5556)*expon // 0x5556 = (2^{16}+2)/3
-       add GR_TMP3 = GR_TMP2, GR_EXP ;;
-       // FR_T2 = T [ 2 ] [ y ]
-       ldf8 FR_T2 = [ GR_TP1 ]
-       // GR_EXP3 = floor(expon/3)
-       shr GR_EXP3 = GR_TMP3, 16 ;;
+  // r24=(0x5556)*expon  // 0x5556=(2^{16}+2)/3
+  add r24=r24,r25;;
+  // f8=T[2][y]
+  (p6) ldf8 f8=[r2]
+  // r24=floor(expon/3)
+  shr r24=r24,16;;
 }
-
-
 {.mmi
-       setf.exp FR_2M63 = GR_TMP63
-       // GR_TMP4 = 3*exponent
-       shladd GR_TMP4 = GR_EXP3, 1, GR_EXP3
-       // bias exponent
-       add GR_EBIAS3 = GR_CT3, GR_EXP3 ;;
-}
-
-
-{.mmf
-       // get remainder of exponent/3
-       sub GR_REM = GR_EXP, GR_TMP4
-       // add sign to exponent
-       or GR_SEXP = GR_EBIAS3, GR_SIGN
-       // P_2 = -r*P_1
-       fnma.s1 FR_R = FR_COEFF1, FR_R, f0 ;;
+  nop.m 0
+  // r28=3*exponent
+  shladd r28=r24,1,r24
+  // bias exponent
+  add r24=r29,r24;;
 }
-
-
-
 {.mmi
-       // FR_ARG = sign*2^{exponent/3}
-       setf.exp FR_ARG = GR_SEXP
-       nop.m 0
-       // remainder = 0 ?
-       // p7=1 if input exponent is 3*j (remainder is 0)
-       cmp.eq.unc p7, p8 = r0, GR_REM ;;
+  // get remainder of exponent/3
+  sub r25=r25,r28
+  // add sign to exponent
+  or r24=r24,r26
+  nop.i 0;;
+}  
+{.mfi
+  nop.m 0
+  // P_2=-r*P_1
+  (p6) fnma.s1 f6=f7,f6,f0
+  // remainder=0 ?
+  (p6) cmp.eq.unc p7,p8=r0,r25;;
 }
-
-
 {.mfi
-       // remainder = 1 ?
-       // p8=1 if input exponent is 3*j+1 (remainder is 1)
-       // p12=1 if input exponent is 3*j+2 (remainder is 2)
-  (p8) cmp.eq.unc p8, p12 = 1, GR_REM
-       // p7=1 -> remainder = 0 -> use T = FR_T0
-  (p7)  fma.s1 f8 = FR_T0, FR_R, FR_T0
-       // argument is of the form 2^(3*k) ?
-       // ( GR_I1 holds significand bits, without the leading 1)
-       or GR_I1 = GR_I1, GR_REM ;;
+  // f14=sign*2^{exponent/3}
+  (p6) setf.exp f14=r24
+  nop.f 0
+  // remainder = 1 ?
+  (p8) cmp.eq.unc p8,p12=1,r25;;
 }
-
-
-.pred.rel "mutex", p12, p8
+.pred.rel "mutex",p7,p8
 {.mfi
-       nop.m 0
-       // p8=1 -> remainder = 1 -> use FR_T1
-  (p8) fma.s1 f8 = FR_T1, FR_R, FR_T1
-       // argument is of the form 2^(3*k) ?
-       cmp.eq p14, p7 = GR_I1, r0
+  nop.m 0
+  // remainder=0 -> use T=f10
+  (p7) fma.s1 f8=f10,f6,f10
+  nop.i 0
 }
-
-
 {.mfi
-       nop.m 0
-       // p12=1 -> remainder=2 -> result = T+T*P_2
- (p12) fma.s1 f8 = FR_T2, FR_R, FR_T2
-       nop.i 0 ;;
+  nop.m 0
+  // remainder =1 -> use f11
+  (p8) fma.s1 f8=f11,f6,f11
+  nop.i 0;;
 }
-
-
-.pred.rel "mutex", p14, p7
 {.mfi
-       nop.m 0
-       // if argument is sgn*2^{3*(expon/3)}
- (p14) fma.s.s0 f8 = FR_2M63, FR_ARG, f0
-       nop.i 0
+  nop.m 0
+  // result=T+T*P_2
+  (p12) fma.s.s0 f8=f8,f6,f8
+  nop.i 0;;
 }
 {.mfb
-       nop.m 0
-       // T* = sgn*2^{expon/3}
-  (p7) fma.s.s0 f8 = f8, FR_ARG, f0
-       br.ret.sptk b0 ;;
+  nop.m 0
+  // T*=sgn*2^{expon/3}
+  (p6) fma.s.s0 f8=f8,f14,f0
+  br.ret.sptk b0;;
 }
-
-
-GLOBAL_LIBM_END(cbrtf)
-
-
-
+.endp cbrtf
+ASM_SIZE_DIRECTIVE(cbrtf)
diff --git a/sysdeps/ia64/fpu/s_cbrtl.S b/sysdeps/ia64/fpu/s_cbrtl.S
index 3e621e2c12..d4bbf8fdbf 100644
--- a/sysdeps/ia64/fpu/s_cbrtl.S
+++ b/sysdeps/ia64/fpu/s_cbrtl.S
@@ -1,10 +1,11 @@
-.file "cbrtl.s"
+.file "cbrtl.asm"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Cristina Iordache, Ted Kubaska, 
+// Bob Norin, Shane Story, and Ping Tak Peter Tang 
+// of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,13 +36,11 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 04/28/00 Initial version 
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 4/28/00: Initial version 
 //
 // API
 //==============================================================
@@ -96,26 +95,29 @@
 //   r2-r3, r23-r30
 //   p6,p7,p12
 
-
+#include "libm_support.h"
 
 // Data tables
 //==============================================================
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(poly_coeffs)
-
+poly_coeffs:
+ASM_TYPE_DIRECTIVE(poly_coeffs,@object)
 data8 0xaaaaaaaaaaaaaab1, 0x00003ffd // C_1
 data8 0xe38e38e38e38e3e0, 0x00003ffb // C_2
 data8 0x3faf9add3c0be9a6, 0x3fa511e8d2b1f749 // C_3, C_4
 data8 0x3f9ee71b2c6ebe99, 0x3f9809180fd0340c // C_5, C_6
-LOCAL_OBJECT_END(poly_coeffs)
-
-
-LOCAL_OBJECT_START(T_table)
+ASM_SIZE_DIRECTIVE(poly_coeffs)
 
+T_table:
+ASM_TYPE_DIRECTIVE(T_table,@object)
 
 data8 0x80155c748c374836, 0x8040404b0879f7f9
 data8 0x806b5dce4b405c10, 0x8096b586974669b1
@@ -501,15 +503,14 @@ data8 0xfec316fecaf3f2ab, 0xfeecfdaf33fadb80
 data8 0xff16fffe2fa8fad6, 0xff411e0ba9db886d
 data8 0xff6b57f7c33e4e9a, 0xff95ade2d1bd7358
 data8 0xffc01fed60f86fb5, 0xffeaae3832b63956
-LOCAL_OBJECT_END(T_table)
+ASM_SIZE_DIRECTIVE(T_table)
 
 
 
 
 
-
-LOCAL_OBJECT_START(D_table)
-
+D_table:
+ASM_TYPE_DIRECTIVE(D_table,@object)
 data4 0x1e50f488, 0x1ebdc559, 0x1e649ec1, 0x9eed9b2c
 data4 0x9e511c44, 0x9ec6d551, 0x9eefe248, 0x9e313854
 data4 0x9f54ff18, 0x9d231411, 0x1ee5d63c, 0x9edf6b95
@@ -702,16 +703,25 @@ data4 0x9eafd508, 0x9ef0e9fc, 0x1d1307ac, 0x1eecee20
 data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4
 data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467
 data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d
-LOCAL_OBJECT_END(D_table)
+ASM_SIZE_DIRECTIVE(D_table)
+
+
 
 
+
+.align 32
+.global cbrtl#
+
 .section .text
-GLOBAL_LIBM_ENTRY(cbrtl)
+.proc  cbrtl#
+.align 32
+cbrtl: 
+
 
 { .mfi
   getf.sig r3=f8
      // will continue only for normal/denormal numbers          
-      fclass.nm.unc p12,p7 = f8, 0x1b           
+(p0)  fclass.nm.unc p12,p7 = f8, 0x1b           
   // r2 = pointer to C_1...C_6 followed by T_table
   addl r2 = @ltoff(poly_coeffs), gp;;
 }
@@ -888,5 +898,5 @@ GLOBAL_LIBM_ENTRY(cbrtl)
   (p6) fma.s0 f8=f8,f6,f8
   br.ret.sptk b0;;
 }
-GLOBAL_LIBM_END(cbrtl)
-
+.endp cbrtl
+ASM_SIZE_DIRECTIVE(cbrtl)
diff --git a/sysdeps/ia64/fpu/s_ceil.S b/sysdeps/ia64/fpu/s_ceil.S
index d1d2980618..f7e6d2cfa6 100644
--- a/sysdeps/ia64/fpu/s_ceil.S
+++ b/sysdeps/ia64/fpu/s_ceil.S
@@ -1,10 +1,10 @@
 .file "ceil.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,67 +20,90 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
+
+#include "libm_support.h"
+
+.align 32
+.global ceil#
+
+.section .text
+.proc  ceil#
+.align 32
+
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 06/13/00 Improved speed
-// 06/27/00 Eliminated incorrect invalid flag setting
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/28/03 Improved performance
-//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
 
 // API
 //==============================================================
 // double ceil(double x)
-//==============================================================
 
-// general input registers:
-// r14 - r19
+// general input registers:  
+
+ceil_GR_FFFF      = r14
+ceil_GR_signexp   = r15
+ceil_GR_exponent  = r16
+ceil_GR_expmask   = r17
+ceil_GR_bigexp    = r18
+
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rM1        = r18
-rSignexpM1 = r19
+// predicate registers used: 
 
-// floating-point registers:
-// f8 - f13
+// p6  ==> Input is NaN, infinity, zero
+// p7  ==> Input is denormal
+// p8  ==> Input is <0
+// p9  ==> Input is >=0
+// p10 ==> Input is already an integer (exponent >= ceil_GR_bigexp)
+// p11 ==> Input is not a large integer
+// p12 ==> Input is a smaller integer
+// p13 ==> Input is not an integer, so inexact must be set
+// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
 
-fXInt      = f9
-fNormX     = f10
-fTmp       = f11
-fAdj       = f12
-fPreResult = f13
 
-// predicate registers used:
-// p6 - p10
+// floating-point registers used: 
+
+CEIL_SIGNED_ZERO  = f7
+CEIL_NORM_f8      = f9                        
+CEIL_FFFF         = f10 
+CEIL_INEXACT      = f11 
+CEIL_FLOAT_INT_f8 = f12
+CEIL_INT_f8       = f13
+CEIL_adj          = f14
+CEIL_MINUS_ONE    = f15
 
 // Overview of operation
 //==============================================================
+
 // double ceil(double x)
-// Return an integer value (represented as a double) that is the smallest
+// Return an integer value (represented as a double) that is the smallest 
 // value not less than x
 // This is x rounded toward +infinity to an integral value.
 // Inexact is set if x != ceil(x)
-//==============================================================
+// **************************************************************************
+
+// Set denormal flag for denormal input and
+// take denormal fault if necessary.
+
+// Is the input an integer value already?
 
 // double_extended
 // if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -101,124 +124,139 @@ fPreResult = f13
 // If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
+// If x is NAN, ZERO, or INFINITY, then  return
+
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
 
-.section .text
-GLOBAL_LIBM_ENTRY(ceil)
+
+ceil:
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
-      addl             rBigexp = 0x10033, r0 // Set exponent at which is integer
+      getf.exp ceil_GR_signexp  = f8
+      fcvt.fx.trunc.s1     CEIL_INT_f8  = f8
+      addl        ceil_GR_bigexp = 0x10033, r0
 }
 { .mfi
-      mov              rM1 = -1              // Set all ones
-      fcvt.fx.trunc.s1 fXInt  = f8           // Convert to int in significand
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      addl        ceil_GR_FFFF      = -1,r0
+      fcmp.lt.s1  p8,p9 = f8,f0
+      mov         ceil_GR_expmask    = 0x1FFFF ;;
 }
-;;
 
+// p7 ==> denorm
 { .mfi
-      mov              rSignexpM1  = 0x2FFFF // Form signexp of -1
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test x < 0
-      nop.i            0
+      setf.sig    CEIL_FFFF  = ceil_GR_FFFF
+      fclass.m    p7,p0 = f8, 0x0b
+      nop.i 999
 }
-{ .mfb
-      setf.sig         fTmp = rM1            // Make const for setting inexact
-      fnorm.s1         fNormX  = f8          // Normalize input
-(p7)  br.cond.spnt     CEIL_UNORM            // Branch if x unorm
+{ .mfi
+      nop.m 999
+      fnorm           CEIL_NORM_f8  = f8
+      nop.i 999 ;;
 }
-;;
 
-CEIL_COMMON:
-// Return here from CEIL_UNORM
+// Form 0 with sign of input in case negative zero is needed
+{ .mfi
+      nop.m 999
+      fmerge.s           CEIL_SIGNED_ZERO = f8, f0
+      nop.i 999
+}
 { .mfi
-      nop.m            0
-      fclass.m         p6,p0 = f8, 0x1e7     // Test x natval, nan, inf, 0
-      nop.i            0
+      nop.m 999
+      fsub.s1           CEIL_MINUS_ONE = f0, f1
+      nop.i 999 ;;
+}
+
+// p6 ==> NAN, INF, ZERO
+{ .mfb
+      nop.m 999
+      fclass.m      p6,p10 = f8, 0xe7
+(p7)  br.cond.spnt  L(CEIL_DENORM) ;;
 }
-;;
 
+L(CEIL_COMMON):
 .pred.rel "mutex",p8,p9
+// Set adjustment to add to trunc(x) for result
+//   If x>0,  adjustment is 1.0
+//   If x<=0, adjustment is 0.0
 { .mfi
-      nop.m            0
-(p8)  fma.s1           fAdj = f0, f0, f0     // If x < 0, adjustment is 0
-      nop.i            0
+      and      ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
+(p9)  fadd.s1  CEIL_adj = f1,f0
+      nop.i 999
 }
 { .mfi
-      nop.m            0
-(p9)  fma.s1           fAdj = f1, f1, f0     // If x > 0, adjustment is +1
-      nop.i            0
+      nop.m 999
+(p8)  fadd.s1  CEIL_adj = f0,f0
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-      fcvt.xf          fPreResult = fXInt    // trunc(x)
-      nop.i            0
+(p10) cmp.ge.unc    p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
+(p6)  fnorm.d f8 = f8
+      nop.i 999 ;;
 }
-{ .mfb
-      nop.m            0
-(p6)  fma.d.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf, 0
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf, 0
+
+{ .mfi
+      nop.m 999
+(p11) fcvt.xf         CEIL_FLOAT_INT_f8   = CEIL_INT_f8
+      nop.i 999 ;;
 }
-;;
 
-{ .mmi
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-;;
-      cmp.ge           p7,p6 = rExp, rBigexp  // Is |x| >= 2^52?
-(p8)  cmp.lt.unc       p10,p0 = rSignexp, rSignexpM1 // Is -1 < x < 0?
+{ .mfi
+      nop.m 999
+(p10) fnorm.d f8 = CEIL_NORM_f8
+      nop.i 999 ;;
 }
-;;
 
-// If -1 < x < 0, we turn off p6 and compute result as -0
+// Is -1 < x < 0?  If so, result will be -0.  Special case it with p14 set.
 { .mfi
-(p10) cmp.ne           p6,p0 = r0,r0
-(p10) fmerge.s         f8 = fNormX, f0
-      nop.i            0
+      nop.m 999
+(p8)  fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
+      nop.i 999 ;;
 }
-;;
 
-.pred.rel "mutex",p6,p7
 { .mfi
-      nop.m            0
-(p6)  fma.d.s0         f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^52
-      nop.i            0
+(p14) cmp.ne  p11,p0 = r0,r0
+(p14) fnorm.d f8 = CEIL_SIGNED_ZERO
+      nop.i 999
 }
 { .mfi
-      nop.m            0
-(p7)  fma.d.s0         f8 = fNormX, f1, f0    // Result, if |x| >= 2^52
-(p10) cmp.eq           p6,p0 = r0,r0          // If -1 < x < 0, turn on p6 again
+      nop.m 999
+(p14) fmpy.s0     CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-(p6)  fcmp.eq.unc.s1   p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
-      nop.i            0
+      nop.m 999
+(p11) fadd.d   f8 = CEIL_FLOAT_INT_f8,CEIL_adj
+      nop.i 999 ;;
+}
+{ .mfi
+      nop.m 999
+(p11) fcmp.eq.unc.s1  p12,p13  = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
+      nop.i 999 ;;
 }
-;;
 
+// Set inexact if result not equal to input
 { .mfi
-      nop.m            0
-(p9)  fmpy.s0          fTmp = fTmp, fTmp      // Dummy to set inexact
-      nop.i            0
+      nop.m 999
+(p13) fmpy.s0     CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+      nop.i 999
 }
+// Set result to input if integer
 { .mfb
-      nop.m            0
-(p8)  fma.d.s0         f8 = fNormX, f1, f0    // If x int, result normalized x
-      br.ret.sptk      b0                     // Exit main path, 0 < |x| < 2^52
+      nop.m 999
+(p12) fnorm.d f8 = CEIL_NORM_f8
+      br.ret.sptk    b0 ;;
 }
-;;
-
 
-CEIL_UNORM:
-// Here if x unorm
+// Here if input denorm
+L(CEIL_DENORM):
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     CEIL_COMMON            // Return to main path
+      getf.exp ceil_GR_signexp  = CEIL_NORM_f8
+      fcvt.fx.trunc.s1     CEIL_INT_f8  = CEIL_NORM_f8
+      br.cond.sptk  L(CEIL_COMMON) ;;
 }
-;;
 
-GLOBAL_LIBM_END(ceil)
+.endp ceil
+ASM_SIZE_DIRECTIVE(ceil)
diff --git a/sysdeps/ia64/fpu/s_ceilf.S b/sysdeps/ia64/fpu/s_ceilf.S
index 051534a202..d1011052e8 100644
--- a/sysdeps/ia64/fpu/s_ceilf.S
+++ b/sysdeps/ia64/fpu/s_ceilf.S
@@ -1,10 +1,10 @@
 .file "ceilf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,67 +20,90 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
+
+#include "libm_support.h"
+
+.align 32
+.global ceilf#
+
+.section .text
+.proc  ceilf#
+.align 32
+
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 06/13/00 Improved speed
-// 06/27/00 Eliminated incorrect invalid flag setting
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/28/03 Improved performance
-//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
 
 // API
 //==============================================================
 // float ceilf(float x)
-//==============================================================
 
-// general input registers:
-// r14 - r19
+// general input registers:  
+
+ceil_GR_FFFF      = r14
+ceil_GR_signexp   = r15
+ceil_GR_exponent  = r16
+ceil_GR_expmask   = r17
+ceil_GR_bigexp    = r18
+
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rM1        = r18
-rSignexpM1 = r19
+// predicate registers used: 
 
-// floating-point registers:
-// f8 - f13
+// p6  ==> Input is NaN, infinity, zero
+// p7  ==> Input is denormal
+// p8  ==> Input is <0
+// p9  ==> Input is >=0
+// p10 ==> Input is already an integer (exponent >= ceil_GR_bigexp)
+// p11 ==> Input is not a large integer
+// p12 ==> Input is a smaller integer
+// p13 ==> Input is not an integer, so inexact must be set
+// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
 
-fXInt      = f9
-fNormX     = f10
-fTmp       = f11
-fAdj       = f12
-fPreResult = f13
 
-// predicate registers used:
-// p6 - p10
+// floating-point registers used: 
+
+CEIL_SIGNED_ZERO  = f7
+CEIL_NORM_f8      = f9                        
+CEIL_FFFF         = f10 
+CEIL_INEXACT      = f11 
+CEIL_FLOAT_INT_f8 = f12
+CEIL_INT_f8       = f13
+CEIL_adj          = f14
+CEIL_MINUS_ONE    = f15
 
 // Overview of operation
 //==============================================================
+
 // float ceilf(float x)
-// Return an integer value (represented as a float) that is the smallest
+// Return an integer value (represented as a float) that is the smallest 
 // value not less than x
 // This is x rounded toward +infinity to an integral value.
 // Inexact is set if x != ceilf(x)
-//==============================================================
+// **************************************************************************
+
+// Set denormal flag for denormal input and
+// take denormal fault if necessary.
+
+// Is the input an integer value already?
 
 // double_extended
 // if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -101,124 +124,139 @@ fPreResult = f13
 // If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
+// If x is NAN, ZERO, or INFINITY, then  return
+
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
 
-.section .text
-GLOBAL_LIBM_ENTRY(ceilf)
+
+ceilf:
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
-      addl             rBigexp = 0x10016, r0 // Set exponent at which is integer
+      getf.exp ceil_GR_signexp  = f8
+      fcvt.fx.trunc.s1     CEIL_INT_f8  = f8
+      addl        ceil_GR_bigexp = 0x10016, r0
 }
 { .mfi
-      mov              rM1 = -1              // Set all ones
-      fcvt.fx.trunc.s1 fXInt  = f8           // Convert to int in significand
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      addl        ceil_GR_FFFF      = -1,r0
+      fcmp.lt.s1  p8,p9 = f8,f0
+      mov         ceil_GR_expmask    = 0x1FFFF ;;
 }
-;;
 
+// p7 ==> denorm
 { .mfi
-      mov              rSignexpM1  = 0x2FFFF // Form signexp of -1
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test x < 0
-      nop.i            0
+      setf.sig    CEIL_FFFF  = ceil_GR_FFFF
+      fclass.m    p7,p0 = f8, 0x0b
+      nop.i 999
 }
-{ .mfb
-      setf.sig         fTmp = rM1            // Make const for setting inexact
-      fnorm.s1         fNormX  = f8          // Normalize input
-(p7)  br.cond.spnt     CEIL_UNORM            // Branch if x unorm
+{ .mfi
+      nop.m 999
+      fnorm           CEIL_NORM_f8  = f8
+      nop.i 999 ;;
 }
-;;
 
-CEIL_COMMON:
-// Return here from CEIL_UNORM
+// Form 0 with sign of input in case negative zero is needed
+{ .mfi
+      nop.m 999
+      fmerge.s           CEIL_SIGNED_ZERO = f8, f0
+      nop.i 999
+}
 { .mfi
-      nop.m            0
-      fclass.m         p6,p0 = f8, 0x1e7     // Test x natval, nan, inf, 0
-      nop.i            0
+      nop.m 999
+      fsub.s1           CEIL_MINUS_ONE = f0, f1
+      nop.i 999 ;;
+}
+
+// p6 ==> NAN, INF, ZERO
+{ .mfb
+      nop.m 999
+      fclass.m      p6,p10 = f8, 0xe7
+(p7)  br.cond.spnt  L(CEIL_DENORM) ;;
 }
-;;
 
+L(CEIL_COMMON):
 .pred.rel "mutex",p8,p9
+// Set adjustment to add to trunc(x) for result
+//   If x>0,  adjustment is 1.0
+//   If x<=0, adjustment is 0.0
 { .mfi
-      nop.m            0
-(p8)  fma.s1           fAdj = f0, f0, f0     // If x < 0, adjustment is 0
-      nop.i            0
+      and      ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask
+(p9)  fadd.s1  CEIL_adj = f1,f0
+      nop.i 999
 }
 { .mfi
-      nop.m            0
-(p9)  fma.s1           fAdj = f1, f1, f0     // If x > 0, adjustment is +1
-      nop.i            0
+      nop.m 999
+(p8)  fadd.s1  CEIL_adj = f0,f0
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-      fcvt.xf          fPreResult = fXInt    // trunc(x)
-      nop.i            0
+(p10) cmp.ge.unc    p10,p11 = ceil_GR_exponent, ceil_GR_bigexp
+(p6)  fnorm.s f8 = f8
+      nop.i 999 ;;
 }
-{ .mfb
-      nop.m            0
-(p6)  fma.s.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf, 0
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf, 0
+
+{ .mfi
+      nop.m 999
+(p11) fcvt.xf         CEIL_FLOAT_INT_f8   = CEIL_INT_f8
+      nop.i 999 ;;
 }
-;;
 
-{ .mmi
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-;;
-      cmp.ge           p7,p6 = rExp, rBigexp  // Is |x| >= 2^23?
-(p8)  cmp.lt.unc       p10,p0 = rSignexp, rSignexpM1 // Is -1 < x < 0?
+{ .mfi
+      nop.m 999
+(p10) fnorm.s f8 = CEIL_NORM_f8
+      nop.i 999 ;;
 }
-;;
 
-// If -1 < x < 0, we turn off p6 and compute result as -0
+// Is -1 < x < 0?  If so, result will be -0.  Special case it with p14 set.
 { .mfi
-(p10) cmp.ne           p6,p0 = r0,r0
-(p10) fmerge.s         f8 = fNormX, f0
-      nop.i            0
+      nop.m 999
+(p8)  fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
+      nop.i 999 ;;
 }
-;;
 
-.pred.rel "mutex",p6,p7
 { .mfi
-      nop.m            0
-(p6)  fma.s.s0         f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^23
-      nop.i            0
+(p14) cmp.ne  p11,p0 = r0,r0
+(p14) fnorm.s f8 = CEIL_SIGNED_ZERO
+      nop.i 999
 }
 { .mfi
-      nop.m            0
-(p7)  fma.s.s0         f8 = fNormX, f1, f0    // Result, if |x| >= 2^23
-(p10) cmp.eq           p6,p0 = r0,r0          // If -1 < x < 0, turn on p6 again
+      nop.m 999
+(p14) fmpy.s0     CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-(p6)  fcmp.eq.unc.s1   p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
-      nop.i            0
+      nop.m 999
+(p11) fadd.s   f8 = CEIL_FLOAT_INT_f8,CEIL_adj
+      nop.i 999 ;;
+}
+{ .mfi
+      nop.m 999
+(p11) fcmp.eq.unc.s1  p12,p13  = CEIL_FLOAT_INT_f8, CEIL_NORM_f8
+      nop.i 999 ;;
 }
-;;
 
+// Set inexact if result not equal to input
 { .mfi
-      nop.m            0
-(p9)  fmpy.s0          fTmp = fTmp, fTmp      // Dummy to set inexact
-      nop.i            0
+      nop.m 999
+(p13) fmpy.s0     CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF
+      nop.i 999
 }
+// Set result to input if integer
 { .mfb
-      nop.m            0
-(p8)  fma.s.s0         f8 = fNormX, f1, f0    // If x int, result normalized x
-      br.ret.sptk      b0                     // Exit main path, 0 < |x| < 2^23
+      nop.m 999
+(p12) fnorm.s f8 = CEIL_NORM_f8
+      br.ret.sptk    b0 ;;
 }
-;;
-
 
-CEIL_UNORM:
-// Here if x unorm
+// Here if input denorm
+L(CEIL_DENORM):
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     CEIL_COMMON            // Return to main path
+      getf.exp ceil_GR_signexp  = CEIL_NORM_f8
+      fcvt.fx.trunc.s1     CEIL_INT_f8  = CEIL_NORM_f8
+      br.cond.sptk  L(CEIL_COMMON) ;;
 }
-;;
 
-GLOBAL_LIBM_END(ceilf)
+.endp ceilf
+ASM_SIZE_DIRECTIVE(ceilf)
diff --git a/sysdeps/ia64/fpu/s_ceill.S b/sysdeps/ia64/fpu/s_ceill.S
index 71cb01d3fa..d3d8719584 100644
--- a/sysdeps/ia64/fpu/s_ceill.S
+++ b/sysdeps/ia64/fpu/s_ceill.S
@@ -1,10 +1,10 @@
 .file "ceill.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,67 +20,90 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
+
+#include "libm_support.h"
+
+.align 32
+.global ceill#
+
+.section .text
+.proc  ceill#
+.align 32
+
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 06/13/00 Improved speed
-// 06/27/00 Eliminated incorrect invalid flag setting
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/28/03 Improved performance
-//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
 
 // API
 //==============================================================
-// long double ceill(long double x)
-//==============================================================
+// long double ceill(long double x)
+
+// general input registers:  
+
+ceil_GR_FFFF      = r14
+ceil_GR_signexp   = r15
+ceil_GR_exponent  = r16
+ceil_GR_expmask   = r17
+ceil_GR_bigexp    = r18
 
-// general input registers:
-// r14 - r19
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rM1        = r18
-rSignexpM1 = r19
+// predicate registers used: 
 
-// floating-point registers:
-// f8 - f13
+// p6  ==> Input is NaN, infinity, zero
+// p7  ==> Input is denormal
+// p8  ==> Input is <0
+// p9  ==> Input is >=0
+// p10 ==> Input is already an integer (exponent >= 0x1003e, i.e. |x| >= 2^63)
+// p11 ==> Input is not a large integer
+// p12 ==> Input is a smaller integer
+// p13 ==> Input is not an integer (trunc(x) != x), so inexact must be set
+// p14 ==> Input is between -1 and 0, so result will be -0 and inexact
 
-fXInt      = f9
-fNormX     = f10
-fTmp       = f11
-fAdj       = f12
-fPreResult = f13
 
-// predicate registers used:
-// p6 - p10
+// floating-point registers used: 
+
+CEIL_SIGNED_ZERO  = f7
+CEIL_NORM_f8      = f9                        
+CEIL_FFFF         = f10 
+CEIL_INEXACT      = f11 
+CEIL_FLOAT_INT_f8 = f12
+CEIL_INT_f8       = f13
+CEIL_adj          = f14
+CEIL_MINUS_ONE    = f15
 
 // Overview of operation
 //==============================================================
+
 // long double ceill(long double x)
-// Return an integer value (represented as a long double) that is the smallest
+// Return an integer value (represented as a long double) that is the smallest 
 // value not less than x
 // This is x rounded toward +infinity to an integral value.
 // Inexact is set if x != ceill(x)
-//==============================================================
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
 
 // double_extended
 // if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -101,124 +124,139 @@ fPreResult = f13
 // If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
+// If x is NAN, ZERO, or INFINITY, then  return
+
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
 
-.section .text
-GLOBAL_LIBM_ENTRY(ceill)
+
+ceill:                                   // Entry point: x is read from f8, result is written to f8
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
-      addl             rBigexp = 0x1003e, r0 // Set exponent at which is integer
+      getf.exp ceil_GR_signexp  = f8                  // Get signexp of x, recomputed at CEIL_DENORM
+      fcvt.fx.trunc.s1     CEIL_INT_f8  = f8          // Convert x to integer, truncating
+      addl        ceil_GR_bigexp = 0x1003e, r0        // Exponent at which x is already an integer
 }
 { .mfi
-      mov              rM1 = -1              // Set all ones
-      fcvt.fx.trunc.s1 fXInt  = f8           // Convert to int in significand
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      addl        ceil_GR_FFFF      = -1,r0           // Set all ones
+      fcmp.lt.s1  p8,p9 = f8,f0                       // p8 if x < 0, otherwise p9
+      mov         ceil_GR_expmask    = 0x1FFFF ;;     // Form exponent mask
 }
-;;
 
+// p7 ==> denorm
 { .mfi
-      mov              rSignexpM1  = 0x2FFFF // Form signexp of -1
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test x < 0
-      nop.i            0
+      setf.sig    CEIL_FFFF  = ceil_GR_FFFF           // Make const used below for setting inexact
+      fclass.m    p7,p0 = f8, 0x0b                    // Test x unorm
+      nop.i 999
 }
-{ .mfb
-      setf.sig         fTmp = rM1            // Make const for setting inexact
-      fnorm.s1         fNormX  = f8          // Normalize input
-(p7)  br.cond.spnt     CEIL_UNORM            // Branch if x unorm
+{ .mfi
+      nop.m 999
+      fnorm           CEIL_NORM_f8  = f8              // Normalized copy of the input
+      nop.i 999 ;;
 }
-;;
 
-CEIL_COMMON:
-// Return here from CEIL_UNORM
+// Form 0 with sign of input in case negative zero is needed
+{ .mfi
+      nop.m 999
+      fmerge.s           CEIL_SIGNED_ZERO = f8, f0
+      nop.i 999
+}
 { .mfi
-      nop.m            0
-      fclass.m         p6,p0 = f8, 0x1e7     // Test x natval, nan, inf, 0
-      nop.i            0
+      nop.m 999
+      fsub.s1           CEIL_MINUS_ONE = f0, f1       // 0 - 1 = -1.0, used for the -1 < x < 0 test
+      nop.i 999 ;;
+}
+
+// p6 ==> NAN, INF, ZERO
+{ .mfb
+      nop.m 999
+      fclass.m      p6,p10 = f8, 0xe7                 // p10 is the complement: x is none of these
+(p7)  br.cond.spnt  L(CEIL_DENORM) ;;                 // Branch if x unorm
 }
-;;
 
+L(CEIL_COMMON):                           // CEIL_DENORM branches back to here
 .pred.rel "mutex",p8,p9
+// Set adjustment to add to trunc(x) for result
+//   If x is not < 0 (p9), adjustment is 1.0
+//   If x < 0 (p8),        adjustment is 0.0
 { .mfi
-      nop.m            0
-(p8)  fma.s1           fAdj = f0, f0, f0     // If x < 0, adjustment is 0
-      nop.i            0
+      and      ceil_GR_exponent = ceil_GR_signexp, ceil_GR_expmask  // Get biased exponent
+(p9)  fadd.s1  CEIL_adj = f1,f0
+      nop.i 999
 }
 { .mfi
-      nop.m            0
-(p9)  fma.s1           fAdj = f1, f1, f0     // If x > 0, adjustment is +1
-      nop.i            0
+      nop.m 999
+(p8)  fadd.s1  CEIL_adj = f0,f0
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-      fcvt.xf          fPreResult = fXInt    // trunc(x)
-      nop.i            0
+(p10) cmp.ge.unc    p10,p11 = ceil_GR_exponent, ceil_GR_bigexp  // p10 if |x| >= 2^63, else p11
+(p6)  fnorm   f8 = f8                                 // Result if x nan, inf, 0
+      nop.i 999 ;;
 }
-{ .mfb
-      nop.m            0
-(p6)  fma.s0           f8 = f8, f1, f0       // Result if x natval, nan, inf, 0
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf, 0
+
+{ .mfi
+      nop.m 999
+(p11) fcvt.xf         CEIL_FLOAT_INT_f8   = CEIL_INT_f8   // trunc(x) as a floating-point value
+      nop.i 999 ;;
 }
-;;
 
-{ .mmi
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-;;
-      cmp.ge           p7,p6 = rExp, rBigexp  // Is |x| >= 2^63?
-(p8)  cmp.lt.unc       p10,p0 = rSignexp, rSignexpM1 // Is -1 < x < 0?
+{ .mfi
+      nop.m 999
+(p10) fnorm   f8 = CEIL_NORM_f8                       // Result, if |x| >= 2^63
+      nop.i 999 ;;
 }
-;;
 
-// If -1 < x < 0, we turn off p6 and compute result as -0
+// Is -1 < x < 0?  If so, result will be -0.  Special case it with p14 set.
 { .mfi
-(p10) cmp.ne           p6,p0 = r0,r0
-(p10) fmerge.s         f8 = fNormX, f0
-      nop.i            0
+      nop.m 999
+(p8)  fcmp.gt.unc.s1 p14,p0 = CEIL_NORM_f8, CEIL_MINUS_ONE
+      nop.i 999 ;;
 }
-;;
 
-.pred.rel "mutex",p6,p7
 { .mfi
-      nop.m            0
-(p6)  fma.s0           f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^63
-      nop.i            0
+(p14) cmp.ne  p11,p0 = r0,r0                          // Turn off p11 so the general path is skipped
+(p14) fnorm   f8 = CEIL_SIGNED_ZERO                   // Result is zero with the sign of x
+      nop.i 999
 }
 { .mfi
-      nop.m            0
-(p7)  fma.s0           f8 = fNormX, f1, f0    // Result, if |x| >= 2^63
-(p10) cmp.eq           p6,p0 = r0,r0          // If -1 < x < 0, turn on p6 again
+      nop.m 999
+(p14) fmpy.s0     CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF  // Dummy to set inexact
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-(p6)  fcmp.eq.unc.s1   p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
-      nop.i            0
+      nop.m 999
+(p11) fadd     f8 = CEIL_FLOAT_INT_f8,CEIL_adj        // Result = trunc(x) + adjustment
+      nop.i 999 ;;
+}
+{ .mfi
+      nop.m 999
+(p11) fcmp.eq.unc.s1  p12,p13  = CEIL_FLOAT_INT_f8, CEIL_NORM_f8  // Is trunc(x) = x ?
+      nop.i 999 ;;
 }
-;;
 
+// Set inexact if result not equal to input
 { .mfi
-      nop.m            0
-(p9)  fmpy.s0          fTmp = fTmp, fTmp      // Dummy to set inexact
-      nop.i            0
+      nop.m 999
+(p13) fmpy.s0     CEIL_INEXACT = CEIL_FFFF,CEIL_FFFF  // Dummy to set inexact
+      nop.i 999
 }
+// Set result to input if integer
 { .mfb
-      nop.m            0
-(p8)  fma.s0           f8 = fNormX, f1, f0    // If x int, result normalized x
-      br.ret.sptk      b0                     // Exit main path, 0 < |x| < 2^63
+      nop.m 999
+(p12) fnorm   f8 = CEIL_NORM_f8                       // Overrides the trunc(x) + adjustment result
+      br.ret.sptk    b0 ;;                            // Return to caller
 }
-;;
-
 
-CEIL_UNORM:
-// Here if x unorm
+// Here if input denorm
+L(CEIL_DENORM):
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     CEIL_COMMON            // Return to main path
+      getf.exp ceil_GR_signexp  = CEIL_NORM_f8        // Recompute signexp from normalized x
+      fcvt.fx.trunc.s1     CEIL_INT_f8  = CEIL_NORM_f8  // Redo truncating convert on normalized x
+      br.cond.sptk  L(CEIL_COMMON) ;;                 // Return to main path
 }
-;;
 
-GLOBAL_LIBM_END(ceill)
+.endp ceill
+ASM_SIZE_DIRECTIVE(ceill)
diff --git a/sysdeps/ia64/fpu/s_copysign.S b/sysdeps/ia64/fpu/s_copysign.S
index 0903565ff3..e0d08cb721 100644
--- a/sysdeps/ia64/fpu/s_copysign.S
+++ b/sysdeps/ia64/fpu/s_copysign.S
@@ -23,16 +23,12 @@ ENTRY (__copysign)
 {
 	fmerge.s fret0 = farg1, farg0
 	br.ret.sptk.many rp
-}
+}	
 END (__copysign)
 
 strong_alias (__copysign, __copysignf)
 strong_alias (__copysign, __copysignl)
 
-strong_alias (__copysign, __libm_copysign)
-strong_alias (__copysign, __libm_copysignf)
-strong_alias (__copysign, __libm_copysignl)
-
 weak_alias (__copysign, copysign)
 weak_alias (__copysignf, copysignf)
 weak_alias (__copysignl, copysignl)
diff --git a/sysdeps/ia64/fpu/s_cos.S b/sysdeps/ia64/fpu/s_cos.S
index 84c177abab..6540aec724 100644
--- a/sysdeps/ia64/fpu/s_cos.S
+++ b/sysdeps/ia64/fpu/s_cos.S
@@ -1,10 +1,10 @@
 .file "sincos.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,22 +35,17 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/02/00 Unwind support added.
-// 06/16/00 Updated tables to enforce symmetry
-// 08/31/00 Saved 2 cycles in main path, and 9 in other paths.
-// 09/20/00 The updated tables regressed to an old version, so reinstated them
+// 2/02/00  Initial revision
+// 4/02/00  Unwind support added.
+// 6/16/00  Updated tables to enforce symmetry
+// 8/31/00  Saved 2 cycles in main path, and 9 in other paths.
+// 9/20/00  The updated tables regressed to an old version, so reinstated them
 // 10/18/00 Changed one table entry to ensure symmetry
-// 01/03/01 Improved speed, fixed flag settings for small arguments.
-// 02/18/02 Large arguments processing routine excluded
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 06/03/02 Insure inexact flag set for large arg result
-// 09/05/02 Work range is widened by reduction strengthen (3 parts of Pi/16)
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 1/03/01  Improved speed, fixed flag settings for small arguments.
 
 // API
 //==============================================================
@@ -68,13 +63,9 @@
 //    nfloat = Round result to integer (round-to-nearest)
 //
 // r = x -  nfloat * pi/2^k
-//    Do this as ((((x -  nfloat * HIGH(pi/2^k))) - 
-//                        nfloat * LOW(pi/2^k)) - 
-//                        nfloat * LOWEST(pi/2^k) for increased accuracy.
+//    Do this as (x -  nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k) for increased accuracy.
 //    pi/2^k is stored as two numbers that when added make pi/2^k.
 //       pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
-//    HIGH and LOW parts are rounded to zero values, 
-//    and LOWEST is rounded to nearest one.
 //
 // x = (nfloat * pi/2^k) + r
 //    r is small enough that we can use a polynomial approximation
@@ -130,7 +121,7 @@
 //
 // as follows
 //
-//    S[m] = Sin(Mpi/2^k) and C[m] = Cos(Mpi/2^k)
+//    Sm = Sin(Mpi/2^k) and Cm = Cos(Mpi/2^k)
 //    rsq = r*r
 //
 //
@@ -150,22 +141,23 @@
 //
 //       P =  r + rcub * P
 //
-//    Answer = S[m] Cos(r) + [Cm] P
+//    Answer = Sm Cos(r) + Cm P
 //
 //       Cos(r) = 1 + rsq Q
 //       Cos(r) = 1 + r^2 Q
 //       Cos(r) = 1 + r^2 (q1 + r^2q2 + r^4q3 + r^6q4)
 //       Cos(r) = 1 + r^2q1 + r^4q2 + r^6q3 + r^8q4 + ...
 //
-//       S[m] Cos(r) = S[m](1 + rsq Q)
-//       S[m] Cos(r) = S[m] + Sm rsq Q
-//       S[m] Cos(r) = S[m] + s_rsq Q
-//       Q         = S[m] + s_rsq Q
+//       Sm Cos(r) = Sm(1 + rsq Q)
+//       Sm Cos(r) = Sm + Sm rsq Q
+//       Sm Cos(r) = Sm + s_rsq Q
+//       Q         = Sm + s_rsq Q
 //
 // Then,
 //
-//    Answer = Q + C[m] P
+//    Answer = Q + Cm P
 
+#include "libm_support.h"
 
 // Registers used
 //==============================================================
@@ -182,97 +174,99 @@
 
 // Assembly macros
 //==============================================================
-sincos_NORM_f8                 = f9
-sincos_W                       = f10
-sincos_int_Nfloat              = f11
-sincos_Nfloat                  = f12
+sind_NORM_f8                 = f9
+sind_W                       = f10
+sind_int_Nfloat              = f11
+sind_Nfloat                  = f12
 
-sincos_r                       = f13
-sincos_rsq                     = f14
-sincos_rcub                    = f15
-sincos_save_tmp                = f15
+sind_r                       = f13
+sind_rsq                     = f14
+sind_rcub                    = f15
 
-sincos_Inv_Pi_by_16            = f32
-sincos_Pi_by_16_1              = f33
-sincos_Pi_by_16_2              = f34
+sind_Inv_Pi_by_16            = f32
+sind_Pi_by_16_hi             = f33
+sind_Pi_by_16_lo             = f34
 
-sincos_Inv_Pi_by_64            = f35
+sind_Inv_Pi_by_64            = f35
+sind_Pi_by_64_hi             = f36
+sind_Pi_by_64_lo             = f37
 
-sincos_Pi_by_16_3              = f36
+sind_Sm                      = f38
+sind_Cm                      = f39
 
-sincos_r_exact                 = f37
+sind_P1                      = f40
+sind_Q1                      = f41
+sind_P2                      = f42
+sind_Q2                      = f43
+sind_P3                      = f44
+sind_Q3                      = f45
+sind_P4                      = f46
+sind_Q4                      = f47
 
-sincos_Sm                      = f38
-sincos_Cm                      = f39
+sind_P_temp1                 = f48
+sind_P_temp2                 = f49
 
-sincos_P1                      = f40
-sincos_Q1                      = f41
-sincos_P2                      = f42
-sincos_Q2                      = f43
-sincos_P3                      = f44
-sincos_Q3                      = f45
-sincos_P4                      = f46
-sincos_Q4                      = f47
+sind_Q_temp1                 = f50
+sind_Q_temp2                 = f51
 
-sincos_P_temp1                 = f48
-sincos_P_temp2                 = f49
+sind_P                       = f52
+sind_Q                       = f53
 
-sincos_Q_temp1                 = f50
-sincos_Q_temp2                 = f51
+sind_srsq                    = f54
 
-sincos_P                       = f52
-sincos_Q                       = f53
+sind_SIG_INV_PI_BY_16_2TO61  = f55
+sind_RSHF_2TO61              = f56
+sind_RSHF                    = f57
+sind_2TOM61                  = f58
+sind_NFLOAT                  = f59
+sind_W_2TO61_RSH             = f60
 
-sincos_srsq                    = f54
-
-sincos_SIG_INV_PI_BY_16_2TO61  = f55
-sincos_RSHF_2TO61              = f56
-sincos_RSHF                    = f57
-sincos_2TOM61                  = f58
-sincos_NFLOAT                  = f59
-sincos_W_2TO61_RSH             = f60
-
-fp_tmp                         = f61
+fp_tmp                       = f61
 
 /////////////////////////////////////////////////////////////
 
-sincos_AD_1                    = r33
-sincos_AD_2                    = r34
-sincos_exp_limit               = r35
-sincos_r_signexp               = r36
-sincos_AD_beta_table           = r37
-sincos_r_sincos                = r38
+sind_AD_1                    = r33
+sind_AD_2                    = r34
+sind_exp_limit               = r35
+sind_r_signexp               = r36
+sind_AD_beta_table           = r37
+sind_r_sincos                = r38
 
-sincos_r_exp                   = r39
-sincos_r_17_ones               = r40
+sind_r_exp                   = r39
+sind_r_17_ones               = r40
 
-sincos_GR_sig_inv_pi_by_16     = r14
-sincos_GR_rshf_2to61           = r15
-sincos_GR_rshf                 = r16
-sincos_GR_exp_2tom61           = r17
-sincos_GR_n                    = r18
-sincos_GR_m                    = r19
-sincos_GR_32m                  = r19
-sincos_GR_all_ones             = r19
+sind_GR_sig_inv_pi_by_16     = r14
+sind_GR_rshf_2to61           = r15
+sind_GR_rshf                 = r16
+sind_GR_exp_2tom61           = r17
+sind_GR_n                    = r18
+sind_GR_m                    = r19
+sind_GR_32m                  = r19
 
-gr_tmp                         = r41
-GR_SAVE_PFS                    = r41
-GR_SAVE_B0                     = r42
-GR_SAVE_GP                     = r43
+gr_tmp                       = r41
+GR_SAVE_PFS                  = r41
+GR_SAVE_B0                   = r42
+GR_SAVE_GP                   = r43
 
 
-RODATA
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
-// Pi/16 parts
 .align 16
-LOCAL_OBJECT_START(double_sincos_pi)
-   data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 1st part
-   data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 2nd part
-   data8 0xA4093822299F31D0, 0x00003F7A // pi/16 3rd part
-LOCAL_OBJECT_END(double_sincos_pi)
-
-// Coefficients for polynomials
-LOCAL_OBJECT_START(double_sincos_pq_k4)
+double_sind_pi:
+ASM_TYPE_DIRECTIVE(double_sind_pi,@object)
+//   data8 0xA2F9836E4E44152A, 0x00004001 // 16/pi (significand loaded w/ setf)
+//         c90fdaa22168c234
+   data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 hi
+//         c4c6628b80dc1cd1  29024e088a
+   data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 lo
+ASM_SIZE_DIRECTIVE(double_sind_pi)
+
+double_sind_pq_k4:
+ASM_TYPE_DIRECTIVE(double_sind_pq_k4,@object)
    data8 0x3EC71C963717C63A // P4
    data8 0x3EF9FFBA8F191AE6 // Q4
    data8 0xBF2A01A00F4E11A8 // P3
@@ -281,119 +275,125 @@ LOCAL_OBJECT_START(double_sincos_pq_k4)
    data8 0x3FA555555554DD45 // Q2
    data8 0xBFC5555555555555 // P1
    data8 0xBFDFFFFFFFFFFFFC // Q1
-LOCAL_OBJECT_END(double_sincos_pq_k4)
+ASM_SIZE_DIRECTIVE(double_sind_pq_k4)
 
-// Sincos table (S[m], C[m])
-LOCAL_OBJECT_START(double_sin_cos_beta_k4)
 
+double_sin_cos_beta_k4:
+ASM_TYPE_DIRECTIVE(double_sin_cos_beta_k4,@object)
 data8 0x0000000000000000 , 0x00000000 // sin( 0 pi/16)  S0
 data8 0x8000000000000000 , 0x00003fff // cos( 0 pi/16)  C0
-//
+
 data8 0xc7c5c1e34d3055b3 , 0x00003ffc // sin( 1 pi/16)  S1
 data8 0xfb14be7fbae58157 , 0x00003ffe // cos( 1 pi/16)  C1
-//
+
 data8 0xc3ef1535754b168e , 0x00003ffd // sin( 2 pi/16)  S2
 data8 0xec835e79946a3146 , 0x00003ffe // cos( 2 pi/16)  C2
-//
+
 data8 0x8e39d9cd73464364 , 0x00003ffe // sin( 3 pi/16)  S3
 data8 0xd4db3148750d181a , 0x00003ffe // cos( 3 pi/16)  C3
-//
+
 data8 0xb504f333f9de6484 , 0x00003ffe // sin( 4 pi/16)  S4
 data8 0xb504f333f9de6484 , 0x00003ffe // cos( 4 pi/16)  C4
-//
-//
+
+
 data8 0xd4db3148750d181a , 0x00003ffe // sin( 5 pi/16)  C3
 data8 0x8e39d9cd73464364 , 0x00003ffe // cos( 5 pi/16)  S3
-//
+
 data8 0xec835e79946a3146 , 0x00003ffe // sin( 6 pi/16)  C2
 data8 0xc3ef1535754b168e , 0x00003ffd // cos( 6 pi/16)  S2
-//
+
 data8 0xfb14be7fbae58157 , 0x00003ffe // sin( 7 pi/16)  C1
 data8 0xc7c5c1e34d3055b3 , 0x00003ffc // cos( 7 pi/16)  S1
-//
+
 data8 0x8000000000000000 , 0x00003fff // sin( 8 pi/16)  C0
 data8 0x0000000000000000 , 0x00000000 // cos( 8 pi/16)  S0
-//
-//
+
+
 data8 0xfb14be7fbae58157 , 0x00003ffe // sin( 9 pi/16)  C1
 data8 0xc7c5c1e34d3055b3 , 0x0000bffc // cos( 9 pi/16)  -S1
-//
+
 data8 0xec835e79946a3146 , 0x00003ffe // sin(10 pi/16)  C2
 data8 0xc3ef1535754b168e , 0x0000bffd // cos(10 pi/16)  -S2
-//
+
 data8 0xd4db3148750d181a , 0x00003ffe // sin(11 pi/16)  C3
 data8 0x8e39d9cd73464364 , 0x0000bffe // cos(11 pi/16)  -S3
-//
+
 data8 0xb504f333f9de6484 , 0x00003ffe // sin(12 pi/16)  S4
 data8 0xb504f333f9de6484 , 0x0000bffe // cos(12 pi/16)  -S4
-//
-//
+
+
 data8 0x8e39d9cd73464364 , 0x00003ffe // sin(13 pi/16) S3
 data8 0xd4db3148750d181a , 0x0000bffe // cos(13 pi/16) -C3
-//
+
 data8 0xc3ef1535754b168e , 0x00003ffd // sin(14 pi/16) S2
 data8 0xec835e79946a3146 , 0x0000bffe // cos(14 pi/16) -C2
-//
+
 data8 0xc7c5c1e34d3055b3 , 0x00003ffc // sin(15 pi/16) S1
 data8 0xfb14be7fbae58157 , 0x0000bffe // cos(15 pi/16) -C1
-//
+
 data8 0x0000000000000000 , 0x00000000 // sin(16 pi/16) S0
 data8 0x8000000000000000 , 0x0000bfff // cos(16 pi/16) -C0
-//
-//
+
+
 data8 0xc7c5c1e34d3055b3 , 0x0000bffc // sin(17 pi/16) -S1
 data8 0xfb14be7fbae58157 , 0x0000bffe // cos(17 pi/16) -C1
-//
+
 data8 0xc3ef1535754b168e , 0x0000bffd // sin(18 pi/16) -S2
 data8 0xec835e79946a3146 , 0x0000bffe // cos(18 pi/16) -C2
-//
+
 data8 0x8e39d9cd73464364 , 0x0000bffe // sin(19 pi/16) -S3
 data8 0xd4db3148750d181a , 0x0000bffe // cos(19 pi/16) -C3
-//
+
 data8 0xb504f333f9de6484 , 0x0000bffe // sin(20 pi/16) -S4
 data8 0xb504f333f9de6484 , 0x0000bffe // cos(20 pi/16) -S4
-//
-//
+
+
 data8 0xd4db3148750d181a , 0x0000bffe // sin(21 pi/16) -C3
 data8 0x8e39d9cd73464364 , 0x0000bffe // cos(21 pi/16) -S3
-//
+
 data8 0xec835e79946a3146 , 0x0000bffe // sin(22 pi/16) -C2
 data8 0xc3ef1535754b168e , 0x0000bffd // cos(22 pi/16) -S2
-//
+
 data8 0xfb14be7fbae58157 , 0x0000bffe // sin(23 pi/16) -C1
 data8 0xc7c5c1e34d3055b3 , 0x0000bffc // cos(23 pi/16) -S1
-//
+
 data8 0x8000000000000000 , 0x0000bfff // sin(24 pi/16) -C0
 data8 0x0000000000000000 , 0x00000000 // cos(24 pi/16) S0
-//
-//
+
+
 data8 0xfb14be7fbae58157 , 0x0000bffe // sin(25 pi/16) -C1
 data8 0xc7c5c1e34d3055b3 , 0x00003ffc // cos(25 pi/16) S1
-//
+
 data8 0xec835e79946a3146 , 0x0000bffe // sin(26 pi/16) -C2
 data8 0xc3ef1535754b168e , 0x00003ffd // cos(26 pi/16) S2
-//
+
 data8 0xd4db3148750d181a , 0x0000bffe // sin(27 pi/16) -C3
 data8 0x8e39d9cd73464364 , 0x00003ffe // cos(27 pi/16) S3
-//
+
 data8 0xb504f333f9de6484 , 0x0000bffe // sin(28 pi/16) -S4
 data8 0xb504f333f9de6484 , 0x00003ffe // cos(28 pi/16) S4
-//
-//
+
+
 data8 0x8e39d9cd73464364 , 0x0000bffe // sin(29 pi/16) -S3
 data8 0xd4db3148750d181a , 0x00003ffe // cos(29 pi/16) C3
-//
+
 data8 0xc3ef1535754b168e , 0x0000bffd // sin(30 pi/16) -S2
 data8 0xec835e79946a3146 , 0x00003ffe // cos(30 pi/16) C2
-//
+
 data8 0xc7c5c1e34d3055b3 , 0x0000bffc // sin(31 pi/16) -S1
 data8 0xfb14be7fbae58157 , 0x00003ffe // cos(31 pi/16) C1
-//
+
 data8 0x0000000000000000 , 0x00000000 // sin(32 pi/16) S0
 data8 0x8000000000000000 , 0x00003fff // cos(32 pi/16) C0
-LOCAL_OBJECT_END(double_sin_cos_beta_k4)
+ASM_SIZE_DIRECTIVE(double_sin_cos_beta_k4)
 
-.section .text
+.align 32
+.global sin#
+.global cos#
+#ifdef _LIBC
+.global __sin#
+.global __cos#
+#endif
 
 ////////////////////////////////////////////////////////
 // There are two entry points: sin and cos
@@ -402,63 +402,85 @@ LOCAL_OBJECT_END(double_sin_cos_beta_k4)
 // If from sin, p8 is true
 // If from cos, p9 is true
 
-GLOBAL_IEEE754_ENTRY(sin)
+.section .text
+.proc  sin#
+#ifdef _LIBC
+.proc  __sin#
+#endif
+.align 32
+
+sin:                                      // sin entry: sets p8 and sind_r_sincos = 0, then joins SIND_SINCOS
+#ifdef _LIBC
+__sin:
+#endif
 
 { .mlx
-      alloc         r32                 = ar.pfs, 1, 13, 0, 0
-      movl sincos_GR_sig_inv_pi_by_16   = 0xA2F9836E4E44152A // signd of 16/pi
+      alloc          r32=ar.pfs,1,13,0,0
+      movl sind_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
 }
 { .mlx
-      addl          sincos_AD_1         = @ltoff(double_sincos_pi), gp
-      movl sincos_GR_rshf_2to61         = 0x47b8000000000000 // 1.1 2^(63+63-2)
+      addl           sind_AD_1   = @ltoff(double_sind_pi), gp // gp-relative slot for double_sind_pi
+      movl sind_GR_rshf_2to61 = 0x47b8000000000000 // 1.1000 2^(63+63-2)
 }
 ;;
 
 { .mfi
-      ld8           sincos_AD_1         = [sincos_AD_1]
-      fnorm.s0      sincos_NORM_f8      = f8  // Normalize argument
-      cmp.eq        p8,p9               = r0, r0 // set p8 (clear p9) for sin
+      ld8 sind_AD_1 = [sind_AD_1]                  // Load the address of double_sind_pi
+      fnorm     sind_NORM_f8  = f8                 // Normalize argument
+      cmp.eq     p8,p9         = r0, r0            // set p8 (clear p9) for sin
 }
 { .mib
-      mov           sincos_GR_exp_2tom61  = 0xffff-61 // exponent of scale 2^-61
-      mov           sincos_r_sincos       = 0x0 // sincos_r_sincos = 0 for sin
-      br.cond.sptk  _SINCOS_COMMON  // go to common part
+      mov sind_GR_exp_2tom61 = 0xffff-61 // exponent of scaling factor 2^-61
+      mov            sind_r_sincos = 0x0           // sind_r_sincos = 0 for sin
+      br.cond.sptk   L(SIND_SINCOS)                // go to common part
 }
 ;;
 
-GLOBAL_IEEE754_END(sin)
-GLOBAL_IEEE754_ENTRY(cos)
+.endp sin
+ASM_SIZE_DIRECTIVE(sin)
+
+
+.section .text
+.proc  cos#
+#ifdef _LIBC
+.proc  __cos#
+#endif
+.align 32
+cos:
+#ifdef _LIBC
+__cos:
+#endif
 
 { .mlx
-      alloc         r32                 = ar.pfs, 1, 13, 0, 0
-      movl sincos_GR_sig_inv_pi_by_16   = 0xA2F9836E4E44152A // signd of 16/pi
+      alloc          r32=ar.pfs,1,13,0,0
+      movl sind_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
 }
 { .mlx
-      addl          sincos_AD_1         = @ltoff(double_sincos_pi), gp
-      movl sincos_GR_rshf_2to61         = 0x47b8000000000000 // 1.1 2^(63+63-2)
+      addl           sind_AD_1   = @ltoff(double_sind_pi), gp
+      movl sind_GR_rshf_2to61 = 0x47b8000000000000 // 1.1000 2^(63+63-2)
 }
 ;;
 
 { .mfi
-      ld8           sincos_AD_1         = [sincos_AD_1]
-      fnorm.s1      sincos_NORM_f8      = f8 // Normalize argument
-      cmp.eq        p9,p8               = r0, r0 // set p9 (clear p8) for cos
+      ld8 sind_AD_1 = [sind_AD_1]
+      fnorm.s1     sind_NORM_f8  = f8
+      cmp.eq     p9,p8         = r0, r0
 }
 { .mib
-      mov           sincos_GR_exp_2tom61  = 0xffff-61 // exp of scale 2^-61
-      mov           sincos_r_sincos       = 0x8 // sincos_r_sincos = 8 for cos
-      nop.b         999
+      mov sind_GR_exp_2tom61 = 0xffff-61 // exponent of scaling factor 2^-61
+      mov            sind_r_sincos = 0x8
+      br.cond.sptk   L(SIND_SINCOS)
 }
 ;;
 
+
 ////////////////////////////////////////////////////////
 // All entry points end up here.
-// If from sin, sincos_r_sincos is 0 and p8 is true
-// If from cos, sincos_r_sincos is 8 = 2^(k-1) and p9 is true
-// We add sincos_r_sincos to N
+// If from sin, sind_r_sincos is 0 and p8 is true
+// If from cos, sind_r_sincos is 8 = 2^(k-1) and p9 is true
+// We add sind_r_sincos to N
 
-///////////// Common sin and cos part //////////////////
-_SINCOS_COMMON:
+L(SIND_SINCOS):
 
 
 // Form two constants we need
@@ -466,320 +488,3014 @@ _SINCOS_COMMON:
 //  1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
 // fcmp used to set denormal, and invalid on snans
 { .mfi
-      setf.sig      sincos_SIG_INV_PI_BY_16_2TO61 = sincos_GR_sig_inv_pi_by_16
-      fclass.m      p6,p0                         = f8, 0xe7 // if x = 0,inf,nan
-      mov           sincos_exp_limit              = 0x1001a
+      setf.sig sind_SIG_INV_PI_BY_16_2TO61 = sind_GR_sig_inv_pi_by_16
+      fcmp.eq.s0 p12,p0=f8,f0
+      mov       sind_r_17_ones    = 0x1ffff
 }
 { .mlx
-      setf.d        sincos_RSHF_2TO61   = sincos_GR_rshf_2to61
-      movl          sincos_GR_rshf      = 0x43e8000000000000 // 1.1 2^63
-}                                                            // Right shift
+      setf.d sind_RSHF_2TO61 = sind_GR_rshf_2to61
+      movl sind_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
+}
 ;;
 
 // Form another constant
 //  2^-61 for scaling Nfloat
-// 0x1001a is register_bias + 27.
-// So if f8 >= 2^27, go to large argument routines
-{ .mmi
-      getf.exp      sincos_r_signexp    = f8
-      setf.exp      sincos_2TOM61       = sincos_GR_exp_2tom61
-      addl          gr_tmp              = -1,r0 // For "inexect" constant create
+// 0x10009 is register_bias + 10.
+// So if |f8| >= 2^10 = Gamma (exponent compared with cmp.ge), go to DBX
+{ .mfi
+      setf.exp sind_2TOM61 = sind_GR_exp_2tom61
+      fclass.m  p13,p0 = f8, 0x23           // Test for x inf
+      mov       sind_exp_limit = 0x10009
 }
 ;;
 
 // Load the two pieces of pi/16
 // Form another constant
 //  1.1000...000 * 2^63, the right shift constant
-{ .mmb
-      ldfe          sincos_Pi_by_16_1   = [sincos_AD_1],16
-      setf.d        sincos_RSHF         = sincos_GR_rshf
-(p6)  br.cond.spnt  _SINCOS_SPECIAL_ARGS
+{ .mmf
+      ldfe      sind_Pi_by_16_hi  = [sind_AD_1],16
+      setf.d sind_RSHF = sind_GR_rshf
+      fclass.m  p14,p0 = f8, 0xc3           // Test for x nan
 }
 ;;
 
-{ .mmi
-      ldfe          sincos_Pi_by_16_2   = [sincos_AD_1],16
-      setf.sig      fp_tmp              = gr_tmp // constant for inexact set
-      nop.i         999
-};;
+{ .mfi
+      ldfe      sind_Pi_by_16_lo  = [sind_AD_1],16
+(p13) frcpa.s0 f8,p12=f0,f0               // force qnan indef for x=inf
+      addl gr_tmp = -1,r0
+}
+{ .mfb
+      addl           sind_AD_beta_table   = @ltoff(double_sin_cos_beta_k4), gp
+      nop.f 999
+(p13) br.ret.spnt    b0 ;;                // Exit for x=inf
+}
 
+// Start loading P, Q coefficients
+// SIN(0)
 { .mfi
-      ldfe          sincos_Pi_by_16_3   = [sincos_AD_1],16
-      nop.f         999
-      nop.i         999
-};;
+      ldfpd      sind_P4,sind_Q4 = [sind_AD_1],16
+(p8)  fclass.m.unc  p6,p0 = f8, 0x07      // Test for sin(0)
+      nop.i 999
+}
+{ .mfb
+      addl           sind_AD_beta_table   = @ltoff(double_sin_cos_beta_k4), gp
+(p14) fma.d f8=f8,f1,f0                   // qnan for x=nan
+(p14) br.ret.spnt    b0 ;;                // Exit for x=nan
+}
 
-// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
-{ .mmi
-      ldfpd         sincos_P4,sincos_Q4 = [sincos_AD_1],16
-      nop.m         999
-      nop.i         999
-};;
 
-// Select exponent (17 lsb)
-{ .mmi
-      ldfpd         sincos_P3,sincos_Q3 = [sincos_AD_1],16
-      nop.m         999
-      dep.z         sincos_r_exp        = sincos_r_signexp, 0, 17 
+// COS(0)
+{ .mfi
+      getf.exp  sind_r_signexp    = f8
+(p9)  fclass.m.unc  p7,p0 = f8, 0x07      // Test for cos(0)
+      nop.i 999
+}
+{ .mfi
+      ld8 sind_AD_beta_table = [sind_AD_beta_table]
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
-// p10 is true if we must call routines to handle larger arguments
-// p10 is true if f8 exp is >= 0x1001a (2^27)
 { .mmb
-      ldfpd         sincos_P2,sincos_Q2 = [sincos_AD_1],16
-      cmp.ge        p10,p0              = sincos_r_exp,sincos_exp_limit 
-(p10) br.cond.spnt  _SINCOS_LARGE_ARGS // Go to "large args" routine
-};;
+      ldfpd      sind_P3,sind_Q3 = [sind_AD_1],16
+      setf.sig fp_tmp = gr_tmp // Create constant such that fmpy sets inexact
+(p6)  br.ret.spnt    b0 ;;
+}
+
+{ .mfb
+      and       sind_r_exp = sind_r_17_ones, sind_r_signexp
+(p7)  fmerge.s      f8 = f1,f1
+(p7)  br.ret.spnt    b0 ;;
+}
 
-// sincos_W          = x * sincos_Inv_Pi_by_16
+// p10 is true if we must call routines to handle larger arguments
+// p10 is true if f8 exp is >= 0x10009
+
+{ .mfi
+      ldfpd      sind_P2,sind_Q2 = [sind_AD_1],16
+      nop.f 999
+      cmp.ge  p10,p0 = sind_r_exp,sind_exp_limit
+}
+;;
+
+// sind_W          = x * sind_Inv_Pi_by_16
 // Multiply x by scaled 16/pi and add large const to shift integer part of W to
 //   rightmost bits of significand
 { .mfi
-      ldfpd         sincos_P1,sincos_Q1 = [sincos_AD_1],16
-      fma.s1 sincos_W_2TO61_RSH = sincos_NORM_f8,sincos_SIG_INV_PI_BY_16_2TO61,sincos_RSHF_2TO61
-      nop.i         999
-};;
+      ldfpd      sind_P1,sind_Q1 = [sind_AD_1]
+      fma.s1 sind_W_2TO61_RSH = sind_NORM_f8,sind_SIG_INV_PI_BY_16_2TO61,sind_RSHF_2TO61
+      nop.i 999
+}
+{ .mbb
+(p10) cmp.ne.unc p11,p12=sind_r_sincos,r0  // p11 call __libm_cos_double_dbx
+                                           // p12 call __libm_sin_double_dbx
+(p11) br.cond.spnt L(COSD_DBX)
+(p12) br.cond.spnt L(SIND_DBX)
+}
+;;
+
 
-// sincos_NFLOAT = Round_Int_Nearest(sincos_W)
+// sind_NFLOAT = Round_Int_Nearest(sind_W)
 // This is done by scaling back by 2^-61 and subtracting the shift constant
 { .mfi
-      nop.m         999
-      fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
-      nop.i         999 
-};;
+      nop.m 999
+      fms.s1 sind_NFLOAT = sind_W_2TO61_RSH,sind_2TOM61,sind_RSHF
+      nop.i 999 ;;
+}
 
 
-// get N = (int)sincos_int_Nfloat
+// get N = (int)sind_int_Nfloat
 { .mfi
-      getf.sig      sincos_GR_n         = sincos_W_2TO61_RSH
-      nop.f         999
-      nop.i         999 
-};;
+      getf.sig  sind_GR_n = sind_W_2TO61_RSH
+      nop.f 999
+      nop.i 999 ;;
+}
 
-// Add 2^(k-1) (which is in sincos_r_sincos) to N
-// sincos_r          = -sincos_Nfloat * sincos_Pi_by_16_1 + x
+// Add 2^(k-1) (which is in sind_r_sincos) to N
+// sind_r          = -sind_Nfloat * sind_Pi_by_16_hi + x
+// sind_r          = sind_r -sind_Nfloat * sind_Pi_by_16_lo
 { .mfi
-      add           sincos_GR_n         = sincos_GR_n, sincos_r_sincos
-      fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8
-      nop.i         999 
-};;
+      add       sind_GR_n = sind_GR_n, sind_r_sincos
+      fnma.s1  sind_r      = sind_NFLOAT, sind_Pi_by_16_hi, sind_NORM_f8
+      nop.i 999 ;;
+}
+
 
 // Get M (least k+1 bits of N)
 { .mmi
-      and           sincos_GR_m         = 0x1f,sincos_GR_n;;
-      nop.m         999
-      shl           sincos_GR_32m       = sincos_GR_m,5
-};;
+      and       sind_GR_m = 0x1f,sind_GR_n ;;
+      nop.m 999
+      shl       sind_GR_32m = sind_GR_m,5 ;;
+}
 
 // Add 32*M to address of sin_cos_beta table
+{ .mmi
+      add       sind_AD_2 = sind_GR_32m, sind_AD_beta_table
+      nop.m 999
+      nop.i 999 ;;
+}
+
 { .mfi
-      add           sincos_AD_2         = sincos_GR_32m, sincos_AD_1
-(p8)  fclass.m.unc  p10,p0              = f8,0x0b // For sin denorm. - set uflow
-      nop.i         999 
-};;
+      ldfe      sind_Sm = [sind_AD_2],16
+(p8)  fclass.m.unc p10,p0=f8,0x0b  // If sin, note denormal input to set uflow
+      nop.i 999 ;;
+}
 
-// Load Sin and Cos table value using obtained index m  (sincosf_AD_2)
 { .mfi
-      ldfe          sincos_Sm           = [sincos_AD_2],16
-(p9)  fclass.m.unc  p11,p0              = f8,0x0b // For cos denorm - set denorm
-      nop.i         999 
-};;
+      ldfe      sind_Cm = [sind_AD_2]
+      fnma.s1  sind_r      = sind_NFLOAT, sind_Pi_by_16_lo,  sind_r
+      nop.i 999 ;;
+}
 
-// sincos_r          = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
+// get rsq
 { .mfi
-      ldfe          sincos_Cm           = [sincos_AD_2]
-      fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2,  sincos_r
-      nop.i         999 
-};;
+      nop.m 999
+      fma.s1   sind_rsq  = sind_r, sind_r,   f0
+      nop.i 999
+}
+{ .mfi
+      nop.m 999
+      fmpy.s0  fp_tmp = fp_tmp,fp_tmp // fmpy forces inexact flag
+      nop.i 999 ;;
+}
 
-// get rsq = r*r
+// form P and Q series
 { .mfi
-      nop.m         999
-      fma.s1        sincos_rsq          = sincos_r, sincos_r,   f0 // r^2 = r*r
-      nop.i         999
+      nop.m 999
+      fma.s1      sind_P_temp1 = sind_rsq, sind_P4, sind_P3
+      nop.i 999
 }
+
 { .mfi
-      nop.m         999
-      fmpy.s0       fp_tmp              = fp_tmp,fp_tmp // forces inexact flag
-      nop.i         999 
-};;
+      nop.m 999
+      fma.s1      sind_Q_temp1 = sind_rsq, sind_Q4, sind_Q3
+      nop.i 999 ;;
+}
 
-// sincos_r_exact = sincos_r -sincos_Nfloat * sincos_Pi_by_16_3
+// get rcube and sm*rsq
 { .mfi
-      nop.m         999
-      fnma.s1 sincos_r_exact = sincos_NFLOAT, sincos_Pi_by_16_3, sincos_r
-      nop.i         999 
-};;
+      nop.m 999
+      fmpy.s1     sind_srsq    = sind_Sm,sind_rsq
+      nop.i 999
+}
 
-// Polynomials calculation 
-// P_1 = P4*r^2 + P3
-// Q_2 = Q4*r^2 + Q3
 { .mfi
-      nop.m         999
-      fma.s1        sincos_P_temp1      = sincos_rsq, sincos_P4, sincos_P3
-      nop.i         999
+      nop.m 999
+      fmpy.s1     sind_rcub    = sind_r, sind_rsq
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m         999
-      fma.s1        sincos_Q_temp1      = sincos_rsq, sincos_Q4, sincos_Q3
-      nop.i         999 
-};;
+      nop.m 999
+      fma.s1      sind_Q_temp2 = sind_rsq, sind_Q_temp1, sind_Q2
+      nop.i 999
+}
 
-// get rcube = r^3 and S[m]*r^2
 { .mfi
-      nop.m         999
-      fmpy.s1       sincos_srsq         = sincos_Sm,sincos_rsq
-      nop.i         999
+      nop.m 999
+      fma.s1      sind_P_temp2 = sind_rsq, sind_P_temp1, sind_P2
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m         999
-      fmpy.s1       sincos_rcub         = sincos_r_exact, sincos_rsq
-      nop.i         999 
-};;
+      nop.m 999
+      fma.s1      sind_Q       = sind_rsq, sind_Q_temp2, sind_Q1
+      nop.i 999
+}
 
-// Polynomials calculation 
-// Q_2 = Q_1*r^2 + Q2
-// P_1 = P_1*r^2 + P2
 { .mfi
-      nop.m         999
-      fma.s1        sincos_Q_temp2      = sincos_rsq, sincos_Q_temp1, sincos_Q2
-      nop.i         999
+      nop.m 999
+      fma.s1      sind_P       = sind_rsq, sind_P_temp2, sind_P1
+      nop.i 999 ;;
 }
+
+// Get final P and Q
 { .mfi
-      nop.m         999
-      fma.s1        sincos_P_temp2      = sincos_rsq, sincos_P_temp1, sincos_P2
-      nop.i         999 
-};;
+      nop.m 999
+      fma.s1   sind_Q = sind_srsq,sind_Q, sind_Sm
+      nop.i 999
+}
 
-// Polynomials calculation 
-// Q = Q_2*r^2 + Q1
-// P = P_2*r^2 + P1
 { .mfi
-      nop.m         999
-      fma.s1        sincos_Q            = sincos_rsq, sincos_Q_temp2, sincos_Q1
-      nop.i         999
+      nop.m 999
+      fma.s1   sind_P = sind_rcub,sind_P, sind_r
+      nop.i 999 ;;
 }
 
+// If sin(denormal), force underflow to be set
 { .mfi
-      nop.m         999
-      fma.s1        sincos_P            = sincos_rsq, sincos_P_temp2, sincos_P1
-      nop.i         999 
-};;
+      nop.m 999
+(p10) fmpy.d.s0 fp_tmp = f8,f8
+      nop.i 999 ;;
+}
 
-// Get final P and Q
-// Q = Q*S[m]*r^2 + S[m]
-// P = P*r^3 + r
+// Final calculation
+{ .mfb
+      nop.m 999
+      fma.d    f8     = sind_Cm, sind_P, sind_Q
+      br.ret.sptk    b0 ;;
+}
+.endp cos#
+ASM_SIZE_DIRECTIVE(cos#)
+
+
+
+.proc __libm_callout_1s                  // Call-out stub: hands sin() off to __libm_sin_double_dbx
+__libm_callout_1s:
+L(SIND_DBX):                             // Reached via "(p12) br.cond.spnt L(SIND_DBX)" in the common path
+.prologue
 { .mfi
-      nop.m         999
-      fma.s1        sincos_Q            = sincos_srsq,sincos_Q, sincos_Sm
-      nop.i         999
+        nop.m 0
+        nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs           // Keep ar.pfs so it can be restored after the call
 }
+;;
 
 { .mfi
-      nop.m         999
-      fma.s1        sincos_P            = sincos_rcub,sincos_P, sincos_r_exact
-      nop.i         999 
-};;
+        mov GR_SAVE_GP=gp                 // Keep gp; it is reloaded after the call returns
+        nop.f 0
+.save   b0, GR_SAVE_B0
+        mov GR_SAVE_B0=b0                 // Keep our return address; br.call below overwrites b0
+}
+
+.body
+{ .mib
+      nop.m 999
+      nop.i 999
+      br.call.sptk.many   b0=__libm_sin_double_dbx# ;;   // This stub itself never writes f8
+}
+;;
+
 
-// If sin(denormal), force underflow to be set
-.pred.rel "mutex",p10,p11
 { .mfi
-      nop.m         999
-(p10) fmpy.d.s0     fp_tmp              = f8,f8  // forces underflow flag
-      nop.i         999                          // for denormal sine args
+       mov gp        = GR_SAVE_GP         // Restore gp saved before the call
+       nop.f  999
+       mov b0        = GR_SAVE_B0         // Restore the original return address
 }
+;;
+
+{ .mib
+      nop.m 999
+      mov ar.pfs    = GR_SAVE_PFS         // Restore ar.pfs saved in the prologue
+      br.ret.sptk     b0 ;;               // Return to whoever called sin
+}
+.endp  __libm_callout_1s
+ASM_SIZE_DIRECTIVE(__libm_callout_1s)
+
+
+.proc __libm_callout_1c                  // Call-out stub: hands cos() off to __libm_cos_double_dbx
+__libm_callout_1c:
+L(COSD_DBX):                             // Reached via "(p11) br.cond.spnt L(COSD_DBX)" in the common path
+.prologue
 { .mfi
-      nop.m         999
-(p11) fma.d.s0      fp_tmp              = f8,f1, f8  // forces denormal flag
-      nop.i         999                              // for denormal cosine args
-};;
+        nop.m 0
+        nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs           // Keep ar.pfs so it can be restored after the call
+}
+;;
 
+{ .mfi
+        mov GR_SAVE_GP=gp                 // Keep gp; it is reloaded after the call returns
+        nop.f 0
+.save   b0, GR_SAVE_B0
+        mov GR_SAVE_B0=b0                 // Keep our return address; br.call below overwrites b0
+}
+
+.body
+{ .mib
+      nop.m 999
+      nop.i 999
+      br.call.sptk.many   b0=__libm_cos_double_dbx# ;;   // This stub itself never writes f8
+}
+;;
+
+
+{ .mfi
+       mov gp        = GR_SAVE_GP         // Restore gp saved before the call
+       nop.f  999
+       mov b0        = GR_SAVE_B0         // Restore the original return address
+}
+;;
+
+{ .mib
+      nop.m 999
+      mov ar.pfs    = GR_SAVE_PFS         // Restore ar.pfs saved in the prologue
+      br.ret.sptk     b0 ;;               // Return to whoever called cos
+}
+.endp  __libm_callout_1c
+ASM_SIZE_DIRECTIVE(__libm_callout_1c)
+
+
+// ====================================================================
+// ====================================================================
+
+// These functions calculate the sin and cos for inputs
+// greater than 2^10
+// __libm_sin_double_dbx# and __libm_cos_double_dbx#
+
+// *********************************************************************
+// *********************************************************************
+//
+// Function:   Combined sin(x) and cos(x), where
+//
+//             sin(x) = sine(x), for double precision x values
+//             cos(x) = cosine(x), for double precision x values
+//
+// *********************************************************************
+//
+// Accuracy:       Within .7 ulps for 80-bit floating point values
+//                 Very accurate for double precision values
+//
+// *********************************************************************
+//
+// Resources Used:
+//
+//    Floating-Point Registers: f8 (Input and Return Value)
+//                              f32-f99
+//
+//    General Purpose Registers:
+//      r32-r43
+//      r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
+//
+//    Predicate Registers:      p6-p13
+//
+// *********************************************************************
+//
+//  IEEE Special Conditions:
+//
+//    Denormal  fault raised on denormal inputs
+//    Overflow exceptions do not occur
+//    Underflow exceptions raised when appropriate for sin
+//    (No specialized error handling for this routine)
+//    Inexact raised when appropriate by algorithm
+//
+//    sin(SNaN) = QNaN
+//    sin(QNaN) = QNaN
+//    sin(inf) = QNaN
+//    sin(+/-0) = +/-0
+//    cos(inf) = QNaN
+//    cos(SNaN) = QNaN
+//    cos(QNaN) = QNaN
+//    cos(0) = 1
+//
+// *********************************************************************
+//
+//  Mathematical Description
+//  ========================
+//
+//  The computation of FSIN and FCOS is best handled in one piece of
+//  code. The main reason is that given any argument Arg, computation
+//  of trigonometric functions first calculate N and an approximation
+//  to alpha where
+//
+//  Arg = N pi/2 + alpha, |alpha| <= pi/4.
+//
+//  Since
+//
+//  cos( Arg ) = sin( (N+1) pi/2 + alpha ),
+//
+//  therefore, the code for computing sine will produce cosine as long
+//  as 1 is added to N immediately after the argument reduction
+//  process.
+//
+//  Let M = N if sine
+//      N+1 if cosine.
+//
+//  Now, given
+//
+//  Arg = M pi/2  + alpha, |alpha| <= pi/4,
+//
+//  let I = M mod 4, or I be the two lsb of M when M is represented
+//  as 2's complement. I = [i_0 i_1]. Then
+//
+//  sin( Arg ) = (-1)^i_0  sin( alpha )	if i_1 = 0,
+//             = (-1)^i_0  cos( alpha )     if i_1 = 1.
+//
+//  For example:
+//       if M = -1, I = 11
+//         sin (-pi/2 + alpha) = (-1) cos (alpha)
+//       if M = 0, I = 00
+//         sin (alpha) = sin (alpha)
+//       if M = 1, I = 01
+//         sin (pi/2 + alpha) = cos (alpha)
+//       if M = 2, I = 10
+//         sin (pi + alpha) = (-1) sin (alpha)
+//       if M = 3, I = 11
+//         sin ((3/2)pi + alpha) = (-1) cos (alpha)
+//
+//  The value of alpha is obtained by argument reduction and
+//  represented by two working precision numbers r and c where
+//
+//  alpha =  r  +  c     accurately.
+//
+//  The reduction method is described in a previous write up.
+//  The argument reduction scheme identifies 4 cases. For Cases 2
+//  and 4, because |alpha| is small, sin(r+c) and cos(r+c) can be
+//  computed very easily by 2 or 3 terms of the Taylor series
+//  expansion as follows:
+//
+//  Case 2:
+//  -------
+//
+//  sin(r + c) = r + c - r^3/6	accurately
+//  cos(r + c) = 1 - 2^(-67)	accurately
+//
+//  Case 4:
+//  -------
+//
+//  sin(r + c) = r + c - r^3/6 + r^5/120	accurately
+//  cos(r + c) = 1 - r^2/2 + r^4/24		accurately
+//
+//  The only cases left are Cases 1 and 3 of the argument reduction
+//  procedure. These two cases will be merged since after the
+//  argument is reduced in either cases, we have the reduced argument
+//  represented as r + c and that the magnitude |r + c| is not small
+//  enough to allow the usage of a very short approximation.
+//
+//  The required calculation is either
+//
+//  sin(r + c)  =  sin(r)  +  correction,  or
+//  cos(r + c)  =  cos(r)  +  correction.
+//
+//  Specifically,
+//
+//	sin(r + c) = sin(r) + c sin'(r) + O(c^2)
+//		   = sin(r) + c cos (r) + O(c^2)
+//		   = sin(r) + c(1 - r^2/2)  accurately.
+//  Similarly,
+//
+//	cos(r + c) = cos(r) - c sin(r) + O(c^2)
+//		   = cos(r) - c(r - r^3/6)  accurately.
+//
+//  We therefore concentrate on accurately calculating sin(r) and
+//  cos(r) for a working-precision number r, |r| <= pi/4 to within
+//  0.1% or so.
+//
+//  The greatest challenge of this task is that the second terms of
+//  the Taylor series
+//
+//	r - r^3/3! + r^5/5! - ...
+//
+//  and
+//
+//	1 - r^2/2! + r^4/4! - ...
+//
+//  are not very small when |r| is close to pi/4 and the rounding
+//  errors will be a concern if simple polynomial accumulation is
+//  used. When |r| < 2^-3, however, the second terms will be small
+//  enough (6 bits or so of right shift) that a normal Horner
+//  recurrence suffices. Hence there are two cases that we consider
+//  in the accurate computation of sin(r) and cos(r), |r| <= pi/4.
+//
+//  Case small_r: |r| < 2^(-3)
+//  --------------------------
+//
+//  Since Arg = M pi/2 + r + c accurately, and M mod 4 is [i_0 i_1],
+//  we have
+//
+//	sin(Arg) = (-1)^i_0 * sin(r + c)	if i_1 = 0
+//		 = (-1)^i_0 * cos(r + c) 	if i_1 = 1
+//
+//  can be accurately approximated by
+//
+//  sin(Arg) = (-1)^i_0 * [sin(r) + c]	if i_1 = 0
+//           = (-1)^i_0 * [cos(r) - c*r] if i_1 = 1
+//
+//  because |r| is small and thus the second terms in the correction
+//  are unnecessary.
+//
+//  Finally, sin(r) and cos(r) are approximated by polynomials of
+//  moderate lengths.
+//
+//  sin(r) =  r + S_1 r^3 + S_2 r^5 + ... + S_5 r^11
+//  cos(r) =  1 + C_1 r^2 + C_2 r^4 + ... + C_5 r^10
+//
+//  We can make use of predicates to selectively calculate
+//  sin(r) or cos(r) based on i_1.
+//
+//  Case normal_r: 2^(-3) <= |r| <= pi/4
+//  ------------------------------------
+//
+//  This case is more likely than the previous one if one considers
+//  r to be uniformly distributed in [-pi/4 pi/4]. Again,
+//
+//  sin(Arg) = (-1)^i_0 * sin(r + c)	if i_1 = 0
+//           = (-1)^i_0 * cos(r + c) 	if i_1 = 1.
+//
+//  Because |r| is now larger, we need one extra term in the
+//  correction. sin(Arg) can be accurately approximated by
+//
+//  sin(Arg) = (-1)^i_0 * [sin(r) + c(1-r^2/2)]      if i_1 = 0
+//           = (-1)^i_0 * [cos(r) - c*r*(1 - r^2/6)]    i_1 = 1.
+//
+//  Finally, sin(r) and cos(r) are approximated by polynomials of
+//  moderate lengths.
+//
+//	sin(r) =  r + PP_1_hi r^3 + PP_1_lo r^3 +
+//	              PP_2 r^5 + ... + PP_8 r^17
+//
+//	cos(r) =  1 + QQ_1 r^2 + QQ_2 r^4 + ... + QQ_8 r^16
+//
+//  where PP_1_hi is only about 16 bits long and QQ_1 is -1/2.
+//  The crux in accurate computation is to calculate
+//
+//  r + PP_1_hi r^3   or  1 + QQ_1 r^2
+//
+//  accurately as two pieces: U_hi and U_lo. The way to achieve this
+//  is to obtain r_hi as a 10 sig. bit number that approximates r to
+//  roughly 8 bits or so of accuracy. (One convenient way is
+//
+//  r_hi := frcpa( frcpa( r ) ).)
+//
+//  This way,
+//
+//	r + PP_1_hi r^3 =  r + PP_1_hi r_hi^3 +
+//	                        PP_1_hi (r^3 - r_hi^3)
+//		        =  [r + PP_1_hi r_hi^3]  +
+//			   [PP_1_hi (r - r_hi)
+//			      (r^2 + r_hi r + r_hi^2) ]
+//		        =  U_hi  +  U_lo
+//
+//  Since r_hi is only 10 bit long and PP_1_hi is only 16 bit long,
+//  PP_1_hi * r_hi^3 is only at most 46 bit long and thus computed
+//  exactly. Furthermore, r and PP_1_hi r_hi^3 are of opposite sign
+//  and that there is no more than 8 bit shift off between r and
+//  PP_1_hi * r_hi^3. Hence the sum, U_hi, is representable and thus
+//  calculated without any error. Finally, the fact that
+//
+//	|U_lo| <= 2^(-8) |U_hi|
+//
+//  says that U_hi + U_lo is approximating r + PP_1_hi r^3 to roughly
+//  8 extra bits of accuracy.
+//
+//  Similarly,
+//
+//	1 + QQ_1 r^2  =  [1 + QQ_1 r_hi^2]  +
+//	                    [QQ_1 (r - r_hi)(r + r_hi)]
+//		      =  U_hi  +  U_lo.
+//
+//  Summarizing, we calculate r_hi = frcpa( frcpa( r ) ).
+//
+//  If i_1 = 0, then
+//
+//    U_hi := r + PP_1_hi * r_hi^3
+//    U_lo := PP_1_hi * (r - r_hi) * (r^2 + r*r_hi + r_hi^2)
+//    poly := PP_1_lo r^3 + PP_2 r^5 + ... + PP_8 r^17
+//    correction := c * ( 1 + C_1 r^2 )
+//
+//  Else ...i_1 = 1
+//
+//    U_hi := 1 + QQ_1 * r_hi * r_hi
+//    U_lo := QQ_1 * (r - r_hi) * (r + r_hi)
+//    poly := QQ_2 * r^4 + QQ_3 * r^6 + ... + QQ_8 r^16
+//    correction := -c * r * (1 + S_1 * r^2)
+//
+//  End
+//
+//  Finally,
+//
+//	V := poly + ( U_lo + correction )
+//
+//                 /    U_hi  +  V         if i_0 = 0
+//	result := |
+//                 \  (-U_hi) -  V         if i_0 = 1
+//
+//  It is important that in the last step, negation of U_hi is
+//  performed prior to the subtraction which is to be performed in
+//  the user-set rounding mode.
+//
+//
+//  Algorithmic Description
+//  =======================
+//
+//  The argument reduction algorithm is tightly integrated into FSIN
+//  and FCOS which share the same code. The following is complete and
+//  self-contained. The argument reduction description given
+//  previously is repeated below.
+//
+//
+//  Step 0. Initialization.
+//
+//   If FSIN is invoked, set N_inc := 0; else if FCOS is invoked,
+//   set N_inc := 1.
+//
+//  Step 1. Check for exceptional and special cases.
+//
+//   * If Arg is +-0, +-inf, NaN, NaT, go to Step 10 for special
+//     handling.
+//   * If |Arg| < 2^24, go to Step 2 for reduction of moderate
+//     arguments. This is the most likely case.
+//   * If |Arg| < 2^63, go to Step 8 for pre-reduction of large
+//     arguments.
+//   * If |Arg| >= 2^63, go to Step 10 for special handling.
+//
+//  Step 2. Reduction of moderate arguments.
+//
+//  If |Arg| < pi/4 	...quick branch
+//     N_fix := N_inc	(integer)
+//     r     := Arg
+//     c     := 0.0
+//     Branch to Step 4, Case_1_complete
+//  Else 		...cf. argument reduction
+//     N     := Arg * two_by_PI	(fp)
+//     N_fix := fcvt.fx( N )	(int)
+//     N     := fcvt.xf( N_fix )
+//     N_fix := N_fix + N_inc
+//     s     := Arg - N * P_1	(first piece of pi/2)
+//     w     := -N * P_2	(second piece of pi/2)
+//
+//     If |s| >= 2^(-33)
+//        go to Step 3, Case_1_reduce
+//     Else
+//        go to Step 7, Case_2_reduce
+//     Endif
+//  Endif
+//
+//  Step 3. Case_1_reduce.
+//
+//  r := s + w
+//  c := (s - r) + w	...observe order
+//
+//  Step 4. Case_1_complete
+//
+//  ...At this point, the reduced argument alpha is
+//  ...accurately represented as r + c.
+//  If |r| < 2^(-3), go to Step 6, small_r.
+//
+//  Step 5. Normal_r.
+//
+//  Let [i_0 i_1] be the 2 lsb of N_fix.
+//  FR_rsq  := r * r
+//  r_hi := frcpa( frcpa( r ) )
+//  r_lo := r - r_hi
+//
+//  If i_1 = 0, then
+//    poly := r*FR_rsq*(PP_1_lo + FR_rsq*(PP_2 + ... FR_rsq*PP_8))
+//    U_hi := r + PP_1_hi*r_hi*r_hi*r_hi	...any order
+//    U_lo := PP_1_hi*r_lo*(r*r + r*r_hi + r_hi*r_hi)
+//    correction := c + c*C_1*FR_rsq		...any order
+//  Else
+//    poly := FR_rsq*FR_rsq*(QQ_2 + FR_rsq*(QQ_3 + ... + FR_rsq*QQ_8))
+//    U_hi := 1 + QQ_1 * r_hi * r_hi		...any order
+//    U_lo := QQ_1 * r_lo * (r + r_hi)
+//    correction := -c*(r + S_1*FR_rsq*r)	...any order
+//  Endif
+//
+//  V := poly + (U_lo + correction)	...observe order
+//
+//  result := (i_0 == 0?   1.0 : -1.0)
+//
+//  Last instruction in user-set rounding mode
+//
+//  result := (i_0 == 0?   result*U_hi + V :
+//                        result*U_hi - V)
+//
+//  Return
+//
+//  Step 6. Small_r.
+//
+//  ...Use flush to zero mode without causing exception
+//    Let [i_0 i_1] be the two lsb of N_fix.
+//
+//  FR_rsq := r * r
+//
+//  If i_1 = 0 then
+//     z := FR_rsq*FR_rsq; z := FR_rsq*z *r
+//     poly_lo := S_3 + FR_rsq*(S_4 + FR_rsq*S_5)
+//     poly_hi := r*FR_rsq*(S_1 + FR_rsq*S_2)
+//     correction := c
+//     result := r
+//  Else
+//     z := FR_rsq*FR_rsq; z := FR_rsq*z
+//     poly_lo := C_3 + FR_rsq*(C_4 + FR_rsq*C_5)
+//     poly_hi := FR_rsq*(C_1 + FR_rsq*C_2)
+//     correction := -c*r
+//     result := 1
+//  Endif
+//
+//  poly := poly_hi + (z * poly_lo + correction)
+//
+//  If i_0 = 1, result := -result
+//
+//  Last operation. Perform in user-set rounding mode
+//
+//  result := (i_0 == 0?     result + poly :
+//                          result - poly )
+//  Return
+//
+//  Step 7. Case_2_reduce.
+//
+//  ...Refer to the write up for argument reduction for
+//  ...rationale. The reduction algorithm below is taken from
+//  ...argument reduction description and integrated here.
+//
+//  w := N*P_3
+//  U_1 := N*P_2 + w		...FMA
+//  U_2 := (N*P_2 - U_1) + w	...2 FMA
+//  ...U_1 + U_2 is  N*(P_2+P_3) accurately
+//
+//  r := s - U_1
+//  c := ( (s - r) - U_1 ) - U_2
+//
+//  ...The mathematical sum r + c approximates the reduced
+//  ...argument accurately. Note that although compared to
+//  ...Case 1, this case requires much more work to reduce
+//  ...the argument, the subsequent calculation needed for
+//  ...any of the trigonometric function is very little because
+//  ...|alpha| < 1.01*2^(-33) and thus two terms of the
+//  ...Taylor series expansion suffices.
+//
+//  If i_1 = 0 then
+//     poly := c + S_1 * r * r * r	...any order
+//     result := r
+//  Else
+//     poly := -2^(-67)
+//     result := 1.0
+//  Endif
+//
+//  If i_0 = 1, result := -result
+//
+//  Last operation. Perform in user-set rounding mode
+//
+//  result := (i_0 == 0?     result + poly :
+//                           result - poly )
+//
+//  Return
+//
+//
+//  Step 8. Pre-reduction of large arguments.
+//
+//  ...Again, the following reduction procedure was described
+//  ...in the separate write up for argument reduction, which
+//  ...is tightly integrated here.
+
+//  N_0 := Arg * Inv_P_0
+//  N_0_fix := fcvt.fx( N_0 )
+//  N_0 := fcvt.xf( N_0_fix)
+
+//  Arg' := Arg - N_0 * P_0
+//  w := N_0 * d_1
+//  N := Arg' * two_by_PI
+//  N_fix := fcvt.fx( N )
+//  N := fcvt.xf( N_fix )
+//  N_fix := N_fix + N_inc
+//
+//  s := Arg' - N * P_1
+//  w := w - N * P_2
+//
+//  If |s| >= 2^(-14)
+//     go to Step 3
+//  Else
+//     go to Step 9
+//  Endif
+//
+//  Step 9. Case_4_reduce.
+//
+//    ...first obtain N_0*d_1 and -N*P_2 accurately
+//   U_hi := N_0 * d_1		V_hi := -N*P_2
+//   U_lo := N_0 * d_1 - U_hi	V_lo := -N*P_2 - V_hi	...FMAs
+//
+//   ...compute the contribution from N_0*d_1 and -N*P_3
+//   w := -N*P_3
+//   w := w + N_0*d_2
+//   t := U_lo + V_lo + w		...any order
+//
+//   ...at this point, the mathematical value
+//   ...s + U_hi + V_hi  + t approximates the true reduced argument
+//   ...accurately. Just need to compute this accurately.
+//
+//   ...Calculate U_hi + V_hi accurately:
+//   A := U_hi + V_hi
+//   if |U_hi| >= |V_hi| then
+//      a := (U_hi - A) + V_hi
+//   else
+//      a := (V_hi - A) + U_hi
+//   endif
+//   ...order in computing "a" must be observed. This branch is
+//   ...best implemented by predicates.
+//   ...A + a  is U_hi + V_hi accurately. Moreover, "a" is
+//   ...much smaller than A: |a| <= (1/2)ulp(A).
+//
+//   ...Just need to calculate   s + A + a + t
+//   C_hi := s + A		t := t + a
+//   C_lo := (s - C_hi) + A
+//   C_lo := C_lo + t
+//
+//   ...Final steps for reduction
+//   r := C_hi + C_lo
+//   c := (C_hi - r) + C_lo
+//
+//   ...At this point, we have r and c
+//   ...And all we need is a couple of terms of the corresponding
+//   ...Taylor series.
+//
+//   If i_1 = 0
+//      poly := c + r*FR_rsq*(S_1 + FR_rsq*S_2)
+//      result := r
+//   Else
+//      poly := FR_rsq*(C_1 + FR_rsq*C_2)
+//      result := 1
+//   Endif
+//
+//   If i_0 = 1, result := -result
+//
+//   Last operation. Perform in user-set rounding mode
+//
+//   result := (i_0 == 0?     result + poly :
+//                            result - poly )
+//   Return
+//
+//   Large Arguments: For arguments above 2**63, a Payne-Hanek
+//   style argument reduction is used and pi_by_2 reduce is called.
+//
+
+
+#ifdef _LIBC
+.rodata                                  // Built as part of libc: table goes in read-only data
+#else
+.data                                    // Otherwise: ordinary data section
+#endif
+.align 64                                // Table starts on a 64-byte boundary
+
+FSINCOS_CONSTANTS:                       // Each row is four data4 words (16 bytes); the final row has three
+ASM_TYPE_DIRECTIVE(FSINCOS_CONSTANTS,@object)
+data4 0x4B800000, 0xCB800000, 0x00000000,0x00000000 // two**24, -two**24 (single-precision bit patterns), padding
+data4 0x4E44152A, 0xA2F9836E, 0x00003FFE,0x00000000 // Inv_pi_by_2 = 2/pi; words: significand low, significand high, sign+exponent, pad
+data4 0xCE81B9F1, 0xC84D32B0, 0x00004016,0x00000000 // P_0
+data4 0x2168C235, 0xC90FDAA2, 0x00003FFF,0x00000000 // P_1 (first piece of pi/2)
+data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD,0x00000000 // P_2 (second piece of pi/2)
+data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C,0x00000000 // P_3
+data4 0x5F000000, 0xDF000000, 0x00000000,0x00000000 // two_to_63, -two_to_63 (single-precision bit patterns)
+data4 0x6EC6B45A, 0xA397E504, 0x00003FE7,0x00000000 // Inv_P_0
+data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF,0x00000000 // d_1
+data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C,0x00000000 // d_2
+data4 0x2168C234, 0xC90FDAA2, 0x00003FFE,0x00000000 // pi_by_4
+data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE,0x00000000 // neg_pi_by_4
+data4 0x3E000000, 0xBE000000, 0x00000000,0x00000000 // two**-3, -two**-3 (single-precision bit patterns)
+data4 0x2F000000, 0xAF000000, 0x9E000000,0x00000000 // two**-33, -two**-33, -two**-67 (single-precision bit patterns)
+data4 0xA21C0BC9, 0xCC8ABEBC, 0x00003FCE,0x00000000 // PP_8
+data4 0x720221DA, 0xD7468A05, 0x0000BFD6,0x00000000 // PP_7
+data4 0x640AD517, 0xB092382F, 0x00003FDE,0x00000000 // PP_6
+data4 0xD1EB75A4, 0xD7322B47, 0x0000BFE5,0x00000000 // PP_5
+data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1 (same words appear again in the C_1..C_5 group below)
+data4 0x00000000, 0xAAAA0000, 0x0000BFFC,0x00000000 // PP_1_hi
+data4 0xBAF69EEA, 0xB8EF1D2A, 0x00003FEC,0x00000000 // PP_4
+data4 0x0D03BB69, 0xD00D00D0, 0x0000BFF2,0x00000000 // PP_3
+data4 0x88888962, 0x88888888, 0x00003FF8,0x00000000 // PP_2
+data4 0xAAAB0000, 0xAAAAAAAA, 0x0000BFEC,0x00000000 // PP_1_lo
+data4 0xC2B0FE52, 0xD56232EF, 0x00003FD2,0x00000000 // QQ_8
+data4 0x2B48DCA6, 0xC9C99ABA, 0x0000BFDA,0x00000000 // QQ_7
+data4 0x9C716658, 0x8F76C650, 0x00003FE2,0x00000000 // QQ_6
+data4 0xFDA8D0FC, 0x93F27DBA, 0x0000BFE9,0x00000000 // QQ_5
+data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1 (same words appear again in the S_1..S_5 group below)
+data4 0x00000000, 0x80000000, 0x0000BFFE,0x00000000 // QQ_1 (= -1/2)
+data4 0x0C6E5041, 0xD00D00D0, 0x00003FEF,0x00000000 // QQ_4
+data4 0x0B607F60, 0xB60B60B6, 0x0000BFF5,0x00000000 // QQ_3
+data4 0xAAAAAA9B, 0xAAAAAAAA, 0x00003FFA,0x00000000 // QQ_2
+data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1
+data4 0xAAAA719F, 0xAAAAAAAA, 0x00003FFA,0x00000000 // C_2
+data4 0x0356F994, 0xB60B60B6, 0x0000BFF5,0x00000000 // C_3
+data4 0xB2385EA9, 0xD00CFFD5, 0x00003FEF,0x00000000 // C_4
+data4 0x292A14CD, 0x93E4BD18, 0x0000BFE9,0x00000000 // C_5
+data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1
+data4 0x888868DB, 0x88888888, 0x00003FF8,0x00000000 // S_2
+data4 0x055EFD4B, 0xD00D00D0, 0x0000BFF2,0x00000000 // S_3
+data4 0x839730B9, 0xB8EF1C5D, 0x00003FEC,0x00000000 // S_4
+data4 0xE5B3F492, 0xD71EA3A4, 0x0000BFE5,0x00000000 // S_5
+data4 0x38800000, 0xB8800000, 0x00000000            // two**-14, -two**-14 (single-precision bit patterns); only three words in this row
+ASM_SIZE_DIRECTIVE(FSINCOS_CONSTANTS)
+
+FR_Input_X        = f8   // argument on entry; the final result is written here before br.ret
+FR_Neg_Two_to_M3  = f32  // same register as FR_Two_to_63
+FR_Two_to_63      = f32  // same register as FR_Neg_Two_to_M3
+FR_Two_to_24      = f33  // same register as FR_Pi_by_4
+FR_Pi_by_4        = f33  // same register as FR_Two_to_24
+FR_Two_to_M14     = f34
+FR_Two_to_M33     = f35
+FR_Neg_Two_to_24  = f36  // same register as FR_Neg_Pi_by_4
+FR_Neg_Pi_by_4    = f36  // same register as FR_Neg_Two_to_24
+FR_Neg_Two_to_M14 = f37
+FR_Neg_Two_to_M33 = f38
+FR_Neg_Two_to_M67 = f39
+FR_Inv_pi_by_2    = f40
+FR_N_float        = f41
+FR_N_fix          = f42
+FR_P_1            = f43
+FR_P_2            = f44
+FR_P_3            = f45
+FR_s              = f46
+FR_w              = f47
+FR_c              = f48
+FR_r              = f49
+FR_Z              = f50
+FR_A              = f51
+FR_a              = f52
+FR_t              = f53
+FR_U_1            = f54
+FR_U_2            = f55
+FR_C_1            = f56
+FR_C_2            = f57
+FR_C_3            = f58
+FR_C_4            = f59
+FR_C_5            = f60
+FR_S_1            = f61
+FR_S_2            = f62
+FR_S_3            = f63
+FR_S_4            = f64
+FR_S_5            = f65
+FR_poly_hi        = f66
+FR_poly_lo        = f67
+FR_r_hi           = f68
+FR_r_lo           = f69
+FR_rsq            = f70
+FR_r_cubed        = f71
+FR_C_hi           = f72
+FR_N_0            = f73
+FR_d_1            = f74
+FR_V              = f75  // same register as FR_V_hi
+FR_V_hi           = f75  // same register as FR_V
+FR_V_lo           = f76
+FR_U_hi           = f77
+FR_U_lo           = f78
+FR_U_hiabs        = f79
+FR_V_hiabs        = f80
+FR_PP_8           = f81  // each PP_k shares its register with QQ_k (f81-f87)
+FR_QQ_8           = f81
+FR_PP_7           = f82
+FR_QQ_7           = f82
+FR_PP_6           = f83
+FR_QQ_6           = f83
+FR_PP_5           = f84
+FR_QQ_5           = f84
+FR_PP_4           = f85
+FR_QQ_4           = f85
+FR_PP_3           = f86
+FR_QQ_3           = f86
+FR_PP_2           = f87
+FR_QQ_2           = f87
+FR_QQ_1           = f88
+FR_N_0_fix        = f89
+FR_Inv_P_0        = f90
+FR_corr           = f91
+FR_poly           = f92
+FR_d_2            = f93
+FR_Two_to_M3      = f94  // same register as FR_Neg_Two_to_63
+FR_Neg_Two_to_63  = f94  // same register as FR_Two_to_M3
+FR_P_0            = f95
+FR_C_lo           = f96
+FR_PP_1           = f97
+FR_PP_1_lo        = f98
+FR_ArgPrime       = f99
+
+GR_Table_Base  = r32  // cursor into FSINCOS_CONSTANTS (address loaded via @ltoff)
+GR_Table_Base1 = r33  // second cursor, initialised to table base + 96
+GR_i_0         = r34  // bit 1 of GR_N_Inc
+GR_i_1         = r35  // bit 0 of GR_N_Inc
+GR_N_Inc       = r36  // integer N (absent when |x| < pi/4) plus GR_Sin_or_Cos
+GR_Sin_or_Cos  = r37  // 0 from the sine entry, 1 from the cosine entry
+
+GR_SAVE_B0     = r39  // the shared prologue writes b0 here using the raw name r39
+GR_SAVE_GP     = r40  // the shared prologue writes gp here using the raw name r40
+GR_SAVE_PFS    = r41  // the shared prologue writes ar.pfs here using the raw name r41
+
+.section .text
+.proc __libm_sin_double_dbx#
+.align 64
+__libm_sin_double_dbx:   // sine entry: set selector to 0, load table address, join the shared body
+
+{ .mlx
+alloc GR_Table_Base = ar.pfs,0,12,2,0   // frame: 0 inputs, 12 locals, 2 outputs; the old ar.pfs copy in r32 is overwritten below
+       movl GR_Sin_or_Cos = 0x0 ;;   // selector 0 = sine (the cosine entry uses 1)
+}
+
+{ .mmi
+      nop.m 999
+      addl           GR_Table_Base   = @ltoff(FSINCOS_CONSTANTS#), gp   // address of the linkage-table slot for the constant table
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 GR_Table_Base = [GR_Table_Base]   // fetch the table's address from that slot
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
+{ .mib
+      nop.m 999
+      nop.i 999
+       br.cond.sptk L(SINCOS_CONTINUE) ;;   // shared body lives inside __libm_cos_double_dbx
+}
+
+.endp __libm_sin_double_dbx#
+ASM_SIZE_DIRECTIVE(__libm_sin_double_dbx)
+
+.section .text
+.proc __libm_cos_double_dbx#
+__libm_cos_double_dbx:
+
+{ .mlx
+alloc GR_Table_Base= ar.pfs,0,12,2,0
+       movl GR_Sin_or_Cos = 0x1 ;;
+}
+
+{ .mmi
+      nop.m 999
+      addl           GR_Table_Base   = @ltoff(FSINCOS_CONSTANTS#), gp
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+//
+//     Load Table Address
+//
+L(SINCOS_CONTINUE):
+
+{ .mmi
+       add GR_Table_Base1 = 96, GR_Table_Base
+       ldfs	FR_Two_to_24 = [GR_Table_Base], 4
+       nop.i 999
+}
+;;
+
+{ .mmi
+      nop.m 999
+//
+//     Load 2**24, load 2**63.
+//
+       ldfs	FR_Neg_Two_to_24 = [GR_Table_Base], 12
+       mov   r41 = ar.pfs ;;
+}
+
+{ .mfi
+       ldfs	FR_Two_to_63 = [GR_Table_Base1], 4
+//
+//     Check for unnormals - unsupported operands. We do not want
+//     to generate denormal exception
+//     Check for NatVals, QNaNs, SNaNs, +/-Infs
+//     Check for EM unsupporteds
+//     Check for Zero
+//
+       fclass.m.unc  p6, p8 =  FR_Input_X, 0x1E3
+       mov   r40 = gp ;;
+}
+
+{ .mfi
+      nop.m 999
+       fclass.nm.unc p8, p0 =  FR_Input_X, 0x1FF
+// GR_Sin_or_Cos denotes the entry point taken: 0 for sine, 1 for cosine
+       mov   r39 = b0
+}
 
-// Final calculation
-// result = C[m]*P + Q
 { .mfb
-      nop.m         999
-      fma.d.s0      f8                  = sincos_Cm, sincos_P, sincos_Q
-      br.ret.sptk   b0  // Exit for common path
-};;
+       ldfs	FR_Neg_Two_to_63 = [GR_Table_Base1], 12
+       fclass.m.unc p10, p0 = FR_Input_X, 0x007
+(p6)   br.cond.spnt L(SINCOS_SPECIAL) ;;
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+(p8)   br.cond.spnt L(SINCOS_SPECIAL) ;;
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+//
+//     Branch if +/- NaN, Inf.
+//     Load -2**24, load -2**63.
+//
+(p10)  br.cond.spnt L(SINCOS_ZERO) ;;
+}
+
+{ .mmb
+       ldfe	FR_Inv_pi_by_2 = [GR_Table_Base], 16
+       ldfe	FR_Inv_P_0 = [GR_Table_Base1], 16
+      nop.b 999 ;;
+}
+
+{ .mmb
+      nop.m 999
+       ldfe		FR_d_1 = [GR_Table_Base1], 16
+      nop.b 999 ;;
+}
+//
+//     Raise possible denormal operand flag with useful fcmp
+//     Is x <= -2**63
+//     Load Inv_P_0 for pre-reduction
+//     Load Inv_pi_by_2
+//
+
+{ .mmb
+       ldfe		FR_P_0 = [GR_Table_Base], 16
+       ldfe	FR_d_2 = [GR_Table_Base1], 16
+      nop.b 999 ;;
+}
+//
+//     Load P_0
+//     Load d_1
+//     Is x >= 2**63
+//     Is x <= -2**24?
+//
+
+{ .mmi
+       ldfe	FR_P_1 = [GR_Table_Base], 16 ;;
+//
+//     Load P_1
+//     Load d_2
+//     Is x >= 2**24?
+//
+       ldfe	FR_P_2 = [GR_Table_Base], 16
+      nop.i 999 ;;
+}
+
+{ .mmf
+      nop.m 999
+       ldfe	FR_P_3 = [GR_Table_Base], 16
+       fcmp.le.unc.s1	p7, p8 = FR_Input_X, FR_Neg_Two_to_24
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Branch if +/- zero.
+//     Decide about the paths to take:
+//     If -2**24 < FR_Input_X < 2**24 - CASE 1 OR 2
+//     OTHERWISE - CASE 3 OR 4
+//
+       fcmp.le.unc.s0	p10, p11 = FR_Input_X, FR_Neg_Two_to_63
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p8)   fcmp.ge.s1 p7, p0 = FR_Input_X, FR_Two_to_24
+      nop.i 999
+}
+
+{ .mfi
+       ldfe	FR_Pi_by_4 = [GR_Table_Base1], 16
+(p11)  fcmp.ge.s1	p10, p0 = FR_Input_X, FR_Two_to_63
+      nop.i 999 ;;
+}
+
+{ .mmi
+       ldfe	FR_Neg_Pi_by_4 = [GR_Table_Base1], 16 ;;
+       ldfs	FR_Two_to_M3 = [GR_Table_Base1], 4
+      nop.i 999 ;;
+}
+
+{ .mib
+       ldfs	FR_Neg_Two_to_M3 = [GR_Table_Base1], 12
+      nop.i 999
+//
+//     Load P_2
+//     Load P_3
+//     Load pi_by_4
+//     Load neg_pi_by_4
+//     Load 2**(-3)
+//     Load -2**(-3).
+//
+(p10)  br.cond.spnt L(SINCOS_ARG_TOO_LARGE) ;;
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+//
+//     Branch out if x >= 2**63. Use Payne-Hanek Reduction
+//
+(p7)   br.cond.spnt L(SINCOS_LARGER_ARG) ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Branch if Arg <= -2**24 or Arg >= 2**24 and use pre-reduction.
+//
+       fma.s1	FR_N_float = FR_Input_X, FR_Inv_pi_by_2, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+       fcmp.lt.unc.s1	p6, p7 = FR_Input_X, FR_Pi_by_4
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Select the case when |Arg| < pi/4
+//     Else Select the case when |Arg| >= pi/4
+//
+       fcvt.fx.s1 FR_N_fix = FR_N_float
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     N  = Arg * 2/pi
+//     Check if Arg < pi/4
+//
+(p6)   fcmp.gt.s1 p6, p7 = FR_Input_X, FR_Neg_Pi_by_4
+      nop.i 999 ;;
+}
+//
+//     Case 2: Convert integer N_fix back to normalized floating-point value.
+//     Case 1: p8 is only affected  when p6 is set
+//
+
+{ .mfi
+(p7)   ldfs FR_Two_to_M33 = [GR_Table_Base1], 4
+//
+//     Grab the integer part of N and call it N_fix
+//
+(p6)   fmerge.se FR_r = FR_Input_X, FR_Input_X
+//     If |x| < pi/4, r = x and c = 0
+//     If |x| < pi/4, is x < 2**(-3).
+//     r = Arg
+//     c = 0
+(p6)   mov GR_N_Inc = GR_Sin_or_Cos ;;
+}
+
+{ .mmf
+      nop.m 999
+(p7)   ldfs FR_Neg_Two_to_M33 = [GR_Table_Base1], 4
+(p6)   fmerge.se FR_c = f0, f0
+}
+
+{ .mfi
+      nop.m 999
+(p6)   fcmp.lt.unc.s1	p8, p9 = FR_Input_X, FR_Two_to_M3
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     If |x| < pi/4, is -2**(-3) < x < 2**(-3) - set p8.
+//     If |x| >= pi/4,
+//     Create the right N for |x| < pi/4 and otherwise
+//     Case 2: Place integer part of N in GP register
+//
+(p7)   fcvt.xf FR_N_float = FR_N_fix
+      nop.i 999 ;;
+}
+
+{ .mmf
+      nop.m 999
+(p7)   getf.sig	GR_N_Inc = FR_N_fix
+(p8)   fcmp.gt.s1 p8, p0 = FR_Input_X, FR_Neg_Two_to_M3 ;;
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+//
+//     Load 2**(-33), -2**(-33)
+//
+(p8)   br.cond.spnt L(SINCOS_SMALL_R) ;;
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+(p6)   br.cond.sptk L(SINCOS_NORMAL_R) ;;
+}
+//
+//     if |x| < pi/4, branch based on |x| < 2**(-3) or otherwise.
+//
+//
+//     In this branch, |x| >= pi/4.
+//
 
-////////// x = 0/Inf/NaN path //////////////////
-_SINCOS_SPECIAL_ARGS:
-.pred.rel "mutex",p8,p9
-// sin(+/-0) = +/-0
-// sin(Inf)  = NaN
-// sin(NaN)  = NaN
 { .mfi
-      nop.m         999
-(p8)  fma.d.s0      f8                  = f8, f0, f0 // sin(+/-0,NaN,Inf)
-      nop.i         999
+       ldfs FR_Neg_Two_to_M67 = [GR_Table_Base1], 8
+//
+//     Load -2**(-67)
+//
+       fnma.s1	FR_s = FR_N_float, FR_P_1, FR_Input_X
+//
+//     w = N * P_2
+//     s = -N * P_1  + Arg
+//
+       add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos
+}
+
+{ .mfi
+      nop.m 999
+       fma.s1	FR_w = FR_N_float, FR_P_2, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Adjust N_fix by N_inc to determine whether sine or
+//     cosine is being calculated
+//
+       fcmp.lt.unc.s1 p7, p6 = FR_s, FR_Two_to_M33
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p7)   fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//     Remember x >= pi/4.
+//     Is s <= -2**(-33) or s >= 2**(-33) (p6)
+//     or -2**(-33) < s < 2**(-33) (p7)
+(p6)   fms.s1 FR_r = FR_s, f1, FR_w
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p7)   fma.s1 FR_w = FR_N_float, FR_P_3, f0
+      nop.i 999 ;;
 }
-// cos(+/-0) = 1.0
-// cos(Inf)  = NaN
-// cos(NaN)  = NaN
+
+{ .mfi
+      nop.m 999
+(p7)   fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p6)   fms.s1 FR_c = FR_s, f1, FR_r
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     For big s: r = s - w: No further reduction is necessary
+//     For small s: w = N * P_3 (change sign) More reduction
+//
+(p6)   fcmp.lt.unc.s1 p8, p9 = FR_r, FR_Two_to_M3
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p8)   fcmp.gt.s1 p8, p9 = FR_r, FR_Neg_Two_to_M3
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p7)   fms.s1 FR_r = FR_s, f1, FR_U_1
+      nop.i 999
+}
+
 { .mfb
-      nop.m         999
-(p9)  fma.d.s0      f8                  = f8, f0, f1 // cos(+/-0,NaN,Inf)
-      br.ret.sptk   b0 // Exit for x = 0/Inf/NaN path
-};;
+      nop.m 999
+//
+//     For big s: Is |r| < 2**(-3)?
+//     For big s: c = S - r
+//     For small s: U_1 = N * P_2 + w
+//
+//     If p8 is set, prepare to branch to Small_R.
+//     If p9 is set, prepare to branch to Normal_R.
+//     For big s,  r is complete here.
+//
+(p6)   fms.s1 FR_c = FR_c, f1, FR_w
+//
+//     For big s: c = c + w (w has not been negated.)
+//     For small s: r = S - U_1
+//
+(p8)   br.cond.spnt	L(SINCOS_SMALL_R) ;;
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+(p9)   br.cond.sptk	L(SINCOS_NORMAL_R) ;;
+}
+
+{ .mfi
+(p7)   add GR_Table_Base1 = 224, GR_Table_Base1
+//
+//     Branch to SINCOS_SMALL_R or SINCOS_NORMAL_R
+//
+(p7)   fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1
+//
+//     c = S - U_1
+//     r = S_1 * r
+//
+//
+(p7)   extr.u	GR_i_1 = GR_N_Inc, 0, 1
+}
+
+{ .mmi
+      nop.m 999 ;;
+//
+//     Get [i_0,i_1] - two lsb of N_fix_gr.
+//     Do dummy fmpy so inexact is always set.
+//
+(p7)   cmp.eq.unc p9, p10 = 0x0, GR_i_1
+(p7)   extr.u	GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+//
+//     For small s: U_2 = N * P_2 - U_1
+//     S_1 stored constant - grab the one stored with the
+//     coefficients.
+//
+
+{ .mfi
+(p7)   ldfe FR_S_1 = [GR_Table_Base1], 16
+//
+//     Check if i_1 and i_0  != 0
+//
+(p10)  fma.s1	FR_poly = f0, f1, FR_Neg_Two_to_M67
+(p7)   cmp.eq.unc p11, p12 = 0x0, GR_i_0 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p7)   fms.s1	FR_s = FR_s, f1, FR_r
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//     S = S - r
+//     U_2 = U_2 + w
+//     load S_1
+//
+(p7)   fma.s1	FR_rsq = FR_r, FR_r, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p7)   fma.s1	FR_U_2 = FR_U_2, f1, FR_w
+      nop.i 999
+}
 
-GLOBAL_IEEE754_END(cos)
-//////////// x >= 2^27 - large arguments routine call ////////////
-LOCAL_LIBM_ENTRY(__libm_callout_sincos)
-_SINCOS_LARGE_ARGS:
-.prologue
 { .mfi
-      mov           sincos_GR_all_ones  = -1 // 0xffffffff
-      nop.f         999
-.save ar.pfs,GR_SAVE_PFS
-      mov           GR_SAVE_PFS         = ar.pfs
+      nop.m 999
+(p7)   fmerge.se FR_Input_X = FR_r, FR_r
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10)  fma.s1 FR_Input_X = f0, f1, f1
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     FR_rsq = r * r
+//     Save r as the result.
+//
+(p7)   fms.s1	FR_c = FR_s, f1, FR_U_1
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     if ( i_1 ==0) poly = c + S_1*r*r*r
+//     else Result = 1
+//
+(p12)  fnma.s1 FR_Input_X = FR_Input_X, f1, f0
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p7)   fma.s1	FR_r = FR_S_1, FR_r, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p7)   fma.d.s0	FR_S_1 = FR_S_1, FR_S_1, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     If i_1 != 0, poly = 2**(-67)
+//
+(p7)   fms.s1 FR_c = FR_c, f1, FR_U_2
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     c = c - U_2
+//
+(p9)   fma.s1 FR_poly = FR_r, FR_rsq, FR_c
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     i_0 != 0, so Result = -Result
+//
+(p11)  fma.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+      nop.i 999 ;;
+}
+
+{ .mfb
+      nop.m 999
+(p12)  fms.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+//
+//     if (i_0 == 0),  Result = Result + poly
+//     else            Result = Result - poly
+//
+       br.ret.sptk   b0 ;;
+}
+L(SINCOS_LARGER_ARG):
+
+{ .mfi
+      nop.m 999
+       fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
+      nop.i 999
+}
+;;
+
+//     This path is for arguments with |x| >= 2**24
+//     Adjust table_ptr1 to beginning of table.
+//
+
+{ .mmi
+      nop.m 999
+      addl           GR_Table_Base   = @ltoff(FSINCOS_CONSTANTS#), gp
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+
+//
+//     Point to 2**(-14)
+//     N_0 = Arg * Inv_P_0
+//
+
+{ .mmi
+       add GR_Table_Base = 688, GR_Table_Base ;;
+       ldfs FR_Two_to_M14 = [GR_Table_Base], 4
+      nop.i 999 ;;
+}
+
 { .mfi
-      mov           GR_SAVE_GP          = gp
-      nop.f         999
-.save b0, GR_SAVE_B0
-      mov           GR_SAVE_B0          = b0
+       ldfs FR_Neg_Two_to_M14 = [GR_Table_Base], 0
+      nop.f 999
+      nop.i 999 ;;
 }
 
-.body
-{ .mbb
-      setf.sig      sincos_save_tmp     = sincos_GR_all_ones// inexact set
-      nop.b         999
-(p8)  br.call.sptk.many b0              = __libm_sin_large# // sin(large_X)
+{ .mfi
+      nop.m 999
+//
+//     Load values 2**(-14) and -2**(-14)
+//
+       fcvt.fx.s1 FR_N_0_fix = FR_N_0
+      nop.i 999 ;;
+}
 
-};;
+{ .mfi
+      nop.m 999
+//
+//     N_0_fix  = integer part of N_0
+//
+       fcvt.xf FR_N_0 = FR_N_0_fix
+      nop.i 999 ;;
+}
 
-{ .mbb
-      cmp.ne        p9,p0               = sincos_r_sincos, r0 // set p9 if cos
-      nop.b         999
-(p9)  br.call.sptk.many b0              = __libm_cos_large# // cos(large_X)
-};;
+{ .mfi
+      nop.m 999
+//
+//     Make N_0 the integer part
+//
+       fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+       fma.s1 FR_w = FR_N_0, FR_d_1, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Arg' = -N_0 * P_0 + Arg
+//     w  = N_0 * d_1
+//
+       fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     N = A' * 2/pi
+//
+       fcvt.fx.s1 FR_N_fix = FR_N_float
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     N_fix is the integer part
+//
+       fcvt.xf FR_N_float = FR_N_fix
+      nop.i 999 ;;
+}
+
+{ .mfi
+       getf.sig GR_N_Inc = FR_N_fix
+      nop.f 999
+      nop.i 999 ;;
+}
+
+{ .mii
+      nop.m 999
+      nop.i 999 ;;
+       add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     N is the integer part of the reduced-reduced argument.
+//     Put the integer in a GP register
+//
+       fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+       fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     s = -N*P_1 + Arg'
+//     w = -N*P_2 + w
+//     N_fix_gr = N_fix_gr + N_inc
+//
+       fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p9)   fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     For |s|  > 2**(-14) r = S + w (r complete)
+//     Else       U_hi = N_0 * d_1
+//
+(p9)   fma.s1 FR_V_hi = FR_N_float, FR_P_2, f0
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p9)   fma.s1 FR_U_hi = FR_N_0, FR_d_1, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Either S <= -2**(-14) or S >= 2**(-14)
+//     or -2**(-14) < s < 2**(-14)
+//
+(p8)   fma.s1 FR_r = FR_s, f1, FR_w
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p9)   fma.s1 FR_w = FR_N_float, FR_P_3, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     We need abs of both U_hi and V_hi - don't
+//     worry about switched sign of V_hi.
+//
+(p9)   fms.s1 FR_A = FR_U_hi, f1, FR_V_hi
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Big s: finish up c = (S - r) + w (c complete)
+//     Case 4: A =  U_hi + V_hi
+//     Note: Worry about switched sign of V_hi, so subtract instead of add.
+//
+(p9)   fnma.s1 FR_V_lo = FR_N_float, FR_P_2, FR_V_hi
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p9)   fms.s1 FR_U_lo = FR_N_0, FR_d_1, FR_U_hi
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p9)   fmerge.s FR_V_hiabs = f0, FR_V_hi
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//     For big s: c = S - r
+//     For small s do more work: U_lo = N_0 * d_1 - U_hi
+//
+(p9)   fmerge.s FR_U_hiabs = f0, FR_U_hi
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     For big s: Is |r| < 2**(-3)
+//     For big s: if p12 set, prepare to branch to Small_R.
+//     For big s: If p13 set, prepare to branch to Normal_R.
+//
+(p8)   fms.s1 FR_c = FR_s, f1, FR_r
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//     For small S: V_hi = N * P_2
+//                  w = N * P_3
+//     Note the product does not include the (-) as in the writeup
+//     so (-) missing for V_hi and w.
+//
+(p8)   fcmp.lt.unc.s1 p12, p13 = FR_r, FR_Two_to_M3
+      nop.i 999 ;;
+}
 
 { .mfi
-      mov           gp                  = GR_SAVE_GP
-      fma.d.s0      f8                  = f8, f1, f0 // Round result to double
-      mov           b0                  = GR_SAVE_B0
+      nop.m 999
+(p12)  fcmp.gt.s1 p12, p13 = FR_r, FR_Neg_Two_to_M3
+      nop.i 999 ;;
 }
-// Force inexact set
+
 { .mfi
-      nop.m         999
-      fmpy.s0       sincos_save_tmp     = sincos_save_tmp, sincos_save_tmp
-      nop.i         999 
+      nop.m 999
+(p8)   fma.s1 FR_c = FR_c, f1, FR_w
+      nop.i 999
+}
+
+{ .mfb
+      nop.m 999
+(p9)   fms.s1 FR_w = FR_N_0, FR_d_2, FR_w
+(p12)  br.cond.spnt L(SINCOS_SMALL_R) ;;
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+(p13)  br.cond.sptk L(SINCOS_NORMAL_R) ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Big s: Vector off when |r| < 2**(-3).  Recall that p8 will be true.
+//     The remaining stuff is for Case 4.
+//     Small s: V_lo = N * P_2 + U_hi (U_hi is in place of V_hi in writeup)
+//     Note: the (-) is still missing for V_lo.
+//     Small s: w = w + N_0 * d_2
+//     Note: the (-) is now incorporated in w.
+//
+(p9)   fcmp.ge.unc.s1 p10, p11 = FR_U_hiabs, FR_V_hiabs
+       extr.u	GR_i_1 = GR_N_Inc, 0, 1 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     C_hi = S + A
+//
+(p9)   fma.s1 FR_t = FR_U_lo, f1, FR_V_lo
+       extr.u	GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     t = U_lo + V_lo
+//
+//
+(p10)  fms.s1 FR_a = FR_U_hi, f1, FR_A
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p11)  fma.s1 FR_a = FR_V_hi, f1, FR_A
+      nop.i 999
+}
+;;
+
+{ .mmi
+      nop.m 999
+      addl           GR_Table_Base   = @ltoff(FSINCOS_CONSTANTS#), gp
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
+{ .mfi
+       add GR_Table_Base = 528, GR_Table_Base
+//
+//     Is U_hiabs >= V_hiabs?
+//
+(p9)   fma.s1 FR_C_hi = FR_s, f1, FR_A
+      nop.i 999 ;;
+}
+
+{ .mmi
+       ldfe FR_C_1 = [GR_Table_Base], 16 ;;
+       ldfe FR_C_2 = [GR_Table_Base], 64
+      nop.i 999 ;;
+}
+
+{ .mmf
+      nop.m 999
+//
+//     c = c + C_lo  finished.
+//     Load  C_2
+//
+       ldfe	FR_S_1 = [GR_Table_Base], 16
+//
+//     C_lo = S - C_hi
+//
+       fma.s1 FR_t = FR_t, f1, FR_w ;;
+}
+//
+//     r and c have been computed.
+//     Make sure ftz mode is set - should be automatic when using wre
+//     |r| < 2**(-3)
+//     Get [i_0,i_1] - two lsb of N_fix.
+//     Load S_1
+//
+
+{ .mfi
+       ldfe FR_S_2 = [GR_Table_Base], 64
+//
+//     t = t + w
+//
+(p10)  fms.s1 FR_a = FR_a, f1, FR_V_hi
+       cmp.eq.unc p9, p10 = 0x0, GR_i_0
+}
+
+{ .mfi
+      nop.m 999
+//
+//     For larger u than v: a = U_hi - A
+//     Else a = V_hi - A (do an add to account for missing (-) on V_hi
+//
+       fms.s1 FR_C_lo = FR_s, f1, FR_C_hi
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p11)  fms.s1 FR_a = FR_U_hi, f1, FR_a
+       cmp.eq.unc p11, p12 = 0x0, GR_i_1
+}
+
+{ .mfi
+      nop.m 999
+//
+//     If u > v: a = (U_hi - A)  + V_hi
+//     Else      a = (V_hi - A)  + U_hi
+//     In each case account for negative missing from V_hi.
+//
+       fma.s1 FR_C_lo = FR_C_lo, f1, FR_A
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     C_lo = (S - C_hi) + A
+//
+       fma.s1 FR_t = FR_t, f1, FR_a
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     t = t + a
+//
+       fma.s1 FR_C_lo = FR_C_lo, f1, FR_t
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     C_lo = C_lo + t
+//     Adjust Table_Base to beginning of table
+//
+       fma.s1 FR_r = FR_C_hi, f1, FR_C_lo
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Load S_2
+//
+       fma.s1 FR_rsq = FR_r, FR_r, f0
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Table_Base points to C_1
+//     r = C_hi + C_lo
+//
+       fms.s1 FR_c = FR_C_hi, f1, FR_r
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     if i_1 ==0: poly = S_2 * FR_rsq + S_1
+//     else        poly = C_2 * FR_rsq + C_1
+//
+(p11)  fma.s1 FR_Input_X = f0, f1, FR_r
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p12)  fma.s1 FR_Input_X = f0, f1, f1
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Compute r_cube = FR_rsq * r
+//
+(p11)  fma.s1 FR_poly = FR_rsq, FR_S_2, FR_S_1
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p12)  fma.s1 FR_poly = FR_rsq, FR_C_2, FR_C_1
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//     Compute FR_rsq = r * r
+//     Is i_1 == 0 ?
+//
+       fma.s1 FR_r_cubed = FR_rsq, FR_r, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     c = C_hi - r
+//     Load  C_1
+//
+       fma.s1 FR_c = FR_c, f1, FR_C_lo
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//     if i_1 ==0: poly = r_cube * poly + c
+//     else        poly = FR_rsq * poly
+//
+(p10)  fms.s1 FR_Input_X = f0, f1, FR_Input_X
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     if i_1 ==0: Result = r
+//     else        Result = 1.0
+//
+(p11)  fma.s1 FR_poly = FR_r_cubed, FR_poly, FR_c
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p12)  fma.s1 FR_poly = FR_rsq, FR_poly, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//     if i_0 !=0: Result = -Result
+//
+(p9)   fma.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+      nop.i 999 ;;
+}
+
+{ .mfb
+      nop.m 999
+(p10)  fms.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+//
+//     if i_0 == 0: Result = Result + poly
+//     else         Result = Result - poly
+//
+       br.ret.sptk   b0 ;;
+}
+L(SINCOS_SMALL_R):
+
+{ .mii
+      nop.m 999
+      	extr.u	GR_i_1 = GR_N_Inc, 0, 1 ;;
+//
+//
+//      Compare both i_1 and i_0 with 0.
+//      if i_1 == 0, set p9.
+//      if i_0 == 0, set p11.
+//
+      	cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
+}
+
+{ .mfi
+      nop.m 999
+      	fma.s1 FR_rsq = FR_r, FR_r, f0
+      	extr.u	GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+// 	Z = Z * FR_rsq
+//
+(p10)	fnma.s1	FR_c = FR_c, FR_r, f0
+      	cmp.eq.unc p11, p12 = 0x0, GR_i_0
+}
+;;
+
+// ******************************************************************
+// ******************************************************************
+// ******************************************************************
+//      r and c have been computed.
+//      We know whether this is the sine or cosine routine.
+//      Make sure ftz mode is set - should be automatic when using wre
+//      |r| < 2**(-3)
+//
+//      Set table_ptr1 to beginning of constant table.
+//      Get [i_0,i_1] - two lsb of N_fix_gr.
+//
+
+{ .mmi
+      nop.m 999
+      addl           GR_Table_Base   = @ltoff(FSINCOS_CONSTANTS#), gp
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
+//
+//      Set table_ptr1 to point to S_5.
+//      Set table_ptr1 to point to C_5.
+//      Compute FR_rsq = r * r
+//
+
+{ .mfi
+(p9)  	add GR_Table_Base = 672, GR_Table_Base
+(p10)	fmerge.s FR_r = f1, f1
+(p10) 	add GR_Table_Base = 592, GR_Table_Base ;;
+}
+//
+//      Set table_ptr1 to point to S_5.
+//      Set table_ptr1 to point to C_5.
+//
+
+{ .mmi
+(p9)  	ldfe FR_S_5 = [GR_Table_Base], -16 ;;
+//
+//      if (i_1 == 0) load S_5
+//      if (i_1 != 0) load C_5
+//
+(p9)  	ldfe FR_S_4 = [GR_Table_Base], -16
+      nop.i 999 ;;
+}
+
+{ .mmf
+(p10) 	ldfe FR_C_5 = [GR_Table_Base], -16
+//
+//      Z = FR_rsq * FR_rsq
+//
+(p9)  	ldfe FR_S_3 = [GR_Table_Base], -16
+//
+//      Compute FR_rsq = r * r
+//      if (i_1 == 0) load S_4
+//      if (i_1 != 0) load C_4
+//
+       	fma.s1 FR_Z = FR_rsq, FR_rsq, f0 ;;
+}
+//
+//      if (i_1 == 0) load S_3
+//      if (i_1 != 0) load C_3
+//
+
+{ .mmi
+(p9)  	ldfe FR_S_2 = [GR_Table_Base], -16 ;;
+//
+//      if (i_1 == 0) load S_2
+//      if (i_1 != 0) load C_2
+//
+(p9)  	ldfe FR_S_1 = [GR_Table_Base], -16
+      nop.i 999
+}
+
+{ .mmi
+(p10) 	ldfe FR_C_4 = [GR_Table_Base], -16 ;;
+(p10)  	ldfe FR_C_3 = [GR_Table_Base], -16
+      nop.i 999 ;;
+}
+
+{ .mmi
+(p10) 	ldfe FR_C_2 = [GR_Table_Base], -16 ;;
+(p10) 	ldfe FR_C_1 = [GR_Table_Base], -16
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1 != 0):
+//      poly_lo = FR_rsq * C_5 + C_4
+//      poly_hi = FR_rsq * C_2 + C_1
+//
+(p9)  	fma.s1 FR_Z = FR_Z, FR_r, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1 == 0) load S_1
+//      if (i_1 != 0) load C_1
+//
+(p9)  	fma.s1 FR_poly_lo = FR_rsq, FR_S_5, FR_S_4
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//      c = -c * r
+//      dummy fmpy's to flag inexact.
+//
+(p9)	fma.d.s0 FR_S_4 = FR_S_4, FR_S_4, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      poly_lo = FR_rsq * poly_lo + C_3
+//      poly_hi = FR_rsq * poly_hi
+//
+        fma.s1	FR_Z = FR_Z, FR_rsq, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p9)  	fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1 == 0):
+//      poly_lo = FR_rsq * S_5 + S_4
+//      poly_hi = FR_rsq * S_2 + S_1
+//
+(p10) 	fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1 == 0):
+//      Z = Z * r  for only one of the small r cases - not there
+//      in original implementation notes.
+//
+(p9)  	fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10) 	fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.d.s0 FR_C_1 = FR_C_1, FR_C_1, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p9)  	fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//      poly_lo = FR_rsq * poly_lo + S_3
+//      poly_hi = FR_rsq * poly_hi
+//
+(p10) 	fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10) 	fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+// 	if (i_1 == 0): dummy fmpy's to flag inexact
+// 	r = 1
+//
+(p9)	fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+// 	poly_hi = r * poly_hi
+//
+        fma.s1	FR_poly = FR_Z, FR_poly_lo, FR_c
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p12)	fms.s1	FR_r = f0, f1, FR_r
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      poly_hi = Z * poly_lo + c
+// 	if i_0 == 1: r = -r
+//
+     	fma.s1	FR_poly = FR_poly, f1, FR_poly_hi
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p12)	fms.d.s0 FR_Input_X = FR_r, f1, FR_poly
+      nop.i 999
+}
+
+{ .mfb
+      nop.m 999
+//
+//      poly = poly + poly_hi
+//
+(p11)	fma.d.s0 FR_Input_X = FR_r, f1, FR_poly
+//
+//      if (i_0 == 0) Result = r + poly
+//      if (i_0 != 0) Result = r - poly
+//
+       br.ret.sptk   b0 ;;
+}
+L(SINCOS_NORMAL_R):
+
+{ .mii
+      nop.m 999
+    	extr.u	GR_i_1 = GR_N_Inc, 0, 1 ;;
+//
+//      Set table_ptr1 and table_ptr2 to base address of
+//      constant table.
+    	cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
+}
+
+{ .mfi
+      nop.m 999
+    	fma.s1	FR_rsq = FR_r, FR_r, f0
+    	extr.u	GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+
+{ .mfi
+      nop.m 999
+    	frcpa.s1 FR_r_hi, p6 = f1, FR_r
+    	cmp.eq.unc p11, p12 = 0x0, GR_i_0
+}
+;;
+
+// ******************************************************************
+// ******************************************************************
+// ******************************************************************
+//
+//      r and c have been computed.
+//      We know whether this is the sine or cosine routine.
+//      Make sure ftz mode is set - should be automatic when using wre
+//      Get [i_0,i_1] - two lsb of N_fix_gr alone.
+//
+
+{ .mmi
+      nop.m 999
+      addl           GR_Table_Base   = @ltoff(FSINCOS_CONSTANTS#), gp
+      nop.i 999
+}
+;;
+
+{ .mmi
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
+{ .mfi
+(p10)	add GR_Table_Base = 384, GR_Table_Base
+(p12)	fms.s1 FR_Input_X = f0, f1, f1
+(p9)	add GR_Table_Base = 224, GR_Table_Base ;;
+}
+
+{ .mmf
+      nop.m 999
+(p10)	ldfe FR_QQ_8 = [GR_Table_Base], 16
+//
+//      if (i_1==0) poly = poly * FR_rsq + PP_1_lo
+//      else        poly = FR_rsq * poly
+//
+(p11)	fma.s1 FR_Input_X = f0, f1, f1 ;;
+}
+
+{ .mmf
+(p10)	ldfe FR_QQ_7 = [GR_Table_Base], 16
+//
+// 	Adjust table pointers based on i_0
+//      Compute rsq = r * r
+//
+(p9)	ldfe FR_PP_8 = [GR_Table_Base], 16
+    	fma.s1 FR_r_cubed = FR_r, FR_rsq, f0 ;;
+}
+
+{ .mmf
+(p9)	ldfe FR_PP_7 = [GR_Table_Base], 16
+(p10)	ldfe FR_QQ_6 = [GR_Table_Base], 16
+//
+//      Load PP_8 and QQ_8; PP_7 and QQ_7
+//
+    	frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi ;;
+}
+//
+//      if (i_1==0) poly =   PP_7 + FR_rsq * PP_8.
+//      else        poly =   QQ_7 + FR_rsq * QQ_8.
+//
+
+{ .mmb
+(p9)	ldfe FR_PP_6 = [GR_Table_Base], 16
+(p10)	ldfe FR_QQ_5 = [GR_Table_Base], 16
+      nop.b 999 ;;
+}
+
+{ .mmb
+(p9)	ldfe FR_PP_5 = [GR_Table_Base], 16
+(p10)	ldfe FR_S_1 = [GR_Table_Base], 16
+      nop.b 999 ;;
+}
+
+{ .mmb
+(p10)	ldfe FR_QQ_1 = [GR_Table_Base], 16
+(p9)	ldfe FR_C_1 = [GR_Table_Base], 16
+      nop.b 999 ;;
+}
+
+{ .mmi
+(p10)	ldfe FR_QQ_4 = [GR_Table_Base], 16 ;;
+(p9)	ldfe FR_PP_1 = [GR_Table_Base], 16
+      nop.i 999 ;;
+}
+
+{ .mmf
+(p10)	ldfe FR_QQ_3 = [GR_Table_Base], 16
+//
+//      if (i_1=0) corr = corr + c*c
+//      else       corr = corr * c
+//
+(p9)	ldfe FR_PP_4 = [GR_Table_Base], 16
+(p10)	fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7 ;;
+}
+//
+//      if (i_1=0) poly = rsq * poly + PP_5
+//      else       poly = rsq * poly + QQ_5
+//      Load PP_4 or QQ_4
+//
+
+{ .mmf
+(p9)	ldfe FR_PP_3 = [GR_Table_Base], 16
+(p10)	ldfe FR_QQ_2 = [GR_Table_Base], 16
+//
+//      r_hi =   frcpa(frcpa(r)).
+//      r_cube = r * FR_rsq.
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7 ;;
+}
+//
+//      Do dummy multiplies so inexact is always set.
+//
+
+{ .mfi
+(p9)	ldfe FR_PP_2 = [GR_Table_Base], 16
+//
+//      r_lo = r - r_hi
+//
+(p9)	fma.s1 FR_U_lo = FR_r_hi, FR_r_hi, f0
+      nop.i 999 ;;
+}
+
+{ .mmf
+      nop.m 999
+(p9)	ldfe FR_PP_1_lo = [GR_Table_Base], 16
+(p10)	fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1=0) U_lo = r_hi * r_hi
+//      else       U_lo = r_hi + r
+//
+(p9)	fma.s1 FR_corr = FR_C_1, FR_rsq, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1=0) corr = C_1 * rsq
+//      else       corr = S_1 * r_cubed + r
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_U_lo = FR_r_hi, f1, FR_r
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1=0) U_hi = r_hi + U_hi
+//      else       U_hi = QQ_1 * U_hi + 1
+//
+(p9)	fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_U_lo
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//      U_hi = r_hi * r_hi
+//
+    	fms.s1 FR_r_lo = FR_r, f1, FR_r_hi
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      Load PP_1, PP_6, PP_5, and C_1
+//      Load QQ_1, QQ_6, QQ_5, and S_1
+//
+    	fma.s1 FR_U_hi = FR_r_hi, FR_r_hi, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fnma.s1	FR_corr = FR_corr, FR_c, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1=0) U_lo = r * r_hi + U_lo
+//      else       U_lo = r_lo * U_lo
+//
+(p9)	fma.s1 FR_corr = FR_corr, FR_c, FR_c
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1 =0) U_hi = r + U_hi
+//      if (i_1 =0) U_lo = r_lo * U_lo
+//
+//
+(p9)	fma.d.s0 FR_PP_5 = FR_PP_5, FR_PP_4, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p9)	fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1=0) poly = poly * rsq + PP_6
+//      else       poly = poly * rsq + QQ_6
+//
+(p9)	fma.s1 FR_U_hi = FR_r_hi, FR_U_hi, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_U_hi = FR_QQ_1, FR_U_hi, f1
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.d.s0 FR_QQ_5 = FR_QQ_5, FR_QQ_5, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1!=0) U_hi = PP_1 * U_hi
+//      if (i_1!=0) U_lo = r * r  + U_lo
+//      Load PP_3 or QQ_3
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p9)	fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
+      nop.i 999
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p9)	fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      Load PP_2, QQ_2
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1==0) poly = FR_rsq * poly  + PP_3
+//      else        poly = FR_rsq * poly  + QQ_3
+//      Load PP_1_lo
+//
+(p9)	fma.s1 FR_U_lo = FR_PP_1, FR_U_lo, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1 =0) poly = poly * rsq + pp_r4
+//      else        poly = poly * rsq + qq_r4
+//
+(p9)	fma.s1 FR_U_hi = FR_r, f1, FR_U_hi
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_2
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1==0) U_lo =  PP_1_hi * U_lo
+//      else        U_lo =  QQ_1 * U_lo
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_0==0)  Result = 1
+//      else         Result = -1
+//
+     	fma.s1 FR_V = FR_U_lo, f1, FR_corr
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1==0) poly =  FR_rsq * poly + PP_2
+//      else poly =  FR_rsq * poly + QQ_2
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      V = U_lo + corr
+//
+(p9)	fma.s1 FR_poly = FR_r_cubed, FR_poly, f0
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+//
+//      if (i_1==0) poly = r_cube * poly
+//      else        poly = FR_rsq * poly
+//
+    	fma.s1	FR_V = FR_poly, f1, FR_V
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p12)	fms.d.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
+      nop.i 999
+}
+
+{ .mfb
+      nop.m 999
+//
+//      V = V + poly
+//
+(p11)	fma.d.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
+//
+//      if (i_0==0) Result = Result * U_hi + V
+//      else        Result = Result * U_hi - V
+//
+       br.ret.sptk   b0 ;;
+}
+
+//
+//      If cosine, FR_Input_X = 1
+//      If sine, FR_Input_X = +/-Zero (Input FR_Input_X)
+//      Results are exact, no exceptions
+//
+L(SINCOS_ZERO):
+
+{ .mmb
+        cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos
+      nop.m 999
+      nop.b 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p7)    fmerge.s FR_Input_X = FR_Input_X, FR_Input_X
+      nop.i 999
+}
+
+{ .mfb
+      nop.m 999
+(p6)    fmerge.s FR_Input_X = f1, f1
+       br.ret.sptk   b0 ;;
+}
+
+L(SINCOS_SPECIAL):
+
+//
+//      Path for Arg = +/- QNaN, SNaN, Inf
+//      Invalid can be raised. SNaNs
+//      become QNaNs
+//
+
+{ .mfb
+      nop.m 999
+        fmpy.d.s0 FR_Input_X = FR_Input_X, f0
+        br.ret.sptk   b0 ;;
+}
+.endp __libm_cos_double_dbx#
+ASM_SIZE_DIRECTIVE(__libm_cos_double_dbx#)
+
+
+
+//
+//      Call int pi_by_2_reduce(double* x, double *y)
+//      for |arguments| >= 2**63
+//      Address to save r and c as double
+//
+//
+//      psp    sp+64
+//             sp+48  -> f0 c
+//      r45    sp+32  -> f0 r
+//      r44 -> sp+16  -> InputX
+//      sp     sp     -> scratch provided to callee
+
+
+
+.proc __libm_callout_2
+__libm_callout_2:
+L(SINCOS_ARG_TOO_LARGE):
+// Large-argument path: spill X, call __libm_pi_by_2_reduce, reload r and c, then branch on r.
+.prologue
+{ .mfi
+        add   r45=-32,sp                        // Parameter: r address (old sp-32 = new sp+32)
+        nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+        add sp=-64,sp                           // Create new 64-byte stack frame
+        nop.f 0
+        mov GR_SAVE_GP=gp                       // Save gp
+};;
+{ .mmi
+        stfe [r45] = f0,16                      // Clear Parameter r on stack; r45 -> c slot
+        add  r44 = 16,sp                        // Parameter x address
+.save   b0, GR_SAVE_B0
+        mov GR_SAVE_B0=b0                       // Save b0
+};;
+.body
+{ .mib
+        stfe [r45] = f0,-16                     // Clear Parameter c on stack; r45 -> r slot again
+        nop.i 0
+        nop.b 0
+}
+{ .mib
+        stfe [r44] = FR_Input_X                 // Store Parameter x on stack
+        nop.i 0
+        br.call.sptk b0=__libm_pi_by_2_reduce# ;;
 };;
 
+// Back from the reduction: r8 (integer result of the call) feeds GR_N_Inc below.
+{ .mii
+        ldfe  FR_Input_X =[r44],16              // Get X back off the stack
+//
+//      Readjust Table ptr
+//
+        adds  GR_Table_Base1 = -16, GR_Table_Base1
+//
+//      N_Inc = Sin_or_Cos + r8
+//
+        add   GR_N_Inc = GR_Sin_or_Cos,r8 ;;
+}
+{ .mmb
+        ldfe  FR_r =[r45],16
+//
+//      Get r (above) and c (below) off stack
+//      Load the two FR_r comparison bounds from the table
+//
+        ldfs FR_Two_to_M3 = [GR_Table_Base1],4
+        nop.b 999 ;;
+}
+{ .mmb
+        ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1],0
+        ldfe  FR_c =[r45]
+        nop.b 999 ;;
+}
+
+{ .mfi
+.restore sp
+        add   sp = 64,sp                       // Restore stack pointer
+        fcmp.lt.unc.s1  p6, p0 = FR_r, FR_Two_to_M3    // p6 = (r < FR_Two_to_M3)
+        mov   b0 = GR_SAVE_B0                  // Restore return address
+};;
 { .mib
-      nop.m         999
-      mov           ar.pfs              = GR_SAVE_PFS
-      br.ret.sptk   b0 // Exit for large arguments routine call
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        nop.b 0
 };;
 
-LOCAL_LIBM_END(__libm_callout_sincos)
 
-.type    __libm_sin_large#,@function
-.global  __libm_sin_large#
-.type    __libm_cos_large#,@function
-.global  __libm_cos_large#
+{ .mfi
+      nop.m 999
+(p6)    fcmp.gt.unc.s1	p6, p0 = FR_r, FR_Neg_Two_to_M3    // p6 &= (r > FR_Neg_Two_to_M3)
+      nop.i 999 ;;
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+(p6)    br.cond.spnt L(SINCOS_SMALL_R) ;;              // r strictly between the two bounds
+}
+
+{ .mib
+      nop.m 999
+      nop.i 999
+        br.cond.sptk L(SINCOS_NORMAL_R) ;;             // otherwise
+}
+
+.endp __libm_callout_2
+ASM_SIZE_DIRECTIVE(__libm_callout_2)
+
+.type   __libm_pi_by_2_reduce#,@function
+.global __libm_pi_by_2_reduce#
+
 
+.type __libm_sin_double_dbx#,@function
+.global __libm_sin_double_dbx#
+.type __libm_cos_double_dbx#,@function
+.global __libm_cos_double_dbx#
diff --git a/sysdeps/ia64/fpu/s_cosf.S b/sysdeps/ia64/fpu/s_cosf.S
index 89cf82372d..0e47255b3f 100644
--- a/sysdeps/ia64/fpu/s_cosf.S
+++ b/sysdeps/ia64/fpu/s_cosf.S
@@ -1,10 +1,12 @@
+
 .file "sincosf.s"
 
 
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +22,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,680 +37,663 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-//
+// http://developer.intel.com/opensource.
+
+
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/02/00 Unwind support added.
-// 06/16/00 Updated tables to enforce symmetry
-// 08/31/00 Saved 2 cycles in main path, and 9 in other paths.
-// 09/20/00 The updated tables regressed to an old version, so reinstated them
-// 10/18/00 Changed one table entry to ensure symmetry
-// 01/03/01 Improved speed, fixed flag settings for small arguments.
-// 02/18/02 Large arguments processing routine excluded
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 06/03/02 Insure inexact flag set for large arg result
-// 09/05/02 Single precision version is made using double precision one as base
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 2/02/00  Initial revision 
+// 4/02/00  Unwind support added.
+// 5/10/00  Improved speed with new algorithm.
+// 8/08/00  Improved speed by avoiding SIR flush.
+// 8/17/00  Changed predicate register macro-usage to direct predicate
+//          names due to an assembler bug.
+// 8/30/00  Put sin_of_r before sin_tbl_S_cos_of_r to gain a cycle 
+// 1/02/00  Fixed flag settings, improved speed.
 //
 // API
 //==============================================================
 // float sinf( float x);
 // float cosf( float x);
 //
-// Overview of operation
-//==============================================================
-//
-// Step 1
-// ======
-// Reduce x to region -1/2*pi/2^k ===== 0 ===== +1/2*pi/2^k  where k=4
-//    divide x by pi/2^k.
-//    Multiply by 2^k/pi.
-//    nfloat = Round result to integer (round-to-nearest)
-//
-// r = x -  nfloat * pi/2^k
-//    Do this as (x -  nfloat * HIGH(pi/2^k)) - nfloat * LOW(pi/2^k) 
-
-//    for increased accuracy.
-//    pi/2^k is stored as two numbers that when added make pi/2^k.
-//       pi/2^k = HIGH(pi/2^k) + LOW(pi/2^k)
-//    HIGH part is rounded to zero, LOW - to nearest
-//
-// x = (nfloat * pi/2^k) + r
-//    r is small enough that we can use a polynomial approximation
-//    and is referred to as the reduced argument.
-//
-// Step 3
-// ======
-// Take the unreduced part and remove the multiples of 2pi.
-// So nfloat = nfloat (with lower k+1 bits cleared) + lower k+1 bits
-//
-//    nfloat (with lower k+1 bits cleared) is a multiple of 2^(k+1)
-//    N * 2^(k+1)
-//    nfloat * pi/2^k = N * 2^(k+1) * pi/2^k + (lower k+1 bits) * pi/2^k
-//    nfloat * pi/2^k = N * 2 * pi + (lower k+1 bits) * pi/2^k
-//    nfloat * pi/2^k = N2pi + M * pi/2^k
-//
-//
-// Sin(x) = Sin((nfloat * pi/2^k) + r)
-//        = Sin(nfloat * pi/2^k) * Cos(r) + Cos(nfloat * pi/2^k) * Sin(r)
-//
-//          Sin(nfloat * pi/2^k) = Sin(N2pi + Mpi/2^k)
-//                               = Sin(N2pi)Cos(Mpi/2^k) + Cos(N2pi)Sin(Mpi/2^k)
-//                               = Sin(Mpi/2^k)
-//
-//          Cos(nfloat * pi/2^k) = Cos(N2pi + Mpi/2^k)
-//                               = Cos(N2pi)Cos(Mpi/2^k) + Sin(N2pi)Sin(Mpi/2^k)
-//                               = Cos(Mpi/2^k)
-//
-// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
-//
-//
-// Step 4
-// ======
-// 0 <= M < 2^(k+1)
-// There are 2^(k+1) Sin entries in a table.
-// There are 2^(k+1) Cos entries in a table.
-//
-// Get Sin(Mpi/2^k) and Cos(Mpi/2^k) by table lookup.
-//
-//
-// Step 5
-// ======
-// Calculate Cos(r) and Sin(r) by polynomial approximation.
-//
-// Cos(r) = 1 + r^2 q1  + r^4 q2  = Series for Cos
-// Sin(r) = r + r^3 p1  + r^5 p2  = Series for Sin
-//
-// and the coefficients q1, q2 and p1, p2 are stored in a table
-//
-//
-// Calculate
-// Sin(x) = Sin(Mpi/2^k) Cos(r) + Cos(Mpi/2^k) Sin(r)
-//
-// as follows
-//
-//    S[m] = Sin(Mpi/2^k) and C[m] = Cos(Mpi/2^k)
-//    rsq = r*r
-//
-//
-//    P = P1 + r^2*P2
-//    Q = Q1 + r^2*Q2
-//
-//       rcub = r * rsq
-//       Sin(r) = r + rcub * P
-//              = r + r^3p1  + r^5p2 = Sin(r)
-//
-//            The coefficients are not exactly these values, but almost.
-//
-//            p1 = -1/6  = -1/3!
-//            p2 = 1/120 =  1/5!
-//            p3 = -1/5040 = -1/7!
-//            p4 = 1/362889 = 1/9!
-//
-//       P =  r + r^3 * P
-//
-//    Answer = S[m] Cos(r) + C[m] P
-//
-//       Cos(r) = 1 + rsq Q
-//       Cos(r) = 1 + r^2 Q
-//       Cos(r) = 1 + r^2 (q1 + r^2q2)
-//       Cos(r) = 1 + r^2q1 + r^4q2
-//
-//       S[m] Cos(r) = S[m](1 + rsq Q)
-//       S[m] Cos(r) = S[m] + S[m] rsq Q
-//       S[m] Cos(r) = S[m] + s_rsq Q
-//       Q         = S[m] + s_rsq Q
-//
-// Then,
-//
-//    Answer = Q + C[m] P
-
-
-// Registers used
-//==============================================================
-// general input registers:
-// r14 -> r19
-// r32 -> r45
-
-// predicate registers used:
-// p6 -> p14
-
-// floating-point registers used
-// f9 -> f15
-// f32 -> f61
 
+#include "libm_support.h"	
+	
 // Assembly macros
 //==============================================================
-sincosf_NORM_f8                 = f9
-sincosf_W                       = f10
-sincosf_int_Nfloat              = f11
-sincosf_Nfloat                  = f12
-
-sincosf_r                       = f13
-sincosf_rsq                     = f14
-sincosf_rcub                    = f15
-sincosf_save_tmp                = f15
 
-sincosf_Inv_Pi_by_16            = f32
-sincosf_Pi_by_16_1              = f33
-sincosf_Pi_by_16_2              = f34
+// SIN_Sin_Flag               = p6
+// SIN_Cos_Flag               = p7
+// (kept as comments only: the code names p6/p7 directly, see History 8/17/00)
+// integer registers used
+// r33-r39 fall inside the frame set up by alloc ...,1,7,0,0 at the sinf/cosf entries
+ SIN_AD_PQ_1                = r33
+ SIN_AD_PQ_2                = r33   // NOTE(review): same register as SIN_AD_PQ_1 -- confirm the alias is intended
+ sin_GR_sincos_flag         = r34
+ sin_GR_Mint                = r35
+
+ sin_GR_index               = r36
+ gr_tmp                     = r37   // NOTE(review): shares r37 with GR_SAVE_B0 below -- confirm lifetimes never overlap
+
+ GR_SAVE_B0                 = r37
+ GR_SAVE_GP                 = r38
+ GR_SAVE_PFS                = r39
+
+
+// floating point registers used
+
+ sin_coeff_P1               = f32
+ sin_coeff_P2               = f33
+ sin_coeff_Q1               = f34
+ sin_coeff_Q2               = f35
+ sin_coeff_P4               = f36
+ sin_coeff_P5               = f37
+ sin_coeff_Q3               = f38
+ sin_coeff_Q4               = f39
+ sin_Mx                     = f40
+ sin_Mfloat                 = f41
+ sin_tbl_S                  = f42
+ sin_tbl_C                  = f43
+ sin_r                      = f44
+ sin_rcube                  = f45
+ sin_tsq                    = f46
+ sin_r7                     = f47
+ sin_t                      = f48
+ sin_poly_p2                = f49
+ sin_poly_p1                = f50
+ fp_tmp                     = f51
+ sin_poly_p3                = f52
+ sin_poly_p4                = f53
+ sin_of_r                   = f54
+ sin_S_t                    = f55
+ sin_poly_q2                = f56
+ sin_poly_q1                = f57
+ sin_S_tcube                = f58
+ sin_poly_q3                = f59
+ sin_poly_q4                = f60
+ sin_tbl_S_tcube            = f61
+ sin_tbl_S_cos_of_r         = f62
+
+ sin_coeff_Q5               = f63
+ sin_coeff_Q6               = f64
+ sin_coeff_P3               = f65
+
+ sin_poly_q5                = f66
+ sin_poly_q12               = f67
+ sin_poly_q3456             = f68
+ fp_tmp2                    = f69
+ SIN_NORM_f8                = f70
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
-sincosf_Inv_Pi_by_64            = f35
+.align 16
 
-sincosf_Pi_by_16_3              = f36
+sin_coeff_1_table:
+ASM_TYPE_DIRECTIVE(sin_coeff_1_table,@object)
+data8 0xBF56C16C16BF6462       // q3
+data8 0x3EFA01A0128B9EBC       // q4
+data8 0xBE927E42FDF33FFE       // q5
+data8 0x3E21DA5C72A446F3       // q6
+data8 0x3EC71DD1D5E421A4       // p4
+data8 0xBE5AC5C9D0ACF95A       // p5
+data8 0xBFC55555555554CA       // p1
+data8 0x3F811111110F2395       // p2
+data8 0xBFE0000000000000       // q1
+data8 0x3FA55555555554EF       // q2
+data8 0xBF2A01A011232913       // p3
+data8 0x0000000000000000       // pad
+ 
+
+/////////////////////////////////////////
+
+data8 0xBFE1A54991426566   //sin(-32)
+data8 0x3FEAB1F5305DE8E5   //cos(-32)
+data8 0x3FD9DBC0B640FC81   //sin(-31)
+data8 0x3FED4591C3E12A20   //cos(-31)
+data8 0x3FEF9DF47F1C903D   //sin(-30)
+data8 0x3FC3BE82F2505A52   //cos(-30)
+data8 0x3FE53C7D20A6C9E7   //sin(-29)
+data8 0xBFE7F01658314E47   //cos(-29)
+data8 0xBFD156853B4514D6   //sin(-28)
+data8 0xBFEECDAAD1582500   //cos(-28)
+data8 0xBFEE9AA1B0E5BA30   //sin(-27)
+data8 0xBFD2B266F959DED5   //cos(-27)
+data8 0xBFE866E0FAC32583   //sin(-26)
+data8 0x3FE4B3902691A9ED   //cos(-26)
+data8 0x3FC0F0E6F31E809D   //sin(-25)
+data8 0x3FEFB7EEF59504FF   //cos(-25)
+data8 0x3FECFA7F7919140F   //sin(-24)
+data8 0x3FDB25BFB50A609A   //cos(-24)
+data8 0x3FEB143CD0247D02   //sin(-23)
+data8 0xBFE10CF7D591F272   //cos(-23)
+data8 0x3F8220A29F6EB9F4   //sin(-22)
+data8 0xBFEFFFADD8D4ACDA   //cos(-22)
+data8 0xBFEAC5E20BB0D7ED   //sin(-21)
+data8 0xBFE186FF83773759   //cos(-21)
+data8 0xBFED36D8F55D3CE0   //sin(-20)
+data8 0x3FDA1E043964A83F   //cos(-20)
+data8 0xBFC32F2D28F584CF   //sin(-19)
+data8 0x3FEFA377DE108258   //cos(-19)
+data8 0x3FE8081668131E26   //sin(-18)
+data8 0x3FE52150815D2470   //cos(-18)
+data8 0x3FEEC3C4AC42882B   //sin(-17)
+data8 0xBFD19C46B07F58E7   //cos(-17)
+data8 0x3FD26D02085F20F8   //sin(-16)
+data8 0xBFEEA5257E962F74   //cos(-16)
+data8 0xBFE4CF2871CEC2E8   //sin(-15)
+data8 0xBFE84F5D069CA4F3   //cos(-15)
+data8 0xBFEFB30E327C5E45   //sin(-14)
+data8 0x3FC1809AEC2CA0ED   //cos(-14)
+data8 0xBFDAE4044881C506   //sin(-13)
+data8 0x3FED09CDD5260CB7   //cos(-13)
+data8 0x3FE12B9AF7D765A5   //sin(-12)
+data8 0x3FEB00DA046B65E3   //cos(-12)
+data8 0x3FEFFFEB762E93EB   //sin(-11)
+data8 0x3F7220AE41EE2FDF   //cos(-11)
+data8 0x3FE1689EF5F34F52   //sin(-10)
+data8 0xBFEAD9AC890C6B1F   //cos(-10)
+data8 0xBFDA6026360C2F91   //sin( -9)
+data8 0xBFED27FAA6A6196B   //cos( -9)
+data8 0xBFEFA8D2A028CF7B   //sin( -8)
+data8 0xBFC29FBEBF632F94   //cos( -8)
+data8 0xBFE50608C26D0A08   //sin( -7)
+data8 0x3FE81FF79ED92017   //cos( -7)
+data8 0x3FD1E1F18AB0A2C0   //sin( -6)
+data8 0x3FEEB9B7097822F5   //cos( -6)
+data8 0x3FEEAF81F5E09933   //sin( -5)
+data8 0x3FD22785706B4AD9   //cos( -5)
+data8 0x3FE837B9DDDC1EAE   //sin( -4)
+data8 0xBFE4EAA606DB24C1   //cos( -4)
+data8 0xBFC210386DB6D55B   //sin( -3)
+data8 0xBFEFAE04BE85E5D2   //cos( -3)
+data8 0xBFED18F6EAD1B446   //sin( -2)
+data8 0xBFDAA22657537205   //cos( -2)
+data8 0xBFEAED548F090CEE   //sin( -1)
+data8 0x3FE14A280FB5068C   //cos( -1)
+data8 0x0000000000000000   //sin(  0)
+data8 0x3FF0000000000000   //cos(  0)
+data8 0x3FEAED548F090CEE   //sin(  1)
+data8 0x3FE14A280FB5068C   //cos(  1)
+data8 0x3FED18F6EAD1B446   //sin(  2)
+data8 0xBFDAA22657537205   //cos(  2)
+data8 0x3FC210386DB6D55B   //sin(  3)
+data8 0xBFEFAE04BE85E5D2   //cos(  3)
+data8 0xBFE837B9DDDC1EAE   //sin(  4)
+data8 0xBFE4EAA606DB24C1   //cos(  4)
+data8 0xBFEEAF81F5E09933   //sin(  5)
+data8 0x3FD22785706B4AD9   //cos(  5)
+data8 0xBFD1E1F18AB0A2C0   //sin(  6)
+data8 0x3FEEB9B7097822F5   //cos(  6)
+data8 0x3FE50608C26D0A08   //sin(  7)
+data8 0x3FE81FF79ED92017   //cos(  7)
+data8 0x3FEFA8D2A028CF7B   //sin(  8)
+data8 0xBFC29FBEBF632F94   //cos(  8)
+data8 0x3FDA6026360C2F91   //sin(  9)
+data8 0xBFED27FAA6A6196B   //cos(  9)
+data8 0xBFE1689EF5F34F52   //sin( 10)
+data8 0xBFEAD9AC890C6B1F   //cos( 10)
+data8 0xBFEFFFEB762E93EB   //sin( 11)
+data8 0x3F7220AE41EE2FDF   //cos( 11)
+data8 0xBFE12B9AF7D765A5   //sin( 12)
+data8 0x3FEB00DA046B65E3   //cos( 12)
+data8 0x3FDAE4044881C506   //sin( 13)
+data8 0x3FED09CDD5260CB7   //cos( 13)
+data8 0x3FEFB30E327C5E45   //sin( 14)
+data8 0x3FC1809AEC2CA0ED   //cos( 14)
+data8 0x3FE4CF2871CEC2E8   //sin( 15)
+data8 0xBFE84F5D069CA4F3   //cos( 15)
+data8 0xBFD26D02085F20F8   //sin( 16)
+data8 0xBFEEA5257E962F74   //cos( 16)
+data8 0xBFEEC3C4AC42882B   //sin( 17)
+data8 0xBFD19C46B07F58E7   //cos( 17)
+data8 0xBFE8081668131E26   //sin( 18)
+data8 0x3FE52150815D2470   //cos( 18)
+data8 0x3FC32F2D28F584CF   //sin( 19)
+data8 0x3FEFA377DE108258   //cos( 19)
+data8 0x3FED36D8F55D3CE0   //sin( 20)
+data8 0x3FDA1E043964A83F   //cos( 20)
+data8 0x3FEAC5E20BB0D7ED   //sin( 21)
+data8 0xBFE186FF83773759   //cos( 21)
+data8 0xBF8220A29F6EB9F4   //sin( 22)
+data8 0xBFEFFFADD8D4ACDA   //cos( 22)
+data8 0xBFEB143CD0247D02   //sin( 23)
+data8 0xBFE10CF7D591F272   //cos( 23)
+data8 0xBFECFA7F7919140F   //sin( 24)
+data8 0x3FDB25BFB50A609A   //cos( 24)
+data8 0xBFC0F0E6F31E809D   //sin( 25)
+data8 0x3FEFB7EEF59504FF   //cos( 25)
+data8 0x3FE866E0FAC32583   //sin( 26)
+data8 0x3FE4B3902691A9ED   //cos( 26)
+data8 0x3FEE9AA1B0E5BA30   //sin( 27)
+data8 0xBFD2B266F959DED5   //cos( 27)
+data8 0x3FD156853B4514D6   //sin( 28)
+data8 0xBFEECDAAD1582500   //cos( 28)
+data8 0xBFE53C7D20A6C9E7   //sin( 29)
+data8 0xBFE7F01658314E47   //cos( 29)
+data8 0xBFEF9DF47F1C903D   //sin( 30)
+data8 0x3FC3BE82F2505A52   //cos( 30)
+data8 0xBFD9DBC0B640FC81   //sin( 31)
+data8 0x3FED4591C3E12A20   //cos( 31)
+data8 0x3FE1A54991426566   //sin( 32)
+data8 0x3FEAB1F5305DE8E5   //cos( 32)
+ASM_SIZE_DIRECTIVE(sin_coeff_1_table)
+
+//////////////////////////////////////////
+
+
+.global sinf
+.global cosf
+#ifdef _LIBC
+.global __sinf
+.global __cosf
+#endif
+
+.text
+.proc cosf
+#ifdef _LIBC
+.proc __cosf
+#endif
+.align 32
+// cosf entry: set the cos predicates and flag, start converting x to an
+// integer, fetch the table address, then join the code shared with sinf.
+cosf:
+#ifdef _LIBC
+__cosf:
+#endif
+{ .mfi
+     alloc          r32                      = ar.pfs,1,7,0,0   // 1 input, 7 locals, 0 outputs, 0 rotating
+     fcvt.fx.s1     sin_Mx                   =    f8            // sin_Mx = x converted to integer
+     cmp.ne    p6,p7     =    r0,r0        // p6 clear, p7 set if cos
+}
+{ .mfi
+     addl           SIN_AD_PQ_1              =    @ltoff(sin_coeff_1_table),gp   // linkage-table slot of the table
+     fnorm.s0 SIN_NORM_f8 = f8        // Sets denormal or invalid
+     mov sin_GR_sincos_flag = 0x0     // flag = 0 on the cos entry (sinf sets 0x1)
+}
+;;
 
-sincosf_r_exact                 = f37
+{ .mfi
+     ld8       SIN_AD_PQ_1    =    [SIN_AD_PQ_1]     // SIN_AD_PQ_1 = address of sin_coeff_1_table
+     fclass.m.unc  p9,p0      =    f8, 0x07          // p9 = x is +/-0 (cos(0) exit in common code)
+     cmp.ne p8,p0 = r0,r0                            // p8 clear: no sin(0) exit on this path
+}
+{ .mfb
+     nop.m 999
+     nop.f 999
+     br.sptk L(SINCOSF_COMMON)                       // join code shared with sinf
+}
+;;
 
-sincosf_Sm                      = f38
-sincosf_Cm                      = f39
+.endp cosf
+ASM_SIZE_DIRECTIVE(cosf)
 
-sincosf_P1                      = f40
-sincosf_Q1                      = f41
-sincosf_P2                      = f42
-sincosf_Q2                      = f43
-sincosf_P3                      = f44
-sincosf_Q3                      = f45
-sincosf_P4                      = f46
-sincosf_Q4                      = f47
 
-sincosf_P_temp1                 = f48
-sincosf_P_temp2                 = f49
+.text
+.proc  sinf
+#ifdef _LIBC
+.proc  __sinf
+#endif
+.align 32
 
-sincosf_Q_temp1                 = f50
-sincosf_Q_temp2                 = f51
+sinf:
+#ifdef _LIBC
+__sinf:	
+#endif
+{ .mfi
+     alloc          r32                      = ar.pfs,1,7,0,0
+     fcvt.fx.s1     sin_Mx                   =    f8
+     cmp.eq    p6,p7     =    r0,r0        // p6 set if sin
+}
+{ .mfi
+     addl           SIN_AD_PQ_1              =    @ltoff(sin_coeff_1_table),gp
+     fnorm.s0 SIN_NORM_f8 = f8        // Sets denormal or invalid
+     mov sin_GR_sincos_flag = 0x1
+}
+;;
 
-sincosf_P                       = f52
-sincosf_Q                       = f53
+{ .mfi
+     ld8       SIN_AD_PQ_1    =    [SIN_AD_PQ_1]
+     fclass.m.unc  p8,p0      =    f8, 0x07
+     cmp.ne p9,p0 = r0,r0
+}
+{ .mfb
+     nop.m 999
+     nop.f 999
+     br.sptk L(SINCOSF_COMMON)
+}
+;;
 
-sincosf_srsq                    = f54
 
-sincosf_SIG_INV_PI_BY_16_2TO61  = f55
-sincosf_RSHF_2TO61              = f56
-sincosf_RSHF                    = f57
-sincosf_2TOM61                  = f58
-sincosf_NFLOAT                  = f59
-sincosf_W_2TO61_RSH             = f60
+L(SINCOSF_COMMON):
 
-fp_tmp                          = f61
+// Here with p6 if sin, p7 if cos, p8 if sin(0), p9 if cos(0)
 
-/////////////////////////////////////////////////////////////
 
-sincosf_AD_1                    = r33
-sincosf_AD_2                    = r34
-sincosf_exp_limit               = r35
-sincosf_r_signexp               = r36
-sincosf_AD_beta_table           = r37
-sincosf_r_sincos                = r38
+{ .mmf
+     ldfpd      sin_coeff_Q3, sin_coeff_Q4     = [SIN_AD_PQ_1], 16
+     nop.m 999
+     fclass.m.unc  p11,p0      =    f8, 0x23	// Test for x=inf
+}
+;;
 
-sincosf_r_exp                   = r39
-sincosf_r_17_ones               = r40
+{ .mfb
+     ldfpd      sin_coeff_Q5, sin_coeff_Q6     = [SIN_AD_PQ_1], 16
+     fclass.m.unc  p10,p0      =    f8, 0xc3	// Test for x=nan
+(p8) br.ret.spnt b0                   // Exit for sin(0)
+}
+{ .mfb
+     nop.m 999
+(p9) fma.s      f8 = f1,f1,f0
+(p9) br.ret.spnt b0                   // Exit for cos(0)
+}
+;;
 
-sincosf_GR_sig_inv_pi_by_16     = r14
-sincosf_GR_rshf_2to61           = r15
-sincosf_GR_rshf                 = r16
-sincosf_GR_exp_2tom61           = r17
-sincosf_GR_n                    = r18
-sincosf_GR_m                    = r19
-sincosf_GR_32m                  = r19
-sincosf_GR_all_ones             = r19
+{ .mmf
+     ldfpd      sin_coeff_P4, sin_coeff_P5     = [SIN_AD_PQ_1], 16
+     addl gr_tmp = -1,r0
+     fcvt.xf    sin_Mfloat                     =    sin_Mx
+}
+;;
 
-gr_tmp                          = r41
-GR_SAVE_PFS                     = r41
-GR_SAVE_B0                      = r42
-GR_SAVE_GP                      = r43
+{     .mfi
+     getf.sig  sin_GR_Mint    =    sin_Mx
+(p11) frcpa.s0      f8,p13      =    f0,f0  // qnan indef if x=inf
+     nop.i 999
+}
+{     .mfb
+     ldfpd      sin_coeff_P1, sin_coeff_P2     = [SIN_AD_PQ_1], 16
+     nop.f 999
+(p11) br.ret.spnt b0                   // Exit for x=inf
+}
+;;
 
-RODATA
-.align 16
+{     .mfi
+     ldfpd      sin_coeff_Q1, sin_coeff_Q2     = [SIN_AD_PQ_1], 16
+     nop.f                      999
+     cmp.ge    p8,p9          = -33,sin_GR_Mint
+}
+{     .mfb
+     add       sin_GR_index   =    32,sin_GR_Mint
+(p10) fma.s      f8 = f8,f1,f0         // Force qnan if x=nan
+(p10) br.ret.spnt b0                   // Exit for x=nan
+}
+;;
 
-// Pi/16 parts
-LOCAL_OBJECT_START(double_sincosf_pi)
-   data8 0xC90FDAA22168C234, 0x00003FFC // pi/16 1st part
-   data8 0xC4C6628B80DC1CD1, 0x00003FBC // pi/16 2nd part
-LOCAL_OBJECT_END(double_sincosf_pi)
-
-// Coefficients for polynomials
-LOCAL_OBJECT_START(double_sincosf_pq_k4)
-   data8 0x3F810FABB668E9A2 // P2
-   data8 0x3FA552E3D6DE75C9 // Q2
-   data8 0xBFC555554447BC7F // P1
-   data8 0xBFDFFFFFC447610A // Q1
-LOCAL_OBJECT_END(double_sincosf_pq_k4)
-
-// Sincos table (S[m], C[m])
-LOCAL_OBJECT_START(double_sin_cos_beta_k4)
-    data8 0x0000000000000000 // sin ( 0 Pi / 16 )
-    data8 0x3FF0000000000000 // cos ( 0 Pi / 16 )
-//
-    data8 0x3FC8F8B83C69A60B // sin ( 1 Pi / 16 )
-    data8 0x3FEF6297CFF75CB0 // cos ( 1 Pi / 16 )
-//
-    data8 0x3FD87DE2A6AEA963 // sin ( 2 Pi / 16 )
-    data8 0x3FED906BCF328D46 // cos ( 2 Pi / 16 )
-//
-    data8 0x3FE1C73B39AE68C8 // sin ( 3 Pi / 16 )
-    data8 0x3FEA9B66290EA1A3 // cos ( 3 Pi / 16 )
-//
-    data8 0x3FE6A09E667F3BCD // sin ( 4 Pi / 16 )
-    data8 0x3FE6A09E667F3BCD // cos ( 4 Pi / 16 )
-//
-    data8 0x3FEA9B66290EA1A3 // sin ( 5 Pi / 16 )
-    data8 0x3FE1C73B39AE68C8 // cos ( 5 Pi / 16 )
-//
-    data8 0x3FED906BCF328D46 // sin ( 6 Pi / 16 )
-    data8 0x3FD87DE2A6AEA963 // cos ( 6 Pi / 16 )
-//
-    data8 0x3FEF6297CFF75CB0 // sin ( 7 Pi / 16 )
-    data8 0x3FC8F8B83C69A60B // cos ( 7 Pi / 16 )
-//
-    data8 0x3FF0000000000000 // sin ( 8 Pi / 16 )
-    data8 0x0000000000000000 // cos ( 8 Pi / 16 )
-//
-    data8 0x3FEF6297CFF75CB0 // sin ( 9 Pi / 16 )
-    data8 0xBFC8F8B83C69A60B // cos ( 9 Pi / 16 )
-//
-    data8 0x3FED906BCF328D46 // sin ( 10 Pi / 16 )
-    data8 0xBFD87DE2A6AEA963 // cos ( 10 Pi / 16 )
-//
-    data8 0x3FEA9B66290EA1A3 // sin ( 11 Pi / 16 )
-    data8 0xBFE1C73B39AE68C8 // cos ( 11 Pi / 16 )
-//
-    data8 0x3FE6A09E667F3BCD // sin ( 12 Pi / 16 )
-    data8 0xBFE6A09E667F3BCD // cos ( 12 Pi / 16 )
-//
-    data8 0x3FE1C73B39AE68C8 // sin ( 13 Pi / 16 )
-    data8 0xBFEA9B66290EA1A3 // cos ( 13 Pi / 16 )
-//
-    data8 0x3FD87DE2A6AEA963 // sin ( 14 Pi / 16 )
-    data8 0xBFED906BCF328D46 // cos ( 14 Pi / 16 )
-//
-    data8 0x3FC8F8B83C69A60B // sin ( 15 Pi / 16 )
-    data8 0xBFEF6297CFF75CB0 // cos ( 15 Pi / 16 )
-//
-    data8 0x0000000000000000 // sin ( 16 Pi / 16 )
-    data8 0xBFF0000000000000 // cos ( 16 Pi / 16 )
-//
-    data8 0xBFC8F8B83C69A60B // sin ( 17 Pi / 16 )
-    data8 0xBFEF6297CFF75CB0 // cos ( 17 Pi / 16 )
-//
-    data8 0xBFD87DE2A6AEA963 // sin ( 18 Pi / 16 )
-    data8 0xBFED906BCF328D46 // cos ( 18 Pi / 16 )
-//
-    data8 0xBFE1C73B39AE68C8 // sin ( 19 Pi / 16 )
-    data8 0xBFEA9B66290EA1A3 // cos ( 19 Pi / 16 )
-//
-    data8 0xBFE6A09E667F3BCD // sin ( 20 Pi / 16 )
-    data8 0xBFE6A09E667F3BCD // cos ( 20 Pi / 16 )
-//
-    data8 0xBFEA9B66290EA1A3 // sin ( 21 Pi / 16 )
-    data8 0xBFE1C73B39AE68C8 // cos ( 21 Pi / 16 )
-//
-    data8 0xBFED906BCF328D46 // sin ( 22 Pi / 16 )
-    data8 0xBFD87DE2A6AEA963 // cos ( 22 Pi / 16 )
-//
-    data8 0xBFEF6297CFF75CB0 // sin ( 23 Pi / 16 )
-    data8 0xBFC8F8B83C69A60B // cos ( 23 Pi / 16 )
-//
-    data8 0xBFF0000000000000 // sin ( 24 Pi / 16 )
-    data8 0x0000000000000000 // cos ( 24 Pi / 16 )
-//
-    data8 0xBFEF6297CFF75CB0 // sin ( 25 Pi / 16 )
-    data8 0x3FC8F8B83C69A60B // cos ( 25 Pi / 16 )
-//
-    data8 0xBFED906BCF328D46 // sin ( 26 Pi / 16 )
-    data8 0x3FD87DE2A6AEA963 // cos ( 26 Pi / 16 )
-//
-    data8 0xBFEA9B66290EA1A3 // sin ( 27 Pi / 16 )
-    data8 0x3FE1C73B39AE68C8 // cos ( 27 Pi / 16 )
-//
-    data8 0xBFE6A09E667F3BCD // sin ( 28 Pi / 16 )
-    data8 0x3FE6A09E667F3BCD // cos ( 28 Pi / 16 )
-//
-    data8 0xBFE1C73B39AE68C8 // sin ( 29 Pi / 16 )
-    data8 0x3FEA9B66290EA1A3 // cos ( 29 Pi / 16 )
-//
-    data8 0xBFD87DE2A6AEA963 // sin ( 30 Pi / 16 )
-    data8 0x3FED906BCF328D46 // cos ( 30 Pi / 16 )
-//
-    data8 0xBFC8F8B83C69A60B // sin ( 31 Pi / 16 )
-    data8 0x3FEF6297CFF75CB0 // cos ( 31 Pi / 16 )
-//
-    data8 0x0000000000000000 // sin ( 32 Pi / 16 )
-    data8 0x3FF0000000000000 // cos ( 32 Pi / 16 )
-LOCAL_OBJECT_END(double_sin_cos_beta_k4)
+{ .mmi
+     ldfd      sin_coeff_P3   = [SIN_AD_PQ_1], 16
+(p9) cmp.le    p8,p0        = 33, sin_GR_Mint 
+     shl       sin_GR_index   =    sin_GR_index,4
+}
+;;
 
-.section .text
 
-////////////////////////////////////////////////////////
-// There are two entry points: sin and cos
-// If from sin, p8 is true
-// If from cos, p9 is true
+{     .mfi
+     setf.sig fp_tmp = gr_tmp  // Create constant such that fmpy sets inexact
+     fnma.s1   sin_r     =    f1,sin_Mfloat,SIN_NORM_f8
+(p8) cmp.eq.unc p11,p12=sin_GR_sincos_flag,r0  // p11 if must call dbl cos
+                                               // p12 if must call dbl sin
+}
+{    .mbb
+     add       SIN_AD_PQ_2    =    sin_GR_index,SIN_AD_PQ_1
+(p11) br.cond.spnt COS_DOUBLE
+(p12) br.cond.spnt SIN_DOUBLE
+}
+;;
 
-GLOBAL_IEEE754_ENTRY(sinf)
+.pred.rel "mutex",p6,p7    //SIN_Sin_Flag, SIN_Cos_Flag
+{     .mmi
+(p6) ldfpd     sin_tbl_S,sin_tbl_C =    [SIN_AD_PQ_2]
+(p7) ldfpd     sin_tbl_C,sin_tbl_S =    [SIN_AD_PQ_2]
+               nop.i                           999
+}
+;;
 
-{ .mlx
-      alloc         r32                 = ar.pfs,1,13,0,0
-      movl  sincosf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A //signd of 16/pi
+{     .mfi
+     nop.m                 999
+(p6) fclass.m.unc p8,p0 = f8, 0x0b // If sin, note denormal input to set uflow
+     nop.i                 999
 }
-{ .mlx
-      addl         sincosf_AD_1         = @ltoff(double_sincosf_pi), gp
-      movl  sincosf_GR_rshf_2to61       = 0x47b8000000000000 // 1.1 2^(63+63-2)
-};;
+{     .mfi
+     nop.m                 999
+     fma.s1    sin_t     =    sin_r,sin_r,f0
+     nop.i                 999
+}
+;;
 
-{ .mfi
-      ld8           sincosf_AD_1        = [sincosf_AD_1]
-      fnorm.s1      sincosf_NORM_f8     = f8     // Normalize argument
-      cmp.eq        p8,p9               = r0, r0 // set p8 (clear p9) for sin
+{     .mfi
+     nop.m                 999
+     fma.s1    sin_rcube =    sin_t,sin_r,f0
+     nop.i                 999
 }
-{ .mib
-      mov           sincosf_GR_exp_2tom61 = 0xffff-61 // exponent of scale 2^-61
-      mov           sincosf_r_sincos      = 0x0       // 0 for sin
-      br.cond.sptk  _SINCOSF_COMMON                 // go to common part
-};;
+{     .mfi
+     nop.m                 999
+     fma.s1    sin_tsq   =    sin_t,sin_t,f0
+     nop.i                 999
+}
+;;
 
-GLOBAL_IEEE754_END(sinf)
-GLOBAL_IEEE754_ENTRY(cosf)
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_poly_q3    =    sin_t,sin_coeff_Q4,sin_coeff_Q3
+     nop.i                      999
+}
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_poly_q5    =    sin_t,sin_coeff_Q6,sin_coeff_Q5
+     nop.i                      999
+}
+;;
 
-{ .mlx
-      alloc         r32                 = ar.pfs,1,13,0,0
-      movl  sincosf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A //signd of 16/pi
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_poly_p1    =    sin_t,sin_coeff_P5,sin_coeff_P4
+     nop.i                      999
+}
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_poly_p2    =    sin_t,sin_coeff_P2,sin_coeff_P1
+     nop.i                      999
 }
-{ .mlx
-      addl          sincosf_AD_1        = @ltoff(double_sincosf_pi), gp
-      movl  sincosf_GR_rshf_2to61       = 0x47b8000000000000 // 1.1 2^(63+63-2)
-};;
+;;
 
-{ .mfi
-      ld8           sincosf_AD_1        = [sincosf_AD_1]
-      fnorm.s1      sincosf_NORM_f8     = f8        // Normalize argument
-      cmp.eq        p9,p8               = r0, r0    // set p9 (clear p8) for cos
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_poly_q1    =    sin_t,sin_coeff_Q2,sin_coeff_Q1
+     nop.i                      999
 }
-{ .mib
-      mov           sincosf_GR_exp_2tom61 = 0xffff-61 // exponent of scale 2^-61
-      mov           sincosf_r_sincos      = 0x8       // 8 for cos
-      nop.b         999
-};;
-
-////////////////////////////////////////////////////////
-// All entry points end up here.
-// If from sin, sincosf_r_sincos is 0 and p8 is true
-// If from cos, sincosf_r_sincos is 8 = 2^(k-1) and p9 is true
-// We add sincosf_r_sincos to N
-
-///////////// Common sin and cos part //////////////////
-_SINCOSF_COMMON:
-
-//  Form two constants we need
-//  16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
-//  1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
-//  fcmp used to set denormal, and invalid on snans
-{ .mfi
-      setf.sig      sincosf_SIG_INV_PI_BY_16_2TO61 = sincosf_GR_sig_inv_pi_by_16
-      fclass.m      p6,p0                          = f8, 0xe7 // if x=0,inf,nan
-      mov           sincosf_exp_limit              = 0x10017
-}
-{ .mlx
-      setf.d        sincosf_RSHF_2TO61  = sincosf_GR_rshf_2to61
-      movl          sincosf_GR_rshf     = 0x43e8000000000000 // 1.1000 2^63
-};;                                                          // Right shift
-
-//  Form another constant
-//  2^-61 for scaling Nfloat
-//  0x10017 is register_bias + 24.
-//  So if f8 >= 2^24, go to large argument routines
-{ .mmi
-      getf.exp      sincosf_r_signexp   = f8
-      setf.exp      sincosf_2TOM61      = sincosf_GR_exp_2tom61
-      addl          gr_tmp              = -1,r0 // For "inexect" constant create
-};;
-
-// Load the two pieces of pi/16
-// Form another constant
-//  1.1000...000 * 2^63, the right shift constant
-{ .mmb
-      ldfe          sincosf_Pi_by_16_1  = [sincosf_AD_1],16
-      setf.d        sincosf_RSHF        = sincosf_GR_rshf
-(p6)  br.cond.spnt  _SINCOSF_SPECIAL_ARGS
-};;
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_S_t   =    sin_t,sin_tbl_S,f0
+     nop.i                      999
+}
+;;
 
-// Getting argument's exp for "large arguments" filtering
-{ .mmi
-      ldfe          sincosf_Pi_by_16_2  = [sincosf_AD_1],16
-      setf.sig      fp_tmp              = gr_tmp // constant for inexact set
-      nop.i         999
-};;
+{     .mfi
+     nop.m                 999
+(p8) fmpy.s.s0 fp_tmp2 = f8,f8  // Dummy mult to set underflow if sin(denormal)
+     nop.i                 999
+}
+{     .mfi
+     nop.m                 999
+     fma.s1    sin_r7    =    sin_rcube,sin_tsq,f0
+     nop.i                 999
+}
+;;
 
-// Polynomial coefficients (Q2, Q1, P2, P1) loading
-{ .mmi
-      ldfpd         sincosf_P2,sincosf_Q2 = [sincosf_AD_1],16
-      nop.m         999 
-      nop.i         999 
-};;
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_poly_q3456 =    sin_tsq,sin_poly_q5,sin_poly_q3
+     nop.i                      999
+}
+;;
 
-// Select exponent (17 lsb)
-{ .mmi
-      ldfpd         sincosf_P1,sincosf_Q1 = [sincosf_AD_1],16
-      nop.m         999 
-      dep.z         sincosf_r_exp         = sincosf_r_signexp, 0, 17
-};;
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_poly_p3    =    sin_t,sin_poly_p1,sin_coeff_P3
+     nop.i                      999
+}
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_poly_p4    =    sin_rcube,sin_poly_p2,sin_r
+     nop.i                      999
+}
+;;
 
-// p10 is true if we must call routines to handle larger arguments
-// p10 is true if f8 exp is >= 0x10017 (2^24)
-{ .mfb
-      cmp.ge        p10,p0              = sincosf_r_exp,sincosf_exp_limit
-      nop.f         999
-(p10) br.cond.spnt  _SINCOSF_LARGE_ARGS // Go to "large args" routine
-};;
-
-// sincosf_W          = x * sincosf_Inv_Pi_by_16
-// Multiply x by scaled 16/pi and add large const to shift integer part of W to
-//   rightmost bits of significand
-{ .mfi
-      nop.m         999 
-      fma.s1 sincosf_W_2TO61_RSH = sincosf_NORM_f8, sincosf_SIG_INV_PI_BY_16_2TO61, sincosf_RSHF_2TO61
-      nop.i         999 
-};;
+{     .mfi
+     nop.m                           999
+     fma.s1    sin_tbl_S_tcube     =    sin_S_t,sin_tsq,f0
+     nop.i                           999
+}
+{     .mfi
+     nop.m                      999
+     fma.s1    sin_poly_q12   =    sin_S_t,sin_poly_q1,sin_tbl_S
+     nop.i                      999
+}
+;;
 
-// sincosf_NFLOAT = Round_Int_Nearest(sincosf_W)
-// This is done by scaling back by 2^-61 and subtracting the shift constant
-{ .mfi
-      nop.m         999
-      fms.s1 sincosf_NFLOAT = sincosf_W_2TO61_RSH,sincosf_2TOM61,sincosf_RSHF
-      nop.i         999 
-};;
+{     .mfi
+     nop.m                 999
+     fma.d.s1  sin_of_r  =    sin_r7,sin_poly_p3,sin_poly_p4
+     nop.i                 999
+}
+;;
 
-// get N = (int)sincosf_int_Nfloat
-{ .mfi
-      getf.sig      sincosf_GR_n        = sincosf_W_2TO61_RSH // integer N value
-      nop.f         999
-      nop.i         999 
-};;
+{     .mfi
+     nop.m                           999
+     fma.d.s1  sin_tbl_S_cos_of_r  =    sin_tbl_S_tcube,sin_poly_q3456,sin_poly_q12
+     nop.i                           999
+}
+{     .mfi
+     nop.m                           999
+     fmpy.s0   fp_tmp = fp_tmp, fp_tmp  // Dummy mult to set inexact
+     nop.i                           999
+}
+;;
 
-// Add 2^(k-1) (which is in sincosf_r_sincos=8) to N
-// sincosf_r          = -sincosf_Nfloat * sincosf_Pi_by_16_1 + x
-{ .mfi
-      add           sincosf_GR_n        = sincosf_GR_n, sincosf_r_sincos
-      fnma.s1 sincosf_r = sincosf_NFLOAT, sincosf_Pi_by_16_1, sincosf_NORM_f8
-      nop.i         999 
-};;
 
-// Get M (least k+1 bits of N)
-{ .mmi
-      and           sincosf_GR_m        = 0x1f,sincosf_GR_n // Put mask 0x1F  - 
-      nop.m         999                                     // - select k+1 bits
-      nop.i         999
-};;
+.pred.rel "mutex",p6,p7    //SIN_Sin_Flag, SIN_Cos_Flag
+{     .mfi
+               nop.m            999
+//(SIN_Sin_Flag) fma.s     f8   =    sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
+(p6) fma.s     f8   =    sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
+               nop.i            999
+}
+{     .mfb
+               nop.m            999
+//(SIN_Cos_Flag) fnma.s    f8   =    sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
+(p7) fnma.s    f8   =    sin_tbl_C,sin_of_r,sin_tbl_S_cos_of_r
+               br.ret.sptk     b0
+}
 
-// Add 16*M to address of sin_cos_beta table
-{ .mfi
-      shladd        sincosf_AD_2        = sincosf_GR_32m, 4, sincosf_AD_1
-(p8)  fclass.m.unc  p10,p0              = f8,0x0b  // If sin denormal input -
-      nop.i         999 
-};;
+.endp sinf
+ASM_SIZE_DIRECTIVE(sinf)
 
-// Load Sin and Cos table value using obtained index m  (sincosf_AD_2)
-{ .mfi
-      ldfd          sincosf_Sm          = [sincosf_AD_2],8 // Sin value S[m]
-(p9)  fclass.m.unc  p11,p0              = f8,0x0b  // If cos denormal input -
-      nop.i         999                            // - set denormal
-};;
 
-// sincosf_r          = sincosf_r -sincosf_Nfloat * sincosf_Pi_by_16_2
+.proc SIN_DOUBLE 
+SIN_DOUBLE:
+.prologue
 { .mfi
-      ldfd          sincosf_Cm          = [sincosf_AD_2] // Cos table value C[m]
-      fnma.s1  sincosf_r_exact = sincosf_NFLOAT, sincosf_Pi_by_16_2, sincosf_r
-      nop.i         999
+        nop.m 0
+        nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs
 }
-// get rsq = r*r
-{ .mfi
-      nop.m         999
-      fma.s1        sincosf_rsq         = sincosf_r, sincosf_r,  f0 // r^2 = r*r
-      nop.i         999
-};;
+;;
 
 { .mfi
-      nop.m         999
-      fmpy.s0       fp_tmp              = fp_tmp, fp_tmp // forces inexact flag
-      nop.i         999                                
-};;
-
-// Polynomials calculation 
-// Q = Q2*r^2 + Q1
-// P = P2*r^2 + P1
-{ .mfi
-      nop.m         999
-      fma.s1        sincosf_Q           = sincosf_rsq, sincosf_Q2, sincosf_Q1
-      nop.i         999
+        mov GR_SAVE_GP=gp
+        nop.f 0
+.save   b0, GR_SAVE_B0
+        mov GR_SAVE_B0=b0
 }
-{ .mfi
-      nop.m         999
-      fma.s1        sincosf_P           = sincosf_rsq, sincosf_P2, sincosf_P1
-      nop.i         999 
-};;
 
-// get rcube and S[m]*r^2
-{ .mfi
-      nop.m         999
-      fmpy.s1       sincosf_srsq        = sincosf_Sm,sincosf_rsq // r^2*S[m]
-      nop.i         999
+.body
+{ .mmb
+       nop.m 999
+       nop.m 999
+       br.call.sptk.many   b0=sin 
 }
+;;
+
 { .mfi
-      nop.m         999
-      fmpy.s1       sincosf_rcub        = sincosf_r_exact, sincosf_rsq
-      nop.i         999 
-};;
-
-// Get final P and Q
-// Q = Q*S[m]*r^2 + S[m]
-// P = P*r^3 + r
-{ .mfi
-      nop.m         999
-      fma.s1        sincosf_Q           = sincosf_srsq,sincosf_Q, sincosf_Sm
-      nop.i         999
+       mov gp        = GR_SAVE_GP
+       nop.f 999
+       mov b0        = GR_SAVE_B0
 }
-{ .mfi
-      nop.m         999
-      fma.s1        sincosf_P           = sincosf_rcub,sincosf_P,sincosf_r_exact
-      nop.i         999 
-};;
+;;
 
-// If sinf(denormal) - force underflow to be set
-.pred.rel "mutex",p10,p11
 { .mfi
-      nop.m         999
-(p10) fmpy.s.s0     fp_tmp              = f8,f8 // forces underflow flag
-      nop.i         999                         // for denormal sine args
+      nop.m 999
+      fma.s f8 = f8,f1,f0
+(p0)  mov ar.pfs    = GR_SAVE_PFS
 }
-// If cosf(denormal) - force denormal to be set
-{ .mfi
-      nop.m         999
-(p11) fma.s.s0     fp_tmp              = f8, f1, f8 // forces denormal flag
-      nop.i         999                              // for denormal cosine args
-};;
+{ .mib
+      nop.m 999
+      nop.i 999
+(p0)  br.ret.sptk     b0 
+}
+;;
 
+.endp  SIN_DOUBLE
+ASM_SIZE_DIRECTIVE(SIN_DOUBLE)
 
-// Final calculation
-// result = C[m]*P + Q
-{ .mfb
-      nop.m         999
-      fma.s.s0      f8                  = sincosf_Cm, sincosf_P, sincosf_Q
-      br.ret.sptk   b0 // Exit for common path
-};;
-
-////////// x = 0/Inf/NaN path //////////////////
-_SINCOSF_SPECIAL_ARGS:
-.pred.rel "mutex",p8,p9
-// sinf(+/-0) = +/-0
-// sinf(Inf)  = NaN
-// sinf(NaN)  = NaN
-{ .mfi
-      nop.m         999
-(p8)  fma.s.s0      f8                  = f8, f0, f0 // sinf(+/-0,NaN,Inf)
-      nop.i         999
-}
-// cosf(+/-0) = 1.0
-// cosf(Inf)  = NaN
-// cosf(NaN)  = NaN
-{ .mfb
-      nop.m         999
-(p9)  fma.s.s0      f8                  = f8, f0, f1 // cosf(+/-0,NaN,Inf)
-      br.ret.sptk   b0 // Exit for x = 0/Inf/NaN path
-};;
-
-GLOBAL_IEEE754_END(cosf)
-//////////// x >= 2^24 - large arguments routine call ////////////
-LOCAL_LIBM_ENTRY(__libm_callout_sincosf)
-_SINCOSF_LARGE_ARGS:
+
+.proc COS_DOUBLE 
+COS_DOUBLE:
 .prologue
 { .mfi
-      mov           sincosf_GR_all_ones = -1 // 0xffffffff
-      nop.f         999
-.save ar.pfs,GR_SAVE_PFS
-      mov           GR_SAVE_PFS         = ar.pfs
+        nop.m 0
+        nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs
 }
 ;;
 
 { .mfi
-      mov           GR_SAVE_GP          = gp
-      nop.f         999
-.save b0, GR_SAVE_B0
-      mov           GR_SAVE_B0          = b0
+        mov GR_SAVE_GP=gp
+        nop.f 0
+.save   b0, GR_SAVE_B0
+        mov GR_SAVE_B0=b0
 }
-.body
 
-{ .mbb
-      setf.sig      sincosf_save_tmp    = sincosf_GR_all_ones  // inexact set
-      nop.b         999
-(p8)  br.call.sptk.many b0              = __libm_sin_large# // sinf(large_X)
-};;
-
-{ .mbb
-      cmp.ne        p9,p0               = sincosf_r_sincos, r0 // set p9 if cos
-      nop.b         999
-(p9)  br.call.sptk.many b0              = __libm_cos_large# // cosf(large_X)
-};;
+.body
+{ .mmb
+       nop.m 999
+       nop.m 999
+       br.call.sptk.many   b0=cos 
+}
+;;
 
 { .mfi
-      mov           gp                  = GR_SAVE_GP
-      fma.s.s0      f8                  = f8, f1, f0 // Round result to single
-      mov           b0                  = GR_SAVE_B0
+       mov gp        = GR_SAVE_GP
+       nop.f 999
+       mov b0        = GR_SAVE_B0
 }
-{ .mfi // force inexact set
-      nop.m         999
-      fmpy.s0       sincosf_save_tmp    = sincosf_save_tmp, sincosf_save_tmp 
-      nop.i         999                                               
-};;
+;;
 
+{ .mfi
+      nop.m 999
+      fma.s f8 = f8,f1,f0
+(p0)  mov ar.pfs    = GR_SAVE_PFS
+}
 { .mib
-      nop.m         999
-      mov           ar.pfs              = GR_SAVE_PFS
-      br.ret.sptk   b0 // Exit for large arguments routine call
-};;
-LOCAL_LIBM_END(__libm_callout_sincosf)
+      nop.m 999
+      nop.i 999
+(p0)  br.ret.sptk     b0 
+}
+;;
+
+.endp  COS_DOUBLE
+ASM_SIZE_DIRECTIVE(COS_DOUBLE)
+
 
-.type    __libm_sin_large#, @function
-.global  __libm_sin_large#
-.type    __libm_cos_large#, @function
-.global  __libm_cos_large#
 
+.type sin,@function
+.global sin 
+.type cos,@function
+.global cos 
diff --git a/sysdeps/ia64/fpu/s_cosl.S b/sysdeps/ia64/fpu/s_cosl.S
index 374e822256..2755580c0d 100644
--- a/sysdeps/ia64/fpu/s_cosl.S
+++ b/sysdeps/ia64/fpu/s_cosl.S
@@ -1,10 +1,10 @@
 .file "sincosl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,81 +20,76 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
-//*********************************************************************
+// *********************************************************************
 //
-// History:
-// 02/02/00 (hand-optimized)
-// 04/04/00 Unwind support added
-// 07/30/01 Improved speed on all paths
-// 08/20/01 Fixed bundling typo
-// 05/13/02 Changed interface to __libm_pi_by_2_reduce
-// 02/10/03 Reordered header: .section, .global, .proc, .align;
-//          used data8 for long double table values
+// History: 
+// 2/02/2000 (hand-optimized)
+// 4/04/00  Unwind support added
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Function:   Combined sinl(x) and cosl(x), where
 //
 //             sinl(x) = sine(x), for double-extended precision x values
 //             cosl(x) = cosine(x), for double-extended precision x values
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Resources Used:
 //
-//    Floating-Point Registers: f8 (Input and Return Value)
+//    Floating-Point Registers: f8 (Input and Return Value) 
 //                              f32-f99
 //
 //    General Purpose Registers:
-//      r32-r43
+//      r32-r43 
 //      r44-r45 (Used to pass arguments to pi_by_2 reduce routine)
 //
 //    Predicate Registers:      p6-p13
 //
-//*********************************************************************
+// *********************************************************************
 //
 //  IEEE Special Conditions:
 //
 //    Denormal  fault raised on denormal inputs
 //    Overflow exceptions do not occur
-//    Underflow exceptions raised when appropriate for sin
+//    Underflow exceptions raised when appropriate for sin 
 //    (No specialized error handling for this routine)
 //    Inexact raised when appropriate by algorithm
 //
 //    sinl(SNaN) = QNaN
 //    sinl(QNaN) = QNaN
-//    sinl(inf) = QNaN
+//    sinl(inf) = QNaN 
 //    sinl(+/-0) = +/-0
-//    cosl(inf) = QNaN
+//    cosl(inf) = QNaN 
 //    cosl(SNaN) = QNaN
 //    cosl(QNaN) = QNaN
 //    cosl(0) = 1
-//
-//*********************************************************************
+// 
+// *********************************************************************
 //
 //  Mathematical Description
 //  ========================
 //
-//  The computation of FSIN and FCOS is best handled in one piece of
-//  code. The main reason is that given any argument Arg, computation
-//  of trigonometric functions first calculate N and an approximation
+//  The computation of FSIN and FCOS is best handled in one piece of 
+//  code. The main reason is that given any argument Arg, computation 
+//  of trigonometric functions first calculate N and an approximation 
 //  to alpha where
 //
 //  Arg = N pi/2 + alpha, |alpha| <= pi/4.
@@ -103,62 +98,62 @@
 //
 //  cosl( Arg ) = sinl( (N+1) pi/2 + alpha ),
 //
-//  therefore, the code for computing sine will produce cosine as long
-//  as 1 is added to N immediately after the argument reduction
+//  therefore, the code for computing sine will produce cosine as long 
+//  as 1 is added to N immediately after the argument reduction 
 //  process.
 //
 //  Let M = N if sine
-//      N+1 if cosine.
+//      N+1 if cosine.  
 //
 //  Now, given
 //
 //  Arg = M pi/2  + alpha, |alpha| <= pi/4,
 //
-//  let I = M mod 4, or I be the two lsb of M when M is represented
+//  let I = M mod 4, or I be the two lsb of M when M is represented 
 //  as 2's complement. I = [i_0 i_1]. Then
 //
-//  sinl( Arg ) = (-1)^i_0  sinl( alpha )        if i_1 = 0,
+//  sinl( Arg ) = (-1)^i_0  sinl( alpha )	if i_1 = 0,
 //             = (-1)^i_0  cosl( alpha )     if i_1 = 1.
 //
 //  For example:
-//       if M = -1, I = 11
+//       if M = -1, I = 11   
 //         sin ((-pi/2 + alpha) = (-1) cos (alpha)
-//       if M = 0, I = 00
+//       if M = 0, I = 00   
 //         sin (alpha) = sin (alpha)
-//       if M = 1, I = 01
+//       if M = 1, I = 01   
 //         sin (pi/2 + alpha) = cos (alpha)
-//       if M = 2, I = 10
+//       if M = 2, I = 10   
 //         sin (pi + alpha) = (-1) sin (alpha)
-//       if M = 3, I = 11
+//       if M = 3, I = 11   
 //         sin ((3/2)pi + alpha) = (-1) cos (alpha)
 //
-//  The value of alpha is obtained by argument reduction and
+//  The value of alpha is obtained by argument reduction and 
 //  represented by two working precision numbers r and c where
 //
 //  alpha =  r  +  c     accurately.
 //
 //  The reduction method is described in a previous write up.
-//  The argument reduction scheme identifies 4 cases. For Cases 2
-//  and 4, because |alpha| is small, sinl(r+c) and cosl(r+c) can be
-//  computed very easily by 2 or 3 terms of the Taylor series
+//  The argument reduction scheme identifies 4 cases. For Cases 2 
+//  and 4, because |alpha| is small, sinl(r+c) and cosl(r+c) can be 
+//  computed very easily by 2 or 3 terms of the Taylor series 
 //  expansion as follows:
 //
 //  Case 2:
 //  -------
 //
-//  sinl(r + c) = r + c - r^3/6        accurately
-//  cosl(r + c) = 1 - 2^(-67)        accurately
+//  sinl(r + c) = r + c - r^3/6	accurately
+//  cosl(r + c) = 1 - 2^(-67)	accurately
 //
 //  Case 4:
 //  -------
 //
-//  sinl(r + c) = r + c - r^3/6 + r^5/120        accurately
-//  cosl(r + c) = 1 - r^2/2 + r^4/24                accurately
+//  sinl(r + c) = r + c - r^3/6 + r^5/120	accurately
+//  cosl(r + c) = 1 - r^2/2 + r^4/24		accurately
 //
-//  The only cases left are Cases 1 and 3 of the argument reduction
-//  procedure. These two cases will be merged since after the
-//  argument is reduced in either cases, we have the reduced argument
-//  represented as r + c and that the magnitude |r + c| is not small
+//  The only cases left are Cases 1 and 3 of the argument reduction 
+//  procedure. These two cases will be merged since after the 
+//  argument is reduced in either cases, we have the reduced argument 
+//  represented as r + c and that the magnitude |r + c| is not small 
 //  enough to allow the usage of a very short approximation.
 //
 //  The required calculation is either
@@ -168,32 +163,32 @@
 //
 //  Specifically,
 //
-//        sinl(r + c) = sinl(r) + c sin'(r) + O(c^2)
-//                   = sinl(r) + c cos (r) + O(c^2)
-//                   = sinl(r) + c(1 - r^2/2)  accurately.
+//	sinl(r + c) = sinl(r) + c sin'(r) + O(c^2)
+//		   = sinl(r) + c cos (r) + O(c^2)
+//		   = sinl(r) + c(1 - r^2/2)  accurately.
 //  Similarly,
 //
-//        cosl(r + c) = cosl(r) - c sinl(r) + O(c^2)
-//                   = cosl(r) - c(r - r^3/6)  accurately.
+//	cosl(r + c) = cosl(r) - c sinl(r) + O(c^2)
+//		   = cosl(r) - c(r - r^3/6)  accurately.
 //
-//  We therefore concentrate on accurately calculating sinl(r) and
+//  We therefore concentrate on accurately calculating sinl(r) and 
 //  cosl(r) for a working-precision number r, |r| <= pi/4 to within
 //  0.1% or so.
 //
-//  The greatest challenge of this task is that the second terms of
+//  The greatest challenge of this task is that the second terms of 
 //  the Taylor series
-//
-//        r - r^3/3! + r^r/5! - ...
+//	
+//	r - r^3/3! + r^r/5! - ...
 //
 //  and
 //
-//        1 - r^2/2! + r^4/4! - ...
+//	1 - r^2/2! + r^4/4! - ...
 //
-//  are not very small when |r| is close to pi/4 and the rounding
-//  errors will be a concern if simple polynomial accumulation is
-//  used. When |r| < 2^-3, however, the second terms will be small
-//  enough (6 bits or so of right shift) that a normal Horner
-//  recurrence suffices. Hence there are two cases that we consider
+//  are not very small when |r| is close to pi/4 and the rounding 
+//  errors will be a concern if simple polynomial accumulation is 
+//  used. When |r| < 2^-3, however, the second terms will be small 
+//  enough (6 bits or so of right shift) that a normal Horner 
+//  recurrence suffices. Hence there are two cases that we consider 
 //  in the accurate computation of sinl(r) and cosl(r), |r| <= pi/4.
 //
 //  Case small_r: |r| < 2^(-3)
@@ -202,88 +197,88 @@
 //  Since Arg = M pi/4 + r + c accurately, and M mod 4 is [i_0 i_1],
 //  we have
 //
-//        sinl(Arg) = (-1)^i_0 * sinl(r + c)        if i_1 = 0
-//                 = (-1)^i_0 * cosl(r + c)         if i_1 = 1
+//	sinl(Arg) = (-1)^i_0 * sinl(r + c)	if i_1 = 0
+//		 = (-1)^i_0 * cosl(r + c) 	if i_1 = 1
 //
 //  can be accurately approximated by
 //
-//  sinl(Arg) = (-1)^i_0 * [sinl(r) + c]        if i_1 = 0
+//  sinl(Arg) = (-1)^i_0 * [sinl(r) + c]	if i_1 = 0
 //           = (-1)^i_0 * [cosl(r) - c*r] if i_1 = 1
 //
-//  because |r| is small and thus the second terms in the correction
+//  because |r| is small and thus the second terms in the correction 
 //  are unneccessary.
 //
-//  Finally, sinl(r) and cosl(r) are approximated by polynomials of
+//  Finally, sinl(r) and cosl(r) are approximated by polynomials of 
 //  moderate lengths.
 //
 //  sinl(r) =  r + S_1 r^3 + S_2 r^5 + ... + S_5 r^11
 //  cosl(r) =  1 + C_1 r^2 + C_2 r^4 + ... + C_5 r^10
 //
-//  We can make use of predicates to selectively calculate
-//  sinl(r) or cosl(r) based on i_1.
+//  We can make use of predicates to selectively calculate 
+//  sinl(r) or cosl(r) based on i_1. 
 //
 //  Case normal_r: 2^(-3) <= |r| <= pi/4
 //  ------------------------------------
 //
 //  This case is more likely than the previous one if one considers
 //  r to be uniformly distributed in [-pi/4 pi/4]. Again,
+// 
+//  sinl(Arg) = (-1)^i_0 * sinl(r + c)	if i_1 = 0
+//           = (-1)^i_0 * cosl(r + c) 	if i_1 = 1.
 //
-//  sinl(Arg) = (-1)^i_0 * sinl(r + c)        if i_1 = 0
-//           = (-1)^i_0 * cosl(r + c)         if i_1 = 1.
-//
-//  Because |r| is now larger, we need one extra term in the
+//  Because |r| is now larger, we need one extra term in the 
 //  correction. sinl(Arg) can be accurately approximated by
 //
 //  sinl(Arg) = (-1)^i_0 * [sinl(r) + c(1-r^2/2)]      if i_1 = 0
 //           = (-1)^i_0 * [cosl(r) - c*r*(1 - r^2/6)]    i_1 = 1.
 //
-//  Finally, sinl(r) and cosl(r) are approximated by polynomials of
+//  Finally, sinl(r) and cosl(r) are approximated by polynomials of 
 //  moderate lengths.
 //
-//        sinl(r) =  r + PP_1_hi r^3 + PP_1_lo r^3 +
-//                      PP_2 r^5 + ... + PP_8 r^17
+//	sinl(r) =  r + PP_1_hi r^3 + PP_1_lo r^3 + 
+//	              PP_2 r^5 + ... + PP_8 r^17
 //
-//        cosl(r) =  1 + QQ_1 r^2 + QQ_2 r^4 + ... + QQ_8 r^16
+//	cosl(r) =  1 + QQ_1 r^2 + QQ_2 r^4 + ... + QQ_8 r^16
 //
-//  where PP_1_hi is only about 16 bits long and QQ_1 is -1/2.
-//  The crux in accurate computation is to calculate
+//  where PP_1_hi is only about 16 bits long and QQ_1 is -1/2. 
+//  The crux in accurate computation is to calculate 
 //
 //  r + PP_1_hi r^3   or  1 + QQ_1 r^2
 //
-//  accurately as two pieces: U_hi and U_lo. The way to achieve this
-//  is to obtain r_hi as a 10 sig. bit number that approximates r to
+//  accurately as two pieces: U_hi and U_lo. The way to achieve this 
+//  is to obtain r_hi as a 10 sig. bit number that approximates r to 
 //  roughly 8 bits or so of accuracy. (One convenient way is
 //
 //  r_hi := frcpa( frcpa( r ) ).)
 //
 //  This way,
 //
-//        r + PP_1_hi r^3 =  r + PP_1_hi r_hi^3 +
-//                                PP_1_hi (r^3 - r_hi^3)
-//                        =  [r + PP_1_hi r_hi^3]  +
-//                           [PP_1_hi (r - r_hi)
-//                              (r^2 + r_hi r + r_hi^2) ]
-//                        =  U_hi  +  U_lo
+//	r + PP_1_hi r^3 =  r + PP_1_hi r_hi^3 +
+//	                        PP_1_hi (r^3 - r_hi^3)
+//		        =  [r + PP_1_hi r_hi^3]  +  
+//			   [PP_1_hi (r - r_hi) 
+//			      (r^2 + r_hi r + r_hi^2) ]
+//		        =  U_hi  +  U_lo
 //
 //  Since r_hi is only 10 bit long and PP_1_hi is only 16 bit long,
-//  PP_1_hi * r_hi^3 is only at most 46 bit long and thus computed
-//  exactly. Furthermore, r and PP_1_hi r_hi^3 are of opposite sign
-//  and that there is no more than 8 bit shift off between r and
-//  PP_1_hi * r_hi^3. Hence the sum, U_hi, is representable and thus
-//  calculated without any error. Finally, the fact that
+//  PP_1_hi * r_hi^3 is only at most 46 bit long and thus computed 
+//  exactly. Furthermore, r and PP_1_hi r_hi^3 are of opposite sign 
+//  and that there is no more than 8 bit shift off between r and 
+//  PP_1_hi * r_hi^3. Hence the sum, U_hi, is representable and thus 
+//  calculated without any error. Finally, the fact that 
 //
-//        |U_lo| <= 2^(-8) |U_hi|
+//	|U_lo| <= 2^(-8) |U_hi|
 //
-//  says that U_hi + U_lo is approximating r + PP_1_hi r^3 to roughly
+//  says that U_hi + U_lo is approximating r + PP_1_hi r^3 to roughly 
 //  8 extra bits of accuracy.
 //
 //  Similarly,
 //
-//        1 + QQ_1 r^2  =  [1 + QQ_1 r_hi^2]  +
-//                            [QQ_1 (r - r_hi)(r + r_hi)]
-//                      =  U_hi  +  U_lo.
-//
-//  Summarizing, we calculate r_hi = frcpa( frcpa( r ) ).
+//	1 + QQ_1 r^2  =  [1 + QQ_1 r_hi^2]  +
+//	                    [QQ_1 (r - r_hi)(r + r_hi)]
+//		      =  U_hi  +  U_lo.
+//		      
+//  Summarizing, we calculate r_hi = frcpa( frcpa( r ) ). 
 //
 //  If i_1 = 0, then
 //
@@ -302,35 +297,35 @@
 //  End
 //
 //  Finally,
-//
-//        V := poly + ( U_lo + correction )
+// 
+//	V := poly + ( U_lo + correction )
 //
 //                 /    U_hi  +  V         if i_0 = 0
-//        result := |
+//	result := |
 //                 \  (-U_hi) -  V         if i_0 = 1
 //
-//  It is important that in the last step, negation of U_hi is
-//  performed prior to the subtraction which is to be performed in
-//  the user-set rounding mode.
+//  It is important that in the last step, negation of U_hi is 
+//  performed prior to the subtraction which is to be performed in 
+//  the user-set rounding mode. 
 //
 //
 //  Algorithmic Description
 //  =======================
 //
-//  The argument reduction algorithm is tightly integrated into FSIN
-//  and FCOS which share the same code. The following is complete and
-//  self-contained. The argument reduction description given
+//  The argument reduction algorithm is tightly integrated into FSIN 
+//  and FCOS which share the same code. The following is complete and 
+//  self-contained. The argument reduction description given 
 //  previously is repeated below.
 //
 //
-//  Step 0. Initialization.
+//  Step 0. Initialization. 
 //
 //   If FSIN is invoked, set N_inc := 0; else if FCOS is invoked,
 //   set N_inc := 1.
 //
 //  Step 1. Check for exceptional and special cases.
 //
-//   * If Arg is +-0, +-inf, NaN, NaT, go to Step 10 for special
+//   * If Arg is +-0, +-inf, NaN, NaT, go to Step 10 for special 
 //     handling.
 //   * If |Arg| < 2^24, go to Step 2 for reduction of moderate
 //     arguments. This is the most likely case.
@@ -340,18 +335,18 @@
 //
 //  Step 2. Reduction of moderate arguments.
 //
-//  If |Arg| < pi/4         ...quick branch
-//     N_fix := N_inc        (integer)
+//  If |Arg| < pi/4 	...quick branch
+//     N_fix := N_inc	(integer)
 //     r     := Arg
 //     c     := 0.0
 //     Branch to Step 4, Case_1_complete
-//  Else                 ...cf. argument reduction
-//     N     := Arg * two_by_PI        (fp)
-//     N_fix := fcvt.fx( N )        (int)
+//  Else 		...cf. argument reduction
+//     N     := Arg * two_by_PI	(fp)
+//     N_fix := fcvt.fx( N )	(int)
 //     N     := fcvt.xf( N_fix )
 //     N_fix := N_fix + N_inc
-//     s     := Arg - N * P_1        (first piece of pi/2)
-//     w     := -N * P_2        (second piece of pi/2)
+//     s     := Arg - N * P_1	(first piece of pi/2)
+//     w     := -N * P_2	(second piece of pi/2)
 //
 //     If |s| >= 2^(-33)
 //        go to Step 3, Case_1_reduce
@@ -363,8 +358,8 @@
 //  Step 3. Case_1_reduce.
 //
 //  r := s + w
-//  c := (s - r) + w        ...observe order
-//
+//  c := (s - r) + w	...observe order
+//   
 //  Step 4. Case_1_complete
 //
 //  ...At this point, the reduced argument alpha is
@@ -380,17 +375,17 @@
 //
 //  If i_1 = 0, then
 //    poly := r*FR_rsq*(PP_1_lo + FR_rsq*(PP_2 + ... FR_rsq*PP_8))
-//    U_hi := r + PP_1_hi*r_hi*r_hi*r_hi        ...any order
+//    U_hi := r + PP_1_hi*r_hi*r_hi*r_hi	...any order
 //    U_lo := PP_1_hi*r_lo*(r*r + r*r_hi + r_hi*r_hi)
-//    correction := c + c*C_1*FR_rsq                ...any order
+//    correction := c + c*C_1*FR_rsq		...any order
 //  Else
 //    poly := FR_rsq*FR_rsq*(QQ_2 + FR_rsq*(QQ_3 + ... + FR_rsq*QQ_8))
-//    U_hi := 1 + QQ_1 * r_hi * r_hi                ...any order
+//    U_hi := 1 + QQ_1 * r_hi * r_hi		...any order
 //    U_lo := QQ_1 * r_lo * (r + r_hi)
-//    correction := -c*(r + S_1*FR_rsq*r)        ...any order
+//    correction := -c*(r + S_1*FR_rsq*r)	...any order
 //  Endif
 //
-//  V := poly + (U_lo + correction)        ...observe order
+//  V := poly + (U_lo + correction)	...observe order
 //
 //  result := (i_0 == 0?   1.0 : -1.0)
 //
@@ -402,7 +397,7 @@
 //  Return
 //
 //  Step 6. Small_r.
-//
+// 
 //  ...Use flush to zero mode without causing exception
 //    Let [i_0 i_1] be the two lsb of N_fix.
 //
@@ -417,7 +412,7 @@
 //  Else
 //     z := FR_rsq*FR_rsq; z := FR_rsq*z
 //     poly_lo := C_3 + FR_rsq*(C_4 + FR_rsq*C_5)
-//     poly_hi := FR_rsq*(C_1 + FR_rsq*C_2)
+//     poly_hi := FR_rsq*(C_1 + FR_rsq*C_2) 
 //     correction := -c*r
 //     result := 1
 //  Endif
@@ -434,15 +429,15 @@
 //
 //  Step 7. Case_2_reduce.
 //
-//  ...Refer to the write up for argument reduction for
+//  ...Refer to the write up for argument reduction for 
 //  ...rationale. The reduction algorithm below is taken from
 //  ...argument reduction description and integrated this.
 //
 //  w := N*P_3
-//  U_1 := N*P_2 + w                ...FMA
-//  U_2 := (N*P_2 - U_1) + w        ...2 FMA
+//  U_1 := N*P_2 + w		...FMA
+//  U_2 := (N*P_2 - U_1) + w	...2 FMA
 //  ...U_1 + U_2 is  N*(P_2+P_3) accurately
-//
+//   
 //  r := s - U_1
 //  c := ( (s - r) - U_1 ) - U_2
 //
@@ -451,29 +446,29 @@
 //  ...Case 1, this case requires much more work to reduce
 //  ...the argument, the subsequent calculation needed for
 //  ...any of the trigonometric function is very little because
-//  ...|alpha| < 1.01*2^(-33) and thus two terms of the
+//  ...|alpha| < 1.01*2^(-33) and thus two terms of the 
 //  ...Taylor series expansion suffices.
 //
 //  If i_1 = 0 then
-//     poly := c + S_1 * r * r * r        ...any order
+//     poly := c + S_1 * r * r * r	...any order
 //     result := r
 //  Else
 //     poly := -2^(-67)
 //     result := 1.0
 //  Endif
-//
+//   
 //  If i_0 = 1, result := -result
 //
 //  Last operation. Perform in user-set rounding mode
 //
 //  result := (i_0 == 0?     result + poly :
 //                           result - poly )
-//
+//   
 //  Return
 //
-//
+//  
 //  Step 8. Pre-reduction of large arguments.
-//
+// 
 //  ...Again, the following reduction procedure was described
 //  ...in the separate write up for argument reduction, which
 //  ...is tightly integrated here.
@@ -481,13 +476,13 @@
 //  N_0 := Arg * Inv_P_0
 //  N_0_fix := fcvt.fx( N_0 )
 //  N_0 := fcvt.xf( N_0_fix)
-
+   
 //  Arg' := Arg - N_0 * P_0
 //  w := N_0 * d_1
 //  N := Arg' * two_by_PI
 //  N_fix := fcvt.fx( N )
 //  N := fcvt.xf( N_fix )
-//  N_fix := N_fix + N_inc
+//  N_fix := N_fix + N_inc 
 //
 //  s := Arg' - N * P_1
 //  w := w - N * P_2
@@ -499,15 +494,15 @@
 //  Endif
 //
 //  Step 9. Case_4_reduce.
-//
+// 
 //    ...first obtain N_0*d_1 and -N*P_2 accurately
-//   U_hi := N_0 * d_1                V_hi := -N*P_2
-//   U_lo := N_0 * d_1 - U_hi        V_lo := -N*P_2 - U_hi        ...FMAs
+//   U_hi := N_0 * d_1		V_hi := -N*P_2
+//   U_lo := N_0 * d_1 - U_hi	V_lo := -N*P_2 - V_hi	...FMAs
 //
 //   ...compute the contribution from N_0*d_1 and -N*P_3
 //   w := -N*P_3
 //   w := w + N_0*d_2
-//   t := U_lo + V_lo + w                ...any order
+//   t := U_lo + V_lo + w		...any order
 //
 //   ...at this point, the mathematical value
 //   ...s + U_hi + V_hi  + t approximates the true reduced argument
@@ -522,12 +517,12 @@
 //   endif
 //   ...order in computing "a" must be observed. This branch is
 //   ...best implemented by predicates.
-//   ...A + a  is U_hi + V_hi accurately. Moreover, "a" is
+//   ...A + a  is U_hi + V_hi accurately. Moreover, "a" is 
 //   ...much smaller than A: |a| <= (1/2)ulp(A).
 //
 //   ...Just need to calculate   s + A + a + t
-//   C_hi := s + A                t := t + a
-//   C_lo := (s - C_hi) + A
+//   C_hi := s + A		t := t + a
+//   C_lo := (s - C_hi) + A	
 //   C_lo := C_lo + t
 //
 //   ...Final steps for reduction
@@ -553,191 +548,156 @@
 //   result := (i_0 == 0?     result + poly :
 //                            result - poly )
 //   Return
-//
+//  
 //   Large Arguments: For arguments above 2**63, a Payne-Hanek
 //   style argument reduction is used and pi_by_2 reduce is called.
 //
 
-
-RODATA
-.align 16
-
-LOCAL_OBJECT_START(FSINCOSL_CONSTANTS)
-
-sincosl_table_p:
-data8 0xA2F9836E4E44152A, 0x00003FFE // Inv_pi_by_2
-data8 0xC84D32B0CE81B9F1, 0x00004016 // P_0
-data8 0xC90FDAA22168C235, 0x00003FFF // P_1
-data8 0xECE675D1FC8F8CBB, 0x0000BFBD // P_2
-data8 0xB7ED8FBBACC19C60, 0x0000BF7C // P_3
-data8 0x8D848E89DBD171A1, 0x0000BFBF // d_1
-data8 0xD5394C3618A66F8E, 0x0000BF7C // d_2
-LOCAL_OBJECT_END(FSINCOSL_CONSTANTS)
-
-LOCAL_OBJECT_START(sincosl_table_d)
-data8 0xC90FDAA22168C234, 0x00003FFE // pi_by_4
-data8 0xA397E5046EC6B45A, 0x00003FE7 // Inv_P_0
-data4 0x3E000000, 0xBE000000         // 2^-3 and -2^-3
-data4 0x2F000000, 0xAF000000         // 2^-33 and -2^-33
-data4 0x9E000000, 0x00000000         // -2^-67
-data4 0x00000000, 0x00000000         // pad
-LOCAL_OBJECT_END(sincosl_table_d)
-
-LOCAL_OBJECT_START(sincosl_table_pp)
-data8 0xCC8ABEBCA21C0BC9, 0x00003FCE // PP_8
-data8 0xD7468A05720221DA, 0x0000BFD6 // PP_7
-data8 0xB092382F640AD517, 0x00003FDE // PP_6
-data8 0xD7322B47D1EB75A4, 0x0000BFE5 // PP_5
-data8 0xFFFFFFFFFFFFFFFE, 0x0000BFFD // C_1
-data8 0xAAAA000000000000, 0x0000BFFC // PP_1_hi
-data8 0xB8EF1D2ABAF69EEA, 0x00003FEC // PP_4
-data8 0xD00D00D00D03BB69, 0x0000BFF2 // PP_3
-data8 0x8888888888888962, 0x00003FF8 // PP_2
-data8 0xAAAAAAAAAAAB0000, 0x0000BFEC // PP_1_lo
-LOCAL_OBJECT_END(sincosl_table_pp)
-
-LOCAL_OBJECT_START(sincosl_table_qq)
-data8 0xD56232EFC2B0FE52, 0x00003FD2 // QQ_8
-data8 0xC9C99ABA2B48DCA6, 0x0000BFDA // QQ_7
-data8 0x8F76C6509C716658, 0x00003FE2 // QQ_6
-data8 0x93F27DBAFDA8D0FC, 0x0000BFE9 // QQ_5
-data8 0xAAAAAAAAAAAAAAAA, 0x0000BFFC // S_1
-data8 0x8000000000000000, 0x0000BFFE // QQ_1
-data8 0xD00D00D00C6E5041, 0x00003FEF // QQ_4
-data8 0xB60B60B60B607F60, 0x0000BFF5 // QQ_3
-data8 0xAAAAAAAAAAAAAA9B, 0x00003FFA // QQ_2
-LOCAL_OBJECT_END(sincosl_table_qq)
-
-LOCAL_OBJECT_START(sincosl_table_c)
-data8 0xFFFFFFFFFFFFFFFE, 0x0000BFFD // C_1
-data8 0xAAAAAAAAAAAA719F, 0x00003FFA // C_2
-data8 0xB60B60B60356F994, 0x0000BFF5 // C_3
-data8 0xD00CFFD5B2385EA9, 0x00003FEF // C_4
-data8 0x93E4BD18292A14CD, 0x0000BFE9 // C_5
-LOCAL_OBJECT_END(sincosl_table_c)
-
-LOCAL_OBJECT_START(sincosl_table_s)
-data8 0xAAAAAAAAAAAAAAAA, 0x0000BFFC // S_1
-data8 0x88888888888868DB, 0x00003FF8 // S_2
-data8 0xD00D00D0055EFD4B, 0x0000BFF2 // S_3
-data8 0xB8EF1C5D839730B9, 0x00003FEC // S_4
-data8 0xD71EA3A4E5B3F492, 0x0000BFE5 // S_5
-data4 0x38800000, 0xB8800000                        // two**-14 and -two**-14
-LOCAL_OBJECT_END(sincosl_table_s)
-
-FR_Input_X        = f8
-FR_Result         = f8
-
-FR_r              = f8
-FR_c              = f9
-
-FR_norm_x         = f9
-FR_inv_pi_2to63   = f10
-FR_rshf_2to64     = f11
-FR_2tom64         = f12
-FR_rshf           = f13
-FR_N_float_signif = f14
-FR_abs_x          = f15
-FR_Pi_by_4        = f34
-FR_Two_to_M14     = f35
-FR_Neg_Two_to_M14 = f36
-FR_Two_to_M33     = f37
-FR_Neg_Two_to_M33 = f38
-FR_Neg_Two_to_M67 = f39
-FR_Inv_pi_by_2    = f40
-FR_N_float        = f41
-FR_N_fix          = f42
-FR_P_1            = f43
-FR_P_2            = f44
-FR_P_3            = f45
-FR_s              = f46
-FR_w              = f47
-FR_d_2            = f48
-FR_tmp_result     = f49
-FR_Z              = f50
-FR_A              = f51
-FR_a              = f52
-FR_t              = f53
-FR_U_1            = f54
-FR_U_2            = f55
-FR_C_1            = f56
-FR_C_2            = f57
-FR_C_3            = f58
-FR_C_4            = f59
-FR_C_5            = f60
-FR_S_1            = f61
-FR_S_2            = f62
-FR_S_3            = f63
-FR_S_4            = f64
-FR_S_5            = f65
-FR_poly_hi        = f66
-FR_poly_lo        = f67
-FR_r_hi           = f68
-FR_r_lo           = f69
-FR_rsq            = f70
-FR_r_cubed        = f71
-FR_C_hi           = f72
-FR_N_0            = f73
-FR_d_1            = f74
-FR_V              = f75
-FR_V_hi           = f75
-FR_V_lo           = f76
-FR_U_hi           = f77
-FR_U_lo           = f78
-FR_U_hiabs        = f79
-FR_V_hiabs        = f80
-FR_PP_8           = f81
-FR_QQ_8           = f101
-FR_PP_7           = f82
-FR_QQ_7           = f102
-FR_PP_6           = f83
-FR_QQ_6           = f103
-FR_PP_5           = f84
-FR_QQ_5           = f104
-FR_PP_4           = f85
-FR_QQ_4           = f105
-FR_PP_3           = f86
-FR_QQ_3           = f106
-FR_PP_2           = f87
-FR_QQ_2           = f107
-FR_QQ_1           = f108
-FR_r_hi_sq        = f88
-FR_N_0_fix        = f89
-FR_Inv_P_0        = f90
-FR_corr           = f91
-FR_poly           = f92
-FR_Neg_Two_to_M3  = f93
-FR_Two_to_M3      = f94
-FR_P_0            = f95
-FR_C_lo           = f96
-FR_PP_1           = f97
-FR_PP_1_lo        = f98
-FR_ArgPrime       = f99
-FR_inexact        = f100
-
-GR_sig_inv_pi  = r14
-GR_rshf_2to64  = r15
-GR_exp_2tom64  = r16
-GR_rshf        = r17
-GR_ad_p        = r18
-GR_ad_d        = r19
-GR_ad_pp       = r20
-GR_ad_qq       = r21
-GR_ad_c        = r22
-GR_ad_s        = r23
-GR_ad_ce       = r24
-GR_ad_se       = r25
-GR_ad_m14      = r26
-GR_ad_s1       = r27
-GR_exp_m2_to_m3= r36
-GR_N_Inc       = r37
-GR_Sin_or_Cos  = r38
-GR_signexp_x   = r40
-GR_exp_x       = r40
-GR_exp_mask    = r41
-GR_exp_2_to_63 = r42
-GR_exp_2_to_m3 = r43
-GR_exp_2_to_24 = r44
+#include "libm_support.h" 
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+.align 64 
+
+FSINCOSL_CONSTANTS:
+ASM_TYPE_DIRECTIVE(FSINCOSL_CONSTANTS,@object)
+data4 0x4B800000, 0xCB800000, 0x00000000,0x00000000 // two**24, -two**24
+data4 0x4E44152A, 0xA2F9836E, 0x00003FFE,0x00000000 // Inv_pi_by_2
+data4 0xCE81B9F1, 0xC84D32B0, 0x00004016,0x00000000 // P_0 
+data4 0x2168C235, 0xC90FDAA2, 0x00003FFF,0x00000000 // P_1 
+data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD,0x00000000 // P_2 
+data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C,0x00000000 // P_3 
+data4 0x5F000000, 0xDF000000, 0x00000000,0x00000000 // two_to_63, -two_to_63
+data4 0x6EC6B45A, 0xA397E504, 0x00003FE7,0x00000000 // Inv_P_0 
+data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF,0x00000000 // d_1 
+data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C,0x00000000 // d_2 
+data4 0x2168C234, 0xC90FDAA2, 0x00003FFE,0x00000000 // pi_by_4 
+data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE,0x00000000 // neg_pi_by_4 
+data4 0x3E000000, 0xBE000000, 0x00000000,0x00000000 // two**-3, -two**-3
+data4 0x2F000000, 0xAF000000, 0x9E000000,0x00000000 // two**-33, -two**-33, -two**-67
+data4 0xA21C0BC9, 0xCC8ABEBC, 0x00003FCE,0x00000000 // PP_8 
+data4 0x720221DA, 0xD7468A05, 0x0000BFD6,0x00000000 // PP_7 
+data4 0x640AD517, 0xB092382F, 0x00003FDE,0x00000000 // PP_6 
+data4 0xD1EB75A4, 0xD7322B47, 0x0000BFE5,0x00000000 // PP_5 
+data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1 
+data4 0x00000000, 0xAAAA0000, 0x0000BFFC,0x00000000 // PP_1_hi 
+data4 0xBAF69EEA, 0xB8EF1D2A, 0x00003FEC,0x00000000 // PP_4 
+data4 0x0D03BB69, 0xD00D00D0, 0x0000BFF2,0x00000000 // PP_3 
+data4 0x88888962, 0x88888888, 0x00003FF8,0x00000000 // PP_2
+data4 0xAAAB0000, 0xAAAAAAAA, 0x0000BFEC,0x00000000 // PP_1_lo 
+data4 0xC2B0FE52, 0xD56232EF, 0x00003FD2,0x00000000 // QQ_8
+data4 0x2B48DCA6, 0xC9C99ABA, 0x0000BFDA,0x00000000 // QQ_7
+data4 0x9C716658, 0x8F76C650, 0x00003FE2,0x00000000 // QQ_6
+data4 0xFDA8D0FC, 0x93F27DBA, 0x0000BFE9,0x00000000 // QQ_5
+data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1 
+data4 0x00000000, 0x80000000, 0x0000BFFE,0x00000000 // QQ_1 
+data4 0x0C6E5041, 0xD00D00D0, 0x00003FEF,0x00000000 // QQ_4 
+data4 0x0B607F60, 0xB60B60B6, 0x0000BFF5,0x00000000 // QQ_3 
+data4 0xAAAAAA9B, 0xAAAAAAAA, 0x00003FFA,0x00000000 // QQ_2 
+data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1 
+data4 0xAAAA719F, 0xAAAAAAAA, 0x00003FFA,0x00000000 // C_2 
+data4 0x0356F994, 0xB60B60B6, 0x0000BFF5,0x00000000 // C_3
+data4 0xB2385EA9, 0xD00CFFD5, 0x00003FEF,0x00000000 // C_4 
+data4 0x292A14CD, 0x93E4BD18, 0x0000BFE9,0x00000000 // C_5
+data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1 
+data4 0x888868DB, 0x88888888, 0x00003FF8,0x00000000 // S_2 
+data4 0x055EFD4B, 0xD00D00D0, 0x0000BFF2,0x00000000 // S_3 
+data4 0x839730B9, 0xB8EF1C5D, 0x00003FEC,0x00000000 // S_4
+data4 0xE5B3F492, 0xD71EA3A4, 0x0000BFE5,0x00000000 // S_5
+data4 0x38800000, 0xB8800000, 0x00000000            // two**-14, -two**-14
+ASM_SIZE_DIRECTIVE(FSINCOSL_CONSTANTS)
+
+FR_Input_X        = f8 
+FR_Neg_Two_to_M3  = f32 
+FR_Two_to_63      = f32 
+FR_Two_to_24      = f33 
+FR_Pi_by_4        = f33 
+FR_Two_to_M14     = f34 
+FR_Two_to_M33     = f35 
+FR_Neg_Two_to_24  = f36 
+FR_Neg_Pi_by_4    = f36 
+FR_Neg_Two_to_M14 = f37 
+FR_Neg_Two_to_M33 = f38 
+FR_Neg_Two_to_M67 = f39 
+FR_Inv_pi_by_2    = f40 
+FR_N_float        = f41 
+FR_N_fix          = f42 
+FR_P_1            = f43 
+FR_P_2            = f44 
+FR_P_3            = f45 
+FR_s              = f46 
+FR_w              = f47 
+FR_c              = f48 
+FR_r              = f49 
+FR_Z              = f50 
+FR_A              = f51 
+FR_a              = f52 
+FR_t              = f53 
+FR_U_1            = f54 
+FR_U_2            = f55 
+FR_C_1            = f56 
+FR_C_2            = f57 
+FR_C_3            = f58 
+FR_C_4            = f59 
+FR_C_5            = f60 
+FR_S_1            = f61 
+FR_S_2            = f62 
+FR_S_3            = f63 
+FR_S_4            = f64 
+FR_S_5            = f65 
+FR_poly_hi        = f66 
+FR_poly_lo        = f67 
+FR_r_hi           = f68 
+FR_r_lo           = f69 
+FR_rsq            = f70 
+FR_r_cubed        = f71 
+FR_C_hi           = f72 
+FR_N_0            = f73 
+FR_d_1            = f74 
+FR_V              = f75 
+FR_V_hi           = f75 
+FR_V_lo           = f76 
+FR_U_hi           = f77 
+FR_U_lo           = f78 
+FR_U_hiabs        = f79 
+FR_V_hiabs        = f80 
+FR_PP_8           = f81 
+FR_QQ_8           = f81 
+FR_PP_7           = f82 
+FR_QQ_7           = f82 
+FR_PP_6           = f83 
+FR_QQ_6           = f83 
+FR_PP_5           = f84 
+FR_QQ_5           = f84 
+FR_PP_4           = f85 
+FR_QQ_4           = f85 
+FR_PP_3           = f86 
+FR_QQ_3           = f86 
+FR_PP_2           = f87 
+FR_QQ_2           = f87 
+FR_QQ_1           = f88 
+FR_N_0_fix        = f89 
+FR_Inv_P_0        = f90 
+FR_corr           = f91 
+FR_poly           = f92 
+FR_d_2            = f93 
+FR_Two_to_M3      = f94 
+FR_Neg_Two_to_63  = f94 
+FR_P_0            = f95 
+FR_C_lo           = f96 
+FR_PP_1           = f97 
+FR_PP_1_lo        = f98 
+FR_ArgPrime       = f99 
+
+GR_Table_Base  = r32 
+GR_Table_Base1 = r33 
+GR_i_0         = r34
+GR_i_1         = r35
+GR_N_Inc       = r36 
+GR_Sin_or_Cos  = r37 
 
 // Added for unwind support
 
@@ -746,376 +706,386 @@ GR_SAVE_GP     = r40
 GR_SAVE_PFS    = r41
 
 
-.section .text
+.global sinl#
+.global cosl#
+#ifdef _LIBC
+.global __sinl#
+.global __cosl#
+#endif
 
-GLOBAL_IEEE754_ENTRY(sinl)
+.section .text
+.proc sinl#
+#ifdef _LIBC
+.proc __sinl#
+#endif
+.align 64 
+sinl:
+#ifdef _LIBC
+__sinl:
+#endif
 { .mlx
-      alloc r32 = ar.pfs,0,12,2,0
-      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
+alloc GR_Table_Base = ar.pfs,0,12,2,0
+(p0)   movl GR_Sin_or_Cos = 0x0 ;;
 }
-{ .mlx
-      mov GR_Sin_or_Cos = 0x0
-      movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
+
+{ .mmi
+      nop.m 999
+(p0)  addl           GR_Table_Base   = @ltoff(FSINCOSL_CONSTANTS#), gp
+      nop.i 999
 }
 ;;
 
-{ .mfi
-      addl           GR_ad_p   = @ltoff(FSINCOSL_CONSTANTS#), gp
-      fclass.m p6, p0 =  FR_Input_X, 0x1E3 // Test x natval, nan, inf
-      mov GR_exp_2_to_m3 = 0xffff - 3      // Exponent of 2^-3
-}
-{ .mfb
+{ .mmb
+      ld8 GR_Table_Base = [GR_Table_Base]
       nop.m 999
-      fnorm.s1 FR_norm_x = FR_Input_X      // Normalize x
-      br.cond.sptk SINCOSL_CONTINUE
+(p0)   br.cond.sptk L(SINCOSL_CONTINUE) ;;
 }
 ;;
 
-GLOBAL_IEEE754_END(sinl)
-GLOBAL_IEEE754_ENTRY(cosl)
-{ .mlx
-      alloc r32 = ar.pfs,0,12,2,0
-      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
-}
+
+.endp sinl#
+ASM_SIZE_DIRECTIVE(sinl#)
+
+.section .text
+.proc cosl#
+cosl:
+#ifdef _LIBC
+.proc __cosl#
+__cosl:
+#endif
 { .mlx
-      mov GR_Sin_or_Cos = 0x1
-      movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
+alloc GR_Table_Base= ar.pfs,0,12,2,0
+(p0)   movl GR_Sin_or_Cos = 0x1 ;;
 }
 ;;
 
-{ .mfi
-      addl           GR_ad_p   = @ltoff(FSINCOSL_CONSTANTS#), gp
-      fclass.m p6, p0 =  FR_Input_X, 0x1E3 // Test x natval, nan, inf
-      mov GR_exp_2_to_m3 = 0xffff - 3      // Exponent of 2^-3
-}
-{ .mfi
+{ .mmi
       nop.m 999
-      fnorm.s1 FR_norm_x = FR_Input_X      // Normalize x
+(p0)  addl           GR_Table_Base   = @ltoff(FSINCOSL_CONSTANTS#), gp
       nop.i 999
 }
 ;;
 
-SINCOSL_CONTINUE:
-{ .mfi
-      setf.sig FR_inv_pi_2to63 = GR_sig_inv_pi // Form 1/pi * 2^63
-      nop.f 999
-      mov GR_exp_2tom64 = 0xffff - 64      // Scaling constant to compute N
-}
-{ .mlx
-      setf.d FR_rshf_2to64 = GR_rshf_2to64    // Form const 1.1000 * 2^(63+64)
-      movl GR_rshf = 0x43e8000000000000       // Form const 1.1000 * 2^63
+{ .mmb
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
+      nop.b 999
 }
 ;;
 
-{ .mfi
-      ld8 GR_ad_p = [GR_ad_p]              // Point to Inv_pi_by_2
-      fclass.m p7, p0 = FR_Input_X, 0x0b   // Test x denormal
-      nop.i 999
-}
-;;
 
-{ .mfi
-      getf.exp GR_signexp_x = FR_Input_X   // Get sign and exponent of x
-      fclass.m p10, p0 = FR_Input_X, 0x007 // Test x zero
-      nop.i 999
+
+//
+//     Load Table Address
+//
+
+L(SINCOSL_CONTINUE): 
+{ .mmi
+(p0)   add GR_Table_Base1 = 96, GR_Table_Base
+(p0)   ldfs	FR_Two_to_24 = [GR_Table_Base], 4
+// GR_Sin_or_Cos denotes the entry point taken: 0x0 for sinl, 0x1 for cosl
+(p0)   mov   r39 = b0 ;;
 }
-{ .mib
-      mov GR_exp_mask = 0x1ffff            // Exponent mask
-      nop.i 999
-(p6)  br.cond.spnt SINCOSL_SPECIAL         // Branch if x natval, nan, inf
+{ .mmi
+       nop.m 0
+//
+//     Load 2**24, load 2**63.
+//
+(p0)   ldfs	FR_Neg_Two_to_24 = [GR_Table_Base], 12
+       nop.i 0
 }
-;;
-
 { .mfi
-      setf.exp FR_2tom64 = GR_exp_2tom64   // Form 2^-64 for scaling N_float
-      nop.f 0
-      add GR_ad_d = 0x70, GR_ad_p          // Point to constant table d
+(p0)   ldfs	FR_Two_to_63 = [GR_Table_Base1], 4
+//
+//     Check for unnormals - unsupported operands. We do not want
+//     to generate denormal exception
+//     Check for NatVals, QNaNs, SNaNs, +/-Infs
+//     Check for EM unsupporteds
+//     Check for Zero 
+//
+(p0)   fclass.m.unc  p6, p0 =  FR_Input_X, 0x1E3
+       nop.i 0
+};;
+{ .mmf
+        nop.m 999
+(p0)   ldfs	FR_Neg_Two_to_63 = [GR_Table_Base1], 12
+(p0)   fclass.nm.unc p8, p0 =  FR_Input_X, 0x1FF
 }
-{ .mib
-      setf.d FR_rshf = GR_rshf         // Form right shift const 1.1000 * 2^63
-      mov  GR_exp_m2_to_m3 = 0x2fffc       // Form -(2^-3)
-(p7)  br.cond.spnt SINCOSL_DENORMAL        // Branch if x denormal
+{ .mfb
+	nop.m 999
+(p0)   fclass.m.unc p10, p0 = FR_Input_X, 0x007
+(p6)   br.cond.spnt L(SINCOSL_SPECIAL) ;;
 }
-;;
-
-SINCOSL_COMMON:
-{ .mfi
-      and GR_exp_x = GR_exp_mask, GR_signexp_x // Get exponent of x
-      fclass.nm p8, p0 = FR_Input_X, 0x1FF // Test x unsupported type
-      mov GR_exp_2_to_63 = 0xffff + 63     // Exponent of 2^63
+{ .mib
+	nop.m 999
+	nop.i 999
+(p8)   br.cond.spnt L(SINCOSL_SPECIAL) ;;
 }
 { .mib
-      add GR_ad_pp = 0x40, GR_ad_d         // Point to constant table pp
-      mov GR_exp_2_to_24 = 0xffff + 24     // Exponent of 2^24
-(p10) br.cond.spnt SINCOSL_ZERO            // Branch if x zero
+	nop.m 999
+	nop.i 999
+//
+//     Branch if +/- NaN, Inf.
+//     Load -2**24, load -2**63.
+//
+(p10)  br.cond.spnt L(SINCOSL_ZERO) ;;
 }
-;;
-
-{ .mfi
-      ldfe FR_Inv_pi_by_2 = [GR_ad_p], 16  // Load 2/pi
-      fcmp.eq.s0 p15, p0 = FR_Input_X, f0  // Dummy to set denormal
-      add GR_ad_qq = 0xa0, GR_ad_pp        // Point to constant table qq
+{ .mmb
+(p0)   ldfe	FR_Inv_pi_by_2 = [GR_Table_Base], 16
+(p0)   ldfe	FR_Inv_P_0 = [GR_Table_Base1], 16
+	nop.b 999 ;;
 }
-{ .mfi
-      ldfe FR_Pi_by_4 = [GR_ad_d], 16      // Load pi/4 for range test
-      nop.f 999
-      cmp.ge p10,p0 = GR_exp_x, GR_exp_2_to_63   // Is |x| >= 2^63
+{ .mmb
+(p0)   ldfe		FR_d_1 = [GR_Table_Base1], 16
+//
+//     Raise possible denormal operand flag with useful fcmp
+//     Is x <= -2**63
+//     Load Inv_P_0 for pre-reduction
+//     Load Inv_pi_by_2
+//
+(p0)   ldfe		FR_P_0 = [GR_Table_Base], 16
+	nop.b 999 ;;
 }
-;;
-
-{ .mfi
-      ldfe FR_P_0 = [GR_ad_p], 16          // Load P_0 for pi/4 <= |x| < 2^63
-      fmerge.s FR_abs_x = f1, FR_norm_x    // |x|
-      add GR_ad_c = 0x90, GR_ad_qq         // Point to constant table c
+{ .mmb
+(p0)   ldfe	FR_d_2 = [GR_Table_Base1], 16
+//
+//     Load P_0
+//     Load d_1
+//     Is x >= 2**63
+//     Is x <= -2**24?
+//
+(p0)   ldfe	FR_P_1 = [GR_Table_Base], 16
+	nop.b 999 ;;
 }
+//
+//     Load P_1
+//     Load d_2
+//     Is x >= 2**24?
+//
 { .mfi
-      ldfe FR_Inv_P_0 = [GR_ad_d], 16      // Load 1/P_0 for pi/4 <= |x| < 2^63
-      nop.f 999
-      cmp.ge p7,p0 = GR_exp_x, GR_exp_2_to_24   // Is |x| >= 2^24
+(p0)   ldfe	FR_P_2 = [GR_Table_Base], 16
+(p0)   fcmp.le.unc.s1	p7, p8 = FR_Input_X, FR_Neg_Two_to_24
+	nop.i 999 ;;
 }
-;;
-
-{ .mfi
-      ldfe FR_P_1 = [GR_ad_p], 16          // Load P_1 for pi/4 <= |x| < 2^63
-      nop.f 999
-      add GR_ad_s = 0x50, GR_ad_c          // Point to constant table s
+{ .mbb
+(p0)   ldfe	FR_P_3 = [GR_Table_Base], 16
+	nop.b 999
+	nop.b 999 ;;
 }
 { .mfi
-      ldfe FR_PP_8 = [GR_ad_pp], 16        // Load PP_8 for 2^-3 < |r| < pi/4
-      nop.f 999
-      nop.i 999
+	nop.m 999
+(p8)   fcmp.ge.s1 p7, p0 = FR_Input_X, FR_Two_to_24
+	nop.i 999
 }
-;;
-
 { .mfi
-      ldfe FR_P_2 = [GR_ad_p], 16          // Load P_2 for pi/4 <= |x| < 2^63
-      nop.f 999
-      add GR_ad_ce = 0x40, GR_ad_c         // Point to end of constant table c
+(p0)   ldfe	FR_Pi_by_4 = [GR_Table_Base1], 16
+//
+//     Branch if +/- zero.
+//     Decide about the paths to take:
+//     If -2**24 < FR_Input_X < 2**24 - CASE 1 OR 2 
+//     OTHERWISE - CASE 3 OR 4 
+//
+(p0)   fcmp.le.unc.s0	p10, p11 = FR_Input_X, FR_Neg_Two_to_63
+	nop.i 999 ;;
 }
-{ .mfi
-      ldfe FR_QQ_8 = [GR_ad_qq], 16        // Load QQ_8 for 2^-3 < |r| < pi/4
-      nop.f 999
-      nop.i 999
+{ .mmi
+(p0)   ldfe	FR_Neg_Pi_by_4 = [GR_Table_Base1], 16 ;;
+(p0)   ldfs	FR_Two_to_M3 = [GR_Table_Base1], 4
+	nop.i 999
 }
-;;
-
 { .mfi
-      ldfe FR_QQ_7 = [GR_ad_qq], 16        // Load QQ_7 for 2^-3 < |r| < pi/4
-      fma.s1        FR_N_float_signif = FR_Input_X, FR_inv_pi_2to63, FR_rshf_2to64
-      add GR_ad_se = 0x40, GR_ad_s         // Point to end of constant table s
+	nop.m 999
+(p11)  fcmp.ge.s1	p10, p0 = FR_Input_X, FR_Two_to_63
+	nop.i 999 ;;
 }
 { .mib
-      ldfe FR_PP_7 = [GR_ad_pp], 16        // Load PP_7 for 2^-3 < |r| < pi/4
-      mov GR_ad_s1 = GR_ad_s               // Save pointer to S_1
-(p10) br.cond.spnt SINCOSL_ARG_TOO_LARGE   // Branch if |x| >= 2^63
-                                           // Use Payne-Hanek Reduction
-}
-;;
-
-{ .mfi
-      ldfe FR_P_3 = [GR_ad_p], 16          // Load P_3 for pi/4 <= |x| < 2^63
-      fmerge.se FR_r = FR_norm_x, FR_norm_x // r = x, in case |x| < pi/4
-      add GR_ad_m14 = 0x50, GR_ad_s        // Point to constant table m14
-}
-{ .mfb
-      ldfps FR_Two_to_M3, FR_Neg_Two_to_M3 = [GR_ad_d], 8
-      fma.s1 FR_rsq = FR_norm_x, FR_norm_x, f0 // rsq = x*x, in case |x| < pi/4
-(p7)  br.cond.spnt SINCOSL_LARGER_ARG      // Branch if 2^24 <= |x| < 2^63
-                                           // Use pre-reduction
-}
-;;
-
-{ .mmf
-      ldfe FR_PP_6 = [GR_ad_pp], 16       // Load PP_6 for normal path
-      ldfe FR_QQ_6 = [GR_ad_qq], 16       // Load QQ_6 for normal path
-      fmerge.se FR_c = f0, f0             // c = 0 in case |x| < pi/4
+(p0)   ldfs	FR_Neg_Two_to_M3 = [GR_Table_Base1], 12
+	nop.i 999
+//
+//     Load P_2
+//     Load P_3
+//     Load pi_by_4
+//     Load neg_pi_by_4
+//     Load 2**(-3)
+//     Load -2**(-3).
+//
+(p10)  br.cond.spnt L(SINCOSL_ARG_TOO_LARGE) ;;
 }
-;;
-
-{ .mmf
-      ldfe FR_PP_5 = [GR_ad_pp], 16       // Load PP_5 for normal path
-      ldfe FR_QQ_5 = [GR_ad_qq], 16       // Load QQ_5 for normal path
-      nop.f 999
+{ .mib
+	nop.m 999
+	nop.i 999
+//
+//     Branch out if x >= 2**63. Use Payne-Hanek Reduction
+//
+(p7)   br.cond.spnt L(SINCOSL_LARGER_ARG) ;;
 }
-;;
-
-// Here if 0 < |x| < 2^24
 { .mfi
-      ldfe FR_S_5 = [GR_ad_se], -16       // Load S_5 if i_1=0
-      fcmp.lt.s1  p6, p7 = FR_abs_x, FR_Pi_by_4  // Test |x| < pi/4
-      nop.i 999
+	nop.m 999
+// 
+//     Branch if Arg <= -2**24 or Arg >= 2**24 and use pre-reduction.
+//
+(p0)   fma.s1	FR_N_float = FR_Input_X, FR_Inv_pi_by_2, f0
+	nop.i 999 ;;
 }
 { .mfi
-      ldfe FR_C_5 = [GR_ad_ce], -16       // Load C_5 if i_1=1
-      fms.s1 FR_N_float = FR_N_float_signif, FR_2tom64, FR_rshf
-      nop.i 999
+	nop.m 999
+(p0)   fcmp.lt.unc.s1	p6, p7 = FR_Input_X, FR_Pi_by_4
+	nop.i 999 ;;
 }
-;;
-
-{ .mmi
-      ldfe FR_S_4 = [GR_ad_se], -16       // Load S_4 if i_1=0
-      ldfe FR_C_4 = [GR_ad_ce], -16       // Load C_4 if i_1=1
-      nop.i 999
+{ .mfi
+	nop.m 999
+// 
+//     Select the case when |Arg| < pi/4 
+//     Else Select the case when |Arg| >= pi/4 
+//
+(p0)   fcvt.fx.s1 FR_N_fix = FR_N_float
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
 //     N  = Arg * 2/pi
 //     Check if Arg < pi/4
 //
+(p6)   fcmp.gt.s1 p6, p7 = FR_Input_X, FR_Neg_Pi_by_4
+	nop.i 999 ;;
+}
 //
 //     Case 2: Convert integer N_fix back to normalized floating-point value.
 //     Case 1: p8 is only affected  when p6 is set
 //
+{ .mfi
+(p7)   ldfs FR_Two_to_M33 = [GR_Table_Base1], 4
 //
 //     Grab the integer part of N and call it N_fix
 //
-{ .mfi
-(p7)  ldfps FR_Two_to_M33, FR_Neg_Two_to_M33 = [GR_ad_d], 8
-(p6)  fma.s1 FR_r_cubed = FR_r, FR_rsq, f0        // r^3 if |x| < pi/4
-(p6)  mov GR_N_Inc = GR_Sin_or_Cos                // N_Inc if |x| < pi/4
-}
-;;
-
-//     If |x| < pi/4, r = x and c = 0
+(p6)   fmerge.se FR_r = FR_Input_X, FR_Input_X
+//     If |x| < pi/4, r = x and c = 0 
 //     lf |x| < pi/4, is x < 2**(-3).
-//     r = Arg
+//     r = Arg 
 //     c = 0
-{ .mmi
-(p7)  getf.sig        GR_N_Inc = FR_N_float_signif
-(p6)  cmp.lt.unc p8,p0 = GR_exp_x, GR_exp_2_to_m3   // Is |x| < 2^-3
-(p6)  tbit.z p9,p10 = GR_N_Inc, 0         // p9  if i_1=0, N mod 4 = 0,1
-                                          // p10 if i_1=1, N mod 4 = 2,3
+(p6)   mov GR_N_Inc = GR_Sin_or_Cos ;;
 }
-;;
-
+{ .mmf
+	nop.m 999
+(p7)   ldfs FR_Neg_Two_to_M33 = [GR_Table_Base1], 4
+(p6)   fmerge.se FR_c = f0, f0
+}
+{ .mfi
+	nop.m 999
+(p6)   fcmp.lt.unc.s1	p8, p9 = FR_Input_X, FR_Two_to_M3
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
 //
 //     lf |x| < pi/4, is -2**(-3)< x < 2**(-3) - set p8.
-//     If |x| >= pi/4,
-//     Create the right N for |x| < pi/4 and otherwise
+//     If |x| >= pi/4, 
+//     Create the right N for |x| < pi/4 and otherwise 
 //     Case 2: Place integer part of N in GP register
 //
-
-
-{ .mbb
-      nop.m 999
-(p8)  br.cond.spnt SINCOSL_SMALL_R_0    // Branch if 0 < |x| < 2^-3
-(p6)  br.cond.spnt SINCOSL_NORMAL_R_0   // Branch if 2^-3 <= |x| < pi/4
+(p7)   fcvt.xf FR_N_float = FR_N_fix
+	nop.i 999 ;;
 }
-;;
-
-// Here if pi/4 <= |x| < 2^24
-{ .mfi
-      ldfs FR_Neg_Two_to_M67 = [GR_ad_d], 8     // Load -2^-67
-      fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X // s = -N * P_1  + Arg
-      add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos    // Adjust N_Inc for sin/cos
+{ .mmf
+	nop.m 999
+(p7)   getf.sig	GR_N_Inc = FR_N_fix
+(p8)   fcmp.gt.s1 p8, p0 = FR_Input_X, FR_Neg_Two_to_M3 ;;
 }
-{ .mfi
-      nop.m 999
-      fma.s1 FR_w = FR_N_float, FR_P_2, f0      // w = N * P_2
-      nop.i 999
+{ .mib
+	nop.m 999
+	nop.i 999
+//
+//     Load 2**(-33), -2**(-33)
+//
+(p8)   br.cond.spnt L(SINCOSL_SMALL_R) ;;
 }
-;;
-
-{ .mfi
-      nop.m 999
-      fms.s1 FR_r = FR_s, f1, FR_w        // r = s - w, assume |s| >= 2^-33
-      tbit.z p9,p10 = GR_N_Inc, 0         // p9  if i_1=0, N mod 4 = 0,1
-                                          // p10 if i_1=1, N mod 4 = 2,3
+{ .mib
+	nop.m 999
+	nop.i 999
+(p6)   br.cond.sptk L(SINCOSL_NORMAL_R) ;;
 }
-;;
-
+//
+//     if |x| < pi/4, branch based on |x| < 2**(-3) or otherwise.
+//
+//
+//     In this branch, |x| >= pi/4.
+// 
 { .mfi
-      nop.m 999
-      fcmp.lt.s1 p7, p6 = FR_s, FR_Two_to_M33
-      nop.i 999
+(p0)   ldfs FR_Neg_Two_to_M67 = [GR_Table_Base1], 8
+//
+//     Load -2**(-67)
+// 
+(p0)   fnma.s1	FR_s = FR_N_float, FR_P_1, FR_Input_X
+//
+//     w = N * P_2
+//     s = -N * P_1  + Arg
+//
+(p0)   add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos
 }
-;;
-
 { .mfi
-      nop.m 999
-(p7)  fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33 // p6 if |s| >= 2^-33, else p7
-      nop.i 999
+	nop.m 999
+(p0)   fma.s1	FR_w = FR_N_float, FR_P_2, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fms.s1 FR_c = FR_s, f1, FR_r             // c = s - r, for |s| >= 2^-33
-      nop.i 999
+	nop.m 999
+// 
+//     Adjust N_fix by N_inc to determine whether sine or
+//     cosine is being calculated
+//
+(p0)   fcmp.lt.unc.s1 p7, p6 = FR_s, FR_Two_to_M33
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 FR_rsq = FR_r, FR_r, f0           // rsq = r * r, for |s| >= 2^-33
-      nop.i 999
+	nop.m 999
+(p7)   fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p7)  fma.s1 FR_w = FR_N_float, FR_P_3, f0
-      nop.i 999
+	nop.m 999
+//     Remember x >= pi/4.
+//     Is s <= -2**(-33) or s >= 2**(-33) (p6)
+//     or -2**(-33) < s < 2**(-33) (p7)
+(p6)   fms.s1 FR_r = FR_s, f1, FR_w
+	nop.i 999
 }
-;;
-
-{ .mmf
-(p9)  ldfe FR_C_1 = [GR_ad_pp], 16     // Load C_1 if i_1=0
-(p10) ldfe FR_S_1 = [GR_ad_qq], 16     // Load S_1 if i_1=1
-      frcpa.s1 FR_r_hi, p15 = f1, FR_r  // r_hi = frcpa(r)
-}
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fcmp.lt.unc.s1 p8, p13 = FR_r, FR_Two_to_M3 // If big s, test r with 2^-3
-      nop.i 999
+	nop.m 999
+(p7)   fma.s1 FR_w = FR_N_float, FR_P_3, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p7)  fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w
-      nop.i 999
+	nop.m 999
+(p7)   fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w
+	nop.i 999
 }
-;;
-
-//
-//     For big s: r = s - w: No futher reduction is necessary
-//     For small s: w = N * P_3 (change sign) More reduction
-//
 { .mfi
-        nop.m 999
-(p8)   fcmp.gt.s1 p8, p13 = FR_r, FR_Neg_Two_to_M3 // If big s, p8 if |r| < 2^-3
-        nop.i 999 ;;
+	nop.m 999
+(p6)   fms.s1 FR_c = FR_s, f1, FR_r
+	nop.i 999 ;;
 }
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7 // poly = rsq*PP_8+PP_7 if i_1=0
-      nop.i 999
+	nop.m 999
+// 
+//     For big s: r = s - w: No further reduction is necessary
+//     For small s: w = N * P_3 (change sign) More reduction
+//
+(p6)   fcmp.lt.unc.s1 p8, p9 = FR_r, FR_Two_to_M3
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7 // poly = rsq*QQ_8+QQ_7 if i_1=1
-      nop.i 999
+	nop.m 999
+(p8)   fcmp.gt.s1 p8, p9 = FR_r, FR_Neg_Two_to_M3
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-        nop.m 999
+	nop.m 999
 (p7)   fms.s1 FR_r = FR_s, f1, FR_U_1
-        nop.i 999
+	nop.i 999
 }
-;;
-
-{ .mfi
-      nop.m 999
-(p6)  fma.s1 FR_r_cubed = FR_r, FR_rsq, f0  // rcubed = r * rsq
-      nop.i 999
-}
-;;
-
-{ .mfi
+{ .mfb
+	nop.m 999
 //
 //     For big s: Is |r| < 2**(-3)?
 //     For big s: c = S - r
@@ -1125,356 +1095,355 @@ SINCOSL_COMMON:
 //     If p9 is set, prepare to branch to Normal_R.
 //     For big s,  r is complete here.
 //
-//
+(p6)   fms.s1 FR_c = FR_c, f1, FR_w
+// 
 //     For big s: c = c + w (w has not been negated.)
 //     For small s: r = S - U_1
 //
-      nop.m 999
-(p6)  fms.s1 FR_c = FR_c, f1, FR_w
-      nop.i 999
+(p8)   br.cond.spnt	L(SINCOSL_SMALL_R) ;;
 }
-{ .mbb
-      nop.m 999
-(p8)  br.cond.spnt    SINCOSL_SMALL_R_1  // Branch if |s|>=2^-33, |r| < 2^-3,
-                                         // and pi/4 <= |x| < 2^24
-(p13) br.cond.sptk    SINCOSL_NORMAL_R_1 // Branch if |s|>=2^-33, |r| >= 2^-3,
-                                         // and pi/4 <= |x| < 2^24
+{ .mib
+	nop.m 999
+	nop.i 999
+(p9)   br.cond.sptk	L(SINCOSL_NORMAL_R) ;;
 }
-;;
-
-SINCOSL_S_TINY:
-//
-// Here if |s| < 2^-33, and pi/4 <= |x| < 2^24
-//
 { .mfi
-       fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1
+(p7)   add GR_Table_Base1 = 224, GR_Table_Base1
+//
+//     Branch to SINCOSL_SMALL_R or SINCOSL_NORMAL_R
 //
+(p7)   fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1
+// 
 //     c = S - U_1
 //     r = S_1 * r
 //
 //
+(p7)   extr.u	GR_i_1 = GR_N_Inc, 0, 1 ;;
 }
-;;
-
 { .mmi
-        nop.m 999
+	nop.m 999
 //
 //     Get [i_0,i_1] - two lsb of N_fix_gr.
 //     Do dummy fmpy so inexact is always set.
 //
-      tbit.z p9,p10 = GR_N_Inc, 0      // p9  if i_1=0, N mod 4 = 0,1
-                                       // p10 if i_1=1, N mod 4 = 2,3
+(p7)   cmp.eq.unc p9, p10 = 0x0, GR_i_1
+(p7)   extr.u	GR_i_0 = GR_N_Inc, 1, 1 ;;
 }
-;;
-
-//
+// 
 //     For small s: U_2 = N * P_2 - U_1
 //     S_1 stored constant - grab the one stored with the
 //     coefficients.
-//
+// 
 { .mfi
-       ldfe FR_S_1 = [GR_ad_s1], 16
+(p7)   ldfe FR_S_1 = [GR_Table_Base1], 16
 //
 //     Check if i_1 and i_0  != 0
 //
-(p10)  fma.s1        FR_poly = f0, f1, FR_Neg_Two_to_M67
-      tbit.z p11,p12 = GR_N_Inc, 1     // p11 if i_0=0, N mod 4 = 0,2
-                                       // p12 if i_0=1, N mod 4 = 1,3
+(p10)  fma.s1	FR_poly = f0, f1, FR_Neg_Two_to_M67
+(p7)   cmp.eq.unc p11, p12 = 0x0, GR_i_0 ;;
 }
-;;
-
 { .mfi
-        nop.m 999
-       fms.s1        FR_s = FR_s, f1, FR_r
-        nop.i 999
+	nop.m 999
+(p7)   fms.s1	FR_s = FR_s, f1, FR_r
+	nop.i 999
 }
 { .mfi
-        nop.m 999
-//
+	nop.m 999
+// 
 //     S = S - r
 //     U_2 = U_2 + w
 //     load S_1
 //
-       fma.s1        FR_rsq = FR_r, FR_r, f0
-        nop.i 999 ;;
+(p7)   fma.s1	FR_rsq = FR_r, FR_r, f0
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-       fma.s1        FR_U_2 = FR_U_2, f1, FR_w
-        nop.i 999
+	nop.m 999
+(p7)   fma.s1	FR_U_2 = FR_U_2, f1, FR_w
+	nop.i 999
 }
 { .mfi
-        nop.m 999
-       fmerge.se FR_tmp_result = FR_r, FR_r
-        nop.i 999 ;;
+	nop.m 999
+(p7)   fmerge.se FR_Input_X = FR_r, FR_r
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-(p10)  fma.s1 FR_tmp_result = f0, f1, f1
-        nop.i 999 ;;
+	nop.m 999
+(p10)  fma.s1 FR_Input_X = f0, f1, f1
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-//
+	nop.m 999
+// 
 //     FR_rsq = r * r
 //     Save r as the result.
 //
-       fms.s1        FR_c = FR_s, f1, FR_U_1
-        nop.i 999 ;;
+(p7)   fms.s1	FR_c = FR_s, f1, FR_U_1
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-//
+	nop.m 999
+// 
 //     if ( i_1 ==0) poly = c + S_1*r*r*r
 //     else Result = 1
 //
-(p12)  fnma.s1 FR_tmp_result = FR_tmp_result, f1, f0
-        nop.i 999
+(p12)  fnma.s1 FR_Input_X = FR_Input_X, f1, f0
+	nop.i 999
 }
 { .mfi
-        nop.m 999
-       fma.s1        FR_r = FR_S_1, FR_r, f0
-        nop.i 999 ;;
+	nop.m 999
+(p7)   fma.s1	FR_r = FR_S_1, FR_r, f0
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-       fma.s0        FR_S_1 = FR_S_1, FR_S_1, f0
-        nop.i 999 ;;
+	nop.m 999
+(p7)   fma.s0	FR_S_1 = FR_S_1, FR_S_1, f0
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     If i_1 != 0, poly = 2**(-67)
 //
-       fms.s1 FR_c = FR_c, f1, FR_U_2
-        nop.i 999 ;;
+(p7)   fms.s1 FR_c = FR_c, f1, FR_U_2
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-//
+	nop.m 999
+// 
 //     c = c - U_2
-//
+// 
 (p9)   fma.s1 FR_poly = FR_r, FR_rsq, FR_c
-        nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     i_0 != 0, so Result = -Result
 //
-(p11)  fma.s0 FR_Result = FR_tmp_result, f1, FR_poly
-        nop.i 999 ;;
+(p11)  fma.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+	nop.i 999 ;;
 }
 { .mfb
-        nop.m 999
-(p12)  fms.s0 FR_Result = FR_tmp_result, f1, FR_poly
+	nop.m 999
+(p12)  fms.s0 FR_Input_X = FR_Input_X, f1, FR_poly
 //
 //     if (i_0 == 0),  Result = Result + poly
 //     else            Result = Result - poly
 //
-        br.ret.sptk   b0         // Exit if |s| < 2^-33, and pi/4 <= |x| < 2^24
+(p0)    br.ret.sptk   b0 ;;
+}
+L(SINCOSL_LARGER_ARG): 
+{ .mfi
+	nop.m 999
+(p0)   fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
+	nop.i 999
 }
 ;;
 
-SINCOSL_LARGER_ARG:
-//
-// Here if 2^24 <= |x| < 2^63
+//     This path for argument > 2**24 
+//     Adjust table_ptr1 to beginning of table.
 //
-{ .mfi
-      ldfe FR_d_1 = [GR_ad_p], 16          // Load d_1 for |x| >= 2^24 path
-       fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
-        nop.i 999
+
+{ .mmi
+      nop.m 999
+(p0)  addl           GR_Table_Base   = @ltoff(FSINCOSL_CONSTANTS#), gp
+      nop.i 999
 }
 ;;
 
-//
+{ .mmi
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
+// 
+//     Point to  2**(-14) 
 //     N_0 = Arg * Inv_P_0
 //
-//     Load values 2**(-14) and -2**(-14)
 { .mmi
-       ldfps FR_Two_to_M14, FR_Neg_Two_to_M14 = [GR_ad_m14]
-        nop.i 999 ;;
+(p0)   add GR_Table_Base = 688, GR_Table_Base ;;
+(p0)   ldfs FR_Two_to_M14 = [GR_Table_Base], 4
+	nop.i 999 ;;
 }
 { .mfi
-      ldfe FR_d_2 = [GR_ad_p], 16          // Load d_2 for |x| >= 2^24 path
-        nop.f 999
-        nop.i 999 ;;
+(p0)   ldfs FR_Neg_Two_to_M14 = [GR_Table_Base], 0
+	nop.f 999
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
+//     Load values 2**(-14) and -2**(-14)
 //
-       fcvt.fx.s1 FR_N_0_fix = FR_N_0
-        nop.i 999 ;;
+(p0)   fcvt.fx.s1 FR_N_0_fix = FR_N_0
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     N_0_fix  = integer part of N_0
 //
-       fcvt.xf FR_N_0 = FR_N_0_fix
-        nop.i 999 ;;
+(p0)   fcvt.xf FR_N_0 = FR_N_0_fix 
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     Make N_0 the integer part
 //
-       fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X
-        nop.i 999
+(p0)   fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X
+	nop.i 999
 }
 { .mfi
-        nop.m 999
-       fma.s1 FR_w = FR_N_0, FR_d_1, f0
-        nop.i 999 ;;
+	nop.m 999
+(p0)   fma.s1 FR_w = FR_N_0, FR_d_1, f0
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     Arg' = -N_0 * P_0 + Arg
 //     w  = N_0 * d_1
 //
-       fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0
-        nop.i 999 ;;
+(p0)   fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
-//     N = A' * 2/pi
+//     N = A' * 2/pi	
 //
-       fcvt.fx.s1 FR_N_fix = FR_N_float
-        nop.i 999 ;;
+(p0)   fcvt.fx.s1 FR_N_fix = FR_N_float
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
-//     N_fix is the integer part
+//     N_fix is the integer part	
 //
-       fcvt.xf FR_N_float = FR_N_fix
-        nop.i 999 ;;
+(p0)   fcvt.xf FR_N_float = FR_N_fix 
+	nop.i 999 ;;
 }
 { .mfi
-       getf.sig GR_N_Inc = FR_N_fix
-        nop.f 999
-        nop.i 999 ;;
+(p0)   getf.sig GR_N_Inc = FR_N_fix
+	nop.f 999
+	nop.i 999 ;;
 }
 { .mii
-        nop.m 999
-        nop.i 999 ;;
-       add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos ;;
+	nop.m 999
+	nop.i 999 ;;
+(p0)   add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     N is the integer part of the reduced-reduced argument.
 //     Put the integer in a GP register
 //
-       fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime
-        nop.i 999
+(p0)   fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime
+	nop.i 999
 }
 { .mfi
-        nop.m 999
-       fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w
-        nop.i 999 ;;
+	nop.m 999
+(p0)   fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     s = -N*P_1 + Arg'
 //     w = -N*P_2 + w
 //     N_fix_gr = N_fix_gr + N_inc
 //
-       fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14
-        nop.i 999 ;;
+(p0)   fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-(p9)   fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14  // p9 if |s| < 2^-14
-        nop.i 999 ;;
+	nop.m 999
+(p9)   fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14
+	nop.i 999 ;;
 }
-
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     For |s|  > 2**(-14) r = S + w (r complete)
 //     Else       U_hi = N_0 * d_1
 //
 (p9)   fma.s1 FR_V_hi = FR_N_float, FR_P_2, f0
-        nop.i 999
+	nop.i 999
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 (p9)   fma.s1 FR_U_hi = FR_N_0, FR_d_1, f0
-        nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     Either S <= -2**(-14) or S >= 2**(-14)
 //     or -2**(-14) < s < 2**(-14)
 //
 (p8)   fma.s1 FR_r = FR_s, f1, FR_w
-        nop.i 999
+	nop.i 999
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 (p9)   fma.s1 FR_w = FR_N_float, FR_P_3, f0
-        nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     We need abs of both U_hi and V_hi - don't
 //     worry about switched sign of V_hi.
 //
 (p9)   fms.s1 FR_A = FR_U_hi, f1, FR_V_hi
-        nop.i 999
+	nop.i 999
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
-//     Big s: finish up c = (S - r) + w (c complete)
+//     Big s: finish up c = (S - r) + w (c complete)	
 //     Case 4: A =  U_hi + V_hi
 //     Note: Worry about switched sign of V_hi, so subtract instead of add.
 //
 (p9)   fnma.s1 FR_V_lo = FR_N_float, FR_P_2, FR_V_hi
-        nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mmf
-        nop.m 999
-        nop.m 999
+	nop.m 999
+	nop.m 999
 (p9)   fms.s1 FR_U_lo = FR_N_0, FR_d_1, FR_U_hi
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 (p9)   fmerge.s FR_V_hiabs = f0, FR_V_hi
-        nop.i 999 ;;
+	nop.i 999 ;;
 }
-//{ .mfb
-//(p9)   fmerge.s f8= FR_V_lo,FR_V_lo
-//(p9)   br.ret.sptk b0
-//}
-//;;
 { .mfi
-        nop.m 999
+	nop.m 999
 //     For big s: c = S - r
 //     For small s do more work: U_lo = N_0 * d_1 - U_hi
 //
 (p9)   fmerge.s FR_U_hiabs = f0, FR_U_hi
-        nop.i 999
+	nop.i 999
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
-//     For big s: Is |r| < 2**(-3)
+//     For big s: Is |r| < 2**(-3)	
 //     For big s: if p12 set, prepare to branch to Small_R.
 //     For big s: If p13 set, prepare to branch to Normal_R.
 //
-(p8)   fms.s1 FR_c = FR_s, f1, FR_r
-        nop.i 999 ;;
+(p8)   fms.s1 FR_c = FR_s, f1, FR_r 
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     For small S: V_hi = N * P_2
 //                  w = N * P_3
@@ -1482,99 +1451,104 @@ SINCOSL_LARGER_ARG:
 //     so (-) missing for V_hi and w.
 //
 (p8)   fcmp.lt.unc.s1 p12, p13 = FR_r, FR_Two_to_M3
-        nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 (p12)  fcmp.gt.s1 p12, p13 = FR_r, FR_Neg_Two_to_M3
-        nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 (p8)   fma.s1 FR_c = FR_c, f1, FR_w
-        nop.i 999
+	nop.i 999
 }
 { .mfb
-        nop.m 999
+	nop.m 999
 (p9)   fms.s1 FR_w = FR_N_0, FR_d_2, FR_w
-(p12)  br.cond.spnt SINCOSL_SMALL_R      // Branch if |r| < 2^-3
-                                         // and 2^24 <= |x| < 2^63
+(p12)  br.cond.spnt L(SINCOSL_SMALL_R) ;;
 }
-;;
-
 { .mib
-        nop.m 999
-        nop.i 999
-(p13)  br.cond.sptk SINCOSL_NORMAL_R     // Branch if |r| >= 2^-3
-                                         // and 2^24 <= |x| < 2^63
+	nop.m 999
+	nop.i 999
+(p13)  br.cond.sptk L(SINCOSL_NORMAL_R) ;;
 }
-;;
-
-SINCOSL_LARGER_S_TINY:
-//
-// Here if |s| < 2^-14, and 2^24 <= |x| < 2^63
-//
 { .mfi
-        nop.m 999
-//
-//     Big s: Vector off when |r| < 2**(-3).  Recall that p8 will be true.
+	nop.m 999
+// 
+//     Big s: Vector off when |r| < 2**(-3).  Recall that p8 will be true. 
 //     The remaining stuff is for Case 4.
 //     Small s: V_lo = N * P_2 + U_hi (U_hi is in place of V_hi in writeup)
 //     Note: the (-) is still missing for V_lo.
 //     Small s: w = w + N_0 * d_2
 //     Note: the (-) is now incorporated in w.
 //
-       fcmp.ge.unc.s1 p7, p8 = FR_U_hiabs, FR_V_hiabs
+(p9)   fcmp.ge.unc.s1 p10, p11 = FR_U_hiabs, FR_V_hiabs
+(p0)   extr.u	GR_i_1 = GR_N_Inc, 0, 1
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     C_hi = S + A
 //
-       fma.s1 FR_t = FR_U_lo, f1, FR_V_lo
+(p9)   fma.s1 FR_t = FR_U_lo, f1, FR_V_lo
+(p0)   extr.u	GR_i_0 = GR_N_Inc, 1, 1 ;;
 }
-;;
-
 { .mfi
-        nop.m 999
+	nop.m 999
 //
-//     t = U_lo + V_lo
+//     t = U_lo + V_lo 
 //
 //
-(p7)  fms.s1 FR_a = FR_U_hi, f1, FR_A
-        nop.i 999 ;;
+(p10)  fms.s1 FR_a = FR_U_hi, f1, FR_A
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-(p8)  fma.s1 FR_a = FR_V_hi, f1, FR_A
-        nop.i 999
+	nop.m 999
+(p11)  fma.s1 FR_a = FR_V_hi, f1, FR_A
+	nop.i 999
+}
+;;
+
+{ .mmi
+      nop.m 999
+(p0)  addl           GR_Table_Base   = @ltoff(FSINCOSL_CONSTANTS#), gp
+      nop.i 999
 }
 ;;
 
+{ .mmi
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+
 { .mfi
+(p0)   add GR_Table_Base = 528, GR_Table_Base
 //
 //     Is U_hiabs >= V_hiabs?
 //
-        nop.m 999
-       fma.s1 FR_C_hi = FR_s, f1, FR_A
-        nop.i 999 ;;
+(p9)   fma.s1 FR_C_hi = FR_s, f1, FR_A
+	nop.i 999 ;;
 }
 { .mmi
-       ldfe FR_C_1 = [GR_ad_c], 16 ;;
-       ldfe FR_C_2 = [GR_ad_c], 64
-        nop.i 999 ;;
+(p0)   ldfe FR_C_1 = [GR_Table_Base], 16 ;;
+(p0)   ldfe FR_C_2 = [GR_Table_Base], 64
+	nop.i 999 ;;
 }
 //
 //     c = c + C_lo  finished.
 //     Load  C_2
 //
 { .mfi
-       ldfe        FR_S_1 = [GR_ad_s], 16
+(p0)   ldfe	FR_S_1 = [GR_Table_Base], 16
 //
-//     C_lo = S - C_hi
+//     C_lo = S - C_hi 
 //
-       fma.s1 FR_t = FR_t, f1, FR_w
-        nop.i 999 ;;
+(p0)   fma.s1 FR_t = FR_t, f1, FR_w
+	nop.i 999 ;;
 }
 //
 //     r and c have been computed.
@@ -1584,695 +1558,855 @@ SINCOSL_LARGER_S_TINY:
 //     Load S_1
 //
 { .mfi
-       ldfe FR_S_2 = [GR_ad_s], 64
+(p0)   ldfe FR_S_2 = [GR_Table_Base], 64
 //
-//     t = t + w
+//     t = t + w	
 //
-(p7)  fms.s1 FR_a = FR_a, f1, FR_V_hi
-      tbit.z p9,p10 = GR_N_Inc, 0      // p9  if i_1=0, N mod 4 = 0,1
-                                       // p10 if i_1=1, N mod 4 = 2,3
+(p10)  fms.s1 FR_a = FR_a, f1, FR_V_hi
+(p0)   cmp.eq.unc p9, p10 = 0x0, GR_i_0 ;;
 }
-;;
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     For larger u than v: a = U_hi - A
 //     Else a = V_hi - A (do an add to account for missing (-) on V_hi
 //
-       fms.s1 FR_C_lo = FR_s, f1, FR_C_hi
-        nop.i 999 ;;
+(p0)   fms.s1 FR_C_lo = FR_s, f1, FR_C_hi
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-(p8)  fms.s1 FR_a = FR_U_hi, f1, FR_a
-      tbit.z p11,p12 = GR_N_Inc, 1     // p11 if i_0=0, N mod 4 = 0,2
-                                       // p12 if i_0=1, N mod 4 = 1,3
+	nop.m 999
+(p11)  fms.s1 FR_a = FR_U_hi, f1, FR_a
+(p0)   cmp.eq.unc p11, p12 = 0x0, GR_i_1 ;;
 }
-;;
-
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     If u > v: a = (U_hi - A)  + V_hi
 //     Else      a = (V_hi - A)  + U_hi
 //     In each case account for negative missing from V_hi.
 //
-       fma.s1 FR_C_lo = FR_C_lo, f1, FR_A
-        nop.i 999 ;;
+(p0)   fma.s1 FR_C_lo = FR_C_lo, f1, FR_A
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
-//     C_lo = (S - C_hi) + A
+//     C_lo = (S - C_hi) + A	
 //
-       fma.s1 FR_t = FR_t, f1, FR_a
-        nop.i 999 ;;
+(p0)   fma.s1 FR_t = FR_t, f1, FR_a
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
-//     t = t + a
+//     t = t + a 
 //
-       fma.s1 FR_C_lo = FR_C_lo, f1, FR_t
-        nop.i 999 ;;
+(p0)   fma.s1 FR_C_lo = FR_C_lo, f1, FR_t
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     C_lo = C_lo + t
+//     Adjust Table_Base to beginning of table
 //
-       fma.s1 FR_r = FR_C_hi, f1, FR_C_lo
-        nop.i 999 ;;
+(p0)   fma.s1 FR_r = FR_C_hi, f1, FR_C_lo
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     Load S_2
 //
-       fma.s1 FR_rsq = FR_r, FR_r, f0
-        nop.i 999
+(p0)   fma.s1 FR_rsq = FR_r, FR_r, f0
+	nop.i 999
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
+//     Table_Base points to C_1
 //     r = C_hi + C_lo
 //
-       fms.s1 FR_c = FR_C_hi, f1, FR_r
-        nop.i 999 ;;
+(p0)   fms.s1 FR_c = FR_C_hi, f1, FR_r
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     if i_1 ==0: poly = S_2 * FR_rsq + S_1
 //     else        poly = C_2 * FR_rsq + C_1
 //
-(p9)  fma.s1 FR_tmp_result = f0, f1, FR_r
-        nop.i 999 ;;
+(p11)  fma.s1 FR_Input_X = f0, f1, FR_r
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-(p10)  fma.s1 FR_tmp_result = f0, f1, f1
-        nop.i 999 ;;
+	nop.m 999
+(p12)  fma.s1 FR_Input_X = f0, f1, f1
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
-//     Compute r_cube = FR_rsq * r
+//     Compute r_cube = FR_rsq * r	
 //
-(p9)  fma.s1 FR_poly = FR_rsq, FR_S_2, FR_S_1
-        nop.i 999 ;;
+(p11)  fma.s1 FR_poly = FR_rsq, FR_S_2, FR_S_1
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-(p10)  fma.s1 FR_poly = FR_rsq, FR_C_2, FR_C_1
-        nop.i 999
+	nop.m 999
+(p12)  fma.s1 FR_poly = FR_rsq, FR_C_2, FR_C_1
+	nop.i 999
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     Compute FR_rsq = r * r
 //     Is i_1 == 0 ?
 //
-       fma.s1 FR_r_cubed = FR_rsq, FR_r, f0
-        nop.i 999 ;;
+(p0)   fma.s1 FR_r_cubed = FR_rsq, FR_r, f0
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     c = C_hi - r
 //     Load  C_1
 //
-       fma.s1 FR_c = FR_c, f1, FR_C_lo
-        nop.i 999
+(p0)   fma.s1 FR_c = FR_c, f1, FR_C_lo
+	nop.i 999
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     if i_1 ==0: poly = r_cube * poly + c
 //     else        poly = FR_rsq * poly
 //
-(p12)  fms.s1 FR_tmp_result = f0, f1, FR_tmp_result
-        nop.i 999 ;;
+(p10)  fms.s1 FR_Input_X = f0, f1, FR_Input_X
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
 //     if i_1 ==0: Result = r
 //     else        Result = 1.0
 //
-(p9)  fma.s1 FR_poly = FR_r_cubed, FR_poly, FR_c
-        nop.i 999 ;;
+(p11)  fma.s1 FR_poly = FR_r_cubed, FR_poly, FR_c
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
-(p10)  fma.s1 FR_poly = FR_rsq, FR_poly, f0
-        nop.i 999 ;;
+	nop.m 999
+(p12)  fma.s1 FR_poly = FR_rsq, FR_poly, f0
+	nop.i 999 ;;
 }
 { .mfi
-        nop.m 999
+	nop.m 999
 //
-//     if i_0 !=0: Result = -Result
+//     if i_0 !=0: Result = -Result 
 //
-(p11)   fma.s0 FR_Result = FR_tmp_result, f1, FR_poly
-        nop.i 999 ;;
+(p9)   fma.s0 FR_Input_X = FR_Input_X, f1, FR_poly
+	nop.i 999 ;;
 }
 { .mfb
-        nop.m 999
-(p12)  fms.s0 FR_Result = FR_tmp_result, f1, FR_poly
+	nop.m 999
+(p10)  fms.s0 FR_Input_X = FR_Input_X, f1, FR_poly
 //
 //     if i_0 == 0: Result = Result + poly
 //     else         Result = Result - poly
 //
-      br.ret.sptk   b0         // Exit for |s| < 2^-14, and 2^24 <= |x| < 2^63
+(p0)    br.ret.sptk   b0 ;;
 }
-;;
-
-
-SINCOSL_SMALL_R:
-//
-// Here if |r| < 2^-3
+L(SINCOSL_SMALL_R): 
+{ .mii
+	nop.m 999
+(p0)  	extr.u	GR_i_1 = GR_N_Inc, 0, 1 ;;
 //
-// Enter with r, c, and N_Inc computed
 //
 //      Compare both i_1 and i_0 with 0.
 //      if i_1 == 0, set p9.
 //      if i_0 == 0, set p11.
 //
-
+(p0)  	cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
+}
 { .mfi
-      nop.m 999
-      fma.s1 FR_rsq = FR_r, FR_r, f0   // rsq = r * r
-      tbit.z p9,p10 = GR_N_Inc, 0      // p9  if i_1=0, N mod 4 = 0,1
-                                       // p10 if i_1=1, N mod 4 = 2,3
+	nop.m 999
+(p0)  	fma.s1 FR_rsq = FR_r, FR_r, f0
+(p0)  	extr.u	GR_i_0 = GR_N_Inc, 1, 1 ;;
+}
+{ .mfi
+	nop.m 999
+//
+// 	Z = Z * FR_rsq 
+//
+(p10)	fnma.s1	FR_c = FR_c, FR_r, f0
+(p0)  	cmp.eq.unc p11, p12 = 0x0, GR_i_0
 }
 ;;
 
+// ******************************************************************
+// ******************************************************************
+// ******************************************************************
+//      r and c have been computed.
+//      We know whether this is the sine or cosine routine.
+//      Make sure ftz mode is set - should be automatic when using wre
+//      |r| < 2**(-3)
+//
+//      Set table_ptr1 to beginning of constant table.
+//      Get [i_0,i_1] - two lsb of N_fix_gr.
+//
+
 { .mmi
-(p9)  ldfe FR_S_5 = [GR_ad_se], -16    // Load S_5 if i_1=0
-(p10) ldfe FR_C_5 = [GR_ad_ce], -16    // Load C_5 if i_1=1
+      nop.m 999
+(p0)  addl           GR_Table_Base   = @ltoff(FSINCOSL_CONSTANTS#), gp
       nop.i 999
 }
 ;;
 
 { .mmi
-(p9)  ldfe FR_S_4 = [GR_ad_se], -16    // Load S_4 if i_1=0
-(p10) ldfe FR_C_4 = [GR_ad_ce], -16    // Load C_4 if i_1=1
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
       nop.i 999
 }
 ;;
 
-SINCOSL_SMALL_R_0:
-// Entry point for 2^-3 < |x| < pi/4
-.pred.rel "mutex",p9,p10
-SINCOSL_SMALL_R_1:
-// Entry point for pi/4 < |x| < 2^24 and |r| < 2^-3
-.pred.rel "mutex",p9,p10
-{ .mfi
-(p9)  ldfe FR_S_3 = [GR_ad_se], -16    // Load S_3 if i_1=0
-      fma.s1 FR_Z = FR_rsq, FR_rsq, f0 // Z = rsq * rsq
-      nop.i 999
-}
+
+// 
+//      Set table_ptr1 to point to S_5.
+//      Set table_ptr1 to point to C_5.
+//      Compute FR_rsq = r * r
+//
 { .mfi
-(p10) ldfe FR_C_3 = [GR_ad_ce], -16    // Load C_3 if i_1=1
-(p10) fnma.s1 FR_c = FR_c, FR_r, f0    // c = -c * r if i_1=0
-      nop.i 999
+(p9)  	add GR_Table_Base = 672, GR_Table_Base
+(p10)	fmerge.s FR_r = f1, f1
+(p10) 	add GR_Table_Base = 592, GR_Table_Base ;;
+}
+// 
+//      Set table_ptr1 to point to S_5.
+//      Set table_ptr1 to point to C_5.
+//
+{ .mmi
+(p9)  	ldfe FR_S_5 = [GR_Table_Base], -16 ;;
+//
+//      if (i_1 == 0) load S_5
+//      if (i_1 != 0) load C_5
+//
+(p9)  	ldfe FR_S_4 = [GR_Table_Base], -16
+	nop.i 999 ;;
 }
-;;
-
 { .mmf
-(p9)  ldfe FR_S_2 = [GR_ad_se], -16    // Load S_2 if i_1=0
-(p10) ldfe FR_C_2 = [GR_ad_ce], -16    // Load C_2 if i_1=1
-(p10) fmerge.s FR_r = f1, f1
+(p10) 	ldfe FR_C_5 = [GR_Table_Base], -16
+// 
+//      Z = FR_rsq * FR_rsq
+//
+(p9)  	ldfe FR_S_3 = [GR_Table_Base], -16
+//
+//      Compute FR_rsq = r * r
+//      if (i_1 == 0) load S_4
+//      if (i_1 != 0) load C_4
+//
+(p0)   	fma.s1 FR_Z = FR_rsq, FR_rsq, f0 ;;
 }
-;;
-
+//
+//      if (i_1 == 0) load S_3
+//      if (i_1 != 0) load C_3
+//
 { .mmi
-(p9)  ldfe FR_S_1 = [GR_ad_se], -16    // Load S_1 if i_1=0
-(p10) ldfe FR_C_1 = [GR_ad_ce], -16    // Load C_1 if i_1=1
-      nop.i 999
+(p9)  	ldfe FR_S_2 = [GR_Table_Base], -16 ;;
+//
+//      if (i_1 == 0) load S_2
+//      if (i_1 != 0) load C_2
+//
+(p9)  	ldfe FR_S_1 = [GR_Table_Base], -16
+	nop.i 999
 }
-;;
-
-{ .mfi
-      nop.m 999
-(p9)  fma.s1 FR_Z = FR_Z, FR_r, f0     // Z = Z * r if i_1=0
-      nop.i 999
+{ .mmi
+(p10) 	ldfe FR_C_4 = [GR_Table_Base], -16 ;;
+(p10)  	ldfe FR_C_3 = [GR_Table_Base], -16
+	nop.i 999 ;;
+}
+{ .mmi
+(p10) 	ldfe FR_C_2 = [GR_Table_Base], -16 ;;
+(p10) 	ldfe FR_C_1 = [GR_Table_Base], -16
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly_lo = FR_rsq, FR_S_5, FR_S_4 // poly_lo=rsq*S_5+S_4 if i_1=0
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1 != 0):
+//      poly_lo = FR_rsq * C_5 + C_4
+//      poly_hi = FR_rsq * C_2 + C_1
+//
+(p9)  	fma.s1 FR_Z = FR_Z, FR_r, f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4 // poly_lo=rsq*C_5+C_4 if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1 == 0) load S_1
+//      if (i_1 != 0) load C_1
+//
+(p9)  	fma.s1 FR_poly_lo = FR_rsq, FR_S_5, FR_S_4
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1 // poly_hi=rsq*S_2+S_1 if i_1=0
-      nop.i 999
+	nop.m 999
+//
+//      c = -c * r
+//      dummy fmpy's to flag inexact.
+//
+(p9)	fma.s0 FR_S_4 = FR_S_4, FR_S_4, f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1 // poly_hi=rsq*C_2+C_1 if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      poly_lo = FR_rsq * poly_lo + C_3
+//      poly_hi = FR_rsq * poly_hi
+//
+(p0)    fma.s1	FR_Z = FR_Z, FR_rsq, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_Z = FR_Z, FR_rsq, f0             // Z = Z * rsq
-      nop.i 999
+	nop.m 999
+(p9)  	fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3 // p_lo=p_lo*rsq+S_3, i_1=0
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1 == 0):
+//      poly_lo = FR_rsq * S_5 + S_4
+//      poly_hi = FR_rsq * S_2 + S_1
+//
+(p10) 	fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3 // p_lo=p_lo*rsq+C_3, i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1 == 0):
+//      Z = Z * r  for only one of the small r cases - not there
+//      in original implementation notes.
+// 
+(p9)  	fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s0 FR_inexact = FR_S_4, FR_S_4, f0     // Dummy op to set inexact
-      tbit.z p11,p12 = GR_N_Inc, 1     // p11 if i_0=0, N mod 4 = 0,2
-                                       // p12 if i_0=1, N mod 4 = 1,3
+	nop.m 999
+(p10) 	fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p10) fma.s0 FR_inexact = FR_C_1, FR_C_1, f0     // Dummy op to set inexact
-      nop.i 999
+	nop.m 999
+(p10)	fma.s0 FR_C_1 = FR_C_1, FR_C_1, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0     // p_hi=p_hi*rsq if i_1=0
-      nop.i 999
+	nop.m 999
+(p9)  	fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0     // p_hi=p_hi*rsq if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      poly_lo = FR_rsq * poly_lo + S_3
+//      poly_hi = FR_rsq * poly_hi
+//
+(p10) 	fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_Z, FR_poly_lo, FR_c        // poly=Z*poly_lo+c
-      nop.i 999
+	nop.m 999
+(p10) 	fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0       // p_hi=r*p_hi if i_1=0
-      nop.i 999
+	nop.m 999
+//
+// 	if (i_1 == 0): dummy fmpy's to flag inexact
+// 	r = 1
+//
+(p9)	fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p12) fms.s1 FR_r = f0, f1, FR_r                     // r = -r if i_0=1
-      nop.i 999
+	nop.m 999
+//
+// 	poly_hi = r * poly_hi 
+//
+(p0)    fma.s1	FR_poly = FR_Z, FR_poly_lo, FR_c
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_poly, f1, FR_poly_hi       // poly=poly+poly_hi
-      nop.i 999
+	nop.m 999
+(p12)	fms.s1	FR_r = f0, f1, FR_r
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
-//      if (i_0 == 0) Result = r + poly
-//      if (i_0 != 0) Result = r - poly
+//      poly_hi = Z * poly_lo + c	
+// 	if i_0 == 1: r = -r     
 //
+(p0) 	fma.s1	FR_poly = FR_poly, f1, FR_poly_hi
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-(p11) fma.s0 FR_Result = FR_r, f1, FR_poly
-      nop.i 999
+	nop.m 999
+(p12)	fms.s0 FR_Input_X = FR_r, f1, FR_poly
+	nop.i 999
 }
 { .mfb
-      nop.m 999
-(p12) fms.s0 FR_Result = FR_r, f1, FR_poly
-      br.ret.sptk   b0                               // Exit for |r| < 2^-3
-}
-;;
-
-
-SINCOSL_NORMAL_R:
+	nop.m 999
+//
+//      poly = poly + poly_hi	
 //
-// Here if 2^-3 <= |r| < pi/4
-// THIS IS THE MAIN PATH
+(p11)	fma.s0 FR_Input_X = FR_r, f1, FR_poly
+//
+//      if (i_0 == 0) Result = r + poly
+//      if (i_0 != 0) Result = r - poly
 //
-// Enter with r, c, and N_Inc having been computed
+(p0)    br.ret.sptk   b0 ;;
+}
+L(SINCOSL_NORMAL_R): 
+{ .mii
+	nop.m 999
+(p0)	extr.u	GR_i_1 = GR_N_Inc, 0, 1 ;;
 //
+//      Set table_ptr1 and table_ptr2 to base address of
+//      constant table.
+(p0)	cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;
+}
 { .mfi
-      ldfe FR_PP_6 = [GR_ad_pp], 16    // Load PP_6
-      fma.s1 FR_rsq = FR_r, FR_r, f0   // rsq = r * r
-      tbit.z p9,p10 = GR_N_Inc, 0      // p9  if i_1=0, N mod 4 = 0,1
-                                       // p10 if i_1=1, N mod 4 = 2,3
+	nop.m 999
+(p0)	fma.s1	FR_rsq = FR_r, FR_r, f0
+(p0)	extr.u	GR_i_0 = GR_N_Inc, 1, 1 ;;
 }
 { .mfi
-      ldfe FR_QQ_6 = [GR_ad_qq], 16    // Load QQ_6
-      nop.f 999
-      nop.i 999
+	nop.m 999
+(p0)	frcpa.s1 FR_r_hi, p6 = f1, FR_r
+(p0)	cmp.eq.unc p11, p12 = 0x0, GR_i_0
 }
 ;;
 
+// ******************************************************************
+// ******************************************************************
+// ******************************************************************
+//
+//      r and c have been computed.
+//      We know whether this is the sine or cosine routine.
+//      Make sure ftz mode is set - should be automatic when using wre
+//      Get [i_0,i_1] - two lsb of N_fix_gr alone.
+//
+
 { .mmi
-(p9)  ldfe FR_PP_5 = [GR_ad_pp], 16    // Load PP_5 if i_1=0
-(p10) ldfe FR_QQ_5 = [GR_ad_qq], 16    // Load QQ_5 if i_1=1
+      nop.m 999
+(p0)  addl           GR_Table_Base   = @ltoff(FSINCOSL_CONSTANTS#), gp
       nop.i 999
 }
 ;;
 
-SINCOSL_NORMAL_R_0:
-// Entry for 2^-3 < |x| < pi/4
-.pred.rel "mutex",p9,p10
-{ .mmf
-(p9)  ldfe FR_C_1 = [GR_ad_pp], 16     // Load C_1 if i_1=0
-(p10) ldfe FR_S_1 = [GR_ad_qq], 16     // Load S_1 if i_1=1
-      frcpa.s1 FR_r_hi, p6 = f1, FR_r  // r_hi = frcpa(r)
+{ .mmi
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7 // poly = rsq*PP_8+PP_7 if i_1=0
-      nop.i 999
+(p10)	add GR_Table_Base = 384, GR_Table_Base
+(p12)	fms.s1 FR_Input_X = f0, f1, f1
+(p9)	add GR_Table_Base = 224, GR_Table_Base ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7 // poly = rsq*QQ_8+QQ_7 if i_1=1
-      nop.i 999
+(p10)	ldfe FR_QQ_8 = [GR_Table_Base], 16
+//
+//      if (i_1==0) poly = poly * FR_rsq + PP_1_lo
+//      else        poly = FR_rsq * poly
+//
+(p11)	fma.s1 FR_Input_X = f0, f1, f1
+	nop.i 999 ;;
+}
+{ .mmb
+(p10)	ldfe FR_QQ_7 = [GR_Table_Base], 16
+//
+// 	Adjust table pointers based on i_0 
+//      Compute rsq = r * r
+//
+(p9)	ldfe FR_PP_8 = [GR_Table_Base], 16
+	nop.b 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_r_cubed = FR_r, FR_rsq, f0  // rcubed = r * rsq
-      nop.i 999
+	nop.m 999
+(p0)	fma.s1 FR_r_cubed = FR_r, FR_rsq, f0
+	nop.i 999 ;;
 }
-;;
-
-
-SINCOSL_NORMAL_R_1:
-// Entry for pi/4 <= |x| < 2^24
-.pred.rel "mutex",p9,p10
 { .mmf
-(p9)  ldfe FR_PP_1 = [GR_ad_pp], 16             // Load PP_1_hi if i_1=0
-(p10) ldfe FR_QQ_1 = [GR_ad_qq], 16             // Load QQ_1    if i_1=1
-      frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi        // r_hi = frpca(frcpa(r))
+(p9)	ldfe FR_PP_7 = [GR_Table_Base], 16
+(p10)	ldfe FR_QQ_6 = [GR_Table_Base], 16
+//
+//      Load PP_8 and QQ_8; PP_7 and QQ_7
+//
+(p0)	frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi ;;
 }
-;;
-
-{ .mfi
-(p9)  ldfe FR_PP_4 = [GR_ad_pp], 16             // Load PP_4 if i_1=0
-(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6 // poly = rsq*poly+PP_6 if i_1=0
-      nop.i 999
+//
+//      if (i_1==0) poly =   PP_7 + FR_rsq * PP_8.
+//      else        poly =   QQ_7 + FR_rsq * QQ_8.
+//
+{ .mmb
+(p9)	ldfe FR_PP_6 = [GR_Table_Base], 16
+(p10)	ldfe FR_QQ_5 = [GR_Table_Base], 16
+	nop.b 999 ;;
 }
-{ .mfi
-(p10) ldfe FR_QQ_4 = [GR_ad_qq], 16             // Load QQ_4 if i_1=1
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6 // poly = rsq*poly+QQ_6 if i_1=1
-      nop.i 999
+{ .mmb
+(p9)	ldfe FR_PP_5 = [GR_Table_Base], 16
+(p10)	ldfe FR_S_1 = [GR_Table_Base], 16
+	nop.b 999 ;;
+}
+{ .mmb
+(p10)	ldfe FR_QQ_1 = [GR_Table_Base], 16
+(p9)	ldfe FR_C_1 = [GR_Table_Base], 16
+	nop.b 999 ;;
+}
+{ .mmb
+(p10)	ldfe FR_QQ_4 = [GR_Table_Base], 16
+(p9)	ldfe FR_PP_1 = [GR_Table_Base], 16
+	nop.b 999 ;;
+}
+{ .mmb
+(p10)	ldfe FR_QQ_3 = [GR_Table_Base], 16
+//
+//      if (i_1=0) corr = corr + c*c
+//      else       corr = corr * c 
+//
+(p9)	ldfe FR_PP_4 = [GR_Table_Base], 16
+	nop.b 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_corr = FR_C_1, FR_rsq, f0       // corr = C_1 * rsq if i_1=0
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7
+	nop.i 999 ;;
+}
+//
+//      if (i_1=0) poly = rsq * poly + PP_5 
+//      else       poly = rsq * poly + QQ_5 
+//      Load PP_4 or QQ_4
+//
+{ .mmi
+(p9)	ldfe FR_PP_3 = [GR_Table_Base], 16 ;;
+(p10)	ldfe FR_QQ_2 = [GR_Table_Base], 16
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r // corr = S_1 * r^3 + r if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      r_hi =   frcpa(frcpa(r)).
+//      r_cube = r * FR_rsq.
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7
+	nop.i 999 ;;
 }
-;;
-
+//
+//      Do dummy multiplies so inexact is always set. 
+//
 { .mfi
-(p9)  ldfe FR_PP_3 = [GR_ad_pp], 16             // Load PP_3 if i_1=0
-      fma.s1 FR_r_hi_sq = FR_r_hi, FR_r_hi, f0  // r_hi_sq = r_hi * r_hi
-      nop.i 999
+(p9)	ldfe FR_PP_2 = [GR_Table_Base], 16
+//
+//      r_lo = r - r_hi	
+//
+(p9)	fma.s1 FR_U_lo = FR_r_hi, FR_r_hi, f0
+	nop.i 999 ;;
+}
+{ .mbb
+(p9)	ldfe FR_PP_1_lo = [GR_Table_Base], 16
+	nop.b 999
+	nop.b 999 ;;
 }
 { .mfi
-(p10) ldfe FR_QQ_3 = [GR_ad_qq], 16             // Load QQ_3 if i_1=1
-      fms.s1 FR_r_lo = FR_r, f1, FR_r_hi        // r_lo = r - r_hi
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r
+	nop.i 999
 }
-;;
-
 { .mfi
-(p9)  ldfe FR_PP_2 = [GR_ad_pp], 16             // Load PP_2 if i_1=0
-(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5 // poly = rsq*poly+PP_5 if i_1=0
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6
+	nop.i 999 ;;
 }
 { .mfi
-(p10) ldfe FR_QQ_2 = [GR_ad_qq], 16             // Load QQ_2 if i_1=1
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5 // poly = rsq*poly+QQ_5 if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1=0) U_lo = r_hi * r_hi
+//      else       U_lo = r_hi + r
+//
+(p9)	fma.s1 FR_corr = FR_C_1, FR_rsq, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p9)  ldfe FR_PP_1_lo = [GR_ad_pp], 16          // Load PP_1_lo if i_1=0
-(p9)  fma.s1 FR_corr = FR_corr, FR_c, FR_c      // corr = corr * c + c if i_1=0
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1=0) corr = C_1 * rsq
+//      else       corr = S_1 * r_cubed + r
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fnma.s1 FR_corr = FR_corr, FR_c, f0       // corr = -corr * c if i_1=1
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_U_lo = FR_r_hi, f1, FR_r
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_r_hi_sq // U_lo = r*r_hi+r_hi_sq, i_1=0
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1=0) U_hi = r_hi + U_hi 
+//      else       U_hi = QQ_1 * U_hi + 1
+//
+(p9)	fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_U_lo
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_U_lo = FR_r_hi, f1, FR_r        // U_lo = r_hi + r if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      U_hi = r_hi * r_hi	
+//
+(p0)	fms.s1 FR_r_lo = FR_r, f1, FR_r_hi
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_U_hi = FR_r_hi, FR_r_hi_sq, f0  // U_hi = r_hi*r_hi_sq if i_1=0
-      nop.i 999
+	nop.m 999
+//
+//      Load PP_1, PP_6, PP_5, and C_1
+//      Load QQ_1, QQ_6, QQ_5, and S_1
+//
+(p0)	fma.s1 FR_U_hi = FR_r_hi, FR_r_hi, f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_U_hi = FR_QQ_1, FR_r_hi_sq, f1  // U_hi = QQ_1*r_hi_sq+1, i_1=1
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4 // poly = poly*rsq+PP_4 if i_1=0
-      nop.i 999
+	nop.m 999
+(p10)	fnma.s1	FR_corr = FR_corr, FR_c, f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4 // poly = poly*rsq+QQ_4 if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1=0) U_lo = r * r_hi + U_lo 
+//      else       U_lo = r_lo * U_lo
+//
+(p9)	fma.s1 FR_corr = FR_corr, FR_c, FR_c
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo      // U_lo = r * r + U_lo if i_1=0
-      nop.i 999
+	nop.m 999
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0     // U_lo = r_lo * U_lo if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1 =0) U_hi = r + U_hi
+//      if (i_1 =0) U_lo = r_lo * U_lo 
+//      
+//
+(p9)	fma.s0 FR_PP_5 = FR_PP_5, FR_PP_4, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0     // U_hi = PP_1 * U_hi if i_1=0
-      nop.i 999
+	nop.m 999
+(p9)	fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3 // poly = poly*rsq+PP_3 if i_1=0
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3 // poly = poly*rsq+QQ_3 if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1=0) poly = poly * rsq + PP_6
+//      else       poly = poly * rsq + QQ_6 
+//
+(p9)	fma.s1 FR_U_hi = FR_r_hi, FR_U_hi, f0
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0     // U_lo = r_lo * U_lo if i_1=0
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0      // U_lo = QQ_1 * U_lo if i_1=1
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_U_hi = FR_QQ_1, FR_U_hi, f1
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_U_hi = FR_r, f1, FR_U_hi        // U_hi = r + U_hi if i_1=0
-      nop.i 999
+	nop.m 999
+(p10)	fma.s0 FR_QQ_5 = FR_QQ_5, FR_QQ_5, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2 // poly = poly*rsq+PP_2 if i_1=0
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1!=0) U_hi = PP_1 * U_hi  
+//      if (i_1!=0) U_lo = r * r  + U_lo  
+//      Load PP_3 or QQ_3
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_2 // poly = poly*rsq+QQ_2 if i_1=1
-      nop.i 999
+	nop.m 999
+(p9)	fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_U_lo = FR_PP_1, FR_U_lo, f0     // U_lo = PP_1 * U_lo if i_1=0
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo // poly =poly*rsq+PP1lo i_1=0
-      nop.i 999
+	nop.m 999
+(p9)	fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0      // poly = poly*rsq if i_1=1
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_V = FR_U_lo, f1, FR_corr        // V = U_lo + corr
-      tbit.z p11,p12 = GR_N_Inc, 1              // p11 if i_0=0, N mod 4 = 0,2
-                                                // p12 if i_0=1, N mod 4 = 1,3
+	nop.m 999
+//
+//      Load PP_2, QQ_2
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s0 FR_inexact = FR_PP_5, FR_PP_4, f0  // Dummy op to set inexact
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1==0) poly = FR_rsq * poly  + PP_3
+//      else        poly = FR_rsq * poly  + QQ_3
+//      Load PP_1_lo
+//
+(p9)	fma.s1 FR_U_lo = FR_PP_1, FR_U_lo, f0
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s0 FR_inexact = FR_QQ_5, FR_QQ_5, f0  // Dummy op to set inexact
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1 =0) poly = poly * rsq + pp_r4
+//      else        poly = poly * rsq + qq_r4
+//
+(p9)	fma.s1 FR_U_hi = FR_r, f1, FR_U_hi
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly = FR_r_cubed, FR_poly, f0  // poly = poly*r^3 if i_1=0
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_2
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0      // poly = poly*rsq if i_1=1
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1==0) U_lo =  PP_1_hi * U_lo
+//      else        U_lo =  QQ_1 * U_lo
+//
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p11) fma.s1 FR_tmp_result = f0, f1, f1// tmp_result=+1.0 if i_0=0
-      nop.i 999
+	nop.m 999
+//
+//      if (i_0==0)  Result = 1
+//      else         Result = -1
+//
+(p0) 	fma.s1 FR_V = FR_U_lo, f1, FR_corr
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p12) fms.s1 FR_tmp_result = f0, f1, f1// tmp_result=-1.0 if i_0=1
-      nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, f0
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_V = FR_poly, f1, FR_V           // V = poly + V
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1==0) poly =  FR_rsq * poly + PP_2
+//      else poly =  FR_rsq * poly + QQ_2
+// 
+(p9)	fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo
+	nop.i 999 ;;
 }
-;;
-
-// If i_0 = 0  Result =  U_hi + V
-// If i_0 = 1  Result = -U_hi - V
 { .mfi
-        nop.m 999
-(p11)        fma.s0 FR_Result = FR_tmp_result, FR_U_hi, FR_V
-        nop.i 999
+	nop.m 999
+(p10)	fma.s1 FR_poly = FR_rsq, FR_poly, f0
+	nop.i 999 ;;
 }
-{ .mfb
-        nop.m 999
-(p12)        fms.s0 FR_Result = FR_tmp_result, FR_U_hi, FR_V
-        br.ret.sptk   b0                     // Exit for 2^-3 <= |r| < pi/4
+{ .mfi
+	nop.m 999
+//
+//      V = U_lo + corr
+//
+(p9)	fma.s1 FR_poly = FR_r_cubed, FR_poly, f0
+	nop.i 999 ;;
 }
-;;
-
-SINCOSL_ZERO:
-// Here if x = 0
 { .mfi
-      cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos
-      nop.f 999
-      nop.i 999
+	nop.m 999
+//
+//      if (i_1==0) poly = r_cube * poly
+//      else        poly = FR_rsq * poly
+//
+(p0)	fma.s1	FR_V = FR_poly, f1, FR_V
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p7)  fmerge.s FR_Result = FR_Input_X, FR_Input_X // If sin, result = input
-      nop.i 999
+	nop.m 999
+(p12)	fms.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
+	nop.i 999
 }
 { .mfb
-      nop.m 999
-(p6)  fma.s0 FR_Result = f1, f1, f0    // If cos, result=1.0
-      br.ret.sptk   b0                  // Exit for x=0
-}
-;;
+	nop.m 999
+//
+//      V = V + poly	
+//
+(p11)	fma.s0 FR_Input_X = FR_Input_X, FR_U_hi, FR_V
+//
+//      if (i_0==0) Result = Result * U_hi + V
+//      else        Result = Result * U_hi - V
+//
+(p0)    br.ret.sptk   b0 
+};;
 
+//
+//    	If cosine, FR_Input_X = 1
+//    	If sine, FR_Input_X = +/-Zero (Input FR_Input_X)
+//    	Results are exact, no exceptions
+//
 
-SINCOSL_DENORMAL:
-{ .mmb
-      getf.exp GR_signexp_x = FR_norm_x   // Get sign and exponent of x
-      nop.m 999
-      br.cond.sptk  SINCOSL_COMMON        // Return to common code
+L(SINCOSL_ZERO):
+{ .mbb
+(p0)    cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos
+        nop.b 999
+        nop.b 999 ;;
+}
+{ .mfi
+        nop.m 999
+(p7)    fmerge.s FR_Input_X = FR_Input_X, FR_Input_X
+        nop.i 999
 }
-;;
-
-SINCOSL_SPECIAL:
+{ .mfb
+        nop.m 999
+(p6)    fmerge.s FR_Input_X = f1, f1
+(p0)    br.ret.sptk   b0 ;;
+}
+L(SINCOSL_SPECIAL):
 { .mfb
         nop.m 999
 //
@@ -2280,82 +2414,106 @@ SINCOSL_SPECIAL:
 //      Invalid can be raised. SNaNs
 //      become QNaNs
 //
-        fmpy.s0 FR_Result = FR_Input_X, f0
-        br.ret.sptk   b0 ;;
+(p0)    fmpy.s0 FR_Input_X = FR_Input_X, f0
+(p0)    br.ret.sptk   b0 ;;
 }
+.endp cosl#
+ASM_SIZE_DIRECTIVE(cosl#)
 
-GLOBAL_IEEE754_END(cosl)
-// *******************************************************************
-// *******************************************************************
-// *******************************************************************
-//
-//     Special Code to handle very large argument case.
-//     Call int __libm_pi_by_2_reduce(x,r,c) for |arguments| >= 2**63
-//     The interface is custom:
-//       On input:
-//         (Arg or x) is in f8
-//       On output:
-//         r is in f8
-//         c is in f9
-//         N is in r8
-//     Be sure to allocate at least 2 GP registers as output registers for
-//     __libm_pi_by_2_reduce.  This routine uses r49-50. These are used as
-//     scratch registers within the __libm_pi_by_2_reduce routine (for speed).
-//
-//     We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127.  We
-//     use this to eliminate save/restore of key fp registers in this calling
-//     function.
-//
-// *******************************************************************
-// *******************************************************************
-// *******************************************************************
+//      Call int pi_by_2_reduce(double* x, double *y)
+//      for |arguments| >= 2**63
+//      Address to save r and c as double 
+//
+//             sp+32  -> f0
+//      r45    sp+16  -> f0
+//      r44 -> sp     -> InputX  
+//      
 
-LOCAL_LIBM_ENTRY(__libm_callout)
-SINCOSL_ARG_TOO_LARGE:
+.proc __libm_callout
+__libm_callout:
+L(SINCOSL_ARG_TOO_LARGE): 
 .prologue
 { .mfi
+        add   r45=-32,sp                        // Parameter: r address 
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
         mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+        add sp=-64,sp                           // Create new stack
+        nop.f 0
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
-
 { .mmi
-        setf.exp FR_Two_to_M3 = GR_exp_2_to_m3  // Form 2^-3
-        mov GR_SAVE_GP=gp                       // Save gp
+        stfe [r45] = f0,16                      // Clear Parameter r on stack
+        add  r44 = 16,sp                        // Parameter x address
 .save   b0, GR_SAVE_B0
         mov GR_SAVE_B0=b0                       // Save b0
 };;
-
 .body
-//
-//     Call argument reduction with x in f8
-//     Returns with N in r8, r in f8, c in f9
-//     Assumes f71-127 are preserved across the call
-//
 { .mib
-        setf.exp FR_Neg_Two_to_M3 = GR_exp_m2_to_m3 // Form -(2^-3)
+        stfe [r45] = f0,-16                     // Clear Parameter c on stack 
+        nop.i 0
+        nop.b 0
+}
+{ .mib
+        stfe [r44] = FR_Input_X                 // Store Parameter x on stack
         nop.i 0
-        br.call.sptk b0=__libm_pi_by_2_reduce#
+(p0)    br.call.sptk b0=__libm_pi_by_2_reduce# ;;
 };;
-
+{ .mii
+(p0)    ldfe  FR_Input_X =[r44],16
+//
+//      Get r and c off stack
+//
+(p0)    adds  GR_Table_Base1 = -16, GR_Table_Base1
+//
+//      Get r and c off stack
+//
+(p0)    add   GR_N_Inc = GR_Sin_or_Cos,r8 ;;
+}
+{ .mmb
+(p0)    ldfe  FR_r =[r45],16
+//
+//      Get X off the stack
+//      Readjust Table ptr
+//
+(p0)    ldfs FR_Two_to_M3 = [GR_Table_Base1],4
+	nop.b 999 ;;
+}
+{ .mmb
+(p0)    ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1],0
+(p0)    ldfe  FR_c =[r45]
+	nop.b 999 ;;
+}
 { .mfi
-        add   GR_N_Inc = GR_Sin_or_Cos,r8
-        fcmp.lt.unc.s1        p6, p0 = FR_r, FR_Two_to_M3
+.restore sp
+        add   sp = 64,sp                       // Restore stack pointer
+(p0)    fcmp.lt.unc.s1	p6, p0 = FR_r, FR_Two_to_M3
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
-{ .mfi
+{ .mib
         mov   gp = GR_SAVE_GP                  // Restore gp
-(p6)    fcmp.gt.unc.s1        p6, p0 = FR_r, FR_Neg_Two_to_M3
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        nop.b 0
 };;
-
-{ .mbb
-        nop.m 999
-(p6)    br.cond.spnt SINCOSL_SMALL_R     // Branch if |r|< 2^-3 for |x| >= 2^63
-        br.cond.sptk SINCOSL_NORMAL_R    // Branch if |r|>=2^-3 for |x| >= 2^63
-};;
-
-.endp
+{ .mfi
+	nop.m 999
+(p6)    fcmp.gt.unc.s1	p6, p0 = FR_r, FR_Neg_Two_to_M3
+	nop.i 999 ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p6)    br.cond.spnt L(SINCOSL_SMALL_R) ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p0)    br.cond.sptk L(SINCOSL_NORMAL_R) ;;
+}
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
 .type   __libm_pi_by_2_reduce#,@function
 .global __libm_pi_by_2_reduce#
diff --git a/sysdeps/ia64/fpu/s_expm1.S b/sysdeps/ia64/fpu/s_expm1.S
index 41b9954ee8..19a237990c 100644
--- a/sysdeps/ia64/fpu/s_expm1.S
+++ b/sysdeps/ia64/fpu/s_expm1.S
@@ -1,10 +1,10 @@
 .file "exp_m1.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,819 +20,1694 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-//
-// History
-//==============================================================
-// 02/02/00 Initial Version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
+// HISTORY
+// 2/02/00  Initial Version 
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 07/07/01 Improved speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 11/20/02 Improved speed, algorithm based on exp
-
-// API
-//==============================================================
-// double expm1(double)
-
-// Overview of operation
-//==============================================================
-// 1. Inputs of Nan, Inf, Zero, NatVal handled with special paths
-//
-// 2. |x| < 2^-60
-//    Result = x, computed by x + x*x to handle appropriate flags and rounding
 //
-// 3. 2^-60 <= |x| < 2^-2
-//    Result determined by 13th order Taylor series polynomial
-//    expm1f(x) = x + Q2*x^2 + ... + Q13*x^13
+// ********************************************************************* 
 //
-// 4. x < -48.0
-//    Here we know result is essentially -1 + eps, where eps only affects
-//    rounded result.  Set I.
+// Function:   Combined exp(x) and expm1(x), where
+//                       x 
+//             exp(x) = e , for double precision x values
+//                         x
+//             expm1(x) = e  - 1  for double precision x values
 //
-// 5. x >= 709.7827
-//    Result overflows.  Set I, O, and call error support
+// ********************************************************************* 
+//
+// Accuracy:       Within .7 ulps for 80-bit floating point values
+//                 Very accurate for double precision values
+//
+// ********************************************************************* 
+//
+// Resources Used:
+//
+//    Floating-Point Registers: f8  (Input and Return Value) 
+//                              f9,f32-f61, f99-f102 
+//
+//    General Purpose Registers: 
+//      r32-r61
+//      r62-r65 (Used to pass arguments to error handling routine)
+//                                     
+//    Predicate Registers:      p6-p15
+//
+// ********************************************************************* 
+//
+// IEEE Special Conditions:
+//
+//    Denormal  fault raised on denormal inputs  
+//    Overflow exceptions raised when appropriate for exp and expm1
+//    Underflow exceptions raised when appropriate for exp and expm1
+//    (Error Handling Routine called for overflow and Underflow)
+//    Inexact raised when appropriate by algorithm 
+//
+//    exp(inf) = inf
+//    exp(-inf) = +0
+//    exp(SNaN) = QNaN
+//    exp(QNaN) = QNaN
+//    exp(0) = 1
+//    exp(EM_special Values) = QNaN
+//    expm1(inf) = inf
+//    expm1(-inf) = -1 
+//    expm1(SNaN) = QNaN
+//    expm1(QNaN) = QNaN
+//    expm1(0) = 0
+//    expm1(EM_special Values) = QNaN
+//    
+// ********************************************************************* 
+//
+// Implementation and Algorithm Notes:
+//
+//  ker_exp_64( in_FR  : X,
+//            in_GR  : Flag,
+//            in_GR  : Expo_Range
+//            out_FR : Y_hi,
+//            out_FR : Y_lo,
+//            out_FR : scale,
+//            out_PR : Safe )
+//
+// On input, X is in register format and 
+// Flag  = 0 for exp,
+// Flag  = 1 for expm1,
+//
+// On output, provided X and X_cor are real numbers, then
+//
+//   scale*(Y_hi + Y_lo)  approximates  exp(X)       if Flag is 0
+//   scale*(Y_hi + Y_lo)  approximates  exp(X)-1     if Flag is 1
+//
+// The accuracy is sufficient for a highly accurate 64 sig.
+// bit implementation.  Safe is set if there is no danger of 
+// overflow/underflow when the result is composed from scale, 
+// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set. 
+// Otherwise, one must prepare to handle the possible exception 
+// appropriately.  Note that SAFE not set (false) does not mean 
+// that overflow/underflow will occur; only the setting of SAFE
+// guarantees the opposite.
+//
+// **** High Level Overview **** 
+//
+// The method consists of three cases.
+// 
+// If           |X| < Tiny	use case exp_tiny;
+// else if	|X| < 2^(-6)	use case exp_small;
+// else		use case exp_regular;
+//
+// Case exp_tiny:
+//
+//   1 + X     can be used to approximate exp(X) or exp(X+X_cor);
+//   X + X^2/2 can be used to approximate exp(X) - 1
+//
+// Case exp_small:
+//
+//   Here, exp(X), exp(X+X_cor), and exp(X) - 1 can all be 
+//   approximated by a relatively simple polynomial.
+//
+//   This polynomial resembles the truncated Taylor series
+//
+//	exp(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
+//
+// Case exp_regular:
+//
+//   Here we use a table lookup method. The basic idea is that in
+//   order to compute exp(X), we accurately decompose X into
+//
+//   X = N * log(2)/(2^12)  + r,	|r| <= log(2)/2^13.
+//
+//   Hence
+//
+//   exp(X) = 2^( N / 2^12 ) * exp(r).
+//
+//   The value 2^( N / 2^12 ) is obtained by simple combinations
+//   of values calculated beforehand and stored in table; exp(r)
+//   is approximated by a short polynomial because |r| is small.
+//
+//   We elaborate this method in 4 steps.
+//
+//   Step 1: Reduction
+//
+//   The value 2^12/log(2) is stored as a double-extended number
+//   L_Inv.
+//
+//   N := round_to_nearest_integer( X * L_Inv )
+//
+//   The value log(2)/2^12 is stored as two numbers L_hi and L_lo so
+//   that r can be computed accurately via
+//
+//   r := (X - N*L_hi) - N*L_lo
+//
+//   We pick L_hi such that N*L_hi is representable in 64 sig. bits
+//   and thus the FMA   X - N*L_hi   is error free. So r is within
+//   1 rounding error of an exact reduction with respect to
+//   
+//   L_hi + L_lo.
+//
+//   In particular, L_hi has 30 significant bits and can be stored
+//   as a double-precision number; L_lo has 64 significant bits and
+//   stored as a double-extended number.
+//
+//   In the case Flag = 2, we further modify r by
+//
+//   r := r + X_cor.
+//
+//   Step 2: Approximation
+//
+//   exp(r) - 1 is approximated by a short polynomial of the form
+//   
+//   r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
+//
+//   Step 3: Composition from Table Values 
+//
+//   The value 2^( N / 2^12 ) can be composed from a couple of tables
+//   of precalculated values. First, express N as three integers
+//   K, M_1, and M_2 as
+//
+//     N  =  K * 2^12  + M_1 * 2^6 + M_2
+//
+//   Where 0 <= M_1, M_2 < 2^6; and K can be positive or negative.
+//   When N is represented in 2's complement, M_2 is simply the 6
+//   lsb's, M_1 is the next 6, and K is simply N shifted right
+//   arithmetically (sign extended) by 12 bits.
+//
+//   Now, 2^( N / 2^12 ) is simply  
+//	
+//      2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 )
+//
+//   Clearly, 2^K needs no tabulation. The other two values are less
+//   trivial because if we store each accurately to more than working
+//   precision, then their product is too expensive to calculate. We
+//   use the following method.
+//
+//   Define two mathematical values, delta_1 and delta_2, implicitly
+//   such that
+//
+//     T_1 = exp( [M_1 log(2)/2^6]  -  delta_1 ) 
+//     T_2 = exp( [M_2 log(2)/2^12] -  delta_2 )
+//
+//   are representable as 24 significant bits. To illustrate the idea,
+//   we show how we define delta_1: 
+//
+//     T_1     := round_to_24_bits( exp( M_1 log(2)/2^6 ) )
+//     delta_1  = (M_1 log(2)/2^6) - log( T_1 )  
+//
+//   The last equality means mathematical equality. We then tabulate
+//
+//     W_1 := exp(delta_1) - 1
+//     W_2 := exp(delta_2) - 1
+//
+//   Both in double precision.
+//
+//   From the tabulated values T_1, T_2, W_1, W_2, we compose the values
+//   T and W via
+//
+//     T := T_1 * T_2			...exactly
+//     W := W_1 + (1 + W_1)*W_2	
+//
+//   W approximates exp( delta ) - 1  where delta = delta_1 + delta_2.
+//   The mathematical product of T and (W+1) is an accurate representation
+//   of 2^(M_1/2^6) * 2^(M_2/2^12).
+//
+//   Step 4. Reconstruction
+//
+//   Finally, we can reconstruct exp(X), exp(X) - 1. 
+//   Because
+//
+//	X = K * log(2) + (M_1*log(2)/2^6  - delta_1) 
+//		       + (M_2*log(2)/2^12 - delta_2)
+//		       + delta_1 + delta_2 + r 		...accurately
+//   We have
+//
+//	exp(X) ~=~ 2^K * ( T + T*[exp(delta_1+delta_2+r) - 1] )
+//	       ~=~ 2^K * ( T + T*[exp(delta + r) - 1]         )
+//	       ~=~ 2^K * ( T + T*[(exp(delta)-1)  
+//				 + exp(delta)*(exp(r)-1)]   )
+//             ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
+//             ~=~ 2^K * ( Y_hi  +  Y_lo )
+//
+//   where Y_hi = T  and Y_lo = T*(W + (1+W)*poly(r))
+//
+//   For exp(X)-1, we have
+//
+//	exp(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
+//		 ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
+//
+//   and we combine Y_hi + Y_lo - 2^(-K)  into the form of two
+//   numbers  Y_hi + Y_lo carefully.
+//
+//   **** Algorithm Details ****
+//
+//   A careful algorithm must be used to realize the mathematical ideas
+//   accurately. We describe each of the three cases. We assume SAFE
+//   is preset to be TRUE.
+//
+//   Case exp_tiny:
+//
+//   The important points are to ensure an accurate result under 
+//   different rounding directions and a correct setting of the SAFE 
+//   flag.
+//
+//   If Flag is 1, then
+//      SAFE  := False	...possibility of underflow
+//      Scale := 1.0
+//      Y_hi  := X
+//      Y_lo  := 2^(-17000)
+//   Else
+//      Scale := 1.0
+//      Y_hi  := 1.0
+//      Y_lo  := X	...for different rounding modes
+//   Endif
+//
+//   Case exp_small:
+//
+//   Here we compute a simple polynomial. To exploit parallelism, we split
+//   the polynomial into several portions.
+//
+//   Let r = X 
+//
+//   If Flag is not 1	...i.e. exp( argument )
+//
+//      rsq := r * r; 
+//      r4  := rsq*rsq
+//      poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
+//      poly_hi := r + rsq*(P_1 + r*P_2)
+//      Y_lo    := poly_hi + r4 * poly_lo
+//      set lsb(Y_lo) to 1
+//      Y_hi    := 1.0
+//      Scale   := 1.0
+//
+//   Else			...i.e. exp( argument ) - 1
+//
+//      rsq := r * r
+//      r4  := rsq * rsq
+//      r6  := rsq * r4
+//      poly_lo := r6*(Q_5 + r*(Q_6 + r*Q_7))
+//      poly_hi := Q_1 + r*(Q_2 + r*(Q_3 + r*Q_4))
+//      Y_lo    := rsq*poly_hi +  poly_lo
+//      set lsb(Y_lo) to 1
+//      Y_hi    := X
+//      Scale   := 1.0
+//
+//   Endif
+//
+//  Case exp_regular:
+//
+//  The previous description contains enough information except the
+//  computation of poly and the final Y_hi and Y_lo in the case for
+//  exp(X)-1.
+//
+//  The computation of poly for Step 2:
+//
+//   rsq := r*r
+//   poly := r + rsq*(A_1 + r*(A_2 + r*A_3))
+//
+//  For the case exp(X) - 1, we need to incorporate 2^(-K) into
+//  Y_hi and Y_lo at the end of Step 4.
+//
+//   If K > 10 then
+//      Y_lo := Y_lo - 2^(-K)
+//   Else
+//      If K < -10 then
+//	 Y_lo := Y_hi + Y_lo
+//	 Y_hi := -2^(-K)
+//      Else
+//	 Y_hi := Y_hi - 2^(-K)
+//      End If
+//   End If
 //
-// 6. 2^-2 <= x < 709.7827  or  -48.0 <= x < -2^-2  
-//    This is the main path.  The algorithm is described below:
-
-// Take the input x. w is "how many log2/128 in x?"
-//  w = x * 128/log2
-//  n = int(w)
-//  x = n log2/128 + r + delta
-
-//  n = 128M + index_1 + 2^4 index_2
-//  x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
-
-//  exp(x) = 2^M  2^(index_1/128)  2^(index_2/8) exp(r) exp(delta)
-//       Construct 2^M
-//       Get 2^(index_1/128) from table_1;
-//       Get 2^(index_2/8)   from table_2;
-//       Calculate exp(r) by series by 5th order polynomial
-//          r = x - n (log2/128)_high
-//          delta = - n (log2/128)_low
-//       Calculate exp(delta) as 1 + delta
-
-
-// Special values
-//==============================================================
-// expm1(+0)    = +0.0
-// expm1(-0)    = -0.0
-
-// expm1(+qnan) = +qnan
-// expm1(-qnan) = -qnan
-// expm1(+snan) = +qnan
-// expm1(-snan) = -qnan
-
-// expm1(-inf)  = -1.0
-// expm1(+inf)  = +inf
-
-// Overflow and Underflow
-//=======================
-// expm1(x) = largest double normal when
-//     x = 709.7827 = 40862e42fefa39ef
-//
-// Underflow is handled as described in case 2 above.
-
-
-// Registers used
-//==============================================================
-// Floating Point registers used:
-// f8, input
-// f9 -> f15,  f32 -> f75
-
-// General registers used:
-// r14 -> r40
-
-// Predicate registers used:
-// p6 -> p15
-
-// Assembly macros
-//==============================================================
-
-rRshf                  = r14
-rAD_TB1                = r15
-rAD_T1                 = r15
-rAD_TB2                = r16
-rAD_T2                 = r16
-rAD_Ln2_lo             = r17
-rAD_P                  = r17
-
-rN                     = r18
-rIndex_1               = r19
-rIndex_2_16            = r20
-
-rM                     = r21
-rBiased_M              = r21
-rIndex_1_16            = r22
-rSignexp_x             = r23
-rExp_x                 = r24
-rSig_inv_ln2           = r25
-
-rAD_Q1                 = r26
-rAD_Q2                 = r27
-rTmp                   = r27
-rExp_bias              = r28
-rExp_mask              = r29
-rRshf_2to56            = r30
-
-rGt_ln                 = r31
-rExp_2tom56            = r31
-
-
-GR_SAVE_B0             = r33
-GR_SAVE_PFS            = r34
-GR_SAVE_GP             = r35
-GR_SAVE_SP             = r36
-
-GR_Parameter_X         = r37
-GR_Parameter_Y         = r38
-GR_Parameter_RESULT    = r39
-GR_Parameter_TAG       = r40
-
-
-FR_X                   = f10
-FR_Y                   = f1
-FR_RESULT              = f8
-
-fRSHF_2TO56            = f6
-fINV_LN2_2TO63         = f7
-fW_2TO56_RSH           = f9
-f2TOM56                = f11
-fP5                    = f12
-fP54                   = f50
-fP5432                 = f50
-fP4                    = f13
-fP3                    = f14
-fP32                   = f14
-fP2                    = f15
-
-fLn2_by_128_hi         = f33
-fLn2_by_128_lo         = f34
-
-fRSHF                  = f35
-fNfloat                = f36
-fW                     = f37
-fR                     = f38
-fF                     = f39
-
-fRsq                   = f40
-fRcube                 = f41
-
-f2M                    = f42
-fS1                    = f43
-fT1                    = f44
-
-fMIN_DBL_OFLOW_ARG     = f45
-fMAX_DBL_MINUS_1_ARG   = f46
-fMAX_DBL_NORM_ARG      = f47
-fP_lo                  = f51
-fP_hi                  = f52
-fP                     = f53
-fS                     = f54
-
-fNormX                 = f56
-
-fWre_urm_f8            = f57
-
-fGt_pln                = f58
-fTmp                   = f58
-
-fS2                    = f59
-fT2                    = f60
-fSm1                   = f61
-
-fXsq                   = f62
-fX6                    = f63
-fX4                    = f63
-fQ7                    = f64
-fQ76                   = f64
-fQ7654                 = f64
-fQ765432               = f64
-fQ6                    = f65
-fQ5                    = f66
-fQ54                   = f66
-fQ4                    = f67
-fQ3                    = f68
-fQ32                   = f68
-fQ2                    = f69
-fQD                    = f70
-fQDC                   = f70
-fQDCBA                 = f70
-fQDCBA98               = f70
-fQDCBA98765432         = f70
-fQC                    = f71
-fQB                    = f72
-fQBA                   = f72
-fQA                    = f73
-fQ9                    = f74
-fQ98                   = f74
-fQ8                    = f75
-
-// Data tables
-//==============================================================
-
-RODATA
-.align 16
-
-// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
-
-// double-extended 1/ln(2)
-// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
-// 3fff b8aa 3b29 5c17 f0bc
-// For speed the significand will be loaded directly with a movl and setf.sig
-//   and the exponent will be bias+63 instead of bias+0.  Thus subsequent
-//   computations need to scale appropriately.
-// The constant 128/ln(2) is needed for the computation of w.  This is also
-//   obtained by scaling the computations.
-//
-// Two shifting constants are loaded directly with movl and setf.d.
-//   1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
-//        This constant is added to x*1/ln2 to shift the integer part of
-//        x*128/ln2 into the rightmost bits of the significand.
-//        The result of this fma is fW_2TO56_RSH.
-//   2. fRSHF       = 1.1000..00 * 2^(63)
-//        This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
-//        the integer part of w, n, as a floating-point number.
-//        The result of this fms is fNfloat.
-
-
-LOCAL_OBJECT_START(exp_Table_1)
-data8 0x40862e42fefa39f0 // smallest dbl overflow arg
-data8 0xc048000000000000 // approx largest arg for minus one result
-data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result
-data8 0x0                // pad
-data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
-data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
-//
-// Table 1 is 2^(index_1/128) where
-// index_1 goes from 0 to 15
-//
-data8 0x8000000000000000 , 0x00003FFF
-data8 0x80B1ED4FD999AB6C , 0x00003FFF
-data8 0x8164D1F3BC030773 , 0x00003FFF
-data8 0x8218AF4373FC25EC , 0x00003FFF
-data8 0x82CD8698AC2BA1D7 , 0x00003FFF
-data8 0x8383594EEFB6EE37 , 0x00003FFF
-data8 0x843A28C3ACDE4046 , 0x00003FFF
-data8 0x84F1F656379C1A29 , 0x00003FFF
-data8 0x85AAC367CC487B15 , 0x00003FFF
-data8 0x8664915B923FBA04 , 0x00003FFF
-data8 0x871F61969E8D1010 , 0x00003FFF
-data8 0x87DB357FF698D792 , 0x00003FFF
-data8 0x88980E8092DA8527 , 0x00003FFF
-data8 0x8955EE03618E5FDD , 0x00003FFF
-data8 0x8A14D575496EFD9A , 0x00003FFF
-data8 0x8AD4C6452C728924 , 0x00003FFF
-LOCAL_OBJECT_END(exp_Table_1)
-
-// Table 2 is 2^(index_1/8) where
-// index_2 goes from 0 to 7
-LOCAL_OBJECT_START(exp_Table_2)
-data8 0x8000000000000000 , 0x00003FFF
-data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
-data8 0x9837F0518DB8A96F , 0x00003FFF
-data8 0xA5FED6A9B15138EA , 0x00003FFF
-data8 0xB504F333F9DE6484 , 0x00003FFF
-data8 0xC5672A115506DADD , 0x00003FFF
-data8 0xD744FCCAD69D6AF4 , 0x00003FFF
-data8 0xEAC0C6E7DD24392F , 0x00003FFF
-LOCAL_OBJECT_END(exp_Table_2)
-
-
-LOCAL_OBJECT_START(exp_p_table)
-data8 0x3f8111116da21757 //P5
-data8 0x3fa55555d787761c //P4
-data8 0x3fc5555555555414 //P3
-data8 0x3fdffffffffffd6a //P2
-LOCAL_OBJECT_END(exp_p_table)
-
-LOCAL_OBJECT_START(exp_Q1_table)
-data8 0x3de6124613a86d09 // QD = 1/13!
-data8 0x3e21eed8eff8d898 // QC = 1/12!
-data8 0x3ec71de3a556c734 // Q9 = 1/9!
-data8 0x3efa01a01a01a01a // Q8 = 1/8!
-data8 0x8888888888888889,0x3ff8 // Q5 = 1/5!
-data8 0xaaaaaaaaaaaaaaab,0x3ffc // Q3 = 1/3!
-data8 0x0,0x0            // Pad to avoid bank conflicts
-LOCAL_OBJECT_END(exp_Q1_table)
-
-LOCAL_OBJECT_START(exp_Q2_table)
-data8 0x3e5ae64567f544e4 // QB = 1/11!
-data8 0x3e927e4fb7789f5c // QA = 1/10!
-data8 0x3f2a01a01a01a01a // Q7 = 1/7!
-data8 0x3f56c16c16c16c17 // Q6 = 1/6!
-data8 0xaaaaaaaaaaaaaaab,0x3ffa // Q4 = 1/4!
-data8 0x8000000000000000,0x3ffe // Q2 = 1/2!
-LOCAL_OBJECT_END(exp_Q2_table)
 
+#include "libm_support.h"
+
+GR_SAVE_PFS          = r59
+GR_SAVE_B0           = r60
+GR_SAVE_GP           = r61
+
+GR_Parameter_X       = r62
+GR_Parameter_Y       = r63
+GR_Parameter_RESULT  = r64
+
+FR_X             = f9
+FR_Y             = f1
+FR_RESULT        = f99
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 64 
+Constants_exp_64_Arg:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
+data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000 
+data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
+data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
+// /* Inv_L, L_hi, L_lo */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
+
+.align 64 
+Constants_exp_64_Exponents:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
+data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
+data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
+data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
+
+.align 64 
+Constants_exp_64_A:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
+data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
+data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
+data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
+// /* Reversed */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
+
+.align 64 
+Constants_exp_64_P:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
+data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
+data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
+data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
+data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
+data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
+data4 0x000004C7,0x80000000,0x00003FFE,0x00000000 
+// /* Reversed */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
+
+.align 64 
+Constants_exp_64_Q:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Q,@object)
+data4 0xA49EF6CA,0xD00D56F7,0x00003FEF,0x00000000
+data4 0x1C63493D,0xD00D59AB,0x00003FF2,0x00000000
+data4 0xFB50CDD2,0xB60B60B5,0x00003FF5,0x00000000
+data4 0x7BA68DC8,0x88888888,0x00003FF8,0x00000000
+data4 0xAAAAAC8D,0xAAAAAAAA,0x00003FFA,0x00000000
+data4 0xAAAAACCA,0xAAAAAAAA,0x00003FFC,0x00000000
+data4 0x00000000,0x80000000,0x00003FFE,0x00000000 
+// /* Reversed */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Q)
+
+.align 64 
+Constants_exp_64_T1:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 
+data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
+data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
+data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
+data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
+data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
+data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
+data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
+data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
+data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
+data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
+data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
+data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
+data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
+data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
+
+.align 64 
+Constants_exp_64_T2:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E 
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA 
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A 
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA 
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA 
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE 
+data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
+
+.align 64 
+Constants_exp_64_W1:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
+data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
+data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
+data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
+data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
+data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
+data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
+data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
+data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
+data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
+data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
+data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A 
+data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB 
+data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E 
+data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA 
+data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08 
+data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B 
+data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75 
+data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79 
+data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7 
+data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087 
+data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB 
+data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643  
+data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C 
+data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D 
+data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873 
+data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F 
+data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861 
+data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0 
+data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC 
+data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB 
+data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB 
+data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148 
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
+
+.align 64 
+Constants_exp_64_W2:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
+data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25 
+data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8 
+data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A 
+data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E 
+data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9 
+data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2 
+data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0 
+data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509 
+data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33 
+data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D 
+data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87 
+data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3 
+data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9 
+data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F 
+data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82 
+data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4 
+data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D 
+data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030  
+data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29 
+data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED 
+data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B 
+data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893 
+data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35 
+data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C 
+data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313 
+data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE 
+data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426 
+data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550 
+data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4 
+data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31 
+data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE 
+data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
 
 .section .text
-GLOBAL_IEEE754_ENTRY(expm1)
+.proc expm1#
+.global expm1#
+.align 64 
 
-{ .mlx
-      getf.exp        rSignexp_x = f8  // Must recompute if x unorm
-      movl            rSig_inv_ln2 = 0xb8aa3b295c17f0bc  // signif of 1/ln2
-}
-{ .mlx
-      addl            rAD_TB1    = @ltoff(exp_Table_1), gp
-      movl            rRshf_2to56 = 0x4768000000000000   // 1.10000 2^(63+56)
+expm1: 
+#ifdef _LIBC
+.global __expm1#
+__expm1:
+#endif
+
+
+{ .mii
+      alloc r32 = ar.pfs,0,30,4,0
+(p0)  add r33 = 1, r0  
+(p0)  cmp.eq.unc  p7, p0 =  r0, r0 
 }
 ;;
 
-// We do this fnorm right at the beginning to normalize
-// any input unnormals so that SWA is not taken.
+
+//
+//    Set p7 true for expm1
+//    Set Flag = r33 = 1 for expm1
+//    These are no longer really necessary; they are a remnant from
+//       when this file had multiple entry points.
+//       They should be carefully removed.
+
+
+
 { .mfi
-      ld8             rAD_TB1    = [rAD_TB1]
-      fclass.m        p6,p0 = f8,0x0b  // Test for x=unorm
-      mov             rExp_mask = 0x1ffff
+(p0)  add r32 = 1,r0  
+(p0)  fnorm.s1 f9 = f8 
+      nop.i 999
 }
+
+
 { .mfi
-      mov             rExp_bias = 0xffff
-      fnorm.s1        fNormX   = f8
-      mov             rExp_2tom56 = 0xffff-56
+      nop.m 999
+(p0)  fclass.m.unc p6, p8 =  f8, 0x1E7 
+      nop.i 999
 }
-;;
 
-// Form two constants we need
-//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128
-//  1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
+{ .mfi
+      nop.m 999
+(p0)  fclass.nm.unc p9, p0 =  f8, 0x1FF 
+      nop.i 999
+}
 
 { .mfi
-      setf.sig        fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
-      fclass.m        p8,p0 = f8,0x07  // Test for x=0
-      nop.i           0
+	nop.m 999
+(p0)  mov f36 = f1 
+	nop.i 999 ;;
 }
+
+//     
+//    Identify NatVals, NaNs, Infs, and Zeros. 
+//    Identify EM unsupporteds. 
+//    Save special input registers 
+//
+//    Create FR_X_cor      = 0.0 
+//           GR_Flag       = 0 
+//           GR_Expo_Range = 1
+//           FR_Scale      = 1.0
+//
+
+{ .mfb
+	nop.m 999
+(p0)  mov f32 = f0 
+(p6)  br.cond.spnt EXP_64_SPECIAL ;; 
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p9)  br.cond.spnt EXP_64_UNSUPPORTED ;; 
+}
+
+//     
+//    Branch out for special input values 
+//     
+
+{ .mfi
+(p0)  cmp.ne.unc p12, p13 = 0x01, r33
+(p0)  fcmp.lt.unc.s0 p9,p0 =  f8, f0 
+(p0)  cmp.eq.unc  p15, p0 =  r0, r0 
+}
+
+//     
+//    Raise possible denormal operand exception 
+//    Normalize x 
+//     
+//    This function computes exp( x  + x_cor) 
+//    Input  FR 1: FR_X            
+//    Input  FR 2: FR_X_cor  
+//    Input  GR 1: GR_Flag  
+//    Input  GR 2: GR_Expo_Range  
+//    Output FR 3: FR_Y_hi  
+//    Output FR 4: FR_Y_lo  
+//    Output FR 5: FR_Scale  
+//    Output PR 1: PR_Safe  
+
+//
+//    Prepare to load constants
+//    Set Safe = True
+//
+
+{ .mmi
+(p0)  addl           r34   = @ltoff(Constants_exp_64_Arg#), gp
+(p0)  addl           r40   = @ltoff(Constants_exp_64_W1#),  gp
+(p0)  addl           r41   = @ltoff(Constants_exp_64_W2#),  gp
+}
+;;
+
+{ .mmi
+      ld8 r34 = [r34]
+      ld8 r40 = [r40]
+(p0)  addl           r50   = @ltoff(Constants_exp_64_T1#),  gp
+}
+;;
+
+
+{ .mmi
+      ld8 r41  = [r41]
+(p0)  ldfe f37 = [r34],16 
+(p0)  addl           r51   = @ltoff(Constants_exp_64_T2#),  gp
+}
+;;
+
+//
+//    N = fcvt.fx(float_N)
+//    Set p14 if -6 > expo_X 
+//
+
+
+//
+//    Bias = 0x0FFFF
+//    expo_X = expo_X and Mask  
+//
+
+//
+//    Load L_lo
+//    Set p10 if 14 < expo_X 
+//
+
+{ .mmi
+      ld8  r50 = [r50]
+(p0)  ldfe f40 = [r34],16 
+      nop.i 999
+}
+;;
+
 { .mlx
-      setf.d          fRSHF_2TO56 = rRshf_2to56 // Form 1.100 * 2^(63+56)
-      movl            rRshf = 0x43e8000000000000   // 1.10000 2^63 for rshift
+	nop.m 999
+(p0)  movl r58 = 0x0FFFF 
 }
 ;;
 
-{ .mfi
-      setf.exp        f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
-      fclass.m        p9,p0 = f8,0x22  // Test for x=-inf
-      add             rAD_TB2 = 0x140, rAD_TB1 // Point to Table 2
+//
+//    Load W2_ptr
+//    Branch to SMALL if expo_X < -6
+//
+
+//
+//    float_N = X * L_Inv
+//    expo_X = exponent of X
+//    Mask = 0x1FFFF
+//
+
+{ .mmi
+      ld8  r51 = [r51]
+(p0)  ldfe f41 = [r34],16 
 }
-{ .mib
-      add             rAD_Q1 = 0x1e0, rAD_TB1 // Point to Q table for small path
-      add             rAD_Ln2_lo = 0x30, rAD_TB1 // Point to ln2_by_128_lo
-(p6)  br.cond.spnt    EXPM1_UNORM // Branch if x unorm
+;;
+
+{ .mlx
+(p0)  addl           r34   = @ltoff(Constants_exp_64_Exponents#),  gp
+(p0)  movl r39 = 0x1FFFF
+}
+;;
+
+{ .mmi
+      ld8  r34 = [r34]
+(p0)  getf.exp r37 = f9 
+      nop.i 999
 }
 ;;
 
-EXPM1_COMMON:
+{ .mii
+      nop.m 999
+      nop.i 999 
+(p0)  and  r37 = r37, r39 ;;  
+}
+
+{ .mmi
+(p0)  sub r37 = r37, r58 ;;  
+(p0)  cmp.gt.unc  p14, p0 =  -6, r37 
+(p0)  cmp.lt.unc  p10, p0 =  14, r37 ;; 
+}
+
 { .mfi
-      ldfpd           fMIN_DBL_OFLOW_ARG, fMAX_DBL_MINUS_1_ARG = [rAD_TB1],16
-      fclass.m        p10,p0 = f8,0x1e1  // Test for x=+inf, NaN, NaT
-      add             rAD_Q2 = 0x50, rAD_Q1   // Point to Q table for small path
+	nop.m 999
+//
+//    Load L_inv 
+//    Set p12 true for Flag = 0 (exp)
+//    Set p13 true for Flag = 1 (expm1)
+//
+(p0)  fmpy.s1 f38 = f9, f37 
+	nop.i 999 ;;
 }
+
 { .mfb
-      nop.m           0
-      nop.f           0
-(p8)  br.ret.spnt     b0                        // Exit for x=0, return x
+	nop.m 999
+//
+//    Load L_hi
+//    expo_X = expo_X - Bias
+//    get W1_ptr      
+//
+(p0)  fcvt.fx.s1 f39 = f38
+(p14) br.cond.spnt EXP_SMALL ;; 
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p10) br.cond.spnt EXP_HUGE ;; 
+}
+
+{ .mmi
+(p0)  shladd r34 = r32,4,r34 
+(p0)  addl           r35   = @ltoff(Constants_exp_64_A#), gp
+      nop.i 999
 }
 ;;
 
+{ .mmi
+      ld8  r35 = [r35]
+      nop.m 999
+      nop.i 999
+}
+;;
+
+//
+//    Load T_1,T_2
+//
+
+{ .mmb
+(p0)  ldfe f51 = [r35],16 
+(p0)  ld8 r45 = [r34],8
+	nop.b 999 ;;
+}
+//    
+//    Set Safe = True  if k >= big_expo_neg  
+//    Set Safe = False if k < big_expo_neg  
+//    
+
+{ .mmb
+(p0)  ldfe f49 = [r35],16 
+(p0)  ld8 r48 = [r34],0
+	nop.b 999 ;;
+}
+
 { .mfi
-      ldfd            fMAX_DBL_NORM_ARG = [rAD_TB1],16
-      nop.f           0
-      and             rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+	nop.m 999
+//
+//    Branch to HUGE if expo_X > 14
+//
+(p0)  fcvt.xf f38 = f39 
+	nop.i 999 ;;
 }
-{ .mfb
-      setf.d          fRSHF = rRshf // Form right shift const 1.100 * 2^63
-(p9)  fms.d.s0        f8 = f0,f0,f1            // quick exit for x=-inf
-(p9)  br.ret.spnt     b0
+
+{ .mfi
+(p0)  getf.sig r52 = f39 
+	nop.f 999
+	nop.i 999 ;;
+}
+
+{ .mii
+	nop.m 999
+(p0)  extr.u r43 = r52, 6, 6 ;;  
+//
+//    r = r - float_N * L_lo
+//    K = extr(N_fix,12,52)
+//
+(p0)  shladd r40 = r43,3,r40 ;; 
 }
-;;
 
 { .mfi
-      ldfpd           fQD, fQC = [rAD_Q1], 16  // Load coeff for small path
-      nop.f           0
-      sub             rExp_x = rExp_x, rExp_bias // True exponent of x
+(p0)  shladd r50 = r43,2,r50 
+(p0)  fnma.s1 f42 = f40, f38, f9 
+//
+//    float_N = float(N)
+//    N_fix = significand of N
+//
+(p0)  extr.u r42 = r52, 0, 6  
 }
-{ .mfb
-      ldfpd           fQB, fQA = [rAD_Q2], 16  // Load coeff for small path
-(p10) fma.d.s0        f8 = f8, f1, f0          // For x=+inf, NaN, NaT
-(p10) br.ret.spnt     b0                       // Exit for x=+inf, NaN, NaT
+
+{ .mmi
+(p0)  ldfd  f43 = [r40],0 ;; 
+(p0)  shladd r41 = r42,3,r41 
+(p0)  shladd r51 = r42,2,r51 
+}
+//
+//    W_1_p1 = 1 + W_1
+//
+
+{ .mmi
+(p0)  ldfs  f44 = [r50],0 ;; 
+(p0)  ldfd  f45 = [r41],0 
+//
+//    M_2 = extr(N_fix,0,6)
+//    M_1 = extr(N_fix,6,6)
+//    r = X - float_N * L_hi
+//
+(p0)  extr r44 = r52, 12, 52  
+}
+
+{ .mmi
+(p0)  ldfs  f46 = [r51],0 ;; 
+(p0)  sub r46 = r58, r44  
+(p0)  cmp.gt.unc  p8, p15 =  r44, r45 
+}
+//    
+//    W = W_1 + W_1_p1*W_2 
+//    Load  A_2 
+//    Bias_m_K = Bias - K
+//
+
+{ .mii
+(p0)  ldfe f40 = [r35],16 
+//
+//    load A_1
+//    poly = A_2 + r*A_3 
+//    rsq = r * r  
+//    neg_2_mK = exponent of Bias_m_k
+//
+(p0)  add r47 = r58, r44 ;;  
+//    
+//    Set Safe = True  if k <= big_expo_pos  
+//    Set Safe = False  if k >  big_expo_pos  
+//    Load A_3
+//    
+(p15) cmp.lt p8,p15 = r44,r48 ;;
+}
+
+{ .mmf
+(p0)  setf.exp f61 = r46 
+//    
+//    Bias_p_K = Bias + K
+//    T = T_1 * T_2
+//    
+(p0)  setf.exp f36 = r47 
+(p0)  fnma.s1 f42 = f41, f38, f42 ;; 
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Load W_1,W_2
+//    Load big_exp_pos, load big_exp_neg
+//
+(p0)  fadd.s1 f47 = f43, f1 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      ldfpd           fQ9, fQ8 = [rAD_Q1], 16  // Load coeff for small path
-      fma.s1          fXsq = fNormX, fNormX, f0  // x*x for small path
-      cmp.gt          p7, p8 = -2, rExp_x      // Test |x| < 2^(-2)
+	nop.m 999
+(p0)  fma.s1 f52 = f42, f51, f49 
+	nop.i 999
 }
+
 { .mfi
-      ldfpd           fQ7, fQ6 = [rAD_Q2], 16  // Load coeff for small path
-      nop.f           0
-      nop.i           0
+	nop.m 999
+(p0)  fmpy.s1 f48 = f42, f42 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      ldfe            fQ5 = [rAD_Q1], 16       // Load coeff for small path
-      nop.f           0
-      nop.i           0
+	nop.m 999
+(p0)  fmpy.s1 f53 = f44, f46 
+	nop.i 999 ;;
 }
-{ .mib
-      ldfe            fQ4 = [rAD_Q2], 16       // Load coeff for small path
-(p7)  cmp.gt.unc      p6, p7 = -60, rExp_x     // Test |x| < 2^(-60)
-(p7)  br.cond.spnt    EXPM1_SMALL              // Branch if 2^-60 <= |x| < 2^-2
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 f54 = f45, f47, f43 
+	nop.i 999
 }
-;;
 
-// W = X * Inv_log2_by_128
-// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
-// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
+{ .mfi
+	nop.m 999
+(p0)  fneg f61 =  f61 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 f52 = f42, f52, f40 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fadd.s1 f55 = f54, f1 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+//
+//    W + Wp1 * poly     
+// 
+(p0)  mov f34 = f53 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    A_1 + r * poly 
+//    Scale = setf_exp(Bias_p_k) 
+//
+(p0)  fma.s1 f52 = f48, f52, f42 
+	nop.i 999 ;;
+}
 
 { .mfi
-      ldfe            fLn2_by_128_hi  = [rAD_TB1],32
-      fma.s1          fW_2TO56_RSH  = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
-      nop.i           0
+	nop.m 999
+//
+//    poly = r + rsq(A_1 + r*poly) 
+//    Wp1 = 1 + W
+//    neg_2_mK = -neg_2_mK
+//
+(p0)  fma.s1 f35 = f55, f52, f54
+	nop.i 999 ;;
 }
+
 { .mfb
-      ldfe            fLn2_by_128_lo  = [rAD_Ln2_lo]
-(p6)  fma.d.s0        f8 = f8, f8, f8 // If x < 2^-60, result=x+x*x
-(p6)  br.ret.spnt     b0              // Exit if x < 2^-60
+	nop.m 999
+(p0)  fmpy.s1 f35 = f35, f53 
+//   
+//    Y_hi = T
+//    Y_lo = T * (W + Wp1*poly)
+//
+(p12) br.cond.sptk EXP_MAIN ;; 
 }
-;;
+//
+//    Branch to EXP_MAIN if computing exp(x)
+//    Continue for expm1(x) = exp(x) - 1
+//
 
-// Divide arguments into the following categories:
-//  Certain minus one       p11 - -inf < x <= MAX_DBL_MINUS_1_ARG
-//  Possible Overflow       p14 - MAX_DBL_NORM_ARG < x < MIN_DBL_OFLOW_ARG
-//  Certain Overflow        p15 - MIN_DBL_OFLOW_ARG <= x < +inf
+{ .mii
+(p0)  cmp.lt.unc  p12, p13 =  10, r44 
+	nop.i 999 ;;
+//
+//    Set p12 if 10 < K, Else p13 
+//
+(p13) cmp.gt.unc  p13, p14 =  -10, r44 ;; 
+}
 //
-// If the input is really a double arg, then there will never be "Possible
-// Overflow" arguments.
+//    K > 10:  Y_lo = Y_lo + neg_2_mK
+//    K <=10:  Set p13 if -10 > K, Else set p14 
 //
 
-// After that last load, rAD_TB1 points to the beginning of table 1
+{ .mfi
+(p13) cmp.eq  p15, p0 =  r0, r0 
+(p14) fadd.s1 f34 = f61, f34 
+	nop.i 999 ;;
+}
 
 { .mfi
-      nop.m           0
-      fcmp.ge.s1      p15,p14 = fNormX,fMIN_DBL_OFLOW_ARG
-      nop.i           0
+	nop.m 999
+(p12) fadd.s1 f35 = f35, f61 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      add             rAD_P = 0x80, rAD_TB2
-      fcmp.le.s1      p11,p0 = fNormX,fMAX_DBL_MINUS_1_ARG
-      nop.i           0
+	nop.m 999
+(p13) fadd.s1 f35 = f35, f34 
+	nop.i 999
 }
-;;
 
 { .mfb
-      ldfpd           fP5, fP4  = [rAD_P] ,16
-(p14) fcmp.gt.unc.s1  p14,p0 = fNormX,fMAX_DBL_NORM_ARG
-(p15) br.cond.spnt    EXPM1_CERTAIN_OVERFLOW
+	nop.m 999
+//
+//    K <= 10 and K < -10, Set Safe = True
+//    K <= 10 and K < -10,  Y_lo = Y_hi + Y_lo
+//    K <= 10 and K >= -10, Y_hi = Y_hi + neg_2_mk
+// 
+(p13) mov f34 = f61 
+(p0)  br.cond.sptk EXP_MAIN ;; 
+}
+EXP_SMALL: 
+
+{ .mmi
+(p12)  addl           r35   = @ltoff(Constants_exp_64_P#), gp
+(p0)   addl           r34   = @ltoff(Constants_exp_64_Exponents#), gp
+      nop.i 999
 }
 ;;
 
-// Nfloat = round_int(W)
-// The signficand of fW_2TO56_RSH contains the rounded integer part of W,
-// as a twos complement number in the lower bits (that is, it may be negative).
-// That twos complement number (called N) is put into rN.
+{ .mmi
+(p12) ld8 r35 = [r35]
+      ld8 r34 = [r34]
+      nop.i 999
+}
+;;
 
-// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
-// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
-// Thus, fNfloat contains the floating point version of N
 
-{ .mfb
-      ldfpd           fP3, fP2  = [rAD_P]
-      fms.s1          fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
-(p11) br.cond.spnt    EXPM1_CERTAIN_MINUS_ONE
+{ .mmi
+(p13)  addl           r35   = @ltoff(Constants_exp_64_Q#), gp
+       nop.m 999
+       nop.i 999
 }
 ;;
 
+
+// 
+//    Return
+//    K <= 10 and K < -10,  Y_hi = neg_2_mk
+// 
+//    /*******************************************************/
+//    /*********** Branch EXP_SMALL  *************************/
+//    /*******************************************************/
+
 { .mfi
-      getf.sig        rN = fW_2TO56_RSH
-      nop.f           0
-      nop.i           0
+(p13) ld8 r35 = [r35]
+(p0)  mov f42 = f9 
+(p0)  add r34 = 0x48,r34  
 }
 ;;
 
-// rIndex_1 has index_1
-// rIndex_2_16 has index_2 * 16
-// rBiased_M has M
-// rIndex_1_16 has index_1 * 16
+//
+//    Flag = 0
+//    r4 = rsq * rsq
+//
 
-// r = x - Nfloat * ln2_by_128_hi
-// f = 1 - Nfloat * ln2_by_128_lo
 { .mfi
-      and             rIndex_1 = 0x0f, rN
-      fnma.s1         fR   = fNfloat, fLn2_by_128_hi, fNormX
-      shr             rM = rN,  0x7
+(p0)  ld8 r49 =[r34],0
+	nop.f 999
+	nop.i 999 ;;
 }
+
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//    Flag = 1
+//
+(p0)  cmp.lt.unc  p14, p0 =  r37, r49 ;; 
+}
+
 { .mfi
-      and             rIndex_2_16 = 0x70, rN
-      fnma.s1         fF   = fNfloat, fLn2_by_128_lo, f1
-      nop.i           0
+	nop.m 999
+//
+//    r = X
+//
+(p0)  fmpy.s1 f48 = f42, f42 
+	nop.i 999 ;;
 }
-;;
 
-// rAD_T1 has address of T1
-// rAD_T2 has address if T2
+{ .mfb
+	nop.m 999
+//
+//    rsq = r * r
+//
+(p0)  fmpy.s1 f50 = f48, f48 
+//
+//    Is input very small?
+//
+(p14) br.cond.spnt EXP_VERY_SMALL ;; 
+}
+//
+//    Flag_not1: Y_hi = 1.0
+//    Flag is 1: r6 = rsq * r4
+//
+
+{ .mfi
+(p12) ldfe f52 = [r35],16 
+(p12) mov f34 = f1 
+(p0)  add r53 = 0x1,r0 ;;  
+}
+
+{ .mfi
+(p13) ldfe f51 = [r35],16 
+//
+//    Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
+//
+(p13) mov f34 = f9 
+	nop.i 999 ;;
+}
+
+{ .mmf
+(p12) ldfe f53 = [r35],16 
+//
+//    For Flag_not_1, Y_hi = X
+//    Scale = 1
+//    Create 0x000...01
+//
+(p0)  setf.sig f37 = r53 
+(p0)  mov f36 = f1 ;; 
+}
 
 { .mmi
-      add             rBiased_M = rExp_bias, rM
-      add             rAD_T2 = rAD_TB2, rIndex_2_16
-      shladd          rAD_T1 = rIndex_1, 4, rAD_TB1
+(p13) ldfe f52 = [r35],16 ;; 
+(p12) ldfe f54 = [r35],16 
+	nop.i 999 ;;
 }
-;;
 
-// Create Scale = 2^M
-// Load T1 and T2
+{ .mfi
+(p13) ldfe f53 = [r35],16 
+(p13) fmpy.s1 f58 = f48, f50 
+	nop.i 999 ;;
+}
+//
+//    Flag_not1: poly_lo = P_5 + r*P_6
+//    Flag_1: poly_lo = Q_6 + r*Q_7
+//
+
 { .mmi
-      setf.exp        f2M = rBiased_M
-      ldfe            fT2  = [rAD_T2]
-      nop.i           0
+(p13) ldfe f54 = [r35],16 ;; 
+(p12) ldfe f55 = [r35],16 
+	nop.i 999 ;;
+}
+
+{ .mmi
+(p12) ldfe f56 = [r35],16 ;; 
+(p13) ldfe f55 = [r35],16 
+	nop.i 999 ;;
+}
+
+{ .mmi
+(p12) ldfe f57 = [r35],0 ;; 
+(p13) ldfe f56 = [r35],16 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      ldfe            fT1  = [rAD_T1]
-      fmpy.s0         fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
-      nop.i           0
+(p13) ldfe f57 = [r35],0 
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fP54 = fR, fP5, fP4
-      nop.i           0
+	nop.m 999
+//
+//    For Flag_not_1, the coefficients were loaded from Constants_exp_64_P
+//    Else (Flag_1) they were loaded from Constants_exp_64_Q
+//
+(p12) fma.s1 f60 = f52, f42, f53 
+	nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fP32 = fR, fP3, fP2
-      nop.i           0
+	nop.m 999
+(p13) fma.s1 f60 = f51, f42, f52 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fRsq = fR, fR, f0
-      nop.i           0
+	nop.m 999
+(p12) fma.s1 f60 = f60, f42, f54 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fP5432  = fRsq, fP54, fP32
-      nop.i           0
+	nop.m 999
+(p12) fma.s1 f59 = f56, f42, f57 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p13) fma.s1 f60 = f42, f60, f53 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fS2  = fF,fT2,f0
-      nop.i           0
+	nop.m 999
+(p12) fma.s1 f59 = f59, f48, f42 
+	nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fS1  = f2M,fT1,f0
-      nop.i           0
+	nop.m 999
+//
+//    Flag_1: poly_lo = Q_5 + r*(Q_6 + r*Q_7) 
+//    Flag_not1: poly_lo = P_4 + r*(P_5 + r*P_6)
+//    Flag_not1: poly_hi = (P_1 + r*P_2)
+//
+(p13) fmpy.s1 f60 = f60, f58 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fP = fRsq, fP5432, fR
-      nop.i           0
+	nop.m 999
+(p12) fma.s1 f60 = f60, f42, f55 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Flag_1: poly_lo = r6 *(Q_5 + ....)
+//    Flag_not1: poly_hi =  r + rsq *(P_1 + r*P_2)
+//
+(p12) fma.s1 f35 = f60, f50, f59 
+	nop.i 999
 }
-;;
 
 { .mfi
-      nop.m           0
-      fms.s1          fSm1 = fS1,fS2,f1    // S - 1.0
-      nop.i           0
+	nop.m 999
+(p13) fma.s1 f59 = f54, f42, f55 
+	nop.i 999 ;;
 }
+
+{ .mfi
+	nop.m 999
+//
+//    Flag_not1: Y_lo = rsq* poly_hi + poly_lo 
+//    Flag_1: poly_lo = rsq* poly_hi + poly_lo 
+//
+(p13) fma.s1 f59 = f59, f42, f56 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Flag_not_1: (P_1 + r*P_2) 
+//
+(p13) fma.s1 f59 = f59, f42, f57 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Flag_not_1: poly_hi = r + rsq * (P_1 + r*P_2) 
+//
+(p13) fma.s1 f35 = f59, f48, f60 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Create 0.000...01
+//
+(p0)  for f37 = f35, f37 
+	nop.i 999 ;;
+}
+
 { .mfb
-      nop.m           0
-      fma.s1          fS   = fS1,fS2,f0
-(p14) br.cond.spnt    EXPM1_POSSIBLE_OVERFLOW
+	nop.m 999
+//
+//    Set lsb of Y_lo to 1
+//
+(p0)  fmerge.se f35 = f35,f37 
+(p0)  br.cond.sptk EXP_MAIN ;; 
+}
+EXP_VERY_SMALL: 
+
+{ .mmi
+      nop.m 999
+(p13) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp 
+      nop.i 999;;
+}
+
+{ .mfi
+(p13) ld8  r34 = [r34];
+(p12) mov f35 = f9 
+      nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      fma.d.s0        f8 = fS, fP, fSm1
-      br.ret.sptk     b0                // Normal path exit
+	nop.m 999
+(p12) mov f34 = f1 
+(p12) br.cond.sptk EXP_MAIN ;; 
 }
-;;
 
-// Here if 2^-60 <= |x| <2^-2
-// Compute 13th order polynomial
-EXPM1_SMALL:
-{ .mmf
-      ldfe            fQ3 = [rAD_Q1], 16
-      ldfe            fQ2 = [rAD_Q2], 16
-      fma.s1          fX4 = fXsq, fXsq, f0
+{ .mlx
+(p13) add  r34 = 8,r34 
+(p13) movl r39 = 0x0FFFE ;; 
 }
-;;
+//
+//    Load big_exp_neg 
+//    Create 1/2's exponent
+//
+
+{ .mii
+(p13) setf.exp f56 = r39 
+(p13) shladd r34 = r32,4,r34 ;;  
+	nop.i 999
+}
+//
+//    Negative exponents are stored after positive
+//
 
 { .mfi
-      nop.m           0
-      fma.s1          fQDC = fQD, fNormX, fQC
-      nop.i           0
+(p13) ld8 r45 = [r34],0
+//
+//    Y_hi = x
+//    Scale = 1
+//
+(p13) fmpy.s1 f35 = f9, f9 
+	nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fQBA = fQB, fNormX, fQA
-      nop.i           0
+	nop.m 999
+//
+//    Reset Safe if necessary 
+//    Create 1/2
+//
+(p13) mov f34 = f9 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fQ98 = fQ9, fNormX, fQ8
-      nop.i           0
+(p13) cmp.lt.unc  p0, p15 =  r37, r45 
+(p13) mov f36 = f1 
+	nop.i 999 ;;
+}
+
+{ .mfb
+	nop.m 999
+//
+//    Y_lo = x * x
+//
+(p13) fmpy.s1 f35 = f35, f56 
+//
+//    Y_lo = x*x/2 
+//
+(p13) br.cond.sptk EXP_MAIN ;; 
 }
+EXP_HUGE: 
+
 { .mfi
-      nop.m           0
-      fma.s1          fQ76= fQ7, fNormX, fQ6
-      nop.i           0
+	nop.m 999
+(p0)  fcmp.gt.unc.s1 p14, p0 =  f9, f0 
+	nop.i 999
+}
+
+{ .mlx
+	nop.m 999
+(p0)  movl r39 = 0x15DC0 ;; 
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fQ54 = fQ5, fNormX, fQ4
-      nop.i           0
+(p14) setf.exp f34 = r39 
+(p14) mov f35 = f1 
+(p14) cmp.eq  p0, p15 =  r0, r0 ;; 
+}
+
+{ .mfb
+	nop.m 999
+(p14) mov f36 = f34 
+//
+//    If x > 0, Set Safe = False
+//    If x > 0, Y_hi = 2**(24,000)
+//    If x > 0, Y_lo = 1.0
+//    If x > 0, Scale = 2**(24,000)
+//
+(p14) br.cond.sptk EXP_MAIN ;; 
+}
+
+{ .mlx
+	nop.m 999
+(p12) movl r39 = 0xA240 
+}
+
+{ .mlx
+	nop.m 999
+(p12) movl r38 = 0xA1DC ;; 
+}
+
+{ .mmb
+(p13) cmp.eq  p15, p14 =  r0, r0 
+(p12) setf.exp f34 = r39 
+	nop.b 999 ;;
+}
+
+{ .mlx
+(p12) setf.exp f35 = r38 
+(p13) movl r39 = 0xFF9C 
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fX6 = fX4, fXsq, f0
-      nop.i           0
+	nop.m 999
+(p13) fsub.s1 f34 = f0, f1
+	nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fQ32= fQ3, fNormX, fQ2
-      nop.i           0
+	nop.m 999
+(p12) mov f36 = f34 
+(p12) cmp.eq  p0, p15 =  r0, r0 ;; 
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fQDCBA = fQDC, fXsq, fQBA
-      nop.i           0
+(p13) setf.exp f35 = r39 
+(p13) mov f36 = f1 
+	nop.i 999 ;;
 }
+EXP_MAIN: 
+
 { .mfi
-      nop.m           0
-      fma.s1          fQ7654 = fQ76, fXsq, fQ54
-      nop.i           0
+(p0)  cmp.ne.unc p12, p0 = 0x01, r33
+(p0)  fmpy.s1 f101 = f36, f35 
+	nop.i 999 ;;
+}
+
+{ .mfb
+	nop.m 999
+(p0)  fma.d.s0 f99 = f34, f36, f101 
+(p15)  br.cond.sptk EXP_64_RETURN;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fQDCBA98 = fQDCBA, fXsq, fQ98
-      nop.i           0
+	nop.m 999
+(p0)  fsetc.s3 0x7F,0x01
+	nop.i 999
 }
+
+{ .mlx
+	nop.m 999
+(p0)  movl r50 = 0x000000000103FF ;;
+}
+//    
+//    S0 user supplied status
+//    S2 user supplied status + WRE + TD  (Overflows) 
+//    S3 user supplied status + RZ + TD   (Underflows) 
+//    
+//    
+//    If (Safe) is true, then
+//        Compute result using user supplied status field.
+//        No overflow or underflow here, but perhaps inexact.
+//        Return
+//    Else
+//       Determine if overflow or underflow  was raised.
+//       Fetch +/- overflow threshold for IEEE single, double,
+//       double extended   
+//    
+
 { .mfi
-      nop.m           0
-      fma.s1          fQ765432 = fQ7654, fXsq, fQ32
-      nop.i           0
+(p0)  setf.exp f60 = r50
+(p0)  fma.d.s3 f102 = f34, f36, f101 
+	nop.i 999
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fQDCBA98765432 = fQDCBA98, fX6, fQ765432
-      nop.i           0
+	nop.m 999
+(p0)  fsetc.s3 0x7F,0x40 
+	nop.i 999 ;;
 }
-;;
 
-{ .mfb
-      nop.m           0
-      fma.d.s0        f8 = fQDCBA98765432, fXsq, fNormX
-      br.ret.sptk     b0                   // Exit small branch
+{ .mfi
+	nop.m 999
+//
+//    For Safe, no need to check for over/under. 
+//    For expm1, handle errors like exp. 
+//
+(p0)  fsetc.s2 0x7F,0x42
+	nop.i 999;;
 }
-;;
 
+{ .mfi
+	nop.m 999
+(p0)  fma.d.s2 f100 = f34, f36, f101 
+	nop.i 999 ;;
+}
 
-EXPM1_POSSIBLE_OVERFLOW:
+{ .mfi
+	nop.m 999
+(p0)  fsetc.s2 0x7F,0x40 
+	nop.i 999 ;;
+}
 
-// Here if fMAX_DBL_NORM_ARG < x < fMIN_DBL_OFLOW_ARG
-// This cannot happen if input is a double, only if input higher precision.
-// Overflow is a possibility, not a certainty.
+{ .mfi
+	nop.m 999
+(p7)  fclass.m.unc   p12, p0 =  f102, 0x00F
+	nop.i 999
+}
 
-// Recompute result using status field 2 with user's rounding mode,
-// and wre set.  If result is larger than largest double, then we have
-// overflow
+{ .mfi
+	nop.m 999
+(p0)  fclass.m.unc   p11, p0 =  f102, 0x00F
+	nop.i 999 ;;
+}
 
 { .mfi
-      mov             rGt_ln  = 0x103ff // Exponent for largest dbl + 1 ulp
-      fsetc.s2        0x7F,0x42         // Get user's round mode, set wre
-      nop.i           0
+	nop.m 999
+(p7)  fcmp.ge.unc.s1 p10, p0 =  f100, f60
+	nop.i 999
 }
-;;
 
 { .mfi
-      setf.exp        fGt_pln = rGt_ln  // Create largest double + 1 ulp
-      fma.d.s2        fWre_urm_f8 = fS, fP, fSm1  // Result with wre set
-      nop.i           0
+	nop.m 999
+//    
+//    Create largest double exponent + 1.
+//    Create smallest double exponent - 1.
+//    
+(p0)  fcmp.ge.unc.s1 p8, p0 =  f100, f60
+	nop.i 999 ;;
+}
+//    
+//    fcmp:   resultS2 >= + overflow threshold  -> set (a) if true
+//    fcmp:   resultS2 <= - overflow threshold  -> set (b) if true
+//    fclass: resultS3 is denorm/unorm/0        -> set (d) if true
+//    
+
+{ .mib
+(p10) mov   r65 = 41
+	nop.i 999
+(p10) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+(p8)  mov   r65 = 14
+	nop.i 999
+(p8)  br.cond.sptk __libm_error_region ;;
+}
+//    
+//    Report that exp overflowed
+//    
+
+{ .mib
+(p12) mov   r65 = 42
+	nop.i 999
+(p12) br.cond.sptk __libm_error_region ;;
 }
-;;
+
+{ .mib
+(p11) mov   r65 = 15
+	nop.i 999
+(p11) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+//    
+//    Report that exp underflowed
+//    
+(p0)  br.cond.sptk EXP_64_RETURN;;
+}
+EXP_64_SPECIAL: 
 
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                   // Turn off wre in sf2
-      nop.i           0
+	nop.m 999
+(p0)  fclass.m.unc p6,  p0 =  f8, 0x0c3 
+	nop.i 999
 }
-;;
 
 { .mfi
-      nop.m           0
-      fcmp.ge.s1      p6, p0 =  fWre_urm_f8, fGt_pln // Test for overflow
-      nop.i           0
+	nop.m 999
+(p0)  fclass.m.unc p13, p8 =  f8, 0x007 
+	nop.i 999 ;;
 }
-;;
 
-{ .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    EXPM1_CERTAIN_OVERFLOW // Branch if overflow
+{ .mfi
+	nop.m 999
+(p7)  fclass.m.unc p14, p0 =  f8, 0x007 
+	nop.i 999
 }
-;;
 
-{ .mfb
-      nop.m           0
-      fma.d.s0        f8 = fS, fP, fSm1
-      br.ret.sptk     b0                     // Exit if really no overflow
+{ .mfi
+	nop.m 999
+(p0)  fclass.m.unc p12, p9 =  f8, 0x021 
+	nop.i 999 ;;
 }
-;;
 
-EXPM1_CERTAIN_OVERFLOW:
-{ .mmi
-      sub             rTmp = rExp_mask, r0, 1
-;;
-      setf.exp        fTmp = rTmp
-      nop.i           0
+{ .mfi
+	nop.m 999
+(p0)  fclass.m.unc p11, p0 =  f8, 0x022 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p7)  fclass.m.unc p10, p0 =  f8, 0x022 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//    
+//    Identify +/- 0, Inf, or -Inf 
+//    Generate the right kind of NaN.
+//    
+(p13) fadd.d.s0 f99 = f0, f1 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      alloc           r32=ar.pfs,1,4,4,0
-      fmerge.s        FR_X = f8,f8
-      nop.i           0
+	nop.m 999
+(p14) mov f99 = f8 
+	nop.i 999 ;;
 }
+
 { .mfb
-      mov             GR_Parameter_TAG = 41
-      fma.d.s0        FR_RESULT = fTmp, fTmp, f0    // Set I,O and +INF result
-      br.cond.sptk    __libm_error_region
+	nop.m 999
+(p6)  fadd.d.s0 f99 = f8, f1 
+//    
+//    exp(+/-0) = 1 
+//    expm1(+/-0) = +/-0 
+//    No exceptions raised
+//    
+(p6)  br.cond.sptk EXP_64_RETURN;;
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p14)  br.cond.sptk EXP_64_RETURN;;
+}
+
+{ .mfi
+	nop.m 999
+(p11) mov f99 = f0 
+	nop.i 999 ;;
 }
-;;
 
-// Here if x unorm
-EXPM1_UNORM:
 { .mfb
-      getf.exp        rSignexp_x = fNormX    // Must recompute if x unorm
-      fcmp.eq.s0      p6, p0 = f8, f0        // Set D flag
-      br.cond.sptk    EXPM1_COMMON
+	nop.m 999
+(p10) fsub.d.s1 f99 = f0, f1 
+//    
+//    exp(-Inf) = 0 
+//    expm1(-Inf) = -1 
+//    No exceptions raised.
+//    
+(p10)  br.cond.sptk EXP_64_RETURN;;
 }
-;;
 
-// here if result will be -1 and inexact, x <= -48.0
-EXPM1_CERTAIN_MINUS_ONE:
-{ .mmi
-      mov             rTmp = 1
-;;
-      setf.exp        fTmp = rTmp
-      nop.i           0
+{ .mfb
+	nop.m 999
+(p12) fmpy.d.s1 f99 = f8, f1 
+//    
+//    exp(+Inf) = Inf 
+//    No exceptions raised.
+//    
+(p0)  br.cond.sptk EXP_64_RETURN;;
 }
-;;
+
+
+EXP_64_UNSUPPORTED: 
 
 { .mfb
-      nop.m           0
-      fms.d.s0        FR_RESULT = fTmp, fTmp, f1 // Set I, rounded -1+eps result
-      br.ret.sptk     b0
+       nop.m 999
+(p0)  fmpy.d.s0 f99 = f8, f0 
+      nop.b 0;;
 }
-;;
 
-GLOBAL_IEEE754_END(expm1)
+EXP_64_RETURN:
+{ .mfb
+      nop.m 999
+(p0)  mov   f8     = f99
+(p0)  br.ret.sptk   b0
+}
+.endp expm1
+ASM_SIZE_DIRECTIVE(expm1)
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
+// (1)
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
@@ -841,32 +1716,38 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                           // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                       // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
+
+// (2)
 { .mmi
         stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
-        add GR_Parameter_X = 16,sp              // Parameter 1 address
+        add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                       // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
+
 .body
+// (3)
 { .mib
-        stfd [GR_Parameter_X] = FR_X            // STORE Parameter 1 on stack
+        stfd [GR_Parameter_X] = FR_X                    // STORE Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-	nop.b 0
+        nop.b 0                                 
 }
 { .mib
-        stfd [GR_Parameter_Y] = FR_RESULT       // STORE Parameter 3 on stack
+        stfd [GR_Parameter_Y] = FR_RESULT                   // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#   // Call error handling function
+        br.call.sptk b0=__libm_error_support#         // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
+
+// (4)
 { .mmi
         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
@@ -879,6 +1760,9 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_expm1f.S b/sysdeps/ia64/fpu/s_expm1f.S
index 0c5f2e67a8..cc2c537ba2 100644
--- a/sysdeps/ia64/fpu/s_expm1f.S
+++ b/sysdeps/ia64/fpu/s_expm1f.S
@@ -1,10 +1,10 @@
-.file "expf_m1.s"
+.file "exp_m1f.s"
 
-
-// Copyright (c) 2000 - 2002, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,649 +20,1735 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-
-// History
-//*********************************************************************
-// 02/02/00 Initial Version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
+// HISTORY
+// 2/02/00  Initial Version
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 07/07/01 Improved speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 11/20/02 Improved speed, algorithm based on expf
 //
+// ********************************************************************* 
+//
+// Function:   Combined expf(x) and expm1f(x), where
+//                        x 
+//             expf(x) = e , for single precision x values
+//                          x
+//             expm1f(x) = e  - 1  for single precision x values
+//
+// ********************************************************************* 
+//
+// Accuracy:       Within .7 ulps for 80-bit floating point values
+//                 Very accurate for single precision values
+//
+// ********************************************************************* 
+//
+// Resources Used:
+//
+//    Floating-Point Registers: f8  (Input and Return Value) 
+//                              f9,f32-f61, f99-f102 
+//
+//    General Purpose Registers: 
+//      r32-r61
+//      r62-r65 (Used to pass arguments to error handling routine)
+//                                     
+//    Predicate Registers:      p6-p15
+//
+// ********************************************************************* 
+//
+// IEEE Special Conditions:
+//
+//    Denormal  fault raised on denormal inputs  
+//    Overflow exceptions raised when appropriate for exp and expm1
+//    Underflow exceptions raised when appropriate for exp and expm1
+//    (Error Handling Routine called for overflow and Underflow)
+//    Inexact raised when appropriate by algorithm 
+//
+//    expf(inf) = inf
+//    expf(-inf) = +0
+//    expf(SNaN) = QNaN
+//    expf(QNaN) = QNaN
+//    expf(0) = 1
+//    expf(EM_special Values) = QNaN
+//    expm1f(inf) = inf
+//    expm1f(-inf) = -1 
+//    expm1f(SNaN) = QNaN
+//    expm1f(QNaN) = QNaN
+//    expm1f(0) = 0
+//    expm1f(EM_special Values) = QNaN
+//    
+// ********************************************************************* 
+//
+// Implementation and Algorithm Notes:
+//
+//  ker_exp_64( in_FR  : X,
+//            in_GR  : Flag,
+//            in_GR  : Expo_Range
+//            out_FR : Y_hi,
+//            out_FR : Y_lo,
+//            out_FR : scale,
+//            out_PR : Safe )
+//
+// On input, X is in register format and 
+// Flag  = 0 for exp,
+// Flag  = 1 for expm1,
+//
+// On output, provided X and X_cor are real numbers, then
+//
+//   scale*(Y_hi + Y_lo)  approximates  expf(X)       if Flag is 0
+//   scale*(Y_hi + Y_lo)  approximates  expf(X)-1     if Flag is 1
+//
+// The accuracy is sufficient for a highly accurate 64 sig.
+// bit implementation.  Safe is set if there is no danger of 
+// overflow/underflow when the result is composed from scale, 
+// Y_hi and Y_lo. Thus, we can have a fast return if Safe is set. 
+// Otherwise, one must prepare to handle the possible exception 
+// appropriately.  Note that SAFE not set (false) does not mean 
+// that overflow/underflow will occur; only the setting of SAFE
+// guarantees the opposite.
+//
+// **** High Level Overview **** 
+//
+// The method consists of three cases.
+// 
+// If           |X| < Tiny	use case exp_tiny;
+// else if	|X| < 2^(-6)	use case exp_small;
+// else		use case exp_regular;
+//
+// Case exp_tiny:
+//
+//   1 + X     can be used to approximate expf(X) or expf(X+X_cor);
+//   X + X^2/2 can be used to approximate expf(X) - 1
+//
+// Case exp_small:
+//
+//   Here, expf(X), expf(X+X_cor), and expf(X) - 1 can all be 
+//   approximated by a relatively simple polynomial.
+//
+//   This polynomial resembles the truncated Taylor series
+//
+//	expf(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
+//
+// Case exp_regular:
+//
+//   Here we use a table lookup method. The basic idea is that in
+//   order to compute expf(X), we accurately decompose X into
+//
+//   X = N * log(2)/(2^12)  + r,	|r| <= log(2)/2^13.
+//
+//   Hence
+//
+//   expf(X) = 2^( N / 2^12 ) * expf(r).
+//
+//   The value 2^( N / 2^12 ) is obtained by simple combinations
+//   of values calculated beforehand and stored in table; expf(r)
+//   is approximated by a short polynomial because |r| is small.
+//
+//   We elaborate this method in 4 steps.
+//
+//   Step 1: Reduction
+//
+//   The value 2^12/log(2) is stored as a double-extended number
+//   L_Inv.
+//
+//   N := round_to_nearest_integer( X * L_Inv )
+//
+//   The value log(2)/2^12 is stored as two numbers L_hi and L_lo so
+//   that r can be computed accurately via
+//
+//   r := (X - N*L_hi) - N*L_lo
+//
+//   We pick L_hi such that N*L_hi is representable in 64 sig. bits
+//   and thus the FMA   X - N*L_hi   is error free. So r is only
+//   1 rounding error away from an exact reduction with respect to 
+//   
+//   L_hi + L_lo.
+//
+//   In particular, L_hi has 30 significant bits and can be stored
+//   as a double-precision number; L_lo has 64 significant bits and
+//   is stored as a double-extended number.
+//
+//   In the case Flag = 2, we further modify r by
+//
+//   r := r + X_cor.
+//
+//   Step 2: Approximation
+//
+//   expf(r) - 1 is approximated by a short polynomial of the form
+//   
+//   r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
+//
+//   Step 3: Composition from Table Values 
+//
+//   The value 2^( N / 2^12 ) can be composed from a couple of tables
+//   of precalculated values. First, express N as three integers
+//   K, M_1, and M_2 as
+//
+//     N  =  K * 2^12  + M_1 * 2^6 + M_2
+//
+//   Where 0 <= M_1, M_2 < 2^6; and K can be positive or negative.
+//   When N is represented in 2's complement, M_2 is simply the 6
+//   lsb's, M_1 is the next 6, and K is simply N shifted right
+//   arithmetically (sign extended) by 12 bits.
+//
+//   Now, 2^( N / 2^12 ) is simply  
+//	
+//      2^K * 2^( M_1 / 2^6 ) * 2^( M_2 / 2^12 )
+//
+//   Clearly, 2^K needs no tabulation. The other two values are less
+//   trivial because if we store each accurately to more than working
+//   precision, then their product is too expensive to calculate. We
+//   use the following method.
+//
+//   Define two mathematical values, delta_1 and delta_2, implicitly
+//   such that
+//
+//     T_1 = expf( [M_1 log(2)/2^6]  -  delta_1 ) 
+//     T_2 = expf( [M_2 log(2)/2^12] -  delta_2 )
+//
+//   are representable as 24 significant bits. To illustrate the idea,
+//   we show how we define delta_1: 
+//
+//     T_1     := round_to_24_bits( expf( M_1 log(2)/2^6 ) )
+//     delta_1  = (M_1 log(2)/2^6) - log( T_1 )  
+//
+//   The last equality means mathematical equality. We then tabulate
+//
+//     W_1 := expf(delta_1) - 1
+//     W_2 := expf(delta_2) - 1
+//
+//   Both in double precision.
+//
+//   From the tabulated values T_1, T_2, W_1, W_2, we compose the values
+//   T and W via
+//
+//     T := T_1 * T_2			...exactly
+//     W := W_1 + (1 + W_1)*W_2	
+//
+//   W approximates expf( delta ) - 1  where delta = delta_1 + delta_2.
+//   The mathematical product of T and (W+1) is an accurate representation
+//   of 2^(M_1/2^6) * 2^(M_2/2^12).
+//
+//   Step 4. Reconstruction
+//
+//   Finally, we can reconstruct expf(X), expf(X) - 1. 
+//   Because
+//
+//	X = K * log(2) + (M_1*log(2)/2^6  - delta_1) 
+//		       + (M_2*log(2)/2^12 - delta_2)
+//		       + delta_1 + delta_2 + r 		...accurately
+//   We have
+//
+//	expf(X) ~=~ 2^K * ( T + T*[expf(delta_1+delta_2+r) - 1] )
+//	       ~=~ 2^K * ( T + T*[expf(delta + r) - 1]         )
+//	       ~=~ 2^K * ( T + T*[(expf(delta)-1)  
+//				 + expf(delta)*(expf(r)-1)]   )
+//             ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
+//             ~=~ 2^K * ( Y_hi  +  Y_lo )
+//
+//   where Y_hi = T  and Y_lo = T*(W + (1+W)*poly(r))
+//
+//   For expf(X)-1, we have
+//
+//	expf(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
+//		 ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
+//
+//   and we combine Y_hi + Y_lo - 2^(-K)  into the form of two 
+//   numbers  Y_hi + Y_lo carefully.
+//
+//   **** Algorithm Details ****
+//
+//   A careful algorithm must be used to realize the mathematical ideas
+//   accurately. We describe each of the three cases. We assume SAFE
+//   is preset to be TRUE.
+//
+//   Case exp_tiny:
+//
+//   The important points are to ensure an accurate result under 
+//   different rounding directions and a correct setting of the SAFE 
+//   flag.
+//
+//   If Flag is 1, then
+//      SAFE  := False	...possibility of underflow
+//      Scale := 1.0
+//      Y_hi  := X
+//      Y_lo  := 2^(-17000)
+//   Else
+//      Scale := 1.0
+//      Y_hi  := 1.0
+//      Y_lo  := X	...for different rounding modes
+//   Endif
+//
+//   Case exp_small:
+//
+//   Here we compute a simple polynomial. To exploit parallelism, we split
+//   the polynomial into several portions.
+//
+//   Let r = X 
+//
+//   If Flag is not 1	...i.e. expf( argument )
+//
+//      rsq := r * r; 
+//      r4  := rsq*rsq
+//      poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
+//      poly_hi := r + rsq*(P_1 + r*P_2)
+//      Y_lo    := poly_hi + r4 * poly_lo
+//      set lsb(Y_lo) to 1
+//      Y_hi    := 1.0
+//      Scale   := 1.0
+//
+//   Else			...i.e. expf( argument ) - 1
+//
+//      rsq := r * r
+//      r4  := rsq * rsq
+//      r6  := rsq * r4
+//      poly_lo := r6*(Q_5 + r*(Q_6 + r*Q_7))
+//      poly_hi := Q_1 + r*(Q_2 + r*(Q_3 + r*Q_4))
+//      Y_lo    := rsq*poly_hi +  poly_lo
+//      set lsb(Y_lo) to 1
+//      Y_hi    := X
+//      Scale   := 1.0
+//
+//   Endif
+//
+//  Case exp_regular:
+//
+//  The previous description contains enough information except the
+//  computation of poly and the final Y_hi and Y_lo in the case for
+//  expf(X)-1.
+//
+//  The computation of poly for Step 2:
+//
+//   rsq := r*r
+//   poly := r + rsq*(A_1 + r*(A_2 + r*A_3))
+//
+//  For the case expf(X) - 1, we need to incorporate 2^(-K) into
+//  Y_hi and Y_lo at the end of Step 4.
+//
+//   If K > 10 then
+//      Y_lo := Y_lo - 2^(-K)
+//   Else
+//      If K < -10 then
+//	 Y_lo := Y_hi + Y_lo
+//	 Y_hi := -2^(-K)
+//      Else
+//	 Y_hi := Y_hi - 2^(-K)
+//      End If
+//   End If
 //
-// API
-//*********************************************************************
-// float expm1f(float)
-//
-// Overview of operation
-//*********************************************************************
-// 1. Inputs of Nan, Inf, Zero, NatVal handled with special paths
-//
-// 2. |x| < 2^-40
-//    Result = x, computed by x + x*x to handle appropriate flags and rounding
-//
-// 3. 2^-40 <= |x| < 2^-2
-//    Result determined by 8th order Taylor series polynomial
-//    expm1f(x) = x + A2*x^2 + ... + A8*x^8
-//
-// 4. x < -24.0
-//    Here we know result is essentially -1 + eps, where eps only affects
-//    rounded result.  Set I.
-//
-// 5. x >= 88.7228 
-//    Result overflows.  Set I, O, and call error support
-//
-// 6. 2^-2 <= x < 88.7228  or  -24.0 <= x < -2^-2  
-//    This is the main path.  The algorithm is described below:
-
-// Take the input x. w is "how many log2/128 in x?"
-//  w = x * 64/log2
-//  NJ = int(w)
-//  x = NJ*log2/64 + R
-
-//  NJ = 64*n + j
-//  x = n*log2 + (log2/64)*j + R
-//
-//  So, exp(x) = 2^n * 2^(j/64)* exp(R)
-//
-//  T =  2^n * 2^(j/64)
-//       Construct 2^n
-//       Get 2^(j/64) table
-//           actually all the entries of 2^(j/64) table are stored in DP and
-//           with exponent bits set to 0 -> multiplication on 2^n can be
-//           performed by doing logical "or" operation with bits presenting 2^n
-
-//  exp(R) = 1 + (exp(R) - 1)
-//  P = exp(R) - 1 approximated by Taylor series of 3rd degree
-//      P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
-//
-
-//  The final result is reconstructed as follows
-//  expm1f(x) = T*P + (T - 1.0)
-
-// Special values
-//*********************************************************************
-// expm1f(+0)    = +0.0
-// expm1f(-0)    = -0.0
-
-// expm1f(+qnan) = +qnan
-// expm1f(-qnan) = -qnan
-// expm1f(+snan) = +qnan
-// expm1f(-snan) = -qnan
-
-// expm1f(-inf)  = -1.0
-// expm1f(+inf)  = +inf
-
-// Overflow and Underflow
-//*********************************************************************
-// expm1f(x) = largest single normal when
-//     x = 88.7228 = 0x42b17217
-//
-// Underflow is handled as described in case 2 above.
-
-
-// Registers used
-//*********************************************************************
-// Floating Point registers used:
-// f8, input
-// f6,f7, f9 -> f15,  f32 -> f45
-
-// General registers used:
-// r3, r20 -> r38
-
-// Predicate registers used:
-// p9 -> p15
-
-// Assembly macros
-//*********************************************************************
-// integer registers used
-// scratch
-rNJ                   = r3
-
-rExp_half             = r20
-rSignexp_x            = r21
-rExp_x                = r22
-rExp_mask             = r23
-rExp_bias             = r24
-rTmp                  = r25
-rM1_lim               = r25
-rGt_ln                = r25
-rJ                    = r26
-rN                    = r27
-rTblAddr              = r28
-rLn2Div64             = r29
-rRightShifter         = r30
-r64DivLn2             = r31
-// stacked
-GR_SAVE_PFS           = r32
-GR_SAVE_B0            = r33
-GR_SAVE_GP            = r34
-GR_Parameter_X        = r35
-GR_Parameter_Y        = r36
-GR_Parameter_RESULT   = r37
-GR_Parameter_TAG      = r38
-
-// floating point registers used
-FR_X                  = f10
-FR_Y                  = f1
-FR_RESULT             = f8
-// scratch
-fRightShifter         = f6
-f64DivLn2             = f7
-fNormX                = f9
-fNint                 = f10
-fN                    = f11
-fR                    = f12
-fLn2Div64             = f13
-fA2                   = f14
-fA3                   = f15
-// stacked
-fP                    = f32
-fX3                   = f33
-fT                    = f34
-fMIN_SGL_OFLOW_ARG    = f35
-fMAX_SGL_NORM_ARG     = f36
-fMAX_SGL_MINUS_1_ARG  = f37
-fA4                   = f38
-fA43                  = f38
-fA432                 = f38
-fRSqr                 = f39
-fA5                   = f40
-fTmp                  = f41
-fGt_pln               = f41
-fXsq                  = f41
-fA7                   = f42
-fA6                   = f43
-fA65                  = f43
-fTm1                  = f44
-fA8                   = f45
-fA87                  = f45
-fA8765                = f45
-fA8765432             = f45
-fWre_urm_f8           = f45
-
-RODATA
-.align 16
-LOCAL_OBJECT_START(_expf_table)
-data8 0x3efa01a01a01a01a // A8 = 1/8!
-data8 0x3f2a01a01a01a01a // A7 = 1/7!
-data8 0x3f56c16c16c16c17 // A6 = 1/6!
-data8 0x3f81111111111111 // A5 = 1/5!
-data8 0x3fa5555555555555 // A4 = 1/4!
-data8 0x3fc5555555555555 // A3 = 1/3!
-//
-data4 0x42b17218         // Smallest sgl arg to overflow sgl result
-data4 0x42b17217         // Largest sgl arg to give sgl result
-//
-// 2^(j/64) table, j goes from 0 to 63
-data8 0x0000000000000000 // 2^(0/64)
-data8 0x00002C9A3E778061 // 2^(1/64)
-data8 0x000059B0D3158574 // 2^(2/64)
-data8 0x0000874518759BC8 // 2^(3/64)
-data8 0x0000B5586CF9890F // 2^(4/64)
-data8 0x0000E3EC32D3D1A2 // 2^(5/64)
-data8 0x00011301D0125B51 // 2^(6/64)
-data8 0x0001429AAEA92DE0 // 2^(7/64)
-data8 0x000172B83C7D517B // 2^(8/64)
-data8 0x0001A35BEB6FCB75 // 2^(9/64)
-data8 0x0001D4873168B9AA // 2^(10/64)
-data8 0x0002063B88628CD6 // 2^(11/64)
-data8 0x0002387A6E756238 // 2^(12/64)
-data8 0x00026B4565E27CDD // 2^(13/64)
-data8 0x00029E9DF51FDEE1 // 2^(14/64)
-data8 0x0002D285A6E4030B // 2^(15/64)
-data8 0x000306FE0A31B715 // 2^(16/64)
-data8 0x00033C08B26416FF // 2^(17/64)
-data8 0x000371A7373AA9CB // 2^(18/64)
-data8 0x0003A7DB34E59FF7 // 2^(19/64)
-data8 0x0003DEA64C123422 // 2^(20/64)
-data8 0x0004160A21F72E2A // 2^(21/64)
-data8 0x00044E086061892D // 2^(22/64)
-data8 0x000486A2B5C13CD0 // 2^(23/64)
-data8 0x0004BFDAD5362A27 // 2^(24/64)
-data8 0x0004F9B2769D2CA7 // 2^(25/64)
-data8 0x0005342B569D4F82 // 2^(26/64)
-data8 0x00056F4736B527DA // 2^(27/64)
-data8 0x0005AB07DD485429 // 2^(28/64)
-data8 0x0005E76F15AD2148 // 2^(29/64)
-data8 0x0006247EB03A5585 // 2^(30/64)
-data8 0x0006623882552225 // 2^(31/64)
-data8 0x0006A09E667F3BCD // 2^(32/64)
-data8 0x0006DFB23C651A2F // 2^(33/64)
-data8 0x00071F75E8EC5F74 // 2^(34/64)
-data8 0x00075FEB564267C9 // 2^(35/64)
-data8 0x0007A11473EB0187 // 2^(36/64)
-data8 0x0007E2F336CF4E62 // 2^(37/64)
-data8 0x00082589994CCE13 // 2^(38/64)
-data8 0x000868D99B4492ED // 2^(39/64)
-data8 0x0008ACE5422AA0DB // 2^(40/64)
-data8 0x0008F1AE99157736 // 2^(41/64)
-data8 0x00093737B0CDC5E5 // 2^(42/64)
-data8 0x00097D829FDE4E50 // 2^(43/64)
-data8 0x0009C49182A3F090 // 2^(44/64)
-data8 0x000A0C667B5DE565 // 2^(45/64)
-data8 0x000A5503B23E255D // 2^(46/64)
-data8 0x000A9E6B5579FDBF // 2^(47/64)
-data8 0x000AE89F995AD3AD // 2^(48/64)
-data8 0x000B33A2B84F15FB // 2^(49/64)
-data8 0x000B7F76F2FB5E47 // 2^(50/64)
-data8 0x000BCC1E904BC1D2 // 2^(51/64)
-data8 0x000C199BDD85529C // 2^(52/64)
-data8 0x000C67F12E57D14B // 2^(53/64)
-data8 0x000CB720DCEF9069 // 2^(54/64)
-data8 0x000D072D4A07897C // 2^(55/64)
-data8 0x000D5818DCFBA487 // 2^(56/64)
-data8 0x000DA9E603DB3285 // 2^(57/64)
-data8 0x000DFC97337B9B5F // 2^(58/64)
-data8 0x000E502EE78B3FF6 // 2^(59/64)
-data8 0x000EA4AFA2A490DA // 2^(60/64)
-data8 0x000EFA1BEE615A27 // 2^(61/64)
-data8 0x000F50765B6E4540 // 2^(62/64)
-data8 0x000FA7C1819E90D8 // 2^(63/64)
-LOCAL_OBJECT_END(_expf_table)
 
+#include "libm_support.h"
+
+
+GR_SAVE_B0                    = r60
+GR_SAVE_PFS                   = r59
+GR_SAVE_GP                    = r61 
+
+GR_Parameter_X                = r62
+GR_Parameter_Y                = r63
+GR_Parameter_RESULT           = r64
+GR_Parameter_TAG              = r65
+
+FR_X             = f9
+FR_Y             = f1
+FR_RESULT        = f99
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+
+.align 64 
+Constants_exp_64_Arg:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
+data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000 
+data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
+data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
+// /* Inv_L, L_hi, L_lo */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
+
+.align 64 
+Constants_exp_64_Exponents:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
+data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
+data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
+data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
+
+.align 64 
+Constants_exp_64_A:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
+data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
+data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
+data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
+// /* Reversed */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
+
+.align 64 
+Constants_exp_64_P:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
+data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
+data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
+data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
+data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
+data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
+data4 0x000004C7,0x80000000,0x00003FFE,0x00000000 
+// /* Reversed */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
+
+.align 64 
+Constants_exp_64_Q:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Q,@object)
+data4 0xA49EF6CA,0xD00D56F7,0x00003FEF,0x00000000
+data4 0x1C63493D,0xD00D59AB,0x00003FF2,0x00000000
+data4 0xFB50CDD2,0xB60B60B5,0x00003FF5,0x00000000
+data4 0x7BA68DC8,0x88888888,0x00003FF8,0x00000000
+data4 0xAAAAAC8D,0xAAAAAAAA,0x00003FFA,0x00000000
+data4 0xAAAAACCA,0xAAAAAAAA,0x00003FFC,0x00000000
+data4 0x00000000,0x80000000,0x00003FFE,0x00000000 
+// /* Reversed */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Q)
+
+.align 64 
+Constants_exp_64_T1:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 
+data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
+data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
+data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
+data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
+data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
+data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
+data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
+data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
+data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
+data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
+data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
+data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
+data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
+data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
+
+.align 64 
+Constants_exp_64_T2:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E 
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA 
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A 
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA 
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA 
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE 
+data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
+
+.align 64 
+Constants_exp_64_W1:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
+data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
+data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
+data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
+data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
+data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
+data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
+data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
+data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
+data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
+data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
+data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A 
+data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB 
+data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E 
+data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA 
+data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08 
+data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B 
+data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75 
+data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79 
+data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7 
+data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087 
+data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB 
+data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643  
+data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C 
+data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D 
+data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873 
+data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F 
+data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861 
+data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0 
+data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC 
+data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB 
+data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB 
+data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148 
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
+
+.align 64 
+Constants_exp_64_W2:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
+data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25 
+data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8 
+data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A 
+data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E 
+data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9 
+data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2 
+data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0 
+data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509 
+data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33 
+data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D 
+data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87 
+data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3 
+data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9 
+data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F 
+data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82 
+data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4 
+data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D 
+data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030  
+data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29 
+data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED 
+data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B 
+data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893 
+data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35 
+data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C 
+data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313 
+data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE 
+data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426 
+data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550 
+data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4 
+data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31 
+data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE 
+data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
 
 .section .text
-GLOBAL_IEEE754_ENTRY(expm1f)
+.proc expm1f#
+.global expm1f#
+.align 64 
 
-{ .mlx
-      getf.exp        rSignexp_x = f8      // Must recompute if x unorm
-      movl            r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
-}
-{ .mlx
-      addl            rTblAddr = @ltoff(_expf_table),gp
-      movl            rRightShifter = 0x43E8000000000000 // DP Right Shifter
+expm1f: 
+#ifdef _LIBC
+.global __expm1f#
+__expm1f:
+#endif
+
+
+{ .mii
+      alloc r32 = ar.pfs,0,30,4,0
+(p0)  add r33 = 1, r0  
+(p0)  cmp.eq.unc  p7, p0 =  r0, r0 
 }
 ;;
 
+//
+//    Set p7 true for expm1
+//    Set Flag = r33 = 1 for expm1
+//    These are really no longer necessary, but are a remnant
+//       when this file had multiple entry points.
+//       They should be carefully removed
+
+
 { .mfi
-      // point to the beginning of the table
-      ld8             rTblAddr = [rTblAddr]
-      fclass.m        p14, p0 = f8 , 0x22  // test for -INF
-      mov             rExp_mask = 0x1ffff   // Exponent mask
+(p0)  add r32 = 0,r0  
+(p0)  fnorm.s1 f9 = f8 
+	  nop.i 0
 }
+
 { .mfi
-      nop.m           0
-      fnorm.s1        fNormX = f8 // normalized x
-      nop.i           0
+	  nop.m 0
+//
+//    Set p7 false for exp
+//    Set Flag = r33 = 0 for exp
+//    
+(p0)  fclass.m.unc p6, p8 =  f8, 0x1E7 
+	  nop.i 0 ;;
 }
-;;
 
 { .mfi
-      setf.d          f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
-      fclass.m        p9, p0 = f8 , 0x0b    // test for x unorm
-      mov             rExp_bias = 0xffff    // Exponent bias
+	nop.m 999
+(p0)  fclass.nm.unc p9, p0 =  f8, 0x1FF 
+	 nop.i 0 
+}
+
+{ .mfi
+	nop.m 999
+(p0)  mov f36 = f1 
+	nop.i 999 ;;
+}
+
+//     
+//    Identify NatVals, NaNs, Infs, and Zeros. 
+//    Identify EM unsupporteds. 
+//    Save special input registers 
+//
+//    Create FR_X_cor      = 0.0 
+//           GR_Flag       = 0 
+//           GR_Expo_Range = 0 (r32) for single precision 
+//           FR_Scale      = 1.0
+//
+
+{ .mfb
+	nop.m 999
+(p0)  mov f32 = f0 
+(p6)  br.cond.spnt EXPF_64_SPECIAL ;; 
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p9)  br.cond.spnt EXPF_64_UNSUPPORTED ;; 
+}
+
+//     
+//    Branch out for special input values 
+//     
+
+{ .mfi
+(p0)  cmp.ne.unc p12, p13 = 0x01, r33
+(p0)  fcmp.lt.unc.s0 p9,p0 =  f8, f0 
+(p0)  cmp.eq.unc  p15, p0 =  r0, r0 
 }
+
+//     
+//    Raise possible denormal operand exception 
+//    Normalize x 
+//     
+//    This function computes expf( x  + x_cor) 
+//    Input  FR 1: FR_X            
+//    Input  FR 2: FR_X_cor  
+//    Input  GR 1: GR_Flag  
+//    Input  GR 2: GR_Expo_Range  
+//    Output FR 3: FR_Y_hi  
+//    Output FR 4: FR_Y_lo  
+//    Output FR 5: FR_Scale  
+//    Output PR 1: PR_Safe  
+
+//
+//    Prepare to load constants
+//    Set Safe = True
+//
+
+{ .mmi
+(p0)  addl r34 = @ltoff(Constants_exp_64_Arg#),gp  
+(p0)  addl r40 = @ltoff(Constants_exp_64_W1#),gp 
+(p0)  addl r41 = @ltoff(Constants_exp_64_W2#),gp  
+};;
+
+{ .mmi
+      ld8 r34 = [r34]
+      ld8 r40 = [r40]
+(p0)  addl           r50   = @ltoff(Constants_exp_64_T1#),  gp
+}
+;;
+{ .mmi
+      ld8 r41  = [r41]
+(p0)  ldfe f37 = [r34],16
+(p0)  addl           r51   = @ltoff(Constants_exp_64_T2#),  gp
+}
+;;
+//
+//    N = fcvt.fx(float_N)
+//    Set p14 if -6 > expo_X 
+//
+//
+//    Bias = 0x0FFFF
+//    expo_X = expo_X and Mask  
+//
+
+{ .mmi
+      ld8  r50 = [r50]
+(p0)  ldfe f40 = [r34],16 
+      nop.i 999
+}
+;;
+
+{ .mlx
+      nop.m 999
+(p0)  movl r58 = 0x0FFFF 
+};;
+
+//
+//    Load W2_ptr
+//    Branch to SMALL if expo_X < -6
+//
+//
+//    float_N = X * L_Inv
+//    expo_X = exponent of X
+//    Mask = 0x1FFFF
+//
+
+{ .mmi
+      ld8  r51 = [r51]
+(p0)  ldfe f41 = [r34],16 
+//
+//    float_N = X * L_Inv
+//    expo_X = exponent of X
+//    Mask = 0x1FFFF
+//
+      nop.i 0
+};;
+
 { .mlx
-      // load Right Shifter to FP reg
-      setf.d          fRightShifter = rRightShifter
-      movl            rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
+(p0)  addl r34   = @ltoff(Constants_exp_64_Exponents#),  gp
+(p0)  movl r39 = 0x1FFFF  
 }
 ;;
 
+{ .mmi
+      ld8 r34 = [r34]
+(p0)  getf.exp r37 = f9 
+      nop.i 999
+}
+;;
+
+{ .mii
+      nop.m 999
+      nop.i 999 
+(p0)  and  r37 = r37, r39 ;;  
+}
+
+{ .mmi
+(p0)  sub r37 = r37, r58 ;;  
+(p0)  cmp.gt.unc  p14, p0 =  -6, r37 
+(p0)  cmp.lt.unc  p10, p0 =  14, r37 ;; 
+}
+
 { .mfi
-      ldfpd           fA8, fA7 = [rTblAddr], 16
-      fcmp.eq.s1      p13, p0 = f0, f8      // test for x = 0.0
-      mov             rExp_half = 0xfffe
+	nop.m 999
+//
+//    Load L_inv 
+//    Set p12 true for Flag = 0 (exp)
+//    Set p13 true for Flag = 1 (expm1)
+//
+(p0)  fmpy.s1 f38 = f9, f37 
+	nop.i 999 ;;
 }
+
 { .mfb
-      setf.d          fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
-      nop.f           0
-(p9)  br.cond.spnt    EXPM1_UNORM // Branch if x unorm
+	nop.m 999
+//
+//    Load L_hi
+//    expo_X = expo_X - Bias
+//    get W1_ptr      
+//
+(p0)  fcvt.fx.s1 f39 = f38
+(p14) br.cond.spnt EXPF_SMALL ;; 
 }
-;;
 
-EXPM1_COMMON:
-{ .mfb
-      ldfpd           fA6, fA5 = [rTblAddr], 16
-(p14) fms.s.s0        f8 = f0, f0, f1       // result if x = -inf
-(p14) br.ret.spnt     b0                    // exit here if x = -inf
+{ .mib
+	nop.m 999
+	nop.i 999
+(p10) br.cond.spnt EXPF_HUGE ;; 
+}
+
+{ .mmi
+(p0)  shladd r34 = r32,4,r34 
+(p0)  addl r35 = @ltoff(Constants_exp_64_A#),gp  
+      nop.i 999
 }
 ;;
 
-{ .mfb
-      ldfpd           fA4, fA3 = [rTblAddr], 16
-      fclass.m        p15, p0 = f8 , 0x1e1  // test for NaT,NaN,+Inf
-(p13) br.ret.spnt     b0                    // exit here if x =0.0, result is x
+{ .mmi
+      ld8 r35 = [r35]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+//
+//    Load T_1,T_2
+//
+
+{ .mmb
+(p0)  ldfe f51 = [r35],16 
+(p0)  ld8 r45 = [r34],8
+	nop.b 999 ;;
+}
+//    
+//    Set Safe = True  if k >= big_expo_neg  
+//    Set Safe = False if k < big_expo_neg  
+//    
+
+{ .mmb
+(p0)  ldfe f49 = [r35],16 
+(p0)  ld8 r48 = [r34],0
+	nop.b 999 ;;
+}
+
 { .mfi
-      // overflow thresholds
-      ldfps           fMIN_SGL_OFLOW_ARG, fMAX_SGL_NORM_ARG = [rTblAddr], 8
-      fma.s1          fXsq = fNormX, fNormX, f0      // x^2 for small path
-      and             rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+	nop.m 999
+//
+//    Branch to HUGE if expo_X > 14 
+//
+(p0)  fcvt.xf f38 = f39 
+	nop.i 999 ;;
 }
-{ .mlx
-      nop.m           0
-      movl            rM1_lim = 0xc1c00000  // Minus -1 limit (-24.0), SP
+
+{ .mfi
+(p0)  getf.sig r52 = f39 
+	nop.f 999
+	nop.i 999 ;;
+}
+
+{ .mii
+	nop.m 999
+(p0)  extr.u r43 = r52, 6, 6 ;;  
+//
+//    r = r - float_N * L_lo
+//    K = extr(N_fix,12,52)
+//
+(p0)  shladd r40 = r43,3,r40 ;; 
 }
-;;
 
 { .mfi
-      setf.exp        fA2 = rExp_half
-      // x*(64/ln(2)) + Right Shifter
-      fma.s1          fNint = fNormX, f64DivLn2, fRightShifter
-      sub             rExp_x = rExp_x, rExp_bias     // True exponent of x
+(p0)  shladd r50 = r43,2,r50 
+(p0)  fnma.s1 f42 = f40, f38, f9 
+//
+//    float_N = float(N)
+//    N_fix = significand of N 
+//
+(p0)  extr.u r42 = r52, 0, 6  
 }
-{ .mfb
-      nop.m           0
-(p15) fma.s.s0        f8 = f8, f1, f0       // result if x = NaT,NaN,+Inf
-(p15) br.ret.spnt     b0                    // exit here if x = NaT,NaN,+Inf
+
+{ .mmi
+(p0)  ldfd  f43 = [r40],0 ;; 
+(p0)  shladd r41 = r42,3,r41 
+(p0)  shladd r51 = r42,2,r51 
+}
+//
+//    W_1_p1 = 1 + W_1
+//
+
+{ .mmi
+(p0)  ldfs  f44 = [r50],0 ;; 
+(p0)  ldfd  f45 = [r41],0 
+//
+//    M_2 = extr(N_fix,0,6)
+//    M_1 = extr(N_fix,6,6)
+//    r = X - float_N * L_hi
+//
+(p0)  extr r44 = r52, 12, 52  
+}
+
+{ .mmi
+(p0)  ldfs  f46 = [r51],0 ;; 
+(p0)  sub r46 = r58, r44  
+(p0)  cmp.gt.unc  p8, p15 =  r44, r45 
+}
+//    
+//    W = W_1 + W_1_p1*W_2 
+//    Load  A_2 
+//    Bias_m_K = Bias - K
+//
+
+{ .mii
+(p0)  ldfe f40 = [r35],16 
+//
+//    load A_1
+//    poly = A_2 + r*A_3 
+//    rsq = r * r  
+//    neg_2_mK = exponent of Bias_m_k
+//
+(p0)  add r47 = r58, r44 ;;  
+//    
+//    Set Safe = True  if k <= big_expo_pos  
+//    Set Safe = False  if k >  big_expo_pos  
+//    Load A_3
+//    
+(p15) cmp.lt p8,p15 = r44,r48 ;;
+}
+
+{ .mmf
+(p0)  setf.exp f61 = r46 
+//    
+//    Bias_p + K = Bias + K
+//    T = T_1 * T_2
+//    
+(p0)  setf.exp f36 = r47 
+(p0)  fnma.s1 f42 = f41, f38, f42 ;; 
 }
-;;
 
 { .mfi
-      setf.s          fMAX_SGL_MINUS_1_ARG = rM1_lim // -1 threshold, -24.0
-      nop.f           0
-      cmp.gt          p7, p8 = -2, rExp_x      // Test |x| < 2^(-2)
+	nop.m 999
+//
+//    Load W_1,W_2
+//    Load big_exp_pos, load big_exp_neg
+//
+(p0)  fadd.s1 f47 = f43, f1 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-(p7)  cmp.gt.unc      p6, p7 = -40, rExp_x     // Test |x| < 2^(-40)
-      fma.s1          fA87 = fA8, fNormX, fA7  // Small path, A8*x+A7
-      nop.i           0
+	nop.m 999
+(p0)  fma.s1 f52 = f42, f51, f49 
+	nop.i 999
 }
+
 { .mfi
-      nop.m           0
-      fma.s1          fA65 = fA6, fNormX, fA5  // Small path, A6*x+A5
-      nop.i           0
+	nop.m 999
+(p0)  fmpy.s1 f48 = f42, f42 
+	nop.i 999 ;;
 }
-;;
 
-{ .mfb
-      nop.m           0
-(p6)  fma.s.s0        f8 = f8, f8, f8          // If x < 2^-40, result=x+x*x
-(p6)  br.ret.spnt     b0                       // Exit if x < 2^-40
+{ .mfi
+	nop.m 999
+(p0)  fmpy.s1 f53 = f44, f46 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-      // check for overflow
-      fcmp.gt.s1      p15, p14 = fNormX, fMIN_SGL_OFLOW_ARG
-      nop.i           0
+	nop.m 999
+(p0)  fma.s1 f54 = f45, f47, f43 
+	nop.i 999
 }
+
 { .mfi
-      nop.m           0
-      fms.s1          fN = fNint, f1, fRightShifter // n in FP register
-      nop.i           0
+	nop.m 999
+(p0)  fneg f61 =  f61 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m           0
-(p7)  fma.s1          fA43 = fA4, fNormX, fA3   // Small path, A4*x+A3
-      nop.i           0
+	nop.m 999
+(p0)  fma.s1 f52 = f42, f52, f40 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      getf.sig        rNJ = fNint               // bits of n, j
-(p7)  fma.s1          fA8765 = fA87, fXsq, fA65 // Small path, A87*xsq+A65
-      nop.i           0
+	nop.m 999
+(p0)  fadd.s1 f55 = f54, f1 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+//
+//    W + Wp1 * poly     
+// 
+(p0)  mov f34 = f53 
+	nop.i 999 ;;
 }
+
+{ .mfi
+	nop.m 999
+//
+//    A_1 + r * poly 
+//    Scale = setf_expf(Bias_p_k) 
+//
+(p0)  fma.s1 f52 = f48, f52, f42 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    poly = r + rsq(A_1 + r*poly) 
+//    Wp1 = 1 + W
+//    neg_2_mK = -neg_2_mK
+//
+(p0)  fma.s1 f35 = f55, f52, f54
+	nop.i 999 ;;
+}
+
 { .mfb
-      nop.m           0
-(p7)  fma.s1          fX3 = fXsq, fNormX, f0    // Small path, x^3
-      // branch out if overflow
-(p15) br.cond.spnt    EXPM1_CERTAIN_OVERFLOW
+	nop.m 999
+(p0)  fmpy.s1 f35 = f35, f53 
+//   
+//    Y_hi = T
+//    Y_lo = T * (W + Wp1*poly)
+//
+(p12) br.cond.sptk EXPF_MAIN ;; 
 }
-;;
+//
+//    Branch if expf(x)  
+//    Continue for expm1f(x) = expf(x) - 1
+//
+
+{ .mii
+(p0)  cmp.lt.unc  p12, p13 =  10, r44 
+	nop.i 999 ;;
+//
+//    Set p12 if 10 < K, Else p13 
+//
+(p13) cmp.gt.unc  p13, p14 =  -10, r44 ;; 
+}
+//
+//    K > 10:  Y_lo = Y_lo + neg_2_mK
+//    K <=10:  Set p13 if -10 > K, Else set p14 
+//
 
 { .mfi
-      addl            rN = 0xffff-63, rNJ    // biased and shifted n
-      fnma.s1         fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
-      extr.u          rJ = rNJ , 0 , 6       // bits of j
+(p13) cmp.eq  p15, p0 =  r0, r0 
+(p14) fadd.s1 f34 = f61, f34 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      shladd          rJ = rJ, 3, rTblAddr   // address in the 2^(j/64) table
-      // check for certain -1
-      fcmp.le.s1      p13, p0 = fNormX, fMAX_SGL_MINUS_1_ARG
-      shr             rN = rN, 6             // biased n
+	nop.m 999
+(p12) fadd.s1 f35 = f35, f61 
+	nop.i 999 ;;
 }
+
 { .mfi
-      nop.m           0
-(p7)  fma.s1          fA432 = fA43, fNormX, fA2 // Small path, A43*x+A2
-      nop.i           0
+	nop.m 999
+(p13) fadd.s1 f35 = f35, f34 
+	nop.i 999
+}
+
+{ .mfb
+	nop.m 999
+//
+//    K <= 10 and K < -10, Set Safe = True
+//    K <= 10 and K < -10,  Y_lo = Y_hi + Y_lo 
+//    K <= 10 and K >= -10, Y_hi = Y_hi + neg_2_mk 
+// 
+(p13) mov f34 = f61 
+(p0)  br.cond.sptk EXPF_MAIN ;; 
+}
+EXPF_SMALL: 
+{ .mmi
+(p12)  addl           r35   = @ltoff(Constants_exp_64_P#), gp
+(p0)   addl           r34   = @ltoff(Constants_exp_64_Exponents#), gp
+      nop.i 999
 }
 ;;
 
-{ .mfi
-      ld8             rJ = [rJ]
-      nop.f           0
-      shl             rN = rN , 52           // 2^n bits in DP format
+{ .mmi
+(p12) ld8 r35 = [r35]
+      ld8 r34 = [r34]
+      nop.i 999
 }
 ;;
 
+
 { .mmi
-      or              rN = rN, rJ        // bits of 2^n * 2^(j/64) in DP format
-(p13) mov             rTmp = 1           // Make small value for -1 path
-      nop.i           0
+(p13)  addl           r35   = @ltoff(Constants_exp_64_Q#), gp
+       nop.m 999
+       nop.i 999
 }
 ;;
 
+
+//
+//    Return
+//    K <= 10 and K < -10,  Y_hi = neg_2_mk
+//
+//    /*******************************************************/
+//    /*********** Branch EXP_SMALL  *************************/
+//    /*******************************************************/
+
 { .mfi
-      setf.d          fT = rN            // 2^n
-      // check for possible overflow (only happens if input higher precision)
-(p14) fcmp.gt.s1      p14, p0 = fNormX, fMAX_SGL_NORM_ARG
-      nop.i           0
+(p13) ld8 r35 = [r35]
+(p0)  mov f42 = f9 
+(p0)  add r34 = 0x48,r34  
 }
+;;
+
+//
+//    Flag = 0
+//    r4 = rsq * rsq
+//
+
 { .mfi
-      nop.m           0
-(p7)  fma.s1          fA8765432 = fA8765, fX3, fA432 // A8765*x^3+A432
-      nop.i           0
+(p0)  ld8 r49 =[r34],0
+	nop.f 999
+	nop.i 999 ;;
+}
+
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//    Flag = 1
+//
+(p0)  cmp.lt.unc  p14, p0 =  r37, r49 ;; 
 }
-;;
 
 { .mfi
-(p13) setf.exp        fTmp = rTmp        // Make small value for -1 path
-      fma.s1          fP = fA3, fR, fA2  // A3*R + A2
-      nop.i           0
+	nop.m 999
+//
+//    r = X
+//
+(p0)  fmpy.s1 f48 = f42, f42 
+	nop.i 999 ;;
 }
+
 { .mfb
-      nop.m           0
-      fma.s1          fRSqr = fR, fR, f0 // R^2
-(p13) br.cond.spnt    EXPM1_CERTAIN_MINUS_ONE // Branch if x < -24.0
+	nop.m 999
+//
+//    rsq = r * r
+//
+(p0)  fmpy.s1 f50 = f48, f48 
+//
+//    Is input very small?
+//
+(p14) br.cond.spnt EXPF_VERY_SMALL ;; 
 }
-;;
+//
+//    Flag_not1: Y_hi = 1.0
+//    Flag is 1: r6 = rsq * r4
+//
 
-{ .mfb
-      nop.m           0
-(p7)  fma.s.s0        f8 = fA8765432, fXsq, fNormX // Small path, 
-                                         // result=xsq*A8765432+x
-(p7)  br.ret.spnt     b0                 // Exit if 2^-40 <= |x| < 2^-2
+{ .mfi
+(p12) ldfe f52 = [r35],16 
+(p12) mov f34 = f1 
+(p0)  add r53 = 0x1,r0 ;;  
 }
-;;
 
 { .mfi
-      nop.m           0
-      fma.s1          fP = fP, fRSqr, fR // P = (A3*R + A2)*Rsqr + R
-      nop.i           0
+(p13) ldfe f51 = [r35],16 
+//
+//    Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
+//
+(p13) mov f34 = f9 
+	nop.i 999 ;;
+}
+
+{ .mmf
+(p12) ldfe f53 = [r35],16 
+//
+//    For Flag_not_1, Y_hi = X
+//    Scale = 1
+//    Create 0x000...01
+//
+(p0)  setf.sig f37 = r53 
+(p0)  mov f36 = f1 ;; 
+}
+
+{ .mmi
+(p13) ldfe f52 = [r35],16 ;; 
+(p12) ldfe f54 = [r35],16 
+	nop.i 999 ;;
+}
+
+{ .mfi
+(p13) ldfe f53 = [r35],16 
+(p13) fmpy.s1 f58 = f48, f50 
+	nop.i 999 ;;
+}
+//
+//    Flag_not1: poly_lo = P_5 + r*P_6
+//    Flag_1: poly_lo = Q_6 + r*Q_7
+//
+
+{ .mmi
+(p13) ldfe f54 = [r35],16 ;; 
+(p12) ldfe f55 = [r35],16 
+	nop.i 999 ;;
+}
+
+{ .mmi
+(p12) ldfe f56 = [r35],16 ;; 
+(p13) ldfe f55 = [r35],16 
+	nop.i 999 ;;
+}
+
+{ .mmi
+(p12) ldfe f57 = [r35],0 ;; 
+(p13) ldfe f56 = [r35],16 
+	nop.i 999 ;;
+}
+
+{ .mfi
+(p13) ldfe f57 = [r35],0 
+	nop.f 999
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    For  Flag_not_1, load p5,p6,p1,p2
+//    Else load p5,p6,p1,p2
+//
+(p12) fma.s1 f60 = f52, f42, f53 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p13) fma.s1 f60 = f51, f42, f52 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p12) fma.s1 f60 = f60, f42, f54 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p12) fma.s1 f59 = f56, f42, f57 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p13) fma.s1 f60 = f42, f60, f53 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p12) fma.s1 f59 = f59, f48, f42 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Flag_1: poly_lo = Q_5 + r*(Q_6 + r*Q_7) 
+//    Flag_not1: poly_lo = P_4 + r*(P_5 + r*P_6)
+//    Flag_not1: poly_hi = (P_1 + r*P_2)
+//
+(p13) fmpy.s1 f60 = f60, f58 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p12) fma.s1 f60 = f60, f42, f55 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Flag_1: poly_lo = r6 *(Q_5 + ....)
+//    Flag_not1: poly_hi =  r + rsq *(P_1 + r*P_2)
+//
+(p12) fma.s1 f35 = f60, f50, f59 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p13) fma.s1 f59 = f54, f42, f55 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Flag_not1: Y_lo = rsq* poly_hi + poly_lo 
+//    Flag_1: poly_lo = rsq* poly_hi + poly_lo 
+//
+(p13) fma.s1 f59 = f59, f42, f56 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Flag_not_1: (P_1 + r*P_2) 
+//
+(p13) fma.s1 f59 = f59, f42, f57 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Flag_not_1: poly_hi = r + rsq * (P_1 + r*P_2) 
+//
+(p13) fma.s1 f35 = f59, f48, f60 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Create 0.000...01
+//
+(p0)  for f37 = f35, f37 
+	nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      fms.s1          fTm1 = fT, f1, f1  // T - 1.0
-(p14) br.cond.spnt    EXPM1_POSSIBLE_OVERFLOW
+	nop.m 999
+//
+//    Set lsb of Y_lo to 1
+//
+(p0)  fmerge.se f35 = f35,f37 
+(p0)  br.cond.sptk EXPF_MAIN ;; 
+}
+EXPF_VERY_SMALL: 
+
+{ .mmi
+      nop.m 999
+(p13) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
+      nop.i 999;;
+}
+
+{ .mfi
+(p13) ld8  r34 = [r34];
+(p12) mov f35 = f9
+      nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      fma.s.s0        f8 = fP, fT, fTm1
-      br.ret.sptk     b0                 // Result for main path
-                                         // minus_one_limit < x < -2^-2
-                                         // and +2^-2 <= x < overflow_limit
+	nop.m 999
+(p12) mov f34 = f1 
+(p12) br.cond.sptk EXPF_MAIN ;; 
+}
+
+{ .mlx
+(p13) add  r34 = 8,r34 
+(p13) movl r39 = 0x0FFFE ;; 
+}
+//
+//    Load big_exp_neg 
+//    Create 1/2's exponent
+//
+
+{ .mii
+(p13) setf.exp f56 = r39 
+(p13) shladd r34 = r32,4,r34 ;;  
+	nop.i 999
+}
+//
+//    Negative exponents are stored after positive
+//
+
+{ .mfi
+(p13) ld8 r45 = [r34],0
+//
+//    Y_hi = x
+//    Scale = 1
+//
+(p13) fmpy.s1 f35 = f9, f9 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Reset Safe if necessary 
+//    Create 1/2
+//
+(p13) mov f34 = f9 
+	nop.i 999 ;;
+}
+
+{ .mfi
+(p13) cmp.lt.unc  p0, p15 =  r37, r45 
+(p13) mov f36 = f1 
+	nop.i 999 ;;
 }
-;;
 
-// Here if x unorm
-EXPM1_UNORM:
 { .mfb
-      getf.exp        rSignexp_x = fNormX // Must recompute if x unorm
-      fcmp.eq.s0      p6, p0 = f8, f0     // Set D flag
-      br.cond.sptk    EXPM1_COMMON
+	nop.m 999
+//
+//    Y_lo = x * x
+//
+(p13) fmpy.s1 f35 = f35, f56 
+//
+//    Y_lo = x*x/2 
+//
+(p13) br.cond.sptk EXPF_MAIN ;; 
+}
+EXPF_HUGE: 
+
+{ .mfi
+	nop.m 999
+(p0)  fcmp.gt.unc.s1 p14, p0 =  f9, f0 
+	nop.i 999
+}
+
+{ .mlx
+	nop.m 999
+(p0)  movl r39 = 0x15DC0 ;; 
+}
+
+{ .mfi
+(p14) setf.exp f34 = r39 
+(p14) mov f35 = f1 
+(p14) cmp.eq  p0, p15 =  r0, r0 ;; 
 }
-;;
 
-// here if result will be -1 and inexact, x <= -24.0
-EXPM1_CERTAIN_MINUS_ONE:
 { .mfb
-      nop.m           0
-      fms.s.s0        f8 = fTmp, fTmp, f1  // Result -1, and Inexact set
-      br.ret.sptk     b0
+	nop.m 999
+(p14) mov f36 = f34 
+//
+//    If x > 0, Set Safe = False
+//    If x > 0, Y_hi = 2**(24,000)
+//    If x > 0, Y_lo = 1.0
+//    If x > 0, Scale = 2**(24,000)
+//
+(p14) br.cond.sptk EXPF_MAIN ;; 
 }
-;;
 
-EXPM1_POSSIBLE_OVERFLOW:
+{ .mlx
+	nop.m 999
+(p12) movl r39 = 0xA240 
+}
+
+{ .mlx
+	nop.m 999
+(p12) movl r38 = 0xA1DC ;; 
+}
 
-// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
-// This cannot happen if input is a single, only if input higher precision.
-// Overflow is a possibility, not a certainty.
+{ .mmb
+(p13) cmp.eq  p15, p14 =  r0, r0 
+(p12) setf.exp f34 = r39 
+	nop.b 999 ;;
+}
 
-// Recompute result using status field 2 with user's rounding mode,
-// and wre set.  If result is larger than largest single, then we have
-// overflow
+{ .mlx
+(p12) setf.exp f35 = r38 
+(p13) movl r39 = 0xFF9C 
+}
 
 { .mfi
-      mov             rGt_ln  = 0x1007f // Exponent for largest sgl + 1 ulp
-      fsetc.s2        0x7F,0x42         // Get user's round mode, set wre
-      nop.i           0
+	nop.m 999
+(p13) fsub.s1 f34 = f0, f1
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      setf.exp        fGt_pln = rGt_ln  // Create largest single + 1 ulp
-      fma.s.s2        fWre_urm_f8 = fP, fT, fTm1  // Result with wre set
-      nop.i           0
+	nop.m 999
+(p12) mov f36 = f34 
+(p12) cmp.eq  p0, p15 =  r0, r0 ;; 
 }
-;;
 
 { .mfi
-      nop.m           0
-      fsetc.s2        0x7F,0x40                   // Turn off wre in sf2
-      nop.i           0
+(p13) setf.exp f35 = r39 
+(p13) mov f36 = f1 
+	nop.i 999 ;;
 }
-;;
+EXPF_MAIN: 
 
 { .mfi
-      nop.m           0
-      fcmp.ge.s1      p6, p0 =  fWre_urm_f8, fGt_pln // Test for overflow
-      nop.i           0
+(p0)  cmp.ne.unc p12, p0 = 0x01, r33
+(p0)  fmpy.s1 f101 = f36, f35 
+	nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      nop.f           0
-(p6)  br.cond.spnt    EXPM1_CERTAIN_OVERFLOW // Branch if overflow
+	nop.m 999
+(p0)  fma.s.s0 f99 = f34, f36, f101 
+(p15) br.cond.sptk EXPF_64_RETURN ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fsetc.s3 0x7F,0x01
+	nop.i 999
+}
+
+{ .mlx
+	nop.m 999
+(p0)  movl r50 = 0x0000000001007F ;;
+}
+//    
+//    S0 user supplied status
+//    S2 user supplied status + WRE + TD  (Overflows) 
+//    S3 user supplied status + RZ + TD   (Underflows) 
+//    
+//    
+//    If (Safe) is true, then
+//        Compute result using user supplied status field.
+//        No overflow or underflow here, but perhaps inexact.
+//        Return
+//    Else
+//       Determine if overflow or underflow  was raised.
+//       Fetch +/- overflow threshold for IEEE single, double,
+//       double extended   
+//    
+
+{ .mfi
+(p0)  setf.exp f60 = r50
+(p0)  fma.s.s3 f102 = f34, f36, f101 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fsetc.s3 0x7F,0x40 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    For Safe, no need to check for over/under. 
+//    For expm1, handle errors like exp. 
+//
+(p0)  fsetc.s2 0x7F,0x42
+	nop.i 999;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s.s2 f100 = f34, f36, f101 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fsetc.s2 0x7F,0x40 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p7)  fclass.m.unc   p12, p0 =  f102, 0x00F
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fclass.m.unc   p11, p0 =  f102, 0x00F
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p7)  fcmp.ge.unc.s1 p10, p0 =  f100, f60
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+//    
+//    Compare the S2 (WRE) result f100 against f60, the overflow
+//    threshold built from exponent 0x1007F (largest single + 1 ulp).
+//    
+(p0)  fcmp.ge.unc.s1 p8, p0 =  f100, f60
+	nop.i 999 ;;
+}
+//    
+//    fcmp:   resultS2 >= + overflow threshold  -> set (a) if true
+//    fcmp:   resultS2 <= - overflow threshold  -> set (b) if true
+//    fclass: resultS3 is denorm/unorm/0        -> set (d) if true
+//    
+
+{ .mib
+(p10) mov   GR_Parameter_TAG = 43
+	nop.i 999
+(p10) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+(p8)  mov   GR_Parameter_TAG = 16
+	nop.i 999
+(p8)  br.cond.sptk __libm_error_region ;;
+}
+//    
+//    Report that exp overflowed
+//    
+
+{ .mib
+(p12) mov   GR_Parameter_TAG = 44
+	nop.i 999
+(p12) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+(p11) mov   GR_Parameter_TAG = 17
+	nop.i 999
+(p11) br.cond.sptk __libm_error_region ;;
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+//    
+//    Report that exp underflowed
+//    
+(p0)  br.cond.sptk EXPF_64_RETURN ;;
+}
+EXPF_64_SPECIAL: 
+
+{ .mfi
+	nop.m 999
+(p0)  fclass.m.unc p6,  p0 =  f8, 0x0c3 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fclass.m.unc p13, p8 =  f8, 0x007 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p7)  fclass.m.unc p14, p0 =  f8, 0x007 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fclass.m.unc p12, p9 =  f8, 0x021 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fclass.m.unc p11, p0 =  f8, 0x022 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p7)  fclass.m.unc p10, p0 =  f8, 0x022 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//    
+//    Identify +/- 0, Inf, or -Inf 
+//    Generate the right kind of NaN.
+//    
+(p13) fadd.s.s0 f99 = f0, f1 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p14) mov f99 = f8 
+	nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      fma.s.s0        f8 = fP, fT, fTm1
-      br.ret.sptk     b0                     // Exit if really no overflow
+	nop.m 999
+(p6)  fadd.s.s0 f99 = f8, f1 
+//    
+//    expf(+/-0) = 1 
+//    expm1f(+/-0) = +/-0 
+//    No exceptions raised
+//    
+(p6)  br.cond.sptk EXPF_64_RETURN ;;
 }
-;;
 
-// here if overflow
-EXPM1_CERTAIN_OVERFLOW:
-{ .mmi
-      addl            rTmp = 0x1FFFE, r0;;
-      setf.exp        fTmp = rTmp
-      nop.i 999
+{ .mib
+	nop.m 999
+	nop.i 999
+(p14)  br.cond.sptk EXPF_64_RETURN ;;
 }
-;;
 
 { .mfi
-      alloc           r32 = ar.pfs, 0, 3, 4, 0 // get some registers
-      fmerge.s        FR_X = fNormX,fNormX
-      nop.i           0
+	nop.m 999
+(p11) mov f99 = f0 
+	nop.i 999 ;;
 }
+
 { .mfb
-      mov             GR_Parameter_TAG = 43
-      fma.s.s0        FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
-      br.cond.sptk    __libm_error_region
+	nop.m 999
+(p10) fsub.s.s1 f99 = f0, f1 
+//    
+//    expf(-Inf) = 0 
+//    expm1f(-Inf) = -1 
+//    No exceptions raised.
+//    
+(p10)  br.cond.sptk EXPF_64_RETURN ;;
 }
-;;
 
-GLOBAL_IEEE754_END(expm1f)
+{ .mfb
+	nop.m 999
+(p12) fmpy.s.s1 f99 = f8, f1 
+//    
+//    expf(+Inf) = Inf 
+//    No exceptions raised.
+//    
+(p0)  br.cond.sptk EXPF_64_RETURN ;; 
+}
+EXPF_64_UNSUPPORTED: 
+
+{ .mfb
+      nop.m 999
+(p0)  fmpy.s.s0 f99 = f8, f0 
+      nop.b 0;;
+}
+
+EXPF_64_RETURN:
+{ .mfb
+      nop.m 999
+(p0)  mov   f8     = f99
+(p0)  br.ret.sptk   b0
+}
+.endp expm1f
+ASM_SIZE_DIRECTIVE(expm1f)
+
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
-      add   GR_Parameter_Y=-32,sp             // Parameter 2 value
-      nop.f 999
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
+		nop.f 0                   
 .save   ar.pfs,GR_SAVE_PFS
-      mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-      add sp=-64,sp                           // Create new stack
-      nop.f 0
-      mov GR_SAVE_GP=gp                       // Save gp
+        add sp=-64,sp                           // Create new stack
+        nop.f 0
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
 { .mmi
-      stfs [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 on stack
-      add GR_Parameter_X = 16,sp              // Parameter 1 address
+        stfs [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0                       // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
 .body
-{ .mfi
-      stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
-      nop.f 0
-      add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+{ .mib
+        stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+        nop.b 0
 }
 { .mib
-      stfs [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
-      add   GR_Parameter_Y = -16,GR_Parameter_Y
-      br.call.sptk b0=__libm_error_support#   // Call error handling function
+        stfs [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
+        add   GR_Parameter_Y = -16,GR_Parameter_Y
+        br.call.sptk b0=__libm_error_support#   // Call error handling function
 };;
-
 { .mmi
-      add   GR_Parameter_RESULT = 48,sp
-      nop.m 0
-      nop.i 0
+        nop.m 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
-
 { .mmi
-      ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
+        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-      add   sp = 64,sp                       // Restore stack pointer
-      mov   b0 = GR_SAVE_B0                  // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-      mov   gp = GR_SAVE_GP                  // Restore gp
-      mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-      br.ret.sptk     b0                     // Return
-};;
+        mov   gp = GR_SAVE_GP                  // Restore gp 
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
+};; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 
 .type   __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/s_expm1l.S b/sysdeps/ia64/fpu/s_expm1l.S
index 069856d244..e53d3c8d7c 100644
--- a/sysdeps/ia64/fpu/s_expm1l.S
+++ b/sysdeps/ia64/fpu/s_expm1l.S
@@ -1,10 +1,10 @@
-.file "expl_m1.s"
+.file "exp_m1l.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,22 +35,15 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial Version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 07/07/01 Improved speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align;
-//          used data8 for long double table values
-// 03/11/03 Improved accuracy and performance, corrected missing inexact flags
-// 04/17/03 Eliminated misplaced and unused data label
 //
-//********************************************************************* 
+// ********************************************************************* 
 //
 // Function:   Combined expl(x) and expm1l(x), where
 //                        x 
@@ -58,20 +51,20 @@
 //                          x
 //             expm1l(x) = e  - 1  for double-extended precision x values
 //
-//********************************************************************* 
+// ********************************************************************* 
 //
 // Resources Used:
 //
 //    Floating-Point Registers: f8  (Input and Return Value) 
-//                              f9-f15,f32-f77 
+//                              f9,f32-f61, f99-f102 
 //
 //    General Purpose Registers: 
-//      r14-r38
-//      r35-r38 (Used to pass arguments to error handling routine)
+//      r32-r61
+//      r62-r65 (Used to pass arguments to error handling routine)
 //                                     
 //    Predicate Registers:      p6-p15
 //
-//********************************************************************* 
+// ********************************************************************* 
 //
 // IEEE Special Conditions:
 //
@@ -81,37 +74,39 @@
 //    (Error Handling Routine called for overflow and Underflow)
 //    Inexact raised when appropriate by algorithm 
 //
-//    exp(inf) = inf
-//    exp(-inf) = +0
-//    exp(SNaN) = QNaN
-//    exp(QNaN) = QNaN
-//    exp(0) = 1
-//    exp(EM_special Values) = QNaN
-//    exp(inf) = inf
-//    expm1(-inf) = -1 
-//    expm1(SNaN) = QNaN
-//    expm1(QNaN) = QNaN
-//    expm1(0) = 0
-//    expm1(EM_special Values) = QNaN
+//    expl(inf) = inf
+//    expl(-inf) = +0
+//    expl(SNaN) = QNaN
+//    expl(QNaN) = QNaN
+//    expl(0) = 1
+//    expl(EM_special Values) = QNaN
+//    expl(inf) = inf
+//    expm1l(-inf) = -1 
+//    expm1l(SNaN) = QNaN
+//    expm1l(QNaN) = QNaN
+//    expm1l(0) = 0
+//    expm1l(EM_special Values) = QNaN
 //    
-//********************************************************************* 
+// ********************************************************************* 
 //
 // Implementation and Algorithm Notes:
 //
 //  ker_exp_64( in_FR  : X,
+//            in_GR  : Flag,
+//            in_GR  : Expo_Range,
 //            out_FR : Y_hi,
 //            out_FR : Y_lo,
 //            out_FR : scale,
 //            out_PR : Safe )
 //
-// On input, X is in register format
-// p6 for exp,
-// p7 for expm1,
+// On input, X is in register format and 
+// Flag  = 0 for exp,
+// Flag  = 1 for expm1,
 //
-// On output, 
+// On output, provided X and X_cor are real numbers, then
 //
-//   scale*(Y_hi + Y_lo)  approximates  exp(X)       if exp
-//   scale*(Y_hi + Y_lo)  approximates  exp(X)-1     if expm1
+//   scale*(Y_hi + Y_lo)  approximates  expl(X)       if Flag is 0
+//   scale*(Y_hi + Y_lo)  approximates  expl(X)-1     if Flag is 1
 //
 // The accuracy is sufficient for a highly accurate 64 sig.
 // bit implementation.  Safe is set if there is no danger of 
@@ -127,36 +122,36 @@
 // The method consists of three cases.
 // 
 // If           |X| < Tiny	use case exp_tiny;
-// else if	|X| < 2^(-m)	use case exp_small; m=12 for exp, m=7 for expm1
+// else if	|X| < 2^(-6)	use case exp_small;
 // else		use case exp_regular;
 //
 // Case exp_tiny:
 //
-//   1 + X     can be used to approximate exp(X) 
-//   X + X^2/2 can be used to approximate exp(X) - 1
+//   1 + X     can be used to approximate expl(X) or expl(X+X_cor);
+//   X + X^2/2 can be used to approximate expl(X) - 1
 //
 // Case exp_small:
 //
-//   Here, exp(X) and exp(X) - 1 can all be 
+//   Here, expl(X), expl(X+X_cor), and expl(X) - 1 can all be 
 //   appproximated by a relatively simple polynomial.
 //
 //   This polynomial resembles the truncated Taylor series
 //
-//	exp(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
+//	expl(w) = 1 + w + w^2/2! + w^3/3! + ... + w^n/n!
 //
 // Case exp_regular:
 //
 //   Here we use a table lookup method. The basic idea is that in
-//   order to compute exp(X), we accurately decompose X into
+//   order to compute expl(X), we accurately decompose X into
 //
 //   X = N * log(2)/(2^12)  + r,	|r| <= log(2)/2^13.
 //
 //   Hence
 //
-//   exp(X) = 2^( N / 2^12 ) * exp(r).
+//   expl(X) = 2^( N / 2^12 ) * expl(r).
 //
 //   The value 2^( N / 2^12 ) is obtained by simple combinations
-//   of values calculated beforehand and stored in table; exp(r)
+//   of values calculated beforehand and stored in table; expl(r)
 //   is approximated by a short polynomial because |r| is small.
 //
 //   We elaborate this method in 4 steps.
@@ -183,9 +178,13 @@
 //   as a double-precision number; L_lo has 64 significant bits and
 //   stored as a double-extended number.
 //
+//   In the case Flag = 2, we further modify r by
+//
+//   r := r + X_cor.
+//
 //   Step 2: Approximation
 //
-//   exp(r) - 1 is approximated by a short polynomial of the form
+//   expl(r) - 1 is approximated by a short polynomial of the form
 //   
 //   r + A_1 r^2 + A_2 r^3 + A_3 r^4 .
 //
@@ -214,19 +213,19 @@
 //   Define two mathematical values, delta_1 and delta_2, implicitly
 //   such that
 //
-//     T_1 = exp( [M_1 log(2)/2^6]  -  delta_1 ) 
-//     T_2 = exp( [M_2 log(2)/2^12] -  delta_2 )
+//     T_1 = expl( [M_1 log(2)/2^6]  -  delta_1 ) 
+//     T_2 = expl( [M_2 log(2)/2^12] -  delta_2 )
 //
 //   are representable as 24 significant bits. To illustrate the idea,
 //   we show how we define delta_1: 
 //
-//     T_1     := round_to_24_bits( exp( M_1 log(2)/2^6 ) )
+//     T_1     := round_to_24_bits( expl( M_1 log(2)/2^6 ) )
 //     delta_1  = (M_1 log(2)/2^6) - log( T_1 )  
 //
 //   The last equality means mathematical equality. We then tabulate
 //
-//     W_1 := exp(delta_1) - 1
-//     W_2 := exp(delta_2) - 1
+//     W_1 := expl(delta_1) - 1
+//     W_2 := expl(delta_2) - 1
 //
 //   Both in double precision.
 //
@@ -236,13 +235,13 @@
 //     T := T_1 * T_2			...exactly
 //     W := W_1 + (1 + W_1)*W_2	
 //
-//   W approximates exp( delta ) - 1  where delta = delta_1 + delta_2.
+//   W approximates expl( delta ) - 1  where delta = delta_1 + delta_2.
 //   The mathematical product of T and (W+1) is an accurate representation
 //   of 2^(M_1/2^6) * 2^(M_2/2^12).
 //
 //   Step 4. Reconstruction
 //
-//   Finally, we can reconstruct exp(X), exp(X) - 1. 
+//   Finally, we can reconstruct expl(X), expl(X) - 1. 
 //   Because
 //
 //	X = K * log(2) + (M_1*log(2)/2^6  - delta_1) 
@@ -250,18 +249,18 @@
 //		       + delta_1 + delta_2 + r 		...accurately
 //   We have
 //
-//	exp(X) ~=~ 2^K * ( T + T*[exp(delta_1+delta_2+r) - 1] )
-//	       ~=~ 2^K * ( T + T*[exp(delta + r) - 1]         )
-//	       ~=~ 2^K * ( T + T*[(exp(delta)-1)  
-//				 + exp(delta)*(exp(r)-1)]   )
+//	expl(X) ~=~ 2^K * ( T + T*[expl(delta_1+delta_2+r) - 1] )
+//	       ~=~ 2^K * ( T + T*[expl(delta + r) - 1]         )
+//	       ~=~ 2^K * ( T + T*[(expl(delta)-1)  
+//				 + expl(delta)*(expl(r)-1)]   )
 //             ~=~ 2^K * ( T + T*( W + (1+W)*poly(r) ) )
 //             ~=~ 2^K * ( Y_hi  +  Y_lo )
 //
 //   where Y_hi = T  and Y_lo = T*(W + (1+W)*poly(r))
 //
-//   For exp(X)-1, we have
+//   For expl(X)-1, we have
 //
-//	exp(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
+//	expl(X)-1 ~=~ 2^K * ( Y_hi + Y_lo ) - 1
 //		 ~=~ 2^K * ( Y_hi + Y_lo - 2^(-K) )
 //
 //   and we combine Y_hi + Y_lo - 2^(-N)  into the form of two 
@@ -279,7 +278,7 @@
 //   different rounding directions and a correct setting of the SAFE 
 //   flag.
 //
-//   If expm1 is 1, then
+//   If Flag is 1, then
 //      SAFE  := False	...possibility of underflow
 //      Scale := 1.0
 //      Y_hi  := X
@@ -297,25 +296,26 @@
 //
 //   Let r = X 
 //
-//   If exp 	...i.e. exp( argument )
+//   If Flag is not 1	...i.e. expl( argument )
 //
 //      rsq := r * r; 
 //      r4  := rsq*rsq
 //      poly_lo := P_3 + r*(P_4 + r*(P_5 + r*P_6))
 //      poly_hi := r + rsq*(P_1 + r*P_2)
 //      Y_lo    := poly_hi + r4 * poly_lo
+//      set lsb(Y_lo) to 1
 //      Y_hi    := 1.0
 //      Scale   := 1.0
 //
-//   Else			...i.e. exp( argument ) - 1
+//   Else			...i.e. expl( argument ) - 1
 //
 //      rsq := r * r
 //      r4  := rsq * rsq
-//      poly_lo := Q_7 + r*(Q_8 + r*Q_9))
-//      poly_med:= Q_3 + r*Q_4 + rsq*(Q_5 + r*Q_6)
-//      poly_med:= poly_med + r4*poly_lo
-//      poly_hi := Q_1 + r*Q_2
-//      Y_lo    := rsq*(poly_hi +  rsq*poly_lo)
+//      r6  := rsq * r4
+//      poly_lo := r6*(Q_5 + r*(Q_6 + r*Q_7))
+//      poly_hi := Q_1 + r*(Q_2 + r*(Q_3 + r*Q_4))
+//      Y_lo    := rsq*poly_hi +  poly_lo
+//      set lsb(Y_lo) to 1
 //      Y_hi    := X
 //      Scale   := 1.0
 //
@@ -325,14 +325,14 @@
 //
 //  The previous description contain enough information except the
 //  computation of poly and the final Y_hi and Y_lo in the case for
-//  exp(X)-1.
+//  expl(X)-1.
 //
 //  The computation of poly for Step 2:
 //
 //   rsq := r*r
 //   poly := r + rsq*(A_1 + r*(A_2 + r*A_3))
 //
-//  For the case exp(X) - 1, we need to incorporate 2^(-K) into
+//  For the case expl(X) - 1, we need to incorporate 2^(-K) into
 //  Y_hi and Y_lo at the end of Step 4.
 //
 //   If K > 10 then
@@ -346,197 +346,72 @@
 //      End If
 //   End If
 //
-//=======================================================
-// General Purpose Registers
-//
-GR_ad_Arg           = r14
-GR_ad_A             = r15
-GR_sig_inv_ln2      = r15
-GR_rshf_2to51       = r16
-GR_ad_PQ            = r16
-GR_ad_Q             = r16
-GR_signexp_x        = r17
-GR_exp_x            = r17
-GR_small_exp        = r18
-GR_rshf             = r18
-GR_exp_mask         = r19
-GR_ad_W1            = r20
-GR_exp_2tom51       = r20
-GR_ad_W2            = r21
-GR_exp_underflow    = r21
-GR_M2               = r22
-GR_huge_exp         = r22
-GR_M1               = r23
-GR_huge_signif      = r23
-GR_K                = r24
-GR_one              = r24
-GR_minus_one        = r24
-GR_exp_bias         = r25
-GR_ad_Limits        = r26
-GR_N_fix            = r26
-GR_exp_2_mk         = r26
-GR_ad_P             = r27
-GR_exp_2_k          = r27
-GR_big_expo_neg     = r28
-GR_very_small_exp   = r29
-GR_exp_half         = r29
-GR_ad_T1            = r30
-GR_ad_T2            = r31
 
-GR_SAVE_PFS         = r32
-GR_SAVE_B0          = r33
-GR_SAVE_GP          = r34
-GR_Parameter_X      = r35
-GR_Parameter_Y      = r36
-GR_Parameter_RESULT = r37
-GR_Parameter_TAG    = r38 
+#include "libm_support.h"
 
-// Floating Point Registers
-//
-FR_norm_x           = f9
-FR_RSHF_2TO51       = f10
-FR_INV_LN2_2TO63    = f11
-FR_W_2TO51_RSH      = f12
-FR_2TOM51           = f13
-FR_RSHF             = f14
-FR_Y_hi             = f34
-FR_Y_lo             = f35
-FR_scale            = f36
-FR_tmp              = f37
-FR_float_N          = f38
-FR_N_signif         = f39
-FR_L_hi             = f40
-FR_L_lo             = f41
-FR_r                = f42
-FR_W1               = f43
-FR_T1               = f44
-FR_W2               = f45
-FR_T2               = f46
-FR_W1_p1            = f47
-FR_rsq              = f48
-FR_A2               = f49
-FR_r4               = f50
-FR_A3               = f51
-FR_poly             = f52
-FR_T                = f53
-FR_W                = f54
-FR_Wp1              = f55
-FR_p21              = f59
-FR_p210             = f59
-FR_p65              = f60
-FR_p654             = f60
-FR_p6543            = f60
-FR_2_mk             = f61
-FR_P4Q7             = f61
-FR_P4               = f61
-FR_Q7               = f61
-FR_P3Q6             = f62
-FR_P3               = f62
-FR_Q6               = f62
-FR_q65              = f62
-FR_q6543            = f62
-FR_P2Q5             = f63
-FR_P2               = f63
-FR_Q5               = f63
-FR_P1Q4             = f64
-FR_P1               = f64
-FR_Q4               = f64
-FR_q43              = f64
-FR_Q3               = f65
-FR_Q2               = f66
-FR_q21              = f66
-FR_Q1               = f67
-FR_A1               = f68
-FR_P6Q9             = f68
-FR_P6               = f68
-FR_Q9               = f68
-FR_P5Q8             = f69
-FR_P5               = f69
-FR_Q8               = f69
-FR_q987             = f69
-FR_q98              = f69
-FR_q9876543         = f69
-FR_min_oflow_x      = f70
-FR_huge_exp         = f70
-FR_zero_uflow_x     = f71
-FR_huge_signif      = f71
-FR_huge             = f72
-FR_small            = f72
-FR_half             = f73
-FR_T_scale          = f74
-FR_result_lo        = f75
-FR_W_T_scale        = f76
-FR_Wp1_T_scale      = f77
-FR_ftz              = f77
-FR_half_x           = f77
-//
-
-FR_X                = f9
-FR_Y                = f0
-FR_RESULT           = f15
-
-// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
-// double-extended 1/ln(2)
-// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
-// 3fff b8aa 3b29 5c17 f0bc 
-// For speed the significand will be loaded directly with a movl and setf.sig
-//   and the exponent will be bias+63 instead of bias+0.  Thus subsequent
-//   computations need to scale appropriately.
-// The constant 2^12/ln(2) is needed for the computation of N.  This is also 
-//   obtained by scaling the computations.
-//
-// Two shifting constants are loaded directly with movl and setf.d. 
-//   1. RSHF_2TO51 = 1.1000..00 * 2^(63-12) 
-//        This constant is added to x*1/ln2 to shift the integer part of
-//        x*2^12/ln2 into the rightmost bits of the significand.
-//        The result of this fma is N_signif.
-//   2. RSHF       = 1.1000..00 * 2^(63) 
-//        This constant is subtracted from N_signif * 2^(-51) to give
-//        the integer part of N, N_fix, as a floating-point number.
-//        The result of this fms is float_N.
-
-RODATA
 .align 64 
-LOCAL_OBJECT_START(Constants_exp_64_Arg)
-//data8 0xB8AA3B295C17F0BC,0x0000400B // Inv_L = 2^12/log(2)
-data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12
-data8 0xF473DE6AF278ECE6,0x00003FD4 // L_lo = lo part log(2)/2^12
-LOCAL_OBJECT_END(Constants_exp_64_Arg)
+Constants_exp_64_Arg:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
+data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000 
+data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
+data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
+// /* Inv_L, L_hi, L_lo */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
 
-LOCAL_OBJECT_START(Constants_exp_64_Limits)
-data8 0xb17217f7d1cf79ac,0x0000400c // Smallest long dbl oflow x
-data8 0xb220000000000000,0x0000c00c // Small long dbl uflow zero x
-LOCAL_OBJECT_END(Constants_exp_64_Limits)
+.align 64 
+Constants_exp_64_Exponents:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
+data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
+data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
+data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
+data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
 
-LOCAL_OBJECT_START(Constants_exp_64_A)
-data8 0xAAAAAAABB1B736A0,0x00003FFA // A3
-data8 0xAAAAAAAB90CD6327,0x00003FFC // A2
-data8 0xFFFFFFFFFFFFFFFF,0x00003FFD // A1
-LOCAL_OBJECT_END(Constants_exp_64_A)
+.align 64 
+Constants_exp_64_A:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
+data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
+data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
+data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
+// /* Reversed */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
 
-LOCAL_OBJECT_START(Constants_exp_64_P)
-data8 0xD00D6C8143914A8A,0x00003FF2 // P6
-data8 0xB60BC4AC30304B30,0x00003FF5 // P5
-data8 0x888888887474C518,0x00003FF8 // P4
-data8 0xAAAAAAAA8DAE729D,0x00003FFA // P3
-data8 0xAAAAAAAAAAAAAF61,0x00003FFC // P2
-data8 0x80000000000004C7,0x00003FFE // P1
-LOCAL_OBJECT_END(Constants_exp_64_P)
+.align 64 
+Constants_exp_64_P:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
+data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
+data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
+data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
+data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
+data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
+data4 0x000004C7,0x80000000,0x00003FFE,0x00000000 
+// /* Reversed */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
 
-LOCAL_OBJECT_START(Constants_exp_64_Q)
-data8 0x93F2AC5F7471F32E, 0x00003FE9 // Q9
-data8 0xB8DA0F3550B3E764, 0x00003FEC // Q8
-data8 0xD00D00D0028E89C4, 0x00003FEF // Q7
-data8 0xD00D00DAEB8C4E91, 0x00003FF2 // Q6
-data8 0xB60B60B60B60B6F5, 0x00003FF5 // Q5
-data8 0x888888888886CC23, 0x00003FF8 // Q4
-data8 0xAAAAAAAAAAAAAAAB, 0x00003FFA // Q3
-data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // Q2
-data8 0x8000000000000000, 0x00003FFE // Q1
-LOCAL_OBJECT_END(Constants_exp_64_Q)
+.align 64 
+Constants_exp_64_Q:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_Q,@object)
+data4 0xA49EF6CA,0xD00D56F7,0x00003FEF,0x00000000
+data4 0x1C63493D,0xD00D59AB,0x00003FF2,0x00000000
+data4 0xFB50CDD2,0xB60B60B5,0x00003FF5,0x00000000
+data4 0x7BA68DC8,0x88888888,0x00003FF8,0x00000000
+data4 0xAAAAAC8D,0xAAAAAAAA,0x00003FFA,0x00000000
+data4 0xAAAAACCA,0xAAAAAAAA,0x00003FFC,0x00000000
+data4 0x00000000,0x80000000,0x00003FFE,0x00000000 
+// /* Reversed */
+ASM_SIZE_DIRECTIVE(Constants_exp_64_Q)
 
-LOCAL_OBJECT_START(Constants_exp_64_T1)
+.align 64 
+Constants_exp_64_T1:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
 data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 
 data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 
 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
@@ -553,9 +428,11 @@ data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
 data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
 data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
 data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
-LOCAL_OBJECT_END(Constants_exp_64_T1)
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
 
-LOCAL_OBJECT_START(Constants_exp_64_T2)
+.align 64 
+Constants_exp_64_T2:
+ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
 data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 
 data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 
 data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E 
@@ -572,824 +449,1124 @@ data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
 data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 
 data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE 
 data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
-LOCAL_OBJECT_END(Constants_exp_64_T2)
+ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
 
-LOCAL_OBJECT_START(Constants_exp_64_W1)
-data8 0x0000000000000000, 0xBE384454171EC4B4
-data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
-data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
-data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
-data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
-data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
-data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
-data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
-data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
-data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
-data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
-data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
-data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
-data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
-data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
-data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
-data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
-data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
-data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
-data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
-data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
-data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
-data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
-data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
-data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
-data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
-data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
-data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
-data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
-data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
-data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
-data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
-LOCAL_OBJECT_END(Constants_exp_64_W1)
+.align 64 
+Constants_exp_64_W1:   // W_1 table: 64 doubles, each emitted as low word then high word; read with ldfd at W1_ptr + 8*M_1
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
+data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454 // entries 0 and 1 (two doubles per line)
+data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
+data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
+data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
+data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
+data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
+data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
+data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
+data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
+data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
+data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A 
+data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB 
+data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E 
+data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA 
+data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08 
+data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B 
+data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75 
+data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79 
+data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7 
+data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087 
+data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB 
+data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643  
+data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C 
+data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D 
+data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873 
+data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F 
+data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861 
+data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0 
+data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC 
+data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB 
+data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB 
+data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148 
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
 
-LOCAL_OBJECT_START(Constants_exp_64_W2)
-data8 0x0000000000000000, 0xBE641F2537A3D7A2
-data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
-data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
-data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
-data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
-data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
-data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
-data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
-data8 0xBE56856B49BFF529, 0x3E66DD3300508651
-data8 0x3E51165FC114BC13, 0x3E53333DC453290F
-data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
-data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
-data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
-data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
-data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
-data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
-data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
-data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
-data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
-data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
-data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
-data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
-data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
-data8 0xBE559725ADE45917, 0xBE68C29C042FC476
-data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
-data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
-data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
-data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
-data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
-data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
-data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
-data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
-LOCAL_OBJECT_END(Constants_exp_64_W2)
+.align 64 
+Constants_exp_64_W2:   // W_2 table: 64 doubles, each emitted as low word then high word; read with ldfd at W2_ptr + 8*M_2
+ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
+data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25 // entries 0 and 1 (two doubles per line)
+data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8 
+data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A 
+data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E 
+data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9 
+data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2 
+data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0 
+data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509 
+data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33 
+data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D 
+data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87 
+data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3 
+data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9 
+data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F 
+data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82 
+data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4 
+data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D 
+data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030  
+data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29 
+data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED 
+data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B 
+data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893 
+data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35 
+data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C 
+data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313 
+data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE 
+data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426 
+data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550 
+data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4 
+data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31 
+data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE 
+data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
+ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
+
+GR_SAVE_PFS         = r59   // r59-r61 are the last three locals of the alloc ...,0,30,4,0 frame (locals r32-r61)
+GR_SAVE_B0          = r60   // NOTE(review): presumably save slots for ar.pfs/b0/gp around an error-handling call -- confirm
+GR_SAVE_GP          = r61
+GR_Parameter_X      = r62   // r62-r65 are the four output registers of the alloc ...,0,30,4,0 frame
+GR_Parameter_Y      = r63
+GR_Parameter_RESULT = r64
+GR_Parameter_TAG    = r65 
 
+FR_X                = f9    // f9 receives the normalized input (fnorm.s1 f9 = f8 at exp_continue)
+FR_Y                = f9    // NOTE(review): same register as FR_X -- confirm this aliasing is intentional
+FR_RESULT           = f99
 
 .section .text
-
-GLOBAL_IEEE754_ENTRY(expm1l)
+.proc expm1l#
+.global expm1l#
+.align 64 
+expm1l:   // Entry stub for expm1l: sets the expm1 selectors, then branches into the body shared with expl
+#ifdef _LIBC
+.global __expm1l#
+__expm1l:
+#endif
+{ .mii
+alloc r32 = ar.pfs,0,30,4,0   // frame: 0 inputs, 30 locals (r32-r61), 4 outputs (r62-r65), 0 rotating
+(p0)  add r33 = 1, r0  // Flag = r33 = 1 selects expm1
+(p0)  cmp.eq.unc  p7, p0 =  r0, r0 // p7 = true marks expm1
+}
+{ .mbb
+	nop.m 999
+(p0)  br.cond.sptk exp_continue // join the code shared with expl at exp_continue
+	nop.b 999 ;;
+}
 
 //
-//    Set p7 true for expm1, p6 false
+//    Set p7 true for expm1
+//    Set Flag = r33 = 1 for expm1
 //    
 
-{ .mlx
-      getf.exp GR_signexp_x = f8  // Get sign and exponent of x, redo if unorm
-      movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc  // significand of 1/ln2
-}
-{ .mlx
-      addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp  
-      movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
-}
-;;
-
-{ .mfi
-      ld8  GR_ad_Arg = [GR_ad_Arg]       // Point to Arg table
-      fclass.m p8, p0 =  f8, 0x1E7       // Test x for natval, nan, inf, zero
-      cmp.eq  p7, p6 =  r0, r0 
-}
-{ .mfb
-      mov GR_exp_half = 0x0FFFE          // Exponent of 0.5, for very small path
-      fnorm.s1 FR_norm_x = f8            // Normalize x
-      br.cond.sptk exp_continue 
-}
-;;
+.endp expm1l
+ASM_SIZE_DIRECTIVE(expm1l)
 
-GLOBAL_IEEE754_END(expm1l)
+#ifdef _LIBC
+libm_hidden_def (__expm1l)
+#endif
 
-GLOBAL_IEEE754_ENTRY(expl)
-//
-//    Set p7 false for exp, p6 true
-//    
-{ .mlx
-      getf.exp GR_signexp_x = f8  // Get sign and exponent of x, redo if unorm
-      movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc  // significand of 1/ln2
-}
-{ .mlx
-      addl GR_ad_Arg = @ltoff(Constants_exp_64_Arg#),gp  
-      movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
+.section .text
+.proc expl#
+.global expl#
+.align 64 
+expl: 
+#ifdef _LIBC
+.global __ieee754_expl#
+__ieee754_expl:
+#endif
+{ .mii
+alloc r32 = ar.pfs,0,30,4,0
+(p0)  add r33 = r0, r0  
+(p0)  cmp.eq.unc  p0, p7 =  r0, r0 ;; 
 }
-;;
-
+exp_continue: 
 { .mfi
-      ld8  GR_ad_Arg = [GR_ad_Arg]       // Point to Arg table
-      fclass.m p8, p0 =  f8, 0x1E7       // Test x for natval, nan, inf, zero
-      cmp.eq  p6, p7 =  r0, r0
+(p0)  add r32 = 2,r0  
+(p0)  fnorm.s1 f9 = f8 
+      nop.i 0
 }
 { .mfi
-      mov GR_exp_half = 0x0FFFE          // Exponent of 0.5, for very small path
-      fnorm.s1 FR_norm_x = f8            // Normalize x
-      nop.i 999
+(p0)  nop.m 0 
+//
+//    Set p7 false for exp
+//    Set Flag = r33 = 0 for exp
+//    
+(p0)  fclass.m.unc p6, p8 =  f8, 0x1E7 
+      nop.i 0;;
 }
-;;
-
-exp_continue: 
-// Form two constants we need
-//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128 
-//  1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand
-
 { .mfi
-      setf.sig  FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63
-      fclass.nm.unc p9, p0 =  f8, 0x1FF  // Test x for unsupported
-      mov GR_exp_2tom51 = 0xffff-51
-}
-{ .mlx
-      setf.d  FR_RSHF_2TO51 = GR_rshf_2to51 // Form const 1.1000 * 2^(63+51)
-      movl GR_rshf = 0x43e8000000000000  // 1.10000 2^63 for right shift
+	nop.m 999
+(p0)  fclass.nm.unc p9, p0 =  f8, 0x1FF 
+      nop.i 0
 }
-;;
-
 { .mfi
-      setf.exp FR_half = GR_exp_half     // Form 0.5 for very small path
-      fma.s1 FR_scale = f1,f1,f0         // Scale = 1.0
-      mov GR_exp_bias = 0x0FFFF          // Set exponent bias
-}
-{ .mib
-      add GR_ad_Limits = 0x20, GR_ad_Arg // Point to Limits table
-      mov GR_exp_mask = 0x1FFFF          // Form exponent mask
-(p8)  br.cond.spnt EXP_64_SPECIAL        // Branch if natval, nan, inf, zero
+	nop.m 999
+(p0)  mov f36 = f1 
+	nop.i 999 ;;
 }
-;;
-
-{ .mfi
-      setf.exp FR_2TOM51 = GR_exp_2tom51 // Form 2^-51 for scaling float_N
-      nop.f 999
-      add GR_ad_A = 0x40, GR_ad_Arg      // Point to A table
+{ .mfb
+	nop.m 999
+//     
+//    Identify NatVals, NaNs, Infs, and Zeros. 
+//    Identify EM unsupporteds. 
+//    Save special input registers 
+(p0)  mov f32 = f0 
+//
+//    Create FR_X_cor      = 0.0 
+//           GR_Flag       = 0 
+//           GR_Expo_Range = 2 (r32) for double-extended precision 
+//           FR_Scale      = 1.0
+//
+(p6)  br.cond.spnt EXPL_64_SPECIAL ;; 
 }
 { .mib
-      setf.d  FR_RSHF = GR_rshf          // Form right shift const 1.1000 * 2^63
-      add GR_ad_T1 = 0x160, GR_ad_Arg    // Point to T1 table
-(p9)  br.cond.spnt EXP_64_UNSUPPORTED    // Branch if unsupported
+	nop.m 999
+	nop.i 999
+(p9)  br.cond.spnt EXPL_64_UNSUPPORTED ;; 
 }
-;;
-
-.pred.rel "mutex",p6,p7
 { .mfi
-      ldfe FR_L_hi = [GR_ad_Arg],16      // Get L_hi
-      fcmp.eq.s0 p9,p0 =  f8, f0         // Dummy op to flag denormals
-(p6)  add GR_ad_PQ = 0x30, GR_ad_A       // Point to P table for exp
+(p0)  cmp.ne.unc p12, p13 = 0x01, r33
+//     
+//    Branch out for special input values 
+//     
+(p0)  fcmp.lt.unc.s0 p9,p0 =  f8, f0 
+(p0)  cmp.eq.unc  p15, p0 =  r0, r0 
 }
-{ .mfi
-      ldfe FR_min_oflow_x = [GR_ad_Limits],16 // Get min x to cause overflow
-      fmpy.s1 FR_rsq = f8, f8            // rsq = x * x for small path
-(p7)  add GR_ad_PQ = 0x90, GR_ad_A       // Point to Q table for expm1
+{ .mmi
+	nop.m 999
+//     
+//    Raise possible denormal operand exception 
+//    Normalize x 
+//     
+//    This function computes expl( x  + x_cor) 
+//    Input  FR 1: FR_X            
+//    Input  FR 2: FR_X_cor  
+//    Input  GR 1: GR_Flag  
+//    Input  GR 2: GR_Expo_Range  
+//    Output FR 3: FR_Y_hi  
+//    Output FR 4: FR_Y_lo  
+//    Output FR 5: FR_Scale  
+//    Output PR 1: PR_Safe  
+(p0)  addl r34 = @ltoff(Constants_exp_64_Arg#),gp  
+(p0)  addl r40 = @ltoff(Constants_exp_64_W1#),gp 
 };;
+//
+//    Prepare to load constants
+//    Set Safe = True
+//
 
 { .mmi
-      ldfe FR_L_lo = [GR_ad_Arg],16      // Get L_lo
-      ldfe FR_zero_uflow_x = [GR_ad_Limits],16 // Get x for zero uflow result
-      add GR_ad_W1 = 0x200, GR_ad_T1     // Point to W1 table
-}
-;;
+      ld8  r34 = [r34]
+      ld8  r40 = [r40]
+(p0)  addl r41 = @ltoff(Constants_exp_64_W2#),gp  
+};;
 
-{ .mfi
-      ldfe FR_P6Q9 = [GR_ad_PQ],16       // P6(exp) or Q9(expm1) for small path
-      mov FR_r = FR_norm_x               // r = X for small path
-      mov GR_very_small_exp = -60        // Exponent of x for very small path
-}
-{ .mfi
-      add GR_ad_W2 = 0x400, GR_ad_T1     // Point to W2 table
-      nop.f 999
-(p7)  mov GR_small_exp = -7              // Exponent of x for small path expm1
+{ .mmi
+(p0)  ldfe f37 = [r34],16 
+(p0)  ld8 r41 = [r41] ;; 
 }
-;;
+
+//
+//    N = fcvt.fx(float_N)
+//    Set p14 if -6 > expo_X 
+//
+//
+//    Bias = 0x0FFFF
+//    expo_X = expo_X and Mask  
+//
 
 { .mmi
-      ldfe FR_P5Q8 = [GR_ad_PQ],16       // P5(exp) or Q8(expm1) for small path
-      and  GR_exp_x = GR_signexp_x, GR_exp_mask
-(p6)  mov GR_small_exp = -12             // Exponent of x for small path exp
+(p0)  ldfe f40 = [r34],16 
+      nop.m 999
+//
+//    Load L_lo
+//    Set p10 if 14 < expo_X 
+//
+(p0)  addl r50 = @ltoff(Constants_exp_64_T1#),gp 
 }
-;;
+{ .mmi
+	nop.m 999
+	nop.m 999
+(p0)  addl r51 = @ltoff(Constants_exp_64_T2#),gp ;; 
+}
+//
+//    Load W2_ptr
+//    Branch to SMALL if expo_X < -6
+//
 
-// N_signif = X * Inv_log2_by_2^12
-// By adding 1.10...0*2^63 we shift and get round_int(N_signif) in significand.
-// We actually add 1.10...0*2^51 to X * Inv_log2 to do the same thing.
-{ .mfi
-      ldfe FR_P4Q7 = [GR_ad_PQ],16       // P4(exp) or Q7(expm1) for small path
-      fma.s1 FR_N_signif = FR_norm_x, FR_INV_LN2_2TO63, FR_RSHF_2TO51
-      nop.i 999
+{.mmi
+(p0)  ld8 r50 = [r50]  
+(p0)  ld8 r51 = [r51]  
+};;
+
+{ .mlx
+(p0)  ldfe f41 = [r34],16 
+//
+//    float_N = X * L_Inv
+//    expo_X = exponent of X
+//    Mask = 0x1FFFF
+//
+(p0)  movl r58 = 0x0FFFF 
 }
-{ .mfi
-      sub GR_exp_x = GR_exp_x, GR_exp_bias // Get exponent
-      fmpy.s1 FR_r4 = FR_rsq, FR_rsq     // Form r4 for small path
-      cmp.eq.unc  p15, p0 =  r0, r0      // Set Safe as default
+{ .mlx
+	nop.m 999
+(p0)  movl r39 = 0x1FFFF ;; 
 }
-;;
-
 { .mmi
-      ldfe FR_P3Q6 = [GR_ad_PQ],16       // P3(exp) or Q6(expm1) for small path
-      cmp.lt  p14, p0 =  GR_exp_x, GR_very_small_exp // Is |x| < 2^-60?
-      nop.i 999
+(p0)  getf.exp r37 = f9 
+	nop.m 999
+(p0)  addl r34 = @ltoff(Constants_exp_64_Exponents#),gp ;; 
 }
-;;
-
-{ .mfi
-      ldfe FR_P2Q5 = [GR_ad_PQ],16       // P2(exp) or Q5(expm1) for small path
-      fmpy.s1 FR_half_x = FR_half, FR_norm_x // 0.5 * x for very small path
-      cmp.lt  p13, p0 =  GR_exp_x, GR_small_exp // Is |x| < 2^-m?
+{ .mii
+(p0)  ld8 r34 = [r34]  
+      nop.i 999 
+(p0)  and  r37 = r37, r39 ;;  
 }
-{ .mib
-      nop.m 999
-      nop.i 999
-(p14) br.cond.spnt EXP_VERY_SMALL        // Branch if |x| < 2^-60
+{ .mmi
+(p0)  sub r37 = r37, r58 ;;  
+(p0)  cmp.gt.unc  p14, p0 =  -6, r37 
+(p0)  cmp.lt.unc  p10, p0 =  14, r37 ;; 
 }
-;;
-
 { .mfi
-      ldfe FR_A3 = [GR_ad_A],16          // Get A3 for normal path
-      fcmp.ge.s1 p10,p0 = FR_norm_x, FR_min_oflow_x // Will result overflow?
-      mov GR_big_expo_neg = -16381       // -0x3ffd
+(p0)  nop.m 0  
+//
+//    Load L_inv 
+//    Set p12 true for Flag = 0 (exp)
+//    Set p13 true for Flag = 1 (expm1)
+//
+(p0)  fmpy.s1 f38 = f9, f37 
+	nop.i 999 ;;
 }
 { .mfb
-      ldfe FR_P1Q4 = [GR_ad_PQ],16       // P1(exp) or Q4(expm1) for small path
-      nop.f 999
-(p13) br.cond.spnt EXP_SMALL             // Branch if |x| < 2^-m
-                                         // m=12 for exp, m=7 for expm1
-}
-;;
-
-// Now we are on the main path for |x| >= 2^-m, m=12 for exp, m=7 for expm1
+	nop.m 999
 //
-// float_N = round_int(N_signif) 
-// The signficand of N_signif contains the rounded integer part of X * 2^12/ln2,
-// as a twos complement number in the lower bits (that is, it may be negative).
-// That twos complement number (called N) is put into GR_N.
-
-// Since N_signif is scaled by 2^51, it must be multiplied by 2^-51
-// before the shift constant 1.10000 * 2^63 is subtracted to yield float_N.
-// Thus, float_N contains the floating point version of N
-
-
-{ .mfi
-      ldfe FR_A2 = [GR_ad_A],16          // Get A2 for main path
-      fcmp.lt.s1 p11,p0 = FR_norm_x, FR_zero_uflow_x // Certain zero, uflow?
-      add GR_ad_T2 = 0x100, GR_ad_T1     // Point to T2 table
+//    Load L_hi
+//    expo_X = expo_X - Bias
+//    get W1_ptr      
+//
+(p0)  fcvt.fx.s1 f39 = f38
+(p14) br.cond.spnt EXPL_SMALL ;; 
 }
-{ .mfi
+{ .mib
+	nop.m 999
+	nop.i 999
+(p10) br.cond.spnt EXPL_HUGE ;; 
+}
+{ .mmi
+(p0)  shladd r34 = r32,4,r34 
       nop.m 999
-      fms.s1 FR_float_N = FR_N_signif, FR_2TOM51, FR_RSHF // Form float_N
-      nop.i 999
+(p0)  addl r35 = @ltoff(Constants_exp_64_A#),gp ;; 
 }
-;;
-
-{ .mbb
-      getf.sig GR_N_fix = FR_N_signif    // Get N from significand
-(p10) br.cond.spnt  EXP_OVERFLOW         // Branch if result will overflow
-(p11) br.cond.spnt  EXP_CERTAIN_UNDERFLOW_ZERO // Branch if certain zero, uflow
+//
+//    Load T_1,T_2
+//
+{ .mmi
+   nop.m 999
+   ld8   r35 =[r35]
+   nop.i 99
+};;
+{ .mmb
+(p0)  ldfe f51 = [r35],16 
+(p0)  ld8 r45 = [r34],8
+	nop.b 999 ;;
 }
-;;
-
-{ .mfi
-      ldfe FR_A1 = [GR_ad_A],16          // Get A1 for main path
-      fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_norm_x  // r = -L_hi * float_N + x
-      extr.u GR_M1 = GR_N_fix, 6, 6      // Extract index M_1
+//    
+//    Set Safe = True  if k >= big_expo_neg  
+//    Set Safe = False if k < big_expo_neg  
+//    
+{ .mmb
+(p0)  ldfe f49 = [r35],16 
+(p0)  ld8 r48 = [r34],0
+	nop.b 999 ;;
 }
 { .mfi
-      and GR_M2 = 0x3f, GR_N_fix         // Extract index M_2
-      nop.f 999
-      nop.i 999
+	nop.m 999
+//
+//    Branch to HUGE if expo_X > 14 
+//
+(p0)  fcvt.xf f38 = f39 
+	nop.i 999 ;;
 }
-;;
-
-// N_fix is only correct up to 50 bits because of our right shift technique.
-// Actually in the normal path we will have restricted K to about 14 bits.
-// Somewhat arbitrarily we extract 32 bits.
 { .mfi
-      shladd GR_ad_W1 = GR_M1,3,GR_ad_W1 // Point to W1
-      nop.f 999
-      extr GR_K = GR_N_fix, 12, 32       // Extract limited range K
+(p0)  getf.sig r52 = f39 
+	nop.f 999
+	nop.i 999 ;;
+}
+{ .mii
+	nop.m 999
+(p0)  extr.u r43 = r52, 6, 6 ;;  
+//
+//    r = r - float_N * L_lo
+//    K = extr(N_fix,12,52)
+//
+(p0)  shladd r40 = r43,3,r40 ;; 
 }
 { .mfi
-      shladd GR_ad_T1 = GR_M1,2,GR_ad_T1 // Point to T1
-      nop.f 999
-      shladd GR_ad_T2 = GR_M2,2,GR_ad_T2 // Point to T2
+(p0)  shladd r50 = r43,2,r50 
+(p0)  fnma.s1 f42 = f40, f38, f9 
+//
+//    float_N = float(N)
+//    N_fix = significand of N 
+//
+(p0)  extr.u r42 = r52, 0, 6  
 }
-;;
-
 { .mmi
-      ldfs  FR_T1 = [GR_ad_T1],0         // Get T1
-      ldfd  FR_W1 = [GR_ad_W1],0         // Get W1
-      add GR_exp_2_k = GR_exp_bias, GR_K // Form exponent of 2^k
+(p0)  ldfd  f43 = [r40],0 ;; 
+(p0)  shladd r41 = r42,3,r41 
+(p0)  shladd r51 = r42,2,r51 
 }
-;;
-
+//
+//    W_1_p1 = 1 + W_1
+//
 { .mmi
-      ldfs  FR_T2 = [GR_ad_T2],0         // Get T2
-      shladd GR_ad_W2 = GR_M2,3,GR_ad_W2 // Point to W2
-      sub GR_exp_2_mk = GR_exp_bias, GR_K // Form exponent of 2^-k
+(p0)  ldfs  f44 = [r50],0 ;; 
+(p0)  ldfd  f45 = [r41],0 
+//
+//    M_2 = extr(N_fix,0,6)
+//    M_1 = extr(N_fix,6,6)
+//    r = X - float_N * L_hi
+//
+(p0)  extr r44 = r52, 12, 52  
+}
+{ .mmi
+(p0)  ldfs  f46 = [r51],0 ;; 
+(p0)  sub r46 = r58, r44  
+(p0)  cmp.gt.unc  p8, p15 =  r44, r45 
+}
+//    
+//    W = W_1 + W_1_p1*W_2 
+//    Load  A_2 
+//    Bias_m_K = Bias - K
+//
+{ .mii
+(p0)  ldfe f40 = [r35],16 
+//
+//    load A_1
+//    poly = A_2 + r*A_3 
+//    rsq = r * r  
+//    neg_2_mK = exponent of Bias_m_k
+//
+(p0)  add r47 = r58, r44 ;;  
+//    
+//    Set Safe = True  if k <= big_expo_pos  
+//    Set Safe = False  if k >  big_expo_pos  
+//    Load A_3
+//    
+(p15) cmp.lt p8,p15 = r44,r48 ;;
 }
-;;
-
 { .mmf
-      ldfd  FR_W2 = [GR_ad_W2],0         // Get W2
-      setf.exp FR_scale = GR_exp_2_k     // Set scale = 2^k
-      fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r // r = -L_lo * float_N + r
+(p0)  setf.exp f61 = r46 
+//    
+//    Bias_p + K = Bias + K
+//    T = T_1 * T_2
+//    
+(p0)  setf.exp f36 = r47 
+(p0)  fnma.s1 f42 = f41, f38, f42 ;; 
 }
-;;
-
 { .mfi
-      setf.exp FR_2_mk = GR_exp_2_mk     // Form 2^-k
-      fma.s1 FR_poly = FR_r, FR_A3, FR_A2 // poly = r * A3 + A2
-      cmp.lt p8,p15 = GR_K,GR_big_expo_neg // Set Safe if K > big_expo_neg
+	nop.m 999
+//
+//    Load W_1,W_2
+//    Load big_exp_pos, load big_exp_neg
+//
+(p0)  fadd.s1 f47 = f43, f1 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_rsq = FR_r, FR_r         // rsq = r * r
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 f52 = f42, f51, f49 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s1 FR_T = FR_T1, FR_T2         // T = T1 * T2
-      nop.i 999
+	nop.m 999
+(p0)  fmpy.s1 f48 = f42, f42 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fadd.s1 FR_W1_p1 = FR_W1, f1        // W1_p1 = W1 + 1.0
-      nop.i 999
+	nop.m 999
+(p0)  fmpy.s1 f53 = f44, f46 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p7)  cmp.lt.unc  p8, p9 =  10, GR_K       // If expm1, set p8 if K > 10 
-      fma.s1 FR_poly = FR_r, FR_poly, FR_A1 // poly = r * poly + A1
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 f54 = f45, f47, f43 
+	nop.i 999
 }
-;;
-
 { .mfi
-(p7)  cmp.eq  p15, p0 =  r0, r0            // If expm1, set Safe flag
-      fma.s1 FR_T_scale = FR_T, FR_scale, f0 // T_scale = T * scale
-(p9)  cmp.gt.unc  p9, p10 =  -10, GR_K     // If expm1, set p9 if K < -10
-                                           // If expm1, set p10 if -10<=K<=10
+	nop.m 999
+(p0)  fneg f61 =  f61 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 FR_W = FR_W2, FR_W1_p1, FR_W1 // W = W2 * (W1+1.0) + W1
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 f52 = f42, f52, f40 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      mov FR_Y_hi = FR_T                   // Assume Y_hi = T
-      nop.i 999
+	nop.m 999
+(p0)  fadd.s1 f55 = f54, f1 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_poly = FR_rsq, FR_poly, FR_r // poly = rsq * poly + r
-      nop.i 999
+	nop.m 999
+//
+//    W + Wp1 * poly     
+// 
+(p0)  mov f34 = f53 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s1 FR_Wp1_T_scale = FR_W, FR_T_scale, FR_T_scale // (W+1)*T*scale
-      nop.i 999
+	nop.m 999
+//
+//    A_1 + r * poly 
+//    Scale = setf.exp(Bias_p_K) 
+//
+(p0)  fma.s1 f52 = f48, f52, f42 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 FR_W_T_scale = FR_W, FR_T_scale, f0 // W*T*scale
-      nop.i 999
+	nop.m 999
+//
+//    poly = r + rsq(A_1 + r*poly) 
+//    Wp1 = 1 + W
+//    neg_2_mK = -neg_2_mK
+//
+(p0)  fma.s1 f35 = f55, f52, f54
+	nop.i 999 ;;
 }
-;;
-
+{ .mfb
+	nop.m 999
+(p0)  fmpy.s1 f35 = f35, f53 
+//   
+//    Y_hi = T
+//    Y_lo = T * (W + Wp1*poly)
+//
+(p12) br.cond.sptk EXPL_MAIN ;; 
+}
+//
+//    Branch if expl(x)  
+//    Continue for expm1l(x) = expl(x) - 1
+//
+{ .mii
+(p0)  cmp.lt.unc  p12, p13 =  10, r44 
+	nop.i 999 ;;
+//
+//    Set p12 if 10 < K, Else p13 
+//
+(p13) cmp.gt.unc  p13, p14 =  -10, r44 ;; 
+}
+//
+//    K > 10:  Y_lo = Y_lo + neg_2_mK
+//    K <=10:  Set p13 if -10 > K, Else set p14 
+//
 { .mfi
-      nop.m 999
-(p9)  fsub.s1 FR_Y_hi = f0, FR_2_mk      // If expm1, if K < -10 set Y_hi
-      nop.i 999
+(p13) cmp.eq  p15, p0 =  r0, r0 
+(p14) fadd.s1 f34 = f61, f34 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fsub.s1 FR_Y_hi = FR_T, FR_2_mk    // If expm1, if |K|<=10 set Y_hi
-      nop.i 999
+	nop.m 999
+(p12) fadd.s1 f35 = f35, f61 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
+	nop.m 999
+(p13) fadd.s1 f35 = f35, f34 
+	nop.i 999
+}
+{ .mfb
+	nop.m 999
+//
+//    K <= 10 and K < -10, Set Safe = True
+//    K <= 10 and K < -10,  Y_lo = Y_hi + Y_lo 
+//    K <= 10 and K >= -10, Y_hi = Y_hi + neg_2_mk 
+// 
+(p13) mov f34 = f61 
+(p0)  br.cond.sptk EXPL_MAIN ;; 
+}
+EXPL_SMALL: 
+{ .mmi
       nop.m 999
-      fma.s1 FR_result_lo = FR_Wp1_T_scale, FR_poly, FR_W_T_scale
-      nop.i 999
+(p0)  addl r34 = @ltoff(Constants_exp_64_Exponents#),gp  
+(p12) addl r35 = @ltoff(Constants_exp_64_P#),gp ;; 
 }
-;;
-
-.pred.rel "mutex",p8,p9
-// If K > 10 adjust result_lo = result_lo - scale * 2^-k
-// If |K| <= 10 adjust result_lo = result_lo + scale * T
+.pred.rel "mutex",p12,p13
+{ .mmi
+(p12) ld8  r35=[r35]      
+nop.m 999
+(p13) addl r35 = @ltoff(Constants_exp_64_Q#),gp 
+};;
+{ .mmi
+(p13) ld8  r35=[r35]      
+(p0) ld8  r34=[r34]      
+nop.i 999
+};;
 { .mfi
-      nop.m 999
-(p8)  fnma.s1 FR_result_lo = FR_scale, FR_2_mk, FR_result_lo // If K > 10
-      nop.i 999
+(p0)  add r34 = 0x48,r34  
+// 
+//    Return
+//    K <= 10 and K < -10,  Y_hi = neg_2_mk 
+// 
+//    /*******************************************************/
+//    /*********** Branch EXPL_SMALL  ************************/
+//    /*******************************************************/
+(p0)  mov f42 = f9 
+	nop.i 999 ;;
 }
+//
+//    Flag = 0
+//    r4 = rsq * rsq
+//
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_result_lo = FR_T_scale, f1, FR_result_lo // If |K| <= 10
-      nop.i 999
+(p0)  ld8 r49 =[r34],0
+	nop.f 999
+	nop.i 999 ;;
+}
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//    Flag = 1
+//
+(p0)  cmp.lt.unc  p14, p0 =  r37, r49 ;; 
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s0 FR_tmp = FR_A1, FR_A1         // Dummy op to set inexact
-      nop.i 999
+	nop.m 999
+//
+//    r = X
+//
+(p0)  fmpy.s1 f48 = f42, f42 
+	nop.i 999 ;;
 }
 { .mfb
-      nop.m 999
-(p15) fma.s0 f8 = FR_Y_hi, FR_scale, FR_result_lo  // Safe result
-(p15) br.ret.sptk b0                        // Safe exit for normal path
+	nop.m 999
+//
+//    rsq = r * r
+//
+(p0)  fmpy.s1 f50 = f48, f48 
+//
+//    Is input very small?
+//
+(p14) br.cond.spnt EXPL_VERY_SMALL ;; 
 }
-;;
-
-// Here if unsafe, will only be here for exp with K < big_expo_neg
-{ .mfb
-      nop.m 999
-      fma.s0 FR_RESULT = FR_Y_hi, FR_scale, FR_result_lo  // Prelim result
-      br.cond.sptk EXP_POSSIBLE_UNDERFLOW  // Branch to unsafe code
+//
+//    Flag_not1: Y_hi = 1.0
+//    Flag is 1: r6 = rsq * r4
+//
+{ .mfi
+(p12) ldfe f52 = [r35],16 
+(p12) mov f34 = f1 
+(p0)  add r53 = 0x1,r0 ;;  
 }
-;;
-
- 
-EXP_SMALL: 
-// Here if 2^-60 < |x| < 2^-m, m=12 for exp, m=7 for expm1
 { .mfi
-(p7)  ldfe FR_Q3 = [GR_ad_Q],16          // Get Q3 for small path, if expm1
-(p6)  fma.s1 FR_p65 = FR_P6, FR_r, FR_P5  // If exp, p65 = P6 * r + P5
-      nop.i 999
+(p13) ldfe f51 = [r35],16 
+//
+//    Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
+//
+(p13) mov f34 = f9 
+	nop.i 999 ;;
+}
+{ .mmf
+(p12) ldfe f53 = [r35],16 
+//
+//    For Flag_not_1, Y_hi = X
+//    Scale = 1
+//    Create 0x000...01
+//
+(p0)  setf.sig f37 = r53 
+(p0)  mov f36 = f1 ;; 
+}
+{ .mmi
+(p13) ldfe f52 = [r35],16 ;; 
+(p12) ldfe f54 = [r35],16 
+	nop.i 999 ;;
 }
 { .mfi
-      mov GR_minus_one = -1
-(p7)  fma.s1 FR_q98 = FR_Q9, FR_r, FR_Q8  // If expm1, q98 = Q9 * r + Q8
-      nop.i 999
+(p13) ldfe f53 = [r35],16 
+(p13) fmpy.s1 f58 = f48, f50 
+	nop.i 999 ;;
+}
+//
+//    Flag_not1: poly_lo = P_5 + r*P_6
+//    Flag_1: poly_lo = Q_6 + r*Q_7
+//
+{ .mmi
+(p13) ldfe f54 = [r35],16 ;; 
+(p12) ldfe f55 = [r35],16 
+	nop.i 999 ;;
+}
+{ .mmi
+(p12) ldfe f56 = [r35],16 ;; 
+(p13) ldfe f55 = [r35],16 
+	nop.i 999 ;;
+}
+{ .mmi
+(p12) ldfe f57 = [r35],0 ;; 
+(p13) ldfe f56 = [r35],16 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p7)  ldfe FR_Q2 = [GR_ad_Q],16           // Get Q2 for small path, if expm1
-(p7)  fma.s1 FR_q65 = FR_Q6, FR_r, FR_Q5  // If expm1, q65 = Q6 * r + Q5
-      nop.i 999
+(p13) ldfe f57 = [r35],0 
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      setf.sig FR_tmp = GR_minus_one      // Create value to force inexact
-(p6)  fma.s1 FR_p21 = FR_P2, FR_r, FR_P1  // If exp, p21 = P2 * r + P1
-      nop.i 999
+	nop.m 999
+//
+//    For  Flag_not_1, load p5,p6,p1,p2
+//    Else (Flag_1) load the Q coefficients instead
+//
+(p12) fma.s1 f60 = f52, f42, f53 
+	nop.i 999 ;;
 }
 { .mfi
-(p7)  ldfe FR_Q1 = [GR_ad_Q],16           // Get Q1 for small path, if expm1
-(p7)  fma.s1 FR_q43 = FR_Q4, FR_r, FR_Q3  // If expm1, q43 = Q4 * r + Q3
-      nop.i 999
+	nop.m 999
+(p13) fma.s1 f60 = f51, f42, f52 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fma.s1 FR_p654 = FR_p65, FR_r, FR_P4 // If exp, p654 = p65 * r + P4
-      nop.i 999
+	nop.m 999
+(p12) fma.s1 f60 = f60, f42, f54 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p7)  fma.s1 FR_q987 = FR_q98, FR_r, FR_Q7 // If expm1, q987 = q98 * r + Q7
-      nop.i 999
+	nop.m 999
+(p12) fma.s1 f59 = f56, f42, f57 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p7)  fma.s1 FR_q21 = FR_Q2, FR_r, FR_Q1  // If expm1, q21 = Q2 * r + Q1
-      nop.i 999
+	nop.m 999
+(p13) fma.s1 f60 = f42, f60, f53 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fma.s1 FR_p210 = FR_p21, FR_rsq, FR_r // If exp, p210 = p21 * r + P0
-      nop.i 999
+	nop.m 999
+(p12) fma.s1 f59 = f59, f48, f42 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p7)  fma.s1 FR_q6543 = FR_q65, FR_rsq, FR_q43 // If expm1, q6543 = q65*r2+q43
-      nop.i 999
+	nop.m 999
+//
+//    Flag_1: poly_lo = Q_5 + r*(Q_6 + r*Q_7) 
+//    Flag_not1: poly_lo = P_4 + r*(P_5 + r*P_6)
+//    Flag_not1: poly_hi = (P_1 + r*P_2)
+//
+(p13) fmpy.s1 f60 = f60, f58 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fma.s1 FR_p6543 = FR_p654, FR_r, FR_P3 // If exp, p6543 = p654 * r + P3
-      nop.i 999
+	nop.m 999
+(p12) fma.s1 f60 = f60, f42, f55 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p7)  fma.s1 FR_q9876543 = FR_q987, FR_r4, FR_q6543 // If expm1, q9876543 = ...
-      nop.i 999
+	nop.m 999
+//
+//    Flag_1: poly_lo = r6 *(Q_5 + ....)
+//    Flag_not1: poly_hi =  r + rsq *(P_1 + r*P_2)
+//
+(p12) fma.s1 f35 = f60, f50, f59 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fma.s1 FR_Y_lo = FR_p6543, FR_r4, FR_p210 // If exp, form Y_lo
-      nop.i 999
+	nop.m 999
+(p13) fma.s1 f59 = f54, f42, f55 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p7)  fma.s1 FR_Y_lo = FR_q9876543, FR_rsq, FR_q21 // If expm1, form Y_lo
-      nop.i 999
+	nop.m 999
+//
+//    Flag_not1: Y_lo = rsq* poly_hi + poly_lo 
+//    Flag_1: poly_lo = rsq* poly_hi + poly_lo 
+//
+(p13) fma.s1 f59 = f59, f42, f56 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmpy.s0  FR_tmp = FR_tmp, FR_tmp   // Dummy op to set inexact
-      nop.i 999
+	nop.m 999
+//
+//    Flag_not_1: (P_1 + r*P_2) 
+//
+(p13) fma.s1 f59 = f59, f42, f57 
+	nop.i 999 ;;
 }
-;;
-
-.pred.rel "mutex",p6,p7
 { .mfi
-      nop.m 999
-(p6)  fma.s0 f8 = FR_Y_lo, f1, f1          // If exp, result = 1 + Y_lo
-      nop.i 999
+	nop.m 999
+//
+//    Flag_not_1: poly_hi = r + rsq * (P_1 + r*P_2) 
+//
+(p13) fma.s1 f35 = f59, f48, f60 
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+//
+//    Create 0.000...01
+//
+(p0)  for f37 = f35, f37 
+	nop.i 999 ;;
 }
 { .mfb
-      nop.m 999
-(p7)  fma.s0 f8 = FR_Y_lo, FR_rsq, FR_norm_x // If expm1, result = Y_lo*r2+x
-      br.ret.sptk  b0                      // Exit for 2^-60 <= |x| < 2^-m
-                                           // m=12 for exp, m=7 for expm1
+	nop.m 999
+//
+//    Set lsb of Y_lo to 1
+//
+(p0)  fmerge.se f35 = f35,f37 
+(p0)  br.cond.sptk EXPL_MAIN ;; 
+}
+EXPL_VERY_SMALL: 
+{ .mmi
+	nop.m 999
+	nop.m 999
+(p13) addl r34 = @ltoff(Constants_exp_64_Exponents#),gp 
+}
+{ .mfi
+	nop.m 999
+(p12) mov f35 = f9 
+	nop.i 999 ;;
+}
+{ .mfb
+(p13) ld8 r34 = [r34] 
+(p12) mov f34 = f1 
+(p12) br.cond.sptk EXPL_MAIN ;; 
+}
+{ .mlx
+(p13) add  r34 = 8,r34 
+(p13) movl r39 = 0x0FFFE ;; 
 }
-;;
-
-
-EXP_VERY_SMALL: 
 //
-// Here if 0 < |x| < 2^-60
-// If exp, result = 1.0 + x
-// If expm1, result = x +x*x/2, but have to check for possible underflow
+//    Load big_exp_neg 
+//    Create 1/2's exponent
+//
+{ .mii
+(p13) setf.exp f56 = r39 
+(p13) shladd r34 = r32,4,r34 ;;  
+	nop.i 999
+}
+//
+//    Negative exponents are stored after positive
 //
-
 { .mfi
-(p7)  mov GR_exp_underflow = -16381        // Exponent for possible underflow
-(p6)  fadd.s0 f8 = f1, FR_norm_x           // If exp, result = 1+x
-      nop.i 999
+(p13) ld8 r45 = [r34],0
+//
+//    Y_hi = x
+//    Scale = 1
+//
+(p13) fmpy.s1 f35 = f9, f9 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p7)  fmpy.s1 FR_result_lo = FR_half_x, FR_norm_x  // If expm1 result_lo = x*x/2
-      nop.i 999
+	nop.m 999
+//
+//    Reset Safe if necessary 
+//    Create 1/2
+//
+(p13) mov f34 = f9 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p7)  cmp.lt.unc p0, p8 = GR_exp_x, GR_exp_underflow // Unsafe if expm1 x small
-(p7)  mov FR_Y_hi = FR_norm_x              // If expm1, Y_hi = x
-(p7)  cmp.lt p0, p15 = GR_exp_x, GR_exp_underflow // Unsafe if expm1 x small
+(p13) cmp.lt.unc  p0, p15 =  r37, r45 
+(p13) mov f36 = f1 
+	nop.i 999 ;;
 }
-;;
-
 { .mfb
-      nop.m 999
-(p8)  fma.s0 f8 = FR_norm_x, f1, FR_result_lo // If expm1, result=x+x*x/2
-(p15) br.ret.sptk b0                       // If Safe, exit
+	nop.m 999
+//
+//    Y_lo = x * x
+//
+(p13) fmpy.s1 f35 = f35, f56 
+//
+//    Y_lo = x*x/2 
+//
+(p13) br.cond.sptk EXPL_MAIN ;; 
+}
+EXPL_HUGE: 
+{ .mfi
+	nop.m 999
+(p0)  fcmp.gt.unc.s1 p14, p0 =  f9, f0 
+	nop.i 999
+}
+{ .mlx
+	nop.m 999
+(p0)  movl r39 = 0x15DC0 ;; 
+}
+{ .mfi
+(p14) setf.exp f34 = r39 
+(p14) mov f35 = f1 
+(p14) cmp.eq  p0, p15 =  r0, r0 ;; 
 }
-;;
-
-// Here if expm1 and 0 < |x| < 2^-16381;  may be possible underflow
 { .mfb
-      nop.m 999
-      fma.s0 FR_RESULT = FR_Y_hi, FR_scale, FR_result_lo // Prelim result
-      br.cond.sptk EXP_POSSIBLE_UNDERFLOW  // Branch to unsafe code
+	nop.m 999
+(p14) mov f36 = f34 
+//
+//    If x > 0, Set Safe = False
+//    If x > 0, Y_hi = 2**(24,000)
+//    If x > 0, Y_lo = 1.0
+//    If x > 0, Scale = 2**(24,000)
+//
+(p14) br.cond.sptk EXPL_MAIN ;; 
 }
-;;
-
-EXP_CERTAIN_UNDERFLOW_ZERO:
-// Here if x < zero_uflow_x
-// For exp, set result to tiny+0.0 and set I, U, and branch to error handling
-// For expm1, set result to tiny-1.0 and set I, and exit
-{ .mmi
-      alloc GR_SAVE_PFS = ar.pfs,0,3,4,0
-      nop.m 999
-      mov GR_one = 1
+{ .mlx
+	nop.m 999
+(p12) movl r39 = 0xA240 
 }
-;;
-
-{ .mmi
-      setf.exp FR_small = GR_one               // Form small value
-      nop.m 999
-(p6)  mov GR_Parameter_TAG = 13                // Error tag for exp underflow
+{ .mlx
+	nop.m 999
+(p12) movl r38 = 0xA1DC ;; 
+}
+{ .mmb
+(p13) cmp.eq  p15, p14 =  r0, r0 
+(p12) setf.exp f34 = r39 
+	nop.b 999 ;;
+}
+{ .mlx
+(p12) setf.exp f35 = r38 
+(p13) movl r39 = 0xFF9C 
 }
-;;
-
 { .mfi
-      nop.m 999
-      fmerge.s FR_X = f8,f8                    // Save x for error call
-      nop.i 999
+	nop.m 999
+(p13) fsub.s1 f34 = f0, f1
+	nop.i 999 ;;
 }
-;;
-
-.pred.rel "mutex",p6,p7
-{ .mfb
-      nop.m 999
-(p6)  fma.s0 FR_RESULT = FR_small, FR_small, f0 // If exp, set I,U, tiny result
-(p6)  br.cond.sptk __libm_error_region          // If exp, go to error handling
+{ .mfi
+	nop.m 999
+(p12) mov f36 = f34 
+(p12) cmp.eq  p0, p15 =  r0, r0 ;; 
+}
+{ .mfi
+(p13) setf.exp f35 = r39 
+(p13) mov f36 = f1 
+	nop.i 999 ;;
+}
+EXPL_MAIN: 
+{ .mfi
+(p0)  cmp.ne.unc p12, p0 = 0x01, r33
+(p0)  fmpy.s1 f101 = f36, f35 
+	nop.i 999 ;;
 }
 { .mfb
-      nop.m 999
-(p7)  fms.s0 f8 = FR_small, FR_small, f1        // If expm1, set I, result -1.0
-(p7)  br.ret.sptk  b0                           // If expm1, exit
-}
-;;
-     
-  
-EXP_OVERFLOW:
-// Here if x >= min_oflow_x
-{ .mmi
-      alloc GR_SAVE_PFS = ar.pfs,0,3,4,0
-      mov GR_huge_exp = 0x1fffe
-      nop.i 999
+	nop.m 999
+(p0)  fma.s0 f99 = f34, f36, f101 
+(p15) br.cond.sptk EXPL_64_RETURN ;;
 }
 { .mfi
-      mov GR_huge_signif = -0x1
-      nop.f 999
-(p6)  mov GR_Parameter_TAG = 12                // Error tag for exp overflow
+	nop.m 999
+(p0)  fsetc.s3 0x7F,0x01
+	nop.i 999
 }
-;;
-
-{ .mmf
-      setf.exp FR_huge_exp = GR_huge_exp       // Create huge value
-      setf.sig FR_huge_signif = GR_huge_signif // Create huge value
-      fmerge.s FR_X = f8,f8                    // Save x for error call
+{ .mlx
+	nop.m 999
+(p0)  movl r50 = 0x00000000013FFF ;;
 }
-;;
-
+//    
+//    S0 user supplied status
+//    S2 user supplied status + WRE + TD  (Overflows) 
+//    S3 user supplied status + RZ + TD   (Underflows) 
+//    
+//    
+//    If (Safe) is true, then
+//        Compute result using user supplied status field.
+//        No overflow or underflow here, but perhaps inexact.
+//        Return
+//    Else
+//       Determine if overflow or underflow  was raised.
+//       Fetch +/- overflow threshold for IEEE single, double,
+//       double extended   
+//    
 { .mfi
-      nop.m 999
-      fmerge.se FR_huge = FR_huge_exp, FR_huge_signif
-(p7)  mov GR_Parameter_TAG = 39                // Error tag for expm1 overflow
+(p0)  setf.exp f60 = r50
+(p0)  fma.s3 f102 = f34, f36, f101 
+	nop.i 999
 }
-;;
-
-{ .mfb
-      nop.m 999
-      fma.s0 FR_RESULT = FR_huge, FR_huge, FR_huge // Force I, O, and Inf
-      br.cond.sptk __libm_error_region         // Branch to error handling
+{ .mfi
+	nop.m 999
+(p0)  fsetc.s3 0x7F,0x40 
+	nop.i 999 ;;
 }
-;;
-
-
-
-EXP_POSSIBLE_UNDERFLOW:
-// Here if exp and zero_uflow_x < x < about -11356 [where k < -16381]
-// Here if expm1 and |x| < 2^-16381
 { .mfi
-      alloc GR_SAVE_PFS = ar.pfs,0,3,4,0
-      fsetc.s2 0x7F,0x41                   // Set FTZ and disable traps
-      nop.i 999
+	nop.m 999
+//
+//    For Safe, no need to check for over/under. 
+//    For expm1, handle errors like exp. 
+//
+(p0)  fsetc.s2 0x7F,0x42
+	nop.i 999;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fma.s2 FR_ftz = FR_Y_hi, FR_scale, FR_result_lo   // Result with FTZ
-      nop.i 999
+	nop.m 999
+(p0)  fma.s2 f100 = f34, f36, f101 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fsetc.s2 0x7F,0x40                   // Disable traps (set s2 default)
-      nop.i 999
+	nop.m 999
+(p0)  fsetc.s2 0x7F,0x40 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p7)  fclass.m.unc p12, p0 = FR_ftz, 0x00F // If expm1, FTZ result denorm, zero?
-      nop.i 999
+	nop.m 999
+(p7)  fclass.m.unc   p12, p0 =  f102, 0x00F
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fclass.m.unc p11, p0 = FR_ftz, 0x00F // If exp, FTZ result denorm or zero?
-      nop.i 999
+	nop.m 999
+(p0)  fclass.m.unc   p11, p0 =  f102, 0x00F
+	nop.i 999 ;;
 }
-;;
-
-{ .mfb
-(p12) mov   GR_Parameter_TAG = 40             // expm1 underflow
-      fmerge.s FR_X = f8,f8                   // Save x for error call
-(p12) br.cond.spnt __libm_error_region        // Branch on expm1 underflow
+{ .mfi
+	nop.m 999
+(p7)  fcmp.ge.unc.s1 p10, p0 =  f100, f60
+	nop.i 999
 }
-;;
-
+{ .mfi
+	nop.m 999
+//    
+//    Create largest double exponent + 1.
+//    Create smallest double exponent - 1.
+//    
+(p0)  fcmp.ge.unc.s1 p8, p0 =  f100, f60
+	nop.i 999 ;;
+}
+//    
+//    fcmp:   resultS2 >= + overflow threshold  -> set (a) if true
+//    fcmp:   resultS2 <= - overflow threshold  -> set (b) if true
+//    fclass: resultS3 is denorm/unorm/0        -> set (d) if true
+//    
 { .mib
-(p11) mov   GR_Parameter_TAG = 13             // exp underflow
-      nop.i 999
-(p11) br.cond.spnt __libm_error_region        // Branch on exp underflow
+(p10) mov   GR_Parameter_TAG = 39
+	nop.i 999
+(p10) br.cond.sptk __libm_error_region ;;
 }
-;;
-
-{ .mfb
-      nop.m 999
-      mov   f8     = FR_RESULT                // Was safe after all
-      br.ret.sptk   b0
+{ .mib
+(p8)  mov   GR_Parameter_TAG = 12
+	nop.i 999
+(p8)  br.cond.sptk __libm_error_region ;;
 }
-;;
-
-
-EXP_64_SPECIAL: 
-// Here if x natval, nan, inf, zero
-// If x natval, +inf, or if expm1 and x zero, just return x.
-// The other cases must be tested for, and results set.
-// These cases do not generate exceptions.
+//    
+//    Report that exp overflowed
+//    
+{ .mib
+(p12) mov   GR_Parameter_TAG = 40
+	nop.i 999
+(p12) br.cond.sptk __libm_error_region ;;
+}
+{ .mib
+(p11) mov   GR_Parameter_TAG = 13
+	nop.i 999
+(p11) br.cond.sptk __libm_error_region ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+//    
+//    Report that exp underflowed
+//    
+(p0)  br.cond.sptk EXPL_64_RETURN ;;
+}
+EXPL_64_SPECIAL: 
 { .mfi
-      nop.m 999
-      fclass.m p8, p0 =  f8, 0x0c3            // Is x nan?
-      nop.i 999
+	nop.m 999
+(p0)  fclass.m.unc p6,  p0 =  f8, 0x0c3 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fclass.m.unc p13, p0 =  f8, 0x007       // If exp, is x zero?
-      nop.i 999
+	nop.m 999
+(p0)  fclass.m.unc p13, p8 =  f8, 0x007 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p6)  fclass.m.unc p11, p0 =  f8, 0x022       // If exp, is x -inf?
-      nop.i 999
+	nop.m 999
+(p7)  fclass.m.unc p14, p0 =  f8, 0x007 
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p8)  fadd.s0 f8 = f8, f1                     // If x nan, result quietized x
-      nop.i 999
+	nop.m 999
+(p0)  fclass.m.unc p12, p9 =  f8, 0x021 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p7)  fclass.m.unc p10, p0 =  f8, 0x022       // If expm1, is x -inf?
-      nop.i 999
+	nop.m 999
+(p0)  fclass.m.unc p11, p0 =  f8, 0x022 
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p13) fadd.s0 f8 = f0, f1                     // If exp and x zero, result 1.0
-      nop.i 999
+	nop.m 999
+(p7)  fclass.m.unc p10, p0 =  f8, 0x022 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p11) mov f8 = f0                             // If exp and x -inf, result 0
-      nop.i 999
+	nop.m 999
+//    
+//    Identify +/- 0, Inf, or -Inf 
+//    Generate the right kind of NaN.
+//    
+(p13) fadd.s0 f99 = f0, f1 
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p14) mov f99 = f8 
+	nop.i 999 ;;
 }
-;;
-
 { .mfb
-      nop.m 999
-(p10) fsub.s1 f8 = f0, f1                     // If expm1, x -inf, result -1.0
-      br.ret.sptk b0                          // Exit special cases
+	nop.m 999
+(p6)  fadd.s0 f99 = f8, f1 
+//    
+//    expl(+/-0) = 1 
+//    expm1l(+/-0) = +/-0 
+//    No exceptions raised
+//    
+(p6)  br.cond.sptk EXPL_64_RETURN ;; 
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p14) br.cond.sptk EXPL_64_RETURN ;; 
+}
+{ .mfi
+	nop.m 999
+(p11) mov f99 = f0 
+	nop.i 999 ;;
 }
-;;
-
-
-EXP_64_UNSUPPORTED: 
-// Here if x unsupported type
+{ .mfb
+	nop.m 999
+(p10) fsub.s1 f99 = f0, f1 
+//    
+//    expl(-Inf) = 0 
+//    expm1l(-Inf) = -1 
+//    No exceptions raised.
+//    
+(p10) br.cond.sptk EXPL_64_RETURN ;; 
+}
+{ .mfb
+	nop.m 999
+(p12) fmpy.s1 f99 = f8, f1 
+//    
+//    expl(+Inf) = Inf 
+//    No exceptions raised.
+//    
+(p0)  br.cond.sptk EXPL_64_RETURN ;; 
+}
+EXPL_64_UNSUPPORTED: 
+{ .mfb
+	nop.m 999
+(p0)  fmpy.s0 f99 = f8, f0 
+(p0)  br.cond.sptk EXPL_64_RETURN ;; 
+}
+EXPL_64_RETURN: 
 { .mfb
       nop.m 999
-      fmpy.s0 f8 = f8, f0                     // Return nan
-      br.ret.sptk   b0
+(p0)  mov   f8     = f99
+(p0)  br.ret.sptk   b0
 }
-;;
+.endp
+ASM_SIZE_DIRECTIVE(expl) 
 
-GLOBAL_IEEE754_END(expl)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -1421,9 +1598,9 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 { .mmi
         ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
@@ -1436,7 +1613,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk     b0                     // Return
 };;
-LOCAL_LIBM_END(__libm_error_region#)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_fabs.S b/sysdeps/ia64/fpu/s_fabs.S
index 3434389a3c..ea3908dbc3 100644
--- a/sysdeps/ia64/fpu/s_fabs.S
+++ b/sysdeps/ia64/fpu/s_fabs.S
@@ -1,82 +1,34 @@
-.file "fabs.s"
-
-
-// Copyright (c) 2000 - 2003, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// 
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-//
-// History
-//==============================================================
-// 02/02/00 Initial version 
-// 02/07/02 Added __libm_fabs entry point to test in case compiler inlines
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-//
-// API
-//==============================================================
-// double fabs  (double x)
-//
-// Overview of operation
-//==============================================================
-// returns absolute value of x 
-
-// floating-point registers used: 1
-// f8, input
-
-.section .text
-.global __libm_fabs#
-
-.proc __libm_fabs#
-__libm_fabs:
-.endp __libm_fabs#
-
-GLOBAL_IEEE754_ENTRY(fabs)
-
-// set invalid or denormal flags and take fault if
-// necessary
-
-{ .mfi
-      nop.m 999
-      fcmp.eq.unc.s0 p6,p7 = f8,f1             
-      nop.i 999 ;;
-}
-
-{ .mfb
-      nop.m 999
-      fmerge.s       f8 = f0,f8                   
-      br.ret.sptk    b0 ;;                        
-}
-
-GLOBAL_IEEE754_END(fabs)
+/* Copyright (C) 2000 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#undef ret
+
+ENTRY (__fabs)			/* One body shared by fabs, fabsf and fabsl (see the aliases below).  */
+{
+	fabs fret0 = farg0	/* fret0 = |farg0|; presumably the FP return / first-argument register names from sysdep.h -- confirm.  */
+	br.ret.sptk.many rp	/* Return to the caller.  */
+}	
+END (__fabs)
+
+strong_alias (__fabs, __fabsf)	/* float and long double entry points reuse the __fabs code.  */
+strong_alias (__fabs, __fabsl)
+
+weak_alias (__fabs, fabs)	/* Public names are weak aliases of the internal ones.  */
+weak_alias (__fabsf, fabsf)
+weak_alias (__fabsl, fabsl)
diff --git a/sysdeps/ia64/fpu/s_fabsf.S b/sysdeps/ia64/fpu/s_fabsf.S
index 71bb6da882..7e5abde625 100644
--- a/sysdeps/ia64/fpu/s_fabsf.S
+++ b/sysdeps/ia64/fpu/s_fabsf.S
@@ -1,82 +1 @@
-.file "fabsf.s"
-
-
-// Copyright (c) 2000 - 2003, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// 
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-//
-// History
-//==============================================================
-// 02/02/00 Initial version 
-// 02/07/02 Added __libm_fabsf entry point to test in case compiler inlines
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-//
-// API
-//==============================================================
-// float fabsf  (float x)
-//
-// Overview of operation
-//==============================================================
-// returns absolute value of x 
-
-// floating-point registers used: 1
-// f8, input
-
-.section .text
-.global __libm_fabsf#
-
-.proc __libm_fabsf#
-__libm_fabsf:
-.endp __libm_fabsf#
-
-GLOBAL_IEEE754_ENTRY(fabsf)
-
-// set invalid or denormal flags and take fault if
-// necessary
-
-{ .mfi
-      nop.m 999
-      fcmp.eq.unc.s0 p6,p7 = f8,f1             
-      nop.i 999 ;;
-}
-
-{ .mfb
-      nop.m 999
-      fmerge.s       f8 = f0,f8                   
-      br.ret.sptk    b0 ;;                        
-}
-
-GLOBAL_IEEE754_END(fabsf)
+/* __fabsf is in s_fabs.S. */
diff --git a/sysdeps/ia64/fpu/s_fabsl.S b/sysdeps/ia64/fpu/s_fabsl.S
index a048949147..3d7a41fe2b 100644
--- a/sysdeps/ia64/fpu/s_fabsl.S
+++ b/sysdeps/ia64/fpu/s_fabsl.S
@@ -1,82 +1 @@
-.file "fabsl.s"
-
-
-// Copyright (c) 2000 - 2003, Intel Corporation
-// All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// * Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// * The name of Intel Corporation may not be used to endorse or promote
-// products derived from this software without specific prior written
-// permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// 
-// Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-//
-// History
-//==============================================================
-// 02/02/00 Initial version 
-// 02/07/02 Added __libm_fabsl entry point to test in case compiler inlines
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-//
-// API
-//==============================================================
-// long double fabsl  (long double x)
-//
-// Overview of operation
-//==============================================================
-// returns absolute value of x 
-
-// floating-point registers used: 1
-// f8, input
-
-.section .text
-.global __libm_fabsl#
-
-.proc __libm_fabsl#
-__libm_fabsl:
-.endp __libm_fabsl#
-
-GLOBAL_IEEE754_ENTRY(fabsl)
-
-// set invalid or denormal flags and take fault if
-// necessary
-
-{ .mfi
-      nop.m 999
-      fcmp.eq.unc.s0 p6,p7 = f8,f1             
-      nop.i 999 ;;
-}
-
-{ .mfb
-      nop.m 999
-      fmerge.s       f8 = f0,f8                   
-      br.ret.sptk    b0 ;;                        
-}
-
-GLOBAL_IEEE754_END(fabsl)
+/* __fabsl is in s_fabs.S. */
diff --git a/sysdeps/ia64/fpu/s_floor.S b/sysdeps/ia64/fpu/s_floor.S
index 9ed9d6dcdb..438b0fa867 100644
--- a/sysdeps/ia64/fpu/s_floor.S
+++ b/sysdeps/ia64/fpu/s_floor.S
@@ -1,10 +1,10 @@
 .file "floor.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,68 +20,86 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
+.align 32
+.global floor#
+
+.section .text
+.proc  floor#
+.align 32
+
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 03/22/00 Updated to improve performance
-// 06/13/00 Improved speed, fixed setting of inexact flag
-// 06/27/00 Eliminated incorrect invalid flag setting
-// 02/07/01 Corrected sign of zero result in round to -inf mode
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/28/03 Improved performance
-//==============================================================
+// 2/02/00: Initial version
+// 3/22/00: Updated to improve performance 
+// 6/13/00: Improved speed, fixed setting of inexact flag
+// 6/27/00: Eliminated incorrect invalid flag setting
+// 2/07/01: Corrected sign of zero result in round to -inf mode
 
 // API
 //==============================================================
 // double floor(double x)
-//==============================================================
 
-// general input registers:
-// r14 - r18
+// general input registers:  
+
+floor_GR_FFFF      = r14
+floor_GR_signexp   = r15
+floor_GR_exponent  = r16
+floor_GR_expmask   = r17
+floor_GR_bigexp    = r18
+
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rM1        = r18
+// predicate registers used: 
 
-// floating-point registers:
-// f8 - f13
+// p6  ==> Input is NaN, infinity, zero
+// p7  ==> Input is denormal
+// p8  ==> Input is <0
+// p9  ==> Input is >=0
+// p10 ==> Input is already an integer (|x| >= 2^52, no fraction bits left)
+// p11 ==> Input is not a large integer
+// p12 ==> Input is a smaller integer
+// p13 ==> Input is not an integer, so inexact must be set
 
-fXInt      = f9
-fNormX     = f10
-fTmp       = f11
-fAdj       = f12
-fPreResult = f13
 
-// predicate registers used:
-// p6 - p9
+// floating-point registers used: 
+
+FLOOR_NORM_f8      = f9                        
+FLOOR_FFFF         = f10 
+FLOOR_INEXACT      = f11 
+FLOOR_FLOAT_INT_f8 = f12
+FLOOR_INT_f8       = f13
+FLOOR_adj          = f14
 
 // Overview of operation
 //==============================================================
+
 // double floor(double x)
-// Return an integer value (represented as a double) that is the largest
+// Return an integer value (represented as a double) that is the largest 
 // value not greater than x
 // This is x rounded toward -infinity to an integral value.
 // Inexact is set if x != floor(x)
-//==============================================================
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
 
 // double_extended
 // if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -102,115 +120,121 @@ fPreResult = f13
 // If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
+// If x is NAN, ZERO, or INFINITY, then  return
 
-.section .text
-GLOBAL_IEEE754_ENTRY(floor)
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
+
+#include "libm_support.h"
+
+floor:                                                  // Entry: x is read from f8 and the result is written to f8
+#ifdef _LIBC
+.global __floor
+__floor:
+#endif
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
-      addl             rBigexp = 0x10033, r0 // Set exponent at which is integer
+      getf.exp floor_GR_signexp  = f8                   // Sign and biased exponent of x
+      fcvt.fx.trunc.s1     FLOOR_INT_f8  = f8           // trunc(x) as an integer in the significand
+      addl        floor_GR_bigexp = 0x10033, r0         // Biased exponent of 2^52: at or above this x has no fraction bits
 }
 { .mfi
-      mov              rM1 = -1              // Set all ones
-      fcvt.fx.trunc.s1 fXInt  = f8           // Convert to int in significand
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      addl        floor_GR_FFFF      = -1,r0            // All-ones integer, source of the constant used to raise inexact
+      fcmp.lt.s1  p8,p9 = f8,f0                         // p8 if x < 0, otherwise p9
+      mov         floor_GR_expmask    = 0x1FFFF ;;      // Mask that keeps the exponent bits and drops the sign
 }
-;;
 
+// p7 ==> denorm
 { .mfi
-      nop.m            0
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test x < 0
-      nop.i            0
+      setf.sig    FLOOR_FFFF  = floor_GR_FFFF           // Move the all-ones pattern into an FP significand
+      fclass.m    p7,p0 = f8, 0x0b
+      nop.i 999
 }
-{ .mfb
-      setf.sig         fTmp = rM1            // Make const for setting inexact
-      fnorm.s1         fNormX  = f8          // Normalize input
-(p7)  br.cond.spnt     FLOOR_UNORM           // Branch if x unorm
+{ .mfi
+      nop.m 999
+      fnorm.s1           FLOOR_NORM_f8  = f8            // Normalized copy of x
+      nop.i 999 ;;
 }
-;;
 
-FLOOR_COMMON:
-// Return here from FLOOR_UNORM
-{ .mfi
-      nop.m            0
-      fclass.m         p6,p0 = f8, 0x1e7     // Test x natval, nan, inf, 0
-      nop.i            0
+// p6 ==> NAN, INF, ZERO
+{ .mfb
+      nop.m 999
+      fclass.m      p6,p10 = f8, 0xe7                   // p10 is the complement: x is none of those classes
+(p7)  br.cond.spnt  L(FLOOR_DENORM) ;;                  // Denormal x: redo exponent and trunc from the normalized copy
 }
-;;
 
+L(FLOOR_COMMON):
 .pred.rel "mutex",p8,p9
+// Set adjustment to add to trunc(x) for result
+//   If x<0,  adjustment is -1.0
+//   If x>=0, adjustment is 0.0
 { .mfi
-      nop.m            0
-(p8)  fnma.s1          fAdj = f1, f1, f0     // If x < 0, adjustment is -1
-      nop.i            0
+      and      floor_GR_exponent = floor_GR_signexp, floor_GR_expmask   // Biased exponent with the sign bit removed
+(p8)  fnma.s1  FLOOR_adj = f1,f1,f0                     // -(1*1) + 0 = -1.0
+      nop.i 999
 }
 { .mfi
-      nop.m            0
-(p9)  fma.s1           fAdj = f0, f0, f0     // If x > 0, adjustment is 0
-      nop.i            0
+      nop.m 999
+(p9)  fadd.s1  FLOOR_adj = f0,f0                        // 0 + 0 = 0.0
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-      fcvt.xf          fPreResult = fXInt    // trunc(x)
-      nop.i            0
+      nop.m 999
+      fcmp.eq.s0  p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
+      nop.i 999
 }
-{ .mfb
-      nop.m            0
-(p6)  fma.d.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf, 0
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf, 0
+{ .mfi
+(p10) cmp.ge.unc    p10,p11 = floor_GR_exponent, floor_GR_bigexp  // p10: |x| >= 2^52; p11: the remaining non-special x (both cleared when p10 was false)
+(p6)  fnorm.d f8 = f8                                   // NaN, infinity or zero: result is x rounded to double
+      nop.i 999 ;;
 }
-;;
 
-{ .mmi
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-;;
-      cmp.ge           p7,p6 = rExp, rBigexp  // Is |x| >= 2^52?
-      nop.i            0
+{ .mfi
+      nop.m 999
+(p11) fcvt.xf         FLOOR_FLOAT_INT_f8   = FLOOR_INT_f8          // trunc(x) converted back to floating point
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-(p6)  fma.d.s0         f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^52
-      nop.i            0
+      nop.m 999
+(p10) fnorm.d f8 = FLOOR_NORM_f8                        // |x| >= 2^52: result is x itself
+      nop.i 999 ;;
 }
+
+
 { .mfi
-      nop.m            0
-(p7)  fma.d.s0         f8 = fNormX, f1, f0    // Result, if |x| >= 2^52
-      nop.i            0
+      nop.m 999
+(p11) fadd.d   f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj        // Provisional result: trunc(x) + adjustment
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-(p6)  fcmp.eq.unc.s1   p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
-      nop.i            0
+      nop.m 999
+(p11) fcmp.eq.unc.s1  p12,p13  = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8  // p12: trunc(x) == x; p13: x has a fractional part
+      nop.i 999 ;;
 }
-;;
 
+// Set inexact if result not equal to input
 { .mfi
-      nop.m            0
-(p9)  fmpy.s0          fTmp = fTmp, fTmp      // Dummy to set inexact
-      nop.i            0
+      nop.m 999
+(p13) fmpy.s0     FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF // Dummy multiply; only the inexact flag it raises is wanted
+      nop.i 999
 }
+// Set result to input if integer
 { .mfb
-      nop.m            0
-(p8)  fma.d.s0         f8 = fNormX, f1, f0    // If x int, result normalized x
-      br.ret.sptk      b0                     // Exit main path, 0 < |x| < 2^52
+      nop.m 999
+(p12) fnorm.d f8 = FLOOR_NORM_f8                        // Replaces the provisional sum, which is off by one for negative integers
+      br.ret.sptk    b0 ;;                              // The only return in this routine
 }
-;;
-
 
-FLOOR_UNORM:
-// Here if x unorm
+// Here if input denorm
+L(FLOOR_DENORM):
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     FLOOR_COMMON           // Return to main path
+      getf.exp floor_GR_signexp  = FLOOR_NORM_f8        // Re-read sign and exponent from the normalized value
+      fcvt.fx.trunc.s1     FLOOR_INT_f8  = FLOOR_NORM_f8           // Recompute trunc(x) from the normalized value
+      br.cond.sptk  L(FLOOR_COMMON) ;;                  // Rejoin the main path
 }
-;;
 
-GLOBAL_IEEE754_END(floor)
+.endp floor
+ASM_SIZE_DIRECTIVE(floor)
diff --git a/sysdeps/ia64/fpu/s_floorf.S b/sysdeps/ia64/fpu/s_floorf.S
index a3f2095931..15b2bbd31d 100644
--- a/sysdeps/ia64/fpu/s_floorf.S
+++ b/sysdeps/ia64/fpu/s_floorf.S
@@ -1,10 +1,10 @@
 .file "floorf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,67 +20,85 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
+.align 32
+.global floorf#
+
+.section .text
+.proc  floorf#
+.align 32
+
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 06/13/00 Improved speed
-// 06/27/00 Eliminated incorrect invalid flag setting
-// 02/07/01 Corrected sign of zero result in round to -inf mode
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/28/03 Improved performance
-//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
+// 2/07/01: Corrected sign of zero result in round to -inf mode
 
 // API
 //==============================================================
 // float floorf(float x)
-//==============================================================
 
-// general input registers:
-// r14 - r18
+// general input registers:  
+
+floor_GR_FFFF      = r14
+floor_GR_signexp   = r15
+floor_GR_exponent  = r16
+floor_GR_expmask   = r17
+floor_GR_bigexp    = r18
+
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rM1        = r18
+// predicate registers used: 
 
-// floating-point registers:
-// f8 - f13
+// p6  ==> Input is NaN, infinity, zero
+// p7  ==> Input is denormal
+// p8  ==> Input is <0
+// p9  ==> Input is >=0
+// p10 ==> Input is already an integer (exponent >= 0x10016, i.e. |x| >= 2^23)
+// p11 ==> Input is not a large integer
+// p12 ==> Input is a smaller integer
+// p13 ==> Input is not an integer, so inexact must be set
 
-fXInt      = f9
-fNormX     = f10
-fTmp       = f11
-fAdj       = f12
-fPreResult = f13
 
-// predicate registers used:
-// p6 - p9
+// floating-point registers used: 
+
+FLOOR_NORM_f8      = f9                        
+FLOOR_FFFF         = f10 
+FLOOR_INEXACT      = f11 
+FLOOR_FLOAT_INT_f8 = f12
+FLOOR_INT_f8       = f13
+FLOOR_adj          = f14
 
 // Overview of operation
 //==============================================================
+
 // float floorf(float x)
-// Return an integer value (represented as a float) that is the largest
+// Return an integer value (represented as a float) that is the largest 
 // value not greater than x
 // This is x rounded toward -infinity to an integral value.
 // Inexact is set if x != floorf(x)
-//==============================================================
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
 
 // double_extended
 // if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -101,115 +119,119 @@ fPreResult = f13
 // If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
+// If x is NAN, ZERO, or INFINITY, then  return
 
-.section .text
-GLOBAL_IEEE754_ENTRY(floorf)
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
+
+#include "libm_support.h"
+
+floorf:
+#ifdef _LIBC
+.global __floorf
+__floorf:
+#endif
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
-      addl             rBigexp = 0x10016, r0 // Set exponent at which is integer
+      getf.exp floor_GR_signexp  = f8
+      fcvt.fx.trunc.s1     FLOOR_INT_f8  = f8
+      addl        floor_GR_bigexp = 0x10016, r0
 }
 { .mfi
-      mov              rM1 = -1              // Set all ones
-      fcvt.fx.trunc.s1 fXInt  = f8           // Convert to int in significand
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      addl        floor_GR_FFFF      = -1,r0
+      fcmp.lt.s1  p8,p9 = f8,f0
+      mov         floor_GR_expmask    = 0x1FFFF ;;
 }
-;;
 
+// p7 ==> denorm
 { .mfi
-      nop.m            0
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test x < 0
-      nop.i            0
+      setf.sig    FLOOR_FFFF  = floor_GR_FFFF
+      fclass.m    p7,p0 = f8, 0x0b
+      nop.i 999
 }
-{ .mfb
-      setf.sig         fTmp = rM1            // Make const for setting inexact
-      fnorm.s1         fNormX  = f8          // Normalize input
-(p7)  br.cond.spnt     FLOOR_UNORM           // Branch if x unorm
+{ .mfi
+      nop.m 999
+      fnorm.s1           FLOOR_NORM_f8  = f8
+      nop.i 999 ;;
 }
-;;
 
-FLOOR_COMMON:
-// Return here from FLOOR_UNORM
-{ .mfi
-      nop.m            0
-      fclass.m         p6,p0 = f8, 0x1e7     // Test x natval, nan, inf, 0
-      nop.i            0
+// p6 ==> NAN, INF, ZERO
+{ .mfb
+      nop.m 999
+      fclass.m      p6,p10 = f8, 0xe7
+(p7)  br.cond.spnt  L(FLOOR_DENORM) ;;
 }
-;;
 
+L(FLOOR_COMMON):
 .pred.rel "mutex",p8,p9
+// Set adjustment to subtract from trunc(x) for result
+//   If x<0,  adjustment is -1.0
+//   If x>=0, adjustment is 0.0
 { .mfi
-      nop.m            0
-(p8)  fnma.s1          fAdj = f1, f1, f0     // If x < 0, adjustment is -1
-      nop.i            0
+      and      floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
+(p8)  fnma.s1  FLOOR_adj = f1,f1,f0
+      nop.i 999
 }
 { .mfi
-      nop.m            0
-(p9)  fma.s1           fAdj = f0, f0, f0     // If x > 0, adjustment is 0
-      nop.i            0
+      nop.m 999
+(p9)  fadd.s1  FLOOR_adj = f0,f0
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-      fcvt.xf          fPreResult = fXInt    // trunc(x)
-      nop.i            0
+      nop.m 999
+      fcmp.eq.s0  p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
+      nop.i 999
 }
-{ .mfb
-      nop.m            0
-(p6)  fma.s.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf, 0
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf, 0
+{ .mfi
+(p10) cmp.ge.unc    p10,p11 = floor_GR_exponent, floor_GR_bigexp
+(p6)  fnorm.s f8 = f8
+      nop.i 999 ;;
 }
-;;
 
-{ .mmi
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-;;
-      cmp.ge           p7,p6 = rExp, rBigexp  // Is |x| >= 2^23?
-      nop.i            0
+{ .mfi
+      nop.m 999
+(p11) fcvt.xf         FLOOR_FLOAT_INT_f8   = FLOOR_INT_f8
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-(p6)  fma.s.s0         f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^23
-      nop.i            0
+      nop.m 999
+(p10) fnorm.s f8 = FLOOR_NORM_f8
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m            0
-(p7)  fma.s.s0         f8 = fNormX, f1, f0    // Result, if |x| >= 2^23
-      nop.i            0
+      nop.m 999
+(p11) fadd.s   f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj
+      nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m            0
-(p6)  fcmp.eq.unc.s1   p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
-      nop.i            0
+      nop.m 999
+(p11) fcmp.eq.unc.s1  p12,p13  = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8
+      nop.i 999 ;;
 }
-;;
 
+// Set inexact if result not equal to input
 { .mfi
-      nop.m            0
-(p9)  fmpy.s0          fTmp = fTmp, fTmp      // Dummy to set inexact
-      nop.i            0
+      nop.m 999
+(p13) fmpy.s0     FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF
+      nop.i 999
 }
+// Set result to input if integer
 { .mfb
-      nop.m            0
-(p8)  fma.s.s0         f8 = fNormX, f1, f0    // If x int, result normalized x
-      br.ret.sptk      b0                     // Exit main path, 0 < |x| < 2^23
+      nop.m 999
+(p12) fnorm.s f8 = FLOOR_NORM_f8
+      br.ret.sptk    b0 ;;
 }
-;;
-
 
-FLOOR_UNORM:
-// Here if x unorm
+// Here if input denorm
+L(FLOOR_DENORM):
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     FLOOR_COMMON           // Return to main path
+      getf.exp floor_GR_signexp  = FLOOR_NORM_f8
+      fcvt.fx.trunc.s1     FLOOR_INT_f8  = FLOOR_NORM_f8
+      br.cond.sptk  L(FLOOR_COMMON) ;;
 }
-;;
 
-GLOBAL_IEEE754_END(floorf)
+.endp floorf
+ASM_SIZE_DIRECTIVE(floorf)
diff --git a/sysdeps/ia64/fpu/s_floorl.S b/sysdeps/ia64/fpu/s_floorl.S
index 345c4f30dd..294578e1a7 100644
--- a/sysdeps/ia64/fpu/s_floorl.S
+++ b/sysdeps/ia64/fpu/s_floorl.S
@@ -1,10 +1,10 @@
 .file "floorl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,67 +20,85 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
+.align 32
+.global floorl#
+
+.section .text
+.proc  floorl#
+.align 32
+
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 06/13/00 Improved speed
-// 06/27/00 Eliminated incorrect invalid flag setting
-// 02/07/01 Corrected sign of zero result in round to -inf mode
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/28/03 Improved performance
-//==============================================================
+// 2/02/00: Initial version
+// 6/13/00: Improved speed
+// 6/27/00: Eliminated incorrect invalid flag setting
+// 2/07/01: Corrected sign of zero result in round to -inf mode
 
 // API
 //==============================================================
 // long double floorl(long double x)
-//==============================================================
 
-// general input registers:
-// r14 - r18
+// general input registers:  
+
+floor_GR_FFFF      = r14
+floor_GR_signexp   = r15
+floor_GR_exponent  = r16
+floor_GR_expmask   = r17
+floor_GR_bigexp    = r18
+
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rM1        = r18
+// predicate registers used: 
 
-// floating-point registers:
-// f8 - f13
+// p6  ==> Input is NaN, infinity, zero
+// p7  ==> Input is denormal
+// p8  ==> Input is <0
+// p9  ==> Input is >=0
+// p10 ==> Input is already an integer (exponent >= 0x1003e, i.e. |x| >= 2^63)
+// p11 ==> Input is not a large integer
+// p12 ==> Input is a smaller integer
+// p13 ==> Input is not an integer, so inexact must be set
 
-fXInt      = f9
-fNormX     = f10
-fTmp       = f11
-fAdj       = f12
-fPreResult = f13
 
-// predicate registers used:
-// p6 - p9
+// floating-point registers used: 
+
+FLOOR_NORM_f8      = f9                        
+FLOOR_FFFF         = f10 
+FLOOR_INEXACT      = f11 
+FLOOR_FLOAT_INT_f8 = f12
+FLOOR_INT_f8       = f13
+FLOOR_adj          = f14
 
 // Overview of operation
 //==============================================================
+
 // long double floorl(long double x)
-// Return an integer value (represented as a long double) that is the largest
+// Return an integer value (represented as a long double) that is the largest 
 // value not greater than x
 // This is x rounded toward -infinity to an integral value.
 // Inexact is set if x != floorl(x)
-//==============================================================
+// **************************************************************************
+
+// Set denormal flag for denormal input
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
 
 // double_extended
 // if the exponent is > 1003e => 3F(true) = 63(decimal)
@@ -101,115 +119,119 @@ fPreResult = f13
 // If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
+// If x is NAN, ZERO, or INFINITY, then  return
 
-.section .text
-GLOBAL_IEEE754_ENTRY(floorl)
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
+
+#include "libm_support.h"
+
+floorl:
+#ifdef _LIBC
+.global __floorl
+__floorl:
+#endif
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
-      addl             rBigexp = 0x1003e, r0 // Set exponent at which is integer
+      getf.exp floor_GR_signexp  = f8
+      fcvt.fx.trunc.s1     FLOOR_INT_f8  = f8
+      addl        floor_GR_bigexp = 0x1003e, r0
 }
 { .mfi
-      mov              rM1 = -1              // Set all ones
-      fcvt.fx.trunc.s1 fXInt  = f8           // Convert to int in significand
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      addl        floor_GR_FFFF      = -1,r0
+      fcmp.lt.s1  p8,p9 = f8,f0
+      mov         floor_GR_expmask    = 0x1FFFF ;;
 }
-;;
 
+// p7 ==> denorm
 { .mfi
-      nop.m            0
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test x < 0
-      nop.i            0
+      setf.sig    FLOOR_FFFF  = floor_GR_FFFF
+      fclass.m    p7,p0 = f8, 0x0b
+      nop.i 999
 }
-{ .mfb
-      setf.sig         fTmp = rM1            // Make const for setting inexact
-      fnorm.s1         fNormX  = f8          // Normalize input
-(p7)  br.cond.spnt     FLOOR_UNORM           // Branch if x unorm
+{ .mfi
+      nop.m 999
+      fnorm.s1           FLOOR_NORM_f8  = f8
+      nop.i 999 ;;
 }
-;;
 
-FLOOR_COMMON:
-// Return here from FLOOR_UNORM
-{ .mfi
-      nop.m            0
-      fclass.m         p6,p0 = f8, 0x1e7     // Test x natval, nan, inf, 0
-      nop.i            0
+// p6 ==> NAN, INF, ZERO
+{ .mfb
+      nop.m 999
+      fclass.m      p6,p10 = f8, 0xe7
+(p7)  br.cond.spnt  L(FLOOR_DENORM) ;;
 }
-;;
 
+L(FLOOR_COMMON):
 .pred.rel "mutex",p8,p9
+// Set adjustment to subtract from trunc(x) for result
+//   If x<0,  adjustment is -1.0
+//   If x>=0, adjustment is 0.0
 { .mfi
-      nop.m            0
-(p8)  fnma.s1          fAdj = f1, f1, f0     // If x < 0, adjustment is -1
-      nop.i            0
+      and      floor_GR_exponent = floor_GR_signexp, floor_GR_expmask
+(p8)  fnma.s1  FLOOR_adj = f1,f1,f0
+      nop.i 999
 }
 { .mfi
-      nop.m            0
-(p9)  fma.s1           fAdj = f0, f0, f0     // If x > 0, adjustment is 0
-      nop.i            0
+      nop.m 999
+(p9)  fadd.s1  FLOOR_adj = f0,f0
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-      fcvt.xf          fPreResult = fXInt    // trunc(x)
-      nop.i            0
+      nop.m 999
+      fcmp.eq.s0  p12,p0 = f8,f0 // Dummy op to set denormal and invalid flag
+      nop.i 999
 }
-{ .mfb
-      nop.m            0
-(p6)  fma.s0           f8 = f8, f1, f0       // Result if x natval, nan, inf, 0
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf, 0
+{ .mfi
+(p10) cmp.ge.unc    p10,p11 = floor_GR_exponent, floor_GR_bigexp
+(p6)  fnorm   f8 = f8
+      nop.i 999 ;;
 }
-;;
 
-{ .mmi
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-;;
-      cmp.ge           p7,p6 = rExp, rBigexp  // Is |x| >= 2^63?
-      nop.i            0
+{ .mfi
+      nop.m 999
+(p11) fcvt.xf         FLOOR_FLOAT_INT_f8   = FLOOR_INT_f8
+      nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m            0
-(p6)  fma.s0           f8 = fPreResult, f1, fAdj // Result if !int, |x| < 2^63
-      nop.i            0
+      nop.m 999
+(p10) fnorm   f8 = FLOOR_NORM_f8
+      nop.i 999 ;;
 }
+
 { .mfi
-      nop.m            0
-(p7)  fma.s0           f8 = fNormX, f1, f0    // Result, if |x| >= 2^63
-      nop.i            0
+      nop.m 999
+(p11) fadd     f8 = FLOOR_FLOAT_INT_f8,FLOOR_adj
+      nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m            0
-(p6)  fcmp.eq.unc.s1   p8, p9 = fPreResult, fNormX // Is trunc(x) = x ?
-      nop.i            0
+      nop.m 999
+(p11) fcmp.eq.unc.s1  p12,p13  = FLOOR_FLOAT_INT_f8, FLOOR_NORM_f8
+      nop.i 999 ;;
 }
-;;
 
+// Set inexact if result not equal to input
 { .mfi
-      nop.m            0
-(p9)  fmpy.s0          fTmp = fTmp, fTmp      // Dummy to set inexact
-      nop.i            0
+      nop.m 999
+(p13) fmpy.s0     FLOOR_INEXACT = FLOOR_FFFF,FLOOR_FFFF
+      nop.i 999
 }
+// Set result to input if integer
 { .mfb
-      nop.m            0
-(p8)  fma.s0           f8 = fNormX, f1, f0    // If x int, result normalized x
-      br.ret.sptk      b0                     // Exit main path, 0 < |x| < 2^63
+      nop.m 999
+(p12) fnorm   f8 = FLOOR_NORM_f8
+      br.ret.sptk    b0 ;;
 }
-;;
-
 
-FLOOR_UNORM:
-// Here if x unorm
+// Here if input denorm
+L(FLOOR_DENORM):
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     FLOOR_COMMON           // Return to main path
+      getf.exp floor_GR_signexp  = FLOOR_NORM_f8
+      fcvt.fx.trunc.s1     FLOOR_INT_f8  = FLOOR_NORM_f8
+      br.cond.sptk  L(FLOOR_COMMON) ;;
 }
-;;
 
-GLOBAL_IEEE754_END(floorl)
+.endp floorl
+ASM_SIZE_DIRECTIVE(floorl)
diff --git a/sysdeps/ia64/fpu/s_frexp.c b/sysdeps/ia64/fpu/s_frexp.c
index c67500695f..98349bca47 100644
--- a/sysdeps/ia64/fpu/s_frexp.c
+++ b/sysdeps/ia64/fpu/s_frexp.c
@@ -1,10 +1,8 @@
-/* file: frexp.c */
-
-
-// Copyright (c) 2000-2002, Intel Corporation
+//
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -21,15 +19,14 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -37,30 +34,22 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-//
+// http://developer.intel.com/opensource.
 //
-// History
-//=====================================================================
-//  2/02/00  Initial version
-//  1/23/02  Calls kernel with parameter to specify 32- or 64-bit int
 //
-//=====================================================================
 
 #include "libm_support.h"
 
-double __libm_frexp(double, int*, int);
-
 double frexp(double x, int *y)
 {
 
-#ifdef SIZE_INT_64 
-   return( __libm_frexp(x, y, 1) );
+#ifdef SIZE_INT_64
+   return( __libm_frexp_8(x, y) );
 
 #else
 
-#ifdef SIZE_INT_32 
-   return( __libm_frexp(x, y, 0) );
+#ifdef SIZE_INT_32
+   return( _GI___libm_frexp_4(x, y) );
 #endif
 
 #endif
diff --git a/sysdeps/ia64/fpu/s_frexpf.c b/sysdeps/ia64/fpu/s_frexpf.c
index c21a21dfba..f666304147 100644
--- a/sysdeps/ia64/fpu/s_frexpf.c
+++ b/sysdeps/ia64/fpu/s_frexpf.c
@@ -1,10 +1,8 @@
-/* file: frexpf.c */
-
-
-// Copyright (c) 2000-2002, Intel Corporation
+//
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -21,15 +19,14 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -37,30 +34,22 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-//
+// http://developer.intel.com/opensource.
 //
-// History
-//=====================================================================
-//  2/02/00  Initial version
-//  1/23/02  Calls kernel with parameter to specify 32- or 64-bit int
 //
-//=====================================================================
 
 #include "libm_support.h"
 
-float __libm_frexpf(float, int*, int);
-
 float frexpf(float x, int *y)
 {
 
-#ifdef SIZE_INT_64 
-   return( __libm_frexpf(x, y, 1) );
+#ifdef SIZE_INT_64
+   return( __libm_frexp_8f(x, y) );
 
 #else
 
-#ifdef SIZE_INT_32 
-   return( __libm_frexpf(x, y, 0) );
+#ifdef SIZE_INT_32
+   return( _GI___libm_frexp_4f(x, y) );
 #endif
 
 #endif
diff --git a/sysdeps/ia64/fpu/s_frexpl.c b/sysdeps/ia64/fpu/s_frexpl.c
index 13d44ab8b5..3edc971e3f 100644
--- a/sysdeps/ia64/fpu/s_frexpl.c
+++ b/sysdeps/ia64/fpu/s_frexpl.c
@@ -1,10 +1,8 @@
-/* file: frexpl.c */
-
-
-// Copyright (c) 2000-2002, Intel Corporation
+//
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
 //
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story, 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
 // and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -21,15 +19,14 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
@@ -37,30 +34,22 @@
 //
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
-//
+// http://developer.intel.com/opensource.
 //
-// History
-//=====================================================================
-//  2/02/00  Initial version
-//  1/23/02  Calls kernel with parameter to specify 32- or 64-bit int
 //
-//=====================================================================
 
 #include "libm_support.h"
 
-long double __libm_frexpl(long double, int*, int);
-
 long double frexpl(long double x, int *y)
 {
 
-#ifdef SIZE_INT_64 
-   return( __libm_frexpl(x, y, 1) );
+#ifdef SIZE_INT_64
+   return( __libm_frexp_8l(x, y) );
 
 #else
 
-#ifdef SIZE_INT_32 
-   return( __libm_frexpl(x, y, 0) );
+#ifdef SIZE_INT_32
+   return( _GI___libm_frexp_4l(x, y) );
 #endif
 
 #endif
diff --git a/sysdeps/ia64/fpu/s_ilogb.S b/sysdeps/ia64/fpu/s_ilogb.S
index 3f2733cabd..61975dd941 100644
--- a/sysdeps/ia64/fpu/s_ilogb.S
+++ b/sysdeps/ia64/fpu/s_ilogb.S
@@ -1,10 +1,10 @@
 .file "ilogb.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,248 +20,234 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/03/00 Initial version
-// 05/26/00 Fix bug when x a double-extended denormal;
+// 2/03/00  Initial version
+// 5/26/00  Fix bug when x a double-extended denormal; 
 //          if x=0 call error routine, per C9X
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 01/20/01 Fixed result for x=0, corrected error tag value.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance
-//
+// 1/20/01  Fixed result for x=0, corrected error tag value.
+
+.align 32
+.global ilogb#
+
+.section .text
+.proc  ilogb#
+.align 32
+
 // API
 //==============================================================
-// int ilogb( double x );
-//
+// int = ilogb(double)
+
 // Overview of operation
 //==============================================================
-// The ilogb function extracts the exponent of x as an integer
+// ilogb extracts the unbiased binary exponent of x, floor(log2(|x|)), as an int
 // and returns it in r8
-//
-// ilogb is similar to logb but differs in the following ways:
+
+// ilogb is similar to logb but differs in the  following ways:
 //         +-inf
 //            ilogb: returns INT_MAX
 //             logb: returns +inf
-//         Nan  returns FP_LOGBNAN (which is either INT_MAX or INT_MIN)
+//         Nan  returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
 //            ilogb: returns INT_MAX (7fffffff)
-//             logb: returns QNAN (quietized SNAN)
+//             logb: returns QNAN (quieted SNAN)
 //         0    returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
-//            ilogb: returns -INT_MAX (80000001)
-//             logb: returns -inf, raises the divide-by-zero exception,
-//                   and calls libm_error_support to set domain error
-//
+//            ilogb: returns INT_MIN (80000000)
+//             logb: returns -inf
+
 // Registers used
 //==============================================================
-// general registers used:
-// r26 -> r39
-// r36 -> r39 used as parameters to error path
-//
-// predicate registers used:
-// p6 -> p10
-// floating-point registers used:
-// f9, f10, f11
-// f8, input
 
-rExpBias            = r26
-rExpMask            = r27
-rSignexp_x          = r28
-rExp_x              = r29
-rIntMax             = r30
-rExp_2to64          = r31
+// general local registers: 
+// ar.pfs r32
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
+
+// predicate registers used: 
+// p6 - x 0
+// p7 - x nan, inf
+// p8 - x norm, unorm
+// p9 - x unorm
+
+// floating-point registers used: 
+// f8 - f10
+
+#include "libm_support.h"
 
 GR_SAVE_PFS         = r32
-rTrialResult        = r33
 GR_SAVE_B0          = r34
 GR_SAVE_GP          = r35
+GR_Parameter_X      = r38
+GR_Parameter_Y      = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG    = r41
 
-GR_Parameter_X      = r36
-GR_Parameter_Y      = r37
-GR_Parameter_RESULT = r38
-GR_Parameter_TAG    = r39
+FR_X                = f8
+FR_Y                = f0
+FR_RESULT           = f0
 
-fTmp                = f9
-fNorm_x             = f10
-f2to64              = f11
 
-.section .text
-GLOBAL_LIBM_ENTRY(ilogb)
+ilogb: 
 
-// X NORMAL
-// TrueExp_x = exp(f8) - 0xffff
-// r8 = TrueExp_x
-{ .mfi
-      getf.exp        rSignexp_x = f8
-      fclass.m        p8,p0 = f8, 0x0b   // Test for x unorm
-      mov             rExpBias = 0xffff  // Exponent bias
+// Form signexp of 2^64 in case need to scale denormal
+{ .mmf
+      alloc          r32=ar.pfs,1,5,4,0
+(p0)  mov      r37 = 0x1003f
+(p0)  fnorm    f9 = f8 ;;
 }
+
+// Form 2^64 in case need to scale denormal
 { .mfi
-      nop.m           0
-      fnorm.s1        fNorm_x = f8
-      mov             rExpMask = 0x1ffff // Exponent mask
+(p0)  setf.exp f10 = r37
+(p0)  fclass.m.unc p7, p8 = f8, 0xe3        
+(p0)  mov      r34 = 0xffff ;;
 }
-;;
 
-// Form signexp of 2^64 in case need to scale denormal
-{ .mfb
-      mov             rExp_2to64 = 0x1003f
-      fclass.m        p6,p9 = f8, 0x1e3  // Test x natval, nan, inf
-(p8)  br.cond.spnt    ILOGB_DENORM       // Branch if x unorm
-}
-;;
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     0 11
+// e                      3
+// X ZERO, returns INT_MIN
+// X INF or NAN, returns INT_MAX
 
-ILOGB_COMMON:
-// Return here from ILOGB_DENORM
 { .mfi
-      and             rExp_x = rSignexp_x, rExpMask // Get biased exponent
-      fclass.m        p7,p10 = f8, 0x07   // Test x zero
-      nop.i           0
+(p0)  mov      r35 = 0x1ffff
+(p8)    fclass.m.unc p6, p8 = f8, 0x07
+	nop.i 999 ;;
 }
 { .mlx
-      nop.m           0
-      movl            rIntMax = 0x000000007fffffff // Form INT_MAX
+	nop.m 999
+(p7)    movl r8 = 0x000000007fffffff ;;       
 }
-;;
 
-.pred.rel "mutex",p6,p9
-{ .mfi
-(p9)  sub             r8 = rExp_x, rExpBias // Get true exponent for normal path
-(p6)  fma.s0          fTmp = f8, f8, f0     // Dummy to set Invalid flag
-(p6)  mov             r8 = rIntMax          // If nan, inf, return INT_MAX
-}
-{ .mbb
-      nop.m           0
-(p7)  br.cond.spnt    ILOGB_ZERO            // Branch if x zero
-(p10) br.ret.sptk     b0                    // Exit if x not zero
+{ .mib
+	nop.m 999
+	nop.i 999
+(p6)    br.cond.spnt  L(ILOGB_ZERO) ;;
 }
-;;
-
 
-ILOGB_DENORM:
-// Form 2^64 in case need to scale denormal
-// Check to see if double-extended denormal
+// Test for denormal
 { .mfi
-      setf.exp        f2to64 = rExp_2to64
-      fclass.m        p8,p0 = fNorm_x, 0x0b
-      nop.i           0
+	nop.m 999
+(p8)    fclass.m.unc p9, p0 = f9, 0x0b        
+	nop.i 999 ;;
 }
-;;
 
-{ .mfi
-      nop.m           0
-      fcmp.eq.s0      p7,p0 = f8, f0           // Dummy op to set denormal flag
-      nop.i           0
+L(ILOGB_COMMON):
+// X NORMAL returns true exponent
+{ .mmi
+	nop.m 999 
+(p8)    getf.exp r33 = f9
+	nop.i 999 ;;
 }
-;;
 
-// If double-extended denormal add 64 to exponent bias for scaling
-// If double-extended denormal form x * 2^64 which is normal
-{ .mfi
-(p8)  add             rExpBias = 64, rExpBias
-(p8)  fmpy.s1         fNorm_x = fNorm_x, f2to64
-      nop.i           0
+// If denormal add 64 to exponent bias for scaling
+{ .mfb
+(p9)    add     r34 = 64, r34   
+	nop.f 999 
+(p9)    br.cond.spnt  L(ILOGB_DENORM) ;;
 }
-;;
 
-// Logic is the same as normal path but use normalized input
-{ .mib
-      getf.exp        rSignexp_x = fNorm_x
-      nop.i           0
-      br.cond.sptk    ILOGB_COMMON             // Return to main path
+{ .mmi
+(p8)    and      r36 = r35, r33
+	nop.m 999
+	nop.i 999 ;;
 }
-;;
 
-ILOGB_ZERO:
-// Here if x zero
-// Return INT_MIN, call error support
-
-{ .mlx
-      alloc           r32=ar.pfs,1,3,4,0
-      movl            rTrialResult = 0x0000000080000000
-}
 { .mib
-      mov             GR_Parameter_TAG = 157  // Error code
-      nop.i           0
-      br.cond.sptk    __libm_error_region     // Call error support
+(p8)    sub r8 = r36, r34                  
+	nop.i 999
+(p0)    br.ret.sptk    b0 ;;                     
 }
-;;
 
-GLOBAL_LIBM_END(ilogb)
+L(ILOGB_DENORM):
+// Here if x denormal
+// Form x * 2^64 which is normal
+// Return to common code
+{ .mfb
+        cmp.eq p8,p9 = r0,r0
+        fmpy f9 = f9, f10
+        br.cond.sptk  L(ILOGB_COMMON) ;;
+}
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
-.prologue
+// X ZERO
+// return INT_MIN, call error support
+L(ILOGB_ZERO): 
+{.mlx
+      mov            GR_Parameter_TAG = 157
+(p6)  movl r33 = 0x0000000080000000 ;;
+};;
+.endp ilogb
+ASM_SIZE_DIRECTIVE(ilogb)
 
+.proc __libm_error_region
+__libm_error_region:
+.prologue
 { .mfi
-        add   GR_Parameter_Y=-32,sp           // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs               // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                         // Create new stack
+        add sp=-64,sp                           // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                     // Save gp
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
-
 { .mmi
-        stfd [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
-        add GR_Parameter_X    = 16,sp         // Parameter 1 address
+        stfd [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                     // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
-
 .body
 { .mib
-        stfd [GR_Parameter_X] = f8            // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
-        nop.b 0
+        stfd [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
+        nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfd [GR_Parameter_Y] = f9            // Store Parameter 3 on stack
+        stfd [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
-
 { .mmi
-        mov   r8 = rTrialResult
+        mov   r8 = r33                         // Store result
 .restore sp
         add   sp = 64,sp                       // Restore stack pointer
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
         mov   gp = GR_SAVE_GP                  // Restore gp
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-        br.ret.sptk   b0
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ilogbf.S b/sysdeps/ia64/fpu/s_ilogbf.S
index 1b6ade6148..ffa6d3b672 100644
--- a/sysdeps/ia64/fpu/s_ilogbf.S
+++ b/sysdeps/ia64/fpu/s_ilogbf.S
@@ -1,10 +1,10 @@
 .file "ilogbf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,248 +20,234 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/03/00 Initial version
-// 05/26/00 Fix bug when x a double-extended denormal;
+// 2/03/00  Initial version
+// 5/26/00  Fix bug when x a double-extended denormal; 
 //          if x=0 call error routine, per C9X
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 01/20/01 Fixed result for x=0
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance
-//
+// 1/20/01  Fixed result for x=0
+
+.align 32
+.global ilogbf#
+
+.section .text
+.proc  ilogbf#
+.align 32
+
 // API
 //==============================================================
-// int ilogbf( float x );
-//
+// int = ilogbf(float)
+
 // Overview of operation
 //==============================================================
-// The ilogbf function extracts the exponent of x as an integer
+// ilogbf extracts the unbiased binary exponent of x, floor(log2(|x|)), as an int
 // and returns it in r8
-//
-// ilogbf is similar to logbf but differs in the following ways:
+
+// ilogbf is similar to logbf but differs in the  following ways:
 //         +-inf
 //            ilogbf: returns INT_MAX
 //             logbf: returns +inf
-//         Nan  returns FP_LOGBNAN (which is either INT_MAX or INT_MIN)
+//         Nan  returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
 //            ilogbf: returns INT_MAX (7fffffff)
-//             logbf: returns QNAN (quietized SNAN)
+//             logbf: returns QNAN (quieted SNAN)
 //         0    returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
-//            ilogbf: returns -INT_MAX (80000001)
-//             logbf: returns -inf, raises the divide-by-zero exception,
-//                   and calls libm_error_support to set domain error
-//
+//            ilogbf: returns INT_MIN (80000000)
+//             logbf: returns -inf
+
 // Registers used
 //==============================================================
-// general registers used:
-// r26 -> r39
-// r36 -> r39 used as parameters to error path
-//
-// predicate registers used:
-// p6 -> p10
-// floating-point registers used:
-// f9, f10, f11
-// f8, input
 
-rExpBias            = r26
-rExpMask            = r27
-rSignexp_x          = r28
-rExp_x              = r29
-rIntMax             = r30
-rExp_2to64          = r31
+// general local registers: 
+// ar.pfs r32
+// r33 -> r37 
+// r38 -> r41 used as parameters to error path
+
+// predicate registers used: 
+// p6 - x 0
+// p7 - x nan, inf
+// p8 - x norm, unorm
+// p9 - x unorm
+
+// floating-point registers used: 
+// f8 - f10
+
+#include "libm_support.h"
 
 GR_SAVE_PFS         = r32
-rTrialResult        = r33
 GR_SAVE_B0          = r34
 GR_SAVE_GP          = r35
+GR_Parameter_X      = r38
+GR_Parameter_Y      = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG    = r41
 
-GR_Parameter_X      = r36
-GR_Parameter_Y      = r37
-GR_Parameter_RESULT = r38
-GR_Parameter_TAG    = r39
+FR_X                = f8
+FR_Y                = f0
+FR_RESULT           = f0
 
-fTmp                = f9
-fNorm_x             = f10
-f2to64              = f11
 
-.section .text
-GLOBAL_LIBM_ENTRY(ilogbf)
+ilogbf: 
 
-// X NORMAL
-// TrueExp_x = exp(f8) - 0xffff
-// r8 = TrueExp_x
-{ .mfi
-      getf.exp        rSignexp_x = f8
-      fclass.m        p8,p0 = f8, 0x0b   // Test for x unorm
-      mov             rExpBias = 0xffff  // Exponent bias
+// Form signexp of 2^64 in case need to scale denormal
+{ .mmf
+      alloc          r32=ar.pfs,1,5,4,0
+(p0)  mov      r37 = 0x1003f
+(p0)  fnorm    f9 = f8 ;;
 }
+
+// Form 2^64 in case need to scale denormal
 { .mfi
-      nop.m           0
-      fnorm.s1        fNorm_x = f8
-      mov             rExpMask = 0x1ffff // Exponent mask
+(p0)  setf.exp f10 = r37
+(p0)  fclass.m.unc p7, p8 = f8, 0xe3        
+(p0)  mov      r34 = 0xffff ;;
 }
-;;
 
-// Form signexp of 2^64 in case need to scale denormal
-{ .mfb
-      mov             rExp_2to64 = 0x1003f
-      fclass.m        p6,p9 = f8, 0x1e3  // Test x natval, nan, inf
-(p8)  br.cond.spnt    ILOGB_DENORM       // Branch if x unorm
-}
-;;
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     0 11
+// e                      3
+// X ZERO, returns INT_MIN
+// X INF or NAN, returns INT_MAX
 
-ILOGB_COMMON:
-// Return here from ILOGB_DENORM
 { .mfi
-      and             rExp_x = rSignexp_x, rExpMask // Get biased exponent
-      fclass.m        p7,p10 = f8, 0x07   // Test x zero
-      nop.i           0
+(p0)  mov      r35 = 0x1ffff
+(p8)    fclass.m.unc p6, p8 = f8, 0x07
+	nop.i 999 ;;
 }
 { .mlx
-      nop.m           0
-      movl            rIntMax = 0x000000007fffffff // Form INT_MAX
+	nop.m 999
+(p7)    movl r8 = 0x000000007fffffff ;;       
 }
-;;
 
-.pred.rel "mutex",p6,p9
-{ .mfi
-(p9)  sub             r8 = rExp_x, rExpBias // Get true exponent for normal path
-(p6)  fma.s0          fTmp = f8, f8, f0     // Dummy to set Invalid flag
-(p6)  mov             r8 = rIntMax          // If nan, inf, return INT_MAX
-}
-{ .mbb
-      nop.m           0
-(p7)  br.cond.spnt    ILOGB_ZERO            // Branch if x zero
-(p10) br.ret.sptk     b0                    // Exit if x not zero
+{ .mib
+	nop.m 999
+	nop.i 999
+(p6)    br.cond.spnt  L(ILOGB_ZERO) ;;
 }
-;;
-
 
-ILOGB_DENORM:
-// Form 2^64 in case need to scale denormal
-// Check to see if double-extended denormal
+// Test for denormal
 { .mfi
-      setf.exp        f2to64 = rExp_2to64
-      fclass.m        p8,p0 = fNorm_x, 0x0b
-      nop.i           0
+	nop.m 999
+(p8)    fclass.m.unc p9, p0 = f9, 0x0b        
+	nop.i 999 ;;
 }
-;;
 
-{ .mfi
-      nop.m           0
-      fcmp.eq.s0      p7,p0 = f8, f0           // Dummy op to set denormal flag
-      nop.i           0
+L(ILOGB_COMMON):
+// X NORMAL returns true exponent
+{ .mmi
+	nop.m 999 
+(p8)    getf.exp r33 = f9
+	nop.i 999 ;;
 }
-;;
 
-// If double-extended denormal add 64 to exponent bias for scaling
-// If double-extended denormal form x * 2^64 which is normal
-{ .mfi
-(p8)  add             rExpBias = 64, rExpBias
-(p8)  fmpy.s1         fNorm_x = fNorm_x, f2to64
-      nop.i           0
+// If denormal add 64 to exponent bias for scaling
+{ .mfb
+(p9)    add     r34 = 64, r34   
+	nop.f 999 
+(p9)    br.cond.spnt  L(ILOGB_DENORM) ;;
 }
-;;
 
-// Logic is the same as normal path but use normalized input
-{ .mib
-      getf.exp        rSignexp_x = fNorm_x
-      nop.i           0
-      br.cond.sptk    ILOGB_COMMON             // Return to main path
+{ .mmi
+(p8)    and      r36 = r35, r33
+	nop.m 999
+	nop.i 999 ;;
 }
-;;
 
-ILOGB_ZERO:
-// Here if x zero
-// Return INT_MIN, call error support
-
-{ .mlx
-      alloc           r32=ar.pfs,1,3,4,0
-      movl            rTrialResult = 0x0000000080000000
-}
 { .mib
-      mov             GR_Parameter_TAG = 158  // Error code
-      nop.i           0
-      br.cond.sptk    __libm_error_region     // Call error support
+(p8)    sub r8 = r36, r34                  
+	nop.i 999
+(p0)    br.ret.sptk    b0 ;;                     
 }
-;;
 
-GLOBAL_LIBM_END(ilogbf)
+L(ILOGB_DENORM):
+// Here if x denormal
+// Form x * 2^64 which is normal
+// Return to common code
+{ .mfb
+        cmp.eq p8,p9 = r0,r0
+        fmpy f9 = f9, f10
+        br.cond.sptk  L(ILOGB_COMMON) ;;
+}
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
-.prologue
+// X ZERO
+// return INT_MIN, call error support
+L(ILOGB_ZERO): 
+{.mlx
+      mov            GR_Parameter_TAG = 158
+(p6)  movl r33 = 0x0000000080000000 ;;
+};;
+.endp ilogbf
+ASM_SIZE_DIRECTIVE(ilogbf)
 
+.proc __libm_error_region
+__libm_error_region:
+.prologue
 { .mfi
-        add   GR_Parameter_Y=-32,sp           // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs               // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                         // Create new stack
+        add sp=-64,sp                           // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                     // Save gp
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
-
 { .mmi
-        stfs [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
-        add GR_Parameter_X    = 16,sp         // Parameter 1 address
+        stfs [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                     // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
-
 .body
 { .mib
-        stfs [GR_Parameter_X] = f8            // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
-        nop.b 0
+        stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
+        nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfs [GR_Parameter_Y] = f9            // Store Parameter 3 on stack
+        stfs [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
-
 { .mmi
-        mov   r8 = rTrialResult
+        mov   r8 = r33                         // Store result
 .restore sp
         add   sp = 64,sp                       // Restore stack pointer
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
         mov   gp = GR_SAVE_GP                  // Restore gp
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-        br.ret.sptk   b0
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ilogbl.S b/sysdeps/ia64/fpu/s_ilogbl.S
index e462fb706e..240da060bf 100644
--- a/sysdeps/ia64/fpu/s_ilogbl.S
+++ b/sysdeps/ia64/fpu/s_ilogbl.S
@@ -1,10 +1,10 @@
 .file "ilogbl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,248 +20,234 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/03/00 Initial version
-// 05/26/00 Fix bug when x a double-extended denormal;
+// 2/03/00  Initial version
+// 5/26/00  Fix bug when x a double-extended denormal; 
 //          if x=0 call error routine, per C9X
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 01/20/01 Fixed result for x=0
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance
-//
+// 1/20/01  Fixed result for x=0
+
+.align 32
+.global ilogbl#
+
+.section .text
+.proc  ilogbl#
+.align 32
+
 // API
 //==============================================================
-// int ilogbl( long double x );
-//
+// int = ilogbl(double_extended)
+
 // Overview of operation
 //==============================================================
-// The ilogbl function extracts the exponent of x as an integer
+// ilogbl extracts the unbiased binary exponent of x (floor(log2(|x|))) as an int
 // and returns it in r8
-//
-// ilogbl is similar to logbl but differs in the following ways:
+
+// ilogbl is similar to logbl but differs in the  following ways:
 //         +-inf
 //            ilogbl: returns INT_MAX
 //             logbl: returns +inf
-//         Nan  returns FP_LOGBNAN (which is either INT_MAX or INT_MIN)
+//         Nan  returns FP_ILOGBNAN (which is either INT_MAX or INT_MIN)
 //            ilogbl: returns INT_MAX (7fffffff)
-//             logbl: returns QNAN (quietized SNAN)
+//             logbl: returns QNAN (quieted SNAN)
 //         0    returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
-//            ilogbl: returns -INT_MAX (80000001)
-//             logbl: returns -inf, raises the divide-by-zero exception,
-//                   and calls libm_error_support to set domain error
-//
+//            ilogbl: returns INT_MIN (80000000)
+//             logbl: returns -inf
+
 // Registers used
 //==============================================================
-// general registers used:
-// r26 -> r39
-// r36 -> r39 used as parameters to error path
-//
-// predicate registers used:
-// p6 -> p10
-// floating-point registers used:
-// f9, f10, f11
-// f8, input
 
-rExpBias            = r26
-rExpMask            = r27
-rSignexp_x          = r28
-rExp_x              = r29
-rIntMax             = r30
-rExp_2to64          = r31
+// general local registers: 
+// ar.pfs r32
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
+
+// predicate registers used: 
+// p6 - x nan, inf
+// p7 - x 0
+// p8 - x norm, unorm
+// p9 - x unorm
+
+// floating-point registers used: 
+// f8 - f10
+
+#include "libm_support.h"
 
 GR_SAVE_PFS         = r32
-rTrialResult        = r33
 GR_SAVE_B0          = r34
 GR_SAVE_GP          = r35
+GR_Parameter_X      = r38
+GR_Parameter_Y      = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG    = r41
 
-GR_Parameter_X      = r36
-GR_Parameter_Y      = r37
-GR_Parameter_RESULT = r38
-GR_Parameter_TAG    = r39
+FR_X                = f8
+FR_Y                = f0
+FR_RESULT           = f0
 
-fTmp                = f9
-fNorm_x             = f10
-f2to64              = f11
 
-.section .text
-GLOBAL_LIBM_ENTRY(ilogbl)
+ilogbl: 
 
-// X NORMAL
-// TrueExp_x = exp(f8) - 0xffff
-// r8 = TrueExp_x
-{ .mfi
-      getf.exp        rSignexp_x = f8
-      fclass.m        p8,p0 = f8, 0x0b   // Test for x unorm
-      mov             rExpBias = 0xffff  // Exponent bias
+// Form signexp of 2^64 in case need to scale denormal
+{ .mmf
+      alloc          r32=ar.pfs,1,5,4,0
+(p0)  mov      r37 = 0x1003f
+(p0)  fnorm    f9 = f8 ;;
 }
+
+// Form 2^64 in case need to scale denormal
 { .mfi
-      nop.m           0
-      fnorm.s1        fNorm_x = f8
-      mov             rExpMask = 0x1ffff // Exponent mask
+(p0)  setf.exp f10 = r37
+(p0)  fclass.m.unc p7, p8 = f8, 0xe3        
+(p0)  mov      r34 = 0xffff ;;
 }
-;;
 
-// Form signexp of 2^64 in case need to scale denormal
-{ .mfb
-      mov             rExp_2to64 = 0x1003f
-      fclass.m        p6,p9 = f8, 0x1e3  // Test x natval, nan, inf
-(p8)  br.cond.spnt    ILOGB_DENORM       // Branch if x unorm
-}
-;;
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     0 11
+// e                      3
+// X ZERO, returns INT_MIN
+// X INF or NAN, returns INT_MAX
 
-ILOGB_COMMON:
-// Return here from ILOGB_DENORM
 { .mfi
-      and             rExp_x = rSignexp_x, rExpMask // Get biased exponent
-      fclass.m        p7,p10 = f8, 0x07   // Test x zero
-      nop.i           0
+(p0)  mov      r35 = 0x1ffff
+(p8)    fclass.m.unc p6, p8 = f8, 0x07
+	nop.i 999 ;;
 }
 { .mlx
-      nop.m           0
-      movl            rIntMax = 0x000000007fffffff // Form INT_MAX
+	nop.m 999
+(p7)    movl r8 = 0x000000007fffffff ;;       
 }
-;;
 
-.pred.rel "mutex",p6,p9
-{ .mfi
-(p9)  sub             r8 = rExp_x, rExpBias // Get true exponent for normal path
-(p6)  fma.s0          fTmp = f8, f8, f0     // Dummy to set Invalid flag
-(p6)  mov             r8 = rIntMax          // If nan, inf, return INT_MAX
-}
-{ .mbb
-      nop.m           0
-(p7)  br.cond.spnt    ILOGB_ZERO            // Branch if x zero
-(p10) br.ret.sptk     b0                    // Exit if x not zero
+{ .mib
+	nop.m 999
+	nop.i 999
+(p6)    br.cond.spnt  L(ILOGB_ZERO) ;;
 }
-;;
-
 
-ILOGB_DENORM:
-// Form 2^64 in case need to scale denormal
-// Check to see if double-extended denormal
+// Test for denormal
 { .mfi
-      setf.exp        f2to64 = rExp_2to64
-      fclass.m        p8,p0 = fNorm_x, 0x0b
-      nop.i           0
+	nop.m 999
+(p8)    fclass.m.unc p9, p0 = f9, 0x0b        
+	nop.i 999 ;;
 }
-;;
 
-{ .mfi
-      nop.m           0
-      fcmp.eq.s0      p7,p0 = f8, f0           // Dummy op to set denormal flag
-      nop.i           0
+L(ILOGB_COMMON):
+// X NORMAL returns true exponent
+{ .mmi
+	nop.m 999 
+(p8)    getf.exp r33 = f9
+	nop.i 999 ;;
 }
-;;
 
-// If double-extended denormal add 64 to exponent bias for scaling
-// If double-extended denormal form x * 2^64 which is normal
-{ .mfi
-(p8)  add             rExpBias = 64, rExpBias
-(p8)  fmpy.s1         fNorm_x = fNorm_x, f2to64
-      nop.i           0
+// If denormal add 64 to exponent bias for scaling
+{ .mfb
+(p9)    add     r34 = 64, r34   
+	nop.f 999 
+(p9)    br.cond.spnt  L(ILOGB_DENORM) ;;
 }
-;;
 
-// Logic is the same as normal path but use normalized input
-{ .mib
-      getf.exp        rSignexp_x = fNorm_x
-      nop.i           0
-      br.cond.sptk    ILOGB_COMMON             // Return to main path
+{ .mmi
+(p8)    and      r36 = r35, r33
+	nop.m 999
+	nop.i 999 ;;
 }
-;;
 
-ILOGB_ZERO:
-// Here if x zero
-// Return INT_MIN, call error support
-
-{ .mlx
-      alloc           r32=ar.pfs,1,3,4,0
-      movl            rTrialResult = 0x0000000080000000
-}
 { .mib
-      mov             GR_Parameter_TAG = 156  // Error code
-      nop.i           0
-      br.cond.sptk    __libm_error_region     // Call error support
+(p8)    sub r8 = r36, r34                  
+	nop.i 999
+(p0)    br.ret.sptk    b0 ;;                     
 }
-;;
 
-GLOBAL_LIBM_END(ilogbl)
+L(ILOGB_DENORM):
+// Here if x denormal
+// Form x * 2^64 which is normal
+// Return to common code
+{ .mfb
+        cmp.eq p8,p9 = r0,r0
+        fmpy f9 = f9, f10
+        br.cond.sptk  L(ILOGB_COMMON) ;;
+}
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
-.prologue
+// X ZERO
+// return INT_MIN, call error support
+L(ILOGB_ZERO): 
+{.mlx
+      mov            GR_Parameter_TAG = 156
+(p6)  movl r33 = 0x0000000080000000 ;;
+};;
+.endp ilogbl
+ASM_SIZE_DIRECTIVE(ilogbl)
 
+.proc __libm_error_region
+__libm_error_region:
+.prologue
 { .mfi
-        add   GR_Parameter_Y=-32,sp           // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs               // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                         // Create new stack
+        add sp=-64,sp                           // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                     // Save gp
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
-
 { .mmi
-        stfe [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
-        add GR_Parameter_X    = 16,sp         // Parameter 1 address
+        stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                     // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
-
 .body
 { .mib
-        stfe [GR_Parameter_X] = f8            // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
-        nop.b 0
+        stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
+        nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfe [GR_Parameter_Y] = f9            // Store Parameter 3 on stack
+        stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
-
 { .mmi
-        mov   r8 = rTrialResult
+        mov   r8 = r33                         // Store result
 .restore sp
         add   sp = 64,sp                       // Restore stack pointer
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
         mov   gp = GR_SAVE_GP                  // Restore gp
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-        br.ret.sptk   b0
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ldexp.S b/sysdeps/ia64/fpu/s_ldexp.S
new file mode 100644
index 0000000000..4dcd671c9f
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ldexp.S
@@ -0,0 +1,380 @@
+.file "ldexp.s"
+
+// Copyright (C) 2000, 2001, Intel Corporation
+// All rights reserved.
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00  Initial version
+// 1/26/01  ldexp completely reworked and now standalone version
+//
+// API
+//==============================================================
+// double = ldexp  (double x, int n) 
+// input  floating point f8 and int n (r33) 
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.   
+//
+//
+
+#include "libm_support.h"
+
+FR_Big         = f6
+FR_NBig        = f7
+FR_Floating_X  = f8
+FR_Result      = f8
+FR_Result2     = f9
+FR_Result3     = f11
+FR_Norm_X      = f12
+FR_Two_N       = f14
+FR_Two_to_Big  = f15
+
+GR_N_Biased    = r15
+GR_Big         = r16
+GR_NBig        = r17
+GR_Scratch     = r18
+GR_Scratch1    = r19
+GR_Bias        = r20
+GR_N_as_int    = r21
+
+GR_SAVE_B0          = r32
+GR_SAVE_GP          = r33
+GR_SAVE_PFS         = r34
+GR_Parameter_X      = r35
+GR_Parameter_Y      = r36
+GR_Parameter_RESULT = r37
+GR_Tag              = r38
+
+.align 32
+.global ldexp
+
+.section .text
+.proc  ldexp
+.align 32
+
+ldexp: 
+
+//
+//   Is x NAN, INF, ZERO, +-?
+//   Build the exponent Bias
+//
+{    .mfi
+     alloc         r32=ar.pfs,1,2,4,0
+     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+     addl          GR_Bias = 0x0FFFF,r0
+}
+
+//
+//   Sign extend input
+//   Is N zero?
+//   Normalize x
+//
+{    .mfi
+     cmp.eq.unc    p6,p0 = r33,r0  
+     fnorm.s1      FR_Norm_X  =   FR_Floating_X 
+     sxt4          GR_N_as_int = r33
+}
+;;
+
+//
+//   Normalize x
+//   Branch and return special values.
+//   Create -35000
+//   Create 35000
+//
+{    .mfi
+     addl          GR_Big = 35000,r0
+     nop.f 0
+     add           GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{    .mfb
+     addl          GR_NBig = -35000,r0
+(p7) fma.d.s0      FR_Result = FR_Floating_X,f1, f0 
+(p7) br.ret.spnt   b0  
+};;
+
+//
+//   Build the exponent Bias
+//   Return x when N = 0
+//
+{    .mfi
+     setf.exp      FR_Two_N = GR_N_Biased                   
+     nop.f 0
+     addl          GR_Scratch1  = 0x063BF,r0 
+}
+{    .mfb
+     addl          GR_Scratch  = 0x019C3F,r0 
+(p6) fma.d.s0      FR_Result = FR_Floating_X,f1, f0 
+(p6) br.ret.spnt   b0  
+};;
+
+//
+//   Create 2**big
+//   Create 2**-big
+//   Is N >= 35000
+//   Is N <= -35000
+//   Raise Denormal operand flag with compare
+//   Main path, create 2**N
+//
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch1                  
+     nop.f 0
+     cmp.ge.unc    p6, p0 = GR_N_as_int, GR_Big
+}
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch                  
+     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
+     cmp.le.unc    p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+//   Adjust 2**N if N was very small or very large
+//
+{    .mfi
+     nop.m 0
+(p6) fma.s1        FR_Two_N = FR_Big,f1,f0
+     nop.i 0
+}
+{ .mlx
+     nop.m 999
+(p0) movl          GR_Scratch = 0x00000000000303FF 
+};;
+
+
+{    .mfi
+     nop.m 0
+(p8) fma.s1        FR_Two_N = FR_NBig,f1,f0
+     nop.i 0
+}
+{    .mlx
+     nop.m 999
+(p0) movl          GR_Scratch1= 0x00000000000103FF 
+};;
+
+//   Set up necessary status fields 
+//
+//   S0 user supplied status
+//   S2 user supplied status + WRE + TD  (Overflows)
+//   S3 user supplied status + FZ + TD   (Underflows)
+//
+{    .mfi
+     nop.m 999
+(p0) fsetc.s3      0x7F,0x41
+     nop.i 999
+}
+{    .mfi
+     nop.m 999
+(p0) fsetc.s2      0x7F,0x42
+     nop.i 999
+};;
+
+//
+//   Do final operation
+//
+{    .mfi
+     setf.exp     FR_NBig = GR_Scratch
+     fma.d.s0     FR_Result = FR_Two_N,FR_Norm_X,f0 
+     nop.i                           999
+}
+{    .mfi
+     nop.m                           999
+     fma.d.s3     FR_Result3 = FR_Two_N,FR_Norm_X,f0 
+     nop.i                           999
+};;
+{    .mfi
+     setf.exp     FR_Big = GR_Scratch1
+     fma.d.s2     FR_Result2 = FR_Two_N,FR_Norm_X,f0 
+     nop.i                           999
+};;
+
+//
+//   Check for overflow or underflow.
+//   Restore s3
+//   Restore s2
+//
+{    .mfi
+     nop.m 0
+     fsetc.s3     0x7F,0x40
+     nop.i 999 
+}
+{    .mfi
+     nop.m 0
+     fsetc.s2     0x7F,0x40
+     nop.i 999
+};;
+
+//
+//   Is the result zero?
+//
+{    .mfi
+     nop.m 999
+     fclass.m.unc p6, p0 =  FR_Result3, 0x007
+     nop.i 999 
+} 
+{    .mfi
+     addl           GR_Tag = 146, r0
+     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.i 0
+};;
+
+//
+//   Detect masked underflow - Tiny + Inexact Only
+//
+{    .mfi
+     nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+     nop.i 999 
+};; 
+
+//
+//   Is result bigger than the allowed range?
+//   Branch out for underflow
+//
+{    .mfb
+(p6) addl            GR_Tag = 147, r0
+(p8) fcmp.le.unc.s1  p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt    L(LDEXP_UNDERFLOW) 
+};;
+
+//
+//   Branch out for overflow
+//
+{ .mbb
+     nop.m 0
+(p7) br.cond.spnt    L(LDEXP_OVERFLOW) 
+(p9) br.cond.spnt    L(LDEXP_OVERFLOW) 
+};;
+
+//
+//   Return from main path.
+//
+{    .mfb
+     nop.m 999
+     nop.f 0
+     br.ret.sptk     b0;;                   
+}
+
+.endp ldexp
+ASM_SIZE_DIRECTIVE(ldexp)
+.proc __libm_error_region
+__libm_error_region:
+
+L(LDEXP_OVERFLOW): 
+L(LDEXP_UNDERFLOW): 
+
+//
+// Get stack address of N
+//
+.prologue
+{ .mfi
+    add   GR_Parameter_Y=-32,sp         
+    nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+    mov  GR_SAVE_PFS=ar.pfs              
+}
+//
+// Adjust sp 
+//
+{ .mfi
+.fframe 64
+   add sp=-64,sp                         
+   nop.f 0
+   mov GR_SAVE_GP=gp       
+};;
+
+//
+//  Store N on stack in correct position 
+//  Locate the address of x on stack
+//
+{ .mmi
+   st8 [GR_Parameter_Y] =  GR_N_as_int,16       
+   add GR_Parameter_X = 16,sp          
+.save   b0, GR_SAVE_B0
+   mov GR_SAVE_B0=b0                  
+};;
+
+//
+// Store x on the stack.
+// Get address for result on stack.
+//
+.body
+{ .mib
+   stfd [GR_Parameter_X] = FR_Norm_X 
+   add   GR_Parameter_RESULT = 0,GR_Parameter_Y   
+   nop.b 0
+}
+{ .mib
+   stfd [GR_Parameter_Y] = FR_Result                 
+   add   GR_Parameter_Y = -16,GR_Parameter_Y
+   br.call.sptk b0=__libm_error_support#   
+};;
+
+//
+//  Get location of result on stack
+//
+{ .mmi
+   nop.m 0
+   nop.m 0
+   add   GR_Parameter_RESULT = 48,sp    
+};;
+
+//
+//  Get the new result 
+//
+{ .mmi
+   ldfd  FR_Result = [GR_Parameter_RESULT]      
+.restore sp
+   add   sp = 64,sp                       
+   mov   b0 = GR_SAVE_B0                  
+};;
+
+//
+//  Restore gp, ar.pfs and return
+//
+{ .mib
+   mov   gp = GR_SAVE_GP                  
+   mov   ar.pfs = GR_SAVE_PFS             
+   br.ret.sptk     b0                  
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type   __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ldexpf.S b/sysdeps/ia64/fpu/s_ldexpf.S
new file mode 100644
index 0000000000..36f0111fe1
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ldexpf.S
@@ -0,0 +1,379 @@
+//.file "ldexpf.s"
+
+// Copyright (C) 2000, 2001, Intel Corporation
+// All rights reserved.
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00  Initial version
+// 1/26/01  ldexpf completely reworked and now standalone version 
+//
+// API
+//==============================================================
+// float = ldexpf  (float x, int n) 
+// input  floating point f8 and int n (r33) 
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.   
+//
+//
+
+#include "libm_support.h"
+
+FR_Big         = f6
+FR_NBig        = f7
+FR_Floating_X  = f8
+FR_Result      = f8
+FR_Result2     = f9
+FR_Result3     = f11
+FR_Norm_X      = f12
+FR_Two_N       = f14
+FR_Two_to_Big  = f15
+
+GR_N_Biased    = r15
+GR_Big         = r16
+GR_NBig        = r17
+GR_Scratch     = r18
+GR_Scratch1    = r19
+GR_Bias        = r20
+GR_N_as_int    = r21
+
+GR_SAVE_B0          = r32
+GR_SAVE_GP          = r33
+GR_SAVE_PFS         = r34
+GR_Parameter_X      = r35
+GR_Parameter_Y      = r36
+GR_Parameter_RESULT = r37
+GR_Tag              = r38
+
+.align 32
+.global ldexpf
+
+.section .text
+.proc  ldexpf
+.align 32
+
+ldexpf: 
+
+//
+//   Is x NAN, INF, ZERO, +-?
+//   Build the exponent Bias
+//
+{    .mfi
+     alloc         r32=ar.pfs,1,2,4,0
+     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+     addl          GR_Bias = 0x0FFFF,r0
+}
+
+//
+//   Sign extend input
+//   Is N zero?
+//   Normalize x
+//
+{    .mfi
+     cmp.eq.unc    p6,p0 = r33,r0  
+     fnorm.s1      FR_Norm_X  =   FR_Floating_X 
+     sxt4          GR_N_as_int = r33
+}
+;;
+
+//
+//   Normalize x
+//   Branch and return special values.
+//   Create -35000
+//   Create 35000
+//
+{    .mfi
+     addl          GR_Big = 35000,r0
+     nop.f         0
+     add           GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{    .mfb
+     addl          GR_NBig = -35000,r0
+(p7) fma.s.s0      FR_Result = FR_Floating_X,f1, f0 
+(p7) br.ret.spnt   b0  
+};;
+
+//
+//   Build the exponent Bias
+//   Return x when N = 0
+//
+{    .mfi
+     setf.exp      FR_Two_N = GR_N_Biased                   
+     nop.f         0
+     addl          GR_Scratch1  = 0x063BF,r0 
+}
+{    .mfb
+     addl          GR_Scratch  = 0x019C3F,r0 
+(p6) fma.s.s0      FR_Result = FR_Floating_X,f1, f0 
+(p6) br.ret.spnt   b0  
+};;
+
+//
+//   Create 2**big
+//   Create 2**-big
+//   Is N >= 35000
+//   Is N <= -35000
+//   Raise Denormal operand flag with compare
+//   Main path, create 2**N
+//
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch1                  
+     nop.f         0
+     cmp.ge.unc    p6, p0 = GR_N_as_int, GR_Big
+}
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch                  
+     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
+     cmp.le.unc    p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+//   Adjust 2**N if N was very small or very large
+//
+{    .mfi
+     nop.m 0
+(p6) fma.s1        FR_Two_N = FR_Big,f1,f0
+     nop.i 0
+}
+{ .mlx
+     nop.m 999
+(p0) movl          GR_Scratch = 0x000000000003007F 
+};;
+
+
+{    .mfi
+     nop.m 0
+(p8) fma.s1        FR_Two_N = FR_NBig,f1,f0
+     nop.i 0
+}
+{    .mlx
+     nop.m 999
+(p0) movl          GR_Scratch1= 0x000000000001007F 
+};;
+
+//   Set up necessary status fields 
+//
+//   S0 user supplied status
+//   S2 user supplied status + WRE + TD  (Overflows)
+//   S3 user supplied status + FZ + TD   (Underflows)
+//
+{    .mfi
+     nop.m 999
+(p0) fsetc.s3      0x7F,0x41
+     nop.i 999
+}
+{    .mfi
+     nop.m 999
+(p0) fsetc.s2      0x7F,0x42
+     nop.i 999
+};;
+
+//
+//   Do final operation
+//
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch
+     fma.s.s0      FR_Result = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+}
+{    .mfi
+     nop.m         999
+     fma.s.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch1
+     fma.s.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+
+//   Check for overflow or underflow.
+//   Restore s3
+//   Restore s2
+//
+{    .mfi
+     nop.m 0
+     fsetc.s3      0x7F,0x40
+     nop.i 999 
+}
+{    .mfi
+     nop.m 0
+     fsetc.s2      0x7F,0x40
+     nop.i 999
+};;
+
+//
+//   Is the result zero?
+//
+{    .mfi
+     nop.m 999
+     fclass.m.unc  p6, p0 =  FR_Result3, 0x007
+     nop.i 999 
+} 
+{    .mfi
+     addl          GR_Tag = 148, r0
+     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.i 0
+};;
+
+//
+//   Detect masked underflow - Tiny + Inexact Only
+//
+{    .mfi
+     nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+     nop.i 999 
+};; 
+
+//
+//   Is result bigger than the allowed range?
+//   Branch out for underflow
+//
+{    .mfb
+(p6) addl           GR_Tag = 149, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt   L(ldexpf_UNDERFLOW) 
+};;
+
+//
+//   Branch out for overflow
+//
+{ .mbb
+     nop.m 0
+(p7) br.cond.spnt   L(ldexpf_OVERFLOW) 
+(p9) br.cond.spnt   L(ldexpf_OVERFLOW) 
+};;
+
+//
+//   Return from main path.
+//
+{    .mfb
+     nop.m 999
+     nop.f 0
+     br.ret.sptk     b0;;                   
+}
+
+.endp ldexpf
+ASM_SIZE_DIRECTIVE(ldexpf)
+.proc __libm_error_region
+__libm_error_region:
+
+L(ldexpf_OVERFLOW): 
+L(ldexpf_UNDERFLOW): 
+
+//
+// Get stack address of N
+//
+.prologue
+{ .mfi
+    add   GR_Parameter_Y=-32,sp         
+    nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+    mov  GR_SAVE_PFS=ar.pfs              
+}
+//
+// Adjust sp 
+//
+{ .mfi
+.fframe 64
+   add sp=-64,sp                         
+   nop.f 0
+   mov GR_SAVE_GP=gp       
+};;
+
+//
+//  Store N on stack in correct position 
+//  Locate the address of x on stack
+//
+{ .mmi
+   st8 [GR_Parameter_Y] =  GR_N_as_int,16       
+   add GR_Parameter_X = 16,sp          
+.save   b0, GR_SAVE_B0
+   mov GR_SAVE_B0=b0                  
+};;
+
+//
+// Store x on the stack.
+// Get address for result on stack.
+//
+.body
+{ .mib
+   stfs [GR_Parameter_X] = FR_Norm_X 
+   add   GR_Parameter_RESULT = 0,GR_Parameter_Y   
+   nop.b 0
+}
+{ .mib
+   stfs [GR_Parameter_Y] = FR_Result                 
+   add   GR_Parameter_Y = -16,GR_Parameter_Y
+   br.call.sptk b0=__libm_error_support#   
+};;
+
+//
+//  Get location of result on stack
+//
+{ .mmi
+   nop.m 0
+   nop.m 0
+   add   GR_Parameter_RESULT = 48,sp    
+};;
+
+//
+//  Get the new result 
+//
+{ .mmi
+   ldfs  FR_Result = [GR_Parameter_RESULT]      
+.restore sp
+   add   sp = 64,sp                       
+   mov   b0 = GR_SAVE_B0                  
+};;
+
+//
+//  Restore gp, ar.pfs and return
+//
+{ .mib
+   mov   gp = GR_SAVE_GP                  
+   mov   ar.pfs = GR_SAVE_PFS             
+   br.ret.sptk     b0                  
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type   __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_ldexpl.S b/sysdeps/ia64/fpu/s_ldexpl.S
new file mode 100644
index 0000000000..fb5d3fd452
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_ldexpl.S
@@ -0,0 +1,379 @@
+//.file "ldexpl.s"
+
+// Copyright (C) 2000, 2001, Intel Corporation
+// All rights reserved.
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00  Initial version
+// 1/26/01  ldexpl completely reworked and now standalone version 
+//
+// API
+//==============================================================
+// double-extended = ldexpl  (double-extended x, int n) 
+// input  floating point f8 and int n (r34) 
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.   
+//
+//
+
+#include "libm_support.h"
+
+FR_Big         = f6   // 2**big scale for very large N; later the upper range bound
+FR_NBig        = f7   // 2**-big scale for very small N; later the lower range bound
+FR_Floating_X  = f8   // input x
+FR_Result      = f8   // returned value (same register as the input)
+FR_Result2     = f9   // x * 2**N computed under status field s2
+FR_Result3     = f11  // x * 2**N computed under status field s3
+FR_Norm_X      = f12  // fnorm of x
+FR_Two_N       = f14  // 2**N, built with setf.exp
+FR_Two_to_Big  = f15  // not referenced by the code in this file
+// General registers holding working values
+GR_N_Biased    = r15  // N plus the exponent bias
+GR_Big         = r16  // 35000
+GR_NBig        = r17  // -35000
+GR_Scratch     = r18  // biased exponents handed to setf.exp
+GR_Scratch1    = r19  // biased exponents handed to setf.exp
+GR_Bias        = r20  // 0xFFFF
+GR_N_as_int    = r21  // N sign-extended from 32 bits
+// Registers of the frame allocated in ldexpl (r32-r38), used by the error path
+GR_SAVE_B0          = r32
+GR_SAVE_GP          = r33
+GR_SAVE_PFS         = r34  // r34 also carries the incoming n until the error path
+GR_Parameter_X      = r35  // address of x on the stack
+GR_Parameter_Y      = r36  // address of N on the stack
+GR_Parameter_RESULT = r37  // address of the result on the stack
+GR_Tag              = r38  // 144 or 145, set in ldexpl
+
+.align 32
+.global ldexpl
+
+.section .text
+.proc  ldexpl
+.align 32
+// ldexpl: x arrives in f8 and n in r34; x * 2**n is returned in f8.
+ldexpl: 
+
+//
+//   Is x NaN, Inf or zero (either sign)?  p7 is set if so.
+//   Build the exponent Bias
+//
+{    .mfi
+     alloc         r32=ar.pfs,2,1,4,0
+     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+     addl          GR_Bias = 0x0FFFF,r0
+}
+
+//
+//   Is N zero?  (p6)
+//   Normalize x
+//   Sign extend input
+//
+{    .mfi
+     cmp.eq.unc    p6,p0 = r34,r0  
+     fnorm.s1      FR_Norm_X  =   FR_Floating_X 
+     sxt4          GR_N_as_int = r34
+}
+;;
+
+//
+//   Create 35000
+//   Add the exponent Bias to N
+//   Create -35000
+//   Special values (p7): result = x*1 + 0, then return.
+//
+{    .mfi
+     addl          GR_Big = 35000,r0
+     nop.f         0
+     add           GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{    .mfb
+     addl          GR_NBig = -35000,r0
+(p7) fma.s0      FR_Result = FR_Floating_X,f1, f0 
+(p7) br.ret.spnt   b0  
+};;
+
+//
+//   Main path, create 2**N
+//   Return x when N = 0
+//
+{    .mfi
+     setf.exp      FR_Two_N = GR_N_Biased                   
+     nop.f         0
+     addl          GR_Scratch1  = 0x063BF,r0 // Bias - 40000
+}
+{    .mfb
+     addl          GR_Scratch  = 0x019C3F,r0 // Bias + 40000
+(p6) fma.s0      FR_Result = FR_Floating_X,f1, f0 
+(p6) br.ret.spnt   b0  
+};;
+
+//
+//   Create 2**-big  (big = 40000)
+//   Is N >= 35000     
+//   Create 2**big
+//   Raise Denormal operand flag with compare
+//   Is N <= -35000     
+//   (2**N itself was created in the previous group)
+//
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch1                  
+     nop.f         0
+     cmp.ge.unc    p6, p0 = GR_N_as_int, GR_Big
+}
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch                  
+     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
+     cmp.le.unc    p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+//   Adjust 2**N if N was very small or very large
+//
+{    .mfi
+     nop.m 0
+(p6) fma.s1        FR_Two_N = FR_Big,f1,f0
+     nop.i 0
+}
+{ .mlx
+     nop.m 999
+(p0) movl          GR_Scratch = 0x0000000000033FFF // 0x13FFF with bit 17 set (presumably the sign bit for setf.exp - confirm)
+};;
+
+
+{    .mfi
+     nop.m 0
+(p8) fma.s1        FR_Two_N = FR_NBig,f1,f0
+     nop.i 0
+}
+{    .mlx
+     nop.m 999
+(p0) movl          GR_Scratch1= 0x0000000000013FFF // Bias + 16384
+};;
+
+//   Set up necessary status fields 
+//
+//   S0 user supplied status
+//   S2 user supplied status + WRE + TD  (Overflows)
+//   S3 user supplied status + FZ + TD   (Underflows)
+//
+{    .mfi
+     nop.m 999
+(p0) fsetc.s3      0x7F,0x41
+     nop.i 999
+}
+{    .mfi
+     nop.m 999
+(p0) fsetc.s2      0x7F,0x42
+     nop.i 999
+};;
+
+//
+//   Do final operation: 2**N * x under s0 (returned), s3 and s2 (range checks)
+//
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch
+     fma.s0      FR_Result = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+}
+{    .mfi
+     nop.m         999
+     fma.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch1
+     fma.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+
+//   Restore s3
+//   Restore s2
+//   (the overflow and underflow checks follow)
+//
+{    .mfi
+     nop.m 0
+     fsetc.s3      0x7F,0x40
+     nop.i 999 
+}
+{    .mfi
+     nop.m 0
+     fsetc.s2      0x7F,0x40
+     nop.i 999
+};;
+
+//
+//   Is the s3 result zero (p6)?  Is the s2 result >= FR_Big (p7, else p8)?
+//
+{    .mfi
+     nop.m 999
+     fclass.m.unc  p6, p0 =  FR_Result3, 0x007
+     nop.i 999 
+} 
+{    .mfi
+     addl          GR_Tag = 144, r0 // tag kept on the overflow exits
+     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.i 0
+};;
+
+//
+//   Detect masked underflow - Tiny + Inexact Only
+//
+{    .mfi
+     nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+     nop.i 999 
+};; 
+
+//
+//   Underflow (p6): Tag = 145 and branch out
+//   Otherwise (p8): is the s2 result <= FR_NBig, the lower range bound?
+//
+{    .mfb
+(p6) addl           GR_Tag = 145, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt   L(ldexpl_UNDERFLOW) 
+};;
+
+//
+//   Branch out for overflow (above FR_Big: p7, below FR_NBig: p9)
+//
+{ .mbb
+     nop.m 0
+(p7) br.cond.spnt   L(ldexpl_OVERFLOW) 
+(p9) br.cond.spnt   L(ldexpl_OVERFLOW) 
+};;
+
+//
+//   Return from main path.
+//
+{    .mfb
+     nop.m 999
+     nop.f 0
+     br.ret.sptk     b0;;                   
+}
+
+.endp ldexpl
+ASM_SIZE_DIRECTIVE(ldexpl)
+.proc __libm_error_region
+__libm_error_region:
+// Common tail for both exits of ldexpl; GR_Tag already holds 144 or 145.
+L(ldexpl_OVERFLOW): 
+L(ldexpl_UNDERFLOW): 
+
+//
+// Get stack address of N (sp-32 here, which is sp+32 once sp is adjusted)
+//
+.prologue
+{ .mfi
+    add   GR_Parameter_Y=-32,sp         
+    nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+    mov  GR_SAVE_PFS=ar.pfs              
+}
+//
+// Adjust sp (64-byte frame); save gp
+//
+{ .mfi
+.fframe 64
+   add sp=-64,sp                         
+   nop.f 0
+   mov GR_SAVE_GP=gp       
+};;
+
+//
+//  Store N at sp+32; the post-increment leaves GR_Parameter_Y at sp+48
+//  Locate the address of x on stack (sp+16); save b0
+//
+{ .mmi
+   st8 [GR_Parameter_Y] =  GR_N_as_int,16       
+   add GR_Parameter_X = 16,sp          
+.save   b0, GR_SAVE_B0
+   mov GR_SAVE_B0=b0                  
+};;
+
+//
+// Store normalized x (extended format) at sp+16 and the result at sp+48.
+// GR_Parameter_RESULT = sp+48; GR_Parameter_Y steps back to N at sp+32.
+//
+.body
+{ .mib
+   stfe [GR_Parameter_X] = FR_Norm_X 
+   add   GR_Parameter_RESULT = 0,GR_Parameter_Y   
+   nop.b 0
+}
+{ .mib
+   stfe [GR_Parameter_Y] = FR_Result                 
+   add   GR_Parameter_Y = -16,GR_Parameter_Y
+   br.call.sptk b0=__libm_error_support#   
+};;
+
+//
+//  Get location of result on stack (recomputed from sp after the call)
+//
+{ .mmi
+   nop.m 0
+   nop.m 0
+   add   GR_Parameter_RESULT = 48,sp    
+};;
+
+//
+//  Get the new result; pop the 64-byte frame; restore b0
+//
+{ .mmi
+   ldfe  FR_Result = [GR_Parameter_RESULT]      
+.restore sp
+   add   sp = 64,sp                       
+   mov   b0 = GR_SAVE_B0                  
+};;
+
+//
+//  Restore gp, ar.pfs and return
+//
+{ .mib
+   mov   gp = GR_SAVE_GP                  
+   mov   ar.pfs = GR_SAVE_PFS             
+   br.ret.sptk     b0                  
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type   __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_log1p.S b/sysdeps/ia64/fpu/s_log1p.S
index cd3551984a..0d96c14a55 100644
--- a/sysdeps/ia64/fpu/s_log1p.S
+++ b/sysdeps/ia64/fpu/s_log1p.S
@@ -1,10 +1,10 @@
-.file "log1p.s"
+.file "log1p.s" 
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,1082 +20,1608 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 06/29/01 Improved speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 10/02/02 Improved performance by basing on log algorithm
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-// 04/18/03 Eliminate possible WAW dependency warning
 //
-// API
-//==============================================================
-// double log1p(double)
+// *********************************************************************
 //
-// log1p(x) = log(x+1)
+// Function:   log1p(x) = ln(x+1), for double precision x values
 //
-// Overview of operation
-//==============================================================
-// Background
-// ----------
+// *********************************************************************
 //
-// This algorithm is based on fact that
-// log1p(x) = log(1+x) and
-// log(a b) = log(a) + log(b).
-// In our case we have 1+x = 2^N f, where 1 <= f < 2.
-// So
-//   log(1+x) = log(2^N f) = log(2^N) + log(f) = n*log(2) + log(f)
+// Accuracy:   Very accurate for double precision values
 //
-// To calculate log(f) we do following
-//   log(f) = log(f * frcpa(f) / frcpa(f)) =
-//          = log(f * frcpa(f)) + log(1/frcpa(f))
+// *********************************************************************
 //
-// According to definition of IA-64's frcpa instruction it's a
-// floating point that approximates 1/f using a lookup on the
-// top of 8 bits of the input number's + 1 significand with relative
-// error < 2^(-8.886). So we have following
+// Resources Used:
 //
-// |(1/f - frcpa(f)) / (1/f))| = |1 - f*frcpa(f)| < 1/256
+//    Floating-Point Registers: f8 (Input and Return Value)
+//                              f9,f33-f55,f99 
 //
-// and
+//    General Purpose Registers:
+//      r32-r53
+//      r54-r57 (Used to pass arguments to error handling routine)
 //
-// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
-//        = log(1 + r) + T
+//    Predicate Registers:      p6-p15
 //
-// The first value can be computed by polynomial P(r) approximating
-// log(1 + r) on |r| < 1/256 and the second is precomputed tabular
-// value defined by top 8 bit of f.
+// *********************************************************************
 //
-// Finally we have that  log(1+x) ~ (N*log(2) + T) + P(r)
+// IEEE Special Conditions:
 //
-// Note that if input argument is close to 0.0 (in our case it means
-// that |x| < 1/256) we can use just polynomial approximation
-// because 1+x = 2^0 * f = f = 1 + r and
-// log(1+x) = log(1 + r) ~ P(r)
+//    Denormal  fault raised on denormal inputs
+//    Overflow exceptions cannot occur  
+//    Underflow exceptions raised when appropriate for log1p 
+//    (Error Handling Routine called for underflow)
+//    Inexact raised when appropriate by algorithm
 //
+//    log1p(inf) = inf
+//    log1p(-inf) = QNaN 
+//    log1p(+/-0) = +/-0 
+//    log1p(-1) =  -inf 
+//    log1p(SNaN) = QNaN
+//    log1p(QNaN) = QNaN
+//    log1p(EM_special Values) = QNaN
 //
-// Implementation
-// --------------
+// *********************************************************************
 //
-// 1. |x| >= 2^(-8), and x > -1
-//   InvX = frcpa(x+1)
-//   r = InvX*(x+1) - 1
-//   P(r) = r*((r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
-//   all coefficients are calcutated in quad and rounded to double
-//   precision. A7,A6,A5,A4 are stored in memory whereas A3 and A2
-//   created with setf.
+// Computation is based on the following kernel.
 //
-//   N = float(n) where n is true unbiased exponent of x
+// ker_log_64( in_FR    :  X,
+// 	    in_FR    :  E,
+// 	    in_FR    :  Em1,
+// 	    in_GR    :  Expo_Range,
+// 	    out_FR   :  Y_hi,
+// 	    out_FR   :  Y_lo,
+// 	    out_FR   :  Scale,
+// 	    out_PR   :  Safe  )
+// 
+// Overview
 //
-//   T is tabular value of log(1/frcpa(x)) calculated in quad precision
-//   and represented by two floating-point numbers 64-bit Thi and 32-bit Tlo.
-//   To load Thi,Tlo we get bits from 55 to 62 of register format significand
-//   as index and calculate two addresses
-//     ad_Thi = Thi_table_base_addr + 8 * index
-//     ad_Tlo = Tlo_table_base_addr + 4 * index
+// The method consists of three cases.
 //
-//   L1 (log(2)) is calculated in quad
-//   precision and represented by two floating-point 64-bit numbers L1hi,L1lo
-//   stored in memory.
+// If	|X+Em1| < 2^(-80)	use case log1p_small;
+// elseif	|X+Em1| < 2^(-7)	use case log_near1;
+// else				use case log_regular;
 //
-//   And final result = ((L1hi*N + Thi) + (N*L1lo + Tlo)) + P(r)
+// Case log1p_small:
 //
+// log( 1 + (X+Em1) ) can be approximated by (X+Em1).
 //
-// 2. 2^(-80) <= |x| < 2^(-8)
-//   r = x
-//   P(r) = r*((r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
-//   A7,A6,A5,A4,A3,A2 are the same as in case |x| >= 1/256
+// Case log_near1:
 //
-//   And final results
-//     log(1+x)   = P(r)
+//   log( 1 + (X+Em1) ) can be approximated by a simple polynomial
+//   in W = X+Em1. This polynomial resembles the truncated Taylor
+//   series W - W^2/2 + W^3/3 - ...
+// 
+// Case log_regular:
 //
-// 3. 0 < |x| < 2^(-80)
-//   Although log1p(x) is basically x, we would like to preserve the inexactness
-//   nature as well as consistent behavior under different rounding modes.
-//   We can do this by computing the result as
+//   Here we use a table lookup method. The basic idea is that in
+//   order to compute log(Arg) for an argument Arg in [1,2), we 
+//   construct a value G such that G*Arg is close to 1 and that
+//   log(1/G) is obtainable easily from a table of values calculated
+//   beforehand. Thus
 //
-//     log1p(x) = x - x*x
+//	log(Arg) = log(1/G) + log(G*Arg)
+//		 = log(1/G) + log(1 + (G*Arg - 1))
 //
+//   Because |G*Arg - 1| is small, the second term on the right hand
+//   side can be approximated by a short polynomial. We elaborate
+//   this method in four steps.
 //
-//    Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
-//          filtered and processed on special branches.
+//   Step 0: Initialization
 //
-
+//   We need to calculate log( E + X ). Obtain N, S_hi, S_lo such that
 //
-// Special values
-//==============================================================
+//	E + X = 2^N * ( S_hi + S_lo )	exactly
 //
-// log1p(-1)    = -inf            // Call error support
+//   where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
+//   that |S_lo| <= ulp(S_hi).
 //
-// log1p(+qnan) = +qnan
-// log1p(-qnan) = -qnan
-// log1p(+snan) = +qnan
-// log1p(-snan) = -qnan
+//   Step 1: Argument Reduction
 //
-// log1p(x),x<-1= QNAN Indefinite // Call error support
-// log1p(-inf)  = QNAN Indefinite
-// log1p(+inf)  = +inf
-// log1p(+/-0)  = +/-0
+//   Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
 //
+//	G := G_1 * G_2 * G_3
+//	r := (G * S_hi - 1)  + G * S_lo
 //
-// Registers used
-//==============================================================
-// Floating Point registers used:
-// f8, input
-// f7 -> f15,  f32 -> f40
+//   These G_j's have the property that the product is exactly 
+//   representable and that |r| < 2^(-12) as a result.
+//
+//   Step 2: Approximation
+//
+//
+//   log(1 + r) is approximated by a short polynomial poly(r).
+//
+//   Step 3: Reconstruction
+//
+//
+//   Finally, log( E + X ) is given by
+//
+//   log( E + X )   =   log( 2^N * (S_hi + S_lo) )
+//                 ~=~  N*log(2) + log(1/G) + log(1 + r)
+//                 ~=~  N*log(2) + log(1/G) + poly(r).
+//
+// **** Algorithm ****
+//
+// Case log1p_small:
+//
+// Although log(1 + (X+Em1)) is basically X+Em1, we would like to 
+// preserve the inexactness nature as well as consistent behavior
+// under different rounding modes. Note that this case can only be
+// taken if E is set to be 1.0. In this case, Em1 is zero, and that
+// X can be very tiny and thus the final result can possibly underflow.
+// Thus, we compare X against a threshold that is dependent on the
+// input Expo_Range. If |X| is smaller than this threshold, we set
+// SAFE to be FALSE. 
+//
+// The result is returned as Y_hi, Y_lo, and, in the case where SAFE
+// is FALSE, an additional value Scale is also returned.
+//
+//	W    := X + Em1
+//      Threshold := Threshold_Table( Expo_Range )
+//      Tiny      := Tiny_Table( Expo_Range )
+//
+//      If ( |W| > Threshold ) then
+//         Y_hi  := W
+//         Y_lo  := -W*W
+//      Else
+//         Y_hi  := W
+//         Y_lo  := -Tiny
+//         Scale := 2^(-100)
+//         Safe  := FALSE
+//      EndIf
+//
+//
+// One may think that Y_lo should be -W*W/2; however, it does not matter
+// as Y_lo will be rounded off completely except for the correct effect in 
+// directed rounding. Clearly -W*W is simpler to compute. Moreover,
+// because of the difference in exponent value, Y_hi + Y_lo or 
+// Y_hi + Scale*Y_lo is always inexact.
+//
+// Case log_near1:
+//
+// Here we compute a simple polynomial. To exploit parallelism, we split
+// the polynomial into two portions.
+// 
+// 	W := X + Em1
+// 	Wsq := W * W
+// 	W4  := Wsq*Wsq
+// 	W6  := W4*Wsq
+// 	Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4)))
+// 	Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
+//      set lsb(Y_lo) to be 1
+//
+// Case log_regular:
+//
+// We present the algorithm in four steps.
+//
+//   Step 0. Initialization
+//   ----------------------
+//
+//   Z := X + E
+//   N := unbiased exponent of Z
+//   S_hi := 2^(-N) * Z
+//   S_lo := 2^(-N) * { (max(X,E)-Z) + min(X,E) }
+//
+//   Note that S_lo is always 0 for the case E = 0.
+//
+//   Step 1. Argument Reduction
+//   --------------------------
+//
+//   Let
+//
+//	Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
+//
+//   We obtain G_1, G_2, G_3 by the following steps.
+//
+//
+//	Define		X_0 := 1.d_1 d_2 ... d_14. This is extracted
+//			from S_hi.
+//
+//	Define		A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
+//			to lsb = 2^(-4).
+//
+//	Define		index_1 := [ d_1 d_2 d_3 d_4 ].
+//
+//	Fetch 		Z_1 := (1/A_1) rounded UP in fixed point with
+//	fixed point	lsb = 2^(-15).
+//			Z_1 looks like z_0.z_1 z_2 ... z_15
+//		        Note that the fetching is done using index_1.
+//			A_1 is actually not needed in the implementation
+//			and is used here only to explain how the value
+//			Z_1 is defined.
+//
+//	Fetch		G_1 := (1/A_1) truncated to 21 sig. bits.
+//	floating pt.	Again, fetching is done using index_1. A_1
+//			explains how G_1 is defined.
+//
+//	Calculate	X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
+//			     = 1.0 0 0 0 d_5 ... d_14
+//			This is accomplished by integer multiplication.
+//			It is proved that X_1 indeed always begins
+//			with 1.0000 in fixed point.
+//
+//
+//	Define		A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 
+//			truncated to lsb = 2^(-8). Similar to A_1,
+//			A_2 is not needed in actual implementation. It
+//			helps explain how some of the values are defined.
+//
+//	Define		index_2 := [ d_5 d_6 d_7 d_8 ].
+//
+//	Fetch 		Z_2 := (1/A_2) rounded UP in fixed point with
+//	fixed point	lsb = 2^(-15). Fetch done using index_2.
+//			Z_2 looks like z_0.z_1 z_2 ... z_15
+//
+//	Fetch		G_2 := (1/A_2) truncated to 21 sig. bits.
+//	floating pt.
+//
+//	Calculate	X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
+//			     = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+//			This is accomplished by integer multiplication.
+//			It is proved that X_2 indeed always begins
+//			with 1.00000000 in fixed point.
+//
+//
+//	Define		A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
+//			This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+//
+//	Define		index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+//
+//	Fetch		G_3 := (1/A_3) truncated to 21 sig. bits.
+//	floating pt.	Fetch is done using index_3.
+//
+//	Compute		G := G_1 * G_2 * G_3. 
 //
-// General registers used:
-// r8  -> r11
-// r14 -> r20
+//	This is done exactly since each of G_j only has 21 sig. bits.
+//
+//	Compute   
+//
+//		r := (G*S_hi - 1) + G*S_lo   using 2 FMA operations.
+//
+//	thus, r approximates G*(S_hi+S_lo) - 1 to within a couple of 
+//	rounding errors.
+//
+//
+//  Step 2. Approximation
+//  ---------------------
+//
+//   This step computes an approximation to log( 1 + r ) where r is the
+//   reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
+//   thus log(1+r) can be approximated by a short polynomial:
+//
+//	log(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+//
+//
+//  Step 3. Reconstruction
+//  ----------------------
+//
+//   This step computes the desired result of log(X+E):
+//
+//	log(X+E)  =   log( 2^N * (S_hi + S_lo) )
+//		  =   N*log(2) + log( S_hi + S_lo )
+//		  =   N*log(2) + log(1/G) +
+//		      log(1 + G*(S_hi+S_lo) - 1 )
+//
+//   log(2), log(1/G_j) are stored as pairs of (single,double) numbers:
+//   log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
+//   single-precision numbers and the low parts are double precision
+//   numbers. These have the property that
+//
+//	N*log2_hi + SUM ( log1byGj_hi )
+//
+//   is computable exactly in double-extended precision (64 sig. bits).
+//   Finally
+//
+//	Y_hi := N*log2_hi + SUM ( log1byGj_hi )
+//	Y_lo := poly_hi + [ poly_lo + 
+//	        ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
+//      set lsb(Y_lo) to be 1
 //
-// Predicate registers used:
-// p6 -> p12
 
-// Assembly macros
-//==============================================================
-GR_TAG                 = r8
-GR_ad_1                = r8
-GR_ad_2                = r9
-GR_Exp                 = r10
-GR_N                   = r11
+#include "libm_support.h"
 
-GR_signexp_x           = r14
-GR_exp_mask            = r15
-GR_exp_bias            = r16
-GR_05                  = r17
-GR_A3                  = r18
-GR_Sig                 = r19
-GR_Ind                 = r19
-GR_exp_x               = r20
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
+// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1 
 
-GR_SAVE_B0             = r33
-GR_SAVE_PFS            = r34
-GR_SAVE_GP             = r35
-GR_SAVE_SP             = r36
+.align 64
+Constants_P:
+ASM_TYPE_DIRECTIVE(Constants_P,@object)
+data4  0xEFD62B15,0xE3936754,0x00003FFB,0x00000000
+data4  0xA5E56381,0x8003B271,0x0000BFFC,0x00000000
+data4  0x73282DB0,0x9249248C,0x00003FFC,0x00000000
+data4  0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000
+data4  0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000
+data4  0x00067ED5,0x80000000,0x0000BFFD,0x00000000
+data4  0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000
+data4  0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_P)
+ 
+// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1 
 
-GR_Parameter_X         = r37
-GR_Parameter_Y         = r38
-GR_Parameter_RESULT    = r39
-GR_Parameter_TAG       = r40
+.align 64
+Constants_Q:
+ASM_TYPE_DIRECTIVE(Constants_Q,@object)
+data4  0x00000000,0xB1721800,0x00003FFE,0x00000000 
+data4  0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
+data4  0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
+data4  0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
+data4  0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
+data4  0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_Q)
+ 
+// Z1 - 16 bit fixed, G1 and H1 - IEEE single and h1 - IEEE double 
+ 
+.align 64
+Constants_Z_G_H_h1:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h1,@object)
+data4  0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
+data4  0x00007879,0x3F70F0F0,0x3D785196,0x00000000,0x617D741C,0x3DA163A6
+data4  0x000071C8,0x3F638E38,0x3DF13843,0x00000000,0xCBD3D5BB,0x3E2C55E6
+data4  0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000,0xD86EA5E7,0xBE3EB0BF
+data4  0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000,0x86B12760,0x3E2E6A8C
+data4  0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000,0x5C0739BA,0x3E47574C
+data4  0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000,0x13E8AF2F,0x3E20E30F
+data4  0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000,0xF2C630BD,0xBE42885B
+data4  0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000,0x97E577C6,0x3E497F34
+data4  0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000,0xA6B0A5AB,0x3E3E6A6E
+data4  0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000,0xD328D9BE,0xBDF43E3C
+data4  0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000,0x0ADB090A,0x3E4094C3
+data4  0x00004925,0x3F124920,0x3F0F4303,0x00000000,0xFC1FE510,0xBE28FBB2
+data4  0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000,0x10FDE3FA,0x3E3A7895
+data4  0x00004445,0x3F088888,0x3F20EC80,0x00000000,0x7CC8C98F,0x3E508CE5
+data4  0x00004211,0x3F042108,0x3F29516A,0x00000000,0xA223106C,0xBE534874
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h1)
+ 
+// Z2 - 16 bit fixed, G2 and H2 - IEEE single and h2 - IEEE double 
 
+.align 64 
+Constants_Z_G_H_h2:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h2,@object)
+data4  0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
+data4  0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000,0x22C42273,0x3DB5A116
+data4  0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000,0x21F86ED3,0x3DE620CF
+data4  0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000,0x484F34ED,0xBDAFA07E
+data4  0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000,0x3860BCF6,0xBDFE07F0
+data4  0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000,0xA78093D6,0x3DEA370F
+data4  0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000,0x72A753D0,0x3DFF5791
+data4  0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000,0xA7EF896B,0x3DFEBE6C
+data4  0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000,0x409ECB43,0x3E0CF156
+data4  0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000,0xFFEF71DF,0xBE0B6F97
+data4  0x00007B31,0x3F766038,0x3D1CF49B,0x00000000,0x5D59EEE8,0xBE080483
+data4  0x00007ABB,0x3F757400,0x3D2C531D,0x00000000,0xA9192A74,0x3E1F91E9
+data4  0x00007A45,0x3F748988,0x3D3BA322,0x00000000,0xBF72A8CD,0xBE139A06
+data4  0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000,0xF8FBA6CF,0x3E1D9202
+data4  0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000,0xBA796223,0xBE1DCCC4
+data4  0x000078EB,0x3F71D488,0x3D693B9D,0x00000000,0xB6B7C239,0xBE049391
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h2)
+ 
+// G3 and H3 - IEEE single and h3 -IEEE double 
 
+.align 64 
+Constants_Z_G_H_h3:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h3,@object)
+data4  0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
+data4  0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
+data4  0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
+data4  0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
+data4  0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
+data4  0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
+data4  0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
+data4  0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
+data4  0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
+data4  0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
+data4  0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
+data4  0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
+data4  0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
+data4  0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
+data4  0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
+data4  0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
+data4  0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
+data4  0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
+data4  0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
+data4  0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
+data4  0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
+data4  0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
+data4  0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
+data4  0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
+data4  0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
+data4  0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
+data4  0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
+data4  0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
+data4  0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
+data4  0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
+data4  0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
+data4  0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h3)
+ 
+// 
+//  Exponent Thresholds and Tiny Thresholds
+//  for 8, 11, 15, and 17 bit exponents
+// 
+//  Expo_Range             Value
+// 
+//  0 (8  bits)            2^(-126)
+//  1 (11 bits)            2^(-1022)
+//  2 (15 bits)            2^(-16382)
+//  3 (17 bits)            2^(-16382)
+// 
+//  Tiny_Table
+//  ----------
+//  Expo_Range             Value
+// 
+//  0 (8  bits)            2^(-16382)
+//  1 (11 bits)            2^(-16382)
+//  2 (15 bits)            2^(-16382)
+//  3 (17 bits)            2^(-16382)
+// 
 
-FR_NormX               = f7
-FR_RcpX                = f9
-FR_r                   = f10
-FR_r2                  = f11
-FR_r4                  = f12
-FR_N                   = f13
-FR_Ln2hi               = f14
-FR_Ln2lo               = f15
+.align 64 
+Constants_Threshold: // each row decoded as a 16-byte extended-format value (significand lo, significand hi, sign/exponent, pad); roles below follow the header table read as (threshold, tiny) pairs -- TODO confirm against the code that loads it
+ASM_TYPE_DIRECTIVE(Constants_Threshold,@object)
+data4  0x00000000,0x80000000,0x00003F81,0x00000000 // Expo_Range 0 threshold: 1.0 * 2^(-126)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // Expo_Range 0 tiny:      1.0 * 2^(-16382)
+data4  0x00000000,0x80000000,0x00003C01,0x00000000 // Expo_Range 1 threshold: 1.0 * 2^(-1022)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // Expo_Range 1 tiny:      1.0 * 2^(-16382)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // Expo_Range 2 threshold: 1.0 * 2^(-16382)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // Expo_Range 2 tiny:      1.0 * 2^(-16382)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // Expo_Range 3 threshold: 1.0 * 2^(-16382)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // Expo_Range 3 tiny:      1.0 * 2^(-16382)
+ASM_SIZE_DIRECTIVE(Constants_Threshold)
 
-FR_A7                  = f32
-FR_A6                  = f33
-FR_A5                  = f34
-FR_A4                  = f35
-FR_A3                  = f36
-FR_A2                  = f37
+.align 64
+Constants_1_by_LN10: // 1/ln(10) split into a high and a low extended-format part; presumably loaded into FR_1LN10_hi / FR_1LN10_lo -- TODO confirm against the loader
+ASM_TYPE_DIRECTIVE(Constants_1_by_LN10,@object)
+data4  0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000 // high part: significand 0xDE5BD8A937287195, exponent 2^(-2), ~0.4342944819
+data4  0xACCF70C8,0xD56EAABE,0x00003FBD,0x00000000 // low part:  significand 0xD56EAABEACCF70C8, exponent 2^(-66), small correction term
+ASM_SIZE_DIRECTIVE(Constants_1_by_LN10)
 
-FR_Thi                 = f38
-FR_NxLn2hipThi         = f38
-FR_NxLn2pT             = f38
-FR_Tlo                 = f39
-FR_NxLn2lopTlo         = f39
+FR_Input_X = f8 
+FR_Neg_One = f9
+FR_E       = f33
+FR_Em1     = f34
+FR_Y_hi    = f34  
+// Shared with Em1
+FR_Y_lo    = f35
+FR_Scale   = f36
+FR_X_Prime = f37 
+FR_Z       = f38 
+FR_S_hi    = f38  
+// Shared with Z  
+FR_W       = f39
+FR_G       = f40
+FR_wsq     = f40 
+// Shared with G 
+FR_H       = f41
+FR_w4      = f41
+// Shared with H  
+FR_h       = f42
+FR_w6      = f42  
+// Shared with h     
+FR_G_tmp   = f43
+FR_poly_lo = f43
+// Shared with G_tmp 
+FR_P8      = f43  
+// Shared with G_tmp 
+FR_H_tmp   = f44
+FR_poly_hi = f44
+  // Shared with H_tmp
+FR_P7      = f44  
+// Shared with H_tmp
+FR_h_tmp   = f45 
+FR_rsq     = f45  
+// Shared with h_tmp
+FR_P6      = f45
+// Shared with h_tmp
+FR_abs_W   = f46
+FR_r       = f46  
+// Shared with abs_W  
+FR_AA      = f47 
+FR_log2_hi = f47  
+// Shared with AA  
+FR_BB          = f48
+FR_log2_lo     = f48  
+// Shared with BB  
+FR_S_lo        = f49 
+FR_two_negN    = f50  
+FR_float_N     = f51 
+FR_Q4          = f52 
+FR_dummy       = f52  
+// Shared with Q4
+FR_P4          = f52  
+// Shared with Q4
+FR_Threshold    = f52
+// Shared with Q4
+FR_Q3          = f53  
+FR_P3          = f53  
+// Shared with Q3
+FR_Tiny        = f53  
+// Shared with Q3
+FR_Q2          = f54 
+FR_P2          = f54  
+// Shared with Q2
+FR_1LN10_hi     = f54 
+// Shared with Q2
+FR_Q1           = f55 
+FR_P1           = f55 
+// Shared with Q1 
+FR_1LN10_lo     = f55 
+// Shared with Q1 
+FR_P5           = f98 
+FR_SCALE        = f98 
+FR_Output_X_tmp = f99 
 
-FR_Xp1                 = f40
+GR_Expo_Range   = r32
+GR_Table_Base   = r34
+GR_Table_Base1  = r35
+GR_Table_ptr    = r36 
+GR_Index2       = r37 
+GR_signif       = r38 
+GR_X_0          = r39 
+GR_X_1          = r40 
+GR_X_2          = r41 
+GR_Z_1          = r42 
+GR_Z_2          = r43 
+GR_N            = r44 
+GR_Bias         = r45 
+GR_M            = r46 
+GR_ScaleN       = r47  
+GR_Index3       = r48 
+GR_Perturb      = r49 
+GR_Table_Scale  = r50 
 
 
-FR_Y                   = f1
-FR_X                   = f10
-FR_RESULT              = f8
+GR_SAVE_PFS     = r51
+GR_SAVE_B0      = r52
+GR_SAVE_GP      = r53
 
+GR_Parameter_X       = r54
+GR_Parameter_Y       = r55
+GR_Parameter_RESULT  = r56
+
+GR_Parameter_TAG = r57 
 
-// Data
-//==============================================================
-RODATA
-.align 16
-
-LOCAL_OBJECT_START(log_data)
-// coefficients of polynomial approximation
-data8 0x3FC2494104381A8E // A7
-data8 0xBFC5556D556BBB69 // A6
-data8 0x3FC999999988B5E9 // A5
-data8 0xBFCFFFFFFFF6FFF5 // A4
-//
-// hi parts of ln(1/frcpa(1+i/256)), i=0...255
-data8 0x3F60040155D5889D // 0
-data8 0x3F78121214586B54 // 1
-data8 0x3F841929F96832EF // 2
-data8 0x3F8C317384C75F06 // 3
-data8 0x3F91A6B91AC73386 // 4
-data8 0x3F95BA9A5D9AC039 // 5
-data8 0x3F99D2A8074325F3 // 6
-data8 0x3F9D6B2725979802 // 7
-data8 0x3FA0C58FA19DFAA9 // 8
-data8 0x3FA2954C78CBCE1A // 9
-data8 0x3FA4A94D2DA96C56 // 10
-data8 0x3FA67C94F2D4BB58 // 11
-data8 0x3FA85188B630F068 // 12
-data8 0x3FAA6B8ABE73AF4C // 13
-data8 0x3FAC441E06F72A9E // 14
-data8 0x3FAE1E6713606D06 // 15
-data8 0x3FAFFA6911AB9300 // 16
-data8 0x3FB0EC139C5DA600 // 17
-data8 0x3FB1DBD2643D190B // 18
-data8 0x3FB2CC7284FE5F1C // 19
-data8 0x3FB3BDF5A7D1EE64 // 20
-data8 0x3FB4B05D7AA012E0 // 21
-data8 0x3FB580DB7CEB5701 // 22
-data8 0x3FB674F089365A79 // 23
-data8 0x3FB769EF2C6B568D // 24
-data8 0x3FB85FD927506A47 // 25
-data8 0x3FB9335E5D594988 // 26
-data8 0x3FBA2B0220C8E5F4 // 27
-data8 0x3FBB0004AC1A86AB // 28
-data8 0x3FBBF968769FCA10 // 29
-data8 0x3FBCCFEDBFEE13A8 // 30
-data8 0x3FBDA727638446A2 // 31
-data8 0x3FBEA3257FE10F79 // 32
-data8 0x3FBF7BE9FEDBFDE5 // 33
-data8 0x3FC02AB352FF25F3 // 34
-data8 0x3FC097CE579D204C // 35
-data8 0x3FC1178E8227E47B // 36
-data8 0x3FC185747DBECF33 // 37
-data8 0x3FC1F3B925F25D41 // 38
-data8 0x3FC2625D1E6DDF56 // 39
-data8 0x3FC2D1610C868139 // 40
-data8 0x3FC340C59741142E // 41
-data8 0x3FC3B08B6757F2A9 // 42
-data8 0x3FC40DFB08378003 // 43
-data8 0x3FC47E74E8CA5F7C // 44
-data8 0x3FC4EF51F6466DE4 // 45
-data8 0x3FC56092E02BA516 // 46
-data8 0x3FC5D23857CD74D4 // 47
-data8 0x3FC6313A37335D76 // 48
-data8 0x3FC6A399DABBD383 // 49
-data8 0x3FC70337DD3CE41A // 50
-data8 0x3FC77654128F6127 // 51
-data8 0x3FC7E9D82A0B022D // 52
-data8 0x3FC84A6B759F512E // 53
-data8 0x3FC8AB47D5F5A30F // 54
-data8 0x3FC91FE49096581B // 55
-data8 0x3FC981634011AA75 // 56
-data8 0x3FC9F6C407089664 // 57
-data8 0x3FCA58E729348F43 // 58
-data8 0x3FCABB55C31693AC // 59
-data8 0x3FCB1E104919EFD0 // 60
-data8 0x3FCB94EE93E367CA // 61
-data8 0x3FCBF851C067555E // 62
-data8 0x3FCC5C0254BF23A5 // 63
-data8 0x3FCCC000C9DB3C52 // 64
-data8 0x3FCD244D99C85673 // 65
-data8 0x3FCD88E93FB2F450 // 66
-data8 0x3FCDEDD437EAEF00 // 67
-data8 0x3FCE530EFFE71012 // 68
-data8 0x3FCEB89A1648B971 // 69
-data8 0x3FCF1E75FADF9BDE // 70
-data8 0x3FCF84A32EAD7C35 // 71
-data8 0x3FCFEB2233EA07CD // 72
-data8 0x3FD028F9C7035C1C // 73
-data8 0x3FD05C8BE0D9635A // 74
-data8 0x3FD085EB8F8AE797 // 75
-data8 0x3FD0B9C8E32D1911 // 76
-data8 0x3FD0EDD060B78080 // 77
-data8 0x3FD122024CF0063F // 78
-data8 0x3FD14BE2927AECD4 // 79
-data8 0x3FD180618EF18ADF // 80
-data8 0x3FD1B50BBE2FC63B // 81
-data8 0x3FD1DF4CC7CF242D // 82
-data8 0x3FD214456D0EB8D4 // 83
-data8 0x3FD23EC5991EBA49 // 84
-data8 0x3FD2740D9F870AFB // 85
-data8 0x3FD29ECDABCDFA03 // 86
-data8 0x3FD2D46602ADCCEE // 87
-data8 0x3FD2FF66B04EA9D4 // 88
-data8 0x3FD335504B355A37 // 89
-data8 0x3FD360925EC44F5C // 90
-data8 0x3FD38BF1C3337E74 // 91
-data8 0x3FD3C25277333183 // 92
-data8 0x3FD3EDF463C1683E // 93
-data8 0x3FD419B423D5E8C7 // 94
-data8 0x3FD44591E0539F48 // 95
-data8 0x3FD47C9175B6F0AD // 96
-data8 0x3FD4A8B341552B09 // 97
-data8 0x3FD4D4F39089019F // 98
-data8 0x3FD501528DA1F967 // 99
-data8 0x3FD52DD06347D4F6 // 100
-data8 0x3FD55A6D3C7B8A89 // 101
-data8 0x3FD5925D2B112A59 // 102
-data8 0x3FD5BF406B543DB1 // 103
-data8 0x3FD5EC433D5C35AD // 104
-data8 0x3FD61965CDB02C1E // 105
-data8 0x3FD646A84935B2A1 // 106
-data8 0x3FD6740ADD31DE94 // 107
-data8 0x3FD6A18DB74A58C5 // 108
-data8 0x3FD6CF31058670EC // 109
-data8 0x3FD6F180E852F0B9 // 110
-data8 0x3FD71F5D71B894EF // 111
-data8 0x3FD74D5AEFD66D5C // 112
-data8 0x3FD77B79922BD37D // 113
-data8 0x3FD7A9B9889F19E2 // 114
-data8 0x3FD7D81B037EB6A6 // 115
-data8 0x3FD8069E33827230 // 116
-data8 0x3FD82996D3EF8BCA // 117
-data8 0x3FD85855776DCBFA // 118
-data8 0x3FD8873658327CCE // 119
-data8 0x3FD8AA75973AB8CE // 120
-data8 0x3FD8D992DC8824E4 // 121
-data8 0x3FD908D2EA7D9511 // 122
-data8 0x3FD92C59E79C0E56 // 123
-data8 0x3FD95BD750EE3ED2 // 124
-data8 0x3FD98B7811A3EE5B // 125
-data8 0x3FD9AF47F33D406B // 126
-data8 0x3FD9DF270C1914A7 // 127
-data8 0x3FDA0325ED14FDA4 // 128
-data8 0x3FDA33440224FA78 // 129
-data8 0x3FDA57725E80C382 // 130
-data8 0x3FDA87D0165DD199 // 131
-data8 0x3FDAAC2E6C03F895 // 132
-data8 0x3FDADCCC6FDF6A81 // 133
-data8 0x3FDB015B3EB1E790 // 134
-data8 0x3FDB323A3A635948 // 135
-data8 0x3FDB56FA04462909 // 136
-data8 0x3FDB881AA659BC93 // 137
-data8 0x3FDBAD0BEF3DB164 // 138
-data8 0x3FDBD21297781C2F // 139
-data8 0x3FDC039236F08818 // 140
-data8 0x3FDC28CB1E4D32FC // 141
-data8 0x3FDC4E19B84723C1 // 142
-data8 0x3FDC7FF9C74554C9 // 143
-data8 0x3FDCA57B64E9DB05 // 144
-data8 0x3FDCCB130A5CEBAF // 145
-data8 0x3FDCF0C0D18F326F // 146
-data8 0x3FDD232075B5A201 // 147
-data8 0x3FDD490246DEFA6B // 148
-data8 0x3FDD6EFA918D25CD // 149
-data8 0x3FDD9509707AE52F // 150
-data8 0x3FDDBB2EFE92C554 // 151
-data8 0x3FDDEE2F3445E4AE // 152
-data8 0x3FDE148A1A2726CD // 153
-data8 0x3FDE3AFC0A49FF3F // 154
-data8 0x3FDE6185206D516D // 155
-data8 0x3FDE882578823D51 // 156
-data8 0x3FDEAEDD2EAC990C // 157
-data8 0x3FDED5AC5F436BE2 // 158
-data8 0x3FDEFC9326D16AB8 // 159
-data8 0x3FDF2391A21575FF // 160
-data8 0x3FDF4AA7EE03192C // 161
-data8 0x3FDF71D627C30BB0 // 162
-data8 0x3FDF991C6CB3B379 // 163
-data8 0x3FDFC07ADA69A90F // 164
-data8 0x3FDFE7F18EB03D3E // 165
-data8 0x3FE007C053C5002E // 166
-data8 0x3FE01B942198A5A0 // 167
-data8 0x3FE02F74400C64EA // 168
-data8 0x3FE04360BE7603AC // 169
-data8 0x3FE05759AC47FE33 // 170
-data8 0x3FE06B5F1911CF51 // 171
-data8 0x3FE078BF0533C568 // 172
-data8 0x3FE08CD9687E7B0E // 173
-data8 0x3FE0A10074CF9019 // 174
-data8 0x3FE0B5343A234476 // 175
-data8 0x3FE0C974C89431CD // 176
-data8 0x3FE0DDC2305B9886 // 177
-data8 0x3FE0EB524BAFC918 // 178
-data8 0x3FE0FFB54213A475 // 179
-data8 0x3FE114253DA97D9F // 180
-data8 0x3FE128A24F1D9AFF // 181
-data8 0x3FE1365252BF0864 // 182
-data8 0x3FE14AE558B4A92D // 183
-data8 0x3FE15F85A19C765B // 184
-data8 0x3FE16D4D38C119FA // 185
-data8 0x3FE18203C20DD133 // 186
-data8 0x3FE196C7BC4B1F3A // 187
-data8 0x3FE1A4A738B7A33C // 188
-data8 0x3FE1B981C0C9653C // 189
-data8 0x3FE1CE69E8BB106A // 190
-data8 0x3FE1DC619DE06944 // 191
-data8 0x3FE1F160A2AD0DA3 // 192
-data8 0x3FE2066D7740737E // 193
-data8 0x3FE2147DBA47A393 // 194
-data8 0x3FE229A1BC5EBAC3 // 195
-data8 0x3FE237C1841A502E // 196
-data8 0x3FE24CFCE6F80D9A // 197
-data8 0x3FE25B2C55CD5762 // 198
-data8 0x3FE2707F4D5F7C40 // 199
-data8 0x3FE285E0842CA383 // 200
-data8 0x3FE294294708B773 // 201
-data8 0x3FE2A9A2670AFF0C // 202
-data8 0x3FE2B7FB2C8D1CC0 // 203
-data8 0x3FE2C65A6395F5F5 // 204
-data8 0x3FE2DBF557B0DF42 // 205
-data8 0x3FE2EA64C3F97654 // 206
-data8 0x3FE3001823684D73 // 207
-data8 0x3FE30E97E9A8B5CC // 208
-data8 0x3FE32463EBDD34E9 // 209
-data8 0x3FE332F4314AD795 // 210
-data8 0x3FE348D90E7464CF // 211
-data8 0x3FE35779F8C43D6D // 212
-data8 0x3FE36621961A6A99 // 213
-data8 0x3FE37C299F3C366A // 214
-data8 0x3FE38AE2171976E7 // 215
-data8 0x3FE399A157A603E7 // 216
-data8 0x3FE3AFCCFE77B9D1 // 217
-data8 0x3FE3BE9D503533B5 // 218
-data8 0x3FE3CD7480B4A8A2 // 219
-data8 0x3FE3E3C43918F76C // 220
-data8 0x3FE3F2ACB27ED6C6 // 221
-data8 0x3FE4019C2125CA93 // 222
-data8 0x3FE4181061389722 // 223
-data8 0x3FE42711518DF545 // 224
-data8 0x3FE436194E12B6BF // 225
-data8 0x3FE445285D68EA69 // 226
-data8 0x3FE45BCC464C893A // 227
-data8 0x3FE46AED21F117FC // 228
-data8 0x3FE47A1527E8A2D3 // 229
-data8 0x3FE489445EFFFCCB // 230
-data8 0x3FE4A018BCB69835 // 231
-data8 0x3FE4AF5A0C9D65D7 // 232
-data8 0x3FE4BEA2A5BDBE87 // 233
-data8 0x3FE4CDF28F10AC46 // 234
-data8 0x3FE4DD49CF994058 // 235
-data8 0x3FE4ECA86E64A683 // 236
-data8 0x3FE503C43CD8EB68 // 237
-data8 0x3FE513356667FC57 // 238
-data8 0x3FE522AE0738A3D7 // 239
-data8 0x3FE5322E26867857 // 240
-data8 0x3FE541B5CB979809 // 241
-data8 0x3FE55144FDBCBD62 // 242
-data8 0x3FE560DBC45153C6 // 243
-data8 0x3FE5707A26BB8C66 // 244
-data8 0x3FE587F60ED5B8FF // 245
-data8 0x3FE597A7977C8F31 // 246
-data8 0x3FE5A760D634BB8A // 247
-data8 0x3FE5B721D295F10E // 248
-data8 0x3FE5C6EA94431EF9 // 249
-data8 0x3FE5D6BB22EA86F5 // 250
-data8 0x3FE5E6938645D38F // 251
-data8 0x3FE5F673C61A2ED1 // 252
-data8 0x3FE6065BEA385926 // 253
-data8 0x3FE6164BFA7CC06B // 254
-data8 0x3FE62643FECF9742 // 255
-//
-// two parts of ln(2)
-data8 0x3FE62E42FEF00000,0x3DD473DE6AF278ED
-//
-// lo parts of ln(1/frcpa(1+i/256)), i=0...255
-data4 0x20E70672 // 0
-data4 0x1F60A5D0 // 1
-data4 0x218EABA0 // 2
-data4 0x21403104 // 3
-data4 0x20E9B54E // 4
-data4 0x21EE1382 // 5
-data4 0x226014E3 // 6
-data4 0x2095E5C9 // 7
-data4 0x228BA9D4 // 8
-data4 0x22932B86 // 9
-data4 0x22608A57 // 10
-data4 0x220209F3 // 11
-data4 0x212882CC // 12
-data4 0x220D46E2 // 13
-data4 0x21FA4C28 // 14
-data4 0x229E5BD9 // 15
-data4 0x228C9838 // 16
-data4 0x2311F954 // 17
-data4 0x221365DF // 18
-data4 0x22BD0CB3 // 19
-data4 0x223D4BB7 // 20
-data4 0x22A71BBE // 21
-data4 0x237DB2FA // 22
-data4 0x23194C9D // 23
-data4 0x22EC639E // 24
-data4 0x2367E669 // 25
-data4 0x232E1D5F // 26
-data4 0x234A639B // 27
-data4 0x2365C0E0 // 28
-data4 0x234646C1 // 29
-data4 0x220CBF9C // 30
-data4 0x22A00FD4 // 31
-data4 0x2306A3F2 // 32
-data4 0x23745A9B // 33
-data4 0x2398D756 // 34
-data4 0x23DD0B6A // 35
-data4 0x23DE338B // 36
-data4 0x23A222DF // 37
-data4 0x223164F8 // 38
-data4 0x23B4E87B // 39
-data4 0x23D6CCB8 // 40
-data4 0x220C2099 // 41
-data4 0x21B86B67 // 42
-data4 0x236D14F1 // 43
-data4 0x225A923F // 44
-data4 0x22748723 // 45
-data4 0x22200D13 // 46
-data4 0x23C296EA // 47
-data4 0x2302AC38 // 48
-data4 0x234B1996 // 49
-data4 0x2385E298 // 50
-data4 0x23175BE5 // 51
-data4 0x2193F482 // 52
-data4 0x23BFEA90 // 53
-data4 0x23D70A0C // 54
-data4 0x231CF30A // 55
-data4 0x235D9E90 // 56
-data4 0x221AD0CB // 57
-data4 0x22FAA08B // 58
-data4 0x23D29A87 // 59
-data4 0x20C4B2FE // 60
-data4 0x2381B8B7 // 61
-data4 0x23F8D9FC // 62
-data4 0x23EAAE7B // 63
-data4 0x2329E8AA // 64
-data4 0x23EC0322 // 65
-data4 0x2357FDCB // 66
-data4 0x2392A9AD // 67
-data4 0x22113B02 // 68
-data4 0x22DEE901 // 69
-data4 0x236A6D14 // 70
-data4 0x2371D33E // 71
-data4 0x2146F005 // 72
-data4 0x23230B06 // 73
-data4 0x22F1C77D // 74
-data4 0x23A89FA3 // 75
-data4 0x231D1241 // 76
-data4 0x244DA96C // 77
-data4 0x23ECBB7D // 78
-data4 0x223E42B4 // 79
-data4 0x23801BC9 // 80
-data4 0x23573263 // 81
-data4 0x227C1158 // 82
-data4 0x237BD749 // 83
-data4 0x21DDBAE9 // 84
-data4 0x23401735 // 85
-data4 0x241D9DEE // 86
-data4 0x23BC88CB // 87
-data4 0x2396D5F1 // 88
-data4 0x23FC89CF // 89
-data4 0x2414F9A2 // 90
-data4 0x2474A0F5 // 91
-data4 0x24354B60 // 92
-data4 0x23C1EB40 // 93
-data4 0x2306DD92 // 94
-data4 0x24353B6B // 95
-data4 0x23CD1701 // 96
-data4 0x237C7A1C // 97
-data4 0x245793AA // 98
-data4 0x24563695 // 99
-data4 0x23C51467 // 100
-data4 0x24476B68 // 101
-data4 0x212585A9 // 102
-data4 0x247B8293 // 103
-data4 0x2446848A // 104
-data4 0x246A53F8 // 105
-data4 0x246E496D // 106
-data4 0x23ED1D36 // 107
-data4 0x2314C258 // 108
-data4 0x233244A7 // 109
-data4 0x245B7AF0 // 110
-data4 0x24247130 // 111
-data4 0x22D67B38 // 112
-data4 0x2449F620 // 113
-data4 0x23BBC8B8 // 114
-data4 0x237D3BA0 // 115
-data4 0x245E8F13 // 116
-data4 0x2435573F // 117
-data4 0x242DE666 // 118
-data4 0x2463BC10 // 119
-data4 0x2466587D // 120
-data4 0x2408144B // 121
-data4 0x2405F0E5 // 122
-data4 0x22381CFF // 123
-data4 0x24154F9B // 124
-data4 0x23A4E96E // 125
-data4 0x24052967 // 126
-data4 0x2406963F // 127
-data4 0x23F7D3CB // 128
-data4 0x2448AFF4 // 129
-data4 0x24657A21 // 130
-data4 0x22FBC230 // 131
-data4 0x243C8DEA // 132
-data4 0x225DC4B7 // 133
-data4 0x23496EBF // 134
-data4 0x237C2B2B // 135
-data4 0x23A4A5B1 // 136
-data4 0x2394E9D1 // 137
-data4 0x244BC950 // 138
-data4 0x23C7448F // 139
-data4 0x2404A1AD // 140
-data4 0x246511D5 // 141
-data4 0x24246526 // 142
-data4 0x23111F57 // 143
-data4 0x22868951 // 144
-data4 0x243EB77F // 145
-data4 0x239F3DFF // 146
-data4 0x23089666 // 147
-data4 0x23EBFA6A // 148
-data4 0x23C51312 // 149
-data4 0x23E1DD5E // 150
-data4 0x232C0944 // 151
-data4 0x246A741F // 152
-data4 0x2414DF8D // 153
-data4 0x247B5546 // 154
-data4 0x2415C980 // 155
-data4 0x24324ABD // 156
-data4 0x234EB5E5 // 157
-data4 0x2465E43E // 158
-data4 0x242840D1 // 159
-data4 0x24444057 // 160
-data4 0x245E56F0 // 161
-data4 0x21AE30F8 // 162
-data4 0x23FB3283 // 163
-data4 0x247A4D07 // 164
-data4 0x22AE314D // 165
-data4 0x246B7727 // 166
-data4 0x24EAD526 // 167
-data4 0x24B41DC9 // 168
-data4 0x24EE8062 // 169
-data4 0x24A0C7C4 // 170
-data4 0x24E8DA67 // 171
-data4 0x231120F7 // 172
-data4 0x24401FFB // 173
-data4 0x2412DD09 // 174
-data4 0x248C131A // 175
-data4 0x24C0A7CE // 176
-data4 0x243DD4C8 // 177
-data4 0x24457FEB // 178
-data4 0x24DEEFBB // 179
-data4 0x243C70AE // 180
-data4 0x23E7A6FA // 181
-data4 0x24C2D311 // 182
-data4 0x23026255 // 183
-data4 0x2437C9B9 // 184
-data4 0x246BA847 // 185
-data4 0x2420B448 // 186
-data4 0x24C4CF5A // 187
-data4 0x242C4981 // 188
-data4 0x24DE1525 // 189
-data4 0x24F5CC33 // 190
-data4 0x235A85DA // 191
-data4 0x24A0B64F // 192
-data4 0x244BA0A4 // 193
-data4 0x24AAF30A // 194
-data4 0x244C86F9 // 195
-data4 0x246D5B82 // 196
-data4 0x24529347 // 197
-data4 0x240DD008 // 198
-data4 0x24E98790 // 199
-data4 0x2489B0CE // 200
-data4 0x22BC29AC // 201
-data4 0x23F37C7A // 202
-data4 0x24987FE8 // 203
-data4 0x22AFE20B // 204
-data4 0x24C8D7C2 // 205
-data4 0x24B28B7D // 206
-data4 0x23B6B271 // 207
-data4 0x24C77CB6 // 208
-data4 0x24EF1DCA // 209
-data4 0x24A4F0AC // 210
-data4 0x24CF113E // 211
-data4 0x2496BBAB // 212
-data4 0x23C7CC8A // 213
-data4 0x23AE3961 // 214
-data4 0x2410A895 // 215
-data4 0x23CE3114 // 216
-data4 0x2308247D // 217
-data4 0x240045E9 // 218
-data4 0x24974F60 // 219
-data4 0x242CB39F // 220
-data4 0x24AB8D69 // 221
-data4 0x23436788 // 222
-data4 0x24305E9E // 223
-data4 0x243E71A9 // 224
-data4 0x23C2A6B3 // 225
-data4 0x23FFE6CF // 226
-data4 0x2322D801 // 227
-data4 0x24515F21 // 228
-data4 0x2412A0D6 // 229
-data4 0x24E60D44 // 230
-data4 0x240D9251 // 231
-data4 0x247076E2 // 232
-data4 0x229B101B // 233
-data4 0x247B12DE // 234
-data4 0x244B9127 // 235
-data4 0x2499EC42 // 236
-data4 0x21FC3963 // 237
-data4 0x23E53266 // 238
-data4 0x24CE102D // 239
-data4 0x23CC45D2 // 240
-data4 0x2333171D // 241
-data4 0x246B3533 // 242
-data4 0x24931129 // 243
-data4 0x24405FFA // 244
-data4 0x24CF464D // 245
-data4 0x237095CD // 246
-data4 0x24F86CBD // 247
-data4 0x24E2D84B // 248
-data4 0x21ACBB44 // 249
-data4 0x24F43A8C // 250
-data4 0x249DB931 // 251
-data4 0x24A385EF // 252
-data4 0x238B1279 // 253
-data4 0x2436213E // 254
-data4 0x24F18A3B // 255
-LOCAL_OBJECT_END(log_data)
-
-
-// Code
-//==============================================================
 
 .section .text
-GLOBAL_IEEE754_ENTRY(log1p)
+.proc log1p#
+.global log1p#
+.align 64 
+log1p:
+#ifdef _LIBC
+.global __log1p
+__log1p:
+#endif
+
 { .mfi
-      getf.exp      GR_signexp_x = f8 // if x is unorm then must recompute
-      fadd.s1       FR_Xp1 = f8, f1       // Form 1+x
-      mov           GR_05 = 0xfffe
+alloc r32 = ar.pfs,0,22,4,0
+(p0)  fsub.s1 FR_Neg_One = f0,f1 
+(p0)  cmp.eq.unc  p7, p0 = r0, r0 
 }
-{ .mlx
-      addl          GR_ad_1 = @ltoff(log_data),gp
-      movl          GR_A3 = 0x3fd5555555555557 // double precision memory
-                                               // representation of A3
+
+{ .mfi
+(p0)  cmp.ne.unc  p14, p0 = r0, r0 
+(p0)  fnorm.s1 FR_X_Prime = FR_Input_X 
+(p0)  cmp.eq.unc  p15, p0 = r0, r0 ;; 
 }
-;;
 
 { .mfi
-      ld8           GR_ad_1 = [GR_ad_1]
-      fclass.m      p8,p0 = f8,0xb // Is x unorm?
-      mov           GR_exp_mask = 0x1ffff
+      nop.m 999
+(p0)  fclass.m.unc p6, p0 =  FR_Input_X, 0x1E3 
+      nop.i 999
 }
+;;
+
 { .mfi
-      nop.m         0
-      fnorm.s1      FR_NormX = f8              // Normalize x
-      mov           GR_exp_bias = 0xffff
+	nop.m 999
+(p0)  fclass.nm.unc p10, p0 =  FR_Input_X, 0x1FF 
+      nop.i 999
 }
 ;;
 
 { .mfi
-      setf.exp      FR_A2 = GR_05 // create A2 = 0.5
-      fclass.m      p9,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
-      nop.i         0
+	nop.m 999
+(p0)  fcmp.eq.unc.s1 p9, p0 =  FR_Input_X, f0 
+      nop.i 999
 }
-{ .mib
-      setf.d        FR_A3 = GR_A3 // create A3
-      add           GR_ad_2 = 16,GR_ad_1 // address of A5,A4
-(p8)  br.cond.spnt  log1p_unorm          // Branch if x=unorm
+
+{ .mfi
+	nop.m 999
+(p0)  fadd FR_Em1 = f0,f0 
+	nop.i 999 ;;
 }
-;;
 
-log1p_common:
 { .mfi
-      nop.m         0
-      frcpa.s1      FR_RcpX,p0 = f1,FR_Xp1
-      nop.i         0
+	nop.m 999
+(p0)  fadd FR_E = f0,f1 
+	nop.i 999 ;;
 }
-{ .mfb
-      nop.m         0
-(p9)  fma.d.s0      f8 = f8,f1,f0 // set V-flag
-(p9)  br.ret.spnt   b0 // exit for NaN, NaT and +Inf
+
+{ .mfi
+	nop.m 999
+(p0)  fcmp.eq.unc.s1 p8, p0 =  FR_Input_X, FR_Neg_One 
+	nop.i 999
 }
-;;
 
 { .mfi
-      getf.exp      GR_Exp = FR_Xp1            // signexp of x+1
-      fclass.m      p10,p0 = FR_Xp1,0x3A // is 1+x < 0?
-      and           GR_exp_x = GR_exp_mask, GR_signexp_x // biased exponent of x
+	nop.m 999
+(p0)  fcmp.lt.unc.s1 p13, p0 =  FR_Input_X, FR_Neg_One 
+	nop.i 999
 }
+
+
+L(LOG_BEGIN): 
+
 { .mfi
-      ldfpd         FR_A7,FR_A6 = [GR_ad_1]
-      nop.f         0
-      nop.i         0
+	nop.m 999
+(p0)  fadd.s1 FR_Z = FR_X_Prime, FR_E 
+	nop.i 999
+}
+
+{ .mlx
+	nop.m 999
+(p0)  movl GR_Table_Scale = 0x0000000000000018 ;; 
+}
+
+{ .mmi
+	nop.m 999
+//     
+//    Create E = 1 and Em1 = 0 
+//    Check for X == 0, meaning log(1+0)
+//    Check for X < -1, meaning log(negative)
+//    Check for X == -1, meaning log(0)
+//    Normalize x 
+//    Identify NatVals, NaNs, Infs. 
+//    Identify EM unsupporteds. 
+//    Identify Negative values - use S1 so as
+//    not to raise denormal operand exception 
+//    Set p15 to true for log1p
+//    Set p14 to false for log1p
+//    Set p7 true for log and log1p
+//    
+(p0)  addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp
+      nop.i  999
 }
-;;
 
 { .mfi
-      getf.sig      GR_Sig = FR_Xp1 // get significand to calculate index
-                                    // for Thi,Tlo if |x| >= 2^-8
-      fcmp.eq.s1    p12,p0 = f8,f0     // is x equal to 0?
-      sub           GR_exp_x = GR_exp_x, GR_exp_bias // true exponent of x
+	nop.m 999
+(p0)  fmax.s1 FR_AA = FR_X_Prime, FR_E 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      sub           GR_N = GR_Exp,GR_exp_bias // true exponent of x+1
-      fcmp.eq.s1    p11,p0 = FR_Xp1,f0     // is x = -1?
-      cmp.gt        p6,p7 = -8, GR_exp_x  // Is |x| < 2^-8
+      ld8    GR_Table_Base = [GR_Table_Base]
+(p0)  fmin.s1 FR_BB = FR_X_Prime, FR_E 
+	nop.i 999
 }
+
 { .mfb
-      ldfpd         FR_A5,FR_A4 = [GR_ad_2],16
-      nop.f         0
-(p10) br.cond.spnt  log1p_lt_minus_1   // jump if x < -1
+	nop.m 999
+(p0)  fadd.s1 FR_W = FR_X_Prime, FR_Em1 
+//     
+//    Begin load of constants base
+//    FR_Z = Z = |x| + E 
+//    FR_W = W = |x| + Em1
+//    AA = fmax(|x|,E)
+//    BB = fmin(|x|,E)
+//
+(p6)  br.cond.spnt L(LOG_64_special) ;; 
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p10) br.cond.spnt L(LOG_64_unsupported) ;; 
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p13) br.cond.spnt L(LOG_64_negative) ;; 
+}
+
+{ .mib
+(p0)  getf.sig GR_signif = FR_Z 
+	nop.i 999
+(p9)  br.cond.spnt L(LOG_64_one) ;; 
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p8)  br.cond.spnt L(LOG_64_zero) ;; 
 }
-;;
 
-// p6 is true if |x| < 1/256
-// p7 is true if |x| >= 1/256
-.pred.rel "mutex",p6,p7
 { .mfi
-(p7)  add           GR_ad_1 = 0x820,GR_ad_1 // address of log(2) parts
-(p6)  fms.s1        FR_r = f8,f1,f0 // range reduction for |x|<1/256
-(p6)  cmp.gt.unc    p10,p0 = -80, GR_exp_x  // Is |x| < 2^-80
+(p0)  getf.exp GR_N =  FR_Z 
+//   
+//    Raise possible denormal operand exception 
+//    Create Bias
+// 
+//    This function computes ln( x + e ) 
+//    Input  FR 1: FR_X   = FR_Input_X          
+//    Input  FR 2: FR_E   = FR_E
+//    Input  FR 3: FR_Em1 = FR_Em1 
+//    Input  GR 1: GR_Expo_Range = GR_Expo_Range = 1
+//    Output FR 4: FR_Y_hi  
+//    Output FR 5: FR_Y_lo  
+//    Output FR 6: FR_Scale  
+//    Output PR 7: PR_Safe  
+//
+(p0)  fsub.s1 FR_S_lo = FR_AA, FR_Z 
+//
+//    signif = getf.sig(Z)
+//    abs_W = fabs(w)
+//
+(p0)  extr.u GR_Table_ptr = GR_signif, 59, 4 ;; 
 }
-{ .mfb
-(p7)  setf.sig      FR_N = GR_N // copy unbiased exponent of x to the
-                                // significand field of FR_N
-(p7)  fms.s1        FR_r = FR_RcpX,FR_Xp1,f1 // range reduction for |x|>=1/256
-(p12) br.ret.spnt   b0 // exit for x=0, return x
+
+{ .mfi
+	nop.m 999
+(p0)  fmerge.se FR_S_hi =  f1,FR_Z 
+(p0)  extr.u GR_X_0 = GR_signif, 49, 15  
+}
+
+{ .mmi
+      nop.m 999
+(p0)  addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp  
+      nop.i 999
 }
 ;;
 
+{ .mlx
+      ld8    GR_Table_Base1 = [GR_Table_Base1]
+(p0)  movl GR_Bias = 0x000000000000FFFF ;; 
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fabs FR_abs_W =  FR_W 
+(p0)  pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0 
+}
+
+{ .mfi
+	nop.m 999
+//    
+//    Branch out for special input values 
+//    
+(p0)  fcmp.lt.unc.s0 p8, p0 =  FR_Input_X, f0 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    X_0 = extr.u(signif,49,15)
+//    Index1 = extr.u(signif,59,4)
+//
+(p0)  fadd.s1 FR_S_lo = FR_S_lo, FR_BB 
+	nop.i 999 ;;
+}
+
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//    Offset_to_Z1 = 24 * Index1
+//    For performance, don't use result
+//    for 3 or 4 cycles.
+//
+(p0)  add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;; 
+}
+//
+//    Add Base to Offset for Z1
+//    Create Bias
+
+{ .mmi
+(p0)  ld4 GR_Z_1 = [GR_Table_ptr],4 ;; 
+(p0)  ldfs  FR_G = [GR_Table_ptr],4 
+	nop.i 999 ;;
+}
+
+{ .mmi
+(p0)  ldfs  FR_H = [GR_Table_ptr],8 ;; 
+(p0)  ldfd  FR_h = [GR_Table_ptr],0 
+(p0)  pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 
+}
+//
+//    Load Z_1 
+//    Get Base of Table2 
+//
+
+{ .mfi
+(p0)  getf.exp GR_M = FR_abs_W 
+	nop.f 999
+	nop.i 999 ;;
+}
+
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//    M = getf.exp(abs_W)
+//    S_lo = AA - Z
+//    X_1 = pmpyshr2(X_0,Z_1,15)
+//
+(p0)  sub GR_M = GR_M, GR_Bias ;; 
+}
+//     
+//    M = M - Bias
+//    Load G1
+//    N = getf.exp(Z)
+//
+
+{ .mii
+(p0)  cmp.gt.unc  p11, p0 =  -80, GR_M 
+(p0)  cmp.gt.unc  p12, p0 =  -7, GR_M ;; 
+(p0)  extr.u GR_Index2 = GR_X_1, 6, 4 ;; 
+}
+
+{ .mib
+	nop.m 999
+//
+//    if -80 > M, set p11
+//    Index2 = extr.u(X_1,6,4)
+//    if -7  > M, set p12
+//    Load H1
+//
+(p0)  pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0 
+(p11) br.cond.spnt L(log1p_small) ;; 
+}
+
 { .mib
-(p7)  ldfpd         FR_Ln2hi,FR_Ln2lo = [GR_ad_1],16
-(p7)  extr.u        GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
-(p11) br.cond.spnt  log1p_eq_minus_1 // jump if x = -1
+      nop.m 999
+	nop.i 999
+(p12) br.cond.spnt L(log1p_near) ;; 
 }
-;;
 
-{ .mmf
-(p7)  shladd        GR_ad_2 = GR_Ind,3,GR_ad_2 // address of Thi
-(p7)  shladd        GR_ad_1 = GR_Ind,2,GR_ad_1 // address of Tlo
-(p10) fnma.d.s0     f8 = f8,f8,f8   // If |x| very small, result=x-x*x
+{ .mii
+(p0)  sub GR_N = GR_N, GR_Bias 
+//
+//    poly_lo = r * poly_lo 
+//
+(p0)  add GR_Perturb = 0x1, r0 ;; 
+(p0)  sub GR_ScaleN = GR_Bias, GR_N  
 }
-;;
+
+{ .mii
+(p0)  setf.sig FR_float_N = GR_N 
+	nop.i 999 ;;
+//
+//    Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)
+//    Load h1
+//    S_lo = S_lo + BB 
+//    Branch for -80 > M
+//   
+(p0)  add GR_Index2 = GR_Index2, GR_Table_Base1
+}
+
+{ .mmi
+(p0)  setf.exp FR_two_negN = GR_ScaleN 
+      nop.m 999
+(p0)  addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp  
+};;
+
+//
+//    Index2 points to Z2
+//    Branch for -7 > M
+//
 
 { .mmb
-(p7)  ldfd          FR_Thi = [GR_ad_2]
-(p7)  ldfs          FR_Tlo = [GR_ad_1]
-(p10) br.ret.spnt   b0                   // Exit if |x| < 2^(-80)
+(p0)  ld4 GR_Z_2 = [GR_Index2],4 
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.b 999 ;;
 }
-;;
+(p0)  nop.i 999
+//
+//    Load Z_2
+//    N = N - Bias
+//    Tablebase points to Table3
+//
+
+{ .mmi
+(p0)  ldfs  FR_G_tmp = [GR_Index2],4 ;; 
+//
+//    Load G_2
+//    pmpyshr2  X_2= (X_1,Z_2,15)
+//    float_N = setf.sig(N)
+//    ScaleN = Bias - N
+//
+(p0)  ldfs  FR_H_tmp = [GR_Index2],8 
+	nop.i 999 ;;
+}
+//
+//    Load H_2
+//    two_negN = setf.exp(scaleN)
+//    G = G_1 * G_2
+//
 
 { .mfi
-      nop.m         0
-      fma.s1        FR_r2 = FR_r,FR_r,f0 // r^2
-      nop.i         0
+(p0)  ldfd  FR_h_tmp = [GR_Index2],0 
+	nop.f 999
+(p0)  pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;; 
 }
+
+{ .mii
+	nop.m 999
+(p0)  extr.u GR_Index3 = GR_X_2, 1, 5 ;; 
+//
+//    Load h_2
+//    H = H_1 + H_2 
+//    h = h_1 + h_2 
+//    Index3 = extr.u(X_2,1,5)
+//
+(p0)  shladd GR_Index3 = GR_Index3,4,GR_Table_Base 
+}
+
+{ .mmi
+	nop.m 999
+	nop.m 999
+//
+//    float_N = fcvt.xf(float_N)
+//    load G3
+//
+(p0)  addl GR_Table_Base = @ltoff(Constants_Q#),gp ;; 
+}
+
 { .mfi
-      nop.m         0
-      fms.s1        FR_A2 = FR_A3,FR_r,FR_A2 // A3*r+A2
-      nop.i         0
+ld8    GR_Table_Base = [GR_Table_Base]
+nop.f 999
+nop.i 999
+} ;;
+
+{ .mfi
+(p0)  ldfe FR_log2_hi = [GR_Table_Base],16 
+(p0)  fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN 
+	nop.i 999 ;;
+}
+
+{ .mmf
+	nop.m 999
+//
+//    G = G3 * G
+//    Load h3
+//    Load log2_hi
+//    H = H + H3
+//
+(p0)  ldfe FR_log2_lo = [GR_Table_Base],16 
+(p0)  fmpy.s1 FR_G = FR_G, FR_G_tmp ;; 
+}
+
+{ .mmf
+(p0)  ldfs  FR_G_tmp = [GR_Index3],4 
+//
+//    h = h + h3
+//    r = G * S_hi + 1 
+//    Load log2_lo
+//
+(p0)  ldfe FR_Q4 = [GR_Table_Base],16 
+(p0)  fadd.s1 FR_h = FR_h, FR_h_tmp ;; 
 }
-;;
 
 { .mfi
-      nop.m         0
-      fma.s1        FR_A6 = FR_A7,FR_r,FR_A6 // A7*r+A6
-      nop.i         0
+(p0)  ldfe FR_Q3 = [GR_Table_Base],16 
+(p0)  fadd.s1 FR_H = FR_H, FR_H_tmp 
+	nop.i 999 ;;
 }
+
+{ .mmf
+(p0)  ldfs  FR_H_tmp = [GR_Index3],4 
+(p0)  ldfe FR_Q2 = [GR_Table_Base],16 
+//
+//    Compute Index for Table3
+//    S_lo = S_lo * two_negN
+//
+(p0)  fcvt.xf FR_float_N = FR_float_N ;; 
+}
+//
+//    If S_lo == 0, set p8 false
+//    Load H3
+//    Load ptr to table of polynomial coeff.
+//
+
+{ .mmf
+(p0)  ldfd  FR_h_tmp = [GR_Index3],0 
+(p0)  ldfe FR_Q1 = [GR_Table_Base],0 
+(p0)  fcmp.eq.unc.s1 p0, p8 =  FR_S_lo, f0 ;; 
+}
+
 { .mfi
-      nop.m         0
-      fma.s1        FR_A4 = FR_A5,FR_r,FR_A4 // A5*r+A4
-      nop.i         0
+	nop.m 999
+(p0)  fmpy.s1 FR_G = FR_G, FR_G_tmp 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m         0
-(p7)  fcvt.xf       FR_N = FR_N
-      nop.i         0
+	nop.m 999
+(p0)  fadd.s1 FR_H = FR_H, FR_H_tmp 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m         0
-      fma.s1        FR_r4 = FR_r2,FR_r2,f0 // r^4
-      nop.i         0
+	nop.m 999
+(p0)  fms.s1 FR_r = FR_G, FR_S_hi, f1 
+	nop.i 999
 }
+
 { .mfi
-      nop.m         0
-      // (A3*r+A2)*r^2+r
-      fma.s1        FR_A2 = FR_A2,FR_r2,FR_r
-      nop.i         0
+	nop.m 999
+(p0)  fadd.s1 FR_h = FR_h, FR_h_tmp 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m         0
-      // (A7*r+A6)*r^2+(A5*r+A4)
-      fma.s1        FR_A4 = FR_A6,FR_r2,FR_A4
-      nop.i         0
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m         0
-      // N*Ln2hi+Thi
-(p7)  fma.s1        FR_NxLn2hipThi = FR_N,FR_Ln2hi,FR_Thi
-      nop.i         0
+	nop.m 999
+//
+//    Load Q4 
+//    Load Q3 
+//    Load Q2 
+//    Load Q1 
+//
+(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r 
+	nop.i 999
 }
+
 { .mfi
-      nop.m         0
-      // N*Ln2lo+Tlo
-(p7)  fma.s1        FR_NxLn2lopTlo = FR_N,FR_Ln2lo,FR_Tlo
-      nop.i         0
+	nop.m 999
+//
+//    poly_lo = r * Q4 + Q3
+//    rsq = r* r
+//
+(p0)  fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m         0
-(p7)  fma.s1        f8 = FR_A4,FR_r4,FR_A2 // P(r) if |x| >= 1/256
-      nop.i         0
+	nop.m 999
+//
+//    If (S_lo!=0) r = s_lo * G + r
+//
+(p0)  fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 
+	nop.i 999
 }
+//
+//    Create a 0x00000....01
+//    poly_lo = poly_lo * rsq + h
+//
+
 { .mfi
-      nop.m         0
-      // (N*Ln2hi+Thi) + (N*Ln2lo+Tlo)
-(p7)  fma.s1        FR_NxLn2pT = FR_NxLn2hipThi,f1,FR_NxLn2lopTlo
-      nop.i         0
+(p0)  setf.sig FR_dummy = GR_Perturb 
+(p0)  fmpy.s1 FR_rsq = FR_r, FR_r 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    h = N * log2_lo + h 
+//    Y_hi = n * log2_hi + H 
+//
+(p0)  fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    poly_lo = r * poly_lo + Q2 
+//    poly_hi = Q1 * rsq + r 
+//
+(p0)  fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r 
+	nop.i 999 ;;
 }
-;;
 
-.pred.rel "mutex",p6,p7
 { .mfi
-      nop.m         0
-(p6)  fma.d.s0      f8 = FR_A4,FR_r4,FR_A2 // result if 2^(-80) <= |x| < 1/256
-      nop.i         0
+	nop.m 999
+(p0)  fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h 
+	nop.i 999 ;;
 }
+
 { .mfb
-      nop.m         0
-(p7)  fma.d.s0      f8 = f8,f1,FR_NxLn2pT  // result if |x| >= 1/256
-      br.ret.sptk   b0                     // Exit if |x| >= 2^(-80)
+	nop.m 999
+(p0)  fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo 
+//
+//    Create the FR for a binary "or"
+//    Y_lo = poly_hi + poly_lo
+//
+// (p0)  for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+//    Turn the lsb of Y_lo ON
+//
+// (p0)  fmerge.se FR_Y_lo =  FR_Y_lo,FR_dummy ;;
+//
+//    Merge the new lsb into Y_lo, for alone doesn't
+//
+(p0)  br.cond.sptk L(LOG_main) ;; 
+}
+
+
+L(log1p_near): 
+
+{ .mmi
+	nop.m 999
+	nop.m 999
+//    /*******************************************************/
+//    /*********** Branch log1p_near  ************************/
+//    /*******************************************************/
+(p0)  addl GR_Table_Base = @ltoff(Constants_P#),gp ;; 
+}
+//
+//    Load base address of poly. coeff.
+//
+{.mmi
+      nop.m 999
+      ld8    GR_Table_Base = [GR_Table_Base]
+      nop.i 999
+};;
+
+{ .mmb
+(p0)  add GR_Table_ptr = 0x40,GR_Table_Base  
+//
+//    Address tables with separate pointers 
+//
+(p0)  ldfe FR_P8 = [GR_Table_Base],16 
+	nop.b 999 ;;
+}
+
+{ .mmb
+(p0)  ldfe FR_P4 = [GR_Table_ptr],16 
+//
+//    Load P4
+//    Load P8
+//
+(p0)  ldfe FR_P7 = [GR_Table_Base],16 
+	nop.b 999 ;;
+}
+
+{ .mmf
+(p0)  ldfe FR_P3 = [GR_Table_ptr],16 
+//
+//    Load P3
+//    Load P7
+//
+(p0)  ldfe FR_P6 = [GR_Table_Base],16 
+(p0)  fmpy.s1 FR_wsq = FR_W, FR_W ;; 
+}
+
+{ .mfi
+(p0)  ldfe FR_P2 = [GR_Table_ptr],16 
+	nop.f 999
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3 
+	nop.i 999
+}
+//
+//    Load P2
+//    Load P6
+//    Wsq = w * w
+//    Y_hi = p4 * w + p3
+//
+
+{ .mfi
+(p0)  ldfe FR_P5 = [GR_Table_Base],16 
+(p0)  fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7 
+	nop.i 999 ;;
+}
+
+{ .mfi
+(p0)  ldfe FR_P1 = [GR_Table_ptr],16 
+//
+//    Load P1
+//    Load P5
+//    Y_lo = p8 * w + P7
+//
+(p0)  fmpy.s1 FR_w4 = FR_wsq, FR_wsq 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6 
+(p0)  add GR_Perturb = 0x1, r0 ;; 
+}
+
+{ .mfi
+	nop.m 999
+//
+//    w4 = w2 * w2 
+//    Y_hi = y_hi * w + p2 
+//    Y_lo = y_lo * w + p6 
+//    Create perturbation bit
+//
+(p0)  fmpy.s1 FR_w6 = FR_w4, FR_wsq 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1 
+	nop.i 999
+}
+//
+//    Y_hi = y_hi * w + p1 
+//    w6 = w4 * w2 
+//
+
+{ .mfi
+(p0)  setf.sig FR_Q4 = GR_Perturb 
+(p0)  fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_wsq,FR_Y_hi, FR_W 
+	nop.i 999
 }
-;;
 
-.align 32
-log1p_unorm:
-// Here if x=unorm
 { .mfb
-      getf.exp      GR_signexp_x = FR_NormX // recompute biased exponent
-      nop.f         0
-      br.cond.sptk  log1p_common
+	nop.m 999
+//
+//    Y_hi = y_hi * wsq + w 
+//    Y_lo = y_lo * w + p5 
+//
+(p0)  fmpy.s1 FR_Y_lo = FR_w6, FR_Y_lo 
+//
+//    Y_lo = y_lo * w6  
+//
+// (p0)  for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+//    Set lsb on: Taken out to improve performance 
+//
+// (p0)  fmerge.se FR_Y_lo =  FR_Y_lo,FR_dummy ;;
+//
+//    Make sure it's on in Y_lo also.  Taken out to improve
+//    performance
+//
+(p0)  br.cond.sptk L(LOG_main) ;; 
+}
+
+
+L(log1p_small): 
+
+{ .mmi
+	nop.m 999
+	nop.m 999
+//  /*******************************************************/
+//  /*********** Branch log1p_small  ***********************/
+//  /*******************************************************/
+(p0)  addl GR_Table_Base = @ltoff(Constants_Threshold#),gp 
 }
-;;
 
-.align 32
-log1p_eq_minus_1:
-// Here if x=-1
 { .mfi
-      nop.m         0
-      fmerge.s      FR_X = f8,f8 // keep input argument for subsequent
-                                 // call of __libm_error_support#
-      nop.i         0
+	nop.m 999
+(p0)  mov FR_Em1 = FR_W 
+(p0)  cmp.eq.unc  p7, p0 = r0, r0 ;; 
+}
+
+{ .mlx
+      ld8    GR_Table_Base = [GR_Table_Base]
+(p0)  movl GR_Expo_Range = 0x0000000000000002 ;; 
+}
+//
+//    Set Safe to true
+//    Set Expo_Range = 0 for single
+//    Set Expo_Range = 2 for double 
+//    Set Expo_Range = 4 for double-extended 
+//
+
+{ .mmi
+(p0)  shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;; 
+(p0)  ldfe FR_Threshold = [GR_Table_Base],16 
+	nop.i 999
+}
+
+{ .mlx
+	nop.m 999
+(p0)  movl GR_Bias = 0x000000000000FF9B ;; 
 }
-;;
 
 { .mfi
-      mov           GR_TAG = 140  // set libm error in case of log1p(-1).
-      frcpa.s0      f8,p0 = f8,f0 // log1p(-1) should be equal to -INF.
-                                      // We can get it using frcpa because it
-                                      // sets result to the IEEE-754 mandated
-                                      // quotient of f8/f0.
-      nop.i         0
+(p0)  ldfe FR_Tiny = [GR_Table_Base],0 
+	nop.f 999
+	nop.i 999 ;;
 }
-{ .mib
-      nop.m         0
-      nop.i         0
-      br.cond.sptk  log_libm_err
+
+{ .mfi
+	nop.m 999
+(p0)  fcmp.gt.unc.s1 p13, p12 =  FR_abs_W, FR_Threshold 
+	nop.i 999 ;;
 }
-;;
 
-.align 32
-log1p_lt_minus_1:
-// Here if x < -1
 { .mfi
-      nop.m         0
-      fmerge.s      FR_X = f8,f8
-      nop.i         0
+	nop.m 999
+(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p13) fadd FR_SCALE = f0, f1 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny 
+(p12) cmp.ne.unc  p7, p0 = r0, r0 
 }
-;;
 
 { .mfi
-      mov           GR_TAG = 141  // set libm error in case of x < -1.
-      frcpa.s0      f8,p0 = f0,f0 // log1p(x) x < -1 should be equal to NaN.
-                                  // We can get it using frcpa because it
-                                  // sets result to the IEEE-754 mandated
-                                  // quotient of f0/f0 i.e. NaN.
-      nop.i         0
+(p12) setf.exp FR_SCALE = GR_Bias 
+	nop.f 999
+	nop.i 999 ;;
+}
+
+//
+//    Set p7 to SAFE = FALSE
+//    Set Scale = 2^-100 
+//
+{ .mfb
+	nop.m 999
+(p0)  fma.d.s0 FR_Input_X = FR_Y_lo,FR_SCALE,FR_Y_hi
+(p0)  br.ret.sptk   b0
 }
 ;;
 
-.align 32
-log_libm_err:
-{ .mmi
-      alloc         r32 = ar.pfs,1,4,4,0
-      mov           GR_Parameter_TAG = GR_TAG
-      nop.i         0
+L(LOG_64_one): 
+
+{ .mfb
+	nop.m 999
+(p0)  fmpy.d.s0 FR_Input_X = FR_Input_X, f0 
+(p0)  br.ret.sptk   b0
 }
 ;;
 
-GLOBAL_IEEE754_END(log1p)
+//    
+//    Raise divide by zero for +/-0 input.
+//    
+L(LOG_64_zero): 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+{ .mfi
+(p0)  mov   GR_Parameter_TAG = 140
+//
+//    If we have log1p(-1), return -Inf.
+//  
+(p0)  fsub.s0 FR_Output_X_tmp = f0, f1 
+      nop.i 999 ;;
+}
+{ .mfb
+      nop.m 999
+(p0)  frcpa.s0 FR_Output_X_tmp, p8 =  FR_Output_X_tmp, f0 
+(p0)  br.cond.sptk L(LOG_ERROR_Support) ;; 
+}
+
+L(LOG_64_special): 
+
+{ .mfi
+      nop.m 999
+//    
+//    Return -Inf or value from handler.
+//    
+(p0)  fclass.m.unc p7, p0 =  FR_Input_X, 0x1E1 
+      nop.i 999 ;;
+}
+{ .mfb
+      nop.m 999
+//     
+//    Check for Natval, QNan, SNaN, +Inf   
+//    
+(p7)  fmpy.d.s0  f8 =  FR_Input_X, f1 
+//     
+//    For SNaN raise invalid and return QNaN.
+//    For QNaN raise invalid and return QNaN.
+//    For +Inf return +Inf.
+//    
+(p7)  br.ret.sptk   b0
+}
+;;
+
+//    
+//    For -Inf raise invalid and return QNaN.
+//    
+
+{ .mfb
+(p0)  mov   GR_Parameter_TAG = 141 
+(p0)  fmpy.d.s0  FR_Output_X_tmp =  FR_Input_X, f0 
+(p0)  br.cond.sptk L(LOG_ERROR_Support) ;; 
+}
+
+//     
+//    Report that log1p(-Inf) computed
+//     
+
+L(LOG_64_unsupported): 
+
+//    
+//    Return generated NaN or other value .
+//    
+
+{ .mfb
+      nop.m 999
+(p0)  fmpy.d.s0 FR_Input_X = FR_Input_X, f0 
+(p0)  br.ret.sptk   b0 ;;
+}
+
+L(LOG_64_negative): 
+
+{ .mfi
+      nop.m 999
+//     
+//    Deal with x < 0 in a special way 
+//    
+(p0)  frcpa.s0 FR_Output_X_tmp, p8 =  f0, f0 
+//     
+//    Deal with x < 0 in a special way - raise
+//    invalid and produce QNaN indefinite.
+//    
+(p0)  mov   GR_Parameter_TAG = 141
+}
+
+.endp log1p#
+ASM_SIZE_DIRECTIVE(log1p)
+
+.proc __libm_error_region
+__libm_error_region:
+L(LOG_ERROR_Support): 
 .prologue
+
+// (1)
 { .mfi
-        add   GR_Parameter_Y = -32,sp         // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS = ar.pfs             // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp = -64,sp                       // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP = gp                   // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
+
+
+// (2)
 { .mmi
-        stfd [GR_Parameter_Y] = FR_Y,16       // STORE Parameter 2 on stack
+        stfd [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
         add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0 = b0                   // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
+
 .body
+// (3)
 { .mib
-        stfd [GR_Parameter_X] = FR_X          // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
-        nop.b 0
+        stfd [GR_Parameter_X] =FR_Input_X               // STORE Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
+        nop.b 0                                      
 }
 { .mib
-        stfd [GR_Parameter_Y] = FR_RESULT     // STORE Parameter 3 on stack
+        stfd [GR_Parameter_Y] = FR_Output_X_tmp         // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#           // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
+
+// (4)
 { .mmi
-        ldfd  f8 = [GR_Parameter_RESULT]      // Get return result off stack
+        ldfd  FR_Input_X = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-        add   sp = 64,sp                      // Restore stack pointer
-        mov   b0 = GR_SAVE_B0                 // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-        mov   gp = GR_SAVE_GP                 // Restore gp
-        mov   ar.pfs = GR_SAVE_PFS            // Restore ar.pfs
-        br.ret.sptk     b0                    // Return
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk   b0 
 };;
-LOCAL_LIBM_END(__libm_error_region)
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.proc __libm_LOG_main 
+__libm_LOG_main:
+L(LOG_main): 
+
+//
+//    kernel_log_64 computes ln(X + E)
+//
+
+{ .mfi
+	nop.m 999
+(p7)  fadd.d.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
+        nop.i 999
+}
+
+{ .mmi
+	nop.m 999
+	nop.m 999
+(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;; 
+}
+
+{ .mmi
+      nop.m 999
+(p14) ld8    GR_Table_Base = [GR_Table_Base]
+      nop.i 999
+};;
+
+{ .mmi
+(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;; 
+(p14) ldfe FR_1LN10_lo = [GR_Table_Base]
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p14) fma.s1  FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp
+	nop.i 999 ;;
+}
+
+{ .mfb
+	nop.m 999
+(p14) fma.d.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp
+(p0)  br.ret.sptk   b0 ;; 
+}
+.endp __libm_LOG_main
+ASM_SIZE_DIRECTIVE(__libm_LOG_main)
+
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
diff --git a/sysdeps/ia64/fpu/s_log1pf.S b/sysdeps/ia64/fpu/s_log1pf.S
index a148d4b272..8aff9b895a 100644
--- a/sysdeps/ia64/fpu/s_log1pf.S
+++ b/sysdeps/ia64/fpu/s_log1pf.S
@@ -1,10 +1,10 @@
-.file "log1pf.s"
+.file "log1pf.s" 
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,768 +20,1610 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 06/29/01 Improved speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 10/02/02 Improved performance by basing on log algorithm
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-// 04/18/03 Eliminate possible WAW dependency warning
 //
-// API
-//==============================================================
-// float log1pf(float)
+// *********************************************************************
 //
-// log1p(x) = log(x+1)
+// Function:   log1pf(x) = ln(x+1), for single precision values
 //
-// Overview of operation
-//==============================================================
-// Background
-// ----------
+// *********************************************************************
 //
-// This algorithm is based on fact that
-// log1p(x) = log(1+x) and
-// log(a b) = log(a) + log(b).
-// In our case we have 1+x = 2^N f, where 1 <= f < 2.
-// So
-//   log(1+x) = log(2^N f) = log(2^N) + log(f) = n*log(2) + log(f)
+// Accuracy:   Very accurate for single precision values
 //
-// To calculate log(f) we do following
-//   log(f) = log(f * frcpa(f) / frcpa(f)) =
-//          = log(f * frcpa(f)) + log(1/frcpa(f))
+// *********************************************************************
 //
-// According to definition of IA-64's frcpa instruction it's a
-// floating point that approximates 1/f using a lookup on the
-// top of 8 bits of the input number's + 1 significand with relative
-// error < 2^(-8.886). So we have following
+// Resources Used:
 //
-// |(1/f - frcpa(f)) / (1/f))| = |1 - f*frcpa(f)| < 1/256
+//    Floating-Point Registers: f8 (Input and Return Value)
+//                              f9,f33-f55,f99 
 //
-// and
+//    General Purpose Registers:
+//      r32-r53
+//      r54-r57 (Used to pass arguments to error handling routine)
 //
-// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
-//        = log(1 + r) + T
+//    Predicate Registers:      p6-p15
 //
-// The first value can be computed by polynomial P(r) approximating
-// log(1 + r) on |r| < 1/256 and the second is precomputed tabular
-// value defined by top 8 bit of f.
+// *********************************************************************
 //
-// Finally we have that  log(1+x) ~ (N*log(2) + T) + P(r)
+// IEEE Special Conditions:
 //
-// Note that if input argument is close to 0.0 (in our case it means
-// that |x| < 1/256) we can use just polynomial approximation
-// because 1+x = 2^0 * f = f = 1 + r and
-// log(1+x) = log(1 + r) ~ P(r)
+//    Denormal  fault raised on denormal inputs
+//    Overflow exceptions cannot occur  
+//    Underflow exceptions raised when appropriate for log1pf 
+//    (Error Handling Routine called for underflow)
+//    Inexact raised when appropriate by algorithm
 //
+//    log1pf(inf) = inf
+//    log1pf(-inf) = QNaN 
+//    log1pf(+/-0) = +/-0 
+//    log1pf(-1) =  -inf 
+//    log1pf(SNaN) = QNaN
+//    log1pf(QNaN) = QNaN
+//    log1pf(EM_special Values) = QNaN
 //
-// Implementation
-// --------------
+// *********************************************************************
 //
-// 1. |x| >= 2^(-8), and x > -1
-//   InvX = frcpa(x+1)
-//   r = InvX*(x+1) - 1
-//   P(r) = r*((1 - A2*4) + r^2*(A3 - A4*r)) = r*P2(r),
-//   A4,A3,A2 are created with setf instruction.
-//   We use Taylor series and so A4 = 1/4, A3 = 1/3,
-//   A2 = 1/2 rounded to double.
+// Computation is based on the following kernel.
 //
-//   N = float(n) where n is true unbiased exponent of x
+// ker_log_64( in_FR    :  X,
+// 	    in_FR    :  E,
+// 	    in_FR    :  Em1,
+// 	    in_GR    :  Expo_Range,
+// 	    out_FR   :  Y_hi,
+// 	    out_FR   :  Y_lo,
+// 	    out_FR   :  Scale,
+// 	    out_PR   :  Safe  )
+// 
+// Overview
 //
-//   T is tabular value of log(1/frcpa(x)) calculated in quad precision
-//   and rounded to double.  To load T we get bits from 55 to 62 of register
-//   format significand as index and calculate address
-//     ad_T = table_base_addr + 8 * index
+// The method consists of three cases.
 //
-//   L1 (log(2)) is calculated in quad precision and rounded to double;
-//   it's created with setf
+// If	|X+Em1| < 2^(-80)	use case log1pf_small;
+// elseif	|X+Em1| < 2^(-7)	use case log_near1;
+// else				use case log_regular;
 //
-//   And final result = P2(r)*r + (T + N*L1)
+// Case log1pf_small:
 //
+// log( 1 + (X+Em1) ) can be approximated by (X+Em1).
 //
-// 2. 2^(-40) <= |x| < 2^(-8)
-//   r = x
-//   P(r) = r*((1 - A2*4) + r^2*(A3 - A4*r)) = r*P2(r),
-//   A4,A3,A2 are the same as in case |x| >= 1/256
+// Case log_near1:
 //
-//   And final result = P2(r)*r
+//   log( 1 + (X+Em1) ) can be approximated by a simple polynomial
+//   in W = X+Em1. This polynomial resembles the truncated Taylor
+//   series W - W^2/2 + W^3/3 - ...
+// 
+// Case log_regular:
 //
-// 3. 0 < |x| < 2^(-40)
-//   Although log1p(x) is basically x, we would like to preserve the inexactness
-//   nature as well as consistent behavior under different rounding modes.
-//   We can do this by computing the result as
+//   Here we use a table lookup method. The basic idea is that in
+//   order to compute log(Arg) for an argument Arg in [1,2), we 
+//   construct a value G such that G*Arg is close to 1 and that
+//   log(1/G) is obtainable easily from a table of values calculated
+//   beforehand. Thus
 //
-//     log1p(x) = x - x*x
+//	log(Arg) = log(1/G) + log(G*Arg)
+//		 = log(1/G) + log(1 + (G*Arg - 1))
 //
+//   Because |G*Arg - 1| is small, the second term on the right hand
+//   side can be approximated by a short polynomial. We elaborate
+//   this method in four steps.
 //
-//    Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
-//          filtered and processed on special branches.
+//   Step 0: Initialization
 //
-
+//   We need to calculate log( E + X ). Obtain N, S_hi, S_lo such that
 //
-// Special values
-//==============================================================
+//	E + X = 2^N * ( S_hi + S_lo )	exactly
 //
-// log1p(-1)    = -inf            // Call error support
+//   where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
+//   that |S_lo| <= ulp(S_hi).
 //
-// log1p(+qnan) = +qnan
-// log1p(-qnan) = -qnan
-// log1p(+snan) = +qnan
-// log1p(-snan) = -qnan
+//   Step 1: Argument Reduction
 //
-// log1p(x),x<-1= QNAN Indefinite // Call error support
-// log1p(-inf)  = QNAN Indefinite
-// log1p(+inf)  = +inf
-// log1p(+/-0)  = +/-0
+//   Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
 //
+//	G := G_1 * G_2 * G_3
+//	r := (G * S_hi - 1)  + G * S_lo
 //
-// Registers used
-//==============================================================
-// Floating Point registers used:
-// f8, input
-// f7 -> f15,  f32 -> f36
+//   These G_j's have the property that the product is exactly 
+//   representable and that |r| < 2^(-12) as a result.
+//
+//   Step 2: Approximation
+//
+//
+//   log(1 + r) is approximated by a short polynomial poly(r).
+//
+//   Step 3: Reconstruction
+//
+//
+//   Finally, log( E + X ) is given by
+//
+//   log( E + X )   =   log( 2^N * (S_hi + S_lo) )
+//                 ~=~  N*log(2) + log(1/G) + log(1 + r)
+//                 ~=~  N*log(2) + log(1/G) + poly(r).
+//
+// **** Algorithm ****
+//
+// Case log1pf_small:
+//
+// Although log(1 + (X+Em1)) is basically X+Em1, we would like to 
+// preserve the inexactness nature as well as consistent behavior
+// under different rounding modes. Note that this case can only be
+// taken if E is set to be 1.0. In this case, Em1 is zero, and that
+// X can be very tiny and thus the final result can possibly underflow.
+// Thus, we compare X against a threshold that is dependent on the
+// input Expo_Range. If |X| is smaller than this threshold, we set
+// SAFE to be FALSE. 
+//
+// The result is returned as Y_hi, Y_lo, and, in the case that SAFE 
+// is FALSE, an additional value Scale is also returned. 
+//
+//	W    := X + Em1
+//      Threshold := Threshold_Table( Expo_Range )
+//      Tiny      := Tiny_Table( Expo_Range )
+//
+//      If ( |W| > Threshold ) then
+//         Y_hi  := W
+//         Y_lo  := -W*W
+//      Else
+//         Y_hi  := W
+//         Y_lo  := -Tiny
+//         Scale := 2^(-100)
+//         Safe  := FALSE
+//      EndIf
+//
+//
+// One may think that Y_lo should be -W*W/2; however, it does not matter
+// as Y_lo will be rounded off completely except for the correct effect in 
+// directed rounding. Clearly -W*W is simpler to compute. Moreover,
+// because of the difference in exponent value, Y_hi + Y_lo or 
+// Y_hi + Scale*Y_lo is always inexact.
+//
+// Case log_near1:
+//
+// Here we compute a simple polynomial. To exploit parallelism, we split
+// the polynomial into two portions.
+// 
+// 	W := X + Em1
+// 	Wsq := W * W
+// 	W4  := Wsq*Wsq
+// 	W6  := W4*Wsq
+// 	Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4)))
+// 	Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
+//      set lsb(Y_lo) to be 1
+//
+// Case log_regular:
+//
+// We present the algorithm in four steps.
+//
+//   Step 0. Initialization
+//   ----------------------
+//
+//   Z := X + E
+//   N := unbiased exponent of Z
+//   S_hi := 2^(-N) * Z
+//   S_lo := 2^(-N) * { (max(X,E)-Z) + min(X,E) }
+//
+//   Note that S_lo is always 0 for the case E = 0.
+//
+//   Step 1. Argument Reduction
+//   --------------------------
+//
+//   Let
+//
+//	Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
+//
+//   We obtain G_1, G_2, G_3 by the following steps.
+//
+//
+//	Define		X_0 := 1.d_1 d_2 ... d_14. This is extracted
+//			from S_hi.
+//
+//	Define		A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
+//			to lsb = 2^(-4).
+//
+//	Define		index_1 := [ d_1 d_2 d_3 d_4 ].
+//
+//	Fetch 		Z_1 := (1/A_1) rounded UP in fixed point with
+//	fixed point	lsb = 2^(-15).
+//			Z_1 looks like z_0.z_1 z_2 ... z_15
+//		        Note that the fetching is done using index_1.
+//			A_1 is actually not needed in the implementation
+//			and is used here only to explain how the value
+//			Z_1 is defined.
+//
+//	Fetch		G_1 := (1/A_1) truncated to 21 sig. bits.
+//	floating pt.	Again, fetching is done using index_1. A_1
+//			explains how G_1 is defined.
+//
+//	Calculate	X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
+//			     = 1.0 0 0 0 d_5 ... d_14
+//			This is accomplished by integer multiplication.
+//			It is proved that X_1 indeed always begins
+//			with 1.0000 in fixed point.
+//
+//
+//	Define		A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 
+//			truncated to lsb = 2^(-8). Similar to A_1,
+//			A_2 is not needed in actual implementation. It
+//			helps explain how some of the values are defined.
+//
+//	Define		index_2 := [ d_5 d_6 d_7 d_8 ].
+//
+//	Fetch 		Z_2 := (1/A_2) rounded UP in fixed point with
+//	fixed point	lsb = 2^(-15). Fetch done using index_2.
+//			Z_2 looks like z_0.z_1 z_2 ... z_15
+//
+//	Fetch		G_2 := (1/A_2) truncated to 21 sig. bits.
+//	floating pt.
+//
+//	Calculate	X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
+//			     = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+//			This is accomplished by integer multiplication.
+//			It is proved that X_2 indeed always begins
+//			with 1.00000000 in fixed point.
+//
+//
+//	Define		A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
+//			This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+//
+//	Define		index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+//
+//	Fetch		G_3 := (1/A_3) truncated to 21 sig. bits.
+//	floating pt.	Fetch is done using index_3.
+//
+//	Compute		G := G_1 * G_2 * G_3. 
+//
+//	This is done exactly since each of G_j only has 21 sig. bits.
+//
+//	Compute   
 //
-// General registers used:
-// r8  -> r11
-// r14 -> r22
+//		r := (G*S_hi - 1) + G*S_lo   using 2 FMA operations.
+//
+//	thus, r approximates G*(S_hi+S_lo) - 1 to within a couple of 
+//	rounding errors.
+//
+//
+//  Step 2. Approximation
+//  ---------------------
+//
+//   This step computes an approximation to log( 1 + r ) where r is the
+//   reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
+//   thus log(1+r) can be approximated by a short polynomial:
+//
+//	log(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+//
+//
+//  Step 3. Reconstruction
+//  ----------------------
+//
+//   This step computes the desired result of log(X+E):
+//
+//	log(X+E)  =   log( 2^N * (S_hi + S_lo) )
+//		  =   N*log(2) + log( S_hi + S_lo )
+//		  =   N*log(2) + log(1/G) +
+//		      log(1 + G*(S_hi+S_lo) - 1 )
+//
+//   log(2), log(1/G_j) are stored as pairs of (single,double) numbers:
+//   log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
+//   single-precision numbers and the low parts are double precision
+//   numbers. These have the property that
+//
+//	N*log2_hi + SUM ( log1byGj_hi )
+//
+//   is computable exactly in double-extended precision (64 sig. bits).
+//   Finally
+//
+//	Y_hi := N*log2_hi + SUM ( log1byGj_hi )
+//	Y_lo := poly_hi + [ poly_lo + 
+//	        ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
+//      set lsb(Y_lo) to be 1
 //
-// Predicate registers used:
-// p6 -> p12
 
-// Assembly macros
-//==============================================================
-GR_TAG                 = r8
-GR_ad_T                = r9
-GR_Exp                 = r10
-GR_N                   = r11
+#include "libm_support.h"
 
-GR_signexp_x           = r14
-GR_exp_mask            = r15
-GR_exp_bias            = r16
-GR_05                  = r17
-GR_A3                  = r18
-GR_Sig                 = r19
-GR_Ind                 = r19
-GR_exp_x               = r20
-GR_Ln2                 = r21
-GR_025                 = r22
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
+// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1 
 
-GR_SAVE_B0             = r33
-GR_SAVE_PFS            = r34
-GR_SAVE_GP             = r35
-GR_SAVE_SP             = r36
+.align 64
+Constants_P:
+ASM_TYPE_DIRECTIVE(Constants_P,@object)
+data4  0xEFD62B15,0xE3936754,0x00003FFB,0x00000000
+data4  0xA5E56381,0x8003B271,0x0000BFFC,0x00000000
+data4  0x73282DB0,0x9249248C,0x00003FFC,0x00000000
+data4  0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000
+data4  0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000
+data4  0x00067ED5,0x80000000,0x0000BFFD,0x00000000
+data4  0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000
+data4  0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_P)
+ 
+// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1 
 
-GR_Parameter_X         = r37
-GR_Parameter_Y         = r38
-GR_Parameter_RESULT    = r39
-GR_Parameter_TAG       = r40
+.align 64
+Constants_Q:
+ASM_TYPE_DIRECTIVE(Constants_Q,@object)
+data4  0x00000000,0xB1721800,0x00003FFE,0x00000000 
+data4  0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
+data4  0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
+data4  0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
+data4  0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
+data4  0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_Q)
+ 
+// Z1 - 16 bit fixed, G1 and H1 - IEEE single, h1 - IEEE double 
+ 
+.align 64
+Constants_Z_G_H_h1: // 16 rows of 24 bytes each; the row offset is Index1 * 0x18
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h1,@object)
+data4  0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000 // Z1, G1, H1, 4 pad bytes, h1 (two words)
+data4  0x00007879,0x3F70F0F0,0x3D785196,0x00000000,0x617D741C,0x3DA163A6
+data4  0x000071C8,0x3F638E38,0x3DF13843,0x00000000,0xCBD3D5BB,0x3E2C55E6
+data4  0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000,0xD86EA5E7,0xBE3EB0BF
+data4  0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000,0x86B12760,0x3E2E6A8C
+data4  0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000,0x5C0739BA,0x3E47574C
+data4  0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000,0x13E8AF2F,0x3E20E30F
+data4  0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000,0xF2C630BD,0xBE42885B
+data4  0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000,0x97E577C6,0x3E497F34
+data4  0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000,0xA6B0A5AB,0x3E3E6A6E
+data4  0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000,0xD328D9BE,0xBDF43E3C
+data4  0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000,0x0ADB090A,0x3E4094C3
+data4  0x00004925,0x3F124920,0x3F0F4303,0x00000000,0xFC1FE510,0xBE28FBB2
+data4  0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000,0x10FDE3FA,0x3E3A7895
+data4  0x00004445,0x3F088888,0x3F20EC80,0x00000000,0x7CC8C98F,0x3E508CE5
+data4  0x00004211,0x3F042108,0x3F29516A,0x00000000,0xA223106C,0xBE534874
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h1)
+ 
+// Z2 - 16 bit fixed, G2 and H2 - IEEE single, h2 - IEEE double 
 
+.align 64 
+Constants_Z_G_H_h2: // 16 rows of 24 bytes each; the row offset is Index2 * 0x18
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h2,@object)
+data4  0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000 // Z2, G2, H2, 4 pad bytes, h2 (two words)
+data4  0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000,0x22C42273,0x3DB5A116
+data4  0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000,0x21F86ED3,0x3DE620CF
+data4  0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000,0x484F34ED,0xBDAFA07E
+data4  0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000,0x3860BCF6,0xBDFE07F0
+data4  0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000,0xA78093D6,0x3DEA370F
+data4  0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000,0x72A753D0,0x3DFF5791
+data4  0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000,0xA7EF896B,0x3DFEBE6C
+data4  0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000,0x409ECB43,0x3E0CF156
+data4  0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000,0xFFEF71DF,0xBE0B6F97
+data4  0x00007B31,0x3F766038,0x3D1CF49B,0x00000000,0x5D59EEE8,0xBE080483
+data4  0x00007ABB,0x3F757400,0x3D2C531D,0x00000000,0xA9192A74,0x3E1F91E9
+data4  0x00007A45,0x3F748988,0x3D3BA322,0x00000000,0xBF72A8CD,0xBE139A06
+data4  0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000,0xF8FBA6CF,0x3E1D9202
+data4  0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000,0xBA796223,0xBE1DCCC4
+data4  0x000078EB,0x3F71D488,0x3D693B9D,0x00000000,0xB6B7C239,0xBE049391
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h2)
+ 
+// G3 and H3 - IEEE single and h3 - IEEE double 
 
+.align 64 
+Constants_Z_G_H_h3: // 32 rows of 16 bytes each; the row offset is Index3 * 16
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h3,@object)
+data4  0x3F7FFC00,0x38800100,0x562224CD,0x3D355595 // G3, H3, h3 (two words)
+data4  0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
+data4  0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
+data4  0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
+data4  0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
+data4  0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
+data4  0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
+data4  0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
+data4  0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
+data4  0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
+data4  0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
+data4  0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
+data4  0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
+data4  0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
+data4  0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
+data4  0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
+data4  0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
+data4  0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
+data4  0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
+data4  0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
+data4  0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
+data4  0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
+data4  0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
+data4  0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
+data4  0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
+data4  0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
+data4  0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
+data4  0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
+data4  0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
+data4  0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
+data4  0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
+data4  0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h3)
+ 
+// 
+//  Exponent Thresholds and Tiny Thresholds
+//  for 8, 11, 15, and 17 bit exponents
+// 
+//  Expo_Range             Value
+// 
+//  0 (8  bits)            2^(-126)
+//  1 (11 bits)            2^(-1022)
+//  2 (15 bits)            2^(-16382)
+//  3 (17 bits)            2^(-16382)
+// 
+//  Tiny_Table
+//  ----------
+//  Expo_Range             Value
+// 
+//  0 (8  bits)            2^(-16382)
+//  1 (11 bits)            2^(-16382)
+//  2 (15 bits)            2^(-16382)
+//  3 (17 bits)            2^(-16382)
+// 
 
-FR_NormX               = f7
-FR_RcpX                = f9
-FR_r                   = f10
-FR_r2                  = f11
-FR_r4                  = f12
-FR_N                   = f13
-FR_Ln2                 = f14
-FR_Xp1                 = f15
+.align 64 
+Constants_Threshold: // 16-byte entries; the small-argument path reads base + 16 * GR_Expo_Range
+ASM_TYPE_DIRECTIVE(Constants_Threshold,@object)
+data4  0x00000000,0x80000000,0x00003F81,0x00000000 // 2^(-126)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // 2^(-16382)
+data4  0x00000000,0x80000000,0x00003C01,0x00000000 // 2^(-1022)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // 2^(-16382)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // 2^(-16382)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // 2^(-16382)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // 2^(-16382)
+data4  0x00000000,0x80000000,0x00000001,0x00000000 // 2^(-16382)
+ASM_SIZE_DIRECTIVE(Constants_Threshold)
 
-FR_A4                  = f33
-FR_A3                  = f34
-FR_A2                  = f35
+.align 64
+Constants_1_by_LN10: // two 16-byte entries; presumably the FR_1LN10_hi / FR_1LN10_lo pair - confirm at the load site
+ASM_TYPE_DIRECTIVE(Constants_1_by_LN10,@object)
+data4  0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000 // approx. 0.4343, i.e. 1/ln(10)
+data4  0xACCF70C8,0xD56EAABE,0x00003FBD,0x00000000 // small correction term, magnitude about 2^(-66)
+ASM_SIZE_DIRECTIVE(Constants_1_by_LN10)
 
-FR_T                   = f36
-FR_NxLn2pT             = f36
+FR_Input_X = f8 // x as read at entry by fnorm, fclass and fcmp
+FR_Neg_One = f9 // -1.0, formed at entry as f0 - f1
+FR_E       = f33 // E, set at entry to f0 + f1 (= 1)
+FR_Em1     = f34 // Em1, set at entry to f0 + f0 (= 0)
+FR_Y_hi    = f34  
+// Shared with Em1
+FR_Y_lo    = f35
+FR_Scale   = f36
+FR_X_Prime = f37 // fnorm of FR_Input_X
+FR_Z       = f38 // Z = X_Prime + E
+FR_S_hi    = f38  
+// Shared with Z  
+FR_W       = f39 // W = X_Prime + Em1
+FR_G       = f40
+FR_wsq     = f40 
+// Shared with G 
+FR_H       = f41
+FR_w4      = f41
+// Shared with H  
+FR_h       = f42
+FR_w6      = f42  
+// Shared with h     
+FR_G_tmp   = f43
+FR_poly_lo = f43
+// Shared with G_tmp 
+FR_P8      = f43  
+// Shared with G_tmp 
+FR_H_tmp   = f44
+FR_poly_hi = f44
+  // Shared with H_tmp
+FR_P7      = f44  
+// Shared with H_tmp
+FR_h_tmp   = f45 
+FR_rsq     = f45  
+// Shared with h_tmp
+FR_P6      = f45
+// Shared with h_tmp
+FR_abs_W   = f46
+FR_r       = f46  
+// Shared with abs_W  
+FR_AA      = f47 
+FR_log2_hi = f47  
+// Shared with AA  
+FR_BB          = f48
+FR_log2_lo     = f48  
+// Shared with BB  
+FR_S_lo        = f49 
+FR_two_negN    = f50  
+FR_float_N     = f51 
+FR_Q4          = f52 
+FR_dummy       = f52  
+// Shared with Q4
+FR_P4          = f52  
+// Shared with Q4
+FR_Threshold    = f52
+// Shared with Q4
+FR_Q3          = f53  
+FR_P3          = f53  
+// Shared with Q3
+FR_Tiny        = f53  
+// Shared with Q3
+FR_Q2          = f54 
+FR_P2          = f54  
+// Shared with Q2
+FR_1LN10_hi     = f54 
+// Shared with Q2
+FR_Q1           = f55 
+FR_P1           = f55 
+// Shared with Q1 
+FR_1LN10_lo     = f55 
+// Shared with Q1 
+FR_P5           = f98 
+FR_SCALE        = f98 // Shared with P5
+FR_Output_X_tmp = f99 
 
+GR_Expo_Range   = r32
+GR_Table_Base   = r34 // address of the table in use, loaded via @ltoff + ld8
+GR_Table_Base1  = r35 // address of Constants_Z_G_H_h2
+GR_Table_ptr    = r36 // Index1, then the address of the selected h1-table row
+GR_Index2       = r37 // extr.u(X_1,6,4), then the address of the selected h2-table row
+GR_signif       = r38 // significand of Z (getf.sig)
+GR_X_0          = r39 // extr.u(signif,49,15)
+GR_X_1          = r40 // pmpyshr2.u(X_0,Z_1,15)
+GR_X_2          = r41 // pmpyshr2.u(X_1,Z_2,15)
+GR_Z_1          = r42 // Z1 entry read with ld4
+GR_Z_2          = r43 // Z2 entry read with ld4
+GR_N            = r44 // exponent field of Z (getf.exp), later reduced by Bias
+GR_Bias         = r45 // 0xFFFF on the main path
+GR_M            = r46 // exponent field of abs_W, then reduced by Bias
+GR_ScaleN       = r47  // Bias - N
+GR_Index3       = r48 // extr.u(X_2,1,5), then the address of the selected h3-table row
+GR_Perturb      = r49 // set to 1
+GR_Table_Scale  = r50 // 0x18, the 24-byte row stride of the h1 and h2 tables
 
 
-FR_Y                   = f1
-FR_X                   = f10
-FR_RESULT              = f8
+GR_SAVE_PFS     = r51 // NOTE(review): presumably holds ar.pfs across a call - use not visible here, confirm
+GR_SAVE_B0      = r52 // NOTE(review): presumably holds b0 across a call - confirm
+GR_SAVE_GP      = r53 // NOTE(review): presumably holds gp across a call - confirm
 
+GR_Parameter_X       = r54 // r54-r57 are the 4 output registers declared by the alloc at entry
+GR_Parameter_Y       = r55
+GR_Parameter_RESULT  = r56
+
+GR_Parameter_TAG = r57 
 
-// Data
-//==============================================================
-RODATA
-.align 16
-
-LOCAL_OBJECT_START(log_data)
-// ln(1/frcpa(1+i/256)), i=0...255
-data8 0x3F60040155D5889E // 0
-data8 0x3F78121214586B54 // 1
-data8 0x3F841929F96832F0 // 2
-data8 0x3F8C317384C75F06 // 3
-data8 0x3F91A6B91AC73386 // 4
-data8 0x3F95BA9A5D9AC039 // 5
-data8 0x3F99D2A8074325F4 // 6
-data8 0x3F9D6B2725979802 // 7
-data8 0x3FA0C58FA19DFAAA // 8
-data8 0x3FA2954C78CBCE1B // 9
-data8 0x3FA4A94D2DA96C56 // 10
-data8 0x3FA67C94F2D4BB58 // 11
-data8 0x3FA85188B630F068 // 12
-data8 0x3FAA6B8ABE73AF4C // 13
-data8 0x3FAC441E06F72A9E // 14
-data8 0x3FAE1E6713606D07 // 15
-data8 0x3FAFFA6911AB9301 // 16
-data8 0x3FB0EC139C5DA601 // 17
-data8 0x3FB1DBD2643D190B // 18
-data8 0x3FB2CC7284FE5F1C // 19
-data8 0x3FB3BDF5A7D1EE64 // 20
-data8 0x3FB4B05D7AA012E0 // 21
-data8 0x3FB580DB7CEB5702 // 22
-data8 0x3FB674F089365A7A // 23
-data8 0x3FB769EF2C6B568D // 24
-data8 0x3FB85FD927506A48 // 25
-data8 0x3FB9335E5D594989 // 26
-data8 0x3FBA2B0220C8E5F5 // 27
-data8 0x3FBB0004AC1A86AC // 28
-data8 0x3FBBF968769FCA11 // 29
-data8 0x3FBCCFEDBFEE13A8 // 30
-data8 0x3FBDA727638446A2 // 31
-data8 0x3FBEA3257FE10F7A // 32
-data8 0x3FBF7BE9FEDBFDE6 // 33
-data8 0x3FC02AB352FF25F4 // 34
-data8 0x3FC097CE579D204D // 35
-data8 0x3FC1178E8227E47C // 36
-data8 0x3FC185747DBECF34 // 37
-data8 0x3FC1F3B925F25D41 // 38
-data8 0x3FC2625D1E6DDF57 // 39
-data8 0x3FC2D1610C86813A // 40
-data8 0x3FC340C59741142E // 41
-data8 0x3FC3B08B6757F2A9 // 42
-data8 0x3FC40DFB08378003 // 43
-data8 0x3FC47E74E8CA5F7C // 44
-data8 0x3FC4EF51F6466DE4 // 45
-data8 0x3FC56092E02BA516 // 46
-data8 0x3FC5D23857CD74D5 // 47
-data8 0x3FC6313A37335D76 // 48
-data8 0x3FC6A399DABBD383 // 49
-data8 0x3FC70337DD3CE41B // 50
-data8 0x3FC77654128F6127 // 51
-data8 0x3FC7E9D82A0B022D // 52
-data8 0x3FC84A6B759F512F // 53
-data8 0x3FC8AB47D5F5A310 // 54
-data8 0x3FC91FE49096581B // 55
-data8 0x3FC981634011AA75 // 56
-data8 0x3FC9F6C407089664 // 57
-data8 0x3FCA58E729348F43 // 58
-data8 0x3FCABB55C31693AD // 59
-data8 0x3FCB1E104919EFD0 // 60
-data8 0x3FCB94EE93E367CB // 61
-data8 0x3FCBF851C067555F // 62
-data8 0x3FCC5C0254BF23A6 // 63
-data8 0x3FCCC000C9DB3C52 // 64
-data8 0x3FCD244D99C85674 // 65
-data8 0x3FCD88E93FB2F450 // 66
-data8 0x3FCDEDD437EAEF01 // 67
-data8 0x3FCE530EFFE71012 // 68
-data8 0x3FCEB89A1648B971 // 69
-data8 0x3FCF1E75FADF9BDE // 70
-data8 0x3FCF84A32EAD7C35 // 71
-data8 0x3FCFEB2233EA07CD // 72
-data8 0x3FD028F9C7035C1C // 73
-data8 0x3FD05C8BE0D9635A // 74
-data8 0x3FD085EB8F8AE797 // 75
-data8 0x3FD0B9C8E32D1911 // 76
-data8 0x3FD0EDD060B78081 // 77
-data8 0x3FD122024CF0063F // 78
-data8 0x3FD14BE2927AECD4 // 79
-data8 0x3FD180618EF18ADF // 80
-data8 0x3FD1B50BBE2FC63B // 81
-data8 0x3FD1DF4CC7CF242D // 82
-data8 0x3FD214456D0EB8D4 // 83
-data8 0x3FD23EC5991EBA49 // 84
-data8 0x3FD2740D9F870AFB // 85
-data8 0x3FD29ECDABCDFA04 // 86
-data8 0x3FD2D46602ADCCEE // 87
-data8 0x3FD2FF66B04EA9D4 // 88
-data8 0x3FD335504B355A37 // 89
-data8 0x3FD360925EC44F5D // 90
-data8 0x3FD38BF1C3337E75 // 91
-data8 0x3FD3C25277333184 // 92
-data8 0x3FD3EDF463C1683E // 93
-data8 0x3FD419B423D5E8C7 // 94
-data8 0x3FD44591E0539F49 // 95
-data8 0x3FD47C9175B6F0AD // 96
-data8 0x3FD4A8B341552B09 // 97
-data8 0x3FD4D4F3908901A0 // 98
-data8 0x3FD501528DA1F968 // 99
-data8 0x3FD52DD06347D4F6 // 100
-data8 0x3FD55A6D3C7B8A8A // 101
-data8 0x3FD5925D2B112A59 // 102
-data8 0x3FD5BF406B543DB2 // 103
-data8 0x3FD5EC433D5C35AE // 104
-data8 0x3FD61965CDB02C1F // 105
-data8 0x3FD646A84935B2A2 // 106
-data8 0x3FD6740ADD31DE94 // 107
-data8 0x3FD6A18DB74A58C5 // 108
-data8 0x3FD6CF31058670EC // 109
-data8 0x3FD6F180E852F0BA // 110
-data8 0x3FD71F5D71B894F0 // 111
-data8 0x3FD74D5AEFD66D5C // 112
-data8 0x3FD77B79922BD37E // 113
-data8 0x3FD7A9B9889F19E2 // 114
-data8 0x3FD7D81B037EB6A6 // 115
-data8 0x3FD8069E33827231 // 116
-data8 0x3FD82996D3EF8BCB // 117
-data8 0x3FD85855776DCBFB // 118
-data8 0x3FD8873658327CCF // 119
-data8 0x3FD8AA75973AB8CF // 120
-data8 0x3FD8D992DC8824E5 // 121
-data8 0x3FD908D2EA7D9512 // 122
-data8 0x3FD92C59E79C0E56 // 123
-data8 0x3FD95BD750EE3ED3 // 124
-data8 0x3FD98B7811A3EE5B // 125
-data8 0x3FD9AF47F33D406C // 126
-data8 0x3FD9DF270C1914A8 // 127
-data8 0x3FDA0325ED14FDA4 // 128
-data8 0x3FDA33440224FA79 // 129
-data8 0x3FDA57725E80C383 // 130
-data8 0x3FDA87D0165DD199 // 131
-data8 0x3FDAAC2E6C03F896 // 132
-data8 0x3FDADCCC6FDF6A81 // 133
-data8 0x3FDB015B3EB1E790 // 134
-data8 0x3FDB323A3A635948 // 135
-data8 0x3FDB56FA04462909 // 136
-data8 0x3FDB881AA659BC93 // 137
-data8 0x3FDBAD0BEF3DB165 // 138
-data8 0x3FDBD21297781C2F // 139
-data8 0x3FDC039236F08819 // 140
-data8 0x3FDC28CB1E4D32FD // 141
-data8 0x3FDC4E19B84723C2 // 142
-data8 0x3FDC7FF9C74554C9 // 143
-data8 0x3FDCA57B64E9DB05 // 144
-data8 0x3FDCCB130A5CEBB0 // 145
-data8 0x3FDCF0C0D18F326F // 146
-data8 0x3FDD232075B5A201 // 147
-data8 0x3FDD490246DEFA6B // 148
-data8 0x3FDD6EFA918D25CD // 149
-data8 0x3FDD9509707AE52F // 150
-data8 0x3FDDBB2EFE92C554 // 151
-data8 0x3FDDEE2F3445E4AF // 152
-data8 0x3FDE148A1A2726CE // 153
-data8 0x3FDE3AFC0A49FF40 // 154
-data8 0x3FDE6185206D516E // 155
-data8 0x3FDE882578823D52 // 156
-data8 0x3FDEAEDD2EAC990C // 157
-data8 0x3FDED5AC5F436BE3 // 158
-data8 0x3FDEFC9326D16AB9 // 159
-data8 0x3FDF2391A2157600 // 160
-data8 0x3FDF4AA7EE03192D // 161
-data8 0x3FDF71D627C30BB0 // 162
-data8 0x3FDF991C6CB3B379 // 163
-data8 0x3FDFC07ADA69A910 // 164
-data8 0x3FDFE7F18EB03D3E // 165
-data8 0x3FE007C053C5002E // 166
-data8 0x3FE01B942198A5A1 // 167
-data8 0x3FE02F74400C64EB // 168
-data8 0x3FE04360BE7603AD // 169
-data8 0x3FE05759AC47FE34 // 170
-data8 0x3FE06B5F1911CF52 // 171
-data8 0x3FE078BF0533C568 // 172
-data8 0x3FE08CD9687E7B0E // 173
-data8 0x3FE0A10074CF9019 // 174
-data8 0x3FE0B5343A234477 // 175
-data8 0x3FE0C974C89431CE // 176
-data8 0x3FE0DDC2305B9886 // 177
-data8 0x3FE0EB524BAFC918 // 178
-data8 0x3FE0FFB54213A476 // 179
-data8 0x3FE114253DA97D9F // 180
-data8 0x3FE128A24F1D9AFF // 181
-data8 0x3FE1365252BF0865 // 182
-data8 0x3FE14AE558B4A92D // 183
-data8 0x3FE15F85A19C765B // 184
-data8 0x3FE16D4D38C119FA // 185
-data8 0x3FE18203C20DD133 // 186
-data8 0x3FE196C7BC4B1F3B // 187
-data8 0x3FE1A4A738B7A33C // 188
-data8 0x3FE1B981C0C9653D // 189
-data8 0x3FE1CE69E8BB106B // 190
-data8 0x3FE1DC619DE06944 // 191
-data8 0x3FE1F160A2AD0DA4 // 192
-data8 0x3FE2066D7740737E // 193
-data8 0x3FE2147DBA47A394 // 194
-data8 0x3FE229A1BC5EBAC3 // 195
-data8 0x3FE237C1841A502E // 196
-data8 0x3FE24CFCE6F80D9A // 197
-data8 0x3FE25B2C55CD5762 // 198
-data8 0x3FE2707F4D5F7C41 // 199
-data8 0x3FE285E0842CA384 // 200
-data8 0x3FE294294708B773 // 201
-data8 0x3FE2A9A2670AFF0C // 202
-data8 0x3FE2B7FB2C8D1CC1 // 203
-data8 0x3FE2C65A6395F5F5 // 204
-data8 0x3FE2DBF557B0DF43 // 205
-data8 0x3FE2EA64C3F97655 // 206
-data8 0x3FE3001823684D73 // 207
-data8 0x3FE30E97E9A8B5CD // 208
-data8 0x3FE32463EBDD34EA // 209
-data8 0x3FE332F4314AD796 // 210
-data8 0x3FE348D90E7464D0 // 211
-data8 0x3FE35779F8C43D6E // 212
-data8 0x3FE36621961A6A99 // 213
-data8 0x3FE37C299F3C366A // 214
-data8 0x3FE38AE2171976E7 // 215
-data8 0x3FE399A157A603E7 // 216
-data8 0x3FE3AFCCFE77B9D1 // 217
-data8 0x3FE3BE9D503533B5 // 218
-data8 0x3FE3CD7480B4A8A3 // 219
-data8 0x3FE3E3C43918F76C // 220
-data8 0x3FE3F2ACB27ED6C7 // 221
-data8 0x3FE4019C2125CA93 // 222
-data8 0x3FE4181061389722 // 223
-data8 0x3FE42711518DF545 // 224
-data8 0x3FE436194E12B6BF // 225
-data8 0x3FE445285D68EA69 // 226
-data8 0x3FE45BCC464C893A // 227
-data8 0x3FE46AED21F117FC // 228
-data8 0x3FE47A1527E8A2D3 // 229
-data8 0x3FE489445EFFFCCC // 230
-data8 0x3FE4A018BCB69835 // 231
-data8 0x3FE4AF5A0C9D65D7 // 232
-data8 0x3FE4BEA2A5BDBE87 // 233
-data8 0x3FE4CDF28F10AC46 // 234
-data8 0x3FE4DD49CF994058 // 235
-data8 0x3FE4ECA86E64A684 // 236
-data8 0x3FE503C43CD8EB68 // 237
-data8 0x3FE513356667FC57 // 238
-data8 0x3FE522AE0738A3D8 // 239
-data8 0x3FE5322E26867857 // 240
-data8 0x3FE541B5CB979809 // 241
-data8 0x3FE55144FDBCBD62 // 242
-data8 0x3FE560DBC45153C7 // 243
-data8 0x3FE5707A26BB8C66 // 244
-data8 0x3FE587F60ED5B900 // 245
-data8 0x3FE597A7977C8F31 // 246
-data8 0x3FE5A760D634BB8B // 247
-data8 0x3FE5B721D295F10F // 248
-data8 0x3FE5C6EA94431EF9 // 249
-data8 0x3FE5D6BB22EA86F6 // 250
-data8 0x3FE5E6938645D390 // 251
-data8 0x3FE5F673C61A2ED2 // 252
-data8 0x3FE6065BEA385926 // 253
-data8 0x3FE6164BFA7CC06B // 254
-data8 0x3FE62643FECF9743 // 255
-LOCAL_OBJECT_END(log_data)
-
-
-// Code
-//==============================================================
 
 .section .text
-GLOBAL_IEEE754_ENTRY(log1pf)
+.proc log1pf#
+.global log1pf#
+.align 64 
+log1pf:
+#ifdef _LIBC
+.global __log1pf
+__log1pf:
+#endif
+
 { .mfi
-      getf.exp      GR_signexp_x = f8 // if x is unorm then must recompute
-      fadd.s1       FR_Xp1 = f8, f1       // Form 1+x
-      mov           GR_05 = 0xfffe
+alloc r32 = ar.pfs,0,22,4,0
+(p0)  fsub.s1 FR_Neg_One = f0,f1 
+(p0)  cmp.eq.unc  p7, p0 = r0, r0 
 }
-{ .mlx
-      addl          GR_ad_T = @ltoff(log_data),gp
-      movl          GR_A3 = 0x3fd5555555555555 // double precision memory
-                                               // representation of A3
+
+{ .mfi
+(p0)  cmp.ne.unc  p14, p0 = r0, r0 
+(p0)  fnorm.s1 FR_X_Prime = FR_Input_X 
+(p0)  cmp.eq.unc  p15, p0 = r0, r0 ;; 
 }
-;;
 
 { .mfi
-      ld8           GR_ad_T = [GR_ad_T]
-      fclass.m      p8,p0 = f8,0xb // Is x unorm?
-      mov           GR_exp_mask = 0x1ffff
+      nop.m 999
+(p0)  fclass.m.unc p6, p0 =  FR_Input_X, 0x1E3 
+      nop.i 999
 }
+;;
+
 { .mfi
-      mov           GR_025 = 0xfffd            // Exponent of 0.25
-      fnorm.s1      FR_NormX = f8              // Normalize x
-      mov           GR_exp_bias = 0xffff
+	nop.m 999
+(p0)  fclass.nm.unc p10, p0 =  FR_Input_X, 0x1FF 
+      nop.i 999
 }
 ;;
 
 { .mfi
-      setf.exp      FR_A2 = GR_05 // create A2 = 0.5
-      fclass.m      p9,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
-      nop.i         0
+	nop.m 999
+(p0)  fcmp.eq.unc.s1 p9, p0 =  FR_Input_X, f0 
+      nop.i 999
 }
-{ .mib
-      setf.d        FR_A3 = GR_A3 // create A3
-      nop.i         0
-(p8)  br.cond.spnt  log1p_unorm          // Branch if x=unorm
+
+{ .mfi
+	nop.m 999
+(p0)  fadd FR_Em1 = f0,f0 
+	nop.i 999 ;;
 }
-;;
 
-log1p_common:
 { .mfi
-      setf.exp      FR_A4 = GR_025 // create A4 = 0.25
-      frcpa.s1      FR_RcpX,p0 = f1,FR_Xp1
-      nop.i         0
+	nop.m 999
+(p0)  fadd FR_E = f0,f1 
+	nop.i 999 ;;
 }
-{ .mfb
-      nop.m         0
-(p9)  fma.s.s0      f8 = f8,f1,f0 // set V-flag
-(p9)  br.ret.spnt   b0 // exit for NaN, NaT and +Inf
+
+{ .mfi
+	nop.m 999
+(p0)  fcmp.eq.unc.s1 p8, p0 =  FR_Input_X, FR_Neg_One 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fcmp.lt.unc.s1 p13, p0 =  FR_Input_X, FR_Neg_One 
+	nop.i 999
 }
-;;
+
+
+L(LOG_BEGIN): 
 
 { .mfi
-      getf.exp      GR_Exp = FR_Xp1            // signexp of x+1
-      fclass.m      p10,p0 = FR_Xp1,0x3A // is 1+x < 0?
-      and           GR_exp_x = GR_exp_mask, GR_signexp_x // biased exponent of x
+	nop.m 999
+(p0)  fadd.s1 FR_Z = FR_X_Prime, FR_E 
+	nop.i 999
 }
+
 { .mlx
-      nop.m         0
-      movl          GR_Ln2 = 0x3FE62E42FEFA39EF // double precision memory
-                                                // representation of log(2)
+	nop.m 999
+(p0)  movl GR_Table_Scale = 0x0000000000000018 ;; 
+}
+
+{ .mmi
+	nop.m 999
+//     
+//    Create E = 1 and Em1 = 0 
+//    Check for X == 0, meaning log(1+0)
+//    Check for X < -1, meaning log(negative)
+//    Check for X == -1, meaning log(0)
+//    Normalize x 
+//    Identify NatVals, NaNs, Infs. 
+//    Identify EM unsupporteds. 
+//    Identify Negative values - use S1 so as
+//    not to raise denormal operand exception 
+//    Set p15 to true for log1pf
+//    Set p14 to false for log1pf
+//    Set p7 true for log and log1pf
+//    
+(p0)  addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp
+      nop.i  999
 }
-;;
 
 { .mfi
-      getf.sig      GR_Sig = FR_Xp1 // get significand to calculate index
-                                    // for T if |x| >= 2^-8
-      fcmp.eq.s1    p12,p0 = f8,f0     // is x equal to 0?
-      sub           GR_exp_x = GR_exp_x, GR_exp_bias // true exponent of x
+	nop.m 999
+(p0)  fmax.s1 FR_AA = FR_X_Prime, FR_E 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      sub           GR_N = GR_Exp,GR_exp_bias // true exponent of x+1
-      fcmp.eq.s1    p11,p0 = FR_Xp1,f0     // is x = -1?
-      cmp.gt        p6,p7 = -8, GR_exp_x  // Is |x| < 2^-8
+      ld8    GR_Table_Base = [GR_Table_Base]
+(p0)  fmin.s1 FR_BB = FR_X_Prime, FR_E 
+	nop.i 999
 }
+
 { .mfb
-      nop.m         0
-      nop.f         0
-(p10) br.cond.spnt  log1p_lt_minus_1   // jump if x < -1
+	nop.m 999
+(p0)  fadd.s1 FR_W = FR_X_Prime, FR_Em1 
+//     
+//    Begin load of constants base
+//    FR_Z = Z = x + E  (x is the normalized input, sign kept)
+//    FR_W = W = x + Em1
+//    AA = fmax(x,E)
+//    BB = fmin(x,E)
+//
+(p6)  br.cond.spnt L(LOG_64_special) ;; 
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p10) br.cond.spnt L(LOG_64_unsupported) ;; 
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p13) br.cond.spnt L(LOG_64_negative) ;; 
+}
+
+{ .mib
+(p0)  getf.sig GR_signif = FR_Z 
+	nop.i 999
+(p9)  br.cond.spnt L(LOG_64_one) ;; 
+}
+
+{ .mib
+	nop.m 999
+	nop.i 999
+(p8)  br.cond.spnt L(LOG_64_zero) ;; 
 }
-;;
 
-// p6 is true if |x| < 1/256
-// p7 is true if |x| >= 1/256
-.pred.rel "mutex",p6,p7
 { .mfi
-      nop.m         0
-(p6)  fms.s1        FR_r = f8,f1,f0 // range reduction for |x|<1/256
-(p6)  cmp.gt.unc    p10,p0 = -40, GR_exp_x  // Is |x| < 2^-40
+(p0)  getf.exp GR_N =  FR_Z 
+//   
+//    Raise possible denormal operand exception 
+//    Create Bias
+// 
+//    This function computes ln( x + e ) 
+//    Input  FR 1: FR_X   = FR_Input_X          
+//    Input  FR 2: FR_E   = FR_E
+//    Input  FR 3: FR_Em1 = FR_Em1 
+//    Input  GR 1: GR_Expo_Range = GR_Expo_Range = 1
+//    Output FR 4: FR_Y_hi  
+//    Output FR 5: FR_Y_lo  
+//    Output FR 6: FR_Scale  
+//    Output PR 7: PR_Safe  
+//
+(p0)  fsub.s1 FR_S_lo = FR_AA, FR_Z 
+//
+//    signif = getf.sig(Z)
+//    abs_W = fabs(w)
+//
+(p0)  extr.u GR_Table_ptr = GR_signif, 59, 4 ;; 
 }
-{ .mfb
-(p7)  setf.sig      FR_N = GR_N // copy unbiased exponent of x to the
-                                // significand field of FR_N
-(p7)  fms.s1        FR_r = FR_RcpX,FR_Xp1,f1 // range reduction for |x|>=1/256
-(p12) br.ret.spnt   b0 // exit for x=0, return x
+
+{ .mfi
+	nop.m 999
+(p0)  fmerge.se FR_S_hi =  f1,FR_Z 
+(p0)  extr.u GR_X_0 = GR_signif, 49, 15  
+}
+
+{ .mmi
+      nop.m 999
+(p0)  addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp  
+      nop.i 999
 }
 ;;
 
+{ .mlx
+      ld8    GR_Table_Base1 = [GR_Table_Base1]
+(p0)  movl GR_Bias = 0x000000000000FFFF ;; 
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fabs FR_abs_W =  FR_W 
+(p0)  pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0 
+}
+
+{ .mfi
+	nop.m 999
+//    
+//    Branch out for special input values 
+//    
+(p0)  fcmp.lt.unc.s0 p8, p0 =  FR_Input_X, f0 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    X_0 = extr.u(signif,49,15)
+//    Index1 = extr.u(signif,59,4)
+//
+(p0)  fadd.s1 FR_S_lo = FR_S_lo, FR_BB 
+	nop.i 999 ;;
+}
+
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//    Offset_to_Z1 = 24 * Index1
+//    For performance, don't use result
+//    for 3 or 4 cycles.
+//
+(p0)  add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;; 
+}
+//
+//    Add Base to Offset for Z1
+//    Create Bias
+
+{ .mmi
+(p0)  ld4 GR_Z_1 = [GR_Table_ptr],4 ;; 
+(p0)  ldfs  FR_G = [GR_Table_ptr],4 
+	nop.i 999 ;;
+}
+
+{ .mmi
+(p0)  ldfs  FR_H = [GR_Table_ptr],8 ;; 
+(p0)  ldfd  FR_h = [GR_Table_ptr],0 
+(p0)  pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 
+}
+//
+//    Load Z_1 
+//    Get Base of Table2 
+//
+
+{ .mfi
+(p0)  getf.exp GR_M = FR_abs_W 
+	nop.f 999
+	nop.i 999 ;;
+}
+
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//    M = getf.exp(abs_W)
+//    S_lo = AA - Z
+//    X_1 = pmpyshr2(X_0,Z_1,15)
+//
+(p0)  sub GR_M = GR_M, GR_Bias ;; 
+}
+//     
+//    M = M - Bias
+//    Load G1
+//    N = getf.exp(Z)
+//
+
+{ .mii
+(p0)  cmp.gt.unc  p11, p0 =  -80, GR_M 
+(p0)  cmp.gt.unc  p12, p0 =  -7, GR_M ;; 
+(p0)  extr.u GR_Index2 = GR_X_1, 6, 4 ;; 
+}
+
 { .mib
-      setf.d        FR_Ln2 = GR_Ln2 // create log(2)
-(p7)  extr.u        GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
-(p11) br.cond.spnt  log1p_eq_minus_1 // jump if x = -1
+	nop.m 999
+//
+//    if -80 > M, set p11
+//    Index2 = extr.u(X_1,6,4)
+//    if -7  > M, set p12
+//    Load H1
+//
+(p0)  pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0 
+(p11) br.cond.spnt L(log1pf_small) ;; 
 }
-;;
 
-{ .mmf
-(p7)  shladd        GR_ad_T = GR_Ind,3,GR_ad_T // address of T
-      nop.m         0
-(p10) fnma.s.s0     f8 = f8,f8,f8   // If |x| very small, result=x-x*x
+{ .mib
+      nop.m 999
+	nop.i 999
+(p12) br.cond.spnt L(log1pf_near) ;; 
 }
-;;
+
+{ .mii
+(p0)  sub GR_N = GR_N, GR_Bias 
+//
+//    poly_lo = r * poly_lo 
+//
+(p0)  add GR_Perturb = 0x1, r0 ;; 
+(p0)  sub GR_ScaleN = GR_Bias, GR_N  
+}
+
+{ .mii
+(p0)  setf.sig FR_float_N = GR_N 
+	nop.i 999 ;;
+//
+//    Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)
+//    Load h1
+//    S_lo = S_lo + BB 
+//    Branch for -80 > M
+//   
+(p0)  add GR_Index2 = GR_Index2, GR_Table_Base1
+}
+
+{ .mmi
+(p0)  setf.exp FR_two_negN = GR_ScaleN 
+      nop.m 999
+(p0)  addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp  
+};;
+
+//
+//    Index2 points to Z2
+//    Branch for -7 > M
+//
 
 { .mmb
-(p7)  ldfd          FR_T = [GR_ad_T]
-      nop.m         0
-(p10) br.ret.spnt   b0              // Exit if |x| < 2^-40
+(p0)  ld4 GR_Z_2 = [GR_Index2],4 
+      ld8 GR_Table_Base = [GR_Table_Base]
+      nop.b 999 ;;
 }
-;;
+(p0)  nop.i 999
+//
+//    Load Z_2
+//    N = N - Bias
+//    Tablebase points to Table3
+//
+
+{ .mmi
+(p0)  ldfs  FR_G_tmp = [GR_Index2],4 ;; 
+//
+//    Load G_2
+//    pmpyshr2  X_2= (X_1,Z_2,15)
+//    float_N = setf.sig(N)
+//    ScaleN = Bias - N
+//
+(p0)  ldfs  FR_H_tmp = [GR_Index2],8 
+	nop.i 999 ;;
+}
+//
+//    Load H_2
+//    two_negN = setf.exp(scaleN)
+//    G = G_1 * G_2
+//
 
 { .mfi
-      nop.m         0
-      fma.s1        FR_r2 = FR_r,FR_r,f0 // r^2
-      nop.i         0
+(p0)  ldfd  FR_h_tmp = [GR_Index2],0 
+	nop.f 999
+(p0)  pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;; 
 }
+
+{ .mii
+	nop.m 999
+(p0)  extr.u GR_Index3 = GR_X_2, 1, 5 ;; 
+//
+//    Load h_2
+//    H = H_1 + H_2 
+//    h = h_1 + h_2 
+//    Index3 = extr.u(X_2,1,5)
+//
+(p0)  shladd GR_Index3 = GR_Index3,4,GR_Table_Base 
+}
+
+{ .mmi
+	nop.m 999
+	nop.m 999
+//
+//    float_N = fcvt.xf(float_N)
+//    load G3
+//
+(p0)  addl GR_Table_Base = @ltoff(Constants_Q#),gp ;; 
+}
+
 { .mfi
-      nop.m         0
-      fnma.s1       FR_A2 = FR_A2,FR_r,f1      // 1.0 - A2*r
-      nop.i         0
+ld8    GR_Table_Base = [GR_Table_Base]
+nop.f 999
+nop.i 999
+} ;;
+
+{ .mfi
+(p0)  ldfe FR_log2_hi = [GR_Table_Base],16 
+(p0)  fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN 
+	nop.i 999 ;;
+}
+
+{ .mmf
+	nop.m 999
+//
+//    G = G3 * G
+//    Load h3
+//    Load log2_hi
+//    H = H + H3
+//
+(p0)  ldfe FR_log2_lo = [GR_Table_Base],16 
+(p0)  fmpy.s1 FR_G = FR_G, FR_G_tmp ;; 
+}
+
+{ .mmf
+(p0)  ldfs  FR_G_tmp = [GR_Index3],4 
+//
+//    h = h + h3
+//    r = G * S_hi + 1 
+//    Load log2_lo
+//
+(p0)  ldfe FR_Q4 = [GR_Table_Base],16 
+(p0)  fadd.s1 FR_h = FR_h, FR_h_tmp ;; 
 }
-;;
 
 { .mfi
-      nop.m         0
-      fnma.s1       FR_A3 = FR_A4,FR_r,FR_A3 // A3 - A4*r
-      nop.i         0
+(p0)  ldfe FR_Q3 = [GR_Table_Base],16 
+(p0)  fadd.s1 FR_H = FR_H, FR_H_tmp 
+	nop.i 999 ;;
+}
+
+{ .mmf
+(p0)  ldfs  FR_H_tmp = [GR_Index3],4 
+(p0)  ldfe FR_Q2 = [GR_Table_Base],16 
+//
+//    Compute Index for Table3
+//    S_lo = S_lo * two_negN
+//
+(p0)  fcvt.xf FR_float_N = FR_float_N ;; 
+}
+//
+//    If S_lo == 0, set p8 false
+//    Load H3
+//    Load ptr to table of polynomial coeff.
+//
+
+{ .mmf
+(p0)  ldfd  FR_h_tmp = [GR_Index3],0 
+(p0)  ldfe FR_Q1 = [GR_Table_Base],0 
+(p0)  fcmp.eq.unc.s1 p0, p8 =  FR_S_lo, f0 ;; 
 }
-;;
 
 { .mfi
-      nop.m         0
-(p7)  fcvt.xf       FR_N = FR_N
-      nop.i         0
+	nop.m 999
+(p0)  fmpy.s1 FR_G = FR_G, FR_G_tmp 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m         0
-      // (A3*r+A2)*r^2+r
-      fma.s1        FR_A2 = FR_A3,FR_r2,FR_A2 // (A4*r+A3)*r^2+(A2*r+1)
-      nop.i         0
+	nop.m 999
+(p0)  fadd.s1 FR_H = FR_H, FR_H_tmp 
+	nop.i 999 ;;
 }
-;;
 
 { .mfi
-      nop.m         0
-      // N*Ln2hi+T
-(p7)  fma.s1        FR_NxLn2pT = FR_N,FR_Ln2,FR_T
-      nop.i         0
+	nop.m 999
+(p0)  fms.s1 FR_r = FR_G, FR_S_hi, f1 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fadd.s1 FR_h = FR_h, FR_h_tmp 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    Load Q4 
+//    Load Q3 
+//    Load Q2 
+//    Load Q1 
+//
+(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+//
+//    poly_lo = r * Q4 + Q3
+//    rsq = r* r
+//
+(p0)  fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    If (S_lo!=0) r = s_lo * G + r
+//
+(p0)  fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 
+	nop.i 999
+}
+//
+//    Create a 0x00000....01
+//    poly_lo = poly_lo * rsq + h
+//
+
+{ .mfi
+(p0)  setf.sig FR_dummy = GR_Perturb 
+(p0)  fmpy.s1 FR_rsq = FR_r, FR_r 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    h = N * log2_lo + h 
+//    Y_hi = n * log2_hi + H 
+//
+(p0)  fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+//
+//    poly_lo = r * poly_lo + Q2 
+//    poly_hi = Q1 * rsq + r 
+//
+(p0)  fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r 
+	nop.i 999 ;;
 }
-;;
 
-.pred.rel "mutex",p6,p7
 { .mfi
-      nop.m         0
-(p6)  fma.s.s0      f8 = FR_A2,FR_r,f0 // result if 2^(-40) <= |x| < 1/256
-      nop.i         0
+	nop.m 999
+(p0)  fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h 
+	nop.i 999 ;;
 }
+
 { .mfb
-      nop.m         0
-(p7)  fma.s.s0      f8 = FR_A2,FR_r,FR_NxLn2pT  // result if |x| >= 1/256
-      br.ret.sptk   b0                          // Exit if |x| >= 2^(-40)
+	nop.m 999
+(p0)  fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo 
+//
+//    Create the FR for a binary "or"
+//    Y_lo = poly_hi + poly_lo
+//
+// (p0)  for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+//    Turn the lsb of Y_lo ON
+//
+// (p0)  fmerge.se FR_Y_lo =  FR_Y_lo,FR_dummy ;;
+//
+//    Merge the new lsb into Y_lo, for alone doesn't
+//
+(p0)  br.cond.sptk L(LOG_main) ;; 
+}
+
+
+L(log1pf_near): 
+
+{ .mmi
+	nop.m 999
+	nop.m 999
+//    /*******************************************************/
+//    /*********** Branch log1pf_near  ************************/
+//    /*******************************************************/
+(p0)  addl GR_Table_Base = @ltoff(Constants_P#),gp ;; 
+}
+//
+//    Load base address of poly. coeff.
+//
+{.mmi
+      nop.m 999
+      ld8    GR_Table_Base = [GR_Table_Base]
+      nop.i 999
+};;
+
+{ .mmb
+(p0)  add GR_Table_ptr = 0x40,GR_Table_Base  
+//
+//    Address tables with separate pointers 
+//
+(p0)  ldfe FR_P8 = [GR_Table_Base],16 
+	nop.b 999 ;;
+}
+
+{ .mmb
+(p0)  ldfe FR_P4 = [GR_Table_ptr],16 
+//
+//    Load P4
+//    Load P8
+//
+(p0)  ldfe FR_P7 = [GR_Table_Base],16 
+	nop.b 999 ;;
+}
+
+{ .mmf
+(p0)  ldfe FR_P3 = [GR_Table_ptr],16 
+//
+//    Load P3
+//    Load P7
+//
+(p0)  ldfe FR_P6 = [GR_Table_Base],16 
+(p0)  fmpy.s1 FR_wsq = FR_W, FR_W ;; 
+}
+
+{ .mfi
+(p0)  ldfe FR_P2 = [GR_Table_ptr],16 
+	nop.f 999
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3 
+	nop.i 999
+}
+//
+//    Load P2
+//    Load P6
+//    Wsq = w * w
+//    Y_hi = p4 * w + p3
+//
+
+{ .mfi
+(p0)  ldfe FR_P5 = [GR_Table_Base],16 
+(p0)  fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7 
+	nop.i 999 ;;
+}
+
+{ .mfi
+(p0)  ldfe FR_P1 = [GR_Table_ptr],16 
+//
+//    Load P1
+//    Load P5
+//    Y_lo = p8 * w + P7
+//
+(p0)  fmpy.s1 FR_w4 = FR_wsq, FR_wsq 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6 
+(p0)  add GR_Perturb = 0x1, r0 ;; 
+}
+
+{ .mfi
+	nop.m 999
+//
+//    w4 = w2 * w2 
+//    Y_hi = y_hi * w + p2 
+//    Y_lo = y_lo * w + p6 
+//    Create perturbation bit
+//
+(p0)  fmpy.s1 FR_w6 = FR_w4, FR_wsq 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1 
+	nop.i 999
+}
+//
+//    Y_hi = y_hi * w + p1 
+//    w6 = w4 * w2 
+//
+
+{ .mfi
+(p0)  setf.sig FR_Q4 = GR_Perturb 
+(p0)  fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_wsq,FR_Y_hi, FR_W 
+	nop.i 999
 }
-;;
 
-.align 32
-log1p_unorm:
-// Here if x=unorm
 { .mfb
-      getf.exp      GR_signexp_x = FR_NormX // recompute biased exponent
-      nop.f         0
-      br.cond.sptk  log1p_common
+	nop.m 999
+//
+//    Y_hi = y_hi * wsq + w 
+//    Y_lo = y_lo * w + p5 
+//
+(p0)  fmpy.s1 FR_Y_lo = FR_w6, FR_Y_lo 
+//
+//    Y_lo = y_lo * w6  
+//
+// (p0)  for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+//    Set lsb on: Taken out to improve performance 
+//
+// (p0)  fmerge.se FR_Y_lo =  FR_Y_lo,FR_dummy ;;
+//
+//    Make sure it's on in Y_lo also.  Taken out to improve
+//    performance
+//
+(p0)  br.cond.sptk L(LOG_main) ;; 
+}
+
+
+L(log1pf_small): 
+
+{ .mmi
+	nop.m 999
+	nop.m 999
+//  /*******************************************************/
+//  /*********** Branch log1pf_small  ***********************/
+//  /*******************************************************/
+(p0)  addl GR_Table_Base = @ltoff(Constants_Threshold#),gp 
 }
-;;
 
-.align 32
-log1p_eq_minus_1:
-// Here if x=-1
 { .mfi
-      nop.m         0
-      fmerge.s      FR_X = f8,f8 // keep input argument for subsequent
-                                 // call of __libm_error_support#
-      nop.i         0
+	nop.m 999
+(p0)  mov FR_Em1 = FR_W 
+(p0)  cmp.eq.unc  p7, p0 = r0, r0 ;; 
+}
+
+{ .mlx
+      ld8    GR_Table_Base = [GR_Table_Base]
+(p0)  movl GR_Expo_Range = 0x0000000000000002 ;; 
+}
+//
+//    Set Safe to true
+//    Set Expo_Range = 0 for single
+//    Set Expo_Range = 2 for double 
+//    Set Expo_Range = 4 for double-extended 
+//
+
+{ .mmi
+(p0)  shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;; 
+(p0)  ldfe FR_Threshold = [GR_Table_Base],16 
+	nop.i 999
+}
+
+{ .mlx
+	nop.m 999
+(p0)  movl GR_Bias = 0x000000000000FF9B ;; 
 }
-;;
 
 { .mfi
-      mov           GR_TAG = 142  // set libm error in case of log1p(-1).
-      frcpa.s0      f8,p0 = f8,f0 // log1p(-1) should be equal to -INF.
-                                      // We can get it using frcpa because it
-                                      // sets result to the IEEE-754 mandated
-                                      // quotient of f8/f0.
-      nop.i         0
+(p0)  ldfe FR_Tiny = [GR_Table_Base],0 
+	nop.f 999
+	nop.i 999 ;;
 }
-{ .mib
-      nop.m         0
-      nop.i         0
-      br.cond.sptk  log_libm_err
+
+{ .mfi
+	nop.m 999
+(p0)  fcmp.gt.unc.s1 p13, p12 =  FR_abs_W, FR_Threshold 
+	nop.i 999 ;;
 }
-;;
 
-.align 32
-log1p_lt_minus_1:
-// Here if x < -1
 { .mfi
-      nop.m         0
-      fmerge.s      FR_X = f8,f8
-      nop.i         0
+	nop.m 999
+(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W 
+	nop.i 999
+}
+
+{ .mfi
+	nop.m 999
+(p13) fadd FR_SCALE = f0, f1 
+	nop.i 999 ;;
+}
+
+{ .mfi
+	nop.m 999
+(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny 
+(p12) cmp.ne.unc  p7, p0 = r0, r0 
 }
-;;
 
 { .mfi
-      mov           GR_TAG = 143  // set libm error in case of x < -1.
-      frcpa.s0      f8,p0 = f0,f0 // log1p(x) x < -1 should be equal to NaN.
-                                  // We can get it using frcpa because it
-                                  // sets result to the IEEE-754 mandated
-                                  // quotient of f0/f0 i.e. NaN.
-      nop.i         0
+(p12) setf.exp FR_SCALE = GR_Bias 
+	nop.f 999
+	nop.i 999 ;;
+}
+
+//
+//    Set p7 to SAFE = FALSE
+//    Set Scale = 2^-100 
+//
+{ .mfb
+	nop.m 999
+(p0)  fma.s.s0 FR_Input_X = FR_Y_lo,FR_SCALE,FR_Y_hi
+(p0)  br.ret.sptk   b0
 }
 ;;
 
-.align 32
-log_libm_err:
-{ .mmi
-      alloc         r32 = ar.pfs,1,4,4,0
-      mov           GR_Parameter_TAG = GR_TAG
-      nop.i         0
+L(LOG_64_one): 
+
+{ .mfb
+	nop.m 999
+(p0)  fmpy.s.s0 FR_Input_X = FR_Input_X, f0 
+(p0)  br.ret.sptk   b0
 }
 ;;
+//    
+//    Raise divide by zero for +/-0 input.
+//    
+
+L(LOG_64_zero): 
+
+{ .mfi
+(p0)  mov   GR_Parameter_TAG = 142 
+//
+//    If we have log1pf(-1) (i.e. X + 1 = 0), return -Inf.
+//  
+(p0)  fsub.s0 FR_Output_X_tmp = f0, f1 
+      nop.i 999 ;;
+}
+{ .mfb
+      nop.m 999
+(p0)  frcpa.s0 FR_Output_X_tmp, p8 =  FR_Output_X_tmp, f0 
+(p0)  br.cond.sptk L(LOG_ERROR_Support) ;; 
+}
 
-GLOBAL_IEEE754_END(log1pf)
+L(LOG_64_special): 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+{ .mfi
+      nop.m 999
+//    
+//    Return -Inf or value from handler.
+//    
+(p0)  fclass.m.unc p7, p0 =  FR_Input_X, 0x1E1 
+      nop.i 999 ;;
+}
+
+{ .mfb
+      nop.m 999
+//     
+//    Check for Natval, QNan, SNaN, +Inf   
+//    
+(p7)  fmpy.s.s0  f8 =  FR_Input_X, f1 
+//     
+//    For SNaN raise invalid and return QNaN.
+//    For QNaN raise invalid and return QNaN.
+//    For +Inf return +Inf.
+//    
+(p7)  br.ret.sptk   b0
+}
+;;
+
+//    
+//    For -Inf raise invalid and return QNaN.
+//    
+
+{ .mfb
+(p0)  mov   GR_Parameter_TAG = 143 
+(p0)  fmpy.s.s0  FR_Output_X_tmp =  FR_Input_X, f0 
+(p0)  br.cond.sptk L(LOG_ERROR_Support) ;; 
+}
+
+//
+//    Report that log1pf(-Inf) computed
+//     
+
+L(LOG_64_unsupported): 
+
+//    
+//    Return generated NaN or other value .
+//    
+
+{ .mfb
+      nop.m 999
+(p0)  fmpy.s.s0 FR_Input_X = FR_Input_X, f0 
+(p0)  br.ret.sptk   b0 ;;
+}
+
+L(LOG_64_negative): 
+
+{ .mfi
+      nop.m 999
+//     
+//    Deal with x < 0 in a special way 
+//    
+(p0)  frcpa.s0 FR_Output_X_tmp, p8 =  f0, f0 
+//     
+//    Deal with x < 0 in a special way - raise
+//    invalid and produce QNaN indefinite.
+//    
+(p0)  mov   GR_Parameter_TAG = 143;;
+}
+
+.endp log1pf#
+ASM_SIZE_DIRECTIVE(log1pf)
+
+.proc __libm_error_region
+__libm_error_region:
+L(LOG_ERROR_Support): 
 .prologue
+// Error path: pass the input, f0 and the tentative result on the stack to __libm_error_support#.
+// (1) Compute the parameter 2 address, save ar.pfs and gp, allocate a 64-byte frame
 { .mfi
-        add   GR_Parameter_Y = -32,sp         // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS = ar.pfs             // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp = -64,sp                       // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP = gp                   // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
+
+
+// (2) Store f0 as parameter 2, form the parameter 1 address, save b0
 { .mmi
-        stfs [GR_Parameter_Y] = FR_Y,16       // STORE Parameter 2 on stack
+        stfs [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
         add GR_Parameter_X = 16,sp            // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0 = b0                   // Save b0
+        mov GR_SAVE_B0=b0                     // Save b0
 };;
+
 .body
+// (3) Store parameter 1 and the tentative result, then call the error handler
 { .mib
-        stfs [GR_Parameter_X] = FR_X          // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
-        nop.b 0
+        stfs [GR_Parameter_X] =FR_Input_X               // STORE Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
+        nop.b 0                                      
 }
 { .mib
-        stfs [GR_Parameter_Y] = FR_RESULT     // STORE Parameter 3 on stack
+        stfs [GR_Parameter_Y] = FR_Output_X_tmp         // STORE Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#           // Call error handling function
 };;
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
+
+// (4) Reload the result from the stack and restore sp, b0, gp and ar.pfs
 { .mmi
-        ldfs  f8 = [GR_Parameter_RESULT]      // Get return result off stack
+        ldfs  FR_Input_X = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
-        add   sp = 64,sp                      // Restore stack pointer
-        mov   b0 = GR_SAVE_B0                 // Restore return address
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
 { .mib
-        mov   gp = GR_SAVE_GP                 // Restore gp
-        mov   ar.pfs = GR_SAVE_PFS            // Restore ar.pfs
-        br.ret.sptk     b0                    // Return
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk   b0                       // Return
 };;
-LOCAL_LIBM_END(__libm_error_region)
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+
+.proc __libm_LOG_main 
+__libm_LOG_main:
+L(LOG_main): 
+
+//    Final combination of the kernel outputs Y_hi and Y_lo into FR_Input_X.
+//    kernel_log_64 computes ln(X + E)
+//    (p7):  FR_Input_X = Y_lo + Y_hi, rounded to single precision
+
+{ .mfi
+	nop.m 999
+(p7)  fadd.s.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
+        nop.i 999
+}
+//    (p14): form the @ltoff address of Constants_1_by_LN10, then load the table pointer
+{ .mmi
+	nop.m 999
+	nop.m 999
+(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;; 
+}
+//    NOTE(review): p14 is set outside this block; presumably it selects a log10 variant - confirm
+{ .mmi
+      nop.m 999
+(p14) ld8    GR_Table_Base = [GR_Table_Base]
+      nop.i 999
+};;
+//    (p14): load the two table entries into FR_1LN10_hi and FR_1LN10_lo
+{ .mmi
+(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;; 
+(p14) ldfe FR_1LN10_lo = [GR_Table_Base]
+	nop.i 999 ;;
+}
+//    (p14): tmp = Y_lo * 1LN10_hi
+{ .mfi
+	nop.m 999
+(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi
+	nop.i 999 ;;
+}
+//    (p14): tmp = Y_hi * 1LN10_lo + tmp
+{ .mfi
+	nop.m 999
+(p14) fma.s1  FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp
+	nop.i 999 ;;
+}
+//    (p14): FR_Input_X = Y_hi * 1LN10_hi + tmp, rounded to single; then return
+{ .mfb
+	nop.m 999
+(p14) fma.s.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp
+(p0)  br.ret.sptk   b0 ;; 
+}
+.endp __libm_LOG_main
+ASM_SIZE_DIRECTIVE(__libm_LOG_main)
+
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
-
diff --git a/sysdeps/ia64/fpu/s_log1pl.S b/sysdeps/ia64/fpu/s_log1pl.S
index d392a58edf..7cd3f7834c 100644
--- a/sysdeps/ia64/fpu/s_log1pl.S
+++ b/sysdeps/ia64/fpu/s_log1pl.S
@@ -1,10 +1,10 @@
 .file "log1pl.s" 
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,49 +35,55 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // History: 
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  hand-optimized
+// 4/04/00  Unwind support added
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/21/01 Removed logl and log10l, putting them in a separate file
-// 06/29/01 Improved speed of all paths
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align;
-//          used data8 for long double table values
 //
-//*********************************************************************
+// *********************************************************************
 //
-//*********************************************************************
+// *********************************************************************
 //
-// Function:   log1pl(x) = ln(x+1), for double-extended precision x values
+// Function:   Combined logl(x), log1pl(x), and log10l(x) where
+//             logl(x)   = ln(x), for double-extended precision x values
+//             log1pl(x) = ln(x+1), for double-extended precision x values
+//             log10l(x) = log (x), for double-extended precision x values
+//                           10
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Resources Used:
 //
 //    Floating-Point Registers: f8 (Input and Return Value)
-//                              f34-f82
+//                              f9,f33-f55,f99 
 //
 //    General Purpose Registers:
-//      r32-r56
-//      r53-r56 (Used to pass arguments to error handling routine)
+//      r32-r53
+//      r54-r57 (Used to pass arguments to error handling routine)
 //
-//    Predicate Registers:      p6-p13
+//    Predicate Registers:      p6-p15
 //
-//*********************************************************************
+// *********************************************************************
 //
 // IEEE Special Conditions:
 //
-//    Denormal fault raised on denormal inputs
+//    Denormal  fault raised on denormal inputs
 //    Overflow exceptions cannot occur  
 //    Underflow exceptions raised when appropriate for log1p 
+//    (Error Handling Routine called for underflow)
 //    Inexact raised when appropriate by algorithm
 //
+//    logl(inf) = inf
+//    logl(-inf) = QNaN 
+//    logl(+/-0) = -inf 
+//    logl(SNaN) = QNaN
+//    logl(QNaN) = QNaN
+//    logl(EM_special Values) = QNaN
 //    log1pl(inf) = inf
 //    log1pl(-inf) = QNaN 
 //    log1pl(+/-0) = +/-0 
@@ -85,37 +91,54 @@
 //    log1pl(SNaN) = QNaN
 //    log1pl(QNaN) = QNaN
 //    log1pl(EM_special Values) = QNaN
-//
-//*********************************************************************
-//
+//    log10l(inf) = inf
+//    log10l(-inf) = QNaN 
+//    log10l(+/-0) = -inf 
+//    log10l(SNaN) = QNaN
+//    log10l(QNaN) = QNaN
+//    log10l(EM_special Values) = QNaN
+//
+// *********************************************************************
+//
+// Computation is based on the following kernel.
+//
+// ker_log_64( in_FR    :  X,
+// 	    in_FR    :  E,
+// 	    in_FR    :  Em1,
+// 	    in_GR    :  Expo_Range,
+// 	    out_FR   :  Y_hi,
+// 	    out_FR   :  Y_lo,
+// 	    out_FR   :  Scale,
+// 	    out_PR   :  Safe  )
+// 
 // Overview
 //
 // The method consists of three cases.
 //
-// If      |X| < 2^(-80)	use case log1p_small;
-// else    |X| < 2^(-7)	        use case log_near1;
-// else      			use case log_regular;
+// If	|X+Em1| < 2^(-80)	use case log1pl_small;
+// elseif	|X+Em1| < 2^(-7)	use case log_near1;
+// else				use case log_regular;
 //
-// Case log1p_small:
+// Case log1pl_small:
 //
-//   log1pl( X ) = logl( X+1 ) can be approximated by X
+// logl( 1 + (X+Em1) ) can be approximated by (X+Em1).
 //
 // Case log_near1:
 //
-//   log1pl( X ) = log( X+1 ) can be approximated by a simple polynomial
-//   in W = X. This polynomial resembles the truncated Taylor
+//   logl( 1 + (X+Em1) ) can be approximated by a simple polynomial
+//   in W = X+Em1. This polynomial resembles the truncated Taylor
 //   series W - W^/2 + W^3/3 - ...
 // 
 // Case log_regular:
 //
 //   Here we use a table lookup method. The basic idea is that in
-//   order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2), 
-//   we construct a value G such that G*Arg is close to 1 and that
+//   order to compute logl(Arg) for an argument Arg in [1,2), we 
+//   construct a value G such that G*Arg is close to 1 and that
 //   logl(1/G) is obtainable easily from a table of values calculated
 //   beforehand. Thus
 //
-//      logl(Arg) = logl(1/G) + logl(G*Arg)
-//      	 = logl(1/G) + logl(1 + (G*Arg - 1))
+//	logl(Arg) = logl(1/G) + logl(G*Arg)
+//		 = logl(1/G) + logl(1 + (G*Arg - 1))
 //
 //   Because |G*Arg - 1| is small, the second term on the right hand
 //   side can be approximated by a short polynomial. We elaborate
@@ -123,9 +146,9 @@
 //
 //   Step 0: Initialization
 //
-//   We need to calculate logl( X+1 ). Obtain N, S_hi such that
+//   We need to calculate logl( E + X ). Obtain N, S_hi, S_lo such that
 //
-//      X+1 = 2^N * ( S_hi + S_lo )   exactly
+//	E + X = 2^N * ( S_hi + S_lo )	exactly
 //
 //   where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
 //   that |S_lo| <= ulp(S_hi).
@@ -134,8 +157,8 @@
 //
 //   Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
 //
-//      G := G_1 * G_2 * G_3
-//      r := (G * S_hi - 1) + G * S_lo
+//	G := G_1 * G_2 * G_3
+//	r := (G * S_hi - 1)  + G * S_lo
 //
 //   These G_j's have the property that the product is exactly 
 //   representable and that |r| < 2^(-12) as a result.
@@ -148,34 +171,61 @@
 //   Step 3: Reconstruction
 //
 //
-//   Finally, log1pl( X ) = logl( X+1 ) is given by
+//   Finally, logl( E + X ) is given by
 //
-//   logl( X+1 )   =   logl( 2^N * (S_hi + S_lo) )
+//   logl( E + X )   =   logl( 2^N * (S_hi + S_lo) )
 //                 ~=~  N*logl(2) + logl(1/G) + logl(1 + r)
 //                 ~=~  N*logl(2) + logl(1/G) + poly(r).
 //
 // **** Algorithm ****
 //
-// Case log1p_small:
-//
-// Although log1pl(X) is basically X, we would like to preserve the inexactness
-// nature as well as consistent behavior under different rounding modes.
-// We can do this by computing the result as 
-//    
-//     log1pl(X) = X - X*X
-//
+// Case log1pl_small:
+//
+// Although logl(1 + (X+Em1)) is basically X+Em1, we would like to 
+// preserve the inexactness nature as well as consistent behavior
+// under different rounding modes. Note that this case can only be
+// taken if E is set to be 1.0. In this case, Em1 is zero, and
+// X can be very tiny and thus the final result can possibly underflow.
+// Thus, we compare X against a threshold that is dependent on the
+// input Expo_Range. If |X| is smaller than this threshold, we set
+// SAFE to be FALSE. 
+//
+// The result is returned as Y_hi, Y_lo, and in the case that SAFE
+// is FALSE, an additional value Scale is also returned.
+//
+//	W    := X + Em1
+//      Threshold := Threshold_Table( Expo_Range )
+//      Tiny      := Tiny_Table( Expo_Range )
+//
+//      If ( |W| > Threshold ) then
+//         Y_hi  := W
+//         Y_lo  := -W*W
+//      Else
+//         Y_hi  := W
+//         Y_lo  := -Tiny
+//         Scale := 2^(-100)
+//         Safe  := FALSE
+//      EndIf
+//
+//
+// One may think that Y_lo should be -W*W/2; however, it does not matter
+// as Y_lo will be rounded off completely except for the correct effect in 
+// directed rounding. Clearly -W*W is simpler to compute. Moreover,
+// because of the difference in exponent value, Y_hi + Y_lo or 
+// Y_hi + Scale*Y_lo is always inexact.
 //
 // Case log_near1:
 //
 // Here we compute a simple polynomial. To exploit parallelism, we split
 // the polynomial into two portions.
 // 
-//       W := X
-//       Wsq := W * W
-//       W4  := Wsq*Wsq
-//       W6  := W4*Wsq
-//       Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4))
-//       Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
+// 	W := X + Em1
+// 	Wsq := W * W
+// 	W4  := Wsq*Wsq
+// 	W6  := W4*Wsq
+// 	Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4)))
+// 	Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
+//      set lsb(Y_lo) to be 1
 //
 // Case log_regular:
 //
@@ -184,87 +234,89 @@
 //   Step 0. Initialization
 //   ----------------------
 //
-//   Z := X + 1
+//   Z := X + E
 //   N := unbaised exponent of Z
 //   S_hi := 2^(-N) * Z
-//   S_lo := 2^(-N) * { (max(X,1)-Z) + min(X,1) }
+//   S_lo := 2^(-N) * { (max(X,E)-Z) + min(X,E) }
+//
+//   Note that S_lo is always 0 for the case E = 0.
 //
 //   Step 1. Argument Reduction
 //   --------------------------
 //
 //   Let
 //
-//      Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
+//	Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
 //
 //   We obtain G_1, G_2, G_3 by the following steps.
 //
 //
-//      Define		X_0 := 1.d_1 d_2 ... d_14. This is extracted
-//      		from S_hi.
+//	Define		X_0 := 1.d_1 d_2 ... d_14. This is extracted
+//			from S_hi.
 //
-//      Define		A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
-//      		to lsb = 2^(-4).
+//	Define		A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
+//			to lsb = 2^(-4).
 //
-//      Define		index_1 := [ d_1 d_2 d_3 d_4 ].
+//	Define		index_1 := [ d_1 d_2 d_3 d_4 ].
 //
-//      Fetch 		Z_1 := (1/A_1) rounded UP in fixed point with
-//      fixed point	lsb = 2^(-15).
-//      		Z_1 looks like z_0.z_1 z_2 ... z_15
-//      	        Note that the fetching is done using index_1.
-//      		A_1 is actually not needed in the implementation
-//      		and is used here only to explain how is the value
-//      		Z_1 defined.
+//	Fetch 		Z_1 := (1/A_1) rounded UP in fixed point with
+//	fixed point	lsb = 2^(-15).
+//			Z_1 looks like z_0.z_1 z_2 ... z_15
+//		        Note that the fetching is done using index_1.
+//			A_1 is actually not needed in the implementation
+//			and is used here only to explain how the value
+//			Z_1 is defined.
 //
-//      Fetch		G_1 := (1/A_1) truncated to 21 sig. bits.
-//      floating pt.	Again, fetching is done using index_1. A_1
-//      		explains how G_1 is defined.
+//	Fetch		G_1 := (1/A_1) truncated to 21 sig. bits.
+//	floating pt.	Again, fetching is done using index_1. A_1
+//			explains how G_1 is defined.
 //
-//      Calculate	X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
-//      		     = 1.0 0 0 0 d_5 ... d_14
-//      		This is accomplised by integer multiplication.
-//      		It is proved that X_1 indeed always begin
-//      		with 1.0000 in fixed point.
+//	Calculate	X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
+//			     = 1.0 0 0 0 d_5 ... d_14
+//			This is accomplished by integer multiplication.
+//			It is proved that X_1 indeed always begins
+//			with 1.0000 in fixed point.
 //
 //
-//      Define		A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 
-//      		truncated to lsb = 2^(-8). Similar to A_1,
-//      		A_2 is not needed in actual implementation. It
-//      		helps explain how some of the values are defined.
+//	Define		A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1 
+//			truncated to lsb = 2^(-8). Similar to A_1,
+//			A_2 is not needed in actual implementation. It
+//			helps explain how some of the values are defined.
 //
-//      Define		index_2 := [ d_5 d_6 d_7 d_8 ].
+//	Define		index_2 := [ d_5 d_6 d_7 d_8 ].
 //
-//      Fetch 		Z_2 := (1/A_2) rounded UP in fixed point with
-//      fixed point	lsb = 2^(-15). Fetch done using index_2.
-//      		Z_2 looks like z_0.z_1 z_2 ... z_15
+//	Fetch 		Z_2 := (1/A_2) rounded UP in fixed point with
+//	fixed point	lsb = 2^(-15). Fetch done using index_2.
+//			Z_2 looks like z_0.z_1 z_2 ... z_15
 //
-//      Fetch		G_2 := (1/A_2) truncated to 21 sig. bits.
-//      floating pt.
+//	Fetch		G_2 := (1/A_2) truncated to 21 sig. bits.
+//	floating pt.
 //
-//      Calculate	X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
-//      		     = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+//			This is accomplished by integer multiplication.
+//			It is proved that X_2 indeed always begins
-//      		with 1.00000000 in fixed point.
+//	Calculate	X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
+//			     = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+//			This is accomplised by integer multiplication.
+//			It is proved that X_2 indeed always begin
+//			with 1.00000000 in fixed point.
 //
 //
-//      Define		A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
-//      		This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+//	Define		A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
+//			This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
 //
-//      Define		index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+//	Define		index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
 //
-//      Fetch		G_3 := (1/A_3) truncated to 21 sig. bits.
-//      floating pt.	Fetch is done using index_3.
+//	Fetch		G_3 := (1/A_3) truncated to 21 sig. bits.
+//	floating pt.	Fetch is done using index_3.
 //
-//      Compute		G := G_1 * G_2 * G_3. 
+//	Compute		G := G_1 * G_2 * G_3. 
 //
-//      This is done exactly since each of G_j only has 21 sig. bits.
+//	This is done exactly since each of G_j only has 21 sig. bits.
 //
-//      Compute   
+//	Compute   
 //
-//      	r := (G*S_hi - 1) + G*S_lo using 2 FMA operations.
+//		r := (G*S_hi - 1) + G*S_lo   using 2 FMA operations.
 //
-//      Thus r approximates G*(S_hi + S_lo) - 1 to within a couple of
-//      rounding errors.
+//	thus, r approximates G*(S_hi+S_lo) - 1 to within a couple of 
+//	rounding errors.
 //
 //
 //  Step 2. Approximation
@@ -274,878 +326,1258 @@
 //   reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
 //   thus logl(1+r) can be approximated by a short polynomial:
 //
-//      logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+//	logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
 //
 //
 //  Step 3. Reconstruction
 //  ----------------------
 //
-//   This step computes the desired result of logl(X+1):
+//   This step computes the desired result of logl(X+E):
 //
-//      logl(X+1) =   logl( 2^N * (S_hi + S_lo) )
-//      	  =   N*logl(2) + logl( S_hi + S_lo) )
-//      	  =   N*logl(2) + logl(1/G) +
-//      	      logl(1 + G * ( S_hi + S_lo ) - 1 )
+//	logl(X+E)  =   logl( 2^N * (S_hi + S_lo) )
+//		  =   N*logl(2) + logl( S_hi + S_lo )
+//		  =   N*logl(2) + logl(1/G) +
+//		      logl(1 + G*(S_hi+S_lo) - 1 )
 //
 //   logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers:
 //   log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
 //   single-precision numbers and the low parts are double precision
 //   numbers. These have the property that
 //
-//      N*log2_hi + SUM ( log1byGj_hi )
+//	N*log2_hi + SUM ( log1byGj_hi )
 //
 //   is computable exactly in double-extended precision (64 sig. bits).
 //   Finally
 //
-//      Y_hi := N*log2_hi + SUM ( log1byGj_hi )
-//      Y_lo := poly_hi + [ poly_lo + 
-//              ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
+//	Y_hi := N*log2_hi + SUM ( log1byGj_hi )
+//	Y_lo := poly_hi + [ poly_lo + 
+//	        ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
+//      set lsb(Y_lo) to be 1
 //
 
-RODATA
-.align 64
-
-// ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
+#include "libm_support.h"
 
-// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1 
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
-LOCAL_OBJECT_START(Constants_P)
-//data4  0xEFD62B15,0xE3936754,0x00003FFB,0x00000000
-//data4  0xA5E56381,0x8003B271,0x0000BFFC,0x00000000
-//data4  0x73282DB0,0x9249248C,0x00003FFC,0x00000000
-//data4  0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000
-//data4  0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000
-//data4  0x00067ED5,0x80000000,0x0000BFFD,0x00000000
-//data4  0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000
-//data4  0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000
-data8  0xE3936754EFD62B15,0x00003FFB
-data8  0x8003B271A5E56381,0x0000BFFC
-data8  0x9249248C73282DB0,0x00003FFC
-data8  0xAAAAAA9F47305052,0x0000BFFC
-data8  0xCCCCCCCCCCD17FC9,0x00003FFC
-data8  0x8000000000067ED5,0x0000BFFD
-data8  0xAAAAAAAAAAAAAAAA,0x00003FFD
-data8  0xFFFFFFFFFFFFFFFE,0x0000BFFD
-LOCAL_OBJECT_END(Constants_P)
+// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1
 
+.align 64
+Constants_P:	// 8 entries of 16 bytes: 64-bit value as two 32-bit words (low first), a third word, zero pad
+ASM_TYPE_DIRECTIVE(Constants_P,@object)
+data4  0xEFD62B15,0xE3936754,0x00003FFB,0x00000000	// P_8
+data4  0xA5E56381,0x8003B271,0x0000BFFC,0x00000000	// P_7
+data4  0x73282DB0,0x9249248C,0x00003FFC,0x00000000	// P_6
+data4  0x47305052,0xAAAAAA9F,0x0000BFFC,0x00000000	// P_5
+data4  0xCCD17FC9,0xCCCCCCCC,0x00003FFC,0x00000000	// P_4
+data4  0x00067ED5,0x80000000,0x0000BFFD,0x00000000	// P_3
+data4  0xAAAAAAAA,0xAAAAAAAA,0x00003FFD,0x00000000	// P_2
+data4  0xFFFFFFFE,0xFFFFFFFF,0x0000BFFD,0x00000000	// P_1
+ASM_SIZE_DIRECTIVE(Constants_P)
+ 
 // log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1 
 
-LOCAL_OBJECT_START(Constants_Q)
-//data4  0x00000000,0xB1721800,0x00003FFE,0x00000000 
-//data4  0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
-//data4  0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
-//data4  0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
-//data4  0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
-//data4  0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 
-data8  0xB172180000000000,0x00003FFE
-data8  0x82E308654361C4C6,0x0000BFE2
-data8  0xCCCCCAF2328833CB,0x00003FFC
-data8  0x80000077A9D4BAFB,0x0000BFFD
-data8  0xAAAAAAAAAAABE3D2,0x00003FFD
-data8  0xFFFFFFFFFFFFDAB7,0x0000BFFD
-LOCAL_OBJECT_END(Constants_Q)
-
-// 1/ln10_hi, 1/ln10_lo
-
-LOCAL_OBJECT_START(Constants_1_by_LN10)
-//data4  0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000
-//data4  0xACCF70C8,0xD56EAABE,0x00003FBB,0x00000000
-data8  0xDE5BD8A937287195,0x00003FFD
-data8  0xD56EAABEACCF70C8,0x00003FBB
-LOCAL_OBJECT_END(Constants_1_by_LN10)
-
-
-// Z1 - 16 bit fixed
+.align 64
+Constants_Q:	// 6 entries of 16 bytes: 64-bit value as two 32-bit words (low first), a third word, zero pad
+ASM_TYPE_DIRECTIVE(Constants_Q,@object)
+data4  0x00000000,0xB1721800,0x00003FFE,0x00000000 	// log2_hi
+data4  0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000	// log2_lo
+data4  0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000	// Q_4
+data4  0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000	// Q_3
+data4  0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000	// Q_2
+data4  0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000	// Q_1
+ASM_SIZE_DIRECTIVE(Constants_Q)
  
-LOCAL_OBJECT_START(Constants_Z_1)
-data4  0x00008000
-data4  0x00007879
-data4  0x000071C8
-data4  0x00006BCB
-data4  0x00006667
-data4  0x00006187
-data4  0x00005D18
-data4  0x0000590C
-data4  0x00005556
-data4  0x000051EC
-data4  0x00004EC5
-data4  0x00004BDB
-data4  0x00004925
-data4  0x0000469F
-data4  0x00004445
-data4  0x00004211
-LOCAL_OBJECT_END(Constants_Z_1)
-
-// G1 and H1 - IEEE single and h1 - IEEE double
-
-LOCAL_OBJECT_START(Constants_G_H_h1)
-data4  0x3F800000,0x00000000
-data8  0x0000000000000000
-data4  0x3F70F0F0,0x3D785196
-data8  0x3DA163A6617D741C
-data4  0x3F638E38,0x3DF13843
-data8  0x3E2C55E6CBD3D5BB
-data4  0x3F579430,0x3E2FF9A0
-data8  0xBE3EB0BFD86EA5E7
-data4  0x3F4CCCC8,0x3E647FD6
-data8  0x3E2E6A8C86B12760
-data4  0x3F430C30,0x3E8B3AE7
-data8  0x3E47574C5C0739BA
-data4  0x3F3A2E88,0x3EA30C68
-data8  0x3E20E30F13E8AF2F
-data4  0x3F321640,0x3EB9CEC8
-data8  0xBE42885BF2C630BD
-data4  0x3F2AAAA8,0x3ECF9927
-data8  0x3E497F3497E577C6
-data4  0x3F23D708,0x3EE47FC5
-data8  0x3E3E6A6EA6B0A5AB
-data4  0x3F1D89D8,0x3EF8947D
-data8  0xBDF43E3CD328D9BE
-data4  0x3F17B420,0x3F05F3A1
-data8  0x3E4094C30ADB090A
-data4  0x3F124920,0x3F0F4303
-data8  0xBE28FBB2FC1FE510
-data4  0x3F0D3DC8,0x3F183EBF
-data8  0x3E3A789510FDE3FA
-data4  0x3F088888,0x3F20EC80
-data8  0x3E508CE57CC8C98F
-data4  0x3F042108,0x3F29516A
-data8  0xBE534874A223106C
-LOCAL_OBJECT_END(Constants_G_H_h1)
-
-// Z2 - 16 bit fixed
-
-LOCAL_OBJECT_START(Constants_Z_2)
-data4  0x00008000
-data4  0x00007F81
-data4  0x00007F02
-data4  0x00007E85
-data4  0x00007E08
-data4  0x00007D8D
-data4  0x00007D12
-data4  0x00007C98
-data4  0x00007C20
-data4  0x00007BA8
-data4  0x00007B31
-data4  0x00007ABB
-data4  0x00007A45
-data4  0x000079D1
-data4  0x0000795D
-data4  0x000078EB
-LOCAL_OBJECT_END(Constants_Z_2)
-
-// G2 and H2 - IEEE single and h2 - IEEE double
-
-LOCAL_OBJECT_START(Constants_G_H_h2)
-data4  0x3F800000,0x00000000
-data8  0x0000000000000000
-data4  0x3F7F00F8,0x3B7F875D
-data8  0x3DB5A11622C42273
-data4  0x3F7E03F8,0x3BFF015B
-data8  0x3DE620CF21F86ED3
-data4  0x3F7D08E0,0x3C3EE393
-data8  0xBDAFA07E484F34ED
-data4  0x3F7C0FC0,0x3C7E0586
-data8  0xBDFE07F03860BCF6
-data4  0x3F7B1880,0x3C9E75D2
-data8  0x3DEA370FA78093D6
-data4  0x3F7A2328,0x3CBDC97A
-data8  0x3DFF579172A753D0
-data4  0x3F792FB0,0x3CDCFE47
-data8  0x3DFEBE6CA7EF896B
-data4  0x3F783E08,0x3CFC15D0
-data8  0x3E0CF156409ECB43
-data4  0x3F774E38,0x3D0D874D
-data8  0xBE0B6F97FFEF71DF
-data4  0x3F766038,0x3D1CF49B
-data8  0xBE0804835D59EEE8
-data4  0x3F757400,0x3D2C531D
-data8  0x3E1F91E9A9192A74
-data4  0x3F748988,0x3D3BA322
-data8  0xBE139A06BF72A8CD
-data4  0x3F73A0D0,0x3D4AE46F
-data8  0x3E1D9202F8FBA6CF
-data4  0x3F72B9D0,0x3D5A1756
-data8  0xBE1DCCC4BA796223
-data4  0x3F71D488,0x3D693B9D
-data8  0xBE049391B6B7C239
-LOCAL_OBJECT_END(Constants_G_H_h2)
-
-// G3 and H3 - IEEE single and h3 - IEEE double 
-
-LOCAL_OBJECT_START(Constants_G_H_h3)
-data4  0x3F7FFC00,0x38800100
-data8  0x3D355595562224CD
-data4  0x3F7FF400,0x39400480
-data8  0x3D8200A206136FF6
-data4  0x3F7FEC00,0x39A00640
-data8  0x3DA4D68DE8DE9AF0
-data4  0x3F7FE400,0x39E00C41
-data8  0xBD8B4291B10238DC
-data4  0x3F7FDC00,0x3A100A21
-data8  0xBD89CCB83B1952CA
-data4  0x3F7FD400,0x3A300F22
-data8  0xBDB107071DC46826
-data4  0x3F7FCC08,0x3A4FF51C
-data8  0x3DB6FCB9F43307DB
-data4  0x3F7FC408,0x3A6FFC1D
-data8  0xBD9B7C4762DC7872
-data4  0x3F7FBC10,0x3A87F20B
-data8  0xBDC3725E3F89154A
-data4  0x3F7FB410,0x3A97F68B
-data8  0xBD93519D62B9D392
-data4  0x3F7FAC18,0x3AA7EB86
-data8  0x3DC184410F21BD9D
-data4  0x3F7FA420,0x3AB7E101
-data8  0xBDA64B952245E0A6
-data4  0x3F7F9C20,0x3AC7E701
-data8  0x3DB4B0ECAABB34B8
-data4  0x3F7F9428,0x3AD7DD7B
-data8  0x3D9923376DC40A7E
-data4  0x3F7F8C30,0x3AE7D474
-data8  0x3DC6E17B4F2083D3
-data4  0x3F7F8438,0x3AF7CBED
-data8  0x3DAE314B811D4394
-data4  0x3F7F7C40,0x3B03E1F3
-data8  0xBDD46F21B08F2DB1
-data4  0x3F7F7448,0x3B0BDE2F
-data8  0xBDDC30A46D34522B
-data4  0x3F7F6C50,0x3B13DAAA
-data8  0x3DCB0070B1F473DB
-data4  0x3F7F6458,0x3B1BD766
-data8  0xBDD65DDC6AD282FD
-data4  0x3F7F5C68,0x3B23CC5C
-data8  0xBDCDAB83F153761A
-data4  0x3F7F5470,0x3B2BC997
-data8  0xBDDADA40341D0F8F
-data4  0x3F7F4C78,0x3B33C711
-data8  0x3DCD1BD7EBC394E8
-data4  0x3F7F4488,0x3B3BBCC6
-data8  0xBDC3532B52E3E695
-data4  0x3F7F3C90,0x3B43BAC0
-data8  0xBDA3961EE846B3DE
-data4  0x3F7F34A0,0x3B4BB0F4
-data8  0xBDDADF06785778D4
-data4  0x3F7F2CA8,0x3B53AF6D
-data8  0x3DCC3ED1E55CE212
-data4  0x3F7F24B8,0x3B5BA620
-data8  0xBDBA31039E382C15
-data4  0x3F7F1CC8,0x3B639D12
-data8  0x3D635A0B5C5AF197
-data4  0x3F7F14D8,0x3B6B9444
-data8  0xBDDCCB1971D34EFC
-data4  0x3F7F0CE0,0x3B7393BC
-data8  0x3DC7450252CD7ADA
-data4  0x3F7F04F0,0x3B7B8B6D
-data8  0xBDB68F177D7F2A42
-LOCAL_OBJECT_END(Constants_G_H_h3)
-
-
-// Floating Point Registers
-
-FR_Input_X      = f8 
-
-FR_Y_hi         = f34  
-FR_Y_lo         = f35
-
-FR_Scale        = f36
-FR_X_Prime      = f37 
-FR_S_hi         = f38  
-FR_W            = f39
-FR_G            = f40
-
-FR_H            = f41
-FR_wsq          = f42 
-FR_w4           = f43
-FR_h            = f44
-FR_w6           = f45  
-
-FR_G2           = f46
-FR_H2           = f47
-FR_poly_lo      = f48
-FR_P8           = f49  
-FR_poly_hi      = f50
-
-FR_P7           = f51  
-FR_h2           = f52 
-FR_rsq          = f53  
-FR_P6           = f54
-FR_r            = f55  
-
-FR_log2_hi      = f56  
-FR_log2_lo      = f57  
-FR_p87          = f58  
-FR_p876         = f58  
-FR_p8765        = f58  
-FR_float_N      = f59 
-FR_Q4           = f60 
+// Z1 - 16 bit fixed, G1 and H1 - IEEE single, h1 - IEEE double
+ 
+.align 64
+Constants_Z_G_H_h1:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h1,@object)
+data4  0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
+data4  0x00007879,0x3F70F0F0,0x3D785196,0x00000000,0x617D741C,0x3DA163A6
+data4  0x000071C8,0x3F638E38,0x3DF13843,0x00000000,0xCBD3D5BB,0x3E2C55E6
+data4  0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000,0xD86EA5E7,0xBE3EB0BF
+data4  0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000,0x86B12760,0x3E2E6A8C
+data4  0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000,0x5C0739BA,0x3E47574C
+data4  0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000,0x13E8AF2F,0x3E20E30F
+data4  0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000,0xF2C630BD,0xBE42885B
+data4  0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000,0x97E577C6,0x3E497F34
+data4  0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000,0xA6B0A5AB,0x3E3E6A6E
+data4  0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000,0xD328D9BE,0xBDF43E3C
+data4  0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000,0x0ADB090A,0x3E4094C3
+data4  0x00004925,0x3F124920,0x3F0F4303,0x00000000,0xFC1FE510,0xBE28FBB2
+data4  0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000,0x10FDE3FA,0x3E3A7895
+data4  0x00004445,0x3F088888,0x3F20EC80,0x00000000,0x7CC8C98F,0x3E508CE5
+data4  0x00004211,0x3F042108,0x3F29516A,0x00000000,0xA223106C,0xBE534874
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h1)
+ 
+// Z2 - 16 bit fixed, G2 and H2 - IEEE single, h2 - IEEE double
+
+.align 64 
+Constants_Z_G_H_h2:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h2,@object)
+data4  0x00008000,0x3F800000,0x00000000,0x00000000,0x00000000,0x00000000
+data4  0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000,0x22C42273,0x3DB5A116
+data4  0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000,0x21F86ED3,0x3DE620CF
+data4  0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000,0x484F34ED,0xBDAFA07E
+data4  0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000,0x3860BCF6,0xBDFE07F0
+data4  0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000,0xA78093D6,0x3DEA370F
+data4  0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000,0x72A753D0,0x3DFF5791
+data4  0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000,0xA7EF896B,0x3DFEBE6C
+data4  0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000,0x409ECB43,0x3E0CF156
+data4  0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000,0xFFEF71DF,0xBE0B6F97
+data4  0x00007B31,0x3F766038,0x3D1CF49B,0x00000000,0x5D59EEE8,0xBE080483
+data4  0x00007ABB,0x3F757400,0x3D2C531D,0x00000000,0xA9192A74,0x3E1F91E9
+data4  0x00007A45,0x3F748988,0x3D3BA322,0x00000000,0xBF72A8CD,0xBE139A06
+data4  0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000,0xF8FBA6CF,0x3E1D9202
+data4  0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000,0xBA796223,0xBE1DCCC4
+data4  0x000078EB,0x3F71D488,0x3D693B9D,0x00000000,0xB6B7C239,0xBE049391
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h2)
+ 
+// G3 and H3 - IEEE single and h3 - IEEE double
+
+.align 64 
+Constants_Z_G_H_h3:
+ASM_TYPE_DIRECTIVE(Constants_Z_G_H_h3,@object)
+data4  0x3F7FFC00,0x38800100,0x562224CD,0x3D355595
+data4  0x3F7FF400,0x39400480,0x06136FF6,0x3D8200A2
+data4  0x3F7FEC00,0x39A00640,0xE8DE9AF0,0x3DA4D68D
+data4  0x3F7FE400,0x39E00C41,0xB10238DC,0xBD8B4291
+data4  0x3F7FDC00,0x3A100A21,0x3B1952CA,0xBD89CCB8
+data4  0x3F7FD400,0x3A300F22,0x1DC46826,0xBDB10707
+data4  0x3F7FCC08,0x3A4FF51C,0xF43307DB,0x3DB6FCB9
+data4  0x3F7FC408,0x3A6FFC1D,0x62DC7872,0xBD9B7C47
+data4  0x3F7FBC10,0x3A87F20B,0x3F89154A,0xBDC3725E
+data4  0x3F7FB410,0x3A97F68B,0x62B9D392,0xBD93519D
+data4  0x3F7FAC18,0x3AA7EB86,0x0F21BD9D,0x3DC18441
+data4  0x3F7FA420,0x3AB7E101,0x2245E0A6,0xBDA64B95
+data4  0x3F7F9C20,0x3AC7E701,0xAABB34B8,0x3DB4B0EC
+data4  0x3F7F9428,0x3AD7DD7B,0x6DC40A7E,0x3D992337
+data4  0x3F7F8C30,0x3AE7D474,0x4F2083D3,0x3DC6E17B
+data4  0x3F7F8438,0x3AF7CBED,0x811D4394,0x3DAE314B
+data4  0x3F7F7C40,0x3B03E1F3,0xB08F2DB1,0xBDD46F21
+data4  0x3F7F7448,0x3B0BDE2F,0x6D34522B,0xBDDC30A4
+data4  0x3F7F6C50,0x3B13DAAA,0xB1F473DB,0x3DCB0070
+data4  0x3F7F6458,0x3B1BD766,0x6AD282FD,0xBDD65DDC
+data4  0x3F7F5C68,0x3B23CC5C,0xF153761A,0xBDCDAB83
+data4  0x3F7F5470,0x3B2BC997,0x341D0F8F,0xBDDADA40
+data4  0x3F7F4C78,0x3B33C711,0xEBC394E8,0x3DCD1BD7
+data4  0x3F7F4488,0x3B3BBCC6,0x52E3E695,0xBDC3532B
+data4  0x3F7F3C90,0x3B43BAC0,0xE846B3DE,0xBDA3961E
+data4  0x3F7F34A0,0x3B4BB0F4,0x785778D4,0xBDDADF06
+data4  0x3F7F2CA8,0x3B53AF6D,0xE55CE212,0x3DCC3ED1
+data4  0x3F7F24B8,0x3B5BA620,0x9E382C15,0xBDBA3103
+data4  0x3F7F1CC8,0x3B639D12,0x5C5AF197,0x3D635A0B
+data4  0x3F7F14D8,0x3B6B9444,0x71D34EFC,0xBDDCCB19
+data4  0x3F7F0CE0,0x3B7393BC,0x52CD7ADA,0x3DC74502
+data4  0x3F7F04F0,0x3B7B8B6D,0x7D7F2A42,0xBDB68F17
+ASM_SIZE_DIRECTIVE(Constants_Z_G_H_h3)
+ 
+// 
+//  Exponent Thresholds and Tiny Thresholds
+//  for 8, 11, 15, and 17 bit exponents
+// 
+//  Expo_Range             Value
+// 
+//  0 (8  bits)            2^(-126)
+//  1 (11 bits)            2^(-1022)
+//  2 (15 bits)            2^(-16382)
+//  3 (17 bits)            2^(-16382)
+// 
+//  Tiny_Table
+//  ----------
+//  Expo_Range             Value
+// 
+//  0 (8  bits)            2^(-16382)
+//  1 (11 bits)            2^(-16382)
+//  2 (15 bits)            2^(-16382)
+//  3 (17 bits)            2^(-16382)
+// 
 
-FR_p43          = f61  
-FR_p432         = f61  
-FR_p4321        = f61  
-FR_P4           = f62  
-FR_G3           = f63  
-FR_H3           = f64  
-FR_h3           = f65  
+.align 64 
+Constants_Threshold:
+ASM_TYPE_DIRECTIVE(Constants_Threshold,@object)
+data4  0x00000000,0x80000000,0x00003F81,0x00000000
+data4  0x00000000,0x80000000,0x00000001,0x00000000
+data4  0x00000000,0x80000000,0x00003C01,0x00000000
+data4  0x00000000,0x80000000,0x00000001,0x00000000
+data4  0x00000000,0x80000000,0x00000001,0x00000000
+data4  0x00000000,0x80000000,0x00000001,0x00000000
+data4  0x00000000,0x80000000,0x00000001,0x00000000
+data4  0x00000000,0x80000000,0x00000001,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_Threshold)
 
-FR_Q3           = f66  
-FR_P3           = f67  
-FR_Q2           = f68 
-FR_P2           = f69  
-FR_1LN10_hi     = f70 
+.align 64
+Constants_1_by_LN10:
+ASM_TYPE_DIRECTIVE(Constants_1_by_LN10,@object)
+data4  0x37287195,0xDE5BD8A9,0x00003FFD,0x00000000
+data4  0xACCF70C8,0xD56EAABE,0x00003FBB,0x00000000
+ASM_SIZE_DIRECTIVE(Constants_1_by_LN10)
+
+FR_Input_X = f8 
+FR_Neg_One = f9
+FR_E       = f33
+FR_Em1     = f34
+FR_Y_hi    = f34  
+// Shared with Em1
+FR_Y_lo    = f35
+FR_Scale   = f36
+FR_X_Prime = f37 
+FR_Z       = f38 
+FR_S_hi    = f38  
+// Shared with Z  
+FR_W       = f39
+FR_G       = f40
+FR_wsq     = f40 
+// Shared with G 
+FR_H       = f41
+FR_w4      = f41
+// Shared with H  
+FR_h       = f42
+FR_w6      = f42  
+// Shared with h     
+FR_G_tmp   = f43
+FR_poly_lo = f43
+// Shared with G_tmp 
+FR_P8      = f43  
+// Shared with G_tmp 
+FR_H_tmp   = f44
+FR_poly_hi = f44
+  // Shared with H_tmp
+FR_P7      = f44  
+// Shared with H_tmp
+FR_h_tmp   = f45 
+FR_rsq     = f45  
+// Shared with h_tmp
+FR_P6      = f45
+// Shared with h_tmp
+FR_abs_W   = f46
+FR_r       = f46  
+// Shared with abs_W  
+FR_AA      = f47 
+FR_log2_hi = f47  
+// Shared with AA  
+FR_BB          = f48
+FR_log2_lo     = f48  
+// Shared with BB  
+FR_S_lo        = f49 
+FR_two_negN    = f50  
+FR_float_N     = f51 
+FR_Q4          = f52 
+FR_dummy       = f52  
+// Shared with Q4
+FR_P4          = f52  
+// Shared with Q4
+FR_Threshold    = f52
+// Shared with Q4
+FR_Q3          = f53  
+FR_P3          = f53  
+// Shared with Q3
+FR_Tiny        = f53  
+// Shared with Q3
+FR_Q2          = f54 
+FR_P2          = f54  
+// Shared with Q2
+FR_1LN10_hi     = f54 
+// Shared with Q2
+FR_Q1           = f55 
+FR_P1           = f55 
+// Shared with Q1 
+FR_1LN10_lo     = f55 
+// Shared with Q1 
+FR_P5           = f98 
+FR_SCALE        = f98 
+FR_Output_X_tmp = f99 
+
+GR_Expo_Range   = r32
+GR_Table_Base   = r34
+GR_Table_Base1  = r35
+GR_Table_ptr    = r36 
+GR_Index2       = r37 
+GR_signif       = r38 
+GR_X_0          = r39 
+GR_X_1          = r40 
+GR_X_2          = r41 
+GR_Z_1          = r42 
+GR_Z_2          = r43 
+GR_N            = r44 
+GR_Bias         = r45 
+GR_M            = r46 
+GR_ScaleN       = r47  
+GR_Index3       = r48 
+GR_Perturb      = r49 
+GR_Table_Scale  = r50 
 
-FR_Q1           = f71 
-FR_P1           = f72 
-FR_1LN10_lo     = f73 
-FR_P5           = f74 
-FR_rcub         = f75 
+//
+// Added for unwind support
+//
 
-FR_Output_X_tmp = f76 
-FR_Neg_One      = f77 
-FR_Z            = f78 
-FR_AA           = f79 
-FR_BB           = f80 
-FR_S_lo         = f81 
-FR_2_to_minus_N = f82 
+GR_SAVE_PFS         = r51
+GR_SAVE_B0          = r52
+GR_SAVE_GP          = r53
+GR_Parameter_X      = r54
+GR_Parameter_Y      = r55
+GR_Parameter_RESULT = r56
+GR_Parameter_TAG    = r57
 
 FR_X                = f8
 FR_Y                = f0
-FR_RESULT           = f76
-
+FR_RESULT           = f99
 
-// General Purpose Registers
+.section .text
+.proc logl#
+.global logl#
+.align 64 
+logl:
+#ifdef _LIBC
+.global __ieee754_logl
+__ieee754_logl:
+#endif 
+{ .mfi
+alloc r32 = ar.pfs,0,22,4,0
+(p0)  fnorm.s1 FR_X_Prime = FR_Input_X 
+(p0)  cmp.eq.unc  p7, p0 = r0, r0 
+}
+{ .mfi
+(p0)  cmp.ne.unc  p14, p0 = r0, r0 
+(p0)  fclass.m.unc p6, p0 =  FR_Input_X, 0x1E3 
+(p0)  cmp.ne.unc  p15, p0 = r0, r0 ;; 
+}
+{ .mfi
+ nop.m 0
+(p0)  fclass.nm.unc p10, p0 =  FR_Input_X, 0x1FF 
+ nop.i 0
+}
+{ .mfi
+nop.m 999
+(p0)  fcmp.eq.unc.s1 p8, p0 =  FR_Input_X, f0 
+ nop.i 0
+}
+{ .mfi
+	nop.m 999
+(p0)  fcmp.lt.unc.s1 p13, p0 =  FR_Input_X, f0 
+ nop.i 0
+}
+{ .mfi
+	nop.m 999
+(p0)  fcmp.eq.unc.s1 p9, p0 =  FR_Input_X, f1 
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p0)  fsub.s1 FR_Em1 = f0,f1 
+	nop.i 999
+}
+{ .mfb
+	nop.m 999
+(p0)  fadd FR_E = f0,f0 
+//     
+//    Create E = 0 and Em1 = -1 
+//    Check for X == 1, meaning logl(1)
+//    Check for X < 0, meaning logl(negative)
+//    Check for X == 0, meaning logl(0)
+//    Identify NatVals, NaNs, Infs. 
+//    Identify EM unsupporteds. 
+//    Identify Negative values - use S1 so as
+//    not to raise denormal operand exception 
+//    Set p15 to false for log
+//    Set p14 to false for log
+//    Set p7 true for log and log1p
+//    
+(p0)  br.cond.sptk L(LOGL_BEGIN) ;; 
+}
 
-GR_ad_p         = r33
-GR_Index1       = r34 
-GR_Index2       = r35 
-GR_signif       = r36 
-GR_X_0          = r37 
-GR_X_1          = r38 
-GR_X_2          = r39 
-GR_minus_N      = r39
-GR_Z_1          = r40 
-GR_Z_2          = r41 
-GR_N            = r42 
-GR_Bias         = r43 
-GR_M            = r44 
-GR_Index3       = r45 
-GR_exp_2tom80   = r45 
-GR_ad_p2        = r46
-GR_exp_mask     = r47 
-GR_exp_2tom7    = r48 
-GR_ad_ln10      = r49 
-GR_ad_tbl_1     = r50
-GR_ad_tbl_2     = r51
-GR_ad_tbl_3     = r52
-GR_ad_q         = r53
-GR_ad_z_1       = r54
-GR_ad_z_2       = r55
-GR_ad_z_3       = r56
-GR_minus_N      = r39
+.endp logl
+ASM_SIZE_DIRECTIVE(logl)
 
-//
-// Added for unwind support
-//
+.section .text
+.proc log10l#
+.global log10l#
+.align 64 
+log10l:
+#ifdef _LIBC
+.global __ieee754_log10l
+__ieee754_log10l:
+#endif
+{ .mfi
+alloc r32 = ar.pfs,0,22,4,0
+(p0)  fadd FR_E = f0,f0 
+      nop.i 0
+}
+{ .mfi
+      nop.m 0
+(p0)  fsub.s1 FR_Em1 = f0,f1 
+      nop.i 0
+}
+{ .mfi
+(p0)  cmp.ne.unc  p15, p0 = r0, r0 
+(p0)  fcmp.eq.unc.s1 p9, p0 =  FR_Input_X, f1 
+      nop.i 0
+}
+{ .mfi
+(p0)  cmp.eq.unc  p14, p0 = r0, r0 
+(p0)  fcmp.lt.unc.s1 p13, p0 =  FR_Input_X, f0 
+(p0)  cmp.ne.unc  p7, p0 = r0, r0 ;; 
+}
+{ .mfi
+	nop.m 999
+(p0)  fcmp.eq.unc.s1 p8, p0 =  FR_Input_X, f0 
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
+(p0)  fclass.nm.unc p10, p0 =  FR_Input_X, 0x1FF 
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p0)  fclass.m.unc p6, p0 =  FR_Input_X, 0x1E3 
+	nop.i 999
+}
+{ .mfb
+	nop.m 999
+(p0)  fnorm.s1 FR_X_Prime = FR_Input_X 
+//     
+//    Create E = 0 and Em1 = -1 
+//    Check for X == 1, meaning log10l(1)
+//    Check for X < 0, meaning log10l(negative)
+//    Check for X == 0, meaning log10l(0)
+//    Identify NatVals, NaNs, Infs. 
+//    Identify EM unsupporteds. 
+//    Normalize x
+//    Identify Negative values - use S1 so as
+//    not to raise denormal operand exception 
+//    Set p15 to false for log10
+//    Set p14 to true for log10
+//    Set p7 to false for log10
+//    
+(p0)  br.cond.sptk L(LOGL_BEGIN) ;; 
+}
 
-GR_SAVE_PFS         = r50
-GR_SAVE_B0          = r51
-GR_SAVE_GP          = r52
-GR_Parameter_X      = r53
-GR_Parameter_Y      = r54
-GR_Parameter_RESULT = r55
-GR_Parameter_TAG    = r56
+.endp log10l
+ASM_SIZE_DIRECTIVE(log10l)
 
 .section .text
-GLOBAL_IEEE754_ENTRY(log1pl)
+.proc log1pl#
+.global log1pl#
+.align 64 
+log1pl:
+#ifdef _LIBC
+.global __log1pl
+__log1pl:
+#endif
 { .mfi
-      alloc r32 = ar.pfs,0,21,4,0
-      fclass.m p6, p0 =  FR_Input_X, 0x1E3  // Test for natval, nan, inf
-      nop.i 999
+alloc r32 = ar.pfs,0,22,4,0
+(p0)  fsub.s1 FR_Neg_One = f0,f1 
+(p0)  cmp.eq.unc  p7, p0 = r0, r0 
 }
 { .mfi
-      addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp
-      fma.s1 FR_Z = FR_Input_X, f1, f1      // x+1
-      nop.i 999
+(p0)  cmp.ne.unc  p14, p0 = r0, r0 
+(p0)  fnorm.s1 FR_X_Prime = FR_Input_X 
+(p0)  cmp.eq.unc  p15, p0 = r0, r0 ;; 
+}
+{ .mfi
+      nop.m 0
+(p0)  fclass.m.unc p6, p0 =  FR_Input_X, 0x1E3 
+      nop.i 0
 }
-;;
-
 { .mfi
       nop.m 999
-      fmerge.ns FR_Neg_One = f1, f1         // Form -1.0
-      nop.i 999
+(p0)  fclass.nm.unc p10, p0 =  FR_Input_X, 0x1FF 
+      nop.i 0
 }
 { .mfi
       nop.m 999
-      fnorm.s1 FR_X_Prime = FR_Input_X      // Normalize x
-      nop.i 999
+(p0)  fcmp.eq.unc.s1 p9, p0 =  FR_Input_X, f0 
+      nop.i 0 
 }
-;;
-
 { .mfi
-      ld8    GR_ad_z_1 = [GR_ad_z_1]          // Get pointer to Constants_Z_1
-      nop.f 999
-      mov GR_exp_2tom7 = 0x0fff8              // Exponent of 2^-7
+      nop.m 999
+(p0)  fadd FR_Em1 = f0,f0 
+      nop.i 999 ;;
 }
-;;
-
-{ .mfb
-      getf.sig GR_signif = FR_Z               // Get significand of x+1
-      fcmp.eq.s1 p9, p0 =  FR_Input_X, f0     // Test for x=0
-(p6)  br.cond.spnt LOG1P_special              // Branch for nan, inf, natval
+{ .mfi
+	nop.m 999
+(p0)  fadd FR_E = f0,f1 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      add   GR_ad_tbl_1 = 0x040, GR_ad_z_1    // Point to Constants_G_H_h1
-      fcmp.lt.s1 p13, p0 =  FR_X_Prime, FR_Neg_One // Test for x<-1
-      add   GR_ad_p = -0x100, GR_ad_z_1       // Point to Constants_P
+	nop.m 999
+(p0)  fcmp.eq.unc.s1 p8, p0 =  FR_Input_X, FR_Neg_One 
+	nop.i 999
 }
 { .mfi
-      add   GR_ad_z_2 = 0x140, GR_ad_z_1      // Point to Constants_Z_2
-      nop.f 999
-      add   GR_ad_tbl_2 = 0x180, GR_ad_z_1    // Point to Constants_G_H_h2
+	nop.m 999
+(p0)  fcmp.lt.unc.s1 p13, p0 =  FR_Input_X, FR_Neg_One 
+	nop.i 999
 }
-;;
-
+L(LOGL_BEGIN): 
 { .mfi
-      add   GR_ad_q = 0x080, GR_ad_p          // Point to Constants_Q
-      fcmp.eq.s1 p8, p0 =  FR_X_Prime, FR_Neg_One // Test for x=-1
-      extr.u GR_Index1 = GR_signif, 59, 4     // Get high 4 bits of signif
+	nop.m 999
+(p0)  fadd.s1 FR_Z = FR_X_Prime, FR_E 
+	nop.i 999
 }
-{ .mfb
-      add   GR_ad_tbl_3 = 0x280, GR_ad_z_1    // Point to Constants_G_H_h3
-      nop.f 999
-(p9)  br.ret.spnt  b0                         // Exit if x=0, return input
+{ .mlx
+	nop.m 999
+(p0)  movl GR_Table_Scale = 0x0000000000000018 ;; 
 }
-;;
-
-{ .mfi
-      shladd GR_ad_z_1 = GR_Index1, 2, GR_ad_z_1  // Point to Z_1
-      fclass.nm p10, p0 =  FR_Input_X, 0x1FF  // Test for unsupported
-      extr.u GR_X_0 = GR_signif, 49, 15       // Get high 15 bits of significand
+{ .mmi
+	nop.m 999
+	nop.m 999
+//     
+//    Create E = 1 and Em1 = 0 
+//    Check for X == 0, meaning logl(1+0)
+//    Check for X < -1, meaning logl(negative)
+//    Check for X == -1, meaning logl(0)
+//    Normalize x 
+//    Identify NatVals, NaNs, Infs. 
+//    Identify EM unsupporteds. 
+//    Identify Negative values - use S1 so as
+//    not to raise denormal operand exception 
+//    Set p15 to true for log1p
+//    Set p14 to false for log1p
+//    Set p7 true for log and log1p
+//    
+(p0)  addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp
 }
 { .mfi
-      ldfe FR_P8 = [GR_ad_p],16               // Load P_8 for near1 path
-      fsub.s1 FR_W = FR_X_Prime, f0           // W = x
-      add   GR_ad_ln10 = 0x060, GR_ad_q       // Point to Constants_1_by_LN10
+      nop.m 999
+(p0)  fmax.s1 FR_AA = FR_X_Prime, FR_E 
+      nop.i 999 ;;
 }
-;;
-
 { .mfi
-      ld4 GR_Z_1 = [GR_ad_z_1]                // Load Z_1
-      fmax.s1  FR_AA = FR_X_Prime, f1         // For S_lo, form AA = max(X,1.0)
-      mov GR_exp_mask = 0x1FFFF               // Create exponent mask
+      ld8    GR_Table_Base = [GR_Table_Base]
+(p0)  fmin.s1 FR_BB = FR_X_Prime, FR_E 
+      nop.i 999
+}
+{ .mfb
+      nop.m 999
+(p0)  fadd.s1 FR_W = FR_X_Prime, FR_Em1 
+//     
+//    Begin load of constants base
+//    FR_Z = Z = |x| + E 
+//    FR_W = W = |x| + Em1
+//    AA = fmax(|x|,E)
+//    BB = fmin(|x|,E)
+//
+(p6)  br.cond.spnt L(LOGL_64_special) ;; 
 }
 { .mib
-      shladd GR_ad_tbl_1 = GR_Index1, 4, GR_ad_tbl_1  // Point to G_1
-      mov GR_Bias = 0x0FFFF                   // Create exponent bias
-(p13) br.cond.spnt LOG1P_LT_Minus_1           // Branch if x<-1
+	nop.m 999
+	nop.i 999
+(p10) br.cond.spnt L(LOGL_64_unsupported) ;; 
 }
-;;
-
-{ .mfb
-      ldfps  FR_G, FR_H = [GR_ad_tbl_1],8     // Load G_1, H_1
-      fmerge.se FR_S_hi =  f1,FR_Z            // Form |x+1|
-(p8)  br.cond.spnt LOG1P_EQ_Minus_1           // Branch if x=-1
+{ .mib
+	nop.m 999
+	nop.i 999
+(p13) br.cond.spnt L(LOGL_64_negative) ;; 
 }
-;;
-
-{ .mmb
-      getf.exp GR_N =  FR_Z                   // Get N = exponent of x+1
-      ldfd  FR_h = [GR_ad_tbl_1]              // Load h_1
-(p10) br.cond.spnt LOG1P_unsupported          // Branch for unsupported type
+{ .mib
+(p0)  getf.sig GR_signif = FR_Z 
+	nop.i 999
+(p9)  br.cond.spnt L(LOGL_64_one) ;; 
 }
-;;
-
-{ .mfi
-      ldfe FR_log2_hi = [GR_ad_q],16          // Load log2_hi
-      fcmp.eq.s0 p8, p0 =  FR_Input_X, f0     // Dummy op to flag denormals
-      pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15    // Get bits 30-15 of X_0 * Z_1
+{ .mib
+	nop.m 999
+	nop.i 999
+(p8)  br.cond.spnt L(LOGL_64_zero) ;; 
 }
-;;
-
+{ .mfi
+(p0)  getf.exp GR_N =  FR_Z 
+//   
+//    Raise possible denormal operand exception 
+//    Create Bias
+// 
+//    This function computes ln( x + e ) 
+//    Input  FR 1: FR_X   = FR_Input_X          
+//    Input  FR 2: FR_E   = FR_E
+//    Input  FR 3: FR_Em1 = FR_Em1 
+//    Input  GR 1: GR_Expo_Range = GR_Expo_Range = 1
+//    Output FR 4: FR_Y_hi  
+//    Output FR 5: FR_Y_lo  
+//    Output FR 6: FR_Scale  
+//    Output PR 7: PR_Safe  
 //
-//    For performance, don't use result of pmpyshr2.u for 4 cycles.
+(p0)  fsub.s1 FR_S_lo = FR_AA, FR_Z 
 //
+//    signif = getf.sig(Z)
+//    abs_W = fabs(w)
+//
+(p0)  extr.u GR_Table_ptr = GR_signif, 59, 4 ;; 
+}
+{ .mfi
+	nop.m 999
+(p0)  fmerge.se FR_S_hi =  f1,FR_Z 
+(p0)  extr.u GR_X_0 = GR_signif, 49, 15  
+}
 { .mmi
-      ldfe FR_log2_lo = [GR_ad_q],16          // Load log2_lo
-      sub GR_N = GR_N, GR_Bias 
-      mov GR_exp_2tom80 = 0x0ffaf             // Exponent of 2^-80
+       nop.m 999
+       nop.m 999
+(p0)  addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp ;; 
+}
+{ .mlx
+      ld8    GR_Table_Base1 = [GR_Table_Base1]
+(p0)  movl GR_Bias = 0x000000000000FFFF ;; 
 }
-;;
-
 { .mfi
-      ldfe FR_Q4 = [GR_ad_q],16               // Load Q4
-      fms.s1  FR_S_lo = FR_AA, f1, FR_Z       // Form S_lo = AA - Z 
-      sub GR_minus_N = GR_Bias, GR_N          // Form exponent of 2^(-N)
+	nop.m 999
+(p0)  fabs FR_abs_W =  FR_W 
+(p0)  pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0 
 }
-;;
-
-{ .mmf
-      ldfe FR_Q3 = [GR_ad_q],16               // Load Q3
-      setf.sig FR_float_N = GR_N   // Put integer N into rightmost significand
-      fmin.s1  FR_BB = FR_X_Prime, f1         // For S_lo, form BB = min(X,1.0)
+{ .mfi
+	nop.m 999
+//    
+//    Branch out for special input values 
+//    
+(p0)  fcmp.lt.unc.s0 p8, p0 =  FR_Input_X, f0 
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
+//
+//    X_0 = extr.u(signif,49,15)
+//    Index1 = extr.u(signif,59,4)
+//
+(p0)  fadd.s1 FR_S_lo = FR_S_lo, FR_BB 
+	nop.i 999 ;;
+}
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//    Offset_to_Z1 = 24 * Index1
+//    For performance, don't use result
+//    for 3 or 4 cycles.
+//
+(p0)  add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;; 
+}
+//
+//    Add Base to Offset for Z1
+//    Create Bias
 { .mmi
-      getf.exp GR_M = FR_W                    // Get signexp of w = x
-      ldfe FR_Q2 = [GR_ad_q],16               // Load Q2
-      extr.u GR_Index2 = GR_X_1, 6, 4         // Extract bits 6-9 of X_1 
+(p0)  ld4 GR_Z_1 = [GR_Table_ptr],4 ;; 
+(p0)  ldfs  FR_G = [GR_Table_ptr],4 
+	nop.i 999 ;;
 }
-;;
-
 { .mmi
-      ldfe FR_Q1 = [GR_ad_q]                  // Load Q1
-      shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2  // Point to Z_2
-      add GR_ad_p2  = 0x30,GR_ad_p            // Point to P_4
+(p0)  ldfs  FR_H = [GR_Table_ptr],8 ;; 
+(p0)  ldfd  FR_h = [GR_Table_ptr],0 
+(p0)  pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 
+}
+//
+//    Load Z_1 
+//    Get Base of Table2 
+//
+{ .mfi
+(p0)  getf.exp GR_M = FR_abs_W 
+	nop.f 999
+	nop.i 999 ;;
+}
+{ .mii
+	nop.m 999
+	nop.i 999 ;;
+//
+//    M = getf.exp(abs_W)
+//    S_lo = AA - Z
+//    X_1 = pmpyshr2(X_0,Z_1,15)
+//
+(p0)  sub GR_M = GR_M, GR_Bias ;; 
+}
+//     
+//    M = M - Bias
+//    Load G1
+//    N = getf.exp(Z)
+//
+{ .mii
+(p0)  cmp.gt.unc  p11, p0 =  -80, GR_M 
+(p0)  cmp.gt.unc  p12, p0 =  -7, GR_M ;; 
+(p0)  extr.u GR_Index2 = GR_X_1, 6, 4 ;; 
+}
+{ .mib
+	nop.m 999
+//
+//    if -80 > M, set p11
+//    Index2 = extr.u(X_1,6,4)
+//    if -7  > M, set p12
+//    Load H1
+//
+(p0)  pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0 
+(p11) br.cond.spnt L(log1pl_small) ;; 
+}
+{ .mib
+      nop.m 999
+	nop.i 999
+(p12) br.cond.spnt L(log1pl_near) ;; 
+}
+{ .mii
+(p0)  sub GR_N = GR_N, GR_Bias 
+//
+//    poly_lo = r * poly_lo 
+//
+(p0)  add GR_Perturb = 0x1, r0 ;; 
+(p0)  sub GR_ScaleN = GR_Bias, GR_N  
+}
+{ .mii
+(p0)  setf.sig FR_float_N = GR_N 
+	nop.i 999 ;;
+//
+//    Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)
+//    Load h1
+//    S_lo = S_lo + BB 
+//    Branch for -80 > M
+//   
+(p0)  add GR_Index2 = GR_Index2, GR_Table_Base1
 }
-;;
-
 { .mmi
-      ld4 GR_Z_2 = [GR_ad_z_2]                // Load Z_2
-      shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2  // Point to G_2
-      and GR_M = GR_exp_mask, GR_M            // Get exponent of w = x
+(p0)  setf.exp FR_two_negN = GR_ScaleN 
+      nop.m 999
+(p0)  addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp ;; 
 }
-;;
-
+//
+//    Index2 points to Z2
+//    Branch for -7 > M
+//
+{ .mmb
+(p0)  ld4 GR_Z_2 = [GR_Index2],4 
+(p0)  ld8 GR_Table_Base = [GR_Table_Base] 
+	nop.b 999 ;;
+}
+(p0)  nop.i 999
+//
+//    Load Z_2
+//    N = N - Bias
+//    Tablebase points to Table3
+//
 { .mmi
-      ldfps  FR_G2, FR_H2 = [GR_ad_tbl_2],8   // Load G_2, H_2
-      cmp.lt  p8, p9 =  GR_M, GR_exp_2tom7    // Test |x| < 2^-7
-      cmp.lt  p7, p0 =  GR_M, GR_exp_2tom80   // Test |x| < 2^-80
+(p0)  ldfs  FR_G_tmp = [GR_Index2],4 ;; 
+//
+//    Load G_2
+//    pmpyshr2  X_2= (X_1,Z_2,15)
+//    float_N = setf.sig(N)
+//    ScaleN = Bias - N
+//
+(p0)  ldfs  FR_H_tmp = [GR_Index2],8 
+	nop.i 999 ;;
 }
-;;
-
-// Small path is separate code
-//  p7 is for the small path: |x| < 2^-80
-// near1 and regular paths are merged.
-//  p8 is for the near1 path: |x| < 2^-7
-//  p9 is for regular path:   |x| >= 2^-7
-
+//
+//    Load H_2
+//    two_negN = setf.exp(scaleN)
+//    G = G_1 * G_2
+//
 { .mfi
-      ldfd  FR_h2 = [GR_ad_tbl_2]             // Load h_2
-      nop.f 999
-      nop.i 999
+(p0)  ldfd  FR_h_tmp = [GR_Index2],0 
+	nop.f 999
+(p0)  pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;; 
 }
-{ .mfb
-(p9)  setf.exp FR_2_to_minus_N = GR_minus_N   // Form 2^(-N)
-(p7)  fnma.s0  f8 = FR_X_Prime, FR_X_Prime, FR_X_Prime // Result x - x*x
-(p7)  br.ret.spnt  b0                         // Branch if |x| < 2^-80
+{ .mii
+	nop.m 999
+(p0)  extr.u GR_Index3 = GR_X_2, 1, 5 ;; 
+//
+//    Load h_2
+//    H = H_1 + H_2 
+//    h = h_1 + h_2 
+//    Index3 = extr.u(X_2,1,5)
+//
+(p0)  shladd GR_Index3 = GR_Index3,4,GR_Table_Base 
 }
-;;
-
 { .mmi
-(p8)  ldfe FR_P7 = [GR_ad_p],16               // Load P_7 for near1 path
-(p8)  ldfe FR_P4 = [GR_ad_p2],16              // Load P_4 for near1 path
-(p9)  pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15    // Get bits 30-15 of X_1 * Z_2
+	nop.m 999
+	nop.m 999
+//
+//    float_N = fcvt.xf(float_N)
+//    load G3
+//
+(p0)  addl GR_Table_Base = @ltoff(Constants_Q#),gp ;; 
 }
-;;
+{ .mmi
+      nop.m 999
+      ld8    GR_Table_Base = [GR_Table_Base]
+      nop.i 999
+};;
 
+{ .mfi
+(p0)  ldfe FR_log2_hi = [GR_Table_Base],16 
+(p0)  fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN 
+	nop.i 999 ;;
+}
+{ .mmf
+	nop.m 999
 //
-//    For performance, don't use result of pmpyshr2.u for 4 cycles.
+//    G = G3 * G
+//    Load h3
+//    Load log2_hi
+//    H = H + H3
 //
+(p0)  ldfe FR_log2_lo = [GR_Table_Base],16 
+(p0)  fmpy.s1 FR_G = FR_G, FR_G_tmp ;; 
+}
 { .mmf
-(p8)  ldfe FR_P6 = [GR_ad_p],16               // Load P_6 for near1 path
-(p8)  ldfe FR_P3 = [GR_ad_p2],16              // Load P_3 for near1 path
-(p9)  fma.s1  FR_S_lo = FR_S_lo, f1, FR_BB    // S_lo = S_lo + BB
+(p0)  ldfs  FR_G_tmp = [GR_Index3],4 
+//
+//    h = h + h3
+//    r = G * S_hi + 1 
+//    Load log2_lo
+//
+(p0)  ldfe FR_Q4 = [GR_Table_Base],16 
+(p0)  fadd.s1 FR_h = FR_h, FR_h_tmp ;; 
+}
+{ .mfi
+(p0)  ldfe FR_Q3 = [GR_Table_Base],16 
+(p0)  fadd.s1 FR_H = FR_H, FR_H_tmp 
+	nop.i 999 ;;
 }
-;;
-
 { .mmf
-(p8)  ldfe FR_P5 = [GR_ad_p],16               // Load P_5 for near1 path
-(p8)  ldfe FR_P2 = [GR_ad_p2],16              // Load P_2 for near1 path
-(p8)  fmpy.s1 FR_wsq = FR_W, FR_W             // wsq = w * w for near1 path
+(p0)  ldfs  FR_H_tmp = [GR_Index3],4 
+(p0)  ldfe FR_Q2 = [GR_Table_Base],16 
+//
+//    Compute Index for Table3
+//    S_lo = S_lo * two_negN
+//
+(p0)  fcvt.xf FR_float_N = FR_float_N ;; 
 }
-;;
-
-{ .mmi
-(p8)  ldfe FR_P1 = [GR_ad_p2],16 ;;           // Load P_1 for near1 path
-      nop.m 999
-(p9)  extr.u GR_Index3 = GR_X_2, 1, 5         // Extract bits 1-5 of X_2
+//
+//    If S_lo == 0, set p8 false
+//    Load H3
+//    Load ptr to table of polynomial coeff.
+//
+{ .mmf
+(p0)  ldfd  FR_h_tmp = [GR_Index3],0 
+(p0)  ldfe FR_Q1 = [GR_Table_Base],0 
+(p0)  fcmp.eq.unc.s1 p0, p8 =  FR_S_lo, f0 ;; 
 }
-;;
-
 { .mfi
-(p9)  shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3  // Point to G_3
-(p9)  fcvt.xf FR_float_N = FR_float_N
-      nop.i 999
+	nop.m 999
+(p0)  fmpy.s1 FR_G = FR_G, FR_G_tmp 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p9)  ldfps  FR_G3, FR_H3 = [GR_ad_tbl_3],8   // Load G_3, H_3
-      nop.f 999
-      nop.i 999
+	nop.m 999
+(p0)  fadd.s1 FR_H = FR_H, FR_H_tmp 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p9)  ldfd  FR_h3 = [GR_ad_tbl_3]             // Load h_3
-(p9)  fmpy.s1 FR_G = FR_G, FR_G2              // G = G_1 * G_2
-      nop.i 999
+	nop.m 999
+(p0)  fms.s1 FR_r = FR_G, FR_S_hi, f1 
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p9)  fadd.s1 FR_H = FR_H, FR_H2              // H = H_1 + H_2
-      nop.i 999
+	nop.m 999
+(p0)  fadd.s1 FR_h = FR_h, FR_h_tmp 
+	nop.i 999 ;;
 }
-;;
-
-{ .mmf
-      nop.m 999
-      nop.m 999
-(p9)  fadd.s1 FR_h = FR_h, FR_h2              // h = h_1 + h_2
+{ .mfi
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p8)  fmpy.s1 FR_w4 = FR_wsq, FR_wsq          // w4 = w^4 for near1 path
-      nop.i 999
+	nop.m 999
+//
+//    Load Q4 
+//    Load Q3 
+//    Load Q2 
+//    Load Q1 
+//
+(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r 
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p8)  fma.s1 FR_p87 = FR_W, FR_P8, FR_P7      // p87 = w * P8 + P7
-      nop.i 999
+	nop.m 999
+//
+//    poly_lo = r * Q4 + Q3
+//    rsq = r* r
+//
+(p0)  fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1  FR_S_lo = FR_S_lo, FR_2_to_minus_N, f0 // S_lo = S_lo * 2^(-N)
-      nop.i 999
+	nop.m 999
+//
+//    If (S_lo!=0) r = s_lo * G + r
+//
+(p0)  fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 
+	nop.i 999
 }
+//
+//    Create a 0x00000....01
+//    poly_lo = poly_lo * rsq + h
+//
 { .mfi
-      nop.m 999
-(p8)  fma.s1 FR_p43 = FR_W, FR_P4, FR_P3      // p43 = w * P4 + P3
-      nop.i 999
+(p0)  setf.sig FR_dummy = GR_Perturb 
+(p0)  fmpy.s1 FR_rsq = FR_r, FR_r 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fmpy.s1 FR_G = FR_G, FR_G3              // G = (G_1 * G_2) * G_3
-      nop.i 999
+	nop.m 999
+//
+//    h = N * log2_lo + h 
+//    Y_hi = n * log2_hi + H 
+//
+(p0)  fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p9)  fadd.s1 FR_H = FR_H, FR_H3              // H = (H_1 + H_2) + H_3
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fadd.s1 FR_h = FR_h, FR_h3              // h = (h_1 + h_2) + h_3
-      nop.i 999
+	nop.m 999
+//
+//    poly_lo = r * poly_lo + Q2 
+//    poly_hi = Q1 * rsq + r 
+//
+(p0)  fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r 
+	nop.i 999 ;;
 }
 { .mfi
+	nop.m 999
+(p0)  fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h 
+	nop.i 999 ;;
+}
+{ .mfb
+	nop.m 999
+(p0)  fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo 
+//
+//    Create the FR for a binary "or"
+//    Y_lo = poly_hi + poly_lo
+//
+// (p0)  for FR_dummy = FR_Y_lo,FR_dummy ;;
+//
+//    Turn the lsb of Y_lo ON
+//
+// (p0)  fmerge.se FR_Y_lo =  FR_Y_lo,FR_dummy ;;
+//
+//    Merge the new lsb into Y_lo, for alone doesn't
+//
+(p0)  br.cond.sptk LOGL_main ;; 
+}
+L(log1pl_near): 
+{ .mmi
+	nop.m 999
+	nop.m 999
+//    /*******************************************************/
+//    /*********** Branch log1pl_near  ************************/
+//    /*******************************************************/
+(p0)  addl GR_Table_Base = @ltoff(Constants_P#),gp ;; 
+}
+{ .mmi
       nop.m 999
-(p8)  fmpy.s1 FR_w6 = FR_w4, FR_wsq           // w6 = w^6 for near1 path
+      ld8    GR_Table_Base = [GR_Table_Base]
       nop.i 999
+};;
+//
+//    Load base address of poly. coeff.
+//
+{ .mmb
+(p0)  add GR_Table_ptr = 0x40,GR_Table_Base  
+//
+//    Address tables with separate pointers 
+//
+(p0)  ldfe FR_P8 = [GR_Table_Base],16 
+	nop.b 999 ;;
+}
+{ .mmb
+(p0)  ldfe FR_P4 = [GR_Table_ptr],16 
+//
+//    Load P4
+//    Load P8
+//
+(p0)  ldfe FR_P7 = [GR_Table_Base],16 
+	nop.b 999 ;;
+}
+{ .mmf
+(p0)  ldfe FR_P3 = [GR_Table_ptr],16 
+//
+//    Load P3
+//    Load P7
+//
+(p0)  ldfe FR_P6 = [GR_Table_Base],16 
+(p0)  fmpy.s1 FR_wsq = FR_W, FR_W ;; 
 }
-;;
-
 { .mfi
-      nop.m 999
-(p8)  fma.s1 FR_p432 = FR_W, FR_p43, FR_P2    // p432 = w * p43 + P2
-      nop.i 999
+(p0)  ldfe FR_P2 = [GR_Table_ptr],16 
+	nop.f 999
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p8)  fma.s1 FR_p876 = FR_W, FR_p87, FR_P6    // p876 = w * p87 + P6
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3 
+	nop.i 999
 }
-;;
-
+//
+//    Load P2
+//    Load P6
+//    Wsq = w * w
+//    Y_hi = p4 * w + p3
+//
 { .mfi
-      nop.m 999
-(p9)  fms.s1 FR_r = FR_G, FR_S_hi, f1         // r = G * S_hi - 1
-      nop.i 999
+(p0)  ldfe FR_P5 = [GR_Table_Base],16 
+(p0)  fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi = N * log2_hi + H
-      nop.i 999
+(p0)  ldfe FR_P1 = [GR_Table_ptr],16 
+//
+//    Load P1
+//    Load P5
+//    Y_lo = p8 * w + P7
+//
+(p0)  fmpy.s1 FR_w4 = FR_wsq, FR_wsq 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h  // h = N * log2_lo + h
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_r = FR_G, FR_S_lo, FR_r        // r = G * S_lo + (G * S_hi - 1)
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6 
+(p0)  add GR_Perturb = 0x1, r0 ;; 
 }
-;;
-
 { .mfi
-      nop.m 999
-(p8)  fma.s1 FR_p4321 = FR_W, FR_p432, FR_P1      // p4321 = w * p432 + P1
-      nop.i 999
+	nop.m 999
+//
+//    w4 = w2 * w2 
+//    Y_hi = y_hi * w + p2 
+//    Y_lo = y_lo * w + p6 
+//    Create perturbation bit
+//
+(p0)  fmpy.s1 FR_w6 = FR_w4, FR_wsq 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p8)  fma.s1 FR_p8765 = FR_W, FR_p876, FR_P5      // p8765 = w * p876 + P5
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1 
+	nop.i 999
 }
-;;
-
+//
+//    Y_hi = y_hi * w + p1 
+//    w6 = w4 * w2 
+//
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3      // poly_lo = r * Q4 + Q3
-      nop.i 999
+(p0)  setf.sig FR_Q4 = GR_Perturb 
+(p0)  fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5 
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p9)  fmpy.s1 FR_rsq = FR_r, FR_r                 // rsq = r * r
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 FR_dummy = FR_wsq,FR_Y_hi, f0 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p8)  fma.s1 FR_Y_lo = FR_wsq, FR_p4321, f0       // Y_lo = wsq * p4321
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 FR_Y_hi = FR_W,f1,f0 
+	nop.i 999
+};;
+{ .mfb
+	nop.m 999
+//
+//    Y_hi = w 
+//    Y_lo = y_lo * w + p5 
+//
+(p0)  fma.s1 FR_Y_lo = FR_w6, FR_Y_lo,FR_dummy 
+//
+//    Y_lo = y_lo * w6   + y_high order part. 
+//
+//    performance
+//
+(p0)  br.cond.sptk LOGL_main ;; 
+}
+L(log1pl_small): 
+{ .mmi
+	nop.m 999
+//  /*******************************************************/
+//  /*********** Branch log1pl_small  ***********************/
+//  /*******************************************************/
+(p0)  addl GR_Table_Base = @ltoff(Constants_Threshold#),gp
 }
 { .mfi
       nop.m 999
-(p8)  fma.s1 FR_Y_hi = FR_W, f1, f0               // Y_hi = w for near1 path
-      nop.i 999
+(p0)  mov FR_Em1 = FR_W 
+(p0)  cmp.eq.unc  p7, p0 = r0, r0 ;; 
+}
+{ .mlx
+      ld8    GR_Table_Base = [GR_Table_Base]
+(p0)  movl GR_Expo_Range = 0x0000000000000004 ;; 
+}
+//
+//    Set Safe to true
+//    Set Expo_Range = 0 for single
+//    Set Expo_Range = 2 for double 
+//    Set Expo_Range = 4 for double-extended 
+//
+{ .mmi
+(p0)  shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;; 
+(p0)  ldfe FR_Threshold = [GR_Table_Base],16 
+	nop.i 999
+}
+{ .mlx
+	nop.m 999
+(p0)  movl GR_Bias = 0x000000000000FF9B ;; 
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo = poly_lo * r + Q2
-      nop.i 999
+(p0)  ldfe FR_Tiny = [GR_Table_Base],0 
+	nop.f 999
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_rcub = FR_rsq, FR_r, f0           // rcub = r^3
-      nop.i 999
+	nop.m 999
+(p0)  fcmp.gt.unc.s1 p13, p12 =  FR_abs_W, FR_Threshold 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p8)  fma.s1 FR_Y_lo = FR_w6, FR_p8765,FR_Y_lo // Y_lo = w6 * p8765 + w2 * p4321
-      nop.i 999
+	nop.m 999
+(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W 
+	nop.i 999
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r     // poly_hi = Q1 * rsq + r
-      nop.i 999
+	nop.m 999
+(p13) fadd FR_SCALE = f0, f1 
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h // poly_lo = poly_lo*r^3 + h
-      nop.i 999
+	nop.m 999
+(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny 
+(p12) cmp.ne.unc  p7, p0 = r0, r0 
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo    // Y_lo = poly_hi + poly_lo 
-      nop.i 999
+(p12) setf.exp FR_SCALE = GR_Bias 
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
-// Remainder of code is common for near1 and regular paths
 { .mfb
-      nop.m 999
-      fadd.s0  f8 = FR_Y_lo,FR_Y_hi               // Result=Y_lo+Y_hi
-      br.ret.sptk   b0                       // Common exit for 2^-80 < x < inf
+	nop.m 999
+//
+//    Set p7 to SAFE = FALSE
+//    Set Scale = 2^-100 
+//
+(p0)  fma.s0 f8 = FR_Y_lo,FR_SCALE,FR_Y_hi
+(p0)  br.ret.sptk   b0 ;; 
 }
-;;
-
-
-// Here if x=-1
-LOG1P_EQ_Minus_1: 
+L(LOGL_64_one): 
+{ .mfb
+	nop.m 999
+(p0)  fmpy.s0 f8 = FR_Input_X, f0 
+(p0)  br.ret.sptk   b0 ;; 
+}
+//    
+//    Raise divide by zero for +/-0 input.
+//    
+L(LOGL_64_zero): 
+{ .mfi
+(p0)  mov   GR_Parameter_TAG = 0
 //
-//    If x=-1 raise divide by zero and return -inf
+//    If we have logl(1), log10l(1) or log1pl(0), return 0.
 //  
-{ .mfi
-      mov   GR_Parameter_TAG = 138
-      fsub.s1 FR_Output_X_tmp = f0, f1 
-      nop.i 999
+(p0)  fsub.s0 FR_Output_X_tmp = f0, f1 
+	nop.i 999 ;;
+}
+{ .mii
+(p14) mov   GR_Parameter_TAG = 6 
+	nop.i 999 ;;
+(p15) mov   GR_Parameter_TAG = 138 ;; 
 }
-;;
-
 { .mfb
-      nop.m 999
-      frcpa.s0 FR_Output_X_tmp, p8 =  FR_Output_X_tmp, f0 
-      br.cond.sptk __libm_error_region
+	nop.m 999
+(p0)  frcpa.s0 FR_Output_X_tmp, p8 =  FR_Output_X_tmp, f0 
+(p0)  br.cond.sptk __libm_error_region ;; 
+}
+{ .mfb
+	nop.m 999
+//     
+//    Report that logl(0) computed
+//
+(p0)  mov   FR_Input_X     = FR_Output_X_tmp
+(p0)  br.ret.sptk   b0 ;;
 }
-;;
 
-LOG1P_special: 
+L(LOGL_64_special): 
 { .mfi
-      nop.m 999
-      fclass.m.unc p8, p0 =  FR_Input_X, 0x1E1  // Test for natval, nan, +inf
-      nop.i 999
+	nop.m 999
+//    
+//    Return -Inf or value from handler.
+//    
+(p0)  fclass.m.unc p7, p0 =  FR_Input_X, 0x1E1 
+	nop.i 999 ;;
 }
-;;
-
+{ .mfb
+	nop.m 999
+//     
+//    Check for Natval, QNan, SNaN, +Inf   
+//    
+(p7)  fmpy.s0 f8 =  FR_Input_X, f1 
 //     
 //    For SNaN raise invalid and return QNaN.
 //    For QNaN raise invalid and return QNaN.
 //    For +Inf return +Inf.
 //    
-{ .mfb
-      nop.m 999
-(p8)  fmpy.s0 f8 =  FR_Input_X, f1 
-(p8)  br.ret.sptk   b0                          // Return for natval, nan, +inf
+(p7)  br.ret.sptk   b0 ;;
 }
-;;
-
 //    
 //    For -Inf raise invalid and return QNaN.
 //    
+{ .mii
+(p0)  mov   GR_Parameter_TAG = 1
+	nop.i 999 ;;
+(p14) mov   GR_Parameter_TAG = 7 ;;
+}
+{ .mfi
+(p15) mov   GR_Parameter_TAG = 139 
+	nop.f 999
+	nop.i 999 ;;
+}
 { .mfb
-      mov   GR_Parameter_TAG = 139
-      fmpy.s0 FR_Output_X_tmp =  FR_Input_X, f0 
-      br.cond.sptk __libm_error_region
+	nop.m 999
+(p0)  fmpy.s0 FR_Output_X_tmp =  FR_Input_X, f0 
+(p0)  br.cond.sptk __libm_error_region ;; 
 }
-;;
-
-
-LOG1P_unsupported: 
+//     
+//    Report that logl(-Inf) computed
+//    Report that log10l(-Inf) computed
+//    Report that log1p(-Inf) computed
+//     
+{ .mfb
+      nop.m 0
+(p0)  mov   FR_Input_X     = FR_Output_X_tmp
+(p0)  br.ret.sptk   b0 ;;
+}
+L(LOGL_64_unsupported): 
+{ .mfb
+	nop.m 999
 //    
-//    Return generated NaN or other value.
+//    Return generated NaN or other value.
 //    
-{ .mfb
-      nop.m 999
-      fmpy.s0 f8 = FR_Input_X, f0 
-      br.ret.sptk   b0
+(p0)  fmpy.s0 f8 = FR_Input_X, f0 
+(p0)  br.ret.sptk   b0 ;;
 }
-;;
-
-// Here if -inf < x < -1
-LOG1P_LT_Minus_1: 
+L(LOGL_64_negative): 
+{ .mfi
+	nop.m 999
 //     
-//    Deal with x < -1 in a special way - raise
+//    Deal with x < 0 in a special way 
+//    
+(p0)  frcpa.s0 FR_Output_X_tmp, p8 =  f0, f0 
+//     
+//    Deal with x < 0 in a special way - raise
 //    invalid and produce QNaN indefinite.
 //    
-{ .mfb
-      mov   GR_Parameter_TAG = 139
-      frcpa.s0 FR_Output_X_tmp, p8 =  f0, f0
-      br.cond.sptk __libm_error_region
+(p0)  mov   GR_Parameter_TAG = 1 ;; 
 }
-;;
-
+{ .mii
+(p14) mov   GR_Parameter_TAG = 7
+	nop.i 999 ;;
+(p15) mov   GR_Parameter_TAG = 139
+}
+.endp log1pl
+ASM_SIZE_DIRECTIVE(log1pl) 
 
-GLOBAL_IEEE754_END(log1pl)
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_error_region
+__libm_error_region:
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -1177,8 +1609,8 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
 { .mmi
-        nop.m 999
-        nop.m 999
+        nop.m 0
+        nop.m 0
         add   GR_Parameter_RESULT = 48,sp
 };;
 { .mmi
@@ -1193,7 +1625,52 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
         br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region#)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.proc LOGL_main 
+LOGL_main: 
+{ .mfi
+	nop.m 999
+//
+//    kernel_log_64 computes ln(X + E)
+//
+(p7)  fadd.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
+      nop.i 0
+}
+{ .mmi
+      nop.m 999
+      nop.m 999
+(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;; 
+}
+{ .mmi
+      nop.m 999
+(p14) ld8    GR_Table_Base = [GR_Table_Base]
+      nop.i 999
+};;
+
+{ .mmi
+(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;; 
+(p14) ldfe FR_1LN10_lo = [GR_Table_Base]
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p14) fma.s1  FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp
+	nop.i 999 ;;
+}
+{ .mfb
+	nop.m 999
+(p14) fma.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp
+(p0)  br.ret.sptk   b0 ;; 
+}
+.endp LOGL_main
+ASM_SIZE_DIRECTIVE(LOGL_main) 
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_logb.S b/sysdeps/ia64/fpu/s_logb.S
index dfe581a826..76c4fe778e 100644
--- a/sysdeps/ia64/fpu/s_logb.S
+++ b/sysdeps/ia64/fpu/s_logb.S
@@ -1,10 +1,10 @@
 .file "logb.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,43 +20,41 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/16/00 Modified to conform to C9X
-// 03/16/00 Improved speed
-// 04/04/00 Unwind support added
-// 05/30/00 Fixed bug when x double-extended denormal
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 2/16/00  Modified to conform to C9X 
+// 3/16/00  Improved speed
+// 4/04/00  Unwind support added
+// 5/30/00  Fixed bug when x double-extended denormal
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance
 //
 // API
 //==============================================================
-// double logb( double x );
+// double logb( double x);
 //
 // Overview of operation
 //==============================================================
-// The logb function extracts the exponent of x as an integer in
-// floating-point format.
+// The logb function extracts the exponent of x as an integer in 
+// floating-point format. 
 // logb computes log2 of x as a double
 //
 // logb is similar to ilogb but differs in the  following ways:
@@ -73,168 +71,217 @@
 //
 // Registers used
 //==============================================================
-// general registers used:
-// r26 -> r38
-// r35 -> r38 used as parameters to error path
+// general registers used: 
+// ar.pfs r32 
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
 //
-// predicate registers used:
+// predicate registers used: 
 // p6, p7, p8
-// floating-point registers used:
+// floating-point registers used: 
 // f9, f10, f11
 // f8, input
 
-rExpBias            = r26
-rExpMask            = r27
-rSignexp_x          = r28
-rExp_x              = r29
-rTrueExp_x          = r30
-rExp_2to64          = r31
+#include "libm_support.h"
 
+GR_SAVE_B0          = r34
+GR_SAVE_GP          = r35
 GR_SAVE_PFS         = r32
-GR_SAVE_B0          = r33
-GR_SAVE_GP          = r34
 
-GR_Parameter_X      = r35
-GR_Parameter_Y      = r36
-GR_Parameter_RESULT = r37
-GR_Parameter_TAG    = r38
+GR_Parameter_X      = r38
+GR_Parameter_Y      = r39
+GR_Parameter_RESULT = r40
 
-fExp_in_signif      = f9
-fNorm_x             = f10
-fFloat_Exp          = f10
-f2to64              = f11
+.align 32
+.global logb#
 
 .section .text
-GLOBAL_LIBM_ENTRY(logb)
+.proc  logb#
+.align 32
 
-// X NORMAL
-// TrueExp_x = exp(f8) - 0xffff
-// sig = TrueExp_x
-// f8 = convert_to_fp (sig))
+
+logb: 
+
+// qnan snan inf norm     unorm 0 -+
+// 0    0    0   0        1     0 11
+// 0                      b
 { .mfi
-      getf.exp        rSignexp_x = f8
-      fclass.m        p8,p0 = f8, 0x0b   // Test for x unorm
-      mov             rExpBias = 0xffff  // Exponent bias
+      alloc          r32=ar.pfs,1,5,4,0
+(p0)  fclass.m.unc  p8,p0 = f8, 0x0b
+      nop.i 999
 }
+// X NORMAL
+// r37 = exp(f8) - - 0xffff
+// sig(f8) = r37
+// f8 = convert_to_fp (sig))
 { .mfi
-      nop.m           0
-      fnorm.s1        fNorm_x = f8
-      mov             rExpMask = 0x1ffff // Exponent mask
+(p0)  getf.exp      r35 = f8
+(p0)  fnorm         f10=f8
+      nop.i 999 ;;
 }
-;;
 
-// Form signexp of 2^64 in case need to scale denormal
-{ .mfb
-      mov             rExp_2to64 = 0x1003f
-      fclass.m        p6,p0 = f8, 0x1e3  // Test x natval, nan, inf
-(p8)  br.cond.spnt    LOGB_DENORM        // Branch if x unorm
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     0 11
+// e                      3
+{ .mmf
+(p0)  mov      r33 = 0xffff
+(p0)  mov      r34 = 0x1ffff
+(p0)  fclass.m.unc  p6,p0 = f8, 0xe3 ;;
 }
-;;
 
-LOGB_COMMON:
-// Return here from LOGB_DENORM
-{ .mfi
-      and             rExp_x = rSignexp_x, rExpMask // Get biased exponent
-      fclass.m        p7,p0 = f8, 0x07   // Test x zero
-      nop.i           0
+{ .mfb
+(p0)  and           r36 = r35, r34
+(p0)  fclass.m.unc  p7,p0 = f8, 0x07
+(p8)  br.cond.spnt  L(LOGB_DENORM) ;;
 }
-;;
 
-// X NAN or INFINITY, return f8 * f8
-{ .mfb
-      sub             rTrueExp_x = rExp_x, rExpBias // Get true exponent
-(p6)  fma.d.s0        f8= f8,f8,f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt     b0                 // Exit if x natval, nan, inf
+{ .mib
+(p0)  sub           r37 = r36, r33
+      nop.i 999
+(p6)  br.cond.spnt  L(LOGB_NAN_INF) ;;
 }
-;;
 
 { .mib
-      setf.sig        fExp_in_signif = rTrueExp_x // Exponent as integer in fp
+(p0)  setf.sig      f9  = r37
       nop.i 999
-(p7)  br.cond.spnt    LOGB_ZERO
+(p7)  br.cond.spnt  L(LOGB_ZERO) ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fcvt.xf       f10 = f9
+      nop.i 999 ;;
 }
-;;
 
-// Result can be represented in less than 24 bits, so no precision completer
-// is needed.
 { .mfb
-      nop.m           0
-      fcvt.xf         f8 = fExp_in_signif
-      br.ret.sptk     b0                 // Exit main path, 0 < |x| < inf
+      nop.m 999
+(p0)  fnorm.d       f8 = f10
+(p0)  br.ret.sptk    b0 ;;
 }
-;;
 
-LOGB_DENORM:
-// Form 2^64 in case need to scale denormal
+L(LOGB_DENORM):
+// Form signexp of 2^64 in case need to scale denormal
 // Check to see if double-extended denormal
 { .mfi
-      setf.exp        f2to64 = rExp_2to64
-      fclass.m        p8,p0 = fNorm_x, 0x0b
-      nop.i           0
+(p0)  mov r38 = 0x1003f
+(p0)  fclass.m.unc  p8,p0 = f10, 0x0b
+      nop.i 999 ;;
 }
-;;
 
+// Form 2^64 in case need to scale denormal
 { .mfi
-      nop.m           0
-      fcmp.eq.s0      p7,p0 = f8, f0           // Dummy op to set denormal flag
-      nop.i           0
+(p0)  setf.exp f11 = r38
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
 // If double-extended denormal add 64 to exponent bias for scaling
 // If double-extended denormal form x * 2^64 which is normal
 { .mfi
-(p8)  add             rExpBias = 64, rExpBias
-(p8)  fmpy.s1         fNorm_x = fNorm_x, f2to64
-      nop.i           0
+(p8)  add r33 = 64, r33
+(p8)  fmpy f10 = f10, f11    
+      nop.i 999 ;;
 }
-;;
 
 // Logic is the same as normal path but use normalized input
-{ .mib
-      getf.exp        rSignexp_x = fNorm_x
-      nop.i           0
-      br.cond.sptk    LOGB_COMMON              // Return to main path
+{ .mmi
+(p0)  getf.exp      r35 = f10 ;;
+      nop.m 999
+      nop.i 999 ;;
 }
-;;
 
-LOGB_ZERO:
-// Here if x zero
-// f10  = -|f8|
-// f9  = 1.0/f10 = -1.0/|f8| = -inf
+{ .mmi
+(p0)  and           r36 = r35, r34 ;;
+(p0)  sub           r37 = r36, r33
+      nop.i 999 ;;
+}
 
-{ .mmf
-      alloc           r32=ar.pfs,1,2,4,0
-      mov             GR_Parameter_TAG = 151  // Error code
-      fmerge.ns       f10 = f0,f8
+{ .mmi
+(p0)  setf.sig      f9  = r37
+      nop.m 999
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fcvt.xf       f10 = f9
+      nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      frcpa.s0        f9,p6 = f1,f10          // Produce -inf, Z flag
-      br.cond.sptk    __libm_error_region     // Call error support
+      nop.m 999
+(p0)  fnorm.d       f8 = f10
+(p0)  br.ret.sptk    b0 ;;
 }
-;;
 
-GLOBAL_LIBM_END(logb)
+L(LOGB_NAN_INF): 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+// X NAN or INFINITY, return f8 * f8
+{ .mfb
+      nop.m 999
+(p0)  fma.d      f8= f8,f8,f0
+(p0)  br.ret.sptk   b0 ;;
+}
+
+.endp logb#
+ASM_SIZE_DIRECTIVE(logb)
+
+// Stack operations when calling error support.
+//       (1)               (2)                          (3) (call)              (4)
+//   sp   -> +          psp -> +                     psp -> +                   sp -> +
+//           |                 |                            |                         |
+//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
+//           |                 |                            |                         |
+//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
+//           |                 |                            |                         |
+//           |                 | <- GR_X               X1 ->|                         |
+//           |                 |                            |                         |
+//  sp-64 -> +          sp ->  +                     sp ->  +                         +
+//    save ar.pfs          save b0                                               restore gp
+//    save gp                                                                    restore ar.pfs
+
+
+
+.proc __libm_error_region
+__libm_error_region:
+L(LOGB_ZERO): 
 .prologue
 
+// f9  = |f8|
+// f10 = -f9 = -|f8|
+// f9  = 1.0/f10 = -1.0/|f8| = -inf
+
+{ .mfi 
+      mov           r41 = 151      // Error code
+(p0)  fmerge.s      f9 = f0,f8
+      nop.i 999
+}
+;;
+
+
 { .mfi
-        add   GR_Parameter_Y=-32,sp           // Parameter 2 value
-        nop.f 0
+      nop.m 999
+      fmerge.ns     f10 = f0,f9
+      nop.i 999
+}
+;;
+
+// (1)
+{ .mfi
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
+        frcpa         f9,p6 = f1,f10
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs               // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                         // Create new stack
+        add sp=-64,sp                          // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                     // Save gp
+        mov GR_SAVE_GP=gp                      // Save gp
 };;
 
+
+// (2)
 { .mmi
         stfd [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
         add GR_Parameter_X    = 16,sp         // Parameter 1 address
@@ -243,38 +290,38 @@ LOCAL_LIBM_ENTRY(__libm_error_region)
 };;
 
 .body
+// (3)
 { .mib
         stfd [GR_Parameter_X] = f8            // STORE Parameter 1 on stack
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
-        nop.b 0
+        nop.b 0                                         
 }
 { .mib
         stfd [GR_Parameter_Y] = f9            // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
         br.call.sptk b0=__libm_error_support# // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
 
+// (4)
 { .mmi
         ldfd  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
         add   sp = 64,sp                       // Restore stack pointer
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
         mov   gp = GR_SAVE_GP                  // Restore gp
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
         br.ret.sptk   b0
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_logbf.S b/sysdeps/ia64/fpu/s_logbf.S
index 1d605cd97c..f2f671f892 100644
--- a/sysdeps/ia64/fpu/s_logbf.S
+++ b/sysdeps/ia64/fpu/s_logbf.S
@@ -1,10 +1,10 @@
 .file "logbf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,46 +20,44 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/16/00 Modified to conform to C9X
-// 03/16/00 Improved speed
-// 04/04/00 Unwind support added
-// 05/30/00 Fixed bug when x double-extended denormal
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 2/16/00  Modified to conform to C9X 
+// 3/16/00  Improved speed
+// 4/04/00  Unwind support added
+// 5/30/00  Fixed bug when x double-extended denormal
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance
 //
 // API
 //==============================================================
-// float logbf( float x );
+// float logbf( float x);
 //
 // Overview of operation
 //==============================================================
-// The logbf function extracts the exponent of x as an integer in
-// floating-point format.
+// The logbf function extracts the exponent of x as an integer in 
+// floating-point format. 
 // logbf computes log2 of x as a float
-//
-// logbf is similar to ilogbf but differs in the following ways:
+
+// logbf is similar to ilogbf but differs in the  following ways:
 //         +-inf
 //            ilogbf: returns INT_MAX
 //             logbf: returns +inf
@@ -73,207 +71,243 @@
 //
 // Registers used
 //==============================================================
-// general registers used:
-// r26 -> r38
-// r35 -> r38 used as parameters to error path
+// general registers used: 
+// ar.pfs r32 
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
 //
-// predicate registers used:
+// predicate registers used: 
 // p6, p7, p8
-// floating-point registers used:
+//
+// floating-point registers used: 
 // f9, f10, f11
 // f8, input
 
-rExpBias            = r26
-rExpMask            = r27
-rSignexp_x          = r28
-rExp_x              = r29
-rTrueExp_x          = r30
-rExp_2to64          = r31
+#include "libm_support.h"
+
+GR_SAVE_B0                    = r34
+// r32, r34, r35 hold ar.pfs, b0 and gp across the call to error support
+GR_SAVE_PFS                   = r32
+GR_SAVE_GP                    = r35 
 
-GR_SAVE_PFS         = r32
-GR_SAVE_B0          = r33
-GR_SAVE_GP          = r34
+GR_Parameter_X                = r38
+GR_Parameter_Y                = r39
+GR_Parameter_RESULT           = r40
+GR_Parameter_TAG              = r41
 
-GR_Parameter_X      = r35
-GR_Parameter_Y      = r36
-GR_Parameter_RESULT = r37
-GR_Parameter_TAG    = r38
+FR_X             = f8
+FR_Y             = f0
+FR_RESULT        = f10
 
-fExp_in_signif      = f9
-fNorm_x             = f10
-fFloat_Exp          = f10
-f2to64              = f11
+
+.align 32
+.global logbf#
 
 .section .text
-GLOBAL_LIBM_ENTRY(logbf)
+.proc  logbf#
+.align 32
 
-// X NORMAL
-// TrueExp_x = exp(f8) - 0xffff
-// sig = TrueExp_x
-// f8 = convert_to_fp (sig))
+
+logbf: 
+
+// qnan snan inf norm     unorm 0 -+
+// 0    0    0   0        1     0 11
+// 0                      b
 { .mfi
-      getf.exp        rSignexp_x = f8
-      fclass.m        p8,p0 = f8, 0x0b   // Test for x unorm
-      mov             rExpBias = 0xffff  // Exponent bias
+      alloc          r32=ar.pfs,1,5,4,0
+(p0)  fclass.m.unc  p8,p0 = f8, 0x0b
+      nop.i 999
 }
+// X NORMAL
+// r37 = exp(f8) - 0xffff
+// sig(f9) = r37
+// f8 = convert_to_fp (sig(f9))
 { .mfi
-      nop.m           0
-      fnorm.s1        fNorm_x = f8
-      mov             rExpMask = 0x1ffff // Exponent mask
+(p0)  getf.exp      r35 = f8
+(p0)  fnorm         f10=f8
+      nop.i 999 ;;
 }
-;;
 
-// Form signexp of 2^64 in case need to scale denormal
-{ .mfb
-      mov             rExp_2to64 = 0x1003f
-      fclass.m        p6,p0 = f8, 0x1e3  // Test x natval, nan, inf
-(p8)  br.cond.spnt    LOGB_DENORM        // Branch if x unorm
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     0 11
+// e                      3
+{ .mmf
+(p0)  mov      r33 = 0xffff
+(p0)  mov      r34 = 0x1ffff
+(p0)  fclass.m.unc  p6,p0 = f8, 0xe3 ;;
 }
-;;
 
-LOGB_COMMON:
-// Return here from LOGB_DENORM
-{ .mfi
-      and             rExp_x = rSignexp_x, rExpMask // Get biased exponent
-      fclass.m        p7,p0 = f8, 0x07   // Test x zero
-      nop.i           0
+{ .mfb
+(p0)  and           r36 = r35, r34
+(p0)  fclass.m.unc  p7,p0 = f8, 0x07
+(p8)  br.cond.spnt  L(LOGB_DENORM) ;;
 }
-;;
 
-// X NAN or INFINITY, return f8 * f8
-{ .mfb
-      sub             rTrueExp_x = rExp_x, rExpBias // Get true exponent
-(p6)  fma.s.s0        f8= f8,f8,f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt     b0                 // Exit if x natval, nan, inf
+{ .mib
+(p0)  sub           r37 = r36, r33
+      nop.i 999
+(p6)  br.cond.spnt  L(LOGB_NAN_INF) ;;
 }
-;;
 
 { .mib
-      setf.sig        fExp_in_signif = rTrueExp_x // Exponent as integer in fp
+(p0)  setf.sig      f9  = r37
       nop.i 999
-(p7)  br.cond.spnt    LOGB_ZERO
+(p7)  br.cond.spnt  L(LOGB_ZERO) ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fcvt.xf       f10 = f9
+      nop.i 999 ;;
 }
-;;
 
-// Result can be represented in less than 24 bits, so no precision completer
-// is needed.
 { .mfb
-      nop.m           0
-      fcvt.xf         f8 = fExp_in_signif
-      br.ret.sptk     b0                 // Exit main path, 0 < |x| < inf
+      nop.m 999
+(p0)  fnorm.s       f8 = f10
+(p0)  br.ret.sptk    b0 ;;
 }
-;;
 
-LOGB_DENORM:
-// Form 2^64 in case need to scale denormal
+L(LOGB_DENORM):
+// Form signexp of 2^64 in case need to scale denormal
 // Check to see if double-extended denormal
 { .mfi
-      setf.exp        f2to64 = rExp_2to64
-      fclass.m        p8,p0 = fNorm_x, 0x0b
-      nop.i           0
+(p0)  mov r38 = 0x1003f
+(p0)  fclass.m.unc  p8,p0 = f10, 0x0b
+      nop.i 999 ;;
 }
-;;
 
+// Form 2^64 in case need to scale denormal
 { .mfi
-      nop.m           0
-      fcmp.eq.s0      p7,p0 = f8, f0           // Dummy op to set denormal flag
-      nop.i           0
+(p0)  setf.exp f11 = r38
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
 // If double-extended denormal add 64 to exponent bias for scaling
 // If double-extended denormal form x * 2^64 which is normal
 { .mfi
-(p8)  add             rExpBias = 64, rExpBias
-(p8)  fmpy.s1         fNorm_x = fNorm_x, f2to64
-      nop.i           0
+(p8)  add r33 = 64, r33
+(p8)  fmpy f10 = f10, f11    
+      nop.i 999 ;;
 }
-;;
 
 // Logic is the same as normal path but use normalized input
-{ .mib
-      getf.exp        rSignexp_x = fNorm_x
-      nop.i           0
-      br.cond.sptk    LOGB_COMMON              // Return to main path
+{ .mmi
+(p0)  getf.exp      r35 = f10 ;;
+      nop.m 999
+      nop.i 999 ;;
 }
-;;
 
-LOGB_ZERO:
-// Here if x zero
-// f10  = -|f8|
-// f9  = 1.0/f10 = -1.0/|f8| = -inf
+{ .mmi
+(p0)  and           r36 = r35, r34 ;;
+(p0)  sub           r37 = r36, r33
+      nop.i 999 ;;
+}
 
-{ .mmf
-      alloc           r32=ar.pfs,1,2,4,0
-      mov             GR_Parameter_TAG = 152  // Error code
-      fmerge.ns       f10 = f0,f8
+{ .mmi
+(p0)  setf.sig      f9  = r37
+      nop.m 999
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fcvt.xf       f10 = f9
+      nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      frcpa.s0        f9,p6 = f1,f10          // Produce -inf, Z flag
-      br.cond.sptk    __libm_error_region     // Call error support
+      nop.m 999
+(p0)  fnorm.s       f8 = f10
+(p0)  br.ret.sptk    b0 ;;
 }
-;;
 
-GLOBAL_LIBM_END(logbf)
+L(LOGB_NAN_INF): 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
-.prologue
+// X NAN or INFINITY, return f8 * f8
+{ .mfb
+      nop.m 999
+(p0)  fma.s      f8= f8,f8,f0
+(p0)  br.ret.sptk   b0 ;;
+}
 
+L(LOGB_ZERO): 
+
+// X ZERO
+// return -1.0/fabs(f8)=-inf, set divide-by-zero flag, call error support
 { .mfi
-        add   GR_Parameter_Y=-32,sp           // Parameter 2 value
-        nop.f 0
+      nop.m 999
+(p0)  fmerge.s      f9 = f0,f8
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fmerge.ns     f10 = f0,f9
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  frcpa         f10,p6 = f1,f10
+      nop.i 999 ;;
+}
+
+.endp logbf
+ASM_SIZE_DIRECTIVE(logbf)
+
+
+.proc __libm_error_region
+__libm_error_region:
+.prologue
+{ .mii
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
+(p0)    mov   GR_Parameter_TAG = 152                   
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs               // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                         // Create new stack
+        add sp=-64,sp                           // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                     // Save gp
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
-
 { .mmi
-        stfs [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
-        add GR_Parameter_X    = 16,sp         // Parameter 1 address
+        stfs [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                     // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
-
 .body
 { .mib
-        stfs [GR_Parameter_X] = f8            // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
-        nop.b 0
+        stfs [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
+        nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfs [GR_Parameter_Y] = f9            // Store Parameter 3 on stack
+        stfs [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#   // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
-
 { .mmi
         ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
         add   sp = 64,sp                       // Restore stack pointer
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
-        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   gp = GR_SAVE_GP                  // Restore gp 
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-        br.ret.sptk   b0
-};;
+        br.ret.sptk     b0                     // Return
+};; 
 
-LOCAL_LIBM_END(__libm_error_region)
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 
 .type   __libm_error_support#,@function
diff --git a/sysdeps/ia64/fpu/s_logbl.S b/sysdeps/ia64/fpu/s_logbl.S
index 6a08e94201..38b131f3aa 100644
--- a/sysdeps/ia64/fpu/s_logbl.S
+++ b/sysdeps/ia64/fpu/s_logbl.S
@@ -1,10 +1,10 @@
 .file "logbl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,46 +20,44 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/16/00 Modified to conform to C9X
-// 03/16/00 Improved speed
-// 04/04/00 Unwind support added
-// 05/30/00 Fixed bug when x double-extended denormal
-// 08/15/00 Bundle added after call to __libm_error_support to properly
+// 2/02/00  Initial version
+// 2/16/00  Modified to conform to C9X 
+// 3/16/00  Improved speed
+// 4/04/00  Unwind support added
+// 5/30/00  Fixed bug when x double-extended denormal
+// 8/15/00  Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance
 //
 // API
 //==============================================================
-// long double logbl( long double x );
+// long double logbl( long double x);
 //
 // Overview of operation
 //==============================================================
-// The logbl function extracts the exponent of x as an integer in
-// floating-point format.
+// The logbl function extracts the exponent of x as an integer in 
+// floating-point format. 
 // logbl computes log2 of x as a long double
 //
-// logbl is similar to ilogbl but differs in the following ways:
+// logbl is similar to ilogbl but differs in the following ways:
 //         +-inf
 //            ilogbl: returns INT_MAX
 //             logbl: returns +inf
@@ -73,208 +71,229 @@
 //
 // Registers used
 //==============================================================
-// general registers used:
-// r26 -> r38
-// r35 -> r38 used as parameters to error path
+// general registers used: 
+// ar.pfs r32 
+// r33 -> r37
+// r38 -> r41 used as parameters to error path
 //
-// predicate registers used:
+// predicate registers used: 
 // p6, p7, p8
-// floating-point registers used:
+//
+// floating-point registers used: 
 // f9, f10, f11
 // f8, input
 
-rExpBias            = r26
-rExpMask            = r27
-rSignexp_x          = r28
-rExp_x              = r29
-rTrueExp_x          = r30
-rExp_2to64          = r31
+#include "libm_support.h"
 
 GR_SAVE_PFS         = r32
-GR_SAVE_B0          = r33
-GR_SAVE_GP          = r34
+GR_SAVE_B0          = r34
+GR_SAVE_GP          = r35
+GR_Parameter_X      = r38
+GR_Parameter_Y      = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG    = r41
 
-GR_Parameter_X      = r35
-GR_Parameter_Y      = r36
-GR_Parameter_RESULT = r37
-GR_Parameter_TAG    = r38
+FR_X                = f8
+FR_Y                = f0
+FR_RESULT           = f10
 
-fExp_in_signif      = f9
-fNorm_x             = f10
-fFloat_Exp          = f10
-f2to64              = f11
+.align 32
+.global logbl#
 
 .section .text
-GLOBAL_LIBM_ENTRY(logbl)
+.proc  logbl#
+.align 32
 
-// X NORMAL
-// TrueExp_x = exp(f8) - 0xffff
-// sig = TrueExp_x
-// f8 = convert_to_fp (sig))
+
+logbl: 
+
+// qnan snan inf norm     unorm 0 -+
+// 0    0    0   0        1     0 11
+// 0                      b
 { .mfi
-      getf.exp        rSignexp_x = f8
-      fclass.m        p8,p0 = f8, 0x0b   // Test for x unorm
-      mov             rExpBias = 0xffff  // Exponent bias
+      alloc          r32=ar.pfs,1,5,4,0
+(p0)  fclass.m.unc  p8,p0 = f8, 0x0b
+      nop.i 999
 }
+// X NORMAL
+// r37 = exp(f8) - 0xffff
+// sig(f9) = r37
+// f8 = convert_to_fp (sig(f9))
 { .mfi
-      nop.m           0
-      fnorm.s1        fNorm_x = f8
-      mov             rExpMask = 0x1ffff // Exponent mask
+(p0)  getf.exp      r35 = f8
+(p0)  fnorm         f10=f8
+      nop.i 999 ;;
 }
-;;
 
-// Form signexp of 2^64 in case need to scale denormal
-{ .mfb
-      mov             rExp_2to64 = 0x1003f
-      fclass.m        p6,p0 = f8, 0x1e3  // Test x natval, nan, inf
-(p8)  br.cond.spnt    LOGB_DENORM        // Branch if x unorm
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     0 11
+// e                      3
+{ .mmf
+(p0)  mov      r33 = 0xffff
+(p0)  mov      r34 = 0x1ffff
+(p0)  fclass.m.unc  p6,p0 = f8, 0xe3 ;;
 }
-;;
 
-LOGB_COMMON:
-// Return here from LOGB_DENORM
-{ .mfi
-      and             rExp_x = rSignexp_x, rExpMask // Get biased exponent
-      fclass.m        p7,p0 = f8, 0x07   // Test x zero
-      nop.i           0
+{ .mfb
+(p0)  and           r36 = r35, r34
+(p0)  fclass.m.unc  p7,p0 = f8, 0x07
+(p8)  br.cond.spnt  L(LOGB_DENORM) ;;
 }
-;;
 
-// X NAN or INFINITY, return f8 * f8
-{ .mfb
-      sub             rTrueExp_x = rExp_x, rExpBias // Get true exponent
-(p6)  fma.s0          f8= f8,f8,f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt     b0                 // Exit if x natval, nan, inf
+{ .mib
+(p0)  sub           r37 = r36, r33
+      nop.i 999
+(p6)  br.cond.spnt  L(LOGB_NAN_INF) ;;
 }
-;;
 
 { .mib
-      setf.sig        fExp_in_signif = rTrueExp_x // Exponent as integer in fp
+(p0)  setf.sig      f9  = r37
       nop.i 999
-(p7)  br.cond.spnt    LOGB_ZERO
+(p7)  br.cond.spnt  L(LOGB_ZERO) ;;
+}
+{ .mfi
+      nop.m 999
+(p0)  fcvt.xf       f10 = f9
+      nop.i 999 ;;
 }
-;;
 
-// Result can be represented in less than 24 bits, so no precision completer
-// is needed.
 { .mfb
-      nop.m           0
-      fcvt.xf         f8 = fExp_in_signif
-      br.ret.sptk     b0                 // Exit main path, 0 < |x| < inf
+      nop.m 999
+(p0)  fnorm       f8 = f10
+(p0)  br.ret.sptk    b0 ;;
 }
-;;
 
-LOGB_DENORM:
-// Form 2^64 in case need to scale denormal
+L(LOGB_DENORM):
+// Form signexp of 2^64 in case need to scale denormal
 // Check to see if double-extended denormal
 { .mfi
-      setf.exp        f2to64 = rExp_2to64
-      fclass.m        p8,p0 = fNorm_x, 0x0b
-      nop.i           0
+(p0)  mov r38 = 0x1003f
+(p0)  fclass.m.unc  p8,p0 = f10, 0x0b
+      nop.i 999 ;;
 }
-;;
 
+// Form 2^64 in case need to scale denormal
 { .mfi
-      nop.m           0
-      fcmp.eq.s0      p7,p0 = f8, f0           // Dummy op to set denormal flag
-      nop.i           0
+(p0)  setf.exp f11 = r38
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
 // If double-extended denormal add 64 to exponent bias for scaling
 // If double-extended denormal form x * 2^64 which is normal
 { .mfi
-(p8)  add             rExpBias = 64, rExpBias
-(p8)  fmpy.s1         fNorm_x = fNorm_x, f2to64
-      nop.i           0
+(p8)  add r33 = 64, r33
+(p8)  fmpy f10 = f10, f11    
+      nop.i 999 ;;
 }
-;;
 
 // Logic is the same as normal path but use normalized input
-{ .mib
-      getf.exp        rSignexp_x = fNorm_x
-      nop.i           0
-      br.cond.sptk    LOGB_COMMON              // Return to main path
+{ .mmi
+(p0)  getf.exp      r35 = f10 ;;
+      nop.m 999
+      nop.i 999 ;;
 }
-;;
 
-LOGB_ZERO:
-// Here if x zero
-// f10  = -|f8|
-// f9  = 1.0/f10 = -1.0/|f8| = -inf
+{ .mmi
+(p0)  and           r36 = r35, r34 ;;
+(p0)  sub           r37 = r36, r33
+      nop.i 999 ;;
+}
 
-{ .mmf
-      alloc           r32=ar.pfs,1,2,4,0
-      mov             GR_Parameter_TAG = 150  // Error code
-      fmerge.ns       f10 = f0,f8
+{ .mmi
+(p0)  setf.sig      f9  = r37
+      nop.m 999
+      nop.i 999 ;;
+}
+
+{ .mfi
+      nop.m 999
+(p0)  fcvt.xf       f10 = f9
+      nop.i 999 ;;
 }
-;;
 
 { .mfb
-      nop.m           0
-      frcpa.s0        f9,p6 = f1,f10          // Produce -inf, Z flag
-      br.cond.sptk    __libm_error_region     // Call error support
+      nop.m 999
+(p0)  fnorm       f8 = f10
+(p0)  br.ret.sptk    b0 ;;
 }
-;;
 
-GLOBAL_LIBM_END(logbl)
+L(LOGB_NAN_INF): 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
-.prologue
+// X NAN or INFINITY, return f8 * f8
+{ .mfb
+      nop.m 999
+(p0)  fma        f8= f8,f8,f0
+(p0)  br.ret.sptk   b0 ;;
+}
 
+L(LOGB_ZERO): 
+{.mfi
+      nop.m 0
+(p0)  frcpa.s0       f10,p6 = f1,f0
+      nop.i 0
+};;
+{.mfi
+      mov            GR_Parameter_TAG = 150
+(p0)  fms.s1         f10 = f0,f0,f10
+      nop.i 0
+};;
+// X ZERO
+// return -1.0/fabs(f8)=-inf, set divide-by-zero flag, call error support
+.endp logbl
+ASM_SIZE_DIRECTIVE(logbl)
+
+.proc __libm_error_region
+__libm_error_region:
+.prologue
 { .mfi
-        add   GR_Parameter_Y=-32,sp           // Parameter 2 value
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
         nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-        mov  GR_SAVE_PFS=ar.pfs               // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-        add sp=-64,sp                         // Create new stack
+        add sp=-64,sp                           // Create new stack
         nop.f 0
-        mov GR_SAVE_GP=gp                     // Save gp
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
-
 { .mmi
-        stfe [GR_Parameter_Y] = f0,16         // STORE Parameter 2 on stack
-        add GR_Parameter_X    = 16,sp         // Parameter 1 address
+        stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
-        mov GR_SAVE_B0=b0                     // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
-
 .body
 { .mib
-        stfe [GR_Parameter_X] = f8            // STORE Parameter 1 on stack
-        add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
-        nop.b 0
+        stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y
+        nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfe [GR_Parameter_Y] = f9            // Store Parameter 3 on stack
+        stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support# // Call error handling function
+        br.call.sptk b0=__libm_error_support#  // Call error handling function
 };;
-
 { .mmi
-        add   GR_Parameter_RESULT = 48,sp
         nop.m 0
-        nop.i 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
 };;
-
 { .mmi
         ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
 .restore sp
         add   sp = 64,sp                       // Restore stack pointer
         mov   b0 = GR_SAVE_B0                  // Restore return address
 };;
-
 { .mib
         mov   gp = GR_SAVE_GP                  // Restore gp
         mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
-        br.ret.sptk   b0
+        br.ret.sptk     b0                     // Return
 };;
 
-LOCAL_LIBM_END(__libm_error_region)
-
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
 
 .type   __libm_error_support#,@function
 .global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_modf.S b/sysdeps/ia64/fpu/s_modf.S
index 2008bbfc5c..e8e672adfe 100644
--- a/sysdeps/ia64/fpu/s_modf.S
+++ b/sysdeps/ia64/fpu/s_modf.S
@@ -1,10 +1,10 @@
 .file "modf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,16 +35,14 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Improved speed, corrected result for NaN input
+// 2/02/00  Initial version
+// 4/04/00  Improved speed, corrected result for NaN input
 // 12/22/00 Fixed so inexact flag is never set, and invalid is not set for 
 //            qnans nor for inputs larger than 2^63.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
 //
 // API
 //==============================================================
@@ -99,6 +97,8 @@
 //    p13 --------------------------------------------------->|
 //
 
+#include "libm_support.h"
+
 // floating-point registers used: 
 MODF_NORM_F8               = f9
 MODF_FRACTION_PART         = f10
@@ -115,17 +115,23 @@ modf_exp        = r18
 // r33 = iptr
      
 
+.align 32
+.global modf#
+
 .section .text
-GLOBAL_LIBM_ENTRY(modf)
+.proc  modf#
+.align 32
+
 
 // Main path is p9, p11, p8 FALSE and p12 TRUE
 
 // Assume input is normalized and get signexp
 // Normalize input just in case
 // Form exponent bias 
+modf: 
 { .mfi
       getf.exp  modf_signexp = f8
-      fnorm.s0          MODF_NORM_F8  = f8
+      fnorm          MODF_NORM_F8  = f8
       addl           modf_GR_FFFF  = 0xffff, r0
 }
 // Get integer part of input
@@ -170,10 +176,10 @@ GLOBAL_LIBM_ENTRY(modf)
 { .mfb
 (p10) cmp.ge.unc p9,p12  = modf_exp, modf_GR_no_frac
 (p6)  fclass.m.unc p6,p7 = f8, 0x23
-(p8)  br.cond.spnt MODF_DENORM ;;
+(p8)  br.cond.spnt L(MODF_DENORM) ;;
 }
 
-MODF_COMMON:
+L(MODF_COMMON):
 // For HUGE set fraction to signed 0
 { .mfi
       nop.m 999
@@ -183,7 +189,7 @@ MODF_COMMON:
 // For HUGE set integer part to normalized input
 { .mfi
       nop.m 999
-(p9)  fnorm.d.s0 MODF_INTEGER_PART = MODF_NORM_F8
+(p9)  fnorm.d MODF_INTEGER_PART = MODF_NORM_F8
       nop.i 999 ;;
 }
 
@@ -195,7 +201,7 @@ MODF_COMMON:
 }
 { .mfi
       nop.m 999
-(p11) fnorm.d.s0 f8 = MODF_NORM_F8
+(p11) fnorm.d f8 = MODF_NORM_F8
       nop.i 999 ;;
 }
 
@@ -236,7 +242,7 @@ MODF_COMMON:
 // For NORMAL test if fraction part is zero; if so append correct sign
 { .mfi
       nop.m 999
-(p12) fcmp.eq.unc.s0 p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
+(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
       nop.i 999 ;;
 }
 
@@ -253,7 +259,7 @@ MODF_COMMON:
       br.ret.sptk    b0 ;;
 }
 
-MODF_DENORM:
+L(MODF_DENORM):
 // If x unorm get signexp from normalized input
 // If x unorm get integer part from normalized input
 { .mfi
@@ -272,7 +278,8 @@ MODF_DENORM:
 { .mfb
 (p10) cmp.ge.unc p9,p12  = modf_exp, modf_GR_no_frac
       nop.f 999
-      br.cond.spnt MODF_COMMON ;;
+      br.cond.spnt L(MODF_COMMON) ;;
 }
 
-GLOBAL_LIBM_END(modf)
+.endp modf
+ASM_SIZE_DIRECTIVE(modf)
diff --git a/sysdeps/ia64/fpu/s_modff.S b/sysdeps/ia64/fpu/s_modff.S
index edc1120971..6aa43c884d 100644
--- a/sysdeps/ia64/fpu/s_modff.S
+++ b/sysdeps/ia64/fpu/s_modff.S
@@ -1,10 +1,10 @@
 .file "modff.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,16 +35,14 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Improved speed, corrected result for NaN input
+// 2/02/00  Initial version
+// 4/04/00  Improved speed, corrected result for NaN input
 // 12/22/00 Fixed so inexact flag is never set, and invalid is not set for 
 //            qnans nor for inputs larger than 2^63.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
 //
 // API
 //==============================================================
@@ -99,6 +97,8 @@
 //    p13 --------------------------------------------------->|
 //
 
+#include "libm_support.h"
+
 // floating-point registers used: 
 MODF_NORM_F8               = f9
 MODF_FRACTION_PART         = f10
@@ -115,17 +115,23 @@ modf_exp        = r18
 // r33 = iptr
      
 
+.align 32
+.global modff#
+
 .section .text
-GLOBAL_LIBM_ENTRY(modff)
+.proc  modff#
+.align 32
+
 
 // Main path is p9, p11, p8 FALSE and p12 TRUE
 
 // Assume input is normalized and get signexp
 // Normalize input just in case
 // Form exponent bias 
+modff: 
 { .mfi
       getf.exp  modf_signexp = f8
-      fnorm.s0          MODF_NORM_F8  = f8
+      fnorm          MODF_NORM_F8  = f8
       addl           modf_GR_FFFF  = 0xffff, r0
 }
 // Get integer part of input
@@ -170,10 +176,10 @@ GLOBAL_LIBM_ENTRY(modff)
 { .mfb
 (p10) cmp.ge.unc p9,p12  = modf_exp, modf_GR_no_frac
 (p6)  fclass.m.unc p6,p7 = f8, 0x23
-(p8)  br.cond.spnt MODF_DENORM ;;
+(p8)  br.cond.spnt L(MODF_DENORM) ;;
 }
 
-MODF_COMMON:
+L(MODF_COMMON):
 // For HUGE set fraction to signed 0
 { .mfi
       nop.m 999
@@ -183,7 +189,7 @@ MODF_COMMON:
 // For HUGE set integer part to normalized input
 { .mfi
       nop.m 999
-(p9)  fnorm.s.s0 MODF_INTEGER_PART = MODF_NORM_F8
+(p9)  fnorm.s MODF_INTEGER_PART = MODF_NORM_F8
       nop.i 999 ;;
 }
 
@@ -195,7 +201,7 @@ MODF_COMMON:
 }
 { .mfi
       nop.m 999
-(p11) fnorm.s.s0 f8 = MODF_NORM_F8
+(p11) fnorm.s f8 = MODF_NORM_F8
       nop.i 999 ;;
 }
 
@@ -236,7 +242,7 @@ MODF_COMMON:
 // For NORMAL test if fraction part is zero; if so append correct sign
 { .mfi
       nop.m 999
-(p12) fcmp.eq.unc.s0 p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
+(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
       nop.i 999 ;;
 }
 
@@ -253,7 +259,7 @@ MODF_COMMON:
       br.ret.sptk    b0 ;;
 }
 
-MODF_DENORM:
+L(MODF_DENORM):
 // If x unorm get signexp from normalized input
 // If x unorm get integer part from normalized input
 { .mfi
@@ -272,7 +278,8 @@ MODF_DENORM:
 { .mfb
 (p10) cmp.ge.unc p9,p12  = modf_exp, modf_GR_no_frac
       nop.f 999
-      br.cond.spnt MODF_COMMON ;;
+      br.cond.spnt L(MODF_COMMON) ;;
 }
 
-GLOBAL_LIBM_END(modff)
+.endp modff
+ASM_SIZE_DIRECTIVE(modff)
diff --git a/sysdeps/ia64/fpu/s_modfl.S b/sysdeps/ia64/fpu/s_modfl.S
index eaf410cb6c..b5eb509adf 100644
--- a/sysdeps/ia64/fpu/s_modfl.S
+++ b/sysdeps/ia64/fpu/s_modfl.S
@@ -1,10 +1,10 @@
 .file "modfl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,17 +35,15 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Improved speed, corrected result for NaN input
-// 05/30/00 Fixed bug for exponent 0x1003e
+// 2/02/00: Initial version
+// 4/04/00: Improved speed, corrected result for NaN input
+// 5/30/00  Fixed bug for exponent 0x1003e
 // 12/22/00 Fixed so inexact flag is never set, and invalid is not set for 
 //            qnans nor for inputs larger than 2^63.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
 //
 // API
 //==============================================================
@@ -94,6 +92,8 @@
 //    p13 --------------------------------------------------->|
 //
 
+#include "libm_support.h"
+
 // floating-point registers used: 
 MODF_NORM_F8               = f9
 MODF_FRACTION_PART         = f10
@@ -110,17 +110,23 @@ modf_exp        = r18
 // r34 = iptr
      
 
+.align 32
+.global modfl#
+
 .section .text
-GLOBAL_LIBM_ENTRY(modfl)
+.proc  modfl#
+.align 32
+
 
 // Main path is p9, p11, p8 FALSE and p12 TRUE
 
 // Assume input is normalized and get signexp
 // Normalize input just in case
 // Form exponent bias 
+modfl: 
 { .mfi
       getf.exp  modf_signexp = f8
-      fnorm.s0          MODF_NORM_F8  = f8
+      fnorm          MODF_NORM_F8  = f8
       addl           modf_GR_FFFF  = 0xffff, r0
 }
 // Get integer part of input
@@ -165,10 +171,10 @@ GLOBAL_LIBM_ENTRY(modfl)
 { .mfb
 (p10) cmp.ge.unc p9,p12  = modf_exp, modf_GR_no_frac
 (p6)  fclass.m.unc p6,p7 = f8, 0x23
-(p8)  br.cond.spnt MODF_DENORM ;;
+(p8)  br.cond.spnt L(MODF_DENORM) ;;
 }
 
-MODF_COMMON:
+L(MODF_COMMON):
 // For HUGE set fraction to signed 0
 { .mfi
       nop.m 999
@@ -178,7 +184,7 @@ MODF_COMMON:
 // For HUGE set integer part to normalized input
 { .mfi
       nop.m 999
-(p9)  fnorm.s0 MODF_INTEGER_PART = MODF_NORM_F8
+(p9)  fnorm MODF_INTEGER_PART = MODF_NORM_F8
       nop.i 999 ;;
 }
 
@@ -190,7 +196,7 @@ MODF_COMMON:
 }
 { .mfi
       nop.m 999
-(p11) fnorm.s0   f8 = MODF_NORM_F8
+(p11) fnorm   f8 = MODF_NORM_F8
       nop.i 999 ;;
 }
 
@@ -231,7 +237,7 @@ MODF_COMMON:
 // For NORMAL test if fraction part is zero; if so append correct sign
 { .mfi
       nop.m 999
-(p12) fcmp.eq.unc.s0 p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
+(p12) fcmp.eq.unc p7,p0 = MODF_NORM_F8, MODF_INTEGER_PART
       nop.i 999 ;;
 }
 
@@ -248,7 +254,7 @@ MODF_COMMON:
       br.ret.sptk    b0 ;;
 }
 
-MODF_DENORM:
+L(MODF_DENORM):
 // If x unorm get signexp from normalized input
 // If x unorm get integer part from normalized input
 { .mfi
@@ -267,7 +273,8 @@ MODF_DENORM:
 { .mfb
 (p10) cmp.ge.unc p9,p12  = modf_exp, modf_GR_no_frac
       nop.f 999
-      br.cond.spnt MODF_COMMON ;;
+      br.cond.spnt L(MODF_COMMON) ;;
 }
 
-GLOBAL_LIBM_END(modfl)
+.endp modfl
+ASM_SIZE_DIRECTIVE(modfl)
diff --git a/sysdeps/ia64/fpu/s_nearbyint.S b/sysdeps/ia64/fpu/s_nearbyint.S
index cba74e61d3..6ee01ea260 100644
--- a/sysdeps/ia64/fpu/s_nearbyint.S
+++ b/sysdeps/ia64/fpu/s_nearbyint.S
@@ -1,10 +1,11 @@
 .file "nearbyint.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 10/19/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,19 +36,20 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 10/19/00 Created
-// 02/08/01 Corrected behavior for all rounding modes.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 10/19/2000: Created
+// 2/08/01  Corrected behavior for all rounding modes.
 //==============================================================
 //
 // API
 //==============================================================
 // double nearbyint(double x)
+
+#include "libm_support.h"
+
 //
 // general registers used:  
 //
@@ -108,8 +110,15 @@ NEARBYINT_INT_f8       = f11
 // 1    1    1   0        0     1 11     0xe7
 
 
+.align 32
+.global nearbyint#
+
 .section .text
-GLOBAL_LIBM_ENTRY(nearbyint)
+.proc  nearbyint#
+.align 32
+
+
+nearbyint: 
 
 { .mfi
       mov nearbyint_GR_fpsr = ar40    // Read the fpsr--need to check rc.s0
@@ -132,7 +141,7 @@ GLOBAL_LIBM_ENTRY(nearbyint)
 
 { .mfb
 	nop.m 999
-(p6)  fnorm.d.s0 f8 = f8
+(p6)  fnorm.d f8 = f8
 (p6)  br.ret.spnt   b0    // Exit if x nan, inf, zero
 ;;
 }
@@ -168,11 +177,11 @@ GLOBAL_LIBM_ENTRY(nearbyint)
 
 // Check to see if s0 rounding mode is round to nearest.  If not then set s2
 // rounding mode to that of s0 and repeat conversions.
-NEARBYINT_COMMON:
+L(NEARBYINT_COMMON):
 { .mfb
       cmp.ne   p11,p0 = nearbyint_GR_rcs0, r0
 (p6) fclass.m.unc   p9,p10  = NEARBYINT_FLOAT_INT_f8, 0x07  // Test for result=0
-(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST  // Branch if not round to nearest
+(p11) br.cond.spnt L(NEARBYINT_NOT_ROUND_NEAREST)  // Branch if not round to nearest
 ;;
 }
 
@@ -191,13 +200,13 @@ NEARBYINT_COMMON:
 }
 { .mfb
       nop.m 999
-(p10) fnorm.d.s0 f8 = NEARBYINT_FLOAT_INT_f8
+(p10) fnorm.d f8 = NEARBYINT_FLOAT_INT_f8
      br.ret.sptk    b0
 ;;                             
 }
 
 
-NEARBYINT_NOT_ROUND_NEAREST:
+L(NEARBYINT_NOT_ROUND_NEAREST):
 // Set rounding mode of s2 to that of s0
 { .mfi
       mov nearbyint_GR_rcs0 = r0       // Clear so we don't come back here
@@ -216,9 +225,10 @@ NEARBYINT_NOT_ROUND_NEAREST:
 { .mfb
 	nop.m 999
       fcvt.xf         NEARBYINT_FLOAT_INT_f8   = NEARBYINT_INT_f8
-      br.cond.sptk  NEARBYINT_COMMON
+      br.cond.sptk  L(NEARBYINT_COMMON)
 ;;
 }
 
 
-GLOBAL_LIBM_END(nearbyint)
+.endp nearbyint
+ASM_SIZE_DIRECTIVE(nearbyint)
diff --git a/sysdeps/ia64/fpu/s_nearbyintf.S b/sysdeps/ia64/fpu/s_nearbyintf.S
index 6471232513..7050ddc52c 100644
--- a/sysdeps/ia64/fpu/s_nearbyintf.S
+++ b/sysdeps/ia64/fpu/s_nearbyintf.S
@@ -1,10 +1,11 @@
 .file "nearbyintf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 10/19/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,19 +36,20 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 10/19/00 Created
-// 02/08/01 Corrected behavior for all rounding modes.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 10/19/2000: Created
+// 2/08/01  Corrected behavior for all rounding modes.
 //==============================================================
 //
 // API
 //==============================================================
 // float nearbyintf(float x)
+
+#include "libm_support.h"
+
 //
 // general registers used:  
 //
@@ -108,8 +110,15 @@ NEARBYINT_INT_f8       = f11
 // 1    1    1   0        0     1 11     0xe7
 
 
+.align 32
+.global nearbyintf#
+
 .section .text
-GLOBAL_LIBM_ENTRY(nearbyintf)
+.proc  nearbyintf#
+.align 32
+
+
+nearbyintf: 
 
 { .mfi
       mov nearbyint_GR_fpsr = ar40           // Read the fpsr--need to check rc.s0
@@ -132,7 +141,7 @@ GLOBAL_LIBM_ENTRY(nearbyintf)
 
 { .mfb
 	nop.m 999
-(p6)  fnorm.s.s0 f8 = f8
+(p6)  fnorm.s f8 = f8
 (p6)  br.ret.spnt   b0    // Exit if x nan, inf, zero
 ;;
 }
@@ -168,11 +177,11 @@ GLOBAL_LIBM_ENTRY(nearbyintf)
 
 // Check to see if s0 rounding mode is round to nearest.  If not then set s2
 // rounding mode to that of s0 and repeat conversions.
-NEARBYINT_COMMON:
+L(NEARBYINT_COMMON):
 { .mfb
       cmp.ne   p11,p0 = nearbyint_GR_rcs0, r0
 (p6) fclass.m.unc   p9,p10  = NEARBYINT_FLOAT_INT_f8, 0x07  // Test for result=0
-(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST  // Branch if not round to nearest
+(p11) br.cond.spnt L(NEARBYINT_NOT_ROUND_NEAREST)  // Branch if not round to nearest
 ;;
 }
 
@@ -191,13 +200,13 @@ NEARBYINT_COMMON:
 }
 { .mfb
       nop.m 999
-(p10) fnorm.s.s0 f8 = NEARBYINT_FLOAT_INT_f8
+(p10) fnorm.s f8 = NEARBYINT_FLOAT_INT_f8
      br.ret.sptk    b0
 ;;
 }
 
 
-NEARBYINT_NOT_ROUND_NEAREST:
+L(NEARBYINT_NOT_ROUND_NEAREST):
 // Set rounding mode of s2 to that of s0
 { .mfi
       mov nearbyint_GR_rcs0 = r0       // Clear so we don't come back here
@@ -216,9 +225,10 @@ NEARBYINT_NOT_ROUND_NEAREST:
 { .mfb
 	nop.m 999
       fcvt.xf         NEARBYINT_FLOAT_INT_f8   = NEARBYINT_INT_f8
-      br.cond.sptk  NEARBYINT_COMMON
+      br.cond.sptk  L(NEARBYINT_COMMON)
 ;;
 }
 
 
-GLOBAL_LIBM_END(nearbyintf)
+.endp nearbyintf
+ASM_SIZE_DIRECTIVE(nearbyintf)
diff --git a/sysdeps/ia64/fpu/s_nearbyintl.S b/sysdeps/ia64/fpu/s_nearbyintl.S
index 9c4c2e4f16..95ba6ab260 100644
--- a/sysdeps/ia64/fpu/s_nearbyintl.S
+++ b/sysdeps/ia64/fpu/s_nearbyintl.S
@@ -1,10 +1,11 @@
 .file "nearbyintl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 10/19/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +21,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,19 +36,20 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 10/19/00 Created
-// 02/08/01 Corrected behavior for all rounding modes.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 10/19/2000: Created
+// 2/08/01  Corrected behavior for all rounding modes.
 //==============================================================
 //
 // API
 //==============================================================
 // long double nearbyintl(long double x)
+
+#include "libm_support.h"
+
 //
 // general registers used:  
 //
@@ -109,8 +111,15 @@ NEARBYINT_SIGNED_FLOAT_INT_f8 = f12
 // 1    1    1   0        0     1 11     0xe7
 
 
+.align 32
+.global nearbyintl#
+
 .section .text
-GLOBAL_LIBM_ENTRY(nearbyintl)
+.proc  nearbyintl#
+.align 32
+
+
+nearbyintl: 
 
 { .mfi
       mov nearbyint_GR_fpsr = ar40           // Read the fpsr--need to check rc.s0
@@ -133,7 +142,7 @@ GLOBAL_LIBM_ENTRY(nearbyintl)
 
 { .mfb
 	nop.m 999
-(p6)  fnorm.s0 f8 = f8
+(p6)  fnorm f8 = f8
 (p6)  br.ret.spnt   b0    // Exit if x nan, inf, zero
 ;;
 }
@@ -171,11 +180,11 @@ GLOBAL_LIBM_ENTRY(nearbyintl)
 // rounding mode to that of s0 and repeat conversions.
 // Must merge the original sign for cases where the result is zero or the input
 // is the largest that still has a fraction (0x1007dfffffffffff)
-NEARBYINT_COMMON:
+L(NEARBYINT_COMMON):
 { .mfb
       cmp.ne   p11,p0 = nearbyint_GR_rcs0, r0
 (p6) fmerge.s  NEARBYINT_SIGNED_FLOAT_INT_f8 = f8, NEARBYINT_FLOAT_INT_f8
-(p11) br.cond.spnt NEARBYINT_NOT_ROUND_NEAREST  // Branch if not round to nearest
+(p11) br.cond.spnt L(NEARBYINT_NOT_ROUND_NEAREST)  // Branch if not round to nearest
 ;;
 }
 
@@ -188,13 +197,13 @@ NEARBYINT_COMMON:
 
 { .mfb
       nop.m 999
-(p6) fnorm.s0 f8 = NEARBYINT_SIGNED_FLOAT_INT_f8
+(p6) fnorm f8 = NEARBYINT_SIGNED_FLOAT_INT_f8
      br.ret.sptk    b0
 ;;
 }
 
 
-NEARBYINT_NOT_ROUND_NEAREST:
+L(NEARBYINT_NOT_ROUND_NEAREST):
 // Set rounding mode of s2 to that of s0
 { .mfi
       mov nearbyint_GR_rcs0 = r0       // Clear so we don't come back here
@@ -213,9 +222,10 @@ NEARBYINT_NOT_ROUND_NEAREST:
 { .mfb
 	nop.m 999
       fcvt.xf         NEARBYINT_FLOAT_INT_f8   = NEARBYINT_INT_f8
-      br.cond.sptk  NEARBYINT_COMMON
+      br.cond.sptk  L(NEARBYINT_COMMON)
 ;;
 }
 
 
-GLOBAL_LIBM_END(nearbyintl)
+.endp nearbyintl
+ASM_SIZE_DIRECTIVE(nearbyintl)
diff --git a/sysdeps/ia64/fpu/s_nextafterl.c b/sysdeps/ia64/fpu/s_nextafterl.c
new file mode 100644
index 0000000000..f59f16848f
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_nextafterl.c
@@ -0,0 +1 @@
+#include <sysdeps/i386/fpu/s_nextafterl.c>
diff --git a/sysdeps/ia64/fpu/s_nexttoward.c b/sysdeps/ia64/fpu/s_nexttoward.c
new file mode 100644
index 0000000000..aee2bb5895
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_nexttoward.c
@@ -0,0 +1 @@
+#include <sysdeps/i386/fpu/s_nexttoward.c>
diff --git a/sysdeps/ia64/fpu/s_nexttowardf.c b/sysdeps/ia64/fpu/s_nexttowardf.c
new file mode 100644
index 0000000000..55e95f6916
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_nexttowardf.c
@@ -0,0 +1 @@
+#include <sysdeps/i386/fpu/s_nexttowardf.c>
diff --git a/sysdeps/ia64/fpu/s_rint.S b/sysdeps/ia64/fpu/s_rint.S
index 1735d9b498..d04f06a31f 100644
--- a/sysdeps/ia64/fpu/s_rint.S
+++ b/sysdeps/ia64/fpu/s_rint.S
@@ -1,10 +1,10 @@
 .file "rint.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,68 +20,74 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/08/01 Corrected behavior for all rounding modes.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance
-//==============================================================
-
+// 2/02/00: Initial version
+// 2/08/01  Corrected behavior for all rounding modes.
+//
 // API
 //==============================================================
 // double rint(double x)
-//==============================================================
 
-// general input registers:
-// r14 - r21
+#include "libm_support.h"
+
+//
+// general registers used:  
+//
+rint_GR_FFFF      = r14
+rint_GR_signexp   = r15
+rint_GR_exponent  = r16
+rint_GR_17ones    = r17
+rint_GR_10033     = r18
+rint_GR_fpsr      = r19
+rint_GR_rcs0      = r20
+rint_GR_rcs0_mask = r21
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rM1        = r18
-rFpsr      = r19
-rRcs0      = r20
-rRcs0Mask  = r21
 
-// floating-point registers:
-// f8 - f11
+// predicate registers used: 
+// p6-11
 
-fXInt      = f9
-fNormX     = f10
-fTmp       = f11
+// floating-point registers used: 
 
-// predicate registers used:
-// p6 - p10
+RINT_NORM_f8      = f9                        
+RINT_FFFF         = f10 
+RINT_INEXACT      = f11 
+RINT_FLOAT_INT_f8 = f12
+RINT_INT_f8       = f13
 
 // Overview of operation
 //==============================================================
+
 // double rint(double x)
-// Return an integer value (represented as a double) that is x
-// rounded to integer in current rounding mode
+// Return an integer value (represented as a double) that is x rounded to integer in current
+// rounding mode 
 // Inexact is set if x != rint(x)
-//==============================================================
+// *******************************************************************************
+
+// Set denormal flag for denormal input and
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
 
 // double_extended
-// if the exponent is > 1003e => 3F(true) = 63(decimal)
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
 // we have a significand of 64 bits 1.63-bits.
 // If we multiply by 2^63, we no longer have a fractional part
 // So input is an integer value already.
@@ -94,136 +100,155 @@ fTmp       = f11
 // So input is an integer value already.
 
 // single
-// if the exponent is > 10016 => 17(true) = 23(decimal)
-// we have a significand of 24 bits 1.23-bits. (implicit 1)
-// If we multiply by 2^23, we no longer have a fractional part
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 53 bits 1.52-bits. (implicit 1)
+// If we multiply by 2^52, we no longer have a fractional part
 // So input is an integer value already.
 
+// If x is NAN, ZERO, or INFINITY, then  return
+
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
+
+
+.align 32
+.global rint#
+
 .section .text
-GLOBAL_IEEE754_ENTRY(rint)
+.proc  rint#
+.align 32
+
+
+rint: 
+#ifdef _LIBC
+.global __rint
+.type __rint,@function
+__rint:
+#endif
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
-      addl             rBigexp = 0x10033, r0 // Set exponent at which is integer
+      mov rint_GR_fpsr = ar40           // Read the fpsr--need to check rc.s0
+      fcvt.fx.s1     RINT_INT_f8  = f8
+      addl            rint_GR_10033 = 0x10033, r0
 }
 { .mfi
-      mov              rM1 = -1              // Set all ones
-      fcvt.fx.s1       fXInt  = f8           // Convert to int in significand
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
-}
+      mov        rint_GR_FFFF      = -1
+      fnorm.s1        RINT_NORM_f8  = f8
+      mov         rint_GR_17ones    = 0x1FFFF
 ;;
+}
 
 { .mfi
-      mov              rFpsr = ar40          // Read fpsr -- check rc.s0
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-      nop.i            0
-}
-{ .mfb
-      setf.sig         fTmp = rM1            // Make const for setting inexact
-      fnorm.s1         fNormX  = f8          // Normalize input
-(p7)  br.cond.spnt     RINT_UNORM            // Branch if x unorm
-}
+      setf.sig    RINT_FFFF  = rint_GR_FFFF
+      fclass.m.unc  p6,p0 = f8, 0xe7
+      mov         rint_GR_rcs0_mask  = 0x0c00
 ;;
+}
 
-
-RINT_COMMON:
-// Return here from RINT_UNORM
 { .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.d.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
-}
+	nop.m 999
+(p6)  fnorm.d f8 = f8
+(p6)  br.ret.spnt   b0    // Exit if x nan, inf, zero
 ;;
+}
 
 { .mfi
-      mov              rRcs0Mask = 0x0c00     // Mask for rc.s0
-      fcvt.xf          f8 = fXInt             // Result assume |x| < 2^52
-      cmp.ge           p7,p8 = rExp, rBigexp  // Is |x| >= 2^52?
-}
+	nop.m 999
+      fcvt.xf         RINT_FLOAT_INT_f8   = RINT_INT_f8
+	nop.i 999
 ;;
+}
 
-// We must correct result if |x| >= 2^52
 { .mfi
-      nop.m            0
-(p7)  fma.d.s0         f8 = fNormX, f1, f0    // If |x| >= 2^52, result x
-      nop.i            0
-}
+      getf.exp rint_GR_signexp  = RINT_NORM_f8
+      fcmp.eq.s0  p8,p0 = f8,f0      // Dummy op to set denormal
+        nop.i 999
 ;;
+}
 
-{ .mfi
-      nop.m            0
-      fcmp.eq.unc.s1   p0, p9 = f8, fNormX    // Is result = x ?
-      nop.i            0
+
+{ .mii
+	nop.m 999
+	nop.i 999 
+      and      rint_GR_exponent = rint_GR_signexp, rint_GR_17ones
+;;
 }
-{ .mfi
-      nop.m            0
-(p8)  fmerge.s         f8 = fNormX, f8        // Make sure sign rint(x) = sign x
-      nop.i            0
+
+{ .mmi
+      cmp.ge.unc      p7,p6 = rint_GR_exponent, rint_GR_10033
+      and rint_GR_rcs0 = rint_GR_rcs0_mask, rint_GR_fpsr
+	nop.i 999
+;;
 }
+
+// Check to see if s0 rounding mode is round to nearest.  If not then set s2
+// rounding mode to that of s0 and repeat conversions.
+L(RINT_COMMON):
+{ .mfb
+      cmp.ne   p11,p0 = rint_GR_rcs0, r0
+(p6) fclass.m.unc   p9,p10  = RINT_FLOAT_INT_f8, 0x07  // Test for result=0
+(p11) br.cond.spnt L(RINT_NOT_ROUND_NEAREST)  // Branch if not round to nearest
 ;;
+}
 
 { .mfi
-(p8)  and              rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0
-      nop.f            0
-      nop.i            0
+	nop.m 999
+(p6) fcmp.eq.unc.s1  p0,p8  = RINT_FLOAT_INT_f8, RINT_NORM_f8
+	nop.i 999
 }
+{ .mfi
+	nop.m 999
+(p7) fnorm.d.s0   f8 = f8
+	nop.i 999
 ;;
+}
 
-// If |x| < 2^52 we must test for other rounding modes
+// If result is zero, merge sign of input
 { .mfi
-(p8)  cmp.ne.unc       p10,p0 = rRcs0, r0     // Test for other rounding modes
-(p9)  fmpy.s0          fTmp = fTmp, fTmp      // Dummy to set inexact
-      nop.i            0
-}
-{ .mbb
-      nop.m            0
-(p10) br.cond.spnt     RINT_NOT_ROUND_NEAREST // Branch if not round nearest
-      br.ret.sptk      b0                     // Exit main path if round nearest
+     nop.m 999
+(p9) fmerge.s f8 = f8, RINT_FLOAT_INT_f8
+     nop.i 999
 }
+{ .mfi
+      nop.m 999
+(p10) fnorm.d f8 = RINT_FLOAT_INT_f8
+     nop.i 999
 ;;
+}
 
-
-
-RINT_UNORM:
-// Here if x unorm
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     RINT_COMMON            // Return to main path
-}
+     nop.m 999
+(p8) fmpy.s0     RINT_INEXACT = RINT_FFFF,RINT_FFFF  // Dummy to set inexact
+     br.ret.sptk    b0
 ;;
-
-RINT_NOT_ROUND_NEAREST:
-// Here if not round to nearest, and |x| < 2^52
-// Set rounding mode of s2 to that of s0, and repeat the conversion using s2
-{ .mfi
-      nop.m            0
-      fsetc.s2         0x7f, 0x40
-      nop.i            0
 }
-;;
 
+L(RINT_NOT_ROUND_NEAREST):
+// Set rounding mode of s2 to that of s0
 { .mfi
-      nop.m            0
-      fcvt.fx.s2       fXInt  = fNormX        // Convert to int in significand
-      nop.i            0
-}
+      mov rint_GR_rcs0 = r0       // Clear so we don't come back here
+      fsetc.s2     0x7f, 0x40
+	nop.i 999
 ;;
+}
 
 { .mfi
-      nop.m            0
-      fcvt.xf          f8 = fXInt             // Expected result
-      nop.i            0
-}
+	nop.m 999
+      fcvt.fx.s2     RINT_INT_f8  = f8
+	nop.i 999
 ;;
+}
 
-// Be sure sign of result = sign of input.  Fixes cases where result is 0.
 { .mfb
-      nop.m            0
-      fmerge.s         f8 = fNormX, f8
-      br.ret.sptk      b0                     // Exit main path
-}
+	nop.m 999
+      fcvt.xf         RINT_FLOAT_INT_f8   = RINT_INT_f8
+      br.cond.sptk  L(RINT_COMMON)
 ;;
+}
+
 
-GLOBAL_IEEE754_END(rint)
+.endp rint
+ASM_SIZE_DIRECTIVE(rint)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__rint)
+#endif
diff --git a/sysdeps/ia64/fpu/s_rintf.S b/sysdeps/ia64/fpu/s_rintf.S
index 05d6b411f2..73cb98a048 100644
--- a/sysdeps/ia64/fpu/s_rintf.S
+++ b/sysdeps/ia64/fpu/s_rintf.S
@@ -1,10 +1,10 @@
 .file "rintf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,68 +20,74 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/08/01 Corrected behavior for all rounding modes.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance
-//==============================================================
-
+// 2/02/00: Initial version
+// 2/08/01  Corrected behavior for all rounding modes.
+//
 // API
 //==============================================================
 // float rintf(float x)
-//==============================================================
 
-// general input registers:
-// r14 - r21
+#include "libm_support.h"
+
+//
+// general registers used:  
+//
+rint_GR_FFFF      = r14
+rint_GR_signexp   = r15
+rint_GR_exponent  = r16
+rint_GR_17ones    = r17
+rint_GR_10033     = r18
+rint_GR_fpsr      = r19
+rint_GR_rcs0      = r20
+rint_GR_rcs0_mask = r21
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rM1        = r18
-rFpsr      = r19
-rRcs0      = r20
-rRcs0Mask  = r21
 
-// floating-point registers:
-// f8 - f11
+// predicate registers used: 
+// p6-11
 
-fXInt      = f9
-fNormX     = f10
-fTmp       = f11
+// floating-point registers used: 
 
-// predicate registers used:
-// p6 - p10
+RINT_NORM_f8      = f9                        
+RINT_FFFF         = f10 
+RINT_INEXACT      = f11 
+RINT_FLOAT_INT_f8 = f12
+RINT_INT_f8       = f13
 
 // Overview of operation
 //==============================================================
+
 // float rintf(float x)
-// Return an integer value (represented as a float) that is x
-// rounded to integer in current rounding mode
-// Inexact is set if x != rint(x)
-//==============================================================
+// Return an integer value (represented as a float) that is x rounded to integer in current
+// rounding mode 
+// Inexact is set if x != rintf(x)
+// *******************************************************************************
+
+// Set denormal flag for denormal input and
+// and take denormal fault if necessary.
+
+// Is the input an integer value already?
 
 // double_extended
-// if the exponent is > 1003e => 3F(true) = 63(decimal)
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
 // we have a significand of 64 bits 1.63-bits.
 // If we multiply by 2^63, we no longer have a fractional part
 // So input is an integer value already.
@@ -94,136 +100,155 @@ fTmp       = f11
 // So input is an integer value already.
 
 // single
-// if the exponent is > 10016 => 17(true) = 23(decimal)
-// we have a significand of 24 bits 1.23-bits. (implicit 1)
-// If we multiply by 2^23, we no longer have a fractional part
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
+
+
+.align 32
+.global rintf#
+
 .section .text
-GLOBAL_IEEE754_ENTRY(rintf)
+.proc  rintf#
+.align 32
+
+
+rintf:
+#ifdef _LIBC
+.global __rintf
+.type __rintf,@function
+__rintf:
+#endif
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
-      addl             rBigexp = 0x10016, r0 // Set exponent at which is integer
+      mov rint_GR_fpsr = ar40           // Read the fpsr--need to check rc.s0
+      fcvt.fx.s1     RINT_INT_f8  = f8
+      addl            rint_GR_10033 = 0x10016, r0
 }
 { .mfi
-      mov              rM1 = -1              // Set all ones
-      fcvt.fx.s1       fXInt  = f8           // Convert to int in significand
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
-}
+      mov        rint_GR_FFFF      = -1
+      fnorm.s1        RINT_NORM_f8  = f8
+      mov         rint_GR_17ones    = 0x1FFFF
 ;;
+}
 
 { .mfi
-      mov              rFpsr = ar40          // Read fpsr -- check rc.s0
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-      nop.i            0
-}
-{ .mfb
-      setf.sig         fTmp = rM1            // Make const for setting inexact
-      fnorm.s1         fNormX  = f8          // Normalize input
-(p7)  br.cond.spnt     RINT_UNORM            // Branch if x unorm
-}
+      setf.sig    RINT_FFFF  = rint_GR_FFFF
+      fclass.m.unc  p6,p0 = f8, 0xe7
+      mov         rint_GR_rcs0_mask  = 0x0c00
 ;;
+}
 
-
-RINT_COMMON:
-// Return here from RINT_UNORM
 { .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.s.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
-}
+	nop.m 999
+(p6)  fnorm.s f8 = f8
+(p6)  br.ret.spnt   b0    // Exit if x nan, inf, zero
 ;;
+}
 
 { .mfi
-      mov              rRcs0Mask = 0x0c00     // Mask for rc.s0
-      fcvt.xf          f8 = fXInt             // Result assume |x| < 2^23
-      cmp.ge           p7,p8 = rExp, rBigexp  // Is |x| >= 2^23?
-}
+	nop.m 999
+      fcvt.xf         RINT_FLOAT_INT_f8   = RINT_INT_f8
+	nop.i 999
 ;;
+}
 
-// We must correct result if |x| >= 2^23
 { .mfi
-      nop.m            0
-(p7)  fma.s.s0         f8 = fNormX, f1, f0    // If |x| >= 2^23, result x
-      nop.i            0
-}
+      getf.exp rint_GR_signexp  = RINT_NORM_f8
+      fcmp.eq.s0  p8,p0 = f8,f0      // Dummy op to set denormal
+        nop.i 999
 ;;
+}
 
-{ .mfi
-      nop.m            0
-      fcmp.eq.unc.s1   p0, p9 = f8, fNormX    // Is result = x ?
-      nop.i            0
+
+{ .mii
+	nop.m 999
+	nop.i 999 
+      and      rint_GR_exponent = rint_GR_signexp, rint_GR_17ones
+;;
 }
-{ .mfi
-      nop.m            0
-(p8)  fmerge.s         f8 = fNormX, f8        // Make sure sign rint(x) = sign x
-      nop.i            0
+
+{ .mmi
+      cmp.ge.unc      p7,p6 = rint_GR_exponent, rint_GR_10033
+      and rint_GR_rcs0 = rint_GR_rcs0_mask, rint_GR_fpsr
+	nop.i 999
+;;
 }
+
+// Check to see if s0 rounding mode is round to nearest.  If not then set s2
+// rounding mode to that of s0 and repeat conversions.
+L(RINT_COMMON):
+{ .mfb
+      cmp.ne   p11,p0 = rint_GR_rcs0, r0
+(p6) fclass.m.unc   p9,p10  = RINT_FLOAT_INT_f8, 0x07  // Test for result=0
+(p11) br.cond.spnt L(RINT_NOT_ROUND_NEAREST)  // Branch if not round to nearest
 ;;
+}
 
 { .mfi
-(p8)  and              rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0
-      nop.f            0
-      nop.i            0
+	nop.m 999
+(p6) fcmp.eq.unc.s1  p0,p8  = RINT_FLOAT_INT_f8, RINT_NORM_f8
+	nop.i 999
 }
+{ .mfi
+	nop.m 999
+(p7) fnorm.s.s0   f8 = f8
+	nop.i 999
 ;;
+}
 
-// If |x| < 2^23 we must test for other rounding modes
+// If result is zero, merge sign of input
 { .mfi
-(p8)  cmp.ne.unc       p10,p0 = rRcs0, r0     // Test for other rounding modes
-(p9)  fmpy.s0          fTmp = fTmp, fTmp      // Dummy to set inexact
-      nop.i            0
-}
-{ .mbb
-      nop.m            0
-(p10) br.cond.spnt     RINT_NOT_ROUND_NEAREST // Branch if not round nearest
-      br.ret.sptk      b0                     // Exit main path if round nearest
+     nop.m 999
+(p9) fmerge.s f8 = f8, RINT_FLOAT_INT_f8
+     nop.i 999
 }
+{ .mfi
+      nop.m 999
+(p10) fnorm.s f8 = RINT_FLOAT_INT_f8
+     nop.i 999
 ;;
+}
 
-
-
-RINT_UNORM:
-// Here if x unorm
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     RINT_COMMON            // Return to main path
-}
+     nop.m 999
+(p8) fmpy.s0     RINT_INEXACT = RINT_FFFF,RINT_FFFF  // Dummy to set inexact
+     br.ret.sptk    b0
 ;;
-
-RINT_NOT_ROUND_NEAREST:
-// Here if not round to nearest, and |x| < 2^23
-// Set rounding mode of s2 to that of s0, and repeat the conversion using s2
-{ .mfi
-      nop.m            0
-      fsetc.s2         0x7f, 0x40
-      nop.i            0
 }
-;;
 
+L(RINT_NOT_ROUND_NEAREST):
+// Set rounding mode of s2 to that of s0
 { .mfi
-      nop.m            0
-      fcvt.fx.s2       fXInt  = fNormX        // Convert to int in significand
-      nop.i            0
-}
+      mov rint_GR_rcs0 = r0       // Clear so we don't come back here
+      fsetc.s2     0x7f, 0x40
+	nop.i 999
 ;;
+}
 
 { .mfi
-      nop.m            0
-      fcvt.xf          f8 = fXInt             // Expected result
-      nop.i            0
-}
+	nop.m 999
+      fcvt.fx.s2     RINT_INT_f8  = f8
+	nop.i 999
 ;;
+}
 
-// Be sure sign of result = sign of input.  Fixes cases where result is 0.
 { .mfb
-      nop.m            0
-      fmerge.s         f8 = fNormX, f8
-      br.ret.sptk      b0                     // Exit main path
-}
+	nop.m 999
+      fcvt.xf         RINT_FLOAT_INT_f8   = RINT_INT_f8
+      br.cond.sptk  L(RINT_COMMON)
 ;;
+}
+
 
-GLOBAL_IEEE754_END(rintf)
+.endp rintf
+ASM_SIZE_DIRECTIVE(rintf)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__rintf)
+#endif
diff --git a/sysdeps/ia64/fpu/s_rintl.S b/sysdeps/ia64/fpu/s_rintl.S
index b5402149ec..857e8d5208 100644
--- a/sysdeps/ia64/fpu/s_rintl.S
+++ b/sysdeps/ia64/fpu/s_rintl.S
@@ -1,10 +1,10 @@
 .file "rintl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,68 +20,76 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/08/01 Corrected behavior for all rounding modes.
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance
-//==============================================================
-
+// 2/02/00: Initial version
+// 5/24/00  Fixed case of 2^63 - 1 + 0.5 (0x1007dffffffffffffffff)
+// 2/08/01  Corrected behavior for all rounding modes.
+//
 // API
 //==============================================================
 // long double rintl(long double x)
-//==============================================================
 
-// general input registers:
-// r14 - r21
+#include "libm_support.h"
+
+//
+// general registers used:  
+//
+rint_GR_FFFF      = r14
+rint_GR_signexp   = r15
+rint_GR_exponent  = r16
+rint_GR_17ones    = r17
+rint_GR_10033     = r18
+rint_GR_fpsr      = r19
+rint_GR_rcs0      = r20
+rint_GR_rcs0_mask = r21
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rM1        = r18
-rFpsr      = r19
-rRcs0      = r20
-rRcs0Mask  = r21
 
-// floating-point registers:
-// f8 - f11
+// predicate registers used: 
+// p6-11
 
-fXInt      = f9
-fNormX     = f10
-fTmp       = f11
+// floating-point registers used: 
 
-// predicate registers used:
-// p6 - p10
+RINT_NORM_f8      = f9                        
+RINT_FFFF         = f10 
+RINT_INEXACT      = f11 
+RINT_FLOAT_INT_f8 = f12
+RINT_INT_f8       = f13
+RINT_SIGNED_FLOAT_INT_f8 = f14
 
 // Overview of operation
 //==============================================================
+
 // long double rintl(long double x)
-// Return an integer value (represented as a long double) that is x
-// rounded to integer in current rounding mode
-// Inexact is set if x != rint(x)
-//==============================================================
+// Return an integer value (represented as a long double) that is x rounded to integer in current
+// rounding mode 
+// Inexact is set if x != rintl(x)
+// *******************************************************************************
+
+// Set the denormal flag for denormal input
+// and take a denormal fault if necessary.
+
+// Is the input an integer value already?
 
 // double_extended
-// if the exponent is > 1003e => 3F(true) = 63(decimal)
+// if the exponent is >= 1003e => 3F(true) = 63(decimal)
 // we have a significand of 64 bits 1.63-bits.
 // If we multiply by 2^63, we no longer have a fractional part
 // So input is an integer value already.
@@ -94,136 +102,151 @@ fTmp       = f11
 // So input is an integer value already.
 
 // single
-// if the exponent is > 10016 => 17(true) = 23(decimal)
-// we have a significand of 24 bits 1.23-bits. (implicit 1)
-// If we multiply by 2^23, we no longer have a fractional part
+// if the exponent is >= 10016 => 17(true) = 23(decimal)
+// we have a significand of 24 bits 1.23-bits. (implicit 1)
+// If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
+// If x is NAN, ZERO, or INFINITY, then return
+
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
+
+
+.align 32
+.global rintl#
+
 .section .text
-GLOBAL_IEEE754_ENTRY(rintl)
+.proc  rintl#
+.align 32
+
+
+rintl: 
+#ifdef _LIBC
+.global __rintl
+.type __rintl,@function
+__rintl:
+#endif
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
-      addl             rBigexp = 0x1003e, r0 // Set exponent at which is integer
+      mov rint_GR_fpsr = ar40           // Read the fpsr--need to check rc.s0
+      fcvt.fx.s1     RINT_INT_f8  = f8
+      addl            rint_GR_10033 = 0x1003e, r0
 }
 { .mfi
-      mov              rM1 = -1              // Set all ones
-      fcvt.fx.s1       fXInt  = f8           // Convert to int in significand
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
-}
+      mov        rint_GR_FFFF      = -1
+      fnorm.s1        RINT_NORM_f8  = f8
+      mov         rint_GR_17ones    = 0x1FFFF
 ;;
+}
 
 { .mfi
-      mov              rFpsr = ar40          // Read fpsr -- check rc.s0
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-      nop.i            0
-}
-{ .mfb
-      setf.sig         fTmp = rM1            // Make const for setting inexact
-      fnorm.s1         fNormX  = f8          // Normalize input
-(p7)  br.cond.spnt     RINT_UNORM            // Branch if x unorm
-}
+      setf.sig    RINT_FFFF  = rint_GR_FFFF
+      fclass.m.unc  p6,p0 = f8, 0xe7
+      mov         rint_GR_rcs0_mask  = 0x0c00
 ;;
+}
 
-
-RINT_COMMON:
-// Return here from RINT_UNORM
 { .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.s0           f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
-}
+	nop.m 999
+(p6)  fnorm f8 = f8
+(p6)  br.ret.spnt   b0    // Exit if x nan, inf, zero
 ;;
+}
 
 { .mfi
-      mov              rRcs0Mask = 0x0c00     // Mask for rc.s0
-      fcvt.xf          f8 = fXInt             // Result assume |x| < 2^63
-      cmp.ge           p7,p8 = rExp, rBigexp  // Is |x| >= 2^63?
-}
+	nop.m 999
+      fcvt.xf         RINT_FLOAT_INT_f8   = RINT_INT_f8
+	nop.i 999
 ;;
+}
 
-// We must correct result if |x| >= 2^63
 { .mfi
-      nop.m            0
-(p7)  fma.s0           f8 = fNormX, f1, f0    // If |x| >= 2^63, result x
-      nop.i            0
-}
+      getf.exp rint_GR_signexp  = RINT_NORM_f8
+      fcmp.eq.s0  p8,p0 = f8,f0      // Dummy op to set denormal
+        nop.i 999
 ;;
+}
 
-{ .mfi
-      nop.m            0
-      fcmp.eq.unc.s1   p0, p9 = f8, fNormX    // Is result = x ?
-      nop.i            0
+
+{ .mii
+	nop.m 999
+	nop.i 999 
+      and      rint_GR_exponent = rint_GR_signexp, rint_GR_17ones
+;;
 }
-{ .mfi
-      nop.m            0
-(p8)  fmerge.s         f8 = fNormX, f8        // Make sure sign rint(x) = sign x
-      nop.i            0
+
+{ .mmi
+      cmp.ge.unc      p7,p6 = rint_GR_exponent, rint_GR_10033
+      and rint_GR_rcs0 = rint_GR_rcs0_mask, rint_GR_fpsr
+	nop.i 999
+;;
 }
+
+// Check to see if s0 rounding mode is round to nearest.  If not then set s2
+// rounding mode to that of s0 and repeat conversions.
+// Must merge the original sign for cases where the result is zero or the input
+// is the largest that still has a fraction (0x1007dfffffffffff)
+L(RINT_COMMON):
+{ .mfb
+      cmp.ne   p11,p0 = rint_GR_rcs0, r0
+(p6) fmerge.s  RINT_SIGNED_FLOAT_INT_f8 = f8, RINT_FLOAT_INT_f8
+(p11) br.cond.spnt L(RINT_NOT_ROUND_NEAREST)  // Branch if not round to nearest
 ;;
+}
 
 { .mfi
-(p8)  and              rRcs0 = rFpsr, rRcs0Mask // Get rounding mode for sf0
-      nop.f            0
-      nop.i            0
+	nop.m 999
+(p6) fcmp.eq.unc.s1  p0,p8  = RINT_FLOAT_INT_f8, RINT_NORM_f8
+	nop.i 999
 }
+{ .mfi
+	nop.m 999
+(p7) fnorm.s0   f8 = f8
+	nop.i 999
 ;;
+}
 
-// If |x| < 2^63 we must test for other rounding modes
 { .mfi
-(p8)  cmp.ne.unc       p10,p0 = rRcs0, r0     // Test for other rounding modes
-(p9)  fmpy.s0          fTmp = fTmp, fTmp      // Dummy to set inexact
-      nop.i            0
-}
-{ .mbb
-      nop.m            0
-(p10) br.cond.spnt     RINT_NOT_ROUND_NEAREST // Branch if not round nearest
-      br.ret.sptk      b0                     // Exit main path if round nearest
-}
+      nop.m 999
+(p6) fnorm f8 = RINT_SIGNED_FLOAT_INT_f8
+     nop.i 999
 ;;
+}
 
-
-
-RINT_UNORM:
-// Here if x unorm
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     RINT_COMMON            // Return to main path
-}
+     nop.m 999
+(p8) fmpy.s0     RINT_INEXACT = RINT_FFFF,RINT_FFFF  // Dummy to set inexact
+     br.ret.sptk    b0
 ;;
-
-RINT_NOT_ROUND_NEAREST:
-// Here if not round to nearest, and |x| < 2^63
-// Set rounding mode of s2 to that of s0, and repeat the conversion using s2
-{ .mfi
-      nop.m            0
-      fsetc.s2         0x7f, 0x40
-      nop.i            0
 }
-;;
 
+L(RINT_NOT_ROUND_NEAREST):
+// Set rounding mode of s2 to that of s0
 { .mfi
-      nop.m            0
-      fcvt.fx.s2       fXInt  = fNormX        // Convert to int in significand
-      nop.i            0
-}
+      mov rint_GR_rcs0 = r0       // Clear so we don't come back here
+      fsetc.s2     0x7f, 0x40
+	nop.i 999
 ;;
+}
 
 { .mfi
-      nop.m            0
-      fcvt.xf          f8 = fXInt             // Expected result
-      nop.i            0
-}
+	nop.m 999
+      fcvt.fx.s2     RINT_INT_f8  = f8
+	nop.i 999
 ;;
+}
 
-// Be sure sign of result = sign of input.  Fixes cases where result is 0.
 { .mfb
-      nop.m            0
-      fmerge.s         f8 = fNormX, f8
-      br.ret.sptk      b0                     // Exit main path
-}
+	nop.m 999
+      fcvt.xf         RINT_FLOAT_INT_f8   = RINT_INT_f8
+      br.cond.sptk  L(RINT_COMMON)
 ;;
+}
+
 
-GLOBAL_IEEE754_END(rintl)
+.endp rintl
+ASM_SIZE_DIRECTIVE(rintl)
+#ifdef _LIBC
+ASM_SIZE_DIRECTIVE(__rintl)
+#endif
diff --git a/sysdeps/ia64/fpu/s_round.S b/sysdeps/ia64/fpu/s_round.S
index 04033b4aa2..b08ede1740 100644
--- a/sysdeps/ia64/fpu/s_round.S
+++ b/sysdeps/ia64/fpu/s_round.S
@@ -1,10 +1,11 @@
 .file "round.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,202 +21,229 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 10/25/00 Initial version
-// 06/14/01 Changed cmp to an equivalent form
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance and reduced code size
-// 04/18/03 Eliminate possible WAW dependency warning
+// 10/25/2000: Created
 //==============================================================
-
+//
 // API
 //==============================================================
 // double round(double x)
-//==============================================================
-
-// general input registers:
-// r14 - r19
+//
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rExpHalf   = r18
-rExpMHalf  = r19
+#include "libm_support.h"
 
-// floating-point registers:
-// f8 - f13
+// general input registers:  
+//
+round_GR_half      = r14
+round_GR_big       = r15
+round_GR_expmask   = r16
+round_GR_signexp   = r17
+round_GR_exp       = r18
+round_GR_expdiff   = r19
+
+// predicate registers used: 
+// p6 - p10
 
-fXtruncInt = f9
-fNormX     = f10
-fHalf      = f11
-fMHalf     = f12
-fRem       = f13
+// floating-point registers used: 
 
-// predicate registers used:
-// p6 - p10
+ROUND_NORM_f8        = f9                        
+ROUND_TRUNC_f8       = f10
+ROUND_RINT_f8        = f11
+ROUND_FLOAT_TRUNC_f8 = f12
+ROUND_FLOAT_RINT_f8  = f13
+ROUND_REMAINDER      = f14
+ROUND_HALF           = f15
 
 // Overview of operation
 //==============================================================
+
 // double round(double x)
-// Return an integer value (represented as a double) that is x
-// rounded to nearest integer, halfway cases rounded away from
-// zero.
+// Return an integer value (represented as a double) that is x 
+// rounded to nearest integer, halfway cases rounded away from 
+// zero. 
 //  if x>0   result = trunc(x+0.5)
 //  if x<0   result = trunc(x-0.5)
-//
-//==============================================================
+// *******************************************************************************
+
+// Set the denormal flag for denormal input
+// and take a denormal fault if necessary.
 
-// double_extended
-// if the exponent is > 1003e => 3F(true) = 63(decimal)
-// we have a significand of 64 bits 1.63-bits.
-// If we multiply by 2^63, we no longer have a fractional part
-// So input is an integer value already.
+// If x is NAN, ZERO, INFINITY, or >= 2^52 then return
 
-// double
-// if the exponent is >= 10033 => 34(true) = 52(decimal)
-// 34 + 3ff = 433
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
-// So input is an integer value already.
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
 
-// single
-// if the exponent is > 10016 => 17(true) = 23(decimal)
-// we have a significand of 24 bits 1.23-bits. (implicit 1)
-// If we multiply by 2^23, we no longer have a fractional part
-// So input is an integer value already.
 
+.align 32
+.global round#
 
 .section .text
-GLOBAL_LIBM_ENTRY(round)
+.proc  round#
+.align 32
+
 
+round: 
+	
+// Get exponent for +0.5
+// Truncate x to integer
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fcvt.fx.trunc.s1 fXtruncInt  = f8      // Convert to int in significand
-      addl             rBigexp = 0x10033, r0 // Set exponent at which is integer
-}
+      addl           round_GR_half  = 0x0fffe, r0
+      fcvt.fx.trunc.s1     ROUND_TRUNC_f8 = f8
+      nop.i 999
+}
+	
+// Get signexp of x
+// Normalize input
+// Form exponent mask
 { .mfi
-      mov              rExpHalf    = 0x0FFFE // Form sign and exponent of 0.5
-      fnorm.s1         fNormX  = f8          // Normalize input
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      getf.exp  round_GR_signexp = f8
+      fnorm     ROUND_NORM_f8 = f8                        
+      addl      round_GR_expmask  = 0x1ffff, r0 ;;
 }
-;;
 
-{ .mmf
-      setf.exp         fHalf = rExpHalf      // Form 0.5
-      mov              rExpMHalf   = 0x2FFFE // Form sign and exponent of -0.5
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
+// Form +0.5
+// Round x to integer
+{ .mfi
+      setf.exp    ROUND_HALF  = round_GR_half                      
+      fcvt.fx.s1  ROUND_RINT_f8 = f8
+      nop.i 999 ;;
 }
-;;
-
-{ .mfb
-      setf.exp         fMHalf = rExpMHalf    // Form -0.5
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-(p7)  br.cond.spnt     ROUND_UNORM           // Branch if x unorm
+// Get exp of x
+// Test for NAN, INF, ZERO
+// Get exponent at which input has no fractional part
+{ .mfi
+      and         round_GR_exp = round_GR_expmask, round_GR_signexp
+      fclass.m    p8,p9 = f8,0xe7
+      addl        round_GR_big  = 0x10033, r0 ;;
+}
+
+// Get exp-bigexp
+// If exp is so big there is no fractional part, then turn on p8, off p9
+{ .mmi
+      sub    round_GR_expdiff = round_GR_exp, round_GR_big ;;
+#ifdef _LIBC
+(p9)  cmp.lt.or.andcm  p8,p9 = r0, round_GR_expdiff
+#else
+(p9)  cmp.ge.or.andcm  p8,p9 = round_GR_expdiff, r0
+#endif
+      nop.i 999 ;;
+}
+     
+// Set p6 if x<0, else set p7
+{ .mfi
+      nop.m 999
+(p9)  fcmp.lt.unc  p6,p7 = f8,f0
+      nop.i 999
 }
-;;
-
-ROUND_COMMON:
-// Return here from ROUND_UNORM
+	
+// If NAN, INF, ZERO, or no fractional part, result is just normalized input
 { .mfi
-      nop.m            0
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test if x < 0
-      nop.i            0
+      nop.m 999
+(p8)  fnorm.d.s0  f8 = f8
+      nop.i 999 ;;
 }
-{ .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.d.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
+
+// Float the truncated integer
+{ .mfi
+      nop.m 999
+(p9)  fcvt.xf     ROUND_FLOAT_TRUNC_f8 = ROUND_TRUNC_f8
+      nop.i 999 ;;
 }
-;;
 
+// Float the rounded integer to get preliminary result
 { .mfi
-      cmp.lt           p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
-      fcvt.xf          f8 = fXtruncInt        // Pre-Result if 0.5 <= |x| < 2^52
-      cmp.ge           p7,p0 = rExp, rBigexp  // Is |x| >= 2^52?
+      nop.m 999
+(p9)  fcvt.xf     ROUND_FLOAT_RINT_f8 = ROUND_RINT_f8
+      nop.i 999 ;;
+}
+
+// If x<0 and the difference of the truncated input minus the input is 0.5
+//    then result = truncated input - 1.0
+// Else if x>0 and the difference of the input minus truncated input is 0.5
+//    then result = truncated input + 1.0
+// Else 
+//    result = rounded input
+// Endif
+{ .mfi
+      nop.m 999
+(p6)  fsub.s1   ROUND_REMAINDER = ROUND_FLOAT_TRUNC_f8, ROUND_NORM_f8 
+      nop.i 999
 }
+	
 { .mfi
-      cmp.lt           p10,p0 = rExp, rExpHalf // Is |x| < 0.5? 
-      nop.f            0
-      nop.i            0
+      nop.m 999
+(p7)  fsub.s1   ROUND_REMAINDER = ROUND_NORM_f8, ROUND_FLOAT_TRUNC_f8
+      nop.i 999 ;;
 }
-;;
 
-// We must correct result if |x| < 0.5, or |x| >= 2^52
-.pred.rel "mutex",p6,p7
+// Assume preliminary result is rounded integer
 { .mfi
-      nop.m            0
-(p6)  fmerge.s         f8 = fNormX, f0        // If |x| < 0.5, result sgn(x)*0
-      nop.i            0
+      nop.m 999
+(p9)  fnorm.d.s0  f8 = ROUND_FLOAT_RINT_f8
+      nop.i 999 
 }
-{ .mfb
-(p7)  cmp.eq           p10,p0 = r0, r0        // Also turn on p10 if |x| >= 2^52
-(p7)  fma.d.s0         f8 = fNormX, f1, f0    // If |x| >= 2^52, result x
-(p10) br.ret.spnt      b0                     // Exit |x| < 0.5 or |x| >= 2^52
-}
-;;
 
-// Here if 0.5 <= |x| < 2^52
+// If x<0, test if result=0
 { .mfi
-      nop.m            0
-      fms.s1           fRem = fNormX, f1, f8  // Get remainder = x - trunc(x)
-      nop.i            0
+      nop.m 999
+(p6)  fcmp.eq.unc  p10,p0 = ROUND_FLOAT_RINT_f8,f0
+      nop.i 999 ;;
 }
-;;
 
+// If x<0 and result=0, set result=-0
 { .mfi
-      nop.m            0
-(p8)  fcmp.le.s1       p8,p0 = fRem, fMHalf
-      nop.i            0
+      nop.m 999
+(p10) fmerge.ns  f8 = f1,f8
+      nop.i 999
 }
+	
+// If x<0, test if remainder=0.5
 { .mfi
-      nop.m            0
-(p9)  fcmp.ge.s1       p9,p0 = fRem, fHalf
-      nop.i            0
+      nop.m 999
+(p6)  fcmp.eq.unc  p6,p0 = ROUND_REMAINDER, ROUND_HALF
+      nop.i 999 ;; 
 }
-;;
-
-// If x < 0 and remainder <= -0.5, then subtract 1 from result
-// If x > 0 and remainder >= +0.5, then add 1 to result
-.pred.rel "mutex",p8,p9
+	
+// If x>0, test if remainder=0.5
 { .mfi
-      nop.m            0
-(p8)  fms.d.s0         f8 = f8, f1, f1
-      nop.i            0
-}
-{ .mfb
-      nop.m            0
-(p9)  fma.d.s0         f8 = f8, f1, f1
-      br.ret.sptk      b0
+      nop.m 999
+(p7)  fcmp.eq.unc  p7,p0 = ROUND_REMAINDER, ROUND_HALF
+      nop.i 999 ;;
 }
-;;
 
-
-ROUND_UNORM:
-// Here if x unorm
+// If x<0 and remainder=0.5, result=truncated-1.0
+// If x>0 and remainder=0.5, result=truncated+1.0
+// Exit
+.pred.rel "mutex",p6,p7
+{ .mfi
+      nop.m 999
+(p6)  fsub.d.s0  f8 = ROUND_FLOAT_TRUNC_f8,f1
+      nop.i 999 
+}
+	
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     ROUND_COMMON           // Return to main path
+      nop.m 999
+(p7)  fadd.d.s0  f8 = ROUND_FLOAT_TRUNC_f8,f1
+      br.ret.sptk  b0 ;;
 }
-;;
 
-GLOBAL_LIBM_END(round)
+.endp round
+ASM_SIZE_DIRECTIVE(round)
diff --git a/sysdeps/ia64/fpu/s_roundf.S b/sysdeps/ia64/fpu/s_roundf.S
index 1e8dc78777..42ee60b218 100644
--- a/sysdeps/ia64/fpu/s_roundf.S
+++ b/sysdeps/ia64/fpu/s_roundf.S
@@ -1,10 +1,11 @@
 .file "roundf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,202 +21,229 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 10/25/00 Initial version
-// 06/14/01 Changed cmp to an equivalent form
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance and reduced code size
-// 04/18/03 Eliminate possible WAW dependency warning
+// 10/25/2000: Created
 //==============================================================
-
+//
 // API
 //==============================================================
 // float roundf(float x)
-//==============================================================
-
-// general input registers:
-// r14 - r19
+//
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rExpHalf   = r18
-rExpMHalf  = r19
+#include "libm_support.h"
 
-// floating-point registers:
-// f8 - f13
+// general input registers:  
+//
+roundf_GR_half      = r14
+roundf_GR_big       = r15
+roundf_GR_expmask   = r16
+roundf_GR_signexp   = r17
+roundf_GR_exp       = r18
+roundf_GR_expdiff   = r19
+
+// predicate registers used: 
+// p6 - p10
 
-fXtruncInt = f9
-fNormX     = f10
-fHalf      = f11
-fMHalf     = f12
-fRem       = f13
+// floating-point registers used: 
 
-// predicate registers used:
-// p6 - p10
+ROUNDF_NORM_f8        = f9                        
+ROUNDF_TRUNC_f8       = f10
+ROUNDF_RINT_f8        = f11
+ROUNDF_FLOAT_TRUNC_f8 = f12
+ROUNDF_FLOAT_RINT_f8  = f13
+ROUNDF_REMAINDER      = f14
+ROUNDF_HALF           = f15
 
 // Overview of operation
 //==============================================================
+
 // float roundf(float x)
-// Return an integer value (represented as a float) that is x
-// rounded to nearest integer, halfway cases rounded away from
-// zero.
+// Return an integer value (represented as a float) that is x 
+// rounded to nearest integer, halfway cases rounded away from 
+// zero. 
 //  if x>0   result = trunc(x+0.5)
 //  if x<0   result = trunc(x-0.5)
-//
-//==============================================================
+// *******************************************************************************
+
+// Set denormal flag for denormal input and
+// take denormal fault if necessary.
 
-// double_extended
-// if the exponent is > 1003e => 3F(true) = 63(decimal)
-// we have a significand of 64 bits 1.63-bits.
-// If we multiply by 2^63, we no longer have a fractional part
-// So input is an integer value already.
+// If x is NAN, ZERO, INFINITY, or >= 2^23 then return
 
-// double
-// if the exponent is >= 10033 => 34(true) = 52(decimal)
-// 34 + 3ff = 433
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
-// So input is an integer value already.
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
 
-// single
-// if the exponent is > 10016 => 17(true) = 23(decimal)
-// we have a significand of 24 bits 1.23-bits. (implicit 1)
-// If we multiply by 2^23, we no longer have a fractional part
-// So input is an integer value already.
 
+.align 32
+.global roundf#
 
 .section .text
-GLOBAL_LIBM_ENTRY(roundf)
+.proc  roundf#
+.align 32
+
 
+roundf: 
+	
+// Get exponent for +0.5
+// Truncate x to integer
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fcvt.fx.trunc.s1 fXtruncInt  = f8      // Convert to int in significand
-      addl             rBigexp = 0x10016, r0 // Set exponent at which is integer
-}
+      addl           roundf_GR_half  = 0x0fffe, r0
+      fcvt.fx.trunc.s1     ROUNDF_TRUNC_f8 = f8
+      nop.i 999
+}
+	
+// Get signexp of x
+// Normalize input
+// Form exponent mask
 { .mfi
-      mov              rExpHalf    = 0x0FFFE // Form sign and exponent of 0.5
-      fnorm.s1         fNormX  = f8          // Normalize input
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      getf.exp  roundf_GR_signexp = f8
+      fnorm     ROUNDF_NORM_f8 = f8                        
+      addl      roundf_GR_expmask  = 0x1ffff, r0 ;;
 }
-;;
 
-{ .mmf
-      setf.exp         fHalf = rExpHalf      // Form 0.5
-      mov              rExpMHalf   = 0x2FFFE // Form sign and exponent of -0.5
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
+// Form +0.5
+// Round x to integer
+{ .mfi
+      setf.exp    ROUNDF_HALF  = roundf_GR_half                      
+      fcvt.fx.s1  ROUNDF_RINT_f8 = f8
+      nop.i 999 ;;
 }
-;;
-
-{ .mfb
-      setf.exp         fMHalf = rExpMHalf    // Form -0.5
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-(p7)  br.cond.spnt     ROUND_UNORM           // Branch if x unorm
+// Get exp of x
+// Test for NAN, INF, ZERO
+// Get exponent at which input has no fractional part
+{ .mfi
+      and         roundf_GR_exp = roundf_GR_expmask, roundf_GR_signexp
+      fclass.m    p8,p9 = f8,0xe7
+      addl        roundf_GR_big  = 0x10016, r0 ;;
+}
+
+// Get exp-bigexp (expdiff >= 0 means |x| >= 2^23: no fractional part)
+// If so, turn on p8, off p9.  Both arms must test expdiff >= 0 (0 <= expdiff)
+{ .mmi
+      sub    roundf_GR_expdiff = roundf_GR_exp, roundf_GR_big ;;
+#ifdef _LIBC
+(p9)  cmp.le.or.andcm  p8,p9 = r0, roundf_GR_expdiff
+#else
+(p9)  cmp.ge.or.andcm  p8,p9 = roundf_GR_expdiff, r0
+#endif
+      nop.i 999 ;;
+}
+     
+// Set p6 if x<0, else set p7
+{ .mfi
+      nop.m 999
+(p9)  fcmp.lt.unc  p6,p7 = f8,f0
+      nop.i 999
 }
-;;
-
-ROUND_COMMON:
-// Return here from ROUND_UNORM
+	
+// If NAN, INF, ZERO, or no fractional part, result is just normalized input
 { .mfi
-      nop.m            0
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test if x < 0
-      nop.i            0
+      nop.m 999
+(p8)  fnorm.s.s0  f8 = f8
+      nop.i 999 ;;
 }
-{ .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.s.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
+
+// Float the truncated integer
+{ .mfi
+      nop.m 999
+(p9)  fcvt.xf     ROUNDF_FLOAT_TRUNC_f8 = ROUNDF_TRUNC_f8
+      nop.i 999 ;;
 }
-;;
 
+// Float the rounded integer to get preliminary result
 { .mfi
-      cmp.lt           p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
-      fcvt.xf          f8 = fXtruncInt        // Pre-Result if 0.5 <= |x| < 2^23
-      cmp.ge           p7,p0 = rExp, rBigexp  // Is |x| >= 2^23?
+      nop.m 999
+(p9)  fcvt.xf     ROUNDF_FLOAT_RINT_f8 = ROUNDF_RINT_f8
+      nop.i 999 ;;
+}
+
+// If x<0 and the difference of the truncated input minus the input is 0.5
+//    then result = truncated input - 1.0
+// Else if x>0 and the difference of the input minus truncated input is 0.5
+//    then result = truncated input + 1.0
+// Else 
+//    result = rounded input
+// Endif
+{ .mfi
+      nop.m 999
+(p6)  fsub.s1   ROUNDF_REMAINDER = ROUNDF_FLOAT_TRUNC_f8, ROUNDF_NORM_f8 
+      nop.i 999
 }
+	
 { .mfi
-      cmp.lt           p10,p0 = rExp, rExpHalf // Is |x| < 0.5? 
-      nop.f            0
-      nop.i            0
+      nop.m 999
+(p7)  fsub.s1   ROUNDF_REMAINDER = ROUNDF_NORM_f8, ROUNDF_FLOAT_TRUNC_f8
+      nop.i 999 ;;
 }
-;;
 
-// We must correct result if |x| < 0.5, or |x| >= 2^23
-.pred.rel "mutex",p6,p7
+// Assume preliminary result is rounded integer
 { .mfi
-      nop.m            0
-(p6)  fmerge.s         f8 = fNormX, f0        // If |x| < 0.5, result sgn(x)*0
-      nop.i            0
+      nop.m 999
+(p9)  fnorm.s.s0  f8 = ROUNDF_FLOAT_RINT_f8
+      nop.i 999
 }
-{ .mfb
-(p7)  cmp.eq           p10,p0 = r0, r0        // Also turn on p10 if |x| >= 2^23
-(p7)  fma.s.s0         f8 = fNormX, f1, f0    // If |x| >= 2^23, result x
-(p10) br.ret.spnt      b0                     // Exit |x| < 0.5 or |x| >= 2^23
-}
-;;
 
-// Here if 0.5 <= |x| < 2^23
+// If x<0, test if result=0
 { .mfi
-      nop.m            0
-      fms.s1           fRem = fNormX, f1, f8  // Get remainder = x - trunc(x)
-      nop.i            0
+      nop.m 999
+(p6)  fcmp.eq.unc  p10,p0 = ROUNDF_FLOAT_RINT_f8,f0
+      nop.i 999 ;;
 }
-;;
 
+// If x<0 and result=0, set result=-0
 { .mfi
-      nop.m            0
-(p8)  fcmp.le.s1       p8,p0 = fRem, fMHalf
-      nop.i            0
+      nop.m 999
+(p10) fmerge.ns  f8 = f1,f8
+      nop.i 999
 }
+	
+// If x<0, test if remainder=0.5
 { .mfi
-      nop.m            0
-(p9)  fcmp.ge.s1       p9,p0 = fRem, fHalf
-      nop.i            0
+      nop.m 999
+(p6)  fcmp.eq.unc  p6,p0 = ROUNDF_REMAINDER, ROUNDF_HALF
+      nop.i 999 ;;
 }
-;;
-
-// If x < 0 and remainder <= -0.5, then subtract 1 from result
-// If x > 0 and remainder >= +0.5, then add 1 to result
-.pred.rel "mutex",p8,p9
+	
+// If x>0, test if remainder=0.5
 { .mfi
-      nop.m            0
-(p8)  fms.s.s0         f8 = f8, f1, f1
-      nop.i            0
-}
-{ .mfb
-      nop.m            0
-(p9)  fma.s.s0         f8 = f8, f1, f1
-      br.ret.sptk      b0
+      nop.m 999
+(p7)  fcmp.eq.unc  p7,p0 = ROUNDF_REMAINDER, ROUNDF_HALF
+      nop.i 999 ;;
 }
-;;
 
-
-ROUND_UNORM:
-// Here if x unorm
+// If x<0 and remainder=0.5, result=truncated-1.0
+// If x>0 and remainder=0.5, result=truncated+1.0
+// Exit
+.pred.rel "mutex",p6,p7
+{ .mfi
+      nop.m 999
+(p6)  fsub.s.s0  f8 = ROUNDF_FLOAT_TRUNC_f8,f1
+      nop.i 999 
+}
+	
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     ROUND_COMMON           // Return to main path
+      nop.m 999
+(p7)  fadd.s.s0  f8 = ROUNDF_FLOAT_TRUNC_f8,f1
+      br.ret.sptk  b0 ;;
 }
-;;
 
-GLOBAL_LIBM_END(roundf)
+.endp roundf
+ASM_SIZE_DIRECTIVE(roundf)
diff --git a/sysdeps/ia64/fpu/s_roundl.S b/sysdeps/ia64/fpu/s_roundl.S
index 79dff00c06..b30f590917 100644
--- a/sysdeps/ia64/fpu/s_roundl.S
+++ b/sysdeps/ia64/fpu/s_roundl.S
@@ -1,10 +1,11 @@
 .file "roundl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,202 +21,229 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 10/25/00 Initial version
-// 06/14/01 Changed cmp to an equivalent form
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance and reduced code size
-// 04/18/03 Eliminate possible WAW dependency warning
+// 10/25/2000: Created
 //==============================================================
-
+//
 // API
 //==============================================================
 // long double roundl(long double x)
-//==============================================================
-
-// general input registers:
-// r14 - r19
+//
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rExpHalf   = r18
-rExpMHalf  = r19
+#include "libm_support.h"
 
-// floating-point registers:
-// f8 - f13
+// general input registers:  
+//
+roundl_GR_half      = r14
+roundl_GR_big       = r15
+roundl_GR_expmask   = r16
+roundl_GR_signexp   = r17
+roundl_GR_exp       = r18
+roundl_GR_expdiff   = r19
+
+// predicate registers used: 
+// p6 - p10
 
-fXtruncInt = f9
-fNormX     = f10
-fHalf      = f11
-fMHalf     = f12
-fRem       = f13
+// floating-point registers used: 
 
-// predicate registers used:
-// p6 - p10
+ROUNDL_NORM_f8        = f9                        
+ROUNDL_TRUNC_f8       = f10
+ROUNDL_RINT_f8        = f11
+ROUNDL_FLOAT_TRUNC_f8 = f12
+ROUNDL_FLOAT_RINT_f8  = f13
+ROUNDL_REMAINDER      = f14
+ROUNDL_HALF           = f15
 
 // Overview of operation
 //==============================================================
+
 // long double roundl(long double x)
-// Return an integer value (represented as a long double) that is x
-// rounded to nearest integer, halfway cases rounded away from
-// zero.
+// Return an integer value (represented as a long double) that is x 
+// rounded to nearest integer, halfway cases rounded away from 
+// zero. 
 //  if x>0   result = trunc(x+0.5)
 //  if x<0   result = trunc(x-0.5)
-//
-//==============================================================
+// *******************************************************************************
+
+// Set denormal flag for denormal input and
+// take denormal fault if necessary.
 
-// double_extended
-// if the exponent is > 1003e => 3F(true) = 63(decimal)
-// we have a significand of 64 bits 1.63-bits.
-// If we multiply by 2^63, we no longer have a fractional part
-// So input is an integer value already.
+// If x is NAN, ZERO, INFINITY, or >= 2^63 then return
 
-// double
-// if the exponent is >= 10033 => 34(true) = 52(decimal)
-// 34 + 3ff = 433
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
-// So input is an integer value already.
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
 
-// single
-// if the exponent is > 10016 => 17(true) = 23(decimal)
-// we have a significand of 24 bits 1.23-bits. (implicit 1)
-// If we multiply by 2^23, we no longer have a fractional part
-// So input is an integer value already.
 
+.align 32
+.global roundl#
 
 .section .text
-GLOBAL_LIBM_ENTRY(roundl)
+.proc  roundl#
+.align 32
+
 
+roundl: 
+	
+// Get exponent for +0.5
+// Truncate x to integer
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fcvt.fx.trunc.s1 fXtruncInt  = f8      // Convert to int in significand
-      addl             rBigexp = 0x1003e, r0 // Set exponent at which is integer
-}
+      addl           roundl_GR_half  = 0x0fffe, r0
+      fcvt.fx.trunc.s1     ROUNDL_TRUNC_f8 = f8
+      nop.i 999
+}
+	
+// Get signexp of x
+// Normalize input
+// Form exponent mask
 { .mfi
-      mov              rExpHalf    = 0x0FFFE // Form sign and exponent of 0.5
-      fnorm.s1         fNormX  = f8          // Normalize input
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      getf.exp  roundl_GR_signexp = f8
+      fnorm     ROUNDL_NORM_f8 = f8                        
+      addl      roundl_GR_expmask  = 0x1ffff, r0 ;;
 }
-;;
 
-{ .mmf
-      setf.exp         fHalf = rExpHalf      // Form 0.5
-      mov              rExpMHalf   = 0x2FFFE // Form sign and exponent of -0.5
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
+// Form +0.5
+// Round x to integer
+{ .mfi
+      setf.exp    ROUNDL_HALF  = roundl_GR_half                      
+      fcvt.fx.s1  ROUNDL_RINT_f8 = f8
+      nop.i 999 ;;
 }
-;;
-
-{ .mfb
-      setf.exp         fMHalf = rExpMHalf    // Form -0.5
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-(p7)  br.cond.spnt     ROUND_UNORM           // Branch if x unorm
+// Get exp of x
+// Test for NAN, INF, ZERO
+// Get exponent at which input has no fractional part
+{ .mfi
+      and         roundl_GR_exp = roundl_GR_expmask, roundl_GR_signexp
+      fclass.m    p8,p9 = f8,0xe7
+      addl        roundl_GR_big  = 0x1003e, r0 ;;
+}
+
+// Get exp-bigexp (expdiff >= 0 means |x| >= 2^63: no fractional part)
+// If so, turn on p8, off p9.  Both arms must test expdiff >= 0 (0 <= expdiff)
+{ .mmi
+      sub    roundl_GR_expdiff = roundl_GR_exp, roundl_GR_big ;;
+#ifdef _LIBC
+(p9)  cmp.le.or.andcm  p8,p9 = r0, roundl_GR_expdiff
+#else
+(p9)  cmp.ge.or.andcm  p8,p9 = roundl_GR_expdiff, r0
+#endif
+      nop.i 999 ;;
+}
+     
+// Set p6 if x<0, else set p7
+{ .mfi
+      nop.m 999
+(p9)  fcmp.lt.unc  p6,p7 = f8,f0
+      nop.i 999
 }
-;;
-
-ROUND_COMMON:
-// Return here from ROUND_UNORM
+	
+// If NAN, INF, ZERO, or no fractional part, result is just normalized input
 { .mfi
-      nop.m            0
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test if x < 0
-      nop.i            0
+      nop.m 999
+(p8)  fnorm.s0  f8 = f8
+      nop.i 999 ;;
 }
-{ .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.s0           f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
+
+// Float the truncated integer
+{ .mfi
+      nop.m 999
+(p9)  fcvt.xf     ROUNDL_FLOAT_TRUNC_f8 = ROUNDL_TRUNC_f8
+      nop.i 999 ;;
 }
-;;
 
+// Float the rounded integer to get preliminary result
 { .mfi
-      cmp.lt           p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
-      fcvt.xf          f8 = fXtruncInt        // Pre-Result if 0.5 <= |x| < 2^63
-      cmp.ge           p7,p0 = rExp, rBigexp  // Is |x| >= 2^63?
+      nop.m 999
+(p9)  fcvt.xf     ROUNDL_FLOAT_RINT_f8 = ROUNDL_RINT_f8
+      nop.i 999 ;;
+}
+
+// If x<0 and the difference of the truncated input minus the input is 0.5
+//    then result = truncated input - 1.0
+// Else if x>0 and the difference of the input minus truncated input is 0.5
+//    then result = truncated input + 1.0
+// Else 
+//    result = rounded input
+// Endif
+{ .mfi
+      nop.m 999
+(p6)  fsub.s1   ROUNDL_REMAINDER = ROUNDL_FLOAT_TRUNC_f8, ROUNDL_NORM_f8 
+      nop.i 999
 }
+	
 { .mfi
-      cmp.lt           p10,p0 = rExp, rExpHalf // Is |x| < 0.5? 
-      nop.f            0
-      nop.i            0
+      nop.m 999
+(p7)  fsub.s1   ROUNDL_REMAINDER = ROUNDL_NORM_f8, ROUNDL_FLOAT_TRUNC_f8
+      nop.i 999 ;;
 }
-;;
 
-// We must correct result if |x| < 0.5, or |x| >= 2^63
-.pred.rel "mutex",p6,p7
+// Assume preliminary result is rounded integer
 { .mfi
-      nop.m            0
-(p6)  fmerge.s         f8 = fNormX, f0        // If |x| < 0.5, result sgn(x)*0
-      nop.i            0
+      nop.m 999
+(p9)  fnorm.s0  f8 = ROUNDL_FLOAT_RINT_f8
+      nop.i 999
 }
-{ .mfb
-(p7)  cmp.eq           p10,p0 = r0, r0        // Also turn on p10 if |x| >= 2^63
-(p7)  fma.s0           f8 = fNormX, f1, f0    // If |x| >= 2^63, result x
-(p10) br.ret.spnt      b0                     // Exit |x| < 0.5 or |x| >= 2^63
-}
-;;
 
-// Here if 0.5 <= |x| < 2^63
+// If x<0, test if result=0
 { .mfi
-      nop.m            0
-      fms.s1           fRem = fNormX, f1, f8  // Get remainder = x - trunc(x)
-      nop.i            0
+      nop.m 999
+(p6)  fcmp.eq.unc  p10,p0 = ROUNDL_FLOAT_RINT_f8,f0
+      nop.i 999 ;;
 }
-;;
 
+// If x<0 and result=0, set result=-0
 { .mfi
-      nop.m            0
-(p8)  fcmp.le.s1       p8,p0 = fRem, fMHalf
-      nop.i            0
+      nop.m 999
+(p10) fmerge.ns  f8 = f1,f8
+      nop.i 999
 }
+	
+// If x<0, test if remainder=0.5
 { .mfi
-      nop.m            0
-(p9)  fcmp.ge.s1       p9,p0 = fRem, fHalf
-      nop.i            0
+      nop.m 999
+(p6)  fcmp.eq.unc  p6,p0 = ROUNDL_REMAINDER, ROUNDL_HALF
+      nop.i 999 ;;
 }
-;;
-
-// If x < 0 and remainder <= -0.5, then subtract 1 from result
-// If x > 0 and remainder >= +0.5, then add 1 to result
-.pred.rel "mutex",p8,p9
+	
+// If x>0, test if remainder=0.5
 { .mfi
-      nop.m            0
-(p8)  fms.s0           f8 = f8, f1, f1
-      nop.i            0
-}
-{ .mfb
-      nop.m            0
-(p9)  fma.s0           f8 = f8, f1, f1
-      br.ret.sptk      b0
+      nop.m 999
+(p7)  fcmp.eq.unc  p7,p0 = ROUNDL_REMAINDER, ROUNDL_HALF
+      nop.i 999 ;;
 }
-;;
 
-
-ROUND_UNORM:
-// Here if x unorm
+// If x<0 and remainder=0.5, result=truncated-1.0
+// If x>0 and remainder=0.5, result=truncated+1.0
+// Exit
+.pred.rel "mutex",p6,p7
+{ .mfi
+      nop.m 999
+(p6)  fsub.s0  f8 = ROUNDL_FLOAT_TRUNC_f8,f1
+      nop.i 999 
+}
+	
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     ROUND_COMMON           // Return to main path
+      nop.m 999
+(p7)  fadd.s0  f8 = ROUNDL_FLOAT_TRUNC_f8,f1
+      br.ret.sptk  b0 ;;
 }
-;;
 
-GLOBAL_LIBM_END(roundl)
+.endp roundl
+ASM_SIZE_DIRECTIVE(roundl)
diff --git a/sysdeps/ia64/fpu/s_scalbn.S b/sysdeps/ia64/fpu/s_scalbn.S
new file mode 100644
index 0000000000..50d14b4e30
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_scalbn.S
@@ -0,0 +1,379 @@
+.file "scalbn.s"
+
+// Copyright (C) 2000, 2001, Intel Corporation
+// All rights reserved.
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00  Initial version
+// 1/26/01  Scalbn completely reworked and now standalone version 
+//
+// API
+//==============================================================
+// double = scalbn  (double x, int n) 
+// input  floating point f8 and int n (r33) 
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.   
+//
+//
+
+#include "libm_support.h"
+
+FR_Big         = f6
+FR_NBig        = f7
+FR_Floating_X  = f8
+FR_Result      = f8
+FR_Result2     = f9
+FR_Result3     = f11
+FR_Norm_X      = f12
+FR_Two_N       = f14
+FR_Two_to_Big  = f15
+
+GR_N_Biased    = r15
+GR_Big         = r16
+GR_NBig        = r17
+GR_Scratch     = r18
+GR_Scratch1    = r19
+GR_Bias        = r20
+GR_N_as_int    = r21
+
+GR_SAVE_B0          = r32
+GR_SAVE_GP          = r33
+GR_SAVE_PFS         = r34
+GR_Parameter_X      = r35
+GR_Parameter_Y      = r36
+GR_Parameter_RESULT = r37
+GR_Tag              = r38
+
+.align 32
+.global scalbn
+
+.section .text
+.proc  scalbn
+.align 32
+
+scalbn: 
+
+//
+//   Is x NAN, INF, ZERO, +-?
+//   Build the exponent Bias
+//
+{    .mfi
+     alloc         r32=ar.pfs,1,2,4,0
+     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+     addl          GR_Bias = 0x0FFFF,r0
+}
+
+//
+//   Sign extend input
+//   Is N zero?
+//   Normalize x
+//
+{    .mfi
+     cmp.eq.unc    p6,p0 = r33,r0  
+     fnorm.s1      FR_Norm_X  =   FR_Floating_X 
+     sxt4          GR_N_as_int = r33
+}
+;;
+
+//
+//   Form the biased exponent N + bias
+//   Return x for NAN, INF, ZERO inputs
+//   Create -35000
+//   Create 35000
+//
+{    .mfi
+     addl          GR_Big = 35000,r0
+     nop.f         0
+     add           GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{    .mfb
+     addl          GR_NBig = -35000,r0
+(p7) fma.d.s0      FR_Result = FR_Floating_X,f1, f0 
+(p7) br.ret.spnt   b0  
+};;
+
+//
+//   Form 2**N from the biased exponent
+//   Return x when N = 0
+//
+{    .mfi
+     setf.exp      FR_Two_N = GR_N_Biased                   
+     nop.f         0
+     addl          GR_Scratch1  = 0x063BF,r0 
+}
+{    .mfb
+     addl          GR_Scratch  = 0x019C3F,r0 
+(p6) fma.d.s0      FR_Result = FR_Floating_X,f1, f0 
+(p6) br.ret.spnt   b0  
+};;
+
+//
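+//   Create 2**40000  (exponent 0x19C3F) in FR_Big
+//   Create 2**-40000 (exponent 0x063BF) in FR_NBig
+//   Is N >= 35000
+//   Is N <= -35000
+//   Raise Denormal operand flag with compare
+//   (2**N was already formed by the earlier setf.exp of FR_Two_N)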
+//
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch1                  
+     nop.f         0
+     cmp.ge.unc    p6, p0 = GR_N_as_int, GR_Big
+}
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch                  
+     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
+     cmp.le.unc    p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//
+//   Adjust 2**N if N was very small or very large
+//
+{    .mfi
+     nop.m 0
+(p6) fma.s1        FR_Two_N = FR_Big,f1,f0
+     nop.i 0
+}
+{ .mlx
+     nop.m 999
+(p0) movl          GR_Scratch = 0x00000000000303FF 
+};;
+
+
+{    .mfi
+     nop.m 0
+(p8) fma.s1        FR_Two_N = FR_NBig,f1,f0
+     nop.i 0
+}
+{    .mlx
+     nop.m 999
+(p0) movl          GR_Scratch1= 0x00000000000103FF 
+};;
+
+//   Set up necessary status fields 
+//
+//   S0 user supplied status
+//   S2 user supplied status + WRE + TD  (Overflows)
+//   S3 user supplied status + FZ + TD   (Underflows)
+//
+{    .mfi
+     nop.m 999
+(p0) fsetc.s3      0x7F,0x41
+     nop.i 999
+}
+{    .mfi
+     nop.m 999
+(p0) fsetc.s2      0x7F,0x42
+     nop.i 999
+};;
+
+//
+//   Do final operation
+//
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch
+     fma.d.s0      FR_Result = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+}
+{    .mfi
+     nop.m         999
+     fma.d.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch1
+     fma.d.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+
+//   Check for overflow or underflow.
+//   Restore s3
+//   Restore s2
+//
+{    .mfi
+     nop.m 0
+     fsetc.s3      0x7F,0x40
+     nop.i 999 
+}
+{    .mfi
+     nop.m 0
+     fsetc.s2      0x7F,0x40
+     nop.i 999
+};;
+
+//
+//   Is the result zero?
+//
+{    .mfi
+     nop.m 999
+     fclass.m.unc  p6, p0 =  FR_Result3, 0x007
+     nop.i 999 
+} 
+{    .mfi
+     addl          GR_Tag = 176, r0
+     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.i 0
+};;
+
+//
+//   Detect masked underflow - Tiny + Inexact Only
+//
+{    .mfi
+     nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+     nop.i 999 
+};; 
+
+//
+//   Is the result below the allowed range (negative overflow)?
+//   Branch out for underflow
+//
+{    .mfb
+(p6) addl           GR_Tag = 177, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt   L(SCALBN_UNDERFLOW) 
+};;
+
+//
+//   Branch out for overflow
+//
+{ .mbb
+     nop.m 0
+(p7) br.cond.spnt   L(SCALBN_OVERFLOW) 
+(p9) br.cond.spnt   L(SCALBN_OVERFLOW) 
+};;
+
+//
+//   Return from main path.
+//
+{    .mfb
+     nop.m 999
+     nop.f 0
+     br.ret.sptk     b0;;                   
+}
+
+.endp scalbn
+ASM_SIZE_DIRECTIVE(scalbn)
+.proc __libm_error_region
+__libm_error_region:
+
+L(SCALBN_OVERFLOW):
+L(SCALBN_UNDERFLOW):
+
+// Common overflow/underflow exit: pass pointers to x, N and the result
+// (r35-r37) plus GR_Tag (r38) to __libm_error_support, then reload f8.
+// Get stack address of N: 32 bytes below the incoming sp
+.prologue
+{ .mfi
+    add   GR_Parameter_Y=-32,sp
+    nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+    mov  GR_SAVE_PFS=ar.pfs
+}
+//
+// Adjust sp: open a 64-byte frame; the N slot is then at sp+32
+//
+{ .mfi
+.fframe 64
+   add sp=-64,sp
+   nop.f 0
+   mov GR_SAVE_GP=gp
+};;
+
+//
+//  Store N on stack in correct position (post-increment leaves Y at sp+48)
+//  Locate the address of x on stack (sp+16)
+//
+{ .mmi
+   st8 [GR_Parameter_Y] =  GR_N_as_int,16
+   add GR_Parameter_X = 16,sp
+.save   b0, GR_SAVE_B0
+   mov GR_SAVE_B0=b0
+};;
+
+//
+// Store x on the stack.
+// Get address for result on stack (sp+48), store the result there,
+// and step GR_Parameter_Y back to N (sp+32) before the call.
+.body
+{ .mib
+   stfd [GR_Parameter_X] = FR_Norm_X
+   add   GR_Parameter_RESULT = 0,GR_Parameter_Y
+   nop.b 0
+}
+{ .mib
+   stfd [GR_Parameter_Y] = FR_Result
+   add   GR_Parameter_Y = -16,GR_Parameter_Y
+   br.call.sptk b0=__libm_error_support#
+};;
+
+//
+//  Get location of result on stack
+//
+{ .mmi
+   nop.m 0
+   nop.m 0
+   add   GR_Parameter_RESULT = 48,sp
+};;
+
+//
+//  Get the new result and pop the 64-byte frame
+//
+{ .mmi
+   ldfd  FR_Result = [GR_Parameter_RESULT]
+.restore sp
+   add   sp = 64,sp
+   mov   b0 = GR_SAVE_B0
+};;
+
+//
+//  Restore gp, ar.pfs and return
+//
+{ .mib
+   mov   gp = GR_SAVE_GP
+   mov   ar.pfs = GR_SAVE_PFS
+   br.ret.sptk     b0
+};;
+// Size the error region itself; scalbn was already sized after its own .endp
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type   __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_scalbnf.S b/sysdeps/ia64/fpu/s_scalbnf.S
new file mode 100644
index 0000000000..ff7d1ca637
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_scalbnf.S
@@ -0,0 +1,379 @@
+//.file "scalbnf.s"
+
+// Copyright (C) 2000, 2001, Intel Corporation
+// All rights reserved.
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00  Initial version
+// 1/26/01  scalbnf completely reworked and now standalone version 
+//
+// API
+//==============================================================
+// float = scalbnf  (float x, int n) 
+// input  floating point f8 and int n (r33) 
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.   
+//
+//
+
+#include "libm_support.h"
+
+FR_Big         = f6
+FR_NBig        = f7
+FR_Floating_X  = f8
+FR_Result      = f8
+FR_Result2     = f9
+FR_Result3     = f11
+FR_Norm_X      = f12
+FR_Two_N       = f14
+FR_Two_to_Big  = f15
+
+GR_N_Biased    = r15
+GR_Big         = r16
+GR_NBig        = r17
+GR_Scratch     = r18
+GR_Scratch1    = r19
+GR_Bias        = r20
+GR_N_as_int    = r21
+
+GR_SAVE_B0          = r32
+GR_SAVE_GP          = r33
+GR_SAVE_PFS         = r34
+GR_Parameter_X      = r35
+GR_Parameter_Y      = r36
+GR_Parameter_RESULT = r37
+GR_Tag              = r38
+
+.align 32
+.global scalbnf
+
+.section .text
+.proc  scalbnf
+.align 32
+
+scalbnf: 
+
+//   scalbnf entry: x is in f8 (FR_Floating_X), n is read from r33.
+//   p7 <- x is NaN, Inf or Zero of either sign (fclass mask 0xe7)
+//   Build the exponent Bias (GR_Bias <- 0x0FFFF)
+//
+{    .mfi
+     alloc         r32=ar.pfs,1,2,4,0
+     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+     addl          GR_Bias = 0x0FFFF,r0
+}
+
+//
+//   Sign extend input (GR_N_as_int <- sxt4 of r33)
+//   Is N zero? (p6; tests r33 itself, not the sign-extended copy)
+//   Normalize x (FR_Norm_X)
+//
+{    .mfi
+     cmp.eq.unc    p6,p0 = r33,r0  
+     fnorm.s1      FR_Norm_X  =   FR_Floating_X 
+     sxt4          GR_N_as_int = r33
+}
+;;
+
+//
+//   Create 35000 (GR_Big)
+//   Create -35000 (GR_NBig)
+//   Add the bias to N (GR_N_Biased)
+//   If p7: return the special value x unchanged (x*1 + 0 under s0).
+//
+{    .mfi
+     addl          GR_Big = 35000,r0
+     nop.f         0
+     add           GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{    .mfb
+     addl          GR_NBig = -35000,r0
+(p7) fma.s.s0      FR_Result = FR_Floating_X,f1, f0 
+(p7) br.ret.spnt   b0  
+};;
+
+//
+//   FR_Two_N <- 2**N from the biased exponent; load the clamp exponents
+//   Return x when N = 0 (p6)
+//
+{    .mfi
+     setf.exp      FR_Two_N = GR_N_Biased                   
+     nop.f         0
+     addl          GR_Scratch1  = 0x063BF,r0 
+}
+{    .mfb
+     addl          GR_Scratch  = 0x019C3F,r0 
+(p6) fma.s.s0      FR_Result = FR_Floating_X,f1, f0 
+(p6) br.ret.spnt   b0  
+};;
+
+//
+//   Create 2**big   (FR_Big,  from exponent 0x019C3F)
+//   Create 2**-big  (FR_NBig, from exponent 0x063BF)
+//   Is N >= 35000   (p6)
+//   Is N <= -35000  (p8)
+//   Raise Denormal operand flag with compare (its predicates are not used)
+//   (2**N itself was already placed in FR_Two_N above)
+//
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch1                  
+     nop.f         0
+     cmp.ge.unc    p6, p0 = GR_N_as_int, GR_Big
+}
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch                  
+     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
+     cmp.le.unc    p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//   Adjust 2**N if N was very small or very large: FR_Two_N is replaced
+//   by FR_Big (p6) or FR_NBig (p8).  The movl's preload the exponents
+//   (0x3007F, 0x1007F) that become the range-check bounds further down.
+{    .mfi
+     nop.m 0
+(p6) fma.s1        FR_Two_N = FR_Big,f1,f0
+     nop.i 0
+}
+{ .mlx
+     nop.m 999
+(p0) movl          GR_Scratch = 0x000000000003007F 
+};;
+
+
+{    .mfi
+     nop.m 0
+(p8) fma.s1        FR_Two_N = FR_NBig,f1,f0
+     nop.i 0
+}
+{    .mlx
+     nop.m 999
+(p0) movl          GR_Scratch1= 0x000000000001007F 
+};;
+
+//   Set up necessary status fields (fsetc and-mask 0x7F, or-mask below)
+//
+//   S0 user supplied status
+//   S2 user supplied status + WRE + TD  (Overflows,  or-mask 0x42)
+//   S3 user supplied status + FZ + TD   (Underflows, or-mask 0x41)
+//
+{    .mfi
+     nop.m 999
+(p0) fsetc.s3      0x7F,0x41
+     nop.i 999
+}
+{    .mfi
+     nop.m 999
+(p0) fsetc.s2      0x7F,0x42
+     nop.i 999
+};;
+
+//
+//   Do final operation: 2**N * x under s0 (returned), s3 and s2 (checks).
+//   FR_NBig / FR_Big are reloaded with the range-check bounds meanwhile.
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch
+     fma.s.s0      FR_Result = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+}
+{    .mfi
+     nop.m         999
+     fma.s.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch1
+     fma.s.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+
+//   The overflow/underflow checks follow; first undo the fsetc's above:
+//   Restore s3 (or-mask 0x40)
+//   Restore s2 (or-mask 0x40)
+//
+{    .mfi
+     nop.m 0
+     fsetc.s3      0x7F,0x40
+     nop.i 999 
+}
+{    .mfi
+     nop.m 0
+     fsetc.s2      0x7F,0x40
+     nop.i 999
+};;
+
+//
+//   Is the s3 result zero? (p6)   Is the s2 result >= FR_Big? (p7, else p8)
+//   GR_Tag defaults to 178, the value passed on the overflow branches.
+{    .mfi
+     nop.m 999
+     fclass.m.unc  p6, p0 =  FR_Result3, 0x007
+     nop.i 999 
+} 
+{    .mfi
+     addl          GR_Tag = 178, r0
+     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.i 0
+};;
+
+//
+//   Detect masked underflow - Tiny + Inexact Only
+//   (only if the s3 result was zero: p6 <- s0 result != s2 result)
+{    .mfi
+     nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+     nop.i 999 
+};; 
+
+//
+//   Is the s2 result <= FR_NBig? (p9; evaluated only under p8)
+//   Branch out for underflow (p6) with GR_Tag = 179
+//
+{    .mfb
+(p6) addl           GR_Tag = 179, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt   L(scalbnf_UNDERFLOW) 
+};;
+
+//
+//   Branch out for overflow (p7: above FR_Big, p9: below FR_NBig)
+//
+{ .mbb
+     nop.m 0
+(p7) br.cond.spnt   L(scalbnf_OVERFLOW) 
+(p9) br.cond.spnt   L(scalbnf_OVERFLOW) 
+};;
+
+//
+//   Return from main path: FR_Result (f8) holds the s0 product.
+//
+{    .mfb
+     nop.m 999
+     nop.f 0
+     br.ret.sptk     b0;;                   
+}
+
+.endp scalbnf
+ASM_SIZE_DIRECTIVE(scalbnf)
+.proc __libm_error_region
+__libm_error_region:
+
+L(scalbnf_OVERFLOW): 
+L(scalbnf_UNDERFLOW): 
+
+// Shared overflow/underflow exit for scalbnf (GR_Tag already holds 178 or
+// 179): build a 64-byte frame and call __libm_error_support.
+// GR_Parameter_Y <- old sp - 32 (= new sp + 32), the stack address of N
+.prologue
+{ .mfi
+    add   GR_Parameter_Y=-32,sp         
+    nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+    mov  GR_SAVE_PFS=ar.pfs              
+}
+//
+// Adjust sp: allocate the 64-byte frame; save gp
+//
+{ .mfi
+.fframe 64
+   add sp=-64,sp                         
+   nop.f 0
+   mov GR_SAVE_GP=gp       
+};;
+
+//
+//  Store N at sp+32; the post-increment leaves GR_Parameter_Y = sp+48
+//  Locate the address of x on stack (sp+16); save b0
+//
+{ .mmi
+   st8 [GR_Parameter_Y] =  GR_N_as_int,16       
+   add GR_Parameter_X = 16,sp          
+.save   b0, GR_SAVE_B0
+   mov GR_SAVE_B0=b0                  
+};;
+
+//
+// Store x (stfs, single) at sp+16 and the tentative result at sp+48.
+// GR_Parameter_RESULT <- sp+48; GR_Parameter_Y is moved back to sp+32 (N).
+//
+.body
+{ .mib
+   stfs [GR_Parameter_X] = FR_Norm_X 
+   add   GR_Parameter_RESULT = 0,GR_Parameter_Y   
+   nop.b 0
+}
+{ .mib
+   stfs [GR_Parameter_Y] = FR_Result                 
+   add   GR_Parameter_Y = -16,GR_Parameter_Y
+   br.call.sptk b0=__libm_error_support#   
+};;
+
+//
+//  Get location of result on stack (sp+48) again after the call
+//
+{ .mmi
+   nop.m 0
+   nop.m 0
+   add   GR_Parameter_RESULT = 48,sp    
+};;
+
+//
+//  Get the new result; release the 64-byte frame; restore b0
+//
+{ .mmi
+   ldfs  FR_Result = [GR_Parameter_RESULT]      
+.restore sp
+   add   sp = 64,sp                       
+   mov   b0 = GR_SAVE_B0                  
+};;
+
+//
+//  Restore gp, ar.pfs and return
+//
+{ .mib
+   mov   gp = GR_SAVE_GP                  
+   mov   ar.pfs = GR_SAVE_PFS             
+   br.ret.sptk     b0                  
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type   __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_scalbnl.S b/sysdeps/ia64/fpu/s_scalbnl.S
new file mode 100644
index 0000000000..9e54a2ec0a
--- /dev/null
+++ b/sysdeps/ia64/fpu/s_scalbnl.S
@@ -0,0 +1,379 @@
+//.file "scalbnl.s"
+
+// Copyright (C) 2000, 2001, Intel Corporation
+// All rights reserved.
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
+//
+// History
+//==============================================================
+// 2/02/00  Initial version
+// 1/26/01  scalbnl completely reworked and now standalone version 
+//
+// API
+//==============================================================
+// double-extended = scalbnl  (double-extended x, int n) 
+// input  floating point f8 and int n (r34) 
+// output floating point f8
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.   
+//
+//
+
+#include "libm_support.h"
+
+FR_Big         = f6
+FR_NBig        = f7
+FR_Floating_X  = f8
+FR_Result      = f8
+FR_Result2     = f9
+FR_Result3     = f11
+FR_Norm_X      = f12
+FR_Two_N       = f14
+FR_Two_to_Big  = f15
+
+GR_N_Biased    = r15
+GR_Big         = r16
+GR_NBig        = r17
+GR_Scratch     = r18
+GR_Scratch1    = r19
+GR_Bias        = r20
+GR_N_as_int    = r21
+
+GR_SAVE_B0          = r32
+GR_SAVE_GP          = r33
+GR_SAVE_PFS         = r34
+GR_Parameter_X      = r35
+GR_Parameter_Y      = r36
+GR_Parameter_RESULT = r37
+GR_Tag              = r38
+
+.align 32
+.global scalbnl
+
+.section .text
+.proc  scalbnl
+.align 32
+
+scalbnl: 
+
+//   scalbnl entry: x is in f8 (FR_Floating_X), n is read from r34.
+//   p7 <- x is NaN, Inf or Zero of either sign (fclass mask 0xe7)
+//   Build the exponent Bias (GR_Bias <- 0x0FFFF)
+//
+{    .mfi
+     alloc         r32=ar.pfs,2,1,4,0
+     fclass.m.unc  p7,p0 = FR_Floating_X, 0xe7 //@snan | @qnan | @inf | @zero
+     addl          GR_Bias = 0x0FFFF,r0
+}
+
+//
+//   Sign extend input (GR_N_as_int <- sxt4 of r34)
+//   Is N zero? (p6; tests r34 itself, not the sign-extended copy)
+//   Normalize x (FR_Norm_X)
+//
+{    .mfi
+     cmp.eq.unc    p6,p0 = r34,r0  
+     fnorm.s1      FR_Norm_X  =   FR_Floating_X 
+     sxt4          GR_N_as_int = r34
+}
+;;
+
+//
+//   Create 35000 (GR_Big)
+//   Create -35000 (GR_NBig)
+//   Add the bias to N (GR_N_Biased)
+//   If p7: return the special value x unchanged (x*1 + 0 under s0).
+//
+{    .mfi
+     addl          GR_Big = 35000,r0
+     nop.f         0
+     add           GR_N_Biased = GR_Bias,GR_N_as_int
+}
+{    .mfb
+     addl          GR_NBig = -35000,r0
+(p7) fma.s0        FR_Result = FR_Floating_X,f1, f0 
+(p7) br.ret.spnt   b0  
+};;
+
+//
+//   FR_Two_N <- 2**N from the biased exponent; load the clamp exponents
+//   Return x when N = 0 (p6)
+//
+{    .mfi
+     setf.exp      FR_Two_N = GR_N_Biased                   
+     nop.f         0
+     addl          GR_Scratch1  = 0x063BF,r0 
+}
+{    .mfb
+     addl          GR_Scratch  = 0x019C3F,r0 
+(p6) fma.s0        FR_Result = FR_Floating_X,f1, f0 
+(p6) br.ret.spnt   b0  
+};;
+
+//
+//   Create 2**big   (FR_Big,  from exponent 0x019C3F)
+//   Create 2**-big  (FR_NBig, from exponent 0x063BF)
+//   Is N >= 35000   (p6)
+//   Is N <= -35000  (p8)
+//   Raise Denormal operand flag with compare (its predicates are not used)
+//   (2**N itself was already placed in FR_Two_N above)
+//
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch1                  
+     nop.f         0
+     cmp.ge.unc    p6, p0 = GR_N_as_int, GR_Big
+}
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch                  
+     fcmp.ge.s0    p0,p11 = FR_Floating_X,f0
+     cmp.le.unc    p8, p0 = GR_N_as_int, GR_NBig
+};;
+
+//   Adjust 2**N if N was very small or very large: FR_Two_N is replaced
+//   by FR_Big (p6) or FR_NBig (p8).  The movl's preload the exponents
+//   (0x33FFF, 0x13FFF) that become the range-check bounds further down.
+{    .mfi
+     nop.m 0
+(p6) fma.s1        FR_Two_N = FR_Big,f1,f0
+     nop.i 0
+}
+{ .mlx
+     nop.m 999
+(p0) movl          GR_Scratch = 0x0000000000033FFF 
+};;
+
+
+{    .mfi
+     nop.m 0
+(p8) fma.s1        FR_Two_N = FR_NBig,f1,f0
+     nop.i 0
+}
+{    .mlx
+     nop.m 999
+(p0) movl          GR_Scratch1= 0x0000000000013FFF 
+};;
+
+//   Set up necessary status fields (fsetc and-mask 0x7F, or-mask below)
+//
+//   S0 user supplied status
+//   S2 user supplied status + WRE + TD  (Overflows,  or-mask 0x42)
+//   S3 user supplied status + FZ + TD   (Underflows, or-mask 0x41)
+//
+{    .mfi
+     nop.m 999
+(p0) fsetc.s3      0x7F,0x41
+     nop.i 999
+}
+{    .mfi
+     nop.m 999
+(p0) fsetc.s2      0x7F,0x42
+     nop.i 999
+};;
+
+//
+//   Do final operation: 2**N * x under s0 (returned), s3 and s2 (checks).
+//   FR_NBig / FR_Big are reloaded with the range-check bounds meanwhile.
+{    .mfi
+     setf.exp      FR_NBig = GR_Scratch
+     fma.s0      FR_Result = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+}
+{    .mfi
+     nop.m         999
+     fma.s3      FR_Result3 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+{    .mfi
+     setf.exp      FR_Big = GR_Scratch1
+     fma.s2      FR_Result2 = FR_Two_N,FR_Norm_X,f0 
+     nop.i         999
+};;
+
+//   The overflow/underflow checks follow; first undo the fsetc's above:
+//   Restore s3 (or-mask 0x40)
+//   Restore s2 (or-mask 0x40)
+//
+{    .mfi
+     nop.m 0
+     fsetc.s3      0x7F,0x40
+     nop.i 999 
+}
+{    .mfi
+     nop.m 0
+     fsetc.s2      0x7F,0x40
+     nop.i 999
+};;
+
+//
+//   Is the s3 result zero? (p6)   Is the s2 result >= FR_Big? (p7, else p8)
+//   GR_Tag defaults to 174, the value passed on the overflow branches.
+{    .mfi
+     nop.m 999
+     fclass.m.unc  p6, p0 =  FR_Result3, 0x007
+     nop.i 999 
+} 
+{    .mfi
+     addl          GR_Tag = 174, r0
+     fcmp.ge.unc.s1 p7, p8 = FR_Result2 , FR_Big
+     nop.i 0
+};;
+
+//
+//   Detect masked underflow - Tiny + Inexact Only
+//   (only if the s3 result was zero: p6 <- s0 result != s2 result)
+{    .mfi
+     nop.m 999
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2
+     nop.i 999 
+};; 
+
+//
+//   Is the s2 result <= FR_NBig? (p9; evaluated only under p8)
+//   Branch out for underflow (p6) with GR_Tag = 175
+//
+{    .mfb
+(p6) addl           GR_Tag = 175, r0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig
+(p6) br.cond.spnt   L(scalbnl_UNDERFLOW) 
+};;
+
+//
+//   Branch out for overflow (p7: above FR_Big, p9: below FR_NBig)
+//
+{ .mbb
+     nop.m 0
+(p7) br.cond.spnt   L(scalbnl_OVERFLOW) 
+(p9) br.cond.spnt   L(scalbnl_OVERFLOW) 
+};;
+
+//
+//   Return from main path: FR_Result (f8) holds the s0 product.
+//
+{    .mfb
+     nop.m 999
+     nop.f 0
+     br.ret.sptk     b0;;                   
+}
+
+.endp scalbnl
+ASM_SIZE_DIRECTIVE(scalbnl)
+.proc __libm_error_region
+__libm_error_region:
+
+L(scalbnl_OVERFLOW): 
+L(scalbnl_UNDERFLOW): 
+
+// Shared overflow/underflow exit for scalbnl (GR_Tag already holds 174 or
+// 175): build a 64-byte frame and call __libm_error_support.
+// GR_Parameter_Y <- old sp - 32 (= new sp + 32), the stack address of N
+.prologue
+{ .mfi
+    add   GR_Parameter_Y=-32,sp         
+    nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+    mov  GR_SAVE_PFS=ar.pfs              
+}
+//
+// Adjust sp: allocate the 64-byte frame; save gp
+//
+{ .mfi
+.fframe 64
+   add sp=-64,sp                         
+   nop.f 0
+   mov GR_SAVE_GP=gp       
+};;
+
+//
+//  Store N at sp+32; the post-increment leaves GR_Parameter_Y = sp+48
+//  Locate the address of x on stack (sp+16); save b0
+//
+{ .mmi
+   st8 [GR_Parameter_Y] =  GR_N_as_int,16       
+   add GR_Parameter_X = 16,sp          
+.save   b0, GR_SAVE_B0
+   mov GR_SAVE_B0=b0                  
+};;
+
+//
+// Store x (stfe, extended) at sp+16 and the tentative result at sp+48.
+// GR_Parameter_RESULT <- sp+48; GR_Parameter_Y is moved back to sp+32 (N).
+//
+.body
+{ .mib
+   stfe [GR_Parameter_X] = FR_Norm_X 
+   add   GR_Parameter_RESULT = 0,GR_Parameter_Y   
+   nop.b 0
+}
+{ .mib
+   stfe [GR_Parameter_Y] = FR_Result                 
+   add   GR_Parameter_Y = -16,GR_Parameter_Y
+   br.call.sptk b0=__libm_error_support#   
+};;
+
+//
+//  Get location of result on stack (sp+48) again after the call
+//
+{ .mmi
+   nop.m 0
+   nop.m 0
+   add   GR_Parameter_RESULT = 48,sp    
+};;
+
+//
+//  Get the new result; release the 64-byte frame; restore b0
+//
+{ .mmi
+   ldfe  FR_Result = [GR_Parameter_RESULT]      
+.restore sp
+   add   sp = 64,sp                       
+   mov   b0 = GR_SAVE_B0                  
+};;
+
+//
+//  Restore gp, ar.pfs and return
+//
+{ .mib
+   mov   gp = GR_SAVE_GP                  
+   mov   ar.pfs = GR_SAVE_PFS             
+   br.ret.sptk     b0                  
+};;
+
+.endp __libm_error_region
+ASM_SIZE_DIRECTIVE(__libm_error_region)
+
+.type   __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/s_significand.S b/sysdeps/ia64/fpu/s_significand.S
index 720e043e5c..84141daf4d 100644
--- a/sysdeps/ia64/fpu/s_significand.S
+++ b/sysdeps/ia64/fpu/s_significand.S
@@ -1,10 +1,10 @@
 .file "significand.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,15 +35,13 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
-// 05/31/00 Fixed bug when x a double-extended denormal
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 2/02/00: Initial version
+// 4/04/00  Unwind support added
+// 5/31/00: Fixed bug when x a double-extended denormal
 //
 // API
 //==============================================================
@@ -58,10 +56,18 @@
 // p6, p7
 //
 // floating-point registers used:  
-// f8, f9, f10 
+// f8, f9, f10
+
+#include "libm_support.h"
+
+.align 32
+.global significand#
 
 .section .text
-GLOBAL_LIBM_ENTRY(significand)
+.proc  significand#
+.align 32
+
+significand: 
 
 // qnan snan inf norm     unorm 0 -+
 // 1    1    1   0        0     1 11
@@ -69,19 +75,19 @@ GLOBAL_LIBM_ENTRY(significand)
 // f10 gets f8(sign) with f1(exp,significand)
 { .mfi
       nop.m 999
-      fmerge.s       f10 = f8,f1               
+(p0)  fmerge.s       f10 = f8,f1               
       nop.i 999
 }
 { .mfi
       nop.m 999
-      fnorm.s0          f9  = f8                  
+(p0)  fnorm          f9  = f8                  
       nop.i 999 ;;
 }
 
 // Test for denormal input
 { .mfi
       nop.m 999
-      fclass.m.unc   p7,p0 = f8, 0x0b
+(p0)  fclass.m.unc   p7,p0 = f8, 0x0b
       nop.i 999 ;;
 }
 
@@ -91,14 +97,14 @@ GLOBAL_LIBM_ENTRY(significand)
 //               return sign(f8) exp(f8) significand(f8), normalized.
 { .mfi
       nop.m 999
-      fclass.m.unc   p0,p6 = f8, 0xe7          
+(p0)  fclass.m.unc   p0,p6 = f8, 0xe7          
       nop.i 999 ;;
 }
 
 { .mmb
       nop.m 999
       nop.m 999
-(p7)  br.cond.spnt SIGNIFICAND_DENORM ;; // Branch if x denormal
+(p7)  br.cond.spnt L(SIGNIFICAND_DENORM) ;; // Branch if x denormal
 }
 
 { .mfi
@@ -109,29 +115,29 @@ GLOBAL_LIBM_ENTRY(significand)
 
 { .mfb
       nop.m 999
-      fnorm.d.s0        f8 = f8                   
-      br.ret.sptk    b0 ;;
+(p0)  fnorm.d        f8 = f8                   
+(p0)  br.ret.sptk    b0 ;;
 }
 
-SIGNIFICAND_DENORM:
+L(SIGNIFICAND_DENORM):
 // Here if x denorm
 { .mfi
       nop.m 999
-      fmerge.se      f8 = f10,f9
+(p0)  fmerge.se      f8 = f10,f9
       nop.i 999 ;;
 }
 
 // Check if fnorm(x) still denormal, means x double-extended denormal
 { .mfi
       nop.m 999
-      fclass.m.unc   p7,p0 = f9, 0x0b
+(p0)  fclass.m.unc   p7,p0 = f9, 0x0b
       nop.i 999 ;;
 }
 
 // This will be the final result unless x double-extended denormal
 { .mfi
       nop.m 999
-      fnorm.d.s0        f8 = f8
+(p0)  fnorm.d        f8 = f8
       nop.i 999 ;;
 }
 
@@ -146,8 +152,9 @@ SIGNIFICAND_DENORM:
 // Final normalization if x double-extended denorm
 { .mfb
       nop.m 999
-(p7)  fnorm.d.s0        f8 = f8
-      br.ret.sptk    b0 ;;
+(p7)  fnorm.d        f8 = f8
+(p0)  br.ret.sptk    b0 ;;
 }
 
-GLOBAL_LIBM_END(significand)
+.endp significand
+ASM_SIZE_DIRECTIVE(significand)
diff --git a/sysdeps/ia64/fpu/s_significandf.S b/sysdeps/ia64/fpu/s_significandf.S
index 5c8299b944..d8cdc159f6 100644
--- a/sysdeps/ia64/fpu/s_significandf.S
+++ b/sysdeps/ia64/fpu/s_significandf.S
@@ -1,10 +1,10 @@
 .file "significandf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,15 +35,13 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/03/00 Modified to improve speed
-// 05/31/00 Fixed bug when x a double-extended denormal
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 2/02/00: Initial version
+// 2/03/00: Modified to improve speed
+// 5/31/00: Fixed bug when x a double-extended denormal
 //
 // API
 //==============================================================
@@ -57,10 +55,18 @@
 // p6, p7
 //
 // floating-point registers used:  
-// f8, f9, f10 
+// f8, f9, f10
+
+#include "libm_support.h"
+
+.align 32
+.global significandf#
 
 .section .text
-GLOBAL_LIBM_ENTRY(significandf)
+.proc  significandf#
+.align 32
+
+significandf: 
 
 // qnan snan inf norm     unorm 0 -+
 // 1    1    1   0        0     1 11
@@ -68,19 +74,19 @@ GLOBAL_LIBM_ENTRY(significandf)
 // f10 gets f8(sign) with f1(exp,significand)
 { .mfi
       nop.m 999
-      fmerge.s       f10 = f8,f1               
+(p0)  fmerge.s       f10 = f8,f1               
       nop.i 999
 }
 { .mfi
       nop.m 999
-      fnorm.s0          f9  = f8                  
+(p0)  fnorm          f9  = f8                  
       nop.i 999 ;;
 }
 
 // Test for denormal input
 { .mfi
       nop.m 999
-      fclass.m.unc   p7,p0 = f8, 0x0b
+(p0)  fclass.m.unc   p7,p0 = f8, 0x0b
       nop.i 999 ;;
 }
 
@@ -90,14 +96,14 @@ GLOBAL_LIBM_ENTRY(significandf)
 //               return sign(f8) exp(f8) significand(f8), normalized.
 { .mfi
       nop.m 999
-      fclass.m.unc   p0,p6 = f8, 0xe7          
+(p0)  fclass.m.unc   p0,p6 = f8, 0xe7          
       nop.i 999 ;;
 }
 
 { .mmb
       nop.m 999
       nop.m 999
-(p7)  br.cond.spnt SIGNIFICAND_DENORM ;; // Branch if x denormal
+(p7)  br.cond.spnt L(SIGNIFICAND_DENORM) ;; // Branch if x denormal
 }
 
 { .mfi
@@ -108,29 +114,29 @@ GLOBAL_LIBM_ENTRY(significandf)
 
 { .mfb
       nop.m 999
-      fnorm.s.s0        f8 = f8
-      br.ret.sptk    b0 ;;
+(p0)  fnorm.s        f8 = f8
+(p0)  br.ret.sptk    b0 ;;
 }
 
-SIGNIFICAND_DENORM:
+L(SIGNIFICAND_DENORM):
 // Here if x denorm
 { .mfi
       nop.m 999
-      fmerge.se      f8 = f10,f9
+(p0)  fmerge.se      f8 = f10,f9
       nop.i 999 ;;
 }
 
 // Check if fnorm(x) still denormal, means x double-extended denormal
 { .mfi
       nop.m 999
-      fclass.m.unc   p7,p0 = f9, 0x0b
+(p0)  fclass.m.unc   p7,p0 = f9, 0x0b
       nop.i 999 ;;
 }
 
 // This will be the final result unless x double-extended denormal
 { .mfi
       nop.m 999
-      fnorm.s.s0        f8 = f8
+(p0)  fnorm.s        f8 = f8
       nop.i 999 ;;
 }
 
@@ -145,8 +151,9 @@ SIGNIFICAND_DENORM:
 // Final normalization if x double-extended denorm
 { .mfb
       nop.m 999
-(p7)  fnorm.s.s0        f8 = f8
-      br.ret.sptk    b0 ;;
+(p7)  fnorm.s        f8 = f8
+(p0)  br.ret.sptk    b0 ;;
 }
 
-GLOBAL_LIBM_END(significandf)
+.endp significandf
+ASM_SIZE_DIRECTIVE(significandf)
diff --git a/sysdeps/ia64/fpu/s_significandl.S b/sysdeps/ia64/fpu/s_significandl.S
index f62df4310c..268d3567d0 100644
--- a/sysdeps/ia64/fpu/s_significandl.S
+++ b/sysdeps/ia64/fpu/s_significandl.S
@@ -1,10 +1,10 @@
 .file "significandl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,15 +35,13 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 02/03/00 Modified to improve speed
-// 05/31/00 Fixed bug when x a double-extended denormal
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 2/02/00: Initial version
+// 2/03/00: Modified to improve speed
+// 5/31/00: Fixed bug when x a double-extended denormal
 //
 // API
 //==============================================================
@@ -58,10 +56,18 @@
 // p6, p7
 //
 // floating-point registers used:  
-// f8, f9, f10 
+// f8, f9, f10
+
+#include "libm_support.h"
+
+.align 32
+.global significandl#
 
 .section .text
-GLOBAL_LIBM_ENTRY(significandl)
+.proc  significandl#
+.align 32
+
+significandl: 
 
 // qnan snan inf norm     unorm 0 -+
 // 1    1    1   0        0     1 11
@@ -69,19 +75,19 @@ GLOBAL_LIBM_ENTRY(significandl)
 // f10 gets f8(sign) with f1(exp,significand)
 { .mfi
       nop.m 999
-      fmerge.s       f10 = f8,f1               
+(p0)  fmerge.s       f10 = f8,f1               
       nop.i 999
 }
 { .mfi
       nop.m 999
-      fnorm.s0          f9  = f8                  
+(p0)  fnorm          f9  = f8                  
       nop.i 999 ;;
 }
 
 // Test for denormal input
 { .mfi
       nop.m 999
-      fclass.m.unc   p7,p0 = f8, 0x0b
+(p0)  fclass.m.unc   p7,p0 = f8, 0x0b
       nop.i 999 ;;
 }
           
@@ -91,14 +97,14 @@ GLOBAL_LIBM_ENTRY(significandl)
 //               return sign(f8) exp(f8) significand(f8), normalized.
 { .mfi
       nop.m 999
-      fclass.m.unc   p0,p6 = f8, 0xe7          
+(p0)  fclass.m.unc   p0,p6 = f8, 0xe7          
       nop.i 999 ;;
 }
 
 { .mmb
       nop.m 999
       nop.m 999
-(p7)  br.cond.spnt SIGNIFICAND_DENORM ;; // Branch if x denormal
+(p7)  br.cond.spnt L(SIGNIFICAND_DENORM) ;; // Branch if x denormal
 }
 
 { .mfi
@@ -109,29 +115,29 @@ GLOBAL_LIBM_ENTRY(significandl)
 
 { .mfb
       nop.m 999
-      fnorm.s0          f8 = f8
-      br.ret.sptk    b0 ;;
+(p0)  fnorm          f8 = f8
+(p0)  br.ret.sptk    b0 ;;
 }
 
-SIGNIFICAND_DENORM:
+L(SIGNIFICAND_DENORM):
 // Here if x denorm
 { .mfi
       nop.m 999
-      fmerge.se      f8 = f10,f9
+(p0)  fmerge.se      f8 = f10,f9
       nop.i 999 ;;
 }
 
 // Check if fnorm(x) still denormal, means x double-extended denormal
 { .mfi
       nop.m 999
-      fclass.m.unc   p7,p0 = f9, 0x0b
+(p0)  fclass.m.unc   p7,p0 = f9, 0x0b
       nop.i 999 ;;
 }
 
 // This will be the final result unless x double-extended denormal
 { .mfi
       nop.m 999
-      fnorm.s0          f8 = f8                   
+(p0)  fnorm          f8 = f8                   
       nop.i 999 ;;
 }
 
@@ -146,8 +152,9 @@ SIGNIFICAND_DENORM:
 // Final normalization if x double-extended denorm
 { .mfb
       nop.m 999
-(p7)  fnorm.s0          f8 = f8                   
-      br.ret.sptk    b0 ;;
+(p7)  fnorm          f8 = f8                   
+(p0)  br.ret.sptk    b0 ;;
 }
 
-GLOBAL_LIBM_END(significandl)
+.endp significandl
+ASM_SIZE_DIRECTIVE(significandl)
diff --git a/sysdeps/ia64/fpu/s_sincos.c b/sysdeps/ia64/fpu/s_sincos.c
index 41254ae60a..1ddbc2122a 100644
--- a/sysdeps/ia64/fpu/s_sincos.c
+++ b/sysdeps/ia64/fpu/s_sincos.c
@@ -1 +1,9 @@
-/* Not needed. */
+#include <math.h>
+/* Store sin (X) in *S and cos (X) in *C, using separate sin and cos calls.  */
+void
+__sincos (double x, double *s, double *c)
+{
+  *s = sin (x);
+  *c = cos (x);
+}
+weak_alias (__sincos, sincos)
diff --git a/sysdeps/ia64/fpu/s_sincosf.c b/sysdeps/ia64/fpu/s_sincosf.c
index 41254ae60a..efd0fe3038 100644
--- a/sysdeps/ia64/fpu/s_sincosf.c
+++ b/sysdeps/ia64/fpu/s_sincosf.c
@@ -1 +1,9 @@
-/* Not needed. */
+#include <math.h>
+/* Store sinf (X) in *S and cosf (X) in *C, using separate sinf and cosf calls.  */
+void
+__sincosf (float x, float *s, float *c)
+{
+  *s = sinf (x);
+  *c = cosf (x);
+}
+weak_alias (__sincosf, sincosf)
diff --git a/sysdeps/ia64/fpu/s_sincosl.c b/sysdeps/ia64/fpu/s_sincosl.c
index 41254ae60a..a835b772e2 100644
--- a/sysdeps/ia64/fpu/s_sincosl.c
+++ b/sysdeps/ia64/fpu/s_sincosl.c
@@ -1 +1,9 @@
-/* Not needed. */
+#include <math.h>
+
+void
+__sincosl (long double x, long double *s, long double *c)
+{                       /* Store sinl (X) in *S and cosl (X) in *C.  */
+  *s = sinl (x);        /* Two separate libm calls; sine is stored first.  */
+  *c = cosl (x);        /* Stored last, so it wins if S and C alias.  */
+}
+weak_alias (__sincosl, sincosl)  /* Presumably the public name -- confirm.  */
diff --git a/sysdeps/ia64/fpu/s_tan.S b/sysdeps/ia64/fpu/s_tan.S
index 3000f5ee06..3a497fcf4c 100644
--- a/sysdeps/ia64/fpu/s_tan.S
+++ b/sysdeps/ia64/fpu/s_tan.S
@@ -1,10 +1,10 @@
-.file "tancot.s"
+.file "tan.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -32,24 +32,20 @@
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-//
+// 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
+// 2/02/00: Initial version
+// 4/04/00  Unwind support added
 // 12/27/00 Improved speed
-// 02/21/01 Updated to call tanl
-// 05/30/02 Added cot
-// 02/10/03 Reordered header: .section, .global, .proc, .align
 //
 // API
 //==============================================================
-// double tan(double x);
-// double cot(double x);
+// double tan( double x);
 //
 // Overview of operation
 //==============================================================
@@ -65,14 +61,11 @@
 //     Nfloat = round_int(tan_W)
 //
 //     tan_r  = x - Nfloat * (pi/2)_hi
-//  a) tan_r  = tan_r - Nfloat * (pi/2)_lo (for tan)
-//  b) tan_r  = Nfloat * (pi/2)_lo - tan_r (for cot)
+//     tan_r  = tan_r - Nfloat * (pi/2)_lo
 //
 // We have two paths: p8, when Nfloat is even and p9. when Nfloat is odd.
-//  a) for tan:  p8: tan(X) =  tan(r)
-//               p9: tan(X) = -cot(r)
-//  b) for cot:  p9: cot(X) =  cot(r)
-//               p8: cot(X) = -tan(r)
+// p8: tan(X) =  tan(r)
+// p9: tan(X) = -cot(r)
 //
 // Each is evaluated as a series. The p9 path requires 1/r.
 //
@@ -82,16 +75,19 @@
 // Registers used
 //==============================================================
 //
-// predicate registers used:
-// p6-12
+// predicate registers used:  
+// p6-10
 //
-// floating-point registers used:
-// f10-15, f32-106
+// floating-point registers used:  
+// f10-15, f32-105
 // f8, input
 //
 // general registers used
-// r14-26, r32-39
+// r14-18, r32-43
 //
+
+#include "libm_support.h"
+
 // Assembly macros
 //==============================================================
 TAN_INV_PI_BY_2_2TO64        = f10
@@ -109,28 +105,28 @@ tan_Pi_by_2_lo               = f34
 tan_P0                       = f35
 tan_P1                       = f36
 tan_P2                       = f37
-tan_P3                       = f38
-tan_P4                       = f39
-tan_P5                       = f40
+tan_P3                       = f38 
+tan_P4                       = f39 
+tan_P5                       = f40 
 tan_P6                       = f41
 tan_P7                       = f42
-tan_P8                       = f43
-tan_P9                       = f44
-tan_P10                      = f45
+tan_P8                       = f43 
+tan_P9                       = f44 
+tan_P10                      = f45 
 tan_P11                      = f46
-tan_P12                      = f47
+tan_P12                      = f47 
 tan_P13                      = f48
 tan_P14                      = f49
 tan_P15                      = f50
 
-tan_Q0                       = f51
-tan_Q1                       = f52
-tan_Q2                       = f53
-tan_Q3                       = f54
-tan_Q4                       = f55
-tan_Q5                       = f56
-tan_Q6                       = f57
-tan_Q7                       = f58
+tan_Q0                       = f51 
+tan_Q1                       = f52 
+tan_Q2                       = f53 
+tan_Q3                       = f54 
+tan_Q4                       = f55 
+tan_Q5                       = f56 
+tan_Q6                       = f57 
+tan_Q7                       = f58 
 tan_Q8                       = f59
 tan_Q9                       = f60
 tan_Q10                      = f61
@@ -157,19 +153,19 @@ tan_v10                      = f79
 tan_v2                       = f80
 tan_v9                       = f81
 tan_v1                       = f82
-tan_int_Nfloat               = f83
-tan_Nfloat                   = f84
+tan_int_Nfloat               = f83 
+tan_Nfloat                   = f84 
 
-tan_NORM_f8                  = f85
+tan_NORM_f8                  = f85 
 tan_W                        = f86
 
 tan_y0                       = f87
-tan_d                        = f88
-tan_y1                       = f89
-tan_dsq                      = f90
-tan_y2                       = f91
-tan_d4                       = f92
-tan_inv_r                    = f93
+tan_d                        = f88 
+tan_y1                       = f89 
+tan_dsq                      = f90 
+tan_y2                       = f91 
+tan_d4                       = f92 
+tan_inv_r                    = f93 
 
 tan_z1                       = f94
 tan_z2                       = f95
@@ -184,7 +180,6 @@ tan_z10                      = f103
 tan_z11                      = f104
 tan_z12                      = f105
 
-arg_copy                     = f106
 
 /////////////////////////////////////////////////////////////
 
@@ -193,33 +188,37 @@ tan_GR_rshf_2to64            = r15
 tan_GR_exp_2tom64            = r16
 tan_GR_n                     = r17
 tan_GR_rshf                  = r18
-tan_AD                       = r19
-tan_GR_10009                 = r20
-tan_GR_17_ones               = r21
-tan_GR_N_odd_even            = r22
-tan_GR_N                     = r23
-tan_signexp                  = r24
-tan_exp                      = r25
-tan_ADQ                      = r26
-
-GR_SAVE_B0                   = r33
-GR_SAVE_PFS                  = r34
-GR_SAVE_GP                   = r35
-GR_Parameter_X               = r36
-GR_Parameter_Y               = r37
-GR_Parameter_RESULT          = r38
-GR_Parameter_Tag             = r39
-
-
-RODATA
+
+tan_AD                       = r33
+tan_GR_10009                 = r34 
+tan_GR_17_ones               = r35 
+tan_GR_N_odd_even            = r36 
+tan_GR_N                     = r37 
+tan_signexp                  = r38
+tan_exp                      = r39
+tan_ADQ                      = r40
+
+GR_SAVE_PFS                  = r41 
+GR_SAVE_B0                   = r42       
+GR_SAVE_GP                   = r43      
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
 .align 16
 
-LOCAL_OBJECT_START(double_tan_constants)
+double_tan_constants:
+ASM_TYPE_DIRECTIVE(double_tan_constants,@object)
+//   data8 0xA2F9836E4E44152A, 0x00003FFE // 2/pi
    data8 0xC90FDAA22168C234, 0x00003FFF // pi/2 hi
-   data8 0xBEEA54580DDEA0E1 // P14
+
+   data8 0xBEEA54580DDEA0E1 // P14 
    data8 0x3ED3021ACE749A59 // P15
-   data8 0xBEF312BD91DC8DA1 // P12
+   data8 0xBEF312BD91DC8DA1 // P12 
    data8 0x3EFAE9AFC14C5119 // P13
    data8 0x3F2F342BF411E769 // P8
    data8 0x3F1A60FC9F3B0227 // P9
@@ -233,9 +232,10 @@ LOCAL_OBJECT_START(double_tan_constants)
    data8 0x3FC11111111111C2 // P1
    data8 0x3FABA1BA1BA0E850 // P2
    data8 0x3F9664F4886725A7 // P3
-LOCAL_OBJECT_END(double_tan_constants)
+ASM_SIZE_DIRECTIVE(double_tan_constants)
 
-LOCAL_OBJECT_START(double_Q_tan_constants)
+double_Q_tan_constants:
+ASM_TYPE_DIRECTIVE(double_Q_tan_constants,@object)
    data8 0xC4C6628B80DC1CD1, 0x00003FBF // pi/2 lo
    data8 0x3E223A73BA576E48 // Q8
    data8 0x3DF54AD8D1F2CA43 // Q9
@@ -248,46 +248,35 @@ LOCAL_OBJECT_START(double_Q_tan_constants)
    data8 0x3F61566ABBFFB489 // Q2
    data8 0x3F2BBD77945C1733 // Q3
    data8 0x3D927FB33E2B0E04 // Q10
-LOCAL_OBJECT_END(double_Q_tan_constants)
+ASM_SIZE_DIRECTIVE(double_Q_tan_constants)
 
 
-.section .text
+   
+.align 32
+.global tan#
+#ifdef _LIBC
+.global __tan#
+#endif
 
 ////////////////////////////////////////////////////////
 
-LOCAL_LIBM_ENTRY(cot)
-// The initial fnorm will take any unmasked faults and
-// normalize any single/double unorms
-
-{ .mlx
-      cmp.eq    p12, p11 = r0, r0 // set p12=1, p11=0 for cot
-      movl tan_GR_sig_inv_pi_by_2 = 0xA2F9836E4E44152A // significand of 2/pi
-}
-{ .mlx
-      addl           tan_AD   = @ltoff(double_tan_constants), gp
-      movl tan_GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+63+1)
-}
-;;
-
-{ .mlx
-      mov tan_GR_exp_2tom64 = 0xffff-64 // exponent of scaling factor 2^-64
-      movl tan_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
-}
-{ .mfb
-      ld8 tan_AD = [tan_AD]
-      fnorm.s0  tan_NORM_f8  = f8
-      br.cond.sptk COMMON_PATH
-}
-;;
 
-LOCAL_LIBM_END(cot)
 
-GLOBAL_IEEE754_ENTRY(tan)
+.section .text
+.proc  tan#
+#ifdef _LIBC
+.proc  __tan#
+#endif
+.align 32
+tan: 
+#ifdef _LIBC
+__tan: 
+#endif
 // The initial fnorm will take any unmasked faults and
 // normalize any single/double unorms
 
 { .mlx
-      cmp.eq    p11, p12 = r0, r0 // set p11=1, p12=0 for tan
+      alloc          r32=ar.pfs,1,11,0,0               
       movl tan_GR_sig_inv_pi_by_2 = 0xA2F9836E4E44152A // significand of 2/pi
 }
 { .mlx
@@ -296,20 +285,18 @@ GLOBAL_IEEE754_ENTRY(tan)
 }
 ;;
 
-{ .mlx
-      mov tan_GR_exp_2tom64 = 0xffff-64 // exponent of scaling factor 2^-64
-      movl tan_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
-}
 { .mfi
       ld8 tan_AD = [tan_AD]
-      fnorm.s0  tan_NORM_f8  = f8
-      nop.i     0
+      fnorm     tan_NORM_f8  = f8                      
+      mov tan_GR_exp_2tom64 = 0xffff-64 // exponent of scaling factor 2^-64
+}
+{ .mlx
+      nop.m 999
+      movl tan_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
 }
 ;;
 
 
-// Common path for both tan and cot
-COMMON_PATH:
 // Form two constants we need
 //   2/pi * 2^1 * 2^63, scaled by 2^64 since we just loaded the significand
 //   1.1000...000 * 2^(63+63+1) to right shift int(W) into the significand
@@ -326,7 +313,7 @@ COMMON_PATH:
 { .mmf
       setf.exp TAN_2TOM64 = tan_GR_exp_2tom64
       adds tan_ADQ = double_Q_tan_constants - double_tan_constants, tan_AD
-(p11) fclass.m.unc  p6,p0 = f8, 0x07  // Test for x=0 (tan)
+      fclass.m.unc  p6,p0 = f8, 0x07  // Test for x=0
 }
 ;;
 
@@ -336,79 +323,79 @@ COMMON_PATH:
 //   1.1000...000 * 2^63, the right shift constant
 { .mmf
       setf.d TAN_RSHF = tan_GR_rshf
-      ldfe      tan_Pi_by_2_hi = [tan_AD],16
+      ldfe      tan_Pi_by_2_hi = [tan_AD],16 
       fclass.m.unc  p7,p0 = f8, 0x23  // Test for x=inf
 }
 ;;
 
 { .mfb
-      ldfe      tan_Pi_by_2_lo = [tan_ADQ],16
+      ldfe      tan_Pi_by_2_lo = [tan_ADQ],16           
       fclass.m.unc  p8,p0 = f8, 0xc3  // Test for x=nan
-(p6)  br.ret.spnt    b0    ;;         // Exit for x=0 (tan only)
+(p6)  br.ret.spnt    b0    ;;         // Exit for x=0
 }
 
 { .mfi
-      ldfpd     tan_P14,tan_P15 = [tan_AD],16
+      ldfpd     tan_P14,tan_P15 = [tan_AD],16                         
 (p7)  frcpa.s0  f8,p9=f0,f0           // Set qnan indef if x=inf
       mov       tan_GR_10009 = 0x10009
 }
 { .mib
-      ldfpd      tan_Q8,tan_Q9  = [tan_ADQ],16
+      ldfpd      tan_Q8,tan_Q9  = [tan_ADQ],16                        
       nop.i 999
 (p7)  br.ret.spnt    b0    ;;         // Exit for x=inf
 }
 
 { .mfi
-      ldfpd      tan_P12,tan_P13 = [tan_AD],16
-(p12) fclass.m.unc  p6,p0 = f8, 0x07  // Test for x=0 (cot)
+      ldfpd      tan_P12,tan_P13 = [tan_AD],16                         
+(p8)  fma.d f8=f8,f1,f8               // Set qnan if x=nan
       nop.i 999
 }
-{ .mfb
-      ldfpd      tan_Q4,tan_Q5  = [tan_ADQ],16
-(p8)  fma.d.s0   f8=f8,f1,f8          // Set qnan if x=nan
+{ .mib
+      ldfpd      tan_Q4,tan_Q5  = [tan_ADQ],16                        
+      nop.i 999
 (p8)  br.ret.spnt    b0    ;;         // Exit for x=nan
 }
 
-{ .mmf
-      getf.exp  tan_signexp    = tan_NORM_f8
-      ldfpd     tan_P8,tan_P9  = [tan_AD],16
-      fmerge.s  arg_copy       = f8, f8 ;; // Save input for error call
+{ .mmi
+      getf.exp  tan_signexp    = tan_NORM_f8                 
+      ldfpd      tan_P8,tan_P9  = [tan_AD],16                         
+      nop.i 999 ;;
 }
 
-// Multiply x by scaled 2/pi and add large const to shift integer part of W to
+// Multiply x by scaled 2/pi and add large const to shift integer part of W to 
 //   rightmost bits of significand
-{ .mmf
-      alloc      r32=ar.pfs,0,4,4,0
+{ .mfi
       ldfpd      tan_Q6,tan_Q7  = [tan_ADQ],16
       fma.s1 TAN_W_2TO64_RSH = tan_NORM_f8,TAN_INV_PI_BY_2_2TO64,TAN_RSHF_2TO64
-};;
+      nop.i 999 ;;
+}
 
-{ .mmf
-      ldfpd      tan_P10,tan_P11 = [tan_AD],16
-      and       tan_exp = tan_GR_17_ones, tan_signexp
-(p6)  frcpa.s0  f8, p0 = f1, f8 ;;        // cot(+-0) = +-Inf
+{ .mmi
+      ldfpd      tan_P10,tan_P11 = [tan_AD],16                         
+      nop.m 999
+      and       tan_exp = tan_GR_17_ones, tan_signexp         ;;
 }
 
 
 // p7 is true if we must call DBX TAN
 // p7 is true if f8 exp is > 0x10009 (which includes all ones
 //    NAN or inf)
-{ .mmb
-      ldfpd      tan_Q0,tan_Q1  = [tan_ADQ],16
-      cmp.ge.unc  p7,p0 = tan_exp,tan_GR_10009
-(p7)  br.cond.spnt   TAN_DBX ;;
+{ .mmi
+      ldfpd      tan_Q0,tan_Q1  = [tan_ADQ],16                         
+      cmp.ge.unc  p7,p0 = tan_exp,tan_GR_10009               
+      nop.i 999 ;;
 }
 
 
 { .mmb
-      ldfpd      tan_P4,tan_P5  = [tan_AD],16
-(p6)  mov GR_Parameter_Tag = 226          // (cot)
-(p6)  br.cond.spnt __libm_error_region ;; // call error support if cot(+-0)
+      ldfpd      tan_P4,tan_P5  = [tan_AD],16                         
+      nop.m 999
+(p7)  br.cond.spnt   L(TAN_DBX) ;;                                  
 }
 
 
 { .mmi
-      ldfpd      tan_Q2,tan_Q3  = [tan_ADQ],16
+      ldfpd      tan_Q2,tan_Q3  = [tan_ADQ],16                         
       nop.m 999
       nop.i 999 ;;
 }
@@ -417,8 +404,8 @@ COMMON_PATH:
 
 // TAN_NFLOAT = Round_Int_Nearest(tan_W)
 { .mfi
-      ldfpd      tan_P6,tan_P7  = [tan_AD],16
-      fms.s1 TAN_NFLOAT = TAN_W_2TO64_RSH,TAN_2TOM64,TAN_RSHF
+      ldfpd      tan_P6,tan_P7  = [tan_AD],16                         
+      fms.s1 TAN_NFLOAT = TAN_W_2TO64_RSH,TAN_2TOM64,TAN_RSHF      
       nop.i 999 ;;
 }
 
@@ -431,22 +418,22 @@ COMMON_PATH:
 
 
 { .mfi
-      ldfpd      tan_P0,tan_P1  = [tan_AD],16
+      ldfpd      tan_P0,tan_P1  = [tan_AD],16                         
       nop.f 999
       nop.i 999 ;;
 }
 
 
-{ .mmi
+{ .mfi
       getf.sig    tan_GR_n = TAN_W_2TO64_RSH
-      ldfpd      tan_P2,tan_P3  = [tan_AD]
+      nop.f 999
       nop.i 999 ;;
 }
 
 // tan_r          = -tan_Nfloat * tan_Pi_by_2_hi + x
 { .mfi
-(p12) add        tan_GR_n = 0x1, tan_GR_n // N = N + 1 (for cot)
-      fnma.s1  tan_r      = TAN_NFLOAT, tan_Pi_by_2_hi,  tan_NORM_f8
+      ldfpd      tan_P2,tan_P3  = [tan_AD]
+      fnma.s1  tan_r      = TAN_NFLOAT, tan_Pi_by_2_hi,  tan_NORM_f8         
       nop.i 999 ;;
 }
 
@@ -454,49 +441,42 @@ COMMON_PATH:
 // p8 ==> even
 // p9 ==> odd
 { .mmi
-      and         tan_GR_N_odd_even = 0x1, tan_GR_n ;;
+      and         tan_GR_N_odd_even = 0x1, tan_GR_n ;;          
       nop.m 999
       cmp.eq.unc  p8,p9          = tan_GR_N_odd_even, r0      ;;
 }
 
 
-.pred.rel "mutex", p11, p12
-// tan_r          = tan_r -tan_Nfloat * tan_Pi_by_2_lo (tan)
+// tan_r          = tan_r -tan_Nfloat * tan_Pi_by_2_lo 
 { .mfi
       nop.m 999
-(p11) fnma.s1  tan_r      = TAN_NFLOAT, tan_Pi_by_2_lo,  tan_r
-      nop.i 999
-}
-// tan_r          = -(tan_r -tan_Nfloat * tan_Pi_by_2_lo) (cot)
-{ .mfi
-      nop.m 999
-(p12) fms.s1   tan_r      = TAN_NFLOAT, tan_Pi_by_2_lo,  tan_r
+      fnma.s1  tan_r      = TAN_NFLOAT, tan_Pi_by_2_lo,  tan_r      
       nop.i 999 ;;
 }
 
 
 { .mfi
       nop.m 999
-      fma.s1   tan_rsq    = tan_r, tan_r,   f0
+      fma.s1   tan_rsq    = tan_r, tan_r,   f0                      
       nop.i 999 ;;
 }
 
 
 { .mfi
       nop.m 999
-(p9)  frcpa.s1   tan_y0, p0 = f1,tan_r
+(p9)  frcpa.s1   tan_y0, p10 = f1,tan_r                  
       nop.i 999  ;;
 }
 
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v18 = tan_rsq, tan_P15, tan_P14
+(p8)  fma.s1  tan_v18 = tan_rsq, tan_P15, tan_P14        
       nop.i 999
 }
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v4  = tan_rsq, tan_P1, tan_P0
+(p8)  fma.s1  tan_v4  = tan_rsq, tan_P1, tan_P0          
       nop.i 999  ;;
 }
 
@@ -504,12 +484,12 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v16 = tan_rsq, tan_P13, tan_P12
-      nop.i 999
+(p8)  fma.s1  tan_v16 = tan_rsq, tan_P13, tan_P12        
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v17 = tan_rsq, tan_rsq, f0
+(p8)  fma.s1  tan_v17 = tan_rsq, tan_rsq, f0             
       nop.i 999 ;;
 }
 
@@ -517,12 +497,12 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v12 = tan_rsq, tan_P9, tan_P8
-      nop.i 999
+(p8)  fma.s1  tan_v12 = tan_rsq, tan_P9, tan_P8          
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v13 = tan_rsq, tan_P11, tan_P10
+(p8)  fma.s1  tan_v13 = tan_rsq, tan_P11, tan_P10        
       nop.i 999 ;;
 }
 
@@ -530,12 +510,12 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v7  = tan_rsq, tan_P5, tan_P4
-      nop.i 999
+(p8)  fma.s1  tan_v7  = tan_rsq, tan_P5, tan_P4          
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v8  = tan_rsq, tan_P7, tan_P6
+(p8)  fma.s1  tan_v8  = tan_rsq, tan_P7, tan_P6          
       nop.i 999 ;;
 }
 
@@ -543,12 +523,12 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p9)  fnma.s1    tan_d   = tan_r, tan_y0, f1
-      nop.i 999
+(p9)  fnma.s1    tan_d   = tan_r, tan_y0, f1   
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v5  = tan_rsq, tan_P3, tan_P2
+(p8)  fma.s1  tan_v5  = tan_rsq, tan_P3, tan_P2          
       nop.i 999 ;;
 }
 
@@ -556,36 +536,36 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z11 = tan_rsq, tan_Q9, tan_Q8
+(p9)  fma.s1  tan_z11 = tan_rsq, tan_Q9, tan_Q8         
       nop.i 999
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z12 = tan_rsq, tan_rsq, f0
+(p9)  fma.s1  tan_z12 = tan_rsq, tan_rsq, f0            
       nop.i 999 ;;
 }
 
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v15 = tan_v17, tan_v18, tan_v16
-      nop.i 999
+(p8)  fma.s1  tan_v15 = tan_v17, tan_v18, tan_v16        
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z7 = tan_rsq, tan_Q5, tan_Q4
+(p9)  fma.s1  tan_z7 = tan_rsq, tan_Q5, tan_Q4          
       nop.i 999 ;;
 }
 
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v11 = tan_v17, tan_v13, tan_v12
+(p8)  fma.s1  tan_v11 = tan_v17, tan_v13, tan_v12        
       nop.i 999
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z8 = tan_rsq, tan_Q7, tan_Q6
+(p9)  fma.s1  tan_z8 = tan_rsq, tan_Q7, tan_Q6          
       nop.i 999 ;;
 }
 
@@ -593,13 +573,13 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v14 = tan_v17, tan_v17, f0
-      nop.i 999
+(p8)  fma.s1  tan_v14 = tan_v17, tan_v17, f0             
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z3 = tan_rsq, tan_Q1, tan_Q0
-      nop.i 999 ;;
+(p9)  fma.s1  tan_z3 = tan_rsq, tan_Q1, tan_Q0          
+      nop.i 999 ;; 
 }
 
 
@@ -607,12 +587,12 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v3 = tan_v17, tan_v5, tan_v4
+(p8)  fma.s1  tan_v3 = tan_v17, tan_v5, tan_v4           
       nop.i 999
 }
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v6 = tan_v17, tan_v8, tan_v7
+(p8)  fma.s1  tan_v6 = tan_v17, tan_v8, tan_v7           
       nop.i 999 ;;
 }
 
@@ -620,89 +600,89 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p9)  fma.s1     tan_y1  = tan_y0, tan_d, tan_y0
-      nop.i 999
+(p9)  fma.s1     tan_y1  = tan_y0, tan_d, tan_y0    
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1     tan_dsq = tan_d, tan_d, f0
-      nop.i 999 ;;
+(p9)  fma.s1     tan_dsq = tan_d, tan_d, f0        
+      nop.i 999 ;; 
 }
 
 
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z10 = tan_z12, tan_Q10, tan_z11
-      nop.i 999
+(p9)  fma.s1  tan_z10 = tan_z12, tan_Q10, tan_z11       
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z9  = tan_z12, tan_z12,f0
+(p9)  fma.s1  tan_z9  = tan_z12, tan_z12,f0             
       nop.i 999 ;;
 }
 
 
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z4 = tan_rsq, tan_Q3, tan_Q2
-      nop.i 999
+(p9)  fma.s1  tan_z4 = tan_rsq, tan_Q3, tan_Q2          
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z6  = tan_z12, tan_z8, tan_z7
-      nop.i 999 ;;
+(p9)  fma.s1  tan_z6  = tan_z12, tan_z8, tan_z7         
+      nop.i 999 ;; 
 }
 
 
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v10 = tan_v14, tan_v15, tan_v11
-      nop.i 999 ;;
+(p8)  fma.s1  tan_v10 = tan_v14, tan_v15, tan_v11        
+      nop.i 999 ;; 
 }
 
 
 
 { .mfi
       nop.m 999
-(p9)  fma.s1     tan_y2  = tan_y1, tan_d, tan_y0
-      nop.i 999
+(p9)  fma.s1     tan_y2  = tan_y1, tan_d, tan_y0         
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1     tan_d4  = tan_dsq, tan_dsq, tan_d
+(p9)  fma.s1     tan_d4  = tan_dsq, tan_dsq, tan_d       
       nop.i 999  ;;
 }
 
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v2 = tan_v14, tan_v6, tan_v3
+(p8)  fma.s1  tan_v2 = tan_v14, tan_v6, tan_v3           
       nop.i 999
 }
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v9 = tan_v14, tan_v14, f0
+(p8)  fma.s1  tan_v9 = tan_v14, tan_v14, f0              
       nop.i 999 ;;
 }
 
 
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z2  = tan_z12, tan_z4, tan_z3
-      nop.i 999
+(p9)  fma.s1  tan_z2  = tan_z12, tan_z4, tan_z3         
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z5  = tan_z9, tan_z10, tan_z6
+(p9)  fma.s1  tan_z5  = tan_z9, tan_z10, tan_z6         
       nop.i 999  ;;
 }
 
 
 { .mfi
       nop.m 999
-(p9)  fma.s1     tan_inv_r = tan_d4, tan_y2, tan_y0
-      nop.i 999
+(p9)  fma.s1     tan_inv_r = tan_d4, tan_y2, tan_y0      
+      nop.i 999 
 }
 { .mfi
       nop.m 999
@@ -714,12 +694,12 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p8)  fma.s1  tan_v1 = tan_v9, tan_v10, tan_v2
-      nop.i 999
+(p8)  fma.s1  tan_v1 = tan_v9, tan_v10, tan_v2           
+      nop.i 999 
 }
 { .mfi
       nop.m 999
-(p9)  fma.s1  tan_z1  = tan_z9, tan_z5, tan_z2
+(p9)  fma.s1  tan_z1  = tan_z9, tan_z5, tan_z2          
       nop.i 999   ;;
 }
 
@@ -727,150 +707,64 @@ COMMON_PATH:
 
 { .mfi
       nop.m 999
-(p8)  fma.d.s0  f8  = tan_v1, tan_rcube, tan_r
-      nop.i 999
+(p8)  fma.d.s0  f8  = tan_v1, tan_rcube, tan_r             
+      nop.i 999  
 }
 { .mfb
       nop.m 999
-(p9)  fms.d.s0  f8  = tan_r, tan_z1, tan_inv_r
-      br.ret.sptk    b0 ;;
+(p9)  fms.d.s0  f8  = tan_r, tan_z1, tan_inv_r        
+      br.ret.sptk    b0 ;;    
 }
-GLOBAL_IEEE754_END(tan)
+.endp tan#
+ASM_SIZE_DIRECTIVE(tan)
+
 
-LOCAL_LIBM_ENTRY(__libm_callout)
-TAN_DBX:
+.proc __libm_callout
+__libm_callout:
+L(TAN_DBX): 
 .prologue
 
 { .mfi
-      nop.m 0
-      fmerge.s f9 = f0,f0
-.save ar.pfs,GR_SAVE_PFS
-      mov  GR_SAVE_PFS=ar.pfs
+        nop.m 0
+     fmerge.s f9 = f0,f0 
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs
 }
 ;;
 
 { .mfi
-      mov GR_SAVE_GP=gp
-      nop.f 0
-.save b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0
+        mov GR_SAVE_GP=gp
+        nop.f 0
+.save   b0, GR_SAVE_B0
+        mov GR_SAVE_B0=b0
 }
 
 .body
-{ .mmb
-      nop.m 999
+{ .mfb
       nop.m 999
-(p11) br.cond.sptk.many  call_tanl ;;
+      nop.f 999
+       br.call.sptk.many  b0=__libm_tan# ;;
 }
 
-// Here if we should call cotl
-{ .mmb
-      nop.m 999
-      nop.m 999
-      br.call.sptk.many  b0=__libm_cotl# ;;
-}
 
 { .mfi
-      mov gp        = GR_SAVE_GP
-      fnorm.d.s0 f8 = f8
-      mov b0        = GR_SAVE_B0
+       mov gp        = GR_SAVE_GP
+      fnorm.d     f8 = f8
+       mov b0        = GR_SAVE_B0 
 }
 ;;
 
-{ .mib
-      nop.m 999
-      mov ar.pfs    = GR_SAVE_PFS
-      br.ret.sptk     b0
-;;
-}
-
-// Here if we should call tanl
-call_tanl:
-{ .mmb
-      nop.m 999
-      nop.m 999
-      br.call.sptk.many  b0=__libm_tanl# ;;
-}
-
-{ .mfi
-      mov gp        = GR_SAVE_GP
-      fnorm.d.s0 f8 = f8
-      mov b0        = GR_SAVE_B0
-}
-;;
 
 { .mib
-      nop.m 999
+         nop.m 999
       mov ar.pfs    = GR_SAVE_PFS
       br.ret.sptk     b0
 ;;
 }
 
-LOCAL_LIBM_END(__libm_callout)
-
-.type __libm_tanl#,@function
-.global __libm_tanl#
-.type __libm_cotl#,@function
-.global __libm_cotl#
-
-LOCAL_LIBM_ENTRY(__libm_error_region)
-.prologue
-
-// (1)
-{ .mfi
-      add           GR_Parameter_Y=-32,sp        // Parameter 2 value
-      nop.f 0
-.save   ar.pfs,GR_SAVE_PFS
-      mov           GR_SAVE_PFS=ar.pfs           // Save ar.pfs
-}
-{ .mfi
-.fframe 64
-      add sp=-64,sp                              // Create new stack
-      nop.f 0
-      mov GR_SAVE_GP=gp                          // Save gp
-};;
-
-// (2)
-{ .mmi
-      stfd [GR_Parameter_Y] = f1,16              // STORE Parameter 2 on stack
-      add GR_Parameter_X = 16,sp                 // Parameter 1 address
-.save   b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0                          // Save b0
-};;
-
-.body
-// (3)
-{ .mib
-      stfd [GR_Parameter_X] = arg_copy           // STORE Parameter 1 on stack
-      add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-      nop.b 0
-}
-{ .mib
-      stfd [GR_Parameter_Y] = f8                 // STORE Parameter 3 on stack
-      add   GR_Parameter_Y = -16,GR_Parameter_Y
-      br.call.sptk b0=__libm_error_support#      // Call error handling function
-};;
-{ .mmi
-      nop.m 0
-      nop.m 0
-      add   GR_Parameter_RESULT = 48,sp
-};;
-
-// (4)
-{ .mmi
-      ldfd  f8 = [GR_Parameter_RESULT]           // Get return result off stack
-.restore sp
-      add   sp = 64,sp                           // Restore stack pointer
-      mov   b0 = GR_SAVE_B0                      // Restore return address
-};;
-{ .mib
-      mov   gp = GR_SAVE_GP                      // Restore gp
-      mov   ar.pfs = GR_SAVE_PFS                 // Restore ar.pfs
-      br.ret.sptk     b0                         // Return
-};;
-
-LOCAL_LIBM_END(__libm_error_region)
 
-.type   __libm_error_support#,@function
-.global __libm_error_support#
+.endp  __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
 
+.type __libm_tan#,@function
+.global __libm_tan#
diff --git a/sysdeps/ia64/fpu/s_tanf.S b/sysdeps/ia64/fpu/s_tanf.S
index 48f82345f9..a84009e2fe 100644
--- a/sysdeps/ia64/fpu/s_tanf.S
+++ b/sysdeps/ia64/fpu/s_tanf.S
@@ -1,10 +1,10 @@
-.file "tancotf.s"
+.file "tanf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -32,658 +32,739 @@
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-//
+// 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 02/02/00 Initial version
-// 04/04/00 Unwind support added
+// 2/02/00: Initial version
+// 4/04/00  Unwind support added
 // 12/27/00 Improved speed
-// 02/21/01 Updated to call tanl
-// 05/30/02 Improved speed, added cotf.
-// 11/25/02 Added explicit completer on fnorm
-// 02/10/03 Reordered header: .section, .global, .proc, .align
-// 04/17/03 Eliminated redundant stop bits
 //
-// APIs
+// API
 //==============================================================
-// float tanf(float)
-// float cotf(float)
+// float tanf( float x);
 //
-// Algorithm Description for tanf
+// Overview of operation
 //==============================================================
-// The tanf function computes the principle value of the tangent of x,
-// where x is radian argument.
-//
-// There are 5 paths:
-// 1. x = +/-0.0
-//    Return tanf(x) = +/-0.0
+// If the input value in radians is |x| >= 1.xxxxx 2^10 call the
+// older slower version.
 //
-// 2. x = [S,Q]NaN
-//    Return tanf(x) = QNaN
+// The new algorithm is used when |x| <= 1.xxxxx 2^9.
 //
-// 3. x = +/-Inf
-//    Return tanf(x) = QNaN
+// Represent the input X as Nfloat * pi/2 + r
+//    where r can be negative and |r| <= pi/4
 //
-// 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
-//    Return tanf(x) = P19(r) = A1*r + A3*r^3 + A5*r^5 + ... + A19*r^19 =
-//    = r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = r*P9(t), where t = r^2
+//     tan_W  = x * 2/pi
+//     Nfloat = round_int(tan_W)
 //
-// 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
-//    Return tanf(x) = -1/r + P11(r) = -1/r + B1*r + B3*r^3 + ... + B11*r^11 =
-//    = -1/r + r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = -1/r + r*P11(t),
-//    where t = r^2
-//
-// Algorithm Description for cotf
-//==============================================================
-// The cotf function computes the principle value of the cotangent of x,
-// where x is radian argument.
+//     tan_r  = x - Nfloat * (pi/2)_hi
+//     tan_r  = tan_r - Nfloat * (pi/2)_lo
 //
-// There are 5 paths:
-// 1. x = +/-0.0
-//    Return cotf(x) = +/-Inf and error handling is called
+// We have two paths: p8, when Nfloat is even, and p9, when Nfloat is odd.
+// p8: tan(X) =  tan(r)
+// p9: tan(X) = -cot(r)
 //
-// 2. x = [S,Q]NaN
-//    Return cotf(x) = QNaN
-//
-// 3. x = +/-Inf
-//    Return cotf(x) = QNaN
-//
-// 4. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is odd, |r|<Pi/4
-//    Return cotf(x) = P19(-r) = A1*(-r) + A3*(-r^3) + ... + A19*(-r^19) =
-//    = -r*(A1 + A3*t + A5*t^2 + ... + A19*t^9) = -r*P9(t), where t = r^2
-//
-// 5. x = r + (Pi/2)*N, N = RoundInt(x*(2/Pi)), N is even, |r|<Pi/4
-//    Return cotf(x) = 1/r + P11(-r) = 1/r + B1*(-r) + ... + B11*(-r^11) =
-//    = 1/r - r*(B1 + B3*t + B5*t^2 + ... + B11*t^5) = 1/r - r*P11(t),
-//    where t = r^2
-//
-//    We set p10 and clear p11 if computing tanf, vice versa for cotf.
+// Each is evaluated as a series. The p9 path requires 1/r.
 //
+// The coefficients used in the series are stored in a table as
+// are the pi constants.
 //
 // Registers used
 //==============================================================
-// Floating Point registers used:
-// f8, input
-// f32 -> f80
 //
-// General registers used:
-// r14 -> r23, r32 -> r39
+// predicate registers used:  
+// p6-10
 //
-// Predicate registers used:
-// p6 -> p13
+// floating-point registers used:  
+// f10-15, f32-105
+// f8, input
+//
+// general registers used
+// r14-18, r32-43
 //
-// Assembly macros
-//==============================================================
-// integer registers
-rExp                        = r14
-rSignMask                   = r15
-rRshf                       = r16
-rScFctrExp                  = r17
-rIntN                       = r18
-rSigRcpPiby2                = r19
-rScRshf                     = r20
-rCoeffA                     = r21
-rCoeffB                     = r22
-rExpCut                     = r23
-
-GR_SAVE_B0                  = r33
-GR_SAVE_PFS                 = r34
-GR_SAVE_GP                  = r35
-GR_Parameter_X              = r36
-GR_Parameter_Y              = r37
-GR_Parameter_RESULT         = r38
-GR_Parameter_Tag            = r39
 
-//==============================================================
-// floating point registers
-fScRcpPiby2                 = f32
-fScRshf                     = f33
-fNormArg                    = f34
-fScFctr                     = f35
-fRshf                       = f36
-fShiftedN                   = f37
-fN                          = f38
-fR                          = f39
-fA01                        = f40
-fA03                        = f41
-fA05                        = f42
-fA07                        = f43
-fA09                        = f44
-fA11                        = f45
-fA13                        = f46
-fA15                        = f47
-fA17                        = f48
-fA19                        = f49
-fB01                        = f50
-fB03                        = f51
-fB05                        = f52
-fB07                        = f53
-fB09                        = f54
-fB11                        = f55
-fA03_01                     = f56
-fA07_05                     = f57
-fA11_09                     = f58
-fA15_13                     = f59
-fA19_17                     = f60
-fA11_05                     = f61
-fA19_13                     = f62
-fA19_05                     = f63
-fRbyA03_01                  = f64
-fB03_01                     = f65
-fB07_05                     = f66
-fB11_09                     = f67
-fB11_05                     = f68
-fRbyB03_01                  = f69
-fRbyB11_01                  = f70
-fRp2                        = f71
-fRp4                        = f72
-fRp8                        = f73
-fRp5                        = f74
-fY0                         = f75
-fY1                         = f76
-fD                          = f77
-fDp2                        = f78
-fInvR                       = f79
-fPiby2                      = f80
-//==============================================================
+#include "libm_support.h"
 
+// Assembly macros
+//==============================================================
+TAN_INV_PI_BY_2_2TO64        = f10   // 2/pi scaled by 2^64 (significand loaded with setf.sig)
+TAN_RSHF_2TO64               = f11   // 1.1000...000 * 2^(63+63+1) shifter
+TAN_2TOM64                   = f12   // 2^-64 scale factor
+TAN_RSHF                     = f13   // 1.1000...000 * 2^63 right-shift constant
+TAN_W_2TO64_RSH              = f14   // x * TAN_INV_PI_BY_2_2TO64 + TAN_RSHF_2TO64
+TAN_NFLOAT                   = f15   // TAN_W_2TO64_RSH * TAN_2TOM64 - TAN_RSHF (N as a float)
+
+tan_Inv_Pi_by_2              = f32
+tan_Pi_by_2_hi               = f33   // loaded (ldfe) from double_tan_constants
+tan_Pi_by_2_lo               = f34   // loaded (ldfe) from double_Q_tan_constants
+
+
+tan_P0                       = f35   // tan_P0-P15: coefficients used when N is even (p8)
+tan_P1                       = f36
+tan_P2                       = f37
+tan_P3                       = f38 
+tan_P4                       = f39 
+tan_P5                       = f40 
+tan_P6                       = f41
+tan_P7                       = f42
+tan_P8                       = f43 
+tan_P9                       = f44 
+tan_P10                      = f45 
+tan_P11                      = f46
+tan_P12                      = f47 
+tan_P13                      = f48
+tan_P14                      = f49
+tan_P15                      = f50
+
+tan_Q0                       = f51   // tan_Q0-Q10: coefficients used when N is odd (p9)
+tan_Q1                       = f52 
+tan_Q2                       = f53 
+tan_Q3                       = f54 
+tan_Q4                       = f55 
+tan_Q5                       = f56 
+tan_Q6                       = f57 
+tan_Q7                       = f58 
+tan_Q8                       = f59
+tan_Q9                       = f60
+tan_Q10                      = f61
+
+tan_r                        = f62   // reduced argument r
+tan_rsq                      = f63   // r * r
+tan_rcube                    = f64   // r * r * r
+
+tan_v18                      = f65   // tan_v*: intermediates of the even-N (p8) evaluation
+tan_v16                      = f66
+tan_v17                      = f67
+tan_v12                      = f68
+tan_v13                      = f69
+tan_v7                       = f70
+tan_v8                       = f71
+tan_v4                       = f72
+tan_v5                       = f73
+tan_v15                      = f74
+tan_v11                      = f75
+tan_v14                      = f76
+tan_v3                       = f77
+tan_v6                       = f78
+tan_v10                      = f79
+tan_v2                       = f80
+tan_v9                       = f81
+tan_v1                       = f82
+tan_int_Nfloat               = f83 
+tan_Nfloat                   = f84 
+
+tan_NORM_f8                  = f85   // fnorm of the input f8
+tan_W                        = f86
+
+tan_y0                       = f87   // frcpa approximation of 1/r (p9 path)
+tan_d                        = f88   // 1 - r * tan_y0
+tan_y1                       = f89 
+tan_dsq                      = f90 
+tan_y2                       = f91 
+tan_d4                       = f92 
+tan_inv_r                    = f93   // refined 1/r used in the final p9 result
+
+tan_z1                       = f94   // tan_z*: intermediates of the odd-N (p9) evaluation
+tan_z2                       = f95
+tan_z3                       = f96
+tan_z4                       = f97
+tan_z5                       = f98
+tan_z6                       = f99
+tan_z7                       = f100
+tan_z8                       = f101
+tan_z9                       = f102
+tan_z10                      = f103
+tan_z11                      = f104
+tan_z12                      = f105
+
+
+/////////////////////////////////////////////////////////////
+
+tan_GR_sig_inv_pi_by_2       = r14   // 0xA2F9836E4E44152A, significand of 2/pi
+tan_GR_rshf_2to64            = r15   // bit pattern moved into TAN_RSHF_2TO64
+tan_GR_exp_2tom64            = r16   // 0xffff-64, exponent moved into TAN_2TOM64
+tan_GR_n                     = r17   // getf.sig of TAN_W_2TO64_RSH; low bit gives N odd/even
+tan_GR_rshf                  = r18   // bit pattern moved into TAN_RSHF
+
+tan_AD                       = r33   // pointer walking double_tan_constants
+tan_GR_10009                 = r34   // 0x10009, exponent cutoff for the __libm_tan call
+tan_GR_17_ones               = r35   // 0x1ffff, ANDed with tan_signexp to form tan_exp
+tan_GR_N_odd_even            = r36   // tan_GR_n & 1
+tan_GR_N                     = r37 
+tan_signexp                  = r38   // getf.exp of tan_NORM_f8
+tan_exp                      = r39   // tan_signexp & tan_GR_17_ones
+tan_ADQ                      = r40   // pointer walking double_Q_tan_constants
+
+GR_SAVE_PFS                  = r41   // ar.pfs saved across the __libm_tan call
+GR_SAVE_B0                   = r42   // b0 saved across the __libm_tan call
+GR_SAVE_GP                   = r43   // gp saved across the __libm_tan call
+
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
 
-RODATA
 .align 16
 
-LOCAL_OBJECT_START(coeff_A)
-data8 0x3FF0000000000000 // A1  = 1.00000000000000000000e+00
-data8 0x3FD5555556BCE758 // A3  = 3.33333334641442641606e-01
-data8 0x3FC111105C2DAE48 // A5  = 1.33333249100689099175e-01
-data8 0x3FABA1F876341060 // A7  = 5.39701122561673229739e-02
-data8 0x3F965FB86D12A38D // A9  = 2.18495194027670719750e-02
-data8 0x3F8265F62415F9D6 // A11 = 8.98353860497717439465e-03
-data8 0x3F69E3AE64CCF58D // A13 = 3.16032468108912746342e-03
-data8 0x3F63920D09D0E6F6 // A15 = 2.38897844840557235331e-03
-LOCAL_OBJECT_END(coeff_A)
-
-LOCAL_OBJECT_START(coeff_B)
-data8 0xC90FDAA22168C235, 0x3FFF // pi/2
-data8 0x3FD55555555358DB // B1  = 3.33333333326107426583e-01
-data8 0x3F96C16C252F643F // B3  = 2.22222230621336129239e-02
-data8 0x3F61566243AB3C60 // B5  = 2.11638633968606896785e-03
-data8 0x3F2BC1169BD4438B // B7  = 2.11748132564551094391e-04
-data8 0x3EF611B4CEA056A1 // B9  = 2.10467959860990200942e-05
-data8 0x3EC600F9E32194BF // B11 = 2.62305891234274186608e-06
-data8 0xBF42BA7BCC177616 // A17 =-5.71546981685324877205e-04
-data8 0x3F4F2614BC6D3BB8 // A19 = 9.50584530849832782542e-04
-LOCAL_OBJECT_END(coeff_B)
+double_tan_constants:   // pi/2 (high part) followed by the P0-P15 coefficients
+ASM_TYPE_DIRECTIVE(double_tan_constants,@object)
+//   data8 0xA2F9836E4E44152A, 0x00003FFE // 2/pi
+   data8 0xC90FDAA22168C234, 0x00003FFF // pi/2 hi (read with ldfe, pointer then advances 16)
+// P coefficients are stored in the order tanf reads them, two per ldfpd
+   data8 0xBEEA54580DDEA0E1 // P14 
+   data8 0x3ED3021ACE749A59 // P15
+   data8 0xBEF312BD91DC8DA1 // P12 
+   data8 0x3EFAE9AFC14C5119 // P13
+   data8 0x3F2F342BF411E769 // P8
+   data8 0x3F1A60FC9F3B0227 // P9
+   data8 0x3EFF246E78E5E45B // P10
+   data8 0x3F01D9D2E782875C // P11
+   data8 0x3F8226E34C4499B6 // P4
+   data8 0x3F6D6D3F12C236AC // P5
+   data8 0x3F57DA1146DCFD8B // P6
+   data8 0x3F43576410FE3D75 // P7
+   data8 0x3FD5555555555555 // P0
+   data8 0x3FC11111111111C2 // P1
+   data8 0x3FABA1BA1BA0E850 // P2
+   data8 0x3F9664F4886725A7 // P3
+ASM_SIZE_DIRECTIVE(double_tan_constants)
+
+double_Q_tan_constants:   // pi/2 (low part) followed by Q0-Q10, in the order tanf reads them
+ASM_TYPE_DIRECTIVE(double_Q_tan_constants,@object)   // tanf reaches this table as tan_AD + (this label - double_tan_constants)
+   data8 0xC4C6628B80DC1CD1, 0x00003FBF // pi/2 lo (read with ldfe, pointer then advances 16)
+   data8 0x3E223A73BA576E48 // Q8
+   data8 0x3DF54AD8D1F2CA43 // Q9
+   data8 0x3EF66A8EE529A6AA // Q4
+   data8 0x3EC2281050410EE6 // Q5
+   data8 0x3E8D6BB992CC3CF5 // Q6
+   data8 0x3E57F88DE34832E4 // Q7
+   data8 0x3FD5555555555555 // Q0
+   data8 0x3F96C16C16C16DB8 // Q1
+   data8 0x3F61566ABBFFB489 // Q2
+   data8 0x3F2BBD77945C1733 // Q3
+   data8 0x3D927FB33E2B0E04 // Q10 (read last, alone, with ldfd)
+ASM_SIZE_DIRECTIVE(double_Q_tan_constants)
+
+
+   
+.align 32
+.global tanf#
+#ifdef _LIBC
+.global __tanf#
+#endif
+
+////////////////////////////////////////////////////////
 
 
-.section .text
 
-LOCAL_LIBM_ENTRY(cotf)
+.section .text
+.proc  tanf#
+#ifdef _LIBC
+.proc  __tanf#
+#endif
+.align 32
+tanf:
+#ifdef _LIBC
+__tanf:
+#endif
+// The initial fnorm will take any unmasked faults and
+// normalize any single/double unorms
 
 { .mlx
-      getf.exp  rExp        = f8                    // ***** Get 2�17 * s + E
-      movl      rSigRcpPiby2= 0xA2F9836E4E44152A    // significand of 2/Pi
+      alloc          r32=ar.pfs,1,11,0,0               
+      movl tan_GR_sig_inv_pi_by_2 = 0xA2F9836E4E44152A // significand of 2/pi
 }
 { .mlx
-      addl      rCoeffA     = @ltoff(coeff_A), gp
-      movl      rScRshf     = 0x47e8000000000000    // 1.5*2^(63+63+1)
+      addl           tan_AD   = @ltoff(double_tan_constants), gp
+      movl tan_GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+63+1)
 }
 ;;
 
 { .mfi
-      alloc     r32         = ar.pfs, 0, 4, 4, 0
-      fclass.m  p9, p0      = f8, 0xc3              // Test for x=nan
-      cmp.eq    p11, p10    = r0, r0                // if p11=1 we compute cotf
+      ld8 tan_AD = [tan_AD]
+      fnorm     tan_NORM_f8  = f8                      
+      mov tan_GR_exp_2tom64 = 0xffff-64 // exponent of scaling factor 2^-64
 }
-{ .mib
-      ld8       rCoeffA     = [rCoeffA]
-      mov       rExpCut     = 0x10009               // cutoff for exponent
-      br.cond.sptk Common_Path
+{ .mlx
+      nop.m 999
+      movl tan_GR_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
 }
 ;;
 
-LOCAL_LIBM_END(cotf)
 
-GLOBAL_IEEE754_ENTRY(tanf)
-
-{ .mlx
-      getf.exp  rExp        = f8                    // ***** Get 2�17 * s + E
-      movl      rSigRcpPiby2= 0xA2F9836E4E44152A    // significand of 2/Pi
+// Form two constants we need
+//   2/pi * 2^1 * 2^63, scaled by 2^64 since we just loaded the significand
+//   1.1000...000 * 2^(63+63+1) to right shift int(W) into the significand
+{ .mmi
+      setf.sig TAN_INV_PI_BY_2_2TO64 = tan_GR_sig_inv_pi_by_2
+      setf.d TAN_RSHF_2TO64 = tan_GR_rshf_2to64
+      mov       tan_GR_17_ones     = 0x1ffff             ;;
 }
-{ .mlx
-      addl      rCoeffA     = @ltoff(coeff_A), gp
-      movl      rScRshf     = 0x47e8000000000000    // 1.5*2^(63+63+1)
+
+
+// Form another constant
+//   2^-64 for scaling Nfloat
+//   (the same bundle also points tan_ADQ at the Q table and tests for x=0)
+{ .mmf
+      setf.exp TAN_2TOM64 = tan_GR_exp_2tom64
+      adds tan_ADQ = double_Q_tan_constants - double_tan_constants, tan_AD
+      fclass.m.unc  p6,p0 = f8, 0x07  // Test for x=0
 }
 ;;
 
-{ .mfi
-      alloc     r32         = ar.pfs, 0, 4, 4, 0
-      fclass.m  p9, p0      = f8, 0xc3              // Test for x=nan
-      cmp.eq    p10, p11    = r0, r0                // if p10=1 we compute tandf
-}
-{ .mib
-      ld8       rCoeffA     = [rCoeffA]
-      mov       rExpCut     = 0x10009               // cutoff for exponent
-      nop.b     0
+
+// Form another constant
+//   1.1000...000 * 2^63, the right shift constant
+//   (the same bundle also loads (pi/2)_hi and tests for x=inf)
+{ .mmf
+      setf.d TAN_RSHF = tan_GR_rshf
+      ldfe      tan_Pi_by_2_hi = [tan_AD],16 
+      fclass.m.unc  p7,p0 = f8, 0x23  // Test for x=inf
 }
 ;;
 
-// Below is common path for both tandf and cotdf
-Common_Path:
-{ .mfi
-      setf.sig  fScRcpPiby2 = rSigRcpPiby2          // 2^(63+1)*(2/Pi)
-      fclass.m  p8, p0      = f8, 0x23              // Test for x=inf
-      mov       rSignMask   = 0x1ffff               // mask for sign bit
-}
-{ .mlx
-      setf.d    fScRshf     = rScRshf               // 1.5*2^(63+63+1)
-      movl      rRshf       = 0x43e8000000000000    // 1.5 2^63 for right shift
+{ .mfb
+      ldfe      tan_Pi_by_2_lo = [tan_ADQ],16           
+      fclass.m.unc  p8,p0 = f8, 0xc3  // Test for x=nan
+(p6)  br.ret.spnt    b0    ;;         // Exit for x=0
 }
-;;
 
 { .mfi
-      and       rSignMask   = rSignMask, rExp       // clear sign bit
-(p10) fclass.m.unc p7, p0   = f8, 0x07              // Test for x=0 (for tanf)
-      mov       rScFctrExp  = 0xffff-64             // exp of scaling factor
+      ldfpd     tan_P14,tan_P15 = [tan_AD],16                         
+(p7)  frcpa.s0  f8,p9=f0,f0           // Set qnan indef if x=inf
+      mov       tan_GR_10009 = 0x10009
 }
-{ .mfb
-      adds      rCoeffB     = coeff_B - coeff_A, rCoeffA
-(p9)  fma.s.s0  f8          = f8, f1, f8            // Set qnan if x=nan
-(p9)  br.ret.spnt b0                                // Exit for x=nan
+{ .mib
+      ldfpd      tan_Q8,tan_Q9  = [tan_ADQ],16                        
+      nop.i 999
+(p7)  br.ret.spnt    b0    ;;         // Exit for x=inf
 }
-;;
 
 { .mfi
-      cmp.ge    p6, p0      = rSignMask, rExpCut    // p6 = (E => 0x10009)
-(p8)  frcpa.s0  f8, p0      = f0, f0                // Set qnan indef if x=inf
-      mov GR_Parameter_Tag  = 227                   // (cotf)
+      ldfpd      tan_P12,tan_P13 = [tan_AD],16                         
+(p8)  fma.s f8=f8,f1,f8               // Set qnan if x=nan
+      nop.i 999
 }
-{ .mbb
-      ldfe      fPiby2      = [rCoeffB], 16
-(p8)  br.ret.spnt b0                                // Exit for x=inf
-(p6)  br.cond.spnt Huge_Argument                    // Branch if |x|>=2^10
+{ .mib
+      ldfpd      tan_Q4,tan_Q5  = [tan_ADQ],16                        
+      nop.i 999
+(p8)  br.ret.spnt    b0    ;;         // Exit for x=nan
 }
-;;
 
-{ .mfi
-      nop.m     0
-(p11) fclass.m.unc p6, p0   = f8, 0x07              // Test for x=0 (for cotf)
-      nop.i     0
+{ .mmi
+      getf.exp  tan_signexp    = tan_NORM_f8                 
+      ldfpd      tan_P8,tan_P9  = [tan_AD],16                         
+      nop.i 999 ;;
 }
-{ .mfb
-      nop.m     0
-      fnorm.s0  fNormArg    = f8
-(p7)  br.ret.spnt b0                                // Exit for x=0 (for tanf)
+
+// Multiply x by scaled 2/pi and add large const to shift integer part of W to 
+//   rightmost bits of significand
+{ .mfi
+      ldfpd      tan_Q6,tan_Q7  = [tan_ADQ],16
+      fma.s1 TAN_W_2TO64_RSH = tan_NORM_f8,TAN_INV_PI_BY_2_2TO64,TAN_RSHF_2TO64
+      nop.i 999 ;;
 }
-;;
 
-{ .mmf
-      ldfpd     fA01, fA03  = [rCoeffA], 16
-      ldfpd     fB01, fB03  = [rCoeffB], 16
-      fmerge.s  f10         = f8, f8                // Save input for error call
+{ .mmi
+      ldfpd      tan_P10,tan_P11 = [tan_AD],16                         
+      nop.m 999
+      and       tan_exp = tan_GR_17_ones, tan_signexp         ;;
 }
-;;
 
-{ .mmf
-      setf.exp  fScFctr     = rScFctrExp            // get as real
-      setf.d    fRshf       = rRshf                 // get right shifter as real
-(p6)  frcpa.s0  f8, p0      = f1, f8                // cotf(+-0) = +-Inf
+
+// p7 is true if we must call DBX TAN
+// p7 is true if f8 exp is >= 0x10009 (which includes all ones
+//    NAN or inf)
+{ .mmi
+      ldfpd      tan_Q0,tan_Q1  = [tan_ADQ],16                         
+      cmp.ge.unc  p7,p0 = tan_exp,tan_GR_10009               
+      nop.i 999 ;;
 }
-;;
+
 
 { .mmb
-      ldfpd     fA05, fA07  = [rCoeffA], 16
-      ldfpd     fB05, fB07  = [rCoeffB], 16
-(p6)  br.cond.spnt __libm_error_region    // call error support if cotf(+-0)
+      ldfpd      tan_P4,tan_P5  = [tan_AD],16                         
+      nop.m 999
+(p7)  br.cond.spnt   L(TAN_DBX) ;;                                  
 }
-;;
+
 
 { .mmi
-      ldfpd     fA09, fA11  = [rCoeffA], 16
-      ldfpd     fB09, fB11  = [rCoeffB], 16
-      nop.i     0
+      ldfpd      tan_Q2,tan_Q3  = [tan_ADQ],16                         
+      nop.m 999
+      nop.i 999 ;;
 }
-;;
 
+
+
+// TAN_NFLOAT = Round_Int_Nearest(tan_W)
 { .mfi
-      nop.m     0
-      fma.s1    fShiftedN = fNormArg,fScRcpPiby2,fScRshf // x*2^70*(2/Pi)+ScRshf
-      nop.i     0
+      ldfpd      tan_P6,tan_P7  = [tan_AD],16                         
+      fms.s1 TAN_NFLOAT = TAN_W_2TO64_RSH,TAN_2TOM64,TAN_RSHF      
+      nop.i 999 ;;
 }
-;;
+
 
 { .mfi
-      nop.m     0
-      fms.s1    fN          = fShiftedN, fScFctr, fRshf // N = Y*2^(-70) - Rshf
-      nop.i     0
+      ldfd      tan_Q10 = [tan_ADQ]
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
-.pred.rel "mutex", p10, p11
+
 { .mfi
-      getf.sig  rIntN       = fShiftedN             // get N as integer
-(p10) fnma.s1   fR          = fN, fPiby2, fNormArg  // R = x - (Pi/2)*N (tanf)
-      nop.i     0
+      ldfpd      tan_P0,tan_P1  = [tan_AD],16                         
+      nop.f 999
+      nop.i 999 ;;
 }
+
+
 { .mfi
-      nop.m     0
-(p11) fms.s1    fR          = fN, fPiby2, fNormArg  // R = (Pi/2)*N - x (cotf)
-      nop.i     0
+      getf.sig    tan_GR_n = TAN_W_2TO64_RSH
+      nop.f 999
+      nop.i 999 ;;
 }
-;;
 
+// tan_r          = -tan_Nfloat * tan_Pi_by_2_hi + x
+{ .mfi
+      ldfpd      tan_P2,tan_P3  = [tan_AD]
+      fnma.s1  tan_r      = TAN_NFLOAT, tan_Pi_by_2_hi,  tan_NORM_f8         
+      nop.i 999 ;;
+}
+
+
+// p8 ==> even
+// p9 ==> odd
 { .mmi
-      ldfpd     fA13, fA15  = [rCoeffA], 16
-      ldfpd     fA17, fA19  = [rCoeffB], 16
-      nop.i     0
+      and         tan_GR_N_odd_even = 0x1, tan_GR_n ;;          
+      nop.m 999
+      cmp.eq.unc  p8,p9          = tan_GR_N_odd_even, r0      ;;
 }
-;;
 
-Return_From_Huges:
+
+// tan_r          = tan_r -tan_Nfloat * tan_Pi_by_2_lo 
 { .mfi
-      nop.m     0
-      fma.s1    fRp2        = fR, fR, f0            // R^2
-(p11) add       rIntN       = 0x1, rIntN            // N = N + 1 (cotf)
+      nop.m 999
+      fnma.s1  tan_r      = TAN_NFLOAT, tan_Pi_by_2_lo,  tan_r      
+      nop.i 999 ;;
 }
-;;
+
 
 { .mfi
-      nop.m     0
-      frcpa.s1  fY0, p0     = f1, fR                // Y0 ~ 1/R
-      tbit.z    p8, p9      = rIntN, 0              // p8=1 if N is even
+      nop.m 999
+      fma.s1   tan_rsq    = tan_r, tan_r,   f0                      
+      nop.i 999 ;;
 }
-;;
 
-// Below are mixed polynomial calculations (mixed for even and odd N)
+
+{ .mfi
+      nop.m 999
+(p9)  frcpa.s1   tan_y0, p10 = f1,tan_r                  
+      nop.i 999  ;;
+}
+
+
 { .mfi
-      nop.m     0
-(p9)  fma.s1    fB03_01     = fRp2, fB03, fB01      // R^2*B3 + B1
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v18 = tan_rsq, tan_P15, tan_P14        
+      nop.i 999
 }
 { .mfi
-      nop.m     0
-      fma.s1    fRp4        = fRp2, fRp2, f0        // R^4
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v4  = tan_rsq, tan_P1, tan_P0          
+      nop.i 999  ;;
 }
-;;
+
+
 
 { .mfi
-      nop.m     0
-(p8)  fma.s1    fA15_13     = fRp2, fA15, fA13      // R^2*A15 + A13
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v16 = tan_rsq, tan_P13, tan_P12        
+      nop.i 999 
 }
 { .mfi
-      nop.m     0
-(p8)  fma.s1    fA19_17     = fRp2, fA19, fA17      // R^2*A19 + A17
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v17 = tan_rsq, tan_rsq, f0             
+      nop.i 999 ;;
 }
-;;
+
+
 
 { .mfi
-      nop.m     0
-(p8)  fma.s1    fA07_05     = fRp2, fA07, fA05      // R^2*A7 + A5
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v12 = tan_rsq, tan_P9, tan_P8          
+      nop.i 999 
 }
 { .mfi
-      nop.m     0
-(p8)  fma.s1    fA11_09     = fRp2, fA11, fA09      // R^2*A11 + A9
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v13 = tan_rsq, tan_P11, tan_P10        
+      nop.i 999 ;;
 }
-;;
+
+
 
 { .mfi
-      nop.m     0
-(p9)  fma.s1    fB07_05     = fRp2, fB07, fB05      // R^2*B7 + B5
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v7  = tan_rsq, tan_P5, tan_P4          
+      nop.i 999 
 }
 { .mfi
-      nop.m     0
-(p9)  fma.s1    fB11_09     = fRp2, fB11, fB09      // R^2*B11 + B9
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v8  = tan_rsq, tan_P7, tan_P6          
+      nop.i 999 ;;
 }
-;;
+
+
 
 { .mfi
-      nop.m     0
-(p9)  fnma.s1   fD          = fR, fY0, f1           // D = 1 - R*Y0
-      nop.i     0
+      nop.m 999
+(p9)  fnma.s1    tan_d   = tan_r, tan_y0, f1   
+      nop.i 999 
 }
 { .mfi
-      nop.m     0
-(p8)  fma.s1    fA03_01     = fRp2, fA03, fA01      // R^2*A3 + A1
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v5  = tan_rsq, tan_P3, tan_P2          
+      nop.i 999 ;;
 }
-;;
+
+
 
 { .mfi
-      nop.m     0
-      fma.s1    fRp8        = fRp4, fRp4, f0        // R^8
-      nop.i     0
+      nop.m 999
+(p9)  fma.s1  tan_z11 = tan_rsq, tan_Q9, tan_Q8         
+      nop.i 999
 }
 { .mfi
-      nop.m     0
-      fma.s1    fRp5        = fR, fRp4, f0          // R^5
-      nop.i     0
+      nop.m 999
+(p9)  fma.s1  tan_z12 = tan_rsq, tan_rsq, f0            
+      nop.i 999 ;;
 }
-;;
+
 
 { .mfi
-      nop.m     0
-(p8)  fma.s1    fA11_05     = fRp4, fA11_09, fA07_05 // R^4*(R^2*A11 + A9) + ...
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v15 = tan_v17, tan_v18, tan_v16        
+      nop.i 999 
 }
 { .mfi
-      nop.m     0
-(p8)  fma.s1    fA19_13     = fRp4, fA19_17, fA15_13 // R^4*(R^2*A19 + A17) + ..
-      nop.i     0
+      nop.m 999
+(p9)  fma.s1  tan_z7 = tan_rsq, tan_Q5, tan_Q4          
+      nop.i 999 ;;
 }
-;;
+
 
 { .mfi
-      nop.m     0
-(p9)  fma.s1    fB11_05     = fRp4, fB11_09, fB07_05 // R^4*(R^2*B11 + B9) + ...
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v11 = tan_v17, tan_v13, tan_v12        
+      nop.i 999
 }
 { .mfi
-      nop.m     0
-(p9)  fma.s1    fRbyB03_01  = fR, fB03_01, f0       // R*(R^2*B3 + B1)
-      nop.i     0
+      nop.m 999
+(p9)  fma.s1  tan_z8 = tan_rsq, tan_Q7, tan_Q6          
+      nop.i 999 ;;
 }
-;;
+
+
 
 { .mfi
-      nop.m     0
-(p9)  fma.s1    fY1         = fY0, fD, fY0          // Y1 = Y0*D + Y0
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v14 = tan_v17, tan_v17, f0             
+      nop.i 999 
 }
 { .mfi
-      nop.m     0
-(p9)  fma.s1    fDp2        = fD, fD, f0            // D^2
-      nop.i     0
+      nop.m 999
+(p9)  fma.s1  tan_z3 = tan_rsq, tan_Q1, tan_Q0          
+      nop.i 999 ;; 
 }
-;;
+
+
+
 
 { .mfi
-      nop.m     0
-   // R^8*(R^6*A19 + R^4*A17 + R^2*A15 + A13) + R^6*A11 + R^4*A9 + R^2*A7 + A5
-(p8)  fma.d.s1  fA19_05     = fRp8, fA19_13, fA11_05
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v3 = tan_v17, tan_v5, tan_v4           
+      nop.i 999
 }
 { .mfi
-      nop.m     0
-(p8)  fma.d.s1  fRbyA03_01  = fR, fA03_01, f0       // R*(R^2*A3 + A1)
-      nop.i     0
+      nop.m 999
+(p8)  fma.s1  tan_v6 = tan_v17, tan_v8, tan_v7           
+      nop.i 999 ;;
 }
-;;
+
+
 
 { .mfi
-      nop.m     0
-(p9)  fma.d.s1  fInvR       = fY1, fDp2, fY1        // 1/R = Y1*D^2 + Y1
-      nop.i     0
+      nop.m 999
+(p9)  fma.s1     tan_y1  = tan_y0, tan_d, tan_y0    
+      nop.i 999 
 }
 { .mfi
-      nop.m     0
-   // R^5*(R^6*B11 + R^4*B9 + R^2*B7 + B5) + R^3*B3 + R*B1
-(p9)  fma.d.s1  fRbyB11_01  = fRp5, fB11_05, fRbyB03_01
-      nop.i     0
+      nop.m 999
+(p9)  fma.s1     tan_dsq = tan_d, tan_d, f0        
+      nop.i 999 ;; 
 }
-;;
 
-.pred.rel "mutex", p8, p9
+
 { .mfi
-      nop.m     0
-   // Result = R^5*(R^14*A19 + R^12*A17 + R^10*A15 + ...) + R^3*A3 + R*A1
-(p8)  fma.s.s0  f8          = fRp5, fA19_05, fRbyA03_01
-      nop.i 0
+      nop.m 999
+(p9)  fma.s1  tan_z10 = tan_z12, tan_Q10, tan_z11       
+      nop.i 999 
 }
-{ .mfb
-      nop.m     0
-   // Result = -1/R + R^11*B11 + R^9*B9 + R^7*B7 + R^5*B5 + R^3*B3 + R*B1
-(p9)  fnma.s.s0 f8          = f1, fInvR, fRbyB11_01
-      br.ret.sptk b0                                // exit for main path
+{ .mfi
+      nop.m 999
+(p9)  fma.s1  tan_z9  = tan_z12, tan_z12,f0             
+      nop.i 999 ;;
 }
-;;
 
-GLOBAL_IEEE754_END(tanf)
-
-LOCAL_LIBM_ENTRY(__libm_callout)
-Huge_Argument:
-.prologue
 
 { .mfi
-      nop.m 0
-      fmerge.s f9 = f0,f0
-.save ar.pfs,GR_SAVE_PFS
-      mov  GR_SAVE_PFS=ar.pfs
+      nop.m 999
+(p9)  fma.s1  tan_z4 = tan_rsq, tan_Q3, tan_Q2          
+      nop.i 999 
 }
-;;
+{ .mfi
+      nop.m 999
+(p9)  fma.s1  tan_z6  = tan_z12, tan_z8, tan_z7         
+      nop.i 999 ;; 
+}
+
+
 
 { .mfi
-      mov GR_SAVE_GP=gp
-      nop.f 0
-.save b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0
+      nop.m 999
+(p8)  fma.s1  tan_v10 = tan_v14, tan_v15, tan_v11        
+      nop.i 999 ;; 
 }
 
-.body
-{ .mmb
+
+
+{ .mfi
       nop.m 999
+(p9)  fma.s1     tan_y2  = tan_y1, tan_d, tan_y0         
+      nop.i 999 
+}
+{ .mfi
       nop.m 999
-(p10) br.cond.sptk.many  call_tanl ;;
+(p9)  fma.s1     tan_d4  = tan_dsq, tan_dsq, tan_d       
+      nop.i 999  ;;
 }
 
-// Here if we should call cotl (p10=0, p11=1)
-{ .mmb
+
+{ .mfi
       nop.m 999
+(p8)  fma.s1  tan_v2 = tan_v14, tan_v6, tan_v3           
+      nop.i 999
+}
+{ .mfi
       nop.m 999
-      br.call.sptk.many  b0=__libm_cotl# ;;
+(p8)  fma.s1  tan_v9 = tan_v14, tan_v14, f0              
+      nop.i 999 ;;
 }
 
+
 { .mfi
-      mov gp        = GR_SAVE_GP
-      fnorm.s.s0 f8 = f8
-      mov b0        = GR_SAVE_B0
+      nop.m 999
+(p9)  fma.s1  tan_z2  = tan_z12, tan_z4, tan_z3         
+      nop.i 999 
 }
-;;
-
-{ .mib
+{ .mfi
       nop.m 999
-      mov ar.pfs    = GR_SAVE_PFS
-      br.ret.sptk     b0
-;;
+(p9)  fma.s1  tan_z5  = tan_z9, tan_z10, tan_z6         
+      nop.i 999  ;;
 }
 
-// Here if we should call tanl (p10=1, p11=0)
-call_tanl:
-{ .mmb
+
+{ .mfi
       nop.m 999
+(p9)  fma.s1     tan_inv_r = tan_d4, tan_y2, tan_y0      
+      nop.i 999 
+}
+{ .mfi
       nop.m 999
-      br.call.sptk.many  b0=__libm_tanl# ;;
+(p8)  fma.s1   tan_rcube  = tan_rsq, tan_r,   f0
+      nop.i 999  ;;
 }
 
+
+
 { .mfi
-      mov gp        = GR_SAVE_GP
-      fnorm.s.s0 f8 = f8
-      mov b0        = GR_SAVE_B0
+      nop.m 999
+(p8)  fma.s1  tan_v1 = tan_v9, tan_v10, tan_v2           
+      nop.i 999 
 }
-;;
-
-{ .mib
+{ .mfi
       nop.m 999
-      mov ar.pfs    = GR_SAVE_PFS
-      br.ret.sptk     b0
-;;
+(p9)  fma.s1  tan_z1  = tan_z9, tan_z5, tan_z2          
+      nop.i 999   ;;
 }
 
-LOCAL_LIBM_END(__libm_callout)
 
-.type __libm_tanl#,@function
-.global __libm_tanl#
-.type __libm_cotl#,@function
-.global __libm_cotl#
+
+{ .mfi
+      nop.m 999
+(p8)  fma.s.s0  f8  = tan_v1, tan_rcube, tan_r             
+      nop.i 999  
+}
+{ .mfb
+      nop.m 999
+(p9)  fms.s.s0  f8  = tan_r, tan_z1, tan_inv_r        
+      br.ret.sptk    b0 ;;    
+}
+.endp tanf#
+ASM_SIZE_DIRECTIVE(tanf#)
 
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_callout
+__libm_callout:
+L(TAN_DBX): 
 .prologue
 
-// (1)
 { .mfi
-      add           GR_Parameter_Y=-32,sp        // Parameter 2 value
-      nop.f         0
+        nop.m 0
+     fmerge.s f9 = f0,f0 
 .save   ar.pfs,GR_SAVE_PFS
-      mov           GR_SAVE_PFS=ar.pfs           // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs
 }
-{ .mfi
-.fframe 64
-      add sp=-64,sp                              // Create new stack
-      nop.f 0
-      mov GR_SAVE_GP=gp                          // Save gp
-};;
+;;
 
-// (2)
-{ .mmi
-      stfs [GR_Parameter_Y] = f1,16              // STORE Parameter 2 on stack
-      add GR_Parameter_X = 16,sp                 // Parameter 1 address
+{ .mfi
+        mov GR_SAVE_GP=gp
+        nop.f 0
 .save   b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0                          // Save b0
-};;
+        mov GR_SAVE_B0=b0
+}
 
 .body
-// (3)
-{ .mib
-      stfs [GR_Parameter_X] = f10                // STORE Parameter 1 on stack
-      add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-      nop.b 0
+{ .mfb
+      nop.m 999
+      nop.f 999
+       br.call.sptk.many  b0=__libm_tan# ;;
 }
-{ .mib
-      stfs [GR_Parameter_Y] = f8                 // STORE Parameter 3 on stack
-      add   GR_Parameter_Y = -16,GR_Parameter_Y
-      br.call.sptk b0=__libm_error_support#      // Call error handling function
-};;
-{ .mmi
-      nop.m 0
-      nop.m 0
-      add   GR_Parameter_RESULT = 48,sp
-};;
 
-// (4)
-{ .mmi
-      ldfs  f8 = [GR_Parameter_RESULT]           // Get return result off stack
-.restore sp
-      add   sp = 64,sp                           // Restore stack pointer
-      mov   b0 = GR_SAVE_B0                      // Restore return address
-};;
+
+{ .mfi
+       mov gp        = GR_SAVE_GP
+      fnorm.s     f8 = f8
+       mov b0        = GR_SAVE_B0 
+}
+;;
+
+
 { .mib
-      mov   gp = GR_SAVE_GP                      // Restore gp
-      mov   ar.pfs = GR_SAVE_PFS                 // Restore ar.pfs
-      br.ret.sptk     b0                         // Return
-};;
+         nop.m 999
+      mov ar.pfs    = GR_SAVE_PFS
+      br.ret.sptk     b0
+;;
+}
 
-LOCAL_LIBM_END(__libm_error_region)
 
-.type   __libm_error_support#,@function
-.global __libm_error_support#
+.endp  __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
 
+.type __libm_tan#,@function
+.global __libm_tan#
diff --git a/sysdeps/ia64/fpu/s_tanl.S b/sysdeps/ia64/fpu/s_tanl.S
index 345a059c5f..e13e6c6cbd 100644
--- a/sysdeps/ia64/fpu/s_tanl.S
+++ b/sysdeps/ia64/fpu/s_tanl.S
@@ -1,10 +1,10 @@
-.file "tancotl.s"
+.file "tanl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
+// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,7 +20,7 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
+//
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -35,77 +35,50 @@
 // 
 // Intel Corporation is the author of this code, and requests that all
 // problem reports or change requests be submitted to it directly at 
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// http://developer.intel.com/opensource.
 //
-//*********************************************************************
+// *********************************************************************
 //
 // History: 
 //
-// 02/02/00 (hand-optimized)
-// 04/04/00 Unwind support added
+// 2/02/2000 (hand-optimized)
+// 4/04/00  Unwind support added
 // 12/28/00 Fixed false invalid flags
-// 02/06/02 Improved speed
-// 05/07/02 Changed interface to __libm_pi_by_2_reduce
-// 05/30/02 Added cotl
-// 02/10/03 Reordered header: .section, .global, .proc, .align;
-//          used data8 for long double table values
-// 05/15/03 Reformatted data tables
 //
-//*********************************************************************
+// *********************************************************************
 //
-// Functions:   tanl(x) = tangent(x), for double-extended precision x values
-//              cotl(x) = cotangent(x), for double-extended precision x values
+// Function:   tanl(x) = tangent(x), for double-extended precision x values
 //
-//*********************************************************************
+// *********************************************************************
 //
 // Resources Used:
 //
 //    Floating-Point Registers: f8 (Input and Return Value)
 //                              f9-f15
-//                              f32-f121
+//                              f32-f112
 //
 //    General Purpose Registers:
-//      r14-r26,r32-r57
+//      r32-r48
+//      r49-r50 (Used to pass arguments to pi_by_2 reduce routine)
 //
 //    Predicate Registers:      p6-p15
 //
-//*********************************************************************
+// *********************************************************************
 //
-// IEEE Special Conditions for tanl:
+// IEEE Special Conditions:
 //
 //    Denormal  fault raised on denormal inputs
 //    Overflow exceptions do not occur
-//    Underflow exceptions raised when appropriate for tan
+//    Underflow exceptions raised when appropriate for tan 
 //    (No specialized error handling for this routine)
 //    Inexact raised when appropriate by algorithm
 //
-//    tanl(SNaN) = QNaN
-//    tanl(QNaN) = QNaN
-//    tanl(inf) = QNaN
-//    tanl(+/-0) = +/-0
-//
-//*********************************************************************
-//
-// IEEE Special Conditions for cotl:
-//
-//    Denormal  fault raised on denormal inputs
-//    Overflow exceptions occur at zero and near zero
-//    Underflow exceptions do not occur
-//    Inexact raised when appropriate by algorithm
-//
-//    cotl(SNaN) = QNaN
-//    cotl(QNaN) = QNaN
-//    cotl(inf) = QNaN
-//    cotl(+/-0) = +/-Inf and error handling is called
-//
-//*********************************************************************
+//    tan(SNaN) = QNaN
+//    tan(QNaN) = QNaN
+//    tan(inf) = QNaN
+//    tan(+/-0) = +/-0
 //
-//    Below are mathematical and algorithmic descriptions for tanl.
-//    For cotl we use next identity cot(x) = -tan(x + Pi/2).
-//    So, to compute cot(x) we just need to increment N (N = N + 1)
-//    and invert sign of the computed result.
-//
-//*********************************************************************
+// *********************************************************************
 //
 // Mathematical Description
 //
@@ -133,13 +106,13 @@
 // -------
 //
 //      tan(r + c) = r + c + r^3/3          ...accurately
-//     -cot(r + c) = -1/(r+c) + r/3          ...accurately
+//     -cot(r + c) = -1/(r+c) + r/3          ...accurately
 //
 // Case 4:
 // -------
 //
 //      tan(r + c) = r + c + r^3/3 + 2r^5/15     ...accurately
-//     -cot(r + c) = -1/(r+c) + r/3 + r^3/45     ...accurately
+//     -cot(r + c) = -1/(r+c) + r/3 + r^3/45     ...accurately
 //
 //
 // The only cases left are Cases 1 and 3 of the argument reduction
@@ -170,13 +143,13 @@
 // Since Arg = N pi/4 + r + c accurately, we have
 //
 //      tan(Arg) =  tan(r+c)            for N even,
-//               = -cot(r+c)            otherwise.
+//               = -cot(r+c)            otherwise.
 //
 // Here for this case, both tan(r) and -cot(r) can be approximated
 // by simple polynomials:
 //
 //      tan(r) =    r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
-//     -cot(r) = -1/r + Q1_1 r   + Q1_2 r^3 + ... + Q1_7 r^13
+//     -cot(r) = -1/r + Q1_1 r   + Q1_2 r^3 + ... + Q1_7 r^13
 //
 // accurately. Since |r| is relatively small, tan(r+c) and
 // -cot(r+c) can be accurately approximated by replacing r with
@@ -205,21 +178,21 @@
 // The required calculation is either
 //
 //      tan(r + c)  =  tan(r)  +  correction,  or
-//     -cot(r + c)  = -cot(r)  +  correction.
+//     -cot(r + c)  = -cot(r)  +  correction.
 //
 // Specifically,
 //
 //      tan(r + c) =  tan(r) + c tan'(r)  + O(c^2)
-//                 =  tan(r) + c sec^2(r) + O(c^2)
-//                 =  tan(r) + c SEC_sq     ...accurately
+//                 =  tan(r) + c sec^2(r) + O(c^2)
+//                 =  tan(r) + c SEC_sq     ...accurately
 //                as long as SEC_sq approximates sec^2(r)
 //                to, say, 5 bits or so.
 //
 // Similarly,
 //
-//     -cot(r + c) = -cot(r) - c cot'(r)  + O(c^2)
-//                 = -cot(r) + c csc^2(r) + O(c^2)
-//                 = -cot(r) + c CSC_sq     ...accurately
+//     -cot(r + c) = -cot(r) - c cot'(r)  + O(c^2)
+//                 = -cot(r) + c csc^2(r) + O(c^2)
+//                 = -cot(r) + c CSC_sq     ...accurately
 //                as long as CSC_sq approximates csc^2(r)
 //                to, say, 5 bits or so.
 //
@@ -235,14 +208,14 @@
 // where
 //
 //      B = 2^k * 1.b_1 b_2 ... b_5 1
-//      x = |r| - B
+//      x = |r| - B
 //
 // Now,
 //                   tan(B)  +   tan(x)
 //      tan( B + x ) =  ------------------------
 //                   1 -  tan(B)*tan(x)
 //
-//               /                         \
+//               /                         \ 
 //               |   tan(B)  +   tan(x)          |
 
 //      = tan(B) +  | ------------------------ - tan(B) |
@@ -275,7 +248,7 @@
 //      cot( B + x ) =  ------------------------
 //                   tan(B)  +  tan(x)
 //
-//               /                           \
+//               /                           \ 
 //               |   1 - tan(B)*tan(x)              |
 
 //      = cot(B) +  | ----------------------- - cot(B) |
@@ -300,7 +273,7 @@
 //      Arg = N * pi/2 +  r + c          ...accurately
 //
 //      tan(Arg) =  tan(r) + correction    if N is even;
-//               = -cot(r) + correction    otherwise.
+//               = -cot(r) + correction    otherwise.
 //
 // For Cases 2 and 4,
 //
@@ -319,8 +292,8 @@
 //      tan(Arg) =  r + P1_1 r^3 + P1_2 r^5 + ... + P1_9 r^19
 //                     + c*(1 + r^2)               N even
 //
-//               = -1/(r+c) + Q1_1 r   + Q1_2 r^3 + ... + Q1_7 r^13
-//                     + Q1_1*c                    N odd
+//               = -1/(r+c) + Q1_1 r   + Q1_2 r^3 + ... + Q1_7 r^13
+//                     + Q1_1*c                    N odd
 //
 //     Case normal_r: 2^(-2) <= |r| <= pi/4
 //
@@ -331,15 +304,15 @@
 //
 //      tan(Arg) = tan(r) + c*sec^2(r)
 //               = tan( sgn_r * (B+x) ) + c * sec^2(|r|)
-//               = sgn_r * ( tan(B+x)  + sgn_r*c*sec^2(|r|) )
-//               = sgn_r * ( tan(B+x)  + sgn_r*c*sec^2(B) )
+//               = sgn_r * ( tan(B+x)  + sgn_r*c*sec^2(|r|) )
+//               = sgn_r * ( tan(B+x)  + sgn_r*c*sec^2(B) )
 //
 // since B approximates |r| to 2^(-6) in relative accuracy.
 //
 //                 /            (1/[sin(B)*cos(B)]) * tan(x)
 //    tan(Arg) = sgn_r * | tan(B) + --------------------------------
 //                 \                     cot(B)  -  tan(x)
-//                                        \
+//                                        \ 
 //                       + CORR  |
 
 //                                     /
@@ -351,15 +324,15 @@
 //
 //      tan(Arg) = -cot(r) + c*csc^2(r)
 //               = -cot( sgn_r * (B+x) ) + c * csc^2(|r|)
-//               = sgn_r * ( -cot(B+x)  + sgn_r*c*csc^2(|r|) )
-//               = sgn_r * ( -cot(B+x)  + sgn_r*c*csc^2(B) )
+//               = sgn_r * ( -cot(B+x)  + sgn_r*c*csc^2(|r|) )
+//               = sgn_r * ( -cot(B+x)  + sgn_r*c*csc^2(B) )
 //
 // since B approximates |r| to 2^(-6) in relative accuracy.
 //
 //                 /            (1/[sin(B)*cos(B)]) * tan(x)
 //    tan(Arg) = sgn_r * | -cot(B) + --------------------------------
 //                 \                     tan(B)  +  tan(x)
-//                                        \
+//                                        \ 
 //                       + CORR  |
 
 //                                     /
@@ -383,8 +356,8 @@
 //    For N even,
 //
 //    rsq := r * r
-//    Poly := c + r * rsq * P1_1
-//    Result := r + Poly          ...in user-defined rounding
+//    Result := c + r * rsq * P1_1
+//    Result := r + Result          ...in user-defined rounding
 //
 //    For N odd,
 //    S_hi  := -frcpa(r)               ...8 bits
@@ -402,8 +375,8 @@
 //    For N even,
 //
 //    rsq := r * r
-//    Poly := c + r * rsq * (P1_1 + rsq * P1_2)
-//    Result := r + Poly          ...in user-defined rounding
+//    Result := c + r * rsq * (P1_1 + rsq * P1_2)
+//    Result := r + Result          ...in user-defined rounding
 //
 //    For N odd,
 //    S_hi  := -frcpa(r)               ...8 bits
@@ -441,8 +414,8 @@
 //      Poly2 := P1_4 + rsq*(P1_5 + rsq*(P1_6 + ... rsq*P1_9))
 //      CORR  := c * ( 1 + rsq )
 //      Poly  := Poly1 + r_to_the_8*Poly2
-//      Poly := r*Poly + CORR
-//      Result := r + Poly     ...in user-defined rounding
+//      Result := r*Poly + CORR
+//      Result := r + Result     ...in user-defined rounding
 //      ...note that Poly1 and r_to_the_8 can be computed in parallel
 //      ...with Poly2 (Poly1 is intentionally set to be much
 //      ...shorter than Poly2 so that r_to_the_8 and CORR can be hidden)
@@ -461,8 +434,8 @@
 //      rsq := r*r
 //      P   := Q1_1 + rsq*(Q1_2 + rsq*(Q1_3 + ... + rsq*Q1_7))
 //
-//      Poly :=  r*P + S_lo
-//      Result :=  S_hi  +  Poly      ...in user-defined rounding
+//      Result :=  r*P + S_lo
+//      Result :=  S_hi  +  Result      ...in user-defined rounding
 //
 //
 // Algorithm for the case of normal_r
@@ -481,7 +454,7 @@
 //           /           (1/[sin(B)*cos(B)]) * tan(x)
 //      sgn_r * | tan(B) + --------------------------------  +
 //           \                     cot(B)  -  tan(x)
-//                                \
+//                                \ 
 //                          CORR  |
 
 //                                /
@@ -490,7 +463,7 @@
 // calculated beforehand and stored in a table. Specifically,
 // the table values are
 //
-//      tan(B)             as  T_hi  +  T_lo;
+//      tan(B)             as  T_hi  +  T_lo;
 //      cot(B)             as  C_hi  +  C_lo;
 //      1/[sin(B)*cos(B)]  as  SC_inv
 //
@@ -586,7 +559,7 @@
 //           /             (1/[sin(B)*cos(B)]) * tan(x)
 //      sgn_r * | -cot(B) + --------------------------------  +
 //           \                     tan(B)  +  tan(x)
-//                                \
+//                                \ 
 //                          CORR  |
 
 //                                /
@@ -595,7 +568,7 @@
 // calculated beforehand and stored in a table. Specifically,
 // the table values are
 //
-//      tan(B)             as  T_hi  +  T_lo;
+//      tan(B)             as  T_hi  +  T_lo;
 //      cot(B)             as  C_hi  +  C_lo;
 //      1/[sin(B)*cos(B)]  as  SC_inv
 //
@@ -702,382 +675,254 @@
 //
 //
 
-RODATA
-.align 16
-
-LOCAL_OBJECT_START(TANL_BASE_CONSTANTS)
-
-tanl_table_1:
-data8    0xA2F9836E4E44152A, 0x00003FFE // two_by_pi
-data8    0xC84D32B0CE81B9F1, 0x00004016 // P_0
-data8    0xC90FDAA22168C235, 0x00003FFF // P_1
-data8    0xECE675D1FC8F8CBB, 0x0000BFBD // P_2
-data8    0xB7ED8FBBACC19C60, 0x0000BF7C // P_3
-LOCAL_OBJECT_END(TANL_BASE_CONSTANTS)
-
-LOCAL_OBJECT_START(tanl_table_2)
-data8    0xC90FDAA22168C234, 0x00003FFE // PI_BY_4
-data8    0xA397E5046EC6B45A, 0x00003FE7 // Inv_P_0
-data8    0x8D848E89DBD171A1, 0x0000BFBF // d_1
-data8    0xD5394C3618A66F8E, 0x0000BF7C // d_2
-data4    0x3E800000 // two**-2
-data4    0xBE800000 // -two**-2
-data4    0x00000000 // pad
-data4    0x00000000 // pad
-LOCAL_OBJECT_END(tanl_table_2)
-
-LOCAL_OBJECT_START(tanl_table_p1)
-data8    0xAAAAAAAAAAAAAABD, 0x00003FFD // P1_1
-data8    0x8888888888882E6A, 0x00003FFC // P1_2
-data8    0xDD0DD0DD0F0177B6, 0x00003FFA // P1_3
-data8    0xB327A440646B8C6D, 0x00003FF9 // P1_4
-data8    0x91371B251D5F7D20, 0x00003FF8 // P1_5
-data8    0xEB69A5F161C67914, 0x00003FF6 // P1_6
-data8    0xBEDD37BE019318D2, 0x00003FF5 // P1_7
-data8    0x9979B1463C794015, 0x00003FF4 // P1_8
-data8    0x8EBD21A38C6EB58A, 0x00003FF3 // P1_9
-LOCAL_OBJECT_END(tanl_table_p1)
-
-LOCAL_OBJECT_START(tanl_table_q1)
-data8    0xAAAAAAAAAAAAAAB4, 0x00003FFD // Q1_1
-data8    0xB60B60B60B5FC93E, 0x00003FF9 // Q1_2
-data8    0x8AB355E00C9BBFBF, 0x00003FF6 // Q1_3
-data8    0xDDEBBC89CBEE3D4C, 0x00003FF2 // Q1_4
-data8    0xB3548A685F80BBB6, 0x00003FEF // Q1_5
-data8    0x913625604CED5BF1, 0x00003FEC // Q1_6
-data8    0xF189D95A8EE92A83, 0x00003FE8 // Q1_7
-LOCAL_OBJECT_END(tanl_table_q1)
-
-LOCAL_OBJECT_START(tanl_table_p2)
-data8    0xAAAAAAAAAAAB362F, 0x00003FFD // P2_1
-data8    0x88888886E97A6097, 0x00003FFC // P2_2
-data8    0xDD108EE025E716A1, 0x00003FFA // P2_3
-LOCAL_OBJECT_END(tanl_table_p2)
-
-LOCAL_OBJECT_START(tanl_table_tm2)
+#include "libm_support.h"
+
+#ifdef _LIBC
+.rodata
+#else
+.data
+#endif
+.align 128
+
+TANL_BASE_CONSTANTS:
+ASM_TYPE_DIRECTIVE(TANL_BASE_CONSTANTS,@object)
+data4    0x4B800000, 0xCB800000, 0x38800000, 0xB8800000 // two**24, -two**24
+                                                        // two**-14, -two**-14
+data4    0x4E44152A, 0xA2F9836E, 0x00003FFE, 0x00000000 // two_by_pi
+data4    0xCE81B9F1, 0xC84D32B0, 0x00004016, 0x00000000 // P_0
+data4    0x2168C235, 0xC90FDAA2, 0x00003FFF, 0x00000000 // P_1
+data4    0xFC8F8CBB, 0xECE675D1, 0x0000BFBD, 0x00000000 // P_2
+data4    0xACC19C60, 0xB7ED8FBB, 0x0000BF7C, 0x00000000 // P_3
+data4    0x5F000000, 0xDF000000, 0x00000000, 0x00000000 // two_to_63, -two_to_63
+data4    0x6EC6B45A, 0xA397E504, 0x00003FE7, 0x00000000 // Inv_P_0
+data4    0xDBD171A1, 0x8D848E89, 0x0000BFBF, 0x00000000 // d_1
+data4    0x18A66F8E, 0xD5394C36, 0x0000BF7C, 0x00000000 // d_2
+data4    0x2168C234, 0xC90FDAA2, 0x00003FFE, 0x00000000 // PI_BY_4
+data4    0x2168C234, 0xC90FDAA2, 0x0000BFFE, 0x00000000 // MPI_BY_4
+data4    0x3E800000, 0xBE800000, 0x00000000, 0x00000000 // two**-2, -two**-2
+data4    0x2F000000, 0xAF000000, 0x00000000, 0x00000000 // two**-33, -two**-33
+data4    0xAAAAAABD, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // P1_1
+data4    0x88882E6A, 0x88888888, 0x00003FFC, 0x00000000 // P1_2
+data4    0x0F0177B6, 0xDD0DD0DD, 0x00003FFA, 0x00000000 // P1_3
+data4    0x646B8C6D, 0xB327A440, 0x00003FF9, 0x00000000 // P1_4
+data4    0x1D5F7D20, 0x91371B25, 0x00003FF8, 0x00000000 // P1_5
+data4    0x61C67914, 0xEB69A5F1, 0x00003FF6, 0x00000000 // P1_6
+data4    0x019318D2, 0xBEDD37BE, 0x00003FF5, 0x00000000 // P1_7
+data4    0x3C794015, 0x9979B146, 0x00003FF4, 0x00000000 // P1_8
+data4    0x8C6EB58A, 0x8EBD21A3, 0x00003FF3, 0x00000000 // P1_9
+data4    0xAAAAAAB4, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // Q1_1
+data4    0x0B5FC93E, 0xB60B60B6, 0x00003FF9, 0x00000000 // Q1_2
+data4    0x0C9BBFBF, 0x8AB355E0, 0x00003FF6, 0x00000000 // Q1_3
+data4    0xCBEE3D4C, 0xDDEBBC89, 0x00003FF2, 0x00000000 // Q1_4
+data4    0x5F80BBB6, 0xB3548A68, 0x00003FEF, 0x00000000 // Q1_5
+data4    0x4CED5BF1, 0x91362560, 0x00003FEC, 0x00000000 // Q1_6
+data4    0x8EE92A83, 0xF189D95A, 0x00003FE8, 0x00000000 // Q1_7
+data4    0xAAAB362F, 0xAAAAAAAA, 0x00003FFD, 0x00000000 // P2_1
+data4    0xE97A6097, 0x88888886, 0x00003FFC, 0x00000000 // P2_2
+data4    0x25E716A1, 0xDD108EE0, 0x00003FFA, 0x00000000 // P2_3
 //
 //  Entries T_hi   double-precision memory format
 //  Index = 0,1,...,31  B = 2^(-2)*(1+Index/32+1/64)
 //  Entries T_lo  single-precision memory format
 //  Index = 0,1,...,31  B = 2^(-2)*(1+Index/32+1/64)
 //
-data8 0x3FD09BC362400794
-data4 0x23A05C32, 0x00000000
-data8 0x3FD124A9DFFBC074
-data4 0x240078B2, 0x00000000
-data8 0x3FD1AE235BD4920F
-data4 0x23826B8E, 0x00000000
-data8 0x3FD2383515E2701D
-data4 0x22D31154, 0x00000000
-data8 0x3FD2C2E463739C2D
-data4 0x2265C9E2, 0x00000000
-data8 0x3FD34E36AFEEA48B
-data4 0x245C05EB, 0x00000000
-data8 0x3FD3DA317DBB35D1
-data4 0x24749F2D, 0x00000000
-data8 0x3FD466DA67321619
-data4 0x2462CECE, 0x00000000
-data8 0x3FD4F4371F94A4D5
-data4 0x246D0DF1, 0x00000000
-data8 0x3FD5824D740C3E6D
-data4 0x240A85B5, 0x00000000
-data8 0x3FD611234CB1E73D
-data4 0x23F96E33, 0x00000000
-data8 0x3FD6A0BEAD9EA64B
-data4 0x247C5393, 0x00000000
-data8 0x3FD73125B804FD01
-data4 0x241F3B29, 0x00000000
-data8 0x3FD7C25EAB53EE83
-data4 0x2479989B, 0x00000000
-data8 0x3FD8546FE6640EED
-data4 0x23B343BC, 0x00000000
-data8 0x3FD8E75FE8AF1892
-data4 0x241454D1, 0x00000000
-data8 0x3FD97B3553928BDA
-data4 0x238613D9, 0x00000000
-data8 0x3FDA0FF6EB9DE4DE
-data4 0x22859FA7, 0x00000000
-data8 0x3FDAA5AB99ECF92D
-data4 0x237A6D06, 0x00000000
-data8 0x3FDB3C5A6D8F1796
-data4 0x23952F6C, 0x00000000
-data8 0x3FDBD40A9CFB8BE4
-data4 0x2280FC95, 0x00000000
-data8 0x3FDC6CC387943100
-data4 0x245D2EC0, 0x00000000
-data8 0x3FDD068CB736C500
-data4 0x23C4AD7D, 0x00000000
-data8 0x3FDDA16DE1DDBC31
-data4 0x23D076E6, 0x00000000
-data8 0x3FDE3D6EEB515A93
-data4 0x244809A6, 0x00000000
-data8 0x3FDEDA97E6E9E5F1
-data4 0x220856C8, 0x00000000
-data8 0x3FDF78F11963CE69
-data4 0x244BE993, 0x00000000
-data8 0x3FE00C417D635BCE
-data4 0x23D21799, 0x00000000
-data8 0x3FE05CAB1C302CD3
-data4 0x248A1B1D, 0x00000000
-data8 0x3FE0ADB9DB6A1FA0
-data4 0x23D53E33, 0x00000000
-data8 0x3FE0FF724A20BA81
-data4 0x24DB9ED5, 0x00000000
-data8 0x3FE151D9153FA6F5
-data4 0x24E9E451, 0x00000000
-LOCAL_OBJECT_END(tanl_table_tm2)
-
-LOCAL_OBJECT_START(tanl_table_tm1)
+data4    0x62400794, 0x3FD09BC3, 0x23A05C32, 0x00000000
+data4    0xDFFBC074, 0x3FD124A9, 0x240078B2, 0x00000000
+data4    0x5BD4920F, 0x3FD1AE23, 0x23826B8E, 0x00000000
+data4    0x15E2701D, 0x3FD23835, 0x22D31154, 0x00000000
+data4    0x63739C2D, 0x3FD2C2E4, 0x2265C9E2, 0x00000000
+data4    0xAFEEA48B, 0x3FD34E36, 0x245C05EB, 0x00000000
+data4    0x7DBB35D1, 0x3FD3DA31, 0x24749F2D, 0x00000000
+data4    0x67321619, 0x3FD466DA, 0x2462CECE, 0x00000000
+data4    0x1F94A4D5, 0x3FD4F437, 0x246D0DF1, 0x00000000
+data4    0x740C3E6D, 0x3FD5824D, 0x240A85B5, 0x00000000
+data4    0x4CB1E73D, 0x3FD61123, 0x23F96E33, 0x00000000
+data4    0xAD9EA64B, 0x3FD6A0BE, 0x247C5393, 0x00000000
+data4    0xB804FD01, 0x3FD73125, 0x241F3B29, 0x00000000
+data4    0xAB53EE83, 0x3FD7C25E, 0x2479989B, 0x00000000
+data4    0xE6640EED, 0x3FD8546F, 0x23B343BC, 0x00000000
+data4    0xE8AF1892, 0x3FD8E75F, 0x241454D1, 0x00000000
+data4    0x53928BDA, 0x3FD97B35, 0x238613D9, 0x00000000
+data4    0xEB9DE4DE, 0x3FDA0FF6, 0x22859FA7, 0x00000000
+data4    0x99ECF92D, 0x3FDAA5AB, 0x237A6D06, 0x00000000
+data4    0x6D8F1796, 0x3FDB3C5A, 0x23952F6C, 0x00000000
+data4    0x9CFB8BE4, 0x3FDBD40A, 0x2280FC95, 0x00000000
+data4    0x87943100, 0x3FDC6CC3, 0x245D2EC0, 0x00000000
+data4    0xB736C500, 0x3FDD068C, 0x23C4AD7D, 0x00000000
+data4    0xE1DDBC31, 0x3FDDA16D, 0x23D076E6, 0x00000000
+data4    0xEB515A93, 0x3FDE3D6E, 0x244809A6, 0x00000000
+data4    0xE6E9E5F1, 0x3FDEDA97, 0x220856C8, 0x00000000
+data4    0x1963CE69, 0x3FDF78F1, 0x244BE993, 0x00000000
+data4    0x7D635BCE, 0x3FE00C41, 0x23D21799, 0x00000000
+data4    0x1C302CD3, 0x3FE05CAB, 0x248A1B1D, 0x00000000
+data4    0xDB6A1FA0, 0x3FE0ADB9, 0x23D53E33, 0x00000000
+data4    0x4A20BA81, 0x3FE0FF72, 0x24DB9ED5, 0x00000000
+data4    0x153FA6F5, 0x3FE151D9, 0x24E9E451, 0x00000000
 //
 //  Entries T_hi   double-precision memory format
 //  Index = 0,1,...,19  B = 2^(-1)*(1+Index/32+1/64)
 //  Entries T_lo  single-precision memory format
 //  Index = 0,1,...,19  B = 2^(-1)*(1+Index/32+1/64)
 //
-data8 0x3FE1CEC4BA1BE39E
-data4 0x24B60F9E, 0x00000000
-data8 0x3FE277E45ABD9B2D
-data4 0x248C2474, 0x00000000
-data8 0x3FE324180272B110
-data4 0x247B8311, 0x00000000
-data8 0x3FE3D38B890E2DF0
-data4 0x24C55751, 0x00000000
-data8 0x3FE4866D46236871
-data4 0x24E5BC34, 0x00000000
-data8 0x3FE53CEE45E044B0
-data4 0x24001BA4, 0x00000000
-data8 0x3FE5F74282EC06E4
-data4 0x24B973DC, 0x00000000
-data8 0x3FE6B5A125DF43F9
-data4 0x24895440, 0x00000000
-data8 0x3FE77844CAFD348C
-data4 0x240021CA, 0x00000000
-data8 0x3FE83F6BCEED6B92
-data4 0x24C45372, 0x00000000
-data8 0x3FE90B58A34F3665
-data4 0x240DAD33, 0x00000000
-data8 0x3FE9DC522C1E56B4
-data4 0x24F846CE, 0x00000000
-data8 0x3FEAB2A427041578
-data4 0x2323FB6E, 0x00000000
-data8 0x3FEB8E9F9DD8C373
-data4 0x24B3090B, 0x00000000
-data8 0x3FEC709B65C9AA7B
-data4 0x2449F611, 0x00000000
-data8 0x3FED58F4ACCF8435
-data4 0x23616A7E, 0x00000000
-data8 0x3FEE480F97635082
-data4 0x24C2FEAE, 0x00000000
-data8 0x3FEF3E57F0ACC544
-data4 0x242CE964, 0x00000000
-data8 0x3FF01E20F7E06E4B
-data4 0x2480D3EE, 0x00000000
-data8 0x3FF0A1258A798A69
-data4 0x24DB8967, 0x00000000
-LOCAL_OBJECT_END(tanl_table_tm1)
-
-LOCAL_OBJECT_START(tanl_table_cm2)
+data4    0xBA1BE39E, 0x3FE1CEC4, 0x24B60F9E, 0x00000000
+data4    0x5ABD9B2D, 0x3FE277E4, 0x248C2474, 0x00000000
+data4    0x0272B110, 0x3FE32418, 0x247B8311, 0x00000000
+data4    0x890E2DF0, 0x3FE3D38B, 0x24C55751, 0x00000000
+data4    0x46236871, 0x3FE4866D, 0x24E5BC34, 0x00000000
+data4    0x45E044B0, 0x3FE53CEE, 0x24001BA4, 0x00000000
+data4    0x82EC06E4, 0x3FE5F742, 0x24B973DC, 0x00000000
+data4    0x25DF43F9, 0x3FE6B5A1, 0x24895440, 0x00000000
+data4    0xCAFD348C, 0x3FE77844, 0x240021CA, 0x00000000
+data4    0xCEED6B92, 0x3FE83F6B, 0x24C45372, 0x00000000
+data4    0xA34F3665, 0x3FE90B58, 0x240DAD33, 0x00000000
+data4    0x2C1E56B4, 0x3FE9DC52, 0x24F846CE, 0x00000000
+data4    0x27041578, 0x3FEAB2A4, 0x2323FB6E, 0x00000000
+data4    0x9DD8C373, 0x3FEB8E9F, 0x24B3090B, 0x00000000
+data4    0x65C9AA7B, 0x3FEC709B, 0x2449F611, 0x00000000
+data4    0xACCF8435, 0x3FED58F4, 0x23616A7E, 0x00000000
+data4    0x97635082, 0x3FEE480F, 0x24C2FEAE, 0x00000000
+data4    0xF0ACC544, 0x3FEF3E57, 0x242CE964, 0x00000000
+data4    0xF7E06E4B, 0x3FF01E20, 0x2480D3EE, 0x00000000
+data4    0x8A798A69, 0x3FF0A125, 0x24DB8967, 0x00000000
 //
 //  Entries C_hi   double-precision memory format
 //  Index = 0,1,...,31  B = 2^(-2)*(1+Index/32+1/64)
 //  Entries C_lo  single-precision memory format
 //  Index = 0,1,...,31  B = 2^(-2)*(1+Index/32+1/64)
 //
-data8 0x400ED3E2E63EFBD0
-data4 0x259D94D4, 0x00000000
-data8 0x400DDDB4C515DAB5
-data4 0x245F0537, 0x00000000
-data8 0x400CF57ABE19A79F
-data4 0x25D4EA9F, 0x00000000
-data8 0x400C1A06D15298ED
-data4 0x24AE40A0, 0x00000000
-data8 0x400B4A4C164B2708
-data4 0x25A5AAB6, 0x00000000
-data8 0x400A855A5285B068
-data4 0x25524F18, 0x00000000
-data8 0x4009CA5A3FFA549F
-data4 0x24C999C0, 0x00000000
-data8 0x4009188A646AF623
-data4 0x254FD801, 0x00000000
-data8 0x40086F3C6084D0E7
-data4 0x2560F5FD, 0x00000000
-data8 0x4007CDD2A29A76EE
-data4 0x255B9D19, 0x00000000
-data8 0x400733BE6C8ECA95
-data4 0x25CB021B, 0x00000000
-data8 0x4006A07E1F8DDC52
-data4 0x24AB4722, 0x00000000
-data8 0x4006139BC298AD58
-data4 0x252764E2, 0x00000000
-data8 0x40058CABBAD7164B
-data4 0x24DAF5DB, 0x00000000
-data8 0x40050B4BAE31A5D3
-data4 0x25EA20F4, 0x00000000
-data8 0x40048F2189F85A8A
-data4 0x2583A3E8, 0x00000000
-data8 0x400417DAA862380D
-data4 0x25DCC4CC, 0x00000000
-data8 0x4003A52B1088FCFE
-data4 0x2430A492, 0x00000000
-data8 0x400336CCCD3527D5
-data4 0x255F77CF, 0x00000000
-data8 0x4002CC7F5760766D
-data4 0x25DA0BDA, 0x00000000
-data8 0x4002660711CE02E3
-data4 0x256FF4A2, 0x00000000
-data8 0x4002032CD37BBE04
-data4 0x25208AED, 0x00000000
-data8 0x4001A3BD7F050775
-data4 0x24B72DD6, 0x00000000
-data8 0x40014789A554848A
-data4 0x24AB4DAA, 0x00000000
-data8 0x4000EE65323E81B7
-data4 0x2584C440, 0x00000000
-data8 0x4000982721CF1293
-data4 0x25C9428D, 0x00000000
-data8 0x400044A93D415EEB
-data4 0x25DC8482, 0x00000000
-data8 0x3FFFE78FBD72C577
-data4 0x257F5070, 0x00000000
-data8 0x3FFF4AC375EFD28E
-data4 0x23EBBF7A, 0x00000000
-data8 0x3FFEB2AF60B52DDE
-data4 0x22EECA07, 0x00000000
-data8 0x3FFE1F1935204180
-data4 0x24191079, 0x00000000
-data8 0x3FFD8FCA54F7E60A
-data4 0x248D3058, 0x00000000
-LOCAL_OBJECT_END(tanl_table_cm2)
-
-LOCAL_OBJECT_START(tanl_table_cm1)
+data4    0xE63EFBD0, 0x400ED3E2, 0x259D94D4, 0x00000000
+data4    0xC515DAB5, 0x400DDDB4, 0x245F0537, 0x00000000
+data4    0xBE19A79F, 0x400CF57A, 0x25D4EA9F, 0x00000000
+data4    0xD15298ED, 0x400C1A06, 0x24AE40A0, 0x00000000
+data4    0x164B2708, 0x400B4A4C, 0x25A5AAB6, 0x00000000
+data4    0x5285B068, 0x400A855A, 0x25524F18, 0x00000000
+data4    0x3FFA549F, 0x4009CA5A, 0x24C999C0, 0x00000000
+data4    0x646AF623, 0x4009188A, 0x254FD801, 0x00000000
+data4    0x6084D0E7, 0x40086F3C, 0x2560F5FD, 0x00000000
+data4    0xA29A76EE, 0x4007CDD2, 0x255B9D19, 0x00000000
+data4    0x6C8ECA95, 0x400733BE, 0x25CB021B, 0x00000000
+data4    0x1F8DDC52, 0x4006A07E, 0x24AB4722, 0x00000000
+data4    0xC298AD58, 0x4006139B, 0x252764E2, 0x00000000
+data4    0xBAD7164B, 0x40058CAB, 0x24DAF5DB, 0x00000000
+data4    0xAE31A5D3, 0x40050B4B, 0x25EA20F4, 0x00000000
+data4    0x89F85A8A, 0x40048F21, 0x2583A3E8, 0x00000000
+data4    0xA862380D, 0x400417DA, 0x25DCC4CC, 0x00000000
+data4    0x1088FCFE, 0x4003A52B, 0x2430A492, 0x00000000
+data4    0xCD3527D5, 0x400336CC, 0x255F77CF, 0x00000000
+data4    0x5760766D, 0x4002CC7F, 0x25DA0BDA, 0x00000000
+data4    0x11CE02E3, 0x40026607, 0x256FF4A2, 0x00000000
+data4    0xD37BBE04, 0x4002032C, 0x25208AED, 0x00000000
+data4    0x7F050775, 0x4001A3BD, 0x24B72DD6, 0x00000000
+data4    0xA554848A, 0x40014789, 0x24AB4DAA, 0x00000000
+data4    0x323E81B7, 0x4000EE65, 0x2584C440, 0x00000000
+data4    0x21CF1293, 0x40009827, 0x25C9428D, 0x00000000
+data4    0x3D415EEB, 0x400044A9, 0x25DC8482, 0x00000000
+data4    0xBD72C577, 0x3FFFE78F, 0x257F5070, 0x00000000
+data4    0x75EFD28E, 0x3FFF4AC3, 0x23EBBF7A, 0x00000000
+data4    0x60B52DDE, 0x3FFEB2AF, 0x22EECA07, 0x00000000
+data4    0x35204180, 0x3FFE1F19, 0x24191079, 0x00000000
+data4    0x54F7E60A, 0x3FFD8FCA, 0x248D3058, 0x00000000
 //
 //  Entries C_hi   double-precision memory format
 //  Index = 0,1,...,19  B = 2^(-1)*(1+Index/32+1/64)
 //  Entries C_lo  single-precision memory format
 //  Index = 0,1,...,19  B = 2^(-1)*(1+Index/32+1/64)
 //
-data8 0x3FFCC06A79F6FADE
-data4 0x239C7886, 0x00000000
-data8 0x3FFBB91F891662A6
-data4 0x250BD191, 0x00000000
-data8 0x3FFABFB6529F155D
-data4 0x256CC3E6, 0x00000000
-data8 0x3FF9D3002E964AE9
-data4 0x250843E3, 0x00000000
-data8 0x3FF8F1EF89DCB383
-data4 0x2277C87E, 0x00000000
-data8 0x3FF81B937C87DBD6
-data4 0x256DA6CF, 0x00000000
-data8 0x3FF74F141042EDE4
-data4 0x2573D28A, 0x00000000
-data8 0x3FF68BAF1784B360
-data4 0x242E489A, 0x00000000
-data8 0x3FF5D0B57C923C4C
-data4 0x2532D940, 0x00000000
-data8 0x3FF51D88F418EF20
-data4 0x253C7DD6, 0x00000000
-data8 0x3FF4719A02F88DAE
-data4 0x23DB59BF, 0x00000000
-data8 0x3FF3CC6649DA0788
-data4 0x252B4756, 0x00000000
-data8 0x3FF32D770B980DB8
-data4 0x23FE585F, 0x00000000
-data8 0x3FF2945FE56C987A
-data4 0x25378A63, 0x00000000
-data8 0x3FF200BDB16523F6
-data4 0x247BB2E0, 0x00000000
-data8 0x3FF172358CE27778
-data4 0x24446538, 0x00000000
-data8 0x3FF0E873FDEFE692
-data4 0x2514638F, 0x00000000
-data8 0x3FF0632C33154062
-data4 0x24A7FC27, 0x00000000
-data8 0x3FEFC42EB3EF115F
-data4 0x248FD0FE, 0x00000000
-data8 0x3FEEC9E8135D26F6
-data4 0x2385C719, 0x00000000
-LOCAL_OBJECT_END(tanl_table_cm1)
-
-LOCAL_OBJECT_START(tanl_table_scim2)
+data4    0x79F6FADE, 0x3FFCC06A, 0x239C7886, 0x00000000
+data4    0x891662A6, 0x3FFBB91F, 0x250BD191, 0x00000000
+data4    0x529F155D, 0x3FFABFB6, 0x256CC3E6, 0x00000000
+data4    0x2E964AE9, 0x3FF9D300, 0x250843E3, 0x00000000
+data4    0x89DCB383, 0x3FF8F1EF, 0x2277C87E, 0x00000000
+data4    0x7C87DBD6, 0x3FF81B93, 0x256DA6CF, 0x00000000
+data4    0x1042EDE4, 0x3FF74F14, 0x2573D28A, 0x00000000
+data4    0x1784B360, 0x3FF68BAF, 0x242E489A, 0x00000000
+data4    0x7C923C4C, 0x3FF5D0B5, 0x2532D940, 0x00000000
+data4    0xF418EF20, 0x3FF51D88, 0x253C7DD6, 0x00000000
+data4    0x02F88DAE, 0x3FF4719A, 0x23DB59BF, 0x00000000
+data4    0x49DA0788, 0x3FF3CC66, 0x252B4756, 0x00000000
+data4    0x0B980DB8, 0x3FF32D77, 0x23FE585F, 0x00000000
+data4    0xE56C987A, 0x3FF2945F, 0x25378A63, 0x00000000
+data4    0xB16523F6, 0x3FF200BD, 0x247BB2E0, 0x00000000
+data4    0x8CE27778, 0x3FF17235, 0x24446538, 0x00000000
+data4    0xFDEFE692, 0x3FF0E873, 0x2514638F, 0x00000000
+data4    0x33154062, 0x3FF0632C, 0x24A7FC27, 0x00000000
+data4    0xB3EF115F, 0x3FEFC42E, 0x248FD0FE, 0x00000000
+data4    0x135D26F6, 0x3FEEC9E8, 0x2385C719, 0x00000000
 //
 //  Entries SC_inv in Swapped IEEE format (extended)
 //  Index = 0,1,...,31  B = 2^(-2)*(1+Index/32+1/64)
 //
-data8    0x839D6D4A1BF30C9E, 0x00004001
-data8    0x80092804554B0EB0, 0x00004001
-data8    0xF959F94CA1CF0DE9, 0x00004000
-data8    0xF3086BA077378677, 0x00004000
-data8    0xED154515CCD4723C, 0x00004000
-data8    0xE77909441C27CF25, 0x00004000
-data8    0xE22D037D8DDACB88, 0x00004000
-data8    0xDD2B2D8A89C73522, 0x00004000
-data8    0xD86E1A23BB2C1171, 0x00004000
-data8    0xD3F0E288DFF5E0F9, 0x00004000
-data8    0xCFAF16B1283BEBD5, 0x00004000
-data8    0xCBA4AFAA0D88DD53, 0x00004000
-data8    0xC7CE03CCCA67C43D, 0x00004000
-data8    0xC427BC820CA0DDB0, 0x00004000
-data8    0xC0AECD57F13D8CAB, 0x00004000
-data8    0xBD606C3871ECE6B1, 0x00004000
-data8    0xBA3A0A96A44C4929, 0x00004000
-data8    0xB7394F6FE5CCCEC1, 0x00004000
-data8    0xB45C12039637D8BC, 0x00004000
-data8    0xB1A0552892CB051B, 0x00004000
-data8    0xAF04432B6BA2FFD0, 0x00004000
-data8    0xAC862A237221235F, 0x00004000
-data8    0xAA2478AF5F00A9D1, 0x00004000
-data8    0xA7DDBB0C81E082BF, 0x00004000
-data8    0xA5B0987D45684FEE, 0x00004000
-data8    0xA39BD0F5627A8F53, 0x00004000
-data8    0xA19E3B036EC5C8B0, 0x00004000
-data8    0x9FB6C1F091CD7C66, 0x00004000
-data8    0x9DE464101FA3DF8A, 0x00004000
-data8    0x9C263139A8F6B888, 0x00004000
-data8    0x9A7B4968C27B0450, 0x00004000
-data8    0x98E2DB7E5EE614EE, 0x00004000
-LOCAL_OBJECT_END(tanl_table_scim2)
-
-LOCAL_OBJECT_START(tanl_table_scim1)
+data4    0x1BF30C9E, 0x839D6D4A, 0x00004001, 0x00000000
+data4    0x554B0EB0, 0x80092804, 0x00004001, 0x00000000
+data4    0xA1CF0DE9, 0xF959F94C, 0x00004000, 0x00000000
+data4    0x77378677, 0xF3086BA0, 0x00004000, 0x00000000
+data4    0xCCD4723C, 0xED154515, 0x00004000, 0x00000000
+data4    0x1C27CF25, 0xE7790944, 0x00004000, 0x00000000
+data4    0x8DDACB88, 0xE22D037D, 0x00004000, 0x00000000
+data4    0x89C73522, 0xDD2B2D8A, 0x00004000, 0x00000000
+data4    0xBB2C1171, 0xD86E1A23, 0x00004000, 0x00000000
+data4    0xDFF5E0F9, 0xD3F0E288, 0x00004000, 0x00000000
+data4    0x283BEBD5, 0xCFAF16B1, 0x00004000, 0x00000000
+data4    0x0D88DD53, 0xCBA4AFAA, 0x00004000, 0x00000000
+data4    0xCA67C43D, 0xC7CE03CC, 0x00004000, 0x00000000
+data4    0x0CA0DDB0, 0xC427BC82, 0x00004000, 0x00000000
+data4    0xF13D8CAB, 0xC0AECD57, 0x00004000, 0x00000000
+data4    0x71ECE6B1, 0xBD606C38, 0x00004000, 0x00000000
+data4    0xA44C4929, 0xBA3A0A96, 0x00004000, 0x00000000
+data4    0xE5CCCEC1, 0xB7394F6F, 0x00004000, 0x00000000
+data4    0x9637D8BC, 0xB45C1203, 0x00004000, 0x00000000
+data4    0x92CB051B, 0xB1A05528, 0x00004000, 0x00000000
+data4    0x6BA2FFD0, 0xAF04432B, 0x00004000, 0x00000000
+data4    0x7221235F, 0xAC862A23, 0x00004000, 0x00000000
+data4    0x5F00A9D1, 0xAA2478AF, 0x00004000, 0x00000000
+data4    0x81E082BF, 0xA7DDBB0C, 0x00004000, 0x00000000
+data4    0x45684FEE, 0xA5B0987D, 0x00004000, 0x00000000
+data4    0x627A8F53, 0xA39BD0F5, 0x00004000, 0x00000000
+data4    0x6EC5C8B0, 0xA19E3B03, 0x00004000, 0x00000000
+data4    0x91CD7C66, 0x9FB6C1F0, 0x00004000, 0x00000000
+data4    0x1FA3DF8A, 0x9DE46410, 0x00004000, 0x00000000
+data4    0xA8F6B888, 0x9C263139, 0x00004000, 0x00000000
+data4    0xC27B0450, 0x9A7B4968, 0x00004000, 0x00000000
+data4    0x5EE614EE, 0x98E2DB7E, 0x00004000, 0x00000000
 //
 //  Entries SC_inv in Swapped IEEE format (extended)
 //  Index = 0,1,...,19  B = 2^(-1)*(1+Index/32+1/64)
 //
-data8    0x969F335C13B2B5BA, 0x00004000
-data8    0x93D446D9D4C0F548, 0x00004000
-data8    0x9147094F61B798AF, 0x00004000
-data8    0x8EF317CC758787AC, 0x00004000
-data8    0x8CD498B3B99EEFDB, 0x00004000
-data8    0x8AE82A7DDFF8BC37, 0x00004000
-data8    0x892AD546E3C55D42, 0x00004000
-data8    0x8799FEA9D15573C1, 0x00004000
-data8    0x86335F88435A4B4C, 0x00004000
-data8    0x84F4FB6E3E93A87B, 0x00004000
-data8    0x83DD195280A382FB, 0x00004000
-data8    0x82EA3D7FA4CB8C9E, 0x00004000
-data8    0x821B247C6861D0A8, 0x00004000
-data8    0x816EBED163E8D244, 0x00004000
-data8    0x80E42D9127E4CFC6, 0x00004000
-data8    0x807ABF8D28E64AFD, 0x00004000
-data8    0x8031EF26863B4FD8, 0x00004000
-data8    0x800960ADAE8C11FD, 0x00004000
-data8    0x8000E1475FDBEC21, 0x00004000
-data8    0x80186650A07791FA, 0x00004000
-LOCAL_OBJECT_END(tanl_table_scim1)
-
-Arg                 = f8
-Save_Norm_Arg       = f8        // For input to reduction routine
+data4    0x13B2B5BA, 0x969F335C, 0x00004000, 0x00000000
+data4    0xD4C0F548, 0x93D446D9, 0x00004000, 0x00000000
+data4    0x61B798AF, 0x9147094F, 0x00004000, 0x00000000
+data4    0x758787AC, 0x8EF317CC, 0x00004000, 0x00000000
+data4    0xB99EEFDB, 0x8CD498B3, 0x00004000, 0x00000000
+data4    0xDFF8BC37, 0x8AE82A7D, 0x00004000, 0x00000000
+data4    0xE3C55D42, 0x892AD546, 0x00004000, 0x00000000
+data4    0xD15573C1, 0x8799FEA9, 0x00004000, 0x00000000
+data4    0x435A4B4C, 0x86335F88, 0x00004000, 0x00000000
+data4    0x3E93A87B, 0x84F4FB6E, 0x00004000, 0x00000000
+data4    0x80A382FB, 0x83DD1952, 0x00004000, 0x00000000
+data4    0xA4CB8C9E, 0x82EA3D7F, 0x00004000, 0x00000000
+data4    0x6861D0A8, 0x821B247C, 0x00004000, 0x00000000
+data4    0x63E8D244, 0x816EBED1, 0x00004000, 0x00000000
+data4    0x27E4CFC6, 0x80E42D91, 0x00004000, 0x00000000
+data4    0x28E64AFD, 0x807ABF8D, 0x00004000, 0x00000000
+data4    0x863B4FD8, 0x8031EF26, 0x00004000, 0x00000000
+data4    0xAE8C11FD, 0x800960AD, 0x00004000, 0x00000000
+data4    0x5FDBEC21, 0x8000E147, 0x00004000, 0x00000000
+data4    0xA07791FA, 0x80186650, 0x00004000, 0x00000000
+ASM_SIZE_DIRECTIVE(TANL_BASE_CONSTANTS)
+
+Arg                 = f8   
 Result              = f8
-r                   = f8        // For output from reduction routine
-c                   = f9        // For output from reduction routine
+fp_tmp              = f9
 U_2                 = f10
-rsq                 = f11
+rsq                =  f11
 C_hi                = f12
 C_lo                = f13
 T_hi                = f14
 T_lo                = f15
 
+N_0                 = f32
 d_1                 = f33
-N_0                 = f34
+MPI_BY_4            = f34
 tail                = f35
 tanx                = f36
 Cx                  = f37
@@ -1104,6 +949,8 @@ P1_7                = f51
 P1_8                = f52
 P1_9                = f53
 
+TWO_TO_63           = f54
+NEGTWO_TO_63        = f55
 x                   = f56
 xsq                 = f57
 Tx                  = f58
@@ -1119,10 +966,12 @@ B                   = f67
 SC_inv              = f68
 Pos_r               = f69
 N_0_fix             = f70
-d_2                 = f71
-PI_BY_4             = f72
+PI_BY_4             = f71
+NEGTWO_TO_NEG2      = f72
+TWO_TO_24           = f73
 TWO_TO_NEG14        = f74
 TWO_TO_NEG33        = f75
+NEGTWO_TO_24        = f76       // NOTE(review): same register as NEGTWO_TO_NEG14 (f76) - confirm live ranges never overlap
 NEGTWO_TO_NEG14     = f76
 NEGTWO_TO_NEG33     = f77
 two_by_PI           = f78
@@ -1133,14 +982,13 @@ P_2                 = f82
 P_3                 = f83
 s_val               = f84
 w                   = f85
-B_mask1             = f86
-B_mask2             = f87
-w2                  = f88
+c                   = f86
+r                   = f87
 A                   = f89
 a                   = f90
 t                   = f91
 U_1                 = f92
-NEGTWO_TO_NEG2      = f93
+d_2                 = f93
 TWO_TO_NEG2         = f94
 Q1_1                = f95
 Q1_2                = f96
@@ -1161,641 +1009,609 @@ V_hiabs             = f110
 V                   = f111
 Inv_P_0             = f112
 
-FR_inv_pi_2to63     = f113
-FR_rshf_2to64       = f114
-FR_2tom64           = f115
-FR_rshf             = f116
-Norm_Arg            = f117
-Abs_Arg             = f118
-TWO_TO_NEG65        = f119
-fp_tmp              = f120
-mOne                = f121
-
-GR_sig_inv_pi  = r14
-GR_rshf_2to64  = r15
-GR_exp_2tom64  = r16
-GR_rshf        = r17
-GR_exp_2_to_63 = r18
-GR_exp_2_to_24 = r19
-GR_signexp_x   = r20
-GR_exp_x       = r21
-GR_exp_mask    = r22
-GR_exp_2tom14  = r23
-GR_exp_m2tom14 = r24
-GR_exp_2tom33  = r25
-GR_exp_m2tom33 = r26
-
 GR_SAVE_B0     = r33
 GR_SAVE_GP     = r34
 GR_SAVE_PFS    = r35
-table_base     = r36
+delta1         = r36
 table_ptr1     = r37
 table_ptr2     = r38
-table_ptr3     = r39
-lookup         = r40
-N_fix_gr       = r41
-GR_exp_2tom2   = r42
-GR_exp_2tom65  = r43
-exp_r          = r44
-sig_r          = r45
-bmask1         = r46
-table_offset   = r47
-bmask2         = r48
+i_0            = r39
+i_1            = r40 
+N_fix_gr       = r41 
+N_inc          = r42 
+exp_Arg        = r43 
+exp_r          = r44 
+sig_r          = r45 
+lookup         = r46   
+table_offset   = r47 
+Create_B       = r48 
 gr_tmp         = r49
-cot_flag       = r50
-
-GR_SAVE_B0                  = r51
-GR_SAVE_PFS                 = r52
-GR_SAVE_GP                  = r53
-GR_Parameter_X              = r54
-GR_Parameter_Y              = r55
-GR_Parameter_RESULT         = r56
-GR_Parameter_Tag            = r57
-
 
 .section .text
-.global __libm_tanl#
-.global __libm_cotl#
-
-.proc __libm_cotl#
-__libm_cotl:
-.endp __libm_cotl#
-LOCAL_LIBM_ENTRY(cotl)
-
-{ .mlx
-      alloc r32 = ar.pfs, 0,22,4,0
-      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
-}
-{ .mlx
-      mov GR_exp_mask = 0x1ffff            // Exponent mask
-      movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
-}
-;;
-
-//     Check for NatVals, Infs , NaNs, and Zeros
-{ .mfi
-      getf.exp GR_signexp_x = Arg          // Get sign and exponent of x
-      fclass.m  p6,p0 = Arg, 0x1E7         // Test for natval, nan, inf, zero
-      mov cot_flag = 0x1
-}
-{ .mfb
-      addl table_base = @ltoff(TANL_BASE_CONSTANTS), gp // Pointer to table ptr
-      fnorm.s1 Norm_Arg = Arg              // Normalize x
-      br.cond.sptk COMMON_PATH
+.global tanl
+.proc tanl
+tanl:
+#ifdef _LIBC
+.global __tanl
+.proc __tanl
+__tanl:
+#endif
+{ .mfi
+alloc r32 = ar.pfs, 0,17,2,0
+(p0)   fclass.m.unc  p6,p0 = Arg, 0x1E7
+      addl gr_tmp = -1,r0             
+}
+{ .mfi
+  nop.m 0
+(p0)   fclass.nm.unc  p7,p0 = Arg, 0x1FF
+  nop.i 0
 };;
 
-LOCAL_LIBM_END(cotl)
-
-.proc __libm_tanl#
-__libm_tanl:
-.endp __libm_tanl#
-GLOBAL_IEEE754_ENTRY(tanl)
-
-{ .mlx
-      alloc r32 = ar.pfs, 0,22,4,0
-      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi
-}
-{ .mlx
-      mov GR_exp_mask = 0x1ffff            // Exponent mask
-      movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)
-}
-;;
-
-//     Check for NatVals, Infs , NaNs, and Zeros
 { .mfi
-      getf.exp GR_signexp_x = Arg          // Get sign and exponent of x
-      fclass.m  p6,p0 = Arg, 0x1E7         // Test for natval, nan, inf, zero
-      mov cot_flag = 0x0
-}
-{ .mfi
-      addl table_base = @ltoff(TANL_BASE_CONSTANTS), gp // Pointer to table ptr
-      fnorm.s1 Norm_Arg = Arg              // Normalize x
+(p0)  addl           table_ptr1   = @ltoff(TANL_BASE_CONSTANTS), gp
+	nop.f 999
       nop.i 0
-};;
-
-// Common path for both tanl and cotl
-COMMON_PATH:
-{ .mfi
-      setf.sig FR_inv_pi_2to63 = GR_sig_inv_pi // Form 1/pi * 2^63
-      fclass.m p9, p0 = Arg, 0x0b          // Test x denormal
-      mov GR_exp_2tom64 = 0xffff - 64      // Scaling constant to compute N
-}
-{ .mlx
-      setf.d FR_rshf_2to64 = GR_rshf_2to64 // Form const 1.1000 * 2^(63+64)
-      movl GR_rshf = 0x43e8000000000000    // Form const 1.1000 * 2^63
 }
 ;;
-
-// Check for everything - if false, then must be pseudo-zero or pseudo-nan.
-// Branch out to deal with special values.
-{ .mfi
-      addl gr_tmp = -1,r0
-      fclass.nm  p7,p0 = Arg, 0x1FF        // Test x unsupported
-      mov GR_exp_2_to_63 = 0xffff + 63     // Exponent of 2^63
-}
-{ .mfb
-      ld8 table_base = [table_base]        // Get pointer to constant table
-      fms.s1 mOne = f0, f0, f1
-(p6)  br.cond.spnt TANL_SPECIAL            // Branch if x natval, nan, inf, zero
-}
-;;
-
-{ .mmb
+{ .mmi
+(p0)  ld8 table_ptr1 = [table_ptr1]
       setf.sig fp_tmp = gr_tmp   // Make a constant so fmpy produces inexact
-      mov GR_exp_2_to_24 = 0xffff + 24     // Exponent of 2^24
-(p9)  br.cond.spnt TANL_DENORMAL           // Branch if x denormal
+      nop.i 999
 }
 ;;
 
-TANL_COMMON:
-// Return to here if x denormal
 //
-// Do fcmp to generate Denormal exception
-//  - can't do FNORM (will generate Underflow when U is unmasked!)
-// Branch out to deal with unsupporteds values.
-{ .mfi
-      setf.exp FR_2tom64 = GR_exp_2tom64 // Form 2^-64 for scaling N_float
-      fcmp.eq.s0 p0, p6 = Arg, f1        // Dummy to flag denormals
-      add table_ptr1 = 0, table_base     // Point to tanl_table_1
+//     Check for NatVals, Infs, NaNs, and Zeros
+//     Check for everything - if false, then must be pseudo-zero
+//     or pseudo-nan.
+//     Local table pointer
+//
+{ .mbb
+(p0)   add table_ptr2 = 96, table_ptr1
+(p6)   br.cond.spnt L(TANL_SPECIAL) 
+(p7)   br.cond.spnt L(TANL_SPECIAL) ;;
 }
-{ .mib
-      setf.d FR_rshf = GR_rshf           // Form right shift const 1.1000 * 2^63
-      add table_ptr2 = 80, table_base    // Point to tanl_table_2
-(p7)  br.cond.spnt TANL_UNSUPPORTED      // Branch if x unsupported type
+//
+//     Point to Inv_P_0
+//     Branch out to deal with unsupported and special values.
+//
+{ .mmf
+(p0)   ldfs TWO_TO_24 = [table_ptr1],4
+(p0)   ldfs TWO_TO_63 = [table_ptr2],4
+//
+//     Load -2**24, load -2**63.
+//
+(p0)   fcmp.eq.s0 p0, p6 = Arg, f1 ;;
 }
-;;
-
 { .mfi
-      and GR_exp_x = GR_exp_mask, GR_signexp_x // Get exponent of x
-      fmpy.s1 Save_Norm_Arg = Norm_Arg, f1     // Save x if large arg reduction
-      dep.z bmask1 = 0x7c, 56, 8               // Form mask to get 5 msb of r
-                                               // bmask1 = 0x7c00000000000000
+(p0)   ldfs NEGTWO_TO_63 = [table_ptr2],12
+(p0)   fnorm.s1     Arg = Arg
+	nop.i 999
+}
+//
+//     Load 2**24, Load 2**63.
+//
+{ .mmi
+(p0)   ldfs NEGTWO_TO_24 = [table_ptr1],12 ;;
+//
+//     Do fcmp to generate Denormal exception 
+//     - can't do FNORM (will generate Underflow when U is unmasked!)
+//     Normalize input argument.
+//
+(p0)   ldfe two_by_PI = [table_ptr1],16
+	nop.i 999
+}
+{ .mmi
+(p0)   ldfe Inv_P_0 = [table_ptr2],16 ;;
+(p0)   ldfe d_1 = [table_ptr2],16
+	nop.i 999
 }
-;;
-
 //
 //     Decide about the paths to take:
-//     Set PR_6 if |Arg| >= 2**63
-//     Set PR_9 if |Arg| < 2**24 - CASE 1 OR 2
-//     OTHERWISE Set PR_8 - CASE 3 OR 4
+//     PR_1 and PR_3 set if -2**24 < Arg < 2**24 - CASE 1 OR 2
+//     OTHERWISE - CASE 3 OR 4
+//     Load inverse of P_0 .
+//     Set PR_6 if Arg <= -2**63
+//     Are there any Infs, NaNs, or zeros?
 //
-//     Branch out if the magnitude of the input argument is >= 2^63
-//     - do this branch before the next.
-{ .mfi
-      ldfe two_by_PI = [table_ptr1],16        // Load 2/pi
-      nop.f 999
-      dep.z bmask2 = 0x41, 57, 7              // Form mask to OR to produce B
-                                              // bmask2 = 0x8200000000000000
+{ .mmi
+(p0)   ldfe P_0 = [table_ptr1],16 ;;
+(p0)   ldfe d_2 = [table_ptr2],16
+	nop.i 999
 }
-{ .mib
-      ldfe PI_BY_4 = [table_ptr2],16          // Load pi/4
-      cmp.ge p6,p0 = GR_exp_x, GR_exp_2_to_63 // Is |x| >= 2^63
-(p6)  br.cond.spnt TANL_ARG_TOO_LARGE         // Branch if |x| >= 2^63
+//
+//     Set PR_8 if Arg <= -2**24
+//     Set PR_6 if Arg >=  2**63
+//
+{ .mmi
+(p0)   ldfe P_1 = [table_ptr1],16 ;;
+(p0)   ldfe PI_BY_4 = [table_ptr2],16
+	nop.i 999
 }
-;;
-
+//
+//     Set PR_8 if Arg >= 2**24
+//
 { .mmi
-      ldfe P_0 = [table_ptr1],16              // Load P_0
-      ldfe Inv_P_0 = [table_ptr2],16          // Load Inv_P_0
-      nop.i 999
+(p0)   ldfe P_2 = [table_ptr1],16 ;;
+(p0)   ldfe   MPI_BY_4 = [table_ptr2],16
+	nop.i 999
 }
-;;
-
+//
+//     Load  P_2 and PI_BY_4
+//
 { .mfi
-      ldfe P_1 = [table_ptr1],16              // Load P_1
-      fmerge.s Abs_Arg = f0, Norm_Arg         // Get |x|
-      mov GR_exp_m2tom33 = 0x2ffff - 33       // Form signexp of -2^-33
+(p0)   ldfe   P_3 = [table_ptr1],16
+	nop.f 999
+	nop.i 999 ;;
 }
 { .mfi
-      ldfe d_1 = [table_ptr2],16              // Load d_1 for 2^24 <= |x| < 2^63
-      nop.f 999
-      mov GR_exp_2tom33 = 0xffff - 33         // Form signexp of 2^-33
+	nop.m 999
+(p0)   fcmp.le.unc.s1 p6,p7 = Arg,NEGTWO_TO_63
+	nop.i 999
 }
-;;
-
-{ .mmi
-      ldfe P_2 = [table_ptr1],16              // Load P_2
-      ldfe d_2 = [table_ptr2],16              // Load d_2 for 2^24 <= |x| < 2^63
-      cmp.ge p8,p0 = GR_exp_x, GR_exp_2_to_24 // Is |x| >= 2^24
+{ .mfi
+	nop.m 999
+(p0)   fcmp.le.unc.s1 p8,p9 = Arg,NEGTWO_TO_24
+	nop.i 999 ;;
 }
-;;
-
-// Use special scaling to right shift so N=Arg * 2/pi is in rightmost bits
-// Branch to Cases 3 or 4 if Arg <= -2**24 or Arg >= 2**24
-{ .mfb
-      ldfe   P_3 = [table_ptr1],16            // Load P_3
-      fma.s1      N_fix = Norm_Arg, FR_inv_pi_2to63, FR_rshf_2to64
-(p8)  br.cond.spnt TANL_LARGER_ARG            // Branch if 2^24 <= |x| < 2^63
+{ .mfi
+	nop.m 999
+(p7)   fcmp.ge.s1 p6,p0 = Arg,TWO_TO_63
+	nop.i 999
 }
-;;
-
-// Here if 0 < |x| < 2^24
-//     ARGUMENT REDUCTION CODE - CASE 1 and 2
+{ .mfi
+	nop.m 999
+(p9)   fcmp.ge.s1 p8,p0 = Arg,TWO_TO_24
+	nop.i 999 ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
 //
-{ .mmf
-      setf.exp TWO_TO_NEG33 = GR_exp_2tom33      // Form 2^-33
-      setf.exp NEGTWO_TO_NEG33 = GR_exp_m2tom33  // Form -2^-33
-      fmerge.s r = Norm_Arg,Norm_Arg          // Assume r=x, ok if |x| < pi/4
+//     Load  P_3 and -PI_BY_4
+//
+(p6)   br.cond.spnt L(TANL_ARG_TOO_LARGE) ;;
 }
-;;
-
+{ .mib
+	nop.m 999
+	nop.i 999
 //
-// If |Arg| < pi/4,  set PR_8, else  pi/4 <=|Arg| < 2^24 - set PR_9.
+//     Load 2**(-2).
+//     Load -2**(-2).
+//     Branch out if we have a special argument.
+//     Branch out if the magnitude of the input argument is too large
+//     - do this branch before the next.
+//
+(p8)   br.cond.spnt L(TANL_LARGER_ARG) ;;
+}
+//
+//     Branch to Cases 3 or 4 if Arg <= -2**24 or Arg >= 2**24
 //
-//     Case 2: Convert integer N_fix back to normalized floating-point value.
 { .mfi
-      getf.sig sig_r = Norm_Arg               // Get sig_r if 1/4 <= |x| < pi/4
-      fcmp.lt.s1 p8,p9= Abs_Arg,PI_BY_4       // Test |x| < pi/4
-      mov GR_exp_2tom2 = 0xffff - 2           // Form signexp of 2^-2
+(p0)   ldfs TWO_TO_NEG2 = [table_ptr2],4
+//     ARGUMENT REDUCTION CODE - CASE 1 and 2
+//     Load 2**(-2).
+//     Load -2**(-2).
+(p0)   fmpy.s1 N = Arg,two_by_PI
+	nop.i 999 ;;
 }
 { .mfi
-      ldfps TWO_TO_NEG2, NEGTWO_TO_NEG2 = [table_ptr2] // Load 2^-2, -2^-2
-      fms.s1 N = N_fix, FR_2tom64, FR_rshf    // Use scaling to get N floated
-      mov N_fix_gr = r0                       // Assume N=0, ok if |x| < pi/4
+(p0)   ldfs NEGTWO_TO_NEG2 = [table_ptr2],12
+//
+//     N = Arg * 2/pi
+//
+(p0)   fcmp.lt.unc.s1 p8,p9= Arg,PI_BY_4
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+//
+//     if Arg < pi/4,  set PR_8.
+//
+(p8)   fcmp.gt.s1 p8,p9= Arg,MPI_BY_4
+	nop.i 999 ;;
 }
-;;
-
 //
 //     Case 1: Is |r| < 2**(-2).
 //     Arg is the same as r in this case.
 //     r = Arg
 //     c = 0
 //
-//     Case 2: Place integer part of N in GP register.
 { .mfi
-(p9)  getf.sig N_fix_gr = N_fix
-      fmerge.s c = f0, f0                     // Assume c=0, ok if |x| < pi/4
-      cmp.lt p10, p0 = GR_exp_x, GR_exp_2tom2 // Test if |x| < 1/4
+(p8)   mov N_fix_gr = r0
+//
+//     if Arg > -pi/4, reset PR_8.
+//     Select the case when |Arg| < pi/4 - set PR[8] = true.
+//     Else Select the case when |Arg| >= pi/4 - set PR[9] = true.
+//
+(p0)   fcvt.fx.s1 N_fix = N
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      setf.sig B_mask1 = bmask1               // Form mask to get 5 msb of r
-      nop.f 999
-      mov exp_r = GR_exp_x                    // Get exp_r if 1/4 <= |x| < pi/4
+	nop.m 999
+//
+//     Grab the integer part of N .
+//
+(p8)   mov r = Arg
+	nop.i 999
 }
-{ .mbb
-      setf.sig B_mask2 = bmask2               // Form mask to form B from r
-(p10) br.cond.spnt TANL_SMALL_R               // Branch if 0 < |x| < 1/4
-(p8)  br.cond.spnt TANL_NORMAL_R              // Branch if 1/4 <= |x| < pi/4
+{ .mfi
+	nop.m 999
+(p8)   mov c = f0
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p8)   fcmp.lt.unc.s1 p10, p11 = Arg, TWO_TO_NEG2
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+(p10)  fcmp.gt.s1 p10,p0 = Arg, NEGTWO_TO_NEG2
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
+//
+//     Case 2: Place integer part of N in GP register.
+//
+(p9)   fcvt.xf N = N_fix
+	nop.i 999 ;;
+}
+{ .mib
+(p9)   getf.sig N_fix_gr = N_fix
+	nop.i 999
+//
+//     Case 2: Convert integer N_fix back to normalized floating-point value.
+//
+(p10)  br.cond.spnt L(TANL_SMALL_R) ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p8)   br.cond.sptk L(TANL_NORMAL_R) ;;
 }
-;;
-
-// Here if pi/4 <= |x| < 2^24
 //
 //     Case 1: PR_3 is only affected  when PR_1 is set.
 //
+{ .mmi
+(p9)   ldfs TWO_TO_NEG33 = [table_ptr2], 4 ;;
 //
-//     Case 2: w = N * P_2
-//     Case 2: s_val = -N * P_1  + Arg
+//     Case 2: Load 2**(-33).
 //
-
-{ .mfi
-      nop.m 999
-      fnma.s1 s_val = N, P_1, Norm_Arg
-      nop.i 999
+(p9)   ldfs NEGTWO_TO_NEG33 = [table_ptr2], 4
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 w = N, P_2                     // w = N * P_2 for |s| >= 2^-33
-      nop.i 999
+	nop.m 999
+//
+//     Case 2: Load -2**(-33).
+//
+(p9)   fnma.s1 s_val = N, P_1, Arg
+	nop.i 999
 }
-;;
-
-//     Case 2_reduce: w = N * P_3 (change sign)
 { .mfi
-      nop.m 999
-      fmpy.s1 w2 = N, P_3                    // w = N * P_3 for |s| < 2^-33
-      nop.i 999
+	nop.m 999
+(p9)   fmpy.s1 w = N, P_2
+	nop.i 999 ;;
 }
-;;
-
-//     Case 1_reduce: r = s + w (change sign)
 { .mfi
-      nop.m 999
-      fsub.s1 r = s_val, w                   // r = s_val - w for |s| >= 2^-33
-      nop.i 999
+	nop.m 999
+//
+//     Case 2: w = N * P_2
+//     Case 2: s_val = -N * P_1  + Arg
+//
+(p0)   fcmp.lt.unc.s1 p9,p8 = s_val, TWO_TO_NEG33
+	nop.i 999 ;;
 }
-;;
-
-//     Case 2_reduce: U_1 = N * P_2 + w
 { .mfi
-      nop.m 999
-      fma.s1  U_1 = N, P_2, w2              // U_1 = N * P_2 + w for |s| < 2^-33
-      nop.i 999
-}
-;;
-
+	nop.m 999
 //
 //     Decide between case_1 and case_2 reduce:
-//     Case 1_reduce:  |s| >= 2**(-33)
-//     Case 2_reduce:  |s| < 2**(-33)
 //
-{ .mfi
-      nop.m 999
-      fcmp.lt.s1 p9, p8 = s_val, TWO_TO_NEG33
-      nop.i 999
+(p9)   fcmp.gt.s1 p9, p8 = s_val, NEGTWO_TO_NEG33
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p9)  fcmp.gt.s1 p9, p8 = s_val, NEGTWO_TO_NEG33
-      nop.i 999
+	nop.m 999
+//
+//     Case 1_reduce:  s <= -2**(-33) or s >= 2**(-33)
+//     Case 2_reduce: -2**(-33) < s < 2**(-33)
+//
+(p8)   fsub.s1 r = s_val, w
+	nop.i 999
 }
-;;
-
-//     Case 1_reduce: c = s - r
 { .mfi
-      nop.m 999
-      fsub.s1 c = s_val, r                     // c = s_val - r for |s| >= 2^-33
-      nop.i 999
+	nop.m 999
+(p9)   fmpy.s1 w = N, P_3
+	nop.i 999 ;;
 }
-;;
-
-//     Case 2_reduce: r is complete here - continue to calculate c .
-//     r = s - U_1
 { .mfi
-      nop.m 999
-(p9)  fsub.s1 r = s_val, U_1
-      nop.i 999
+	nop.m 999
+(p9)   fma.s1  U_1 = N, P_2, w
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p9)  fms.s1 U_2 = N, P_2, U_1
-      nop.i 999
-}
-;;
-
+	nop.m 999
 //
 //     Case 1_reduce: Is |r| < 2**(-2), if so set PR_10
-//     else set PR_13.
+//     else set PR_11.
 //
-
-{ .mfi
-      nop.m 999
-      fand B = B_mask1, r
-      nop.i 999
+(p8)   fsub.s1 c = s_val, r
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p8)  fcmp.lt.unc.s1 p10, p13 = r, TWO_TO_NEG2
-      nop.i 999
+	nop.m 999
+//
+//     Case 1_reduce: r = s + w (change sign)
+//     Case 2_reduce: w = N * P_3 (change sign)
+//
+(p8)   fcmp.lt.unc.s1 p10, p11 = r, TWO_TO_NEG2
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p8)  getf.sig sig_r = r               // Get signif of r if |s| >= 2^-33
-      nop.f 999
-      nop.i 999
+	nop.m 999
+(p10)  fcmp.gt.s1 p10, p11 = r, NEGTWO_TO_NEG2
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-(p8)  getf.exp exp_r = r               // Extract signexp of r if |s| >= 2^-33
-(p10) fcmp.gt.s1 p10, p13 = r, NEGTWO_TO_NEG2
-      nop.i 999
+	nop.m 999
+(p9)   fsub.s1 r = s_val, U_1
+	nop.i 999
 }
-;;
-
+{ .mfi
+	nop.m 999
+//
 //     Case 1_reduce: c is complete here.
-//     Case 1: Branch to SMALL_R or NORMAL_R.
 //     c = c + w (w has not been negated.)
-{ .mfi
-      nop.m 999
-(p8)  fsub.s1 c = c, w                         // c = c - w for |s| >= 2^-33
-      nop.i 999
-}
-{ .mbb
-      nop.m 999
-(p10) br.cond.spnt TANL_SMALL_R     // Branch if pi/4 < |x| < 2^24 and |r|<1/4
-(p13) br.cond.sptk TANL_NORMAL_R_A  // Branch if pi/4 < |x| < 2^24 and |r|>=1/4
+//     Case 2_reduce: r is complete here - continue to calculate c .
+//     r = s - U_1
+//
+(p9)   fms.s1 U_2 = N, P_2, U_1
+	nop.i 999 ;;
 }
-;;
-
-
-// Here if pi/4 < |x| < 2^24 and |s| < 2^-33
+{ .mfi
+	nop.m 999
 //
-//     Is i_1 = lsb of N_fix_gr even or odd?
-//     if i_1 == 0, set p11, else set p12.
+//     Case 1_reduce: c = s - r
+//     Case 2_reduce: U_1 = N * P_2 + w
 //
-{ .mfi
-      nop.m 999
-      fsub.s1 s_val = s_val, r
-      add N_fix_gr = N_fix_gr, cot_flag // N = N + 1 (for cotl)
+(p8)   fsub.s1 c = c, w
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
+(p9)   fsub.s1 s_val = s_val, r
+	nop.i 999
+}
+{ .mfb
+	nop.m 999
 //
 //     Case 2_reduce:
 //     U_2 = N * P_2 - U_1
 //     Not needed until later.
 //
-      fadd.s1 U_2 = U_2, w2
+(p9)   fadd.s1 U_2 = U_2, w
 //
 //     Case 2_reduce:
 //     s = s - r
 //     U_2 = U_2 + w
 //
-      nop.i 999
+(p10)  br.cond.spnt L(TANL_SMALL_R) ;;
 }
-;;
-
+{ .mib
+	nop.m 999
+	nop.i 999
+(p11)  br.cond.sptk L(TANL_NORMAL_R) ;;
+}
+{ .mii
+	nop.m 999
 //
 //     Case 2_reduce:
 //     c = c - U_2
 //     c is complete here
 //     Argument reduction ends here.
 //
+(p9)   extr.u i_1 = N_fix_gr, 0, 1 ;;
+(p9)   cmp.eq.unc p11, p12 = 0x0000,i_1 ;;
+}
 { .mfi
-      nop.m 999
-      fmpy.s1 rsq = r, r
-      tbit.z p11, p12 = N_fix_gr, 0 ;;    // Set p11 if N even, p12 if odd
+	nop.m 999
+//
+//     Is i_1  even or odd?
+//     if i_1 == 0, set p11, else set p12.
+//
+(p11)  fmpy.s1 rsq = r, r
+	nop.i 999 ;;
 }
-
 { .mfi
-      nop.m 999
-(p12) frcpa.s1 S_hi,p0 = f1, r
-      nop.i 999
+	nop.m 999
+(p12)  frcpa.s1 S_hi,p0 = f1, r
+	nop.i 999
 }
+
+
+
+//
+//     Case 1: Branch to SMALL_R or NORMAL_R.
+//     Case 1 is done now.
+//
+
 { .mfi
+(p9)   addl           table_ptr1   = @ltoff(TANL_BASE_CONSTANTS), gp
+(p9)   fsub.s1 c = s_val, U_1
+       nop.i 999 ;;
+}
+;;
+
+{ .mmi
+(p9)  ld8 table_ptr1 = [table_ptr1]
       nop.m 999
-      fsub.s1 c = s_val, U_1
       nop.i 999
 }
 ;;
 
+
 { .mmi
-      add table_ptr1 = 160, table_base ;;  // Point to tanl_table_p1
-      ldfe P1_1 = [table_ptr1],144
-      nop.i 999 ;;
+(p9)   add table_ptr1 = 224, table_ptr1 ;;
+(p9)   ldfe P1_1 = [table_ptr1],144
+	nop.i 999 ;;
 }
 //
+//     Get [i_1] -  lsb of N_fix_gr .
 //     Load P1_1 and point to Q1_1 .
 //
 { .mfi
-      ldfe Q1_1 = [table_ptr1]
+(p9)   ldfe Q1_1 = [table_ptr1] , 0
 //
 //     N even: rsq = r * Z
 //     N odd:  S_hi = frcpa(r)
 //
-(p12) fmerge.ns S_hi = S_hi, S_hi
-      nop.i 999
+(p12)  fmerge.ns S_hi = S_hi, S_hi
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     Case 2_reduce:
 //     c = s - U_1
 //
-(p9)  fsub.s1 c = c, U_2
-      nop.i 999 ;;
+(p9)   fsub.s1 c = c, U_2
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p12) fma.s1  poly1 = S_hi, r, f1
-      nop.i 999 ;;
+	nop.m 999
+(p12)  fma.s1  poly1 = S_hi, r, f1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     N odd:  Change sign of S_hi
 //
-(p11) fmpy.s1 rsq = rsq, P1_1
-      nop.i 999 ;;
+(p11)  fmpy.s1 rsq = rsq, P1_1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p12) fma.s1 S_hi = S_hi, poly1, S_hi
-      nop.i 999 ;;
+	nop.m 999
+(p12)  fma.s1 S_hi = S_hi, poly1, S_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     N even: rsq = rsq * P1_1
 //     N odd:  poly1 =  1.0 +  S_hi * r    16 bits partial  account for necessary
 //
-(p11) fma.s1 Poly = r, rsq, c
-      nop.i 999 ;;
+(p11)  fma.s1 Result = r, rsq, c
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
-//     N even: Poly = c  + r * rsq
+//     N even: Result = c  + r * rsq
 //     N odd:  S_hi  = S_hi + S_hi*poly1  16 bits account for necessary
 //
-(p12) fma.s1 poly1 = S_hi, r, f1
-(p11) tbit.z.unc p14, p15 = cot_flag, 0 ;; // p14=1 for tanl; p15=1 for cotl
+(p12)  fma.s1 poly1 = S_hi, r, f1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
-//     N even: Result = Poly + r
+//     N even: Result = Result + r
 //     N odd:  poly1  = 1.0 + S_hi * r        32 bits partial
 //
-(p14) fadd.s0 Result = r, Poly             // for tanl
-      nop.i 999
-}
-{ .mfi
-      nop.m 999
-(p15) fms.s0 Result = r, mOne, Poly        // for cotl
-      nop.i 999
+(p11)  fadd.s0 Result = r, Result
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-(p12) fma.s1  S_hi = S_hi, poly1, S_hi
-      nop.i 999 ;;
+	nop.m 999
+(p12)  fma.s1  S_hi = S_hi, poly1, S_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     N even: Result1 = Result + r
 //     N odd:   S_hi  = S_hi * poly1 + S_hi   32 bits
 //
-(p12) fma.s1 poly1 = S_hi, r, f1
-      nop.i 999 ;;
+(p12)  fma.s1 poly1 = S_hi, r, f1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     N odd:  poly1  =  S_hi * r + 1.0       64 bits partial
 //
-(p12) fma.s1 S_hi = S_hi, poly1, S_hi
-      nop.i 999 ;;
+(p12)  fma.s1 S_hi = S_hi, poly1, S_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     N odd:  poly1  =  S_hi * poly + 1.0    64 bits
 //
-(p12) fma.s1 poly1 = S_hi, r, f1
-      nop.i 999 ;;
+(p12)  fma.s1 poly1 = S_hi, r, f1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     N odd:  poly1  =  S_hi * r + 1.0
 //
-(p12) fma.s1 poly1 = S_hi, c, poly1
-      nop.i 999 ;;
+(p12)  fma.s1 poly1 = S_hi, c, poly1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     N odd:  poly1  =  S_hi * c + poly1
 //
-(p12) fmpy.s1 S_lo = S_hi, poly1
-      nop.i 999 ;;
+(p12)  fmpy.s1 S_lo = S_hi, poly1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     N odd:  S_lo  =  S_hi *  poly1
 //
-(p12) fma.s1 S_lo = Q1_1, r, S_lo
-(p12) tbit.z.unc p14, p15 = cot_flag, 0 // p14=1 for tanl; p15=1 for cotl
+(p12)  fma.s1 S_lo = Q1_1, r, S_lo
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //     N odd:  Result =  S_hi + S_lo
 //
-      fmpy.s0 fp_tmp = fp_tmp, fp_tmp  // Dummy mult to set inexact
-      nop.i 999 ;;
+(p0)   fmpy.s0 fp_tmp = fp_tmp, fp_tmp  // Dummy mult to set inexact
+	nop.i 999 ;;
 }
-{ .mfi
-      nop.m 999
+{ .mfb
+	nop.m 999
 //
 //     N odd:  S_lo  =  S_lo + Q1_1 * r
 //
-(p14) fadd.s0 Result = S_hi, S_lo          // for tanl
-      nop.i 999
-}
-{ .mfb
-      nop.m 999
-(p15) fms.s0 Result = S_hi, mOne, S_lo     // for cotl
-      br.ret.sptk b0 ;;          // Exit for pi/4 <= |x| < 2^24 and |s| < 2^-33
+(p12)  fadd.s0 Result = S_hi, S_lo
+(p0)   br.ret.sptk b0 ;;
 }
 
 
-TANL_LARGER_ARG:
-// Here if 2^24 <= |x| < 2^63
+L(TANL_LARGER_ARG): 
+
 //
 // ARGUMENT REDUCTION CODE - CASE 3 and 4
 //
 
-{ .mmf
-      mov GR_exp_2tom14 = 0xffff - 14          // Form signexp of 2^-14
-      mov GR_exp_m2tom14 = 0x2ffff - 14        // Form signexp of -2^-14
-      fmpy.s1 N_0 = Norm_Arg, Inv_P_0
+{ .mfi
+(p0)  addl           table_ptr1   = @ltoff(TANL_BASE_CONSTANTS), gp
+(p0)  fmpy.s1 N_0 = Arg, Inv_P_0 
+	nop.i 999
 }
 ;;
 
 { .mmi
-      setf.exp TWO_TO_NEG14 = GR_exp_2tom14    // Form 2^-14
-      setf.exp NEGTWO_TO_NEG14 = GR_exp_m2tom14// Form -2^-14
+(p0)  ld8 table_ptr1 = [table_ptr1]
+      nop.m 999
       nop.i 999
 }
 ;;
@@ -1806,605 +1622,661 @@ TANL_LARGER_ARG:
 //    N_0 = Arg * Inv_P_0
 //
 { .mmi
-      add table_ptr2 = 144, table_base ;;     // Point to 2^-2
-      ldfps TWO_TO_NEG2, NEGTWO_TO_NEG2 = [table_ptr2]
-      nop.i 999
+(p0)  add table_ptr1 = 8, table_ptr1 ;;
+//
+//    Point to  2**(-14)
+//
+(p0)  ldfs TWO_TO_NEG14 = [table_ptr1], 4
+	nop.i 999 ;;
 }
-;;
-
+//
+//    Load 2**(-14).
+//
+{ .mmi
+(p0)  ldfs NEGTWO_TO_NEG14 = [table_ptr1], 180 ;;
 //
 //    N_0_fix  = integer part of N_0 .
+//    Adjust table_ptr1 to beginning of table.
 //
+(p0)  ldfs TWO_TO_NEG2 = [table_ptr1], 4
+	nop.i 999 ;;
+}
 //
 //    Make N_0 the integer part.
 //
 { .mfi
-      nop.m 999
-      fcvt.fx.s1 N_0_fix = N_0
-      nop.i 999 ;;
+(p0)  ldfs NEGTWO_TO_NEG2 = [table_ptr1]
+//
+//    Load -2**(-14).
+//
+(p0)  fcvt.fx.s1 N_0_fix = N_0
+	nop.i 999 ;;
 }
 { .mfi
-      setf.sig B_mask1 = bmask1               // Form mask to get 5 msb of r
-      fcvt.xf N_0 = N_0_fix
-      nop.i 999 ;;
+	nop.m 999
+(p0)  fcvt.xf N_0 = N_0_fix
+	nop.i 999 ;;
 }
 { .mfi
-      setf.sig B_mask2 = bmask2               // Form mask to form B from r
-      fnma.s1 ArgPrime = N_0, P_0, Norm_Arg
-      nop.i 999
+	nop.m 999
+(p0)  fnma.s1 ArgPrime = N_0, P_0, Arg
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 w = N_0, d_1
-      nop.i 999 ;;
+	nop.m 999
+(p0)  fmpy.s1 w = N_0, d_1
+	nop.i 999 ;;
 }
+{ .mfi
+	nop.m 999
 //
 //    ArgPrime = -N_0 * P_0 + Arg
 //    w  = N_0 * d_1
 //
+(p0)  fmpy.s1 N = ArgPrime, two_by_PI
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
 //
 //    N = ArgPrime * 2/pi
 //
-//      fcvt.fx.s1 N_fix = N
-// Use special scaling to right shift so N=Arg * 2/pi is in rightmost bits
-// Branch to Cases 3 or 4 if Arg <= -2**24 or Arg >= 2**24
+(p0)  fcvt.fx.s1 N_fix = N
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-      fma.s1      N_fix = ArgPrime, FR_inv_pi_2to63, FR_rshf_2to64
-
-      nop.i 999 ;;
+	nop.m 999
+//
+//    N_fix is the integer part.
+//
+(p0)  fcvt.xf N = N_fix
+	nop.i 999 ;;
 }
-//     Convert integer N_fix back to normalized floating-point value.
 { .mfi
-      nop.m 999
-      fms.s1 N = N_fix, FR_2tom64, FR_rshf    // Use scaling to get N floated
-      nop.i 999
+(p0)  getf.sig N_fix_gr = N_fix
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
 //    N is the integer part of the reduced-reduced argument.
 //    Put the integer in a GP register.
 //
+(p0)  fnma.s1 s_val = N, P_1, ArgPrime
+	nop.i 999
+}
 { .mfi
-      getf.sig N_fix_gr = N_fix
-      nop.f 999
-      nop.i 999
+	nop.m 999
+(p0)  fnma.s1 w = N, P_2, w
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
 //
 //    s_val = -N*P_1 + ArgPrime
 //    w = -N*P_2 + w
 //
-{ .mfi
-      nop.m 999
-      fnma.s1 s_val = N, P_1, ArgPrime
-      nop.i 999
+(p0)  fcmp.lt.unc.s1 p11, p10 = s_val, TWO_TO_NEG14
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fnma.s1 w = N, P_2, w
-      nop.i 999
+	nop.m 999
+(p11) fcmp.gt.s1 p11, p10 = s_val, NEGTWO_TO_NEG14
+	nop.i 999 ;;
 }
-;;
-
-//    Case 4: V_hi = N * P_2
-//    Case 4: U_hi = N_0 * d_1
 { .mfi
-      nop.m 999
-      fmpy.s1 V_hi = N, P_2               // V_hi = N * P_2 for |s| < 2^-14
-      nop.i 999
+	nop.m 999
+//
+//    Case 3: r = s_val + w (Z complete)
+//    Case 4: U_hi = N_0 * d_1
+//
+(p10) fmpy.s1 V_hi = N, P_2
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 U_hi = N_0, d_1             // U_hi = N_0 * d_1 for |s| < 2^-14
-      nop.i 999
+	nop.m 999
+(p11) fmpy.s1 U_hi = N_0, d_1
+	nop.i 999 ;;
 }
-;;
-
-//    Case 3: r = s_val + w (Z complete)
-//    Case 4: w = N * P_3
 { .mfi
-      nop.m 999
-      fadd.s1 r = s_val, w                // r = s_val + w for |s| >= 2^-14
-      nop.i 999
+	nop.m 999
+//
+//    Case 3: r = s_val + w (Z complete)
+//    Case 4: U_hi = N_0 * d_1
+//
+(p11) fmpy.s1 V_hi = N, P_2
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 w2 = N, P_3                 // w = N * P_3 for |s| < 2^-14
-      nop.i 999
+	nop.m 999
+(p11) fmpy.s1 U_hi = N_0, d_1
+	nop.i 999 ;;
 }
-;;
-
-//    Case 4: A =  U_hi + V_hi
-//    Note: Worry about switched sign of V_hi, so subtract instead of add.
-//    Case 4: V_lo = -N * P_2 - V_hi (U_hi is in place of V_hi in writeup)
-//    Note: the (-) is still missing for V_hi.
 { .mfi
-      nop.m 999
-      fsub.s1 A = U_hi, V_hi           // A = U_hi - V_hi for |s| < 2^-14
-      nop.i 999
+	nop.m 999
+//
+//    Decide between case 3 and 4:
+//    Case 3:  s <= -2**(-14) or s >= 2**(-14)
+//    Case 4: -2**(-14) < s < 2**(-14)
+//
+(p10) fadd.s1 r = s_val, w
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fnma.s1 V_lo = N, P_2, V_hi      // V_lo = V_hi - N * P_2 for |s| < 2^-14
-      nop.i 999
+	nop.m 999
+(p11) fmpy.s1 w = N, P_3
+	nop.i 999 ;;
 }
-;;
-
-//    Decide between case 3 and 4:
-//    Case 3:  |s| >= 2**(-14)     Set p10
-//    Case 4:  |s| <  2**(-14)     Set p11
+{ .mfi
+	nop.m 999
 //
-//    Case 4: U_lo = N_0 * d_1 - U_hi
+//    Case 4: We need abs of both U_hi and V_hi - dont
+//    worry about switched sign of V_hi .
+//
+(p11) fsub.s1 A = U_hi, V_hi
+	nop.i 999
+}
 { .mfi
-      nop.m 999
-      fms.s1 U_lo = N_0, d_1, U_hi     // U_lo = N_0*d_1 - U_hi for |s| < 2^-14
-      nop.i 999
+	nop.m 999
+//
+//    Case 4: A =  U_hi + V_hi
+//    Note: Worry about switched sign of V_hi, so subtract instead of add.
+//
+(p11) fnma.s1 V_lo = N, P_2, V_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fcmp.lt.s1 p11, p10 = s_val, TWO_TO_NEG14
-      nop.i 999
+	nop.m 999
+(p11) fms.s1 U_lo = N_0, d_1, U_hi
+	nop.i 999 ;;
 }
-;;
-
-//    Case 4: We need abs of both U_hi and V_hi - dont
-//    worry about switched sign of V_hi.
 { .mfi
-      nop.m 999
-      fabs V_hiabs = V_hi              // |V_hi| for |s| < 2^-14
-      nop.i 999
+	nop.m 999
+(p11) fabs V_hiabs = V_hi
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-(p11) fcmp.gt.s1 p11, p10 = s_val, NEGTWO_TO_NEG14
-      nop.i 999
+	nop.m 999
+//
+//    Case 4: V_hi = N * P_2
+//            w = N * P_3
+//    Note the product does not include the (-) as in the writeup
+//    so (-) missing for V_hi and w .
+(p10) fadd.s1 r = s_val, w
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
+//
 //    Case 3: c = s_val - r
+//    Case 4: U_lo = N_0 * d_1 - U_hi
+//
+(p11) fabs U_hiabs = U_hi
+	nop.i 999
+}
 { .mfi
-      nop.m 999
-      fabs U_hiabs = U_hi              // |U_hi| for |s| < 2^-14
-      nop.i 999
+	nop.m 999
+(p11) fmpy.s1 w = N, P_3
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fsub.s1 c = s_val, r             // c = s_val - r    for |s| >= 2^-14
-      nop.i 999
+	nop.m 999
+//
+//    Case 4: Set P_12 if U_hiabs >= V_hiabs
+//
+(p11) fadd.s1 C_hi = s_val, A
+	nop.i 999 ;;
 }
-;;
-
-// For Case 3, |s| >= 2^-14, determine if |r| < 1/4
+{ .mfi
+	nop.m 999
 //
 //    Case 4: C_hi = s_val + A
 //
-{ .mfi
-      nop.m 999
-(p11) fadd.s1 C_hi = s_val, A              // C_hi = s_val + A for |s| < 2^-14
-      nop.i 999
+(p11) fadd.s1 t = U_lo, V_lo
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p10) fcmp.lt.unc.s1 p14, p15 = r, TWO_TO_NEG2
-      nop.i 999
+	nop.m 999
+//
+//    Case 3: Is |r| < 2**(-2), if so set PR_7
+//    else set PR_8.
+//    Case 3: If PR_7 is set, prepare to branch to Small_R.
+//    Case 3: If PR_8 is set, prepare to branch to Normal_R.
+//
+(p10) fsub.s1 c = s_val, r
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      getf.sig sig_r = r               // Get signif of r if |s| >= 2^-33
-      fand B = B_mask1, r
-      nop.i 999
+	nop.m 999
+//
+//    Case 3: c = (s - r) + w (c complete)
+//
+(p11) fcmp.ge.unc.s1 p12, p13 = U_hiabs, V_hiabs
+	nop.i 999
 }
-;;
-
-//    Case 4: t = U_lo + V_lo
 { .mfi
-      getf.exp exp_r = r               // Extract signexp of r if |s| >= 2^-33
-(p11) fadd.s1 t = U_lo, V_lo               // t = U_lo + V_lo for |s| < 2^-14
-      nop.i 999
+	nop.m 999
+(p11) fms.s1 w = N_0, d_2, w
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p14) fcmp.gt.s1 p14, p15 = r, NEGTWO_TO_NEG2
-      nop.i 999
+	nop.m 999
+//
+//    Case 4: V_hi = N * P_2
+//            w = N * P_3
+//    Note the product does not include the (-) as in the writeup
+//    so (-) missing for V_hi and w .
+//
+(p10) fcmp.lt.unc.s1 p14, p15 = r, TWO_TO_NEG2
+	nop.i 999 ;;
 }
-;;
-
-//    Case 3: c = (s - r) + w (c complete)
 { .mfi
-      nop.m 999
-(p10) fadd.s1 c = c, w              // c = c + w for |s| >= 2^-14
-      nop.i 999
-}
-{ .mbb
-      nop.m 999
-(p14) br.cond.spnt TANL_SMALL_R     // Branch if 2^24 <= |x| < 2^63 and |r|< 1/4
-(p15) br.cond.sptk TANL_NORMAL_R_A  // Branch if 2^24 <= |x| < 2^63 and |r|>=1/4
+	nop.m 999
+(p14) fcmp.gt.s1 p14, p15 = r, NEGTWO_TO_NEG2
+	nop.i 999 ;;
 }
-;;
-
-
-// Here if 2^24 <= |x| < 2^63 and |s| < 2^-14  >>>>>>>  Case 4.
+{ .mfb
+	nop.m 999
 //
-//    Case 4: Set P_12 if U_hiabs >= V_hiabs
+//    Case 4: V_lo = -N * P_2 - V_hi (U_hi is in place of V_hi in writeup)
+//    Note: the (-) is still missing for V_hi .
 //    Case 4: w = w + N_0 * d_2
 //    Note: the (-) is now incorporated in w .
-{ .mfi
-      add table_ptr1 = 160, table_base           // Point to tanl_table_p1
-      fcmp.ge.unc.s1 p12, p13 = U_hiabs, V_hiabs
-      nop.i 999
+//
+(p10) fadd.s1 c = c, w
+//
+//    Case 4: t = U_lo + V_lo
+//    Note: remember V_lo should be (-), subtract instead of add. NO
+//
+(p14) br.cond.spnt L(TANL_SMALL_R) ;;
+}
+{ .mib
+	nop.m 999
+	nop.i 999
+(p15) br.cond.spnt L(TANL_NORMAL_R) ;;
 }
 { .mfi
-      nop.m 999
-      fms.s1 w2 = N_0, d_2, w2
-      nop.i 999
+	nop.m 999
+//
+//    Case 3: Vector off when |r| < 2**(-2).  Recall that PR_3 will be true.
+//    The remaining stuff is for Case 4.
+//
+(p12) fsub.s1 a = U_hi, A
+(p11) extr.u i_1 = N_fix_gr, 0, 1 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
+//
 //    Case 4: C_lo = s_val - C_hi
+//
+(p11) fadd.s1 t = t, w
+	nop.i 999
+}
 { .mfi
-      ldfe P1_1 = [table_ptr1], 16               // Load P1_1
-      fsub.s1 C_lo = s_val, C_hi
-      nop.i 999
+	nop.m 999
+(p13) fadd.s1 a = V_hi, A
+	nop.i 999 ;;
 }
-;;
+
+
 
 //
 //    Case 4: a = U_hi - A
 //            a = V_hi - A (do an add to account for missing (-) on V_hi
 //
-{ .mfi
-      ldfe P1_2 = [table_ptr1], 128              // Load P1_2
-(p12) fsub.s1 a = U_hi, A
-      nop.i 999
-}
-{ .mfi
-      nop.m 999
-(p13) fadd.s1 a = V_hi, A
-      nop.i 999
-}
-;;
 
-//    Case 4: t = U_lo + V_lo  + w
 { .mfi
-      ldfe Q1_1 = [table_ptr1], 16               // Load Q1_1
-      fadd.s1 t = t, w2
-      nop.i 999
+(p11)  addl           table_ptr1   = @ltoff(TANL_BASE_CONSTANTS), gp
+(p11) fsub.s1 C_lo = s_val, C_hi
+	nop.i 999
 }
 ;;
 
+
+
+//
 //    Case 4: a = (U_hi - A)  + V_hi
 //            a = (V_hi - A)  + U_hi
 //    In each case account for negative missing form V_hi .
 //
-{ .mfi
-      ldfe Q1_2 = [table_ptr1], 16               // Load Q1_2
-(p12) fsub.s1 a = a, V_hi
-      nop.i 999
-}
-{ .mfi
+
+
+{ .mmi
+(p11)  ld8 table_ptr1 = [table_ptr1]
       nop.m 999
-(p13) fsub.s1 a = U_hi, a
       nop.i 999
 }
 ;;
 
+
 //
 //    Case 4: C_lo = (s_val - C_hi) + A
 //
+{ .mmi
+(p11) add table_ptr1 = 224, table_ptr1 ;;
+(p11) ldfe P1_1 = [table_ptr1], 16
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-      fadd.s1 C_lo = C_lo, A
-      nop.i 999 ;;
+(p11) ldfe P1_2 = [table_ptr1], 128
+//
+//    Case 4: w = U_lo + V_lo  + w
+//
+(p12) fsub.s1 a = a, V_hi
+	nop.i 999 ;;
 }
 //
-//    Case 4: t = t + a
+//    Case 4: r = C_hi + C_lo
 //
 { .mfi
-      nop.m 999
-      fadd.s1 t = t, a
-      nop.i 999
+(p11) ldfe Q1_1 = [table_ptr1], 16
+(p11) fadd.s1 C_lo = C_lo, A
+	nop.i 999 ;;
 }
-;;
-
-//    Case 4: C_lo = C_lo + t
-//    Case 4: r = C_hi + C_lo
+//
+//    Case 4: c = C_hi - r
+//    Get [i_1] - lsb of N_fix_gr.
+//
 { .mfi
-      nop.m 999
-      fadd.s1 C_lo = C_lo, t
-      nop.i 999
+(p11) ldfe Q1_2 = [table_ptr1], 16
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
 { .mfi
-      nop.m 999
-      fadd.s1 r = C_hi, C_lo
-      nop.i 999
+	nop.m 999
+(p13) fsub.s1 a = U_hi, a
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+	nop.m 999
+(p11) fadd.s1 t = t, a
+	nop.i 999 ;;
+}
+{ .mfi
+	nop.m 999
 //
-//    Case 4: c = C_hi - r
+//    Case 4: t = t + a
 //
+(p11) fadd.s1 C_lo = C_lo, t
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-      fsub.s1 c = C_hi, r
-      nop.i 999
+	nop.m 999
+//
+//    Case 4: C_lo = C_lo + t
+//
+(p11) fadd.s1 r = C_hi, C_lo
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 rsq = r, r
-      add N_fix_gr = N_fix_gr, cot_flag // N = N + 1 (for cotl)
+	nop.m 999
+(p11) fsub.s1 c = C_hi, r
+	nop.i 999
 }
-;;
-
-//    Case 4: c = c + C_lo  finished.
+{ .mfi
+	nop.m 999
 //
-//    Is i_1 = lsb of N_fix_gr even or odd?
-//    if i_1 == 0, set PR_11, else set PR_12.
+//    Case 4: c = c + C_lo  finished.
+//    Is i_1  even or odd?
+//    if i_1 == 0, set PR_4, else set PR_5.
 //
+// r and c have been computed.
+// We know whether this is the sine or cosine routine.
+// Make sure ftz mode is set - should be automatic when using wre
+(p0)  fmpy.s1 rsq = r, r
+	nop.i 999 ;;
+}
 { .mfi
-      nop.m 999
-      fadd.s1 c = c , C_lo
-      tbit.z p11, p12 =  N_fix_gr, 0
+	nop.m 999
+(p11) fadd.s1 c = c , C_lo
+(p11) cmp.eq.unc p11, p12 =  0x0000, i_1 ;;
 }
-;;
-
-// r and c have been computed.
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) frcpa.s1 S_hi, p0 = f1, r
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd: Change sign of S_hi
 //
-(p11) fma.s1 Poly = rsq, P1_2, P1_1
-      nop.i 999 ;;
+(p11) fma.s1 Result = rsq, P1_2, P1_1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 P = rsq, Q1_2, Q1_1
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd:  Result  =  S_hi + S_lo      (User supplied rounding mode for C1)
 //
-       fmpy.s0 fp_tmp = fp_tmp, fp_tmp  // Dummy mult to set inexact
-      nop.i 999 ;;
+(p0)   fmpy.s0 fp_tmp = fp_tmp, fp_tmp  // Dummy mult to set inexact
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: rsq = r * r
 //    N odd:  S_hi = frcpa(r)
 //
 (p12) fmerge.ns S_hi = S_hi, S_hi
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: rsq = rsq * P1_2 + P1_1
 //    N odd:  poly1 =  1.0 +  S_hi * r    16 bits partial  account for necessary
 //
-(p11) fmpy.s1 Poly = rsq, Poly
-      nop.i 999 ;;
+(p11) fmpy.s1 Result = rsq, Result
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly1 = S_hi, r,f1
-(p11) tbit.z.unc p14, p15 = cot_flag, 0 // p14=1 for tanl; p15=1 for cotl
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
-//    N even: Poly =  Poly * rsq
+//    N even: Result =  Result * rsq
 //    N odd:  S_hi  = S_hi + S_hi*poly1  16 bits account for necessary
 //
-(p11) fma.s1 Poly = r, Poly, c
-      nop.i 999 ;;
+(p11) fma.s1 Result = r, Result, c
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 S_hi = S_hi, poly1, S_hi
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd:   S_hi  = S_hi * poly1 + S_hi   32 bits
 //
-(p14) fadd.s0 Result = r, Poly          // for tanl
-      nop.i 999 ;;
+(p11) fadd.s0 Result= r, Result
+	nop.i 999 ;;
 }
-
-.pred.rel "mutex",p15,p12
 { .mfi
-      nop.m 999
-(p15) fms.s0 Result = r, mOne, Poly     // for cotl
-      nop.i 999
-}
-{ .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly1 =  S_hi, r, f1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
-//    N even: Poly = Poly * r + c
+//    N even: Result = Result * r + c
 //    N odd:  poly1  = 1.0 + S_hi * r        32 bits partial
 //
 (p12) fma.s1 S_hi = S_hi, poly1, S_hi
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly1 = S_hi, r, f1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
-//    N even: Result = Poly + r  (Rounding mode S0)
+//    N even: Result1 = Result + r  (Rounding mode S0)
 //    N odd:  poly1  =  S_hi * r + 1.0       64 bits partial
 //
 (p12) fma.s1 S_hi = S_hi, poly1, S_hi
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd:  poly1  =  S_hi * poly + S_hi    64 bits
 //
 (p12) fma.s1 poly1 = S_hi, r, f1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd:  poly1  =  S_hi * r + 1.0
 //
 (p12) fma.s1 poly1 = S_hi, c, poly1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd:  poly1  =  S_hi * c + poly1
 //
 (p12) fmpy.s1 S_lo = S_hi, poly1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd:  S_lo  =  S_hi *  poly1
 //
 (p12) fma.s1 S_lo = P, r, S_lo
-(p12) tbit.z.unc p14, p15 = cot_flag, 0 ;; // p14=1 for tanl; p15=1 for cotl
-}
-
-{ .mfi
-      nop.m 999
-(p14) fadd.s0 Result = S_hi, S_lo           // for tanl
-      nop.i 999
+	nop.i 999 ;;
 }
 { .mfb
-      nop.m 999
+	nop.m 999
 //
 //    N odd:  S_lo  =  S_lo + r * P
 //
-(p15) fms.s0 Result = S_hi, mOne, S_lo      // for cotl
-      br.ret.sptk b0 ;;      // Exit for 2^24 <= |x| < 2^63 and |s| < 2^-14
+(p12) fadd.s0 Result = S_hi, S_lo
+(p0)   br.ret.sptk b0 ;;
 }
 
 
-TANL_SMALL_R:
-// Here if |r| < 1/4
-// r and c have been computed.
-// *****************************************************************
-// *****************************************************************
-// *****************************************************************
-//    N odd:  S_hi = frcpa(r)
-//    Get [i_1] - lsb of N_fix_gr.  Set p11 if N even, p12 if N odd.
-//    N even: rsq = r * r
+L(TANL_SMALL_R): 
+{ .mii
+	nop.m 999
+(p0)  extr.u i_1 = N_fix_gr, 0, 1 ;;
+(p0)  cmp.eq.unc p11, p12 = 0x0000, i_1
+}
 { .mfi
-      add table_ptr1 = 160, table_base    // Point to tanl_table_p1
-      frcpa.s1 S_hi, p0 = f1, r           // S_hi for N odd
-      add N_fix_gr = N_fix_gr, cot_flag   // N = N + 1 (for cotl)
+	nop.m 999
+(p0)  fmpy.s1 rsq = r, r
+	nop.i 999 ;;
 }
 { .mfi
-      add table_ptr2 = 400, table_base    // Point to Q1_7
-      fmpy.s1 rsq = r, r
-      nop.i 999
+(p0)  addl           table_ptr1   = @ltoff(TANL_BASE_CONSTANTS), gp
+(p12) frcpa.s1 S_hi, p0 = f1, r
+	nop.i 999
 }
 ;;
 
+
 { .mmi
-      ldfe P1_1 = [table_ptr1], 16
-;;
-      ldfe P1_2 = [table_ptr1], 16
-      tbit.z p11, p12 = N_fix_gr, 0
+(p0)  ld8 table_ptr1 = [table_ptr1]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+// *****************************************************************
+// *****************************************************************
+// *****************************************************************
+
 
+{ .mmi
+(p0)  add table_ptr1 = 224, table_ptr1 ;;
+(p0)  ldfe P1_1 = [table_ptr1], 16
+	nop.i 999 ;;
+}
+//    r and c have been computed.
+//    We know whether this is the sine or cosine routine.
+//    Make sure ftz mode is set - should be automatic when using wre
+//    |r| < 2**(-2)
 { .mfi
-      ldfe P1_3 = [table_ptr1], 96
-      nop.f 999
-      nop.i 999
+(p0)  ldfe P1_2 = [table_ptr1], 16
+(p11) fmpy.s1 r_to_the_8 = rsq, rsq
+	nop.i 999 ;;
 }
-;;
-
+//
+//    Set table_ptr1 to beginning of constant table.
+//    Get [i_1] - lsb of N_fix_gr.
+//
 { .mfi
-(p11) ldfe P1_9 = [table_ptr1], -16
+(p0)  ldfe P1_3 = [table_ptr1], 96
+//
+//    N even: rsq = r * r
+//    N odd:  S_hi = frcpa(r)
+//
 (p12) fmerge.ns S_hi = S_hi, S_hi
-      nop.i 999
+	nop.i 999 ;;
 }
+//
+//    Is i_1  even or odd?
+//    if i_1 == 0, set PR_11.
+//    if i_1 != 0, set PR_12.
+//
 { .mfi
-      nop.m 999
-(p11) fmpy.s1 r_to_the_8 = rsq, rsq
-      nop.i 999
-}
-;;
-
+(p11) ldfe P1_9 = [table_ptr1], -16
 //
 //    N even: Poly2 = P1_7 + Poly2 * rsq
 //    N odd:  poly2 = Q1_5 + poly2 * rsq
 //
-{ .mfi
-(p11) ldfe P1_8 = [table_ptr1], -16
 (p11) fadd.s1 CORR = rsq, f1
-      nop.i 999
+	nop.i 999 ;;
 }
-;;
-
+{ .mmi
+(p11) ldfe P1_8 = [table_ptr1], -16 ;;
 //
 //    N even: Poly1 = P1_2 + P1_3 * rsq
-//    N odd:  poly1 =  1.0 +  S_hi * r
+//    N odd:  poly1 =  1.0 +  S_hi * r     
 //    16 bits partial  account for necessary (-1)
 //
-{ .mmi
 (p11) ldfe P1_7 = [table_ptr1], -16
-;;
-(p11) ldfe P1_6 = [table_ptr1], -16
-      nop.i 999
+	nop.i 999 ;;
 }
-;;
-
 //
 //    N even: Poly1 = P1_1 + Poly1 * rsq
 //    N odd:  S_hi  =  S_hi + S_hi * poly1)     16 bits account for necessary
 //
+{ .mfi
+(p11) ldfe P1_6 = [table_ptr1], -16
 //
 //    N even: Poly2 = P1_5 + Poly2 * rsq
 //    N odd:  poly2 = Q1_3 + poly2 * rsq
 //
-{ .mfi
-(p11) ldfe P1_5 = [table_ptr1], -16
 (p11) fmpy.s1 r_to_the_8 = r_to_the_8, r_to_the_8
-      nop.i 999
+	nop.i 999 ;;
 }
-{ .mfi
-      nop.m 999
-(p12) fma.s1 poly1 =  S_hi, r, f1
-      nop.i 999
-}
-;;
-
 //
 //    N even: Poly1 =  Poly1 * rsq
 //    N odd:  poly1  = 1.0 + S_hi * r         32 bits partial
 //
+{ .mfi
+(p11) ldfe P1_5 = [table_ptr1], -16
+(p12) fma.s1 poly1 =  S_hi, r, f1
+	nop.i 999 ;;
+}
 
 //
 //    N even: CORR =  CORR * c
@@ -2418,30 +2290,44 @@ TANL_SMALL_R:
 
 { .mmf
 (p11) ldfe P1_4 = [table_ptr1], -16
-      nop.m 999
+(p0)  addl           table_ptr2   = @ltoff(TANL_BASE_CONSTANTS), gp
 (p11) fmpy.s1 CORR =  CORR, c
 }
 ;;
 
-{ .mfi
+
+{ .mmi
+(p0)  ld8 table_ptr2 = [table_ptr2]
       nop.m 999
+      nop.i 999
+}
+;;
+
+
+{ .mii
+(p0)  add table_ptr2 = 464, table_ptr2
+	nop.i 999 ;;
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
 (p11) fma.s1 Poly1 = P1_3, rsq, P1_2
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-(p12) ldfe Q1_7 = [table_ptr2], -16
+(p0)  ldfe Q1_7 = [table_ptr2], -16
 (p12) fma.s1 S_hi = S_hi, poly1, S_hi
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-(p12) ldfe Q1_6 = [table_ptr2], -16
+(p0)  ldfe Q1_6 = [table_ptr2], -16
 (p11) fma.s1 Poly2 = P1_9, rsq, P1_8
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mmi
-(p12) ldfe Q1_5 = [table_ptr2], -16 ;;
+(p0)  ldfe Q1_5 = [table_ptr2], -16 ;;
 (p12) ldfe Q1_4 = [table_ptr2], -16
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
 (p12) ldfe Q1_3 = [table_ptr2], -16
@@ -2450,795 +2336,735 @@ TANL_SMALL_R:
 //    N odd:  poly2 = Q1_6 + Q1_7 * rsq
 //
 (p11) fma.s1 Poly1 = Poly1, rsq, P1_1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
 (p12) ldfe Q1_2 = [table_ptr2], -16
 (p12) fma.s1 poly1 = S_hi, r, f1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
 (p12) ldfe Q1_1 = [table_ptr2], -16
 (p11) fma.s1 Poly2 = Poly2, rsq, P1_7
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: CORR =  rsq + 1
 //    N even: r_to_the_8 =  rsq * rsq
 //
 (p11) fmpy.s1 Poly1 = Poly1, rsq
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 S_hi = S_hi, poly1, S_hi
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly2 = Q1_7, rsq, Q1_6
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p11) fma.s1 Poly2 = Poly2, rsq, P1_6
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly1 = S_hi, r, f1
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly2 = poly2, rsq, Q1_5
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p11) fma.s1 Poly2= Poly2, rsq, P1_5
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 S_hi =  S_hi, poly1, S_hi
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly2 = poly2, rsq, Q1_4
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: r_to_the_8 = r_to_the_8 * r_to_the_8
 //    N odd:  poly1  =  S_hi * r + 1.0       64 bits partial
 //
 (p11) fma.s1 Poly2 = Poly2, rsq, P1_4
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
-//    N even: Poly = CORR + Poly * r
+//    N even: Result = CORR + Poly * r
 //    N odd:  P = Q1_1 + poly2 * rsq
 //
 (p12) fma.s1 poly1 = S_hi, r, f1
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly2 = poly2, rsq, Q1_3
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: Poly2 = P1_4 + Poly2 * rsq
 //    N odd:  poly2 = Q1_2 + poly2 * rsq
 //
 (p11) fma.s1 Poly = Poly2, r_to_the_8, Poly1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly1 = S_hi, c, poly1
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 poly2 = poly2, rsq, Q1_2
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: Poly = Poly1 + Poly2 * r_to_the_8
 //    N odd:  S_hi =  S_hi * poly1 + S_hi    64 bits
 //
-(p11) fma.s1 Poly = Poly, r, CORR
-      nop.i 999 ;;
+(p11) fma.s1 Result = Poly, r, CORR
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
-//    N even: Result =  r + Poly  (User supplied rounding mode)
+//    N even: Result =  r + Result  (User supplied rounding mode)
 //    N odd:  poly1  =  S_hi * c + poly1
 //
 (p12) fmpy.s1 S_lo = S_hi, poly1
-(p11) tbit.z.unc p14, p15 = cot_flag, 0 // p14=1 for tanl; p15=1 for cotl
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fma.s1 P = poly2, rsq, Q1_1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd:  poly1  =  S_hi * r + 1.0
 //
 //
 //    N odd:  S_lo  =  S_hi *  poly1
 //
-(p14) fadd.s0 Result = Poly, r          // for tanl
-      nop.i 999
+(p11) fadd.s0 Result = Result, r
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-(p15) fms.s0 Result = Poly, mOne, r     // for cotl
-      nop.i 999 ;;
-}
-
-{ .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd:  S_lo  =  Q1_1 * c + S_lo
 //
 (p12) fma.s1 S_lo = Q1_1, c, S_lo
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fmpy.s0 fp_tmp = fp_tmp, fp_tmp  // Dummy mult to set inexact
-      nop.i 999 ;;
+	nop.m 999
+(p0)   fmpy.s0 fp_tmp = fp_tmp, fp_tmp  // Dummy mult to set inexact
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd:  Result =  S_lo + r * P
 //
 (p12) fma.s1 Result = P, r, S_lo
-(p12) tbit.z.unc p14, p15 = cot_flag, 0 ;; // p14=1 for tanl; p15=1 for cotl
+	nop.i 999 ;;
 }
-
+{ .mfb
+	nop.m 999
 //
 //    N odd:  Result = Result + S_hi  (user supplied rounding mode)
 //
-{ .mfi
-      nop.m 999
-(p14) fadd.s0 Result = Result, S_hi         // for tanl
-      nop.i 999
-}
-{ .mfb
-      nop.m 999
-(p15) fms.s0 Result = Result, mOne, S_hi    // for cotl
-      br.ret.sptk b0 ;;              // Exit |r| < 1/4 path
+(p12) fadd.s0 Result = Result, S_hi
+(p0)   br.ret.sptk b0 ;;
 }
 
 
-TANL_NORMAL_R:
-// Here if 1/4 <= |x| < pi/4  or  if |x| >= 2^63 and |r| >= 1/4
+L(TANL_NORMAL_R): 
+{ .mfi
+(p0)  getf.sig sig_r = r
 // *******************************************************************
 // *******************************************************************
 // *******************************************************************
 //
 //    r and c have been computed.
+//    Make sure ftz mode is set - should be automatic when using wre
+//
 //
+//    Get [i_1] -  lsb of N_fix_gr alone.
+//
+(p0)  fmerge.s  Pos_r = f1, r
+(p0)  extr.u i_1 = N_fix_gr, 0, 1 ;;
+}
 { .mfi
-      nop.m 999
-      fand B = B_mask1, r
-      nop.i 999
+	nop.m 999
+(p0)  fmerge.s  sgn_r =  r, f1
+(p0)  cmp.eq.unc p11, p12 = 0x0000, i_1 ;;
+}
+{ .mfi
+	nop.m 999
+	nop.f 999
+(p0)  extr.u lookup = sig_r, 58, 5
+}
+{ .mlx
+	nop.m 999
+(p0)  movl Create_B = 0x8200000000000000 ;;
+}
+{ .mfi
+(p0)  addl           table_ptr1   = @ltoff(TANL_BASE_CONSTANTS), gp
+	nop.f 999
+(p0)  dep Create_B = lookup, Create_B, 58, 5
 }
 ;;
 
-TANL_NORMAL_R_A:
-// Enter here if pi/4 <= |x| < 2^63 and |r| >= 1/4
-//    Get the 5 bits or r for the lookup.   1.xxxxx ....
+
+//
+//    Get [i_1] -  lsb of N_fix_gr alone.
+//    Pos_r = abs (r)
+//
+
+
 { .mmi
-      add table_ptr1 = 416, table_base     // Point to tanl_table_p2
-      mov GR_exp_2tom65 = 0xffff - 65      // Scaling constant for B
-      extr.u lookup = sig_r, 58, 5
+(p0)  ld8 table_ptr1 = [table_ptr1]
+      nop.m 999
+      nop.i 999
 }
 ;;
 
+
 { .mmi
-      ldfe P2_1 = [table_ptr1], 16
-      setf.exp TWO_TO_NEG65 = GR_exp_2tom65  // 2^-65 for scaling B if exp_r=-2
-      add N_fix_gr = N_fix_gr, cot_flag      // N = N + 1 (for cotl)
+	nop.m 999
+(p0)  setf.sig B = Create_B
+//
+//    Set table_ptr1 and table_ptr2 to base address of
+//    constant table.
+//
+(p0)  add table_ptr1 = 480, table_ptr1 ;;
 }
-;;
-
-.pred.rel "mutex",p11,p12
-//    B =  2^63 * 1.xxxxx 100...0
-{ .mfi
-      ldfe P2_2 = [table_ptr1], 16
-      for B = B_mask2, B
-      mov table_offset = 512               // Assume table offset is 512
+{ .mmb
+	nop.m 999
+//
+//    Is i_1 or i_0  == 0 ?
+//    Create the constant  1 00000 1000000000000000000000...
+//
+(p0)  ldfe P2_1 = [table_ptr1], 16
+	nop.b 999
 }
-;;
-
-{ .mfi
-      ldfe P2_3 = [table_ptr1], 16
-      fmerge.s  Pos_r = f1, r
-      tbit.nz p8,p9 = exp_r, 0
+{ .mmi
+	nop.m 999 ;;
+(p0)  getf.exp exp_r = Pos_r
+	nop.i 999
 }
-;;
-
+//
+//    Get r's exponent
+//    Get r's significand
+//
+{ .mmi
+(p0)  ldfe P2_2 = [table_ptr1], 16 ;;
+//
+//    Get the 5 bits of r for the lookup.   1.xxxxx ....
+//    from sig_r.
+//    Grab  lsb of exp of B
+//
+(p0)  ldfe P2_3 = [table_ptr1], 16
+	nop.i 999 ;;
+}
+{ .mii
+	nop.m 999
+(p0)  andcm table_offset = 0x0001, exp_r ;;
+(p0)  shl table_offset = table_offset, 9 ;;
+}
+{ .mii
+	nop.m 999
+//
+//    Deposit   0 00000 1000000000000000000000... on
+//              1 xxxxx yyyyyyyyyyyyyyyyyyyyyy...,
+//    getting rid of the ys.
 //    Is  B = 2** -2 or  B= 2** -1? If 2**-1, then
 //    we want an offset of 512 for table addressing.
-{ .mii
-      add table_ptr2 = 1296, table_base     // Point to tanl_table_cm2
-(p9)  shladd table_offset = lookup, 4, table_offset
-(p8)  shladd table_offset = lookup, 4, r0
+//
+(p0)  shladd table_offset = lookup, 4, table_offset ;;
+//
+//    B =  ........ 1xxxxx 1000000000000000000...
+//
+(p0)  add table_ptr1 = table_ptr1, table_offset ;;
 }
-;;
-
-{ .mmi
-      add table_ptr1 = table_ptr1, table_offset  // Point to T_hi
-      add table_ptr2 = table_ptr2, table_offset  // Point to C_hi
-      add table_ptr3 = 2128, table_base     // Point to tanl_table_scim2
+{ .mmb
+	nop.m 999
+//
+//   B =  ........ 1xxxxx 1000000000000000000...
+//   Convert B so it has the same exponent as Pos_r
+//
+(p0)  ldfd T_hi = [table_ptr1], 8
+	nop.b 999 ;;
 }
-;;
 
-{ .mmi
-      ldfd T_hi = [table_ptr1], 8                // Load T_hi
-;;
-      ldfd C_hi = [table_ptr2], 8                // Load C_hi
-      add table_ptr3 = table_ptr3, table_offset  // Point to SC_inv
-}
-;;
+
 
 //
 //    x = |r| - B
+//    Load T_hi.
+//    Load C_hi.
 //
-//   Convert B so it has the same exponent as Pos_r before subtracting
-{ .mfi
-      ldfs T_lo = [table_ptr1]                   // Load T_lo
-(p9)  fnma.s1 x = B, FR_2tom64, Pos_r
-      nop.i 999
-}
-{ .mfi
-      nop.m 999
-(p8)  fnma.s1 x = B, TWO_TO_NEG65, Pos_r
-      nop.i 999
+
+{ .mmf
+(p0)  addl           table_ptr2   = @ltoff(TANL_BASE_CONSTANTS), gp
+(p0)  ldfs T_lo = [table_ptr1]
+(p0)  fmerge.se B = Pos_r, B
 }
 ;;
 
-{ .mfi
-      ldfs C_lo = [table_ptr2]                   // Load C_lo
-      nop.f 999
+
+{ .mmi
+(p0)  ld8 table_ptr2 = [table_ptr2]
+      nop.m 999
       nop.i 999
 }
 ;;
 
-{ .mfi
-      ldfe SC_inv = [table_ptr3]                 // Load SC_inv
-      fmerge.s  sgn_r = r, f1
-      tbit.z p11, p12 = N_fix_gr, 0              // p11 if N even, p12 if odd
 
+{ .mii
+(p0)  add table_ptr2 = 1360, table_ptr2
+	nop.i 999 ;;
+(p0)  add table_ptr2 = table_ptr2, table_offset ;;
 }
-;;
-
+{ .mfi
+(p0)  ldfd C_hi = [table_ptr2], 8
+(p0)  fsub.s1 x = Pos_r, B
+	nop.i 999 ;;
+}
+{ .mii
+(p0)  ldfs C_lo = [table_ptr2],255
+	nop.i 999 ;;
 //
 //    xsq = x * x
 //    N even: Tx = T_hi * x
+//    Load T_lo.
+//    Load C_lo - increment pointer to get SC_inv 
+//    - cant get all the way, do an add later.
+//
+(p0)  add table_ptr2 = 569, table_ptr2 ;;
+}
 //
 //    N even: Tx1 = Tx + 1
 //    N odd:  Cx1 = 1 - Cx
 //
-
 { .mfi
-      nop.m 999
-      fmpy.s1 xsq = x, x
-      nop.i 999
+(p0)  ldfe SC_inv = [table_ptr2], 0
+	nop.f 999
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
+(p0)  fmpy.s1 xsq = x, x
+	nop.i 999
+}
+{ .mfi
+	nop.m 999
 (p11) fmpy.s1 Tx = T_hi, x
-      nop.i 999
+	nop.i 999 ;;
 }
-;;
-
-//
-//    N odd: Cx = C_hi * x
-//
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fmpy.s1 Cx = C_hi, x
-      nop.i 999
+	nop.i 999 ;;
 }
-;;
+{ .mfi
+	nop.m 999
 //
-//    N even and odd: P = P2_3 + P2_2 * xsq
+//    N odd: Cx = C_hi * x
 //
-{ .mfi
-      nop.m 999
-      fma.s1 P = P2_3, xsq, P2_2
-      nop.i 999
+(p0)  fma.s1 P = P2_3, xsq, P2_2
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
+//
+//    N even and odd: P = P2_3 + P2_2 * xsq
+//
 (p11) fadd.s1 Tx1 = Tx, f1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: D = C_hi - tanx
 //    N odd: D = T_hi + tanx
 //
 (p11) fmpy.s1 CORR = SC_inv, T_hi
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 Sx = SC_inv, x
-      nop.i 999 ;;
+	nop.m 999
+(p0)  fmpy.s1 Sx = SC_inv, x
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fmpy.s1 CORR = SC_inv, C_hi
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fsub.s1 V_hi = f1, Cx
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fma.s1 P = P, xsq, P2_1
-      nop.i 999
+	nop.m 999
+(p0)  fma.s1 P = P, xsq, P2_1
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even and odd: P = P2_1 + P * xsq
 //
 (p11) fma.s1 V_hi = Tx, Tx1, f1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: Result  = sgn_r * tail + T_hi (user rounding mode for C1)
 //    N odd:  Result  = sgn_r * tail + C_hi (user rounding mode for C1)
 //
-      fmpy.s0 fp_tmp = fp_tmp, fp_tmp  // Dummy mult to set inexact
-      nop.i 999 ;;
+(p0)   fmpy.s0 fp_tmp = fp_tmp, fp_tmp  // Dummy mult to set inexact
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 CORR = CORR, c
-      nop.i 999 ;;
+	nop.m 999
+(p0)  fmpy.s1 CORR = CORR, c
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fnma.s1 V_hi = Cx,V_hi,f1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: V_hi = Tx * Tx1 + 1
 //    N odd: Cx1 = 1 - Cx * Cx1
 //
-      fmpy.s1 P = P, xsq
-      nop.i 999
+(p0)  fmpy.s1 P = P, xsq
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even and odd: P = P * xsq
 //
 (p11) fmpy.s1 V_hi = V_hi, T_hi
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even and odd: tail = P * tail + V_lo
 //
 (p11) fmpy.s1 T_hi = sgn_r, T_hi
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
-      fmpy.s1 CORR = CORR, sgn_r
-      nop.i 999 ;;
+	nop.m 999
+(p0)  fmpy.s1 CORR = CORR, sgn_r
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fmpy.s1 V_hi = V_hi,C_hi
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: V_hi = T_hi * V_hi
 //    N odd: V_hi  = C_hi * V_hi
 //
-      fma.s1 tanx = P, x, x
-      nop.i 999
+(p0)  fma.s1 tanx = P, x, x
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fnmpy.s1 C_hi = sgn_r, C_hi
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: V_lo = 1 - V_hi + C_hi
 //    N odd: V_lo = 1 - V_hi + T_hi
 //
 (p11) fadd.s1 CORR = CORR, T_lo
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fsub.s1 CORR = CORR, C_lo
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even and odd: tanx = x + x * P
 //    N even and odd: Sx = SC_inv * x
 //
 (p11) fsub.s1 D = C_hi, tanx
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fadd.s1 D = T_hi, tanx
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N odd: CORR = SC_inv * C_hi
 //    N even: CORR = SC_inv * T_hi
 //
-      fnma.s1 D = V_hi, D, f1
-      nop.i 999 ;;
+(p0)  fnma.s1 D = V_hi, D, f1
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even and odd: D = 1 - V_hi * D
 //    N even and odd: CORR = CORR * c
 //
-      fma.s1 V_hi = V_hi, D, V_hi
-      nop.i 999 ;;
+(p0)  fma.s1 V_hi = V_hi, D, V_hi
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even and odd: V_hi = V_hi + V_hi * D
 //    N even and odd: CORR = sgn_r * CORR
 //
 (p11) fnma.s1 V_lo = V_hi, C_hi, f1
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fnma.s1 V_lo = V_hi, T_hi, f1
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: CORR = COOR + T_lo
 //    N odd: CORR = CORR - C_lo
 //
 (p11) fma.s1 V_lo = tanx, V_hi, V_lo
-      tbit.nz p15, p0 = cot_flag, 0       // p15=1 if we compute cotl
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fnma.s1 V_lo = tanx, V_hi, V_lo
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
-
 { .mfi
-      nop.m 999
-(p15) fms.s1 T_hi = f0, f0, T_hi        // to correct result's sign for cotl
-      nop.i 999
-}
-{ .mfi
-      nop.m 999
-(p15) fms.s1 C_hi = f0, f0, C_hi        // to correct result's sign for cotl
-      nop.i 999
-};;
-
-{ .mfi
-      nop.m 999
-(p15) fms.s1 sgn_r = f0, f0, sgn_r      // to correct result's sign for cotl
-      nop.i 999
-};;
-
-{ .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: V_lo = V_lo + V_hi * tanx
 //    N odd: V_lo = V_lo - V_hi * tanx
 //
 (p11) fnma.s1 V_lo = C_lo, V_hi, V_lo
-      nop.i 999
+	nop.i 999
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 (p12) fnma.s1 V_lo = T_lo, V_hi, V_lo
-      nop.i 999 ;;
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N  even: V_lo = V_lo - V_hi * C_lo
 //    N  odd: V_lo = V_lo - V_hi * T_lo
 //
-      fmpy.s1 V_lo = V_hi, V_lo
-      nop.i 999 ;;
+(p0)  fmpy.s1 V_lo = V_hi, V_lo
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even and odd: V_lo = V_lo * V_hi
 //
-      fadd.s1 tail = V_hi, V_lo
-      nop.i 999 ;;
+(p0)  fadd.s1 tail = V_hi, V_lo
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even and odd: tail = V_hi + V_lo
 //
-      fma.s1 tail = tail, P, V_lo
-      nop.i 999 ;;
+(p0)  fma.s1 tail = tail, P, V_lo
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even: T_hi = sgn_r * T_hi
 //    N odd : C_hi = -sgn_r * C_hi
 //
-      fma.s1 tail = tail, Sx, CORR
-      nop.i 999 ;;
+(p0)  fma.s1 tail = tail, Sx, CORR
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even and odd: tail = Sx * tail + CORR
 //
-      fma.s1 tail = V_hi, Sx, tail
-      nop.i 999 ;;
+(p0)  fma.s1 tail = V_hi, Sx, tail
+	nop.i 999 ;;
 }
 { .mfi
-      nop.m 999
+	nop.m 999
 //
 //    N even an odd: tail = Sx * V_hi + tail
 //
 (p11) fma.s0 Result = sgn_r, tail, T_hi
-      nop.i 999
+	nop.i 999
 }
 { .mfb
-      nop.m 999
+	nop.m 999
 (p12) fma.s0 Result = sgn_r, tail, C_hi
-      br.ret.sptk b0 ;;                 // Exit for 1/4 <= |r| < pi/4
+(p0)   br.ret.sptk b0 ;;
 }
 
-TANL_DENORMAL:
-// Here if x denormal
+L(TANL_SPECIAL):
 { .mfb
-      getf.exp GR_signexp_x = Norm_Arg          // Get sign and exponent of x
-      nop.f 999
-      br.cond.sptk TANL_COMMON                  // Return to common code
+        nop.m 999
+(p0)   fmpy.s0 Arg = Arg, f0
+(p0)   br.ret.sptk b0 ;;
 }
-;;
-
-
-TANL_SPECIAL:
-TANL_UNSUPPORTED:
 //
 //     Code for NaNs, Unsupporteds, Infs, or +/- zero ?
 //     Invalid raised for Infs and SNaNs.
 //
 
-{ .mfi
-      nop.m 999
-      fmerge.s  f10 = f8, f8            // Save input for error call
-      tbit.nz p6, p7 = cot_flag, 0      // p6=1 if we compute cotl
-}
-;;
-
-{ .mfi
-      nop.m 999
-(p6)  fclass.m p6, p7 = f8, 0x7         // Test for zero (cotl only)
-      nop.i 999
-}
-;;
+.endp  tanl
+ASM_SIZE_DIRECTIVE(tanl)
 
-.pred.rel "mutex", p6, p7
-{ .mfi
-(p6)  mov GR_Parameter_Tag = 225        // (cotl)
-(p6)  frcpa.s0  f8, p0 = f1, f8         // cotl(+-0) = +-Inf
-      nop.i 999
-}
-{ .mfb
-      nop.m 999
-(p7)  fmpy.s0 f8 = f8, f0
-(p7)  br.ret.sptk b0
-}
-;;
-
-GLOBAL_IEEE754_END(tanl)
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
+//
+//     Special Code to handle very large argument case.
+//     Call int __libm_pi_by_2_reduce(&x,&r,&c)
+//     for |arguments| >= 2**63
+//     (Arg or x) is in f8
+//     Address to save r and c as double
+// *******************************************************************
+// *******************************************************************
+// *******************************************************************
 
-LOCAL_LIBM_ENTRY(__libm_error_region)
+.proc __libm_callout
+__libm_callout:
+L(TANL_ARG_TOO_LARGE): 
 .prologue
-
-// (1)
 { .mfi
-      add           GR_Parameter_Y=-32,sp        // Parameter 2 value
-      nop.f         0
+        add   r50=-32,sp                        // Parameter: r address
+        nop.f 0
 .save   ar.pfs,GR_SAVE_PFS
-      mov           GR_SAVE_PFS=ar.pfs           // Save ar.pfs
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
 }
 { .mfi
 .fframe 64
-      add sp=-64,sp                              // Create new stack
-      nop.f 0
-      mov GR_SAVE_GP=gp                          // Save gp
+        add sp=-64,sp                           // Create new stack
+        nop.f 0
+        mov GR_SAVE_GP=gp                       // Save gp
 };;
-
-// (2)
 { .mmi
-      stfe [GR_Parameter_Y] = f1,16              // STORE Parameter 2 on stack
-      add GR_Parameter_X = 16,sp                 // Parameter 1 address
+        stfe [r50] = f0,16                      // Clear Parameter r on stack
+        add  r49 = 16,sp                        // Parameter x address
 .save   b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0                          // Save b0
+        mov GR_SAVE_B0=b0                       // Save b0
 };;
-
 .body
-// (3)
 { .mib
-      stfe [GR_Parameter_X] = f10                // STORE Parameter 1 on stack
-      add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
-      nop.b 0
+        stfe [r50] = f0,-16                     // Clear Parameter c on stack
+        nop.i 0
+        nop.b 0
 }
 { .mib
-      stfe [GR_Parameter_Y] = f8                 // STORE Parameter 3 on stack
-      add   GR_Parameter_Y = -16,GR_Parameter_Y
-      br.call.sptk b0=__libm_error_support#      // Call error handling function
-};;
-{ .mmi
-      nop.m 0
-      nop.m 0
-      add   GR_Parameter_RESULT = 48,sp
-};;
-
-// (4)
-{ .mmi
-      ldfe  f8 = [GR_Parameter_RESULT]           // Get return result off stack
-.restore sp
-      add   sp = 64,sp                           // Restore stack pointer
-      mov   b0 = GR_SAVE_B0                      // Restore return address
+        stfe [r49] = Arg                        // Store Parameter x on stack
+        nop.i 0
+(p0)    br.call.sptk b0=__libm_pi_by_2_reduce# ;;
 };;
-{ .mib
-      mov   gp = GR_SAVE_GP                      // Restore gp
-      mov   ar.pfs = GR_SAVE_PFS                 // Restore ar.pfs
-      br.ret.sptk     b0                         // Return
-};;
-
-LOCAL_LIBM_END(__libm_error_region)
-
-.type   __libm_error_support#,@function
-.global __libm_error_support#
-
-
-// *******************************************************************
-// *******************************************************************
-// *******************************************************************
 //
-//     Special Code to handle very large argument case.
-//     Call int __libm_pi_by_2_reduce(x,r,c) for |arguments| >= 2**63
-//     The interface is custom:
-//       On input:
-//         (Arg or x) is in f8
-//       On output:
-//         r is in f8
-//         c is in f9
-//         N is in r8
-//     We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127.  We
-//     use this to eliminate save/restore of key fp registers in this calling
-//     function.
+//     Load 2^-2
 //
-// *******************************************************************
-// *******************************************************************
-// *******************************************************************
-
-LOCAL_LIBM_ENTRY(__libm_callout)
-TANL_ARG_TOO_LARGE:
-.prologue
-{ .mfi
-      add table_ptr2 = 144, table_base        // Point to 2^-2
-      nop.f 999
-.save   ar.pfs,GR_SAVE_PFS
-      mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
-}
-;;
-
-//     Load 2^-2, -2^-2
 { .mmi
-      ldfps  TWO_TO_NEG2, NEGTWO_TO_NEG2 = [table_ptr2]
-      setf.sig B_mask1 = bmask1               // Form mask to get 5 msb of r
-.save   b0, GR_SAVE_B0
-      mov GR_SAVE_B0=b0                       // Save b0
-};;
-
-.body
+(p0)   ldfe  Arg =[r49],16   
 //
-//     Call argument reduction with x in f8
-//     Returns with N in r8, r in f8, c in f9
-//     Assumes f71-127 are preserved across the call
+//     Call argument reduction
 //
-{ .mib
-      setf.sig B_mask2 = bmask2               // Form mask to form B from r
-      mov GR_SAVE_GP=gp                       // Save gp
-      br.call.sptk b0=__libm_pi_by_2_reduce#
+(p0)   ldfs  TWO_TO_NEG2 = [table_ptr2],4
+//     Get Arg off stack
+//     Get r off stack - hi order part
+//     Get c off stack - lo order part
+(p0)   mov   N_fix_gr = r8 ;;
 }
-;;
-
-//
-//     Is |r| < 2**(-2)
-//
-{ .mfi
-      getf.sig sig_r = r                     // Extract significand of r
-      fcmp.lt.s1  p6, p0 = r, TWO_TO_NEG2
-      mov   gp = GR_SAVE_GP                  // Restore gp
+{ .mmb
+(p0)   ldfe  r =[r50],16  
+(p0)   ldfs  NEGTWO_TO_NEG2 = [table_ptr2],4
+	nop.b 999 ;;
 }
-;;
-
 { .mfi
-      getf.exp exp_r = r                     // Extract signexp of r
-      nop.f 999
-      mov    b0 = GR_SAVE_B0                 // Restore return address
+(p0)   ldfe  c =[r50],-32  
+	nop.f 999
+	nop.i 999 ;;
 }
-;;
-
+{ .mfi
+.restore sp
+       add   sp = 64,sp                       // Restore stack pointer
 //
-//     Get N_fix_gr
+//     Is |r| < 2**(-2)
 //
+(p0)   fcmp.lt.unc.s1  p6, p0 = r, TWO_TO_NEG2 
+mov    b0 = GR_SAVE_B0                        // Restore return address
+};;
 { .mfi
-      mov   N_fix_gr = r8
-(p6)  fcmp.gt.unc.s1  p6, p0 = r, NEGTWO_TO_NEG2
-      mov   ar.pfs = GR_SAVE_PFS             // Restore pfs
-}
-;;
-
+       mov   gp = GR_SAVE_GP                  // Restore gp
+(p6)   fcmp.gt.unc.s1  p6, p0 = r, NEGTWO_TO_NEG2 
+       mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+};;
 { .mbb
-      nop.m 999
-(p6)  br.cond.spnt TANL_SMALL_R              // Branch if |r| < 1/4
-      br.cond.sptk TANL_NORMAL_R             // Branch if 1/4 <= |r| < pi/4
+	nop.m 999
+(p6)   br.cond.spnt L(TANL_SMALL_R)
+(p0)   br.cond.sptk L(TANL_NORMAL_R) ;;
 }
-;;
 
-LOCAL_LIBM_END(__libm_callout)
+.endp __libm_callout
+ASM_SIZE_DIRECTIVE(__libm_callout)
 
 .type __libm_pi_by_2_reduce#,@function
 .global __libm_pi_by_2_reduce#
diff --git a/sysdeps/ia64/fpu/s_trunc.S b/sysdeps/ia64/fpu/s_trunc.S
index b9ad03b5a8..0be91200e3 100644
--- a/sysdeps/ia64/fpu/s_trunc.S
+++ b/sysdeps/ia64/fpu/s_trunc.S
@@ -1,10 +1,11 @@
 .file "trunc.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 7/7/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,28 +21,33 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
+.align 32
+.global trunc#
+
+.section .text
+.proc  trunc#
+.align 32
+
 // History
 //==============================================================
-// 07/07/00 Created
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance and reduced code size
+// 7/7/00: Created 
 //==============================================================
 
 // API
@@ -49,28 +55,25 @@
 // double trunc(double x)
 //==============================================================
 
-// general input registers:
-// r14 - r18
+#include "libm_support.h"
 
-rExpBias   = r14
-rSignexp   = r15
-rExp       = r16
-rExpMask   = r17
-rBigexp    = r18
+// general input registers:  
+TRUNC_GR_FFFF      = r14
+TRUNC_GR_signexp   = r15
+TRUNC_GR_exponent  = r16
+TRUNC_GR_expmask   = r17
+TRUNC_GR_bigexp    = r18
 
 // floating-point registers:
-// f8 - f10
-
-fXtruncInt = f9
-fNormX     = f10
+// f8, f9, f11, f12
 
-// predicate registers used:
-// p6, p7
+// predicate registers used: 
+// p6, p7, p8, p9, p10, p11
 
 // Overview of operation
 //==============================================================
 // double trunc(double x)
-// Return an integer value (represented as a double) less than or
+// Return an integer value (represented as a double) less than or 
 // equal to x in magnitude.
 // This is x rounded toward zero to an integral value.
 //==============================================================
@@ -94,73 +97,105 @@ fNormX     = f10
 // If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
-.section .text
-GLOBAL_LIBM_ENTRY(trunc)
+trunc:
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fcvt.fx.trunc.s1 fXtruncInt  = f8      // Convert to int in significand
-      addl             rBigexp = 0x10033, r0 // Set exponent at which is integer
+      getf.exp         TRUNC_GR_signexp  = f8
+      fcvt.fx.trunc.s1 f9  = f8
+      addl             TRUNC_GR_bigexp = 0x10033, r0
 }
 { .mfi
-      mov              rExpBias    = 0x0FFFF // Form exponent bias
-      fnorm.s1         fNormX  = f8          // Normalize input
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
-}
-;;
+      mov              TRUNC_GR_FFFF    = 0x0FFFF
+      fnorm.d          f11  = f8
+      mov              TRUNC_GR_expmask    = 0x1FFFF 
+};;
+//    get the exponent of x
+//    convert x to integer in significand of f9
+//    Normalize x - this will raise invalid on SNaNs, the
+//    denormal operand flag - and possibly a spurious U flag
+//    get exponent only mask (will exclude sign bit)
 
 { .mfi
       nop.m            0
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
+      fclass.m         p7,p8 = f8, 0x0b  
       nop.i            0
 }
-;;
-
-{ .mfb
+{ .mfi
       nop.m            0
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-(p7)  br.cond.spnt     TRUNC_UNORM           // Branch if x unorm
-}
-;;
-
-TRUNC_COMMON:
-// Return here from TRUNC_UNORM
+      fcmp.eq.unc.s1   p9,p0 = f8,f0
+      nop.i      0       
+};;
+//    fclass to set p7 if unnorm 
+{ .mmi
+      and              TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask ;;
+(p8)  cmp.ge.unc       p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+(p8)  cmp.ne.unc       p6,p0 = TRUNC_GR_exponent, TRUNC_GR_signexp
+};;
+//    Get the exponent of x
+//    Test if exponent such that result already an integer
+//    Test if x < 0
+{ .mmi
+(p9)  cmp.eq.andcm     p10,p11 = r0, r0 
+(p6)  cmp.lt.unc       p6,p0 = TRUNC_GR_exponent, TRUNC_GR_FFFF
+      nop.i 0 
+};;
+//    If -1 < x < 0, set p6, turn off p10 and p11, and set result to -0.0
+{ .mfb
+(p6)  cmp.eq.andcm     p10,p11 = r0, r0 
+(p6)  fmerge.s         f8 = f8, f0
+      nop.b 0
+};;
+//    If not a unnorm, set p10 if x already is a big int, nan, or inf?
+//    If not a unnorm, set p10 if x already is a big int, nan, or inf?
+.pred.rel "mutex",p10,p11    
 { .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.d.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
-}
-;;
-
-{ .mfi
-      cmp.lt           p6,p0 = rExp, rExpBias // Is |x| < 1?
-      fcvt.xf          f8 = fXtruncInt        // Result, assume 1 <= |x| < 2^52
-      cmp.ge           p7,p0 = rExp, rBigexp  // Is |x| >= 2^52?
-}
-;;
-
-// We must correct result if |x| < 1, or |x| >= 2^52
-.pred.rel "mutex",p6,p7
-{ .mfi
       nop.m            0
-(p6)  fmerge.s         f8 = fNormX, f0        // If |x| < 1, result sgn(x)*0
-      nop.i            0
+(p11) fcvt.xf          f8   = f9 
+      nop.b            0 
 }
 { .mfb
       nop.m            0
-(p7)  fma.d.s0         f8 = fNormX, f1, f0    // If |x| >= 2^52, result x
-      br.ret.sptk      b0                     // Exit main path
-}
-;;
-
-
-TRUNC_UNORM:
-// Here if x unorm
+(p10) fma.d.s1         f8   = f11,f1,f0 
+(p8)  br.ret.sptk      b0 
+};;
+//    If not an unnorm and not a big int, nan, or +/-inf, convert significand
+//    back to f8.
+//    If not an unnorm and a big int, nan, or +/-inf, return fnorm'd x
+//    If not an unnorm, return.
+//    If unnorm, get the exponent again - perhaps it wasn't a denorm.
+{ .mfb
+(p7)  getf.exp         TRUNC_GR_signexp  = f11
+(p7)  fcvt.fx.trunc.s1 f12   = f11 
+      nop.b 0
+};;
+{ .mfb
+      and              TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask
+      fcmp.lt.unc.s1   p9,p0 = f8,f0
+      nop.b            0
+};;
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     TRUNC_COMMON           // Return to main path
+      cmp.ge.unc       p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+      nop.f            0
+      nop.b            0
+};;
+// If a unnorm, check to see if value is already a big int. 
+{ .mfb
+      nop.m 0
+(p11) fcvt.xf          f8   = f12 
+      nop.b 0
 }
-;;
-
-GLOBAL_LIBM_END(trunc)
+{ .mfi
+      nop.m 0
+(p10) fma.d.s1         f8   = f11,f1,f0 
+      nop.i 0
+};;
+{ .mfb
+      nop.m 0
+(p9)  fmerge.ns        f8   = f1,f8 
+      br.ret.sptk      b0 
+};;
+// If so return it. Otherwise, return (fcvt.xf(fcvt.fx.trunc(x)))
+// Make sure the result is negative if it should be - that is
+// negative(denormal) -> -0.
+.endp trunc
+ASM_SIZE_DIRECTIVE(trunc)
diff --git a/sysdeps/ia64/fpu/s_truncf.S b/sysdeps/ia64/fpu/s_truncf.S
index ff40bc7101..0ac4181209 100644
--- a/sysdeps/ia64/fpu/s_truncf.S
+++ b/sysdeps/ia64/fpu/s_truncf.S
@@ -1,10 +1,11 @@
 .file "truncf.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 7/7/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,28 +21,33 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
+.align 32
+.global truncf#
+
+.section .text
+.proc  truncf#
+.align 32
+
 // History
 //==============================================================
-// 07/07/00 Created
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance and reduced code size
+// 7/7/00: Created 
 //==============================================================
 
 // API
@@ -49,28 +55,25 @@
 // float truncf(float x)
 //==============================================================
 
-// general input registers:
-// r14 - r18
+#include "libm_support.h"
 
-rExpBias   = r14
-rSignexp   = r15
-rExp       = r16
-rExpMask   = r17
-rBigexp    = r18
+// general input registers:  
+TRUNC_GR_FFFF      = r14
+TRUNC_GR_signexp   = r15
+TRUNC_GR_exponent  = r16
+TRUNC_GR_expmask   = r17
+TRUNC_GR_bigexp    = r18
 
 // floating-point registers:
-// f8 - f10
-
-fXtruncInt = f9
-fNormX     = f10
+// f8, f9, f11, f12
 
-// predicate registers used:
-// p6, p7
+// predicate registers used: 
+// p6, p7, p8, p9, p10, p11
 
 // Overview of operation
 //==============================================================
 // float truncf(float x)
-// Return an integer value (represented as a float) less than or
+// Return an integer value (represented as a float) less than or 
 // equal to x in magnitude.
 // This is x rounded toward zero to an integral value.
 //==============================================================
@@ -94,73 +97,105 @@ fNormX     = f10
 // If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
-.section .text
-GLOBAL_LIBM_ENTRY(truncf)
+truncf:
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fcvt.fx.trunc.s1 fXtruncInt  = f8      // Convert to int in significand
-      addl             rBigexp = 0x10016, r0 // Set exponent at which is integer
+      getf.exp         TRUNC_GR_signexp  = f8
+      fcvt.fx.trunc.s1 f9  = f8
+      addl             TRUNC_GR_bigexp = 0x10016, r0
 }
 { .mfi
-      mov              rExpBias    = 0x0FFFF // Form exponent bias
-      fnorm.s1         fNormX  = f8          // Normalize input
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
-}
-;;
+      mov              TRUNC_GR_FFFF    = 0x0FFFF
+      fnorm.s          f11  = f8
+      mov              TRUNC_GR_expmask    = 0x1FFFF 
+};;
+//    get the exponent of x
+//    convert x to integer in significand of f9
+//    Normalize x - this will raise invalid on SNaNs, the
+//    denormal operand flag - and possibly a spurious U flag
+//    get exponent only mask (will exclude sign bit)
 
 { .mfi
       nop.m            0
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
+      fclass.m         p7,p8 = f8, 0x0b  
       nop.i            0
 }
-;;
-
-{ .mfb
+{ .mfi
       nop.m            0
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-(p7)  br.cond.spnt     TRUNC_UNORM           // Branch if x unorm
-}
-;;
-
-TRUNC_COMMON:
-// Return here from TRUNC_UNORM
+      fcmp.eq.unc.s1   p9,p0 = f8,f0
+      nop.i      0       
+};;
+//    fclass to set p7 if unnorm 
+{ .mmi
+      and              TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask ;;
+(p8)  cmp.ge.unc       p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+(p8)  cmp.ne.unc       p6,p0 = TRUNC_GR_exponent, TRUNC_GR_signexp
+};;
+//    Get the exponent of x
+//    Test if exponent such that result already an integer
+//    Test if x < 0
+{ .mmi
+(p9)  cmp.eq.andcm     p10,p11 = r0, r0 
+(p6)  cmp.lt.unc       p6,p0 = TRUNC_GR_exponent, TRUNC_GR_FFFF
+      nop.i 0 
+};;
+//    If -1 < x < 0, set p6, turn off p10 and p11, and set result to -0.0
+{ .mfb
+(p6)  cmp.eq.andcm     p10,p11 = r0, r0 
+(p6)  fmerge.s         f8 = f8, f0
+      nop.b 0
+};;
+//    If not a unnorm, set p10 if x already is a big int, nan, or inf?
+//    If not a unnorm, set p10 if x already is a big int, nan, or inf?
+.pred.rel "mutex",p10,p11    
 { .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.s.s0         f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
-}
-;;
-
-{ .mfi
-      cmp.lt           p6,p0 = rExp, rExpBias // Is |x| < 1?
-      fcvt.xf          f8 = fXtruncInt        // Result, assume 1 <= |x| < 2^23
-      cmp.ge           p7,p0 = rExp, rBigexp  // Is |x| >= 2^23?
-}
-;;
-
-// We must correct result if |x| < 1, or |x| >= 2^23
-.pred.rel "mutex",p6,p7
-{ .mfi
       nop.m            0
-(p6)  fmerge.s         f8 = fNormX, f0        // If |x| < 1, result sgn(x)*0
-      nop.i            0
+(p11) fcvt.xf          f8   = f9 
+      nop.b            0 
 }
 { .mfb
       nop.m            0
-(p7)  fma.s.s0         f8 = fNormX, f1, f0    // If |x| >= 2^23, result x
-      br.ret.sptk      b0                     // Exit main path
-}
-;;
-
-
-TRUNC_UNORM:
-// Here if x unorm
+(p10) fma.s.s1         f8   = f11,f1,f0 
+(p8)  br.ret.sptk      b0 
+};;
+//    If not an unnorm and not a big int, nan, or +/-inf, convert significand
+//    back to f8.
+//    If not an unnorm and a big int, nan, or +/-inf, return fnorm'd x
+//    If not an unnorm, return.
+//    If unnorm, get the exponent again - perhaps it wasn't a denorm.
+{ .mfb
+(p7)  getf.exp         TRUNC_GR_signexp  = f11
+(p7)  fcvt.fx.trunc.s1 f12   = f11 
+      nop.b 0
+};;
+{ .mfb
+      and              TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask
+      fcmp.lt.unc.s1   p9,p0 = f8,f0
+      nop.b            0
+};;
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     TRUNC_COMMON           // Return to main path
+      cmp.ge.unc       p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+      nop.f            0
+      nop.b            0
+};;
+// If a unnorm, check to see if value is already a big int. 
+{ .mfb
+      nop.m 0
+(p11) fcvt.xf          f8   = f12 
+      nop.b 0
 }
-;;
-
-GLOBAL_LIBM_END(truncf)
+{ .mfi
+      nop.m 0
+(p10) fma.s.s1         f8   = f11,f1,f0 
+      nop.i 0
+};;
+{ .mfb
+      nop.m 0
+(p9)  fmerge.ns        f8   = f1,f8 
+      br.ret.sptk      b0 
+};;
+// If so return it. Otherwise, return (fcvt.xf(fcvt.fx.trunc(x)))
+// Make sure the result is negative if it should be - that is
+// negative(denormal) -> -0.
+.endp truncf
+ASM_SIZE_DIRECTIVE(truncf)
diff --git a/sysdeps/ia64/fpu/s_truncl.S b/sysdeps/ia64/fpu/s_truncl.S
index 1afa19ba2b..91bf96ce90 100644
--- a/sysdeps/ia64/fpu/s_truncl.S
+++ b/sysdeps/ia64/fpu/s_truncl.S
@@ -1,10 +1,11 @@
 .file "truncl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 7/7/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,57 +21,59 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
+.align 32
+.global truncl#
+
+.section .text
+.proc  truncl#
+.align 32
+
 // History
 //==============================================================
-// 07/07/00 Created
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance and reduced code size
+// 7/7/00: Created 
 //==============================================================
 
 // API
 //==============================================================
-// long double truncl(long double x)
+// long double truncl(long double x)
 //==============================================================
 
-// general input registers:
-// r14 - r18
+#include "libm_support.h"
 
-rExpBias   = r14
-rSignexp   = r15
-rExp       = r16
-rExpMask   = r17
-rBigexp    = r18
+// general input registers:  
+TRUNC_GR_FFFF      = r14
+TRUNC_GR_signexp   = r15
+TRUNC_GR_exponent  = r16
+TRUNC_GR_expmask   = r17
+TRUNC_GR_bigexp    = r18
 
 // floating-point registers:
-// f8 - f10
+// f8, f9, f11, f12
 
-fXtruncInt = f9
-fNormX     = f10
-
-// predicate registers used:
-// p6, p7
+// predicate registers used: 
+// p6, p7, p8, p9, p10, p11
 
 // Overview of operation
 //==============================================================
 // long double truncl(long double x)
-// Return an integer value (represented as a long double) less than or
+// Return an integer value (represented as a long double) less than or 
 // equal to x in magnitude.
 // This is x rounded toward zero to an integral value.
 //==============================================================
@@ -94,73 +97,105 @@ fNormX     = f10
 // If we multiply by 2^23, we no longer have a fractional part
 // So input is an integer value already.
 
-.section .text
-GLOBAL_LIBM_ENTRY(truncl)
+truncl:
 
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fcvt.fx.trunc.s1 fXtruncInt  = f8      // Convert to int in significand
-      addl             rBigexp = 0x1003e, r0 // Set exponent at which is integer
+      getf.exp         TRUNC_GR_signexp  = f8
+      fcvt.fx.trunc.s1 f9  = f8
+      addl             TRUNC_GR_bigexp = 0x1003e, r0
 }
 { .mfi
-      mov              rExpBias    = 0x0FFFF // Form exponent bias
-      fnorm.s1         fNormX  = f8          // Normalize input
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
-}
-;;
+      mov              TRUNC_GR_FFFF    = 0x0FFFF
+      fnorm            f11  = f8
+      mov              TRUNC_GR_expmask    = 0x1FFFF 
+};;
+//    get the exponent of x
+//    convert x to integer in significand of f9
+//    Normalize x - this will raise invalid on SNaNs, the
+//    denormal operand flag - and possibly a spurious U flag
+//    get exponent only mask (will exclude sign bit)
 
 { .mfi
       nop.m            0
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
+      fclass.m         p7,p8 = f8, 0x0b  
       nop.i            0
 }
-;;
-
-{ .mfb
+{ .mfi
       nop.m            0
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-(p7)  br.cond.spnt     TRUNC_UNORM           // Branch if x unorm
-}
-;;
-
-TRUNC_COMMON:
-// Return here from TRUNC_UNORM
+      fcmp.eq.unc.s1   p9,p0 = f8,f0
+      nop.i      0       
+};;
+//    fclass to set p7 if unnorm 
+{ .mmi
+      and              TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask ;;
+(p8)  cmp.ge.unc       p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+(p8)  cmp.ne.unc       p6,p0 = TRUNC_GR_exponent, TRUNC_GR_signexp
+};;
+//    Get the exponent of x
+//    Test if exponent such that result already an integer
+//    Test if x < 0
+{ .mmi
+(p9)  cmp.eq.andcm     p10,p11 = r0, r0 
+(p6)  cmp.lt.unc       p6,p0 = TRUNC_GR_exponent, TRUNC_GR_FFFF
+      nop.i 0 
+};;
+//    If -1 < x < 0, set p6, turn off p10 and p11, and set result to -0.0
+{ .mfb
+(p6)  cmp.eq.andcm     p10,p11 = r0, r0 
+(p6)  fmerge.s         f8 = f8, f0
+      nop.b 0
+};;
+//    If not an unnorm, p10 is set if x is already a big int, nan, or inf;
+//    p11 is set if the truncated integer in f9 must be converted back instead.
+.pred.rel "mutex",p10,p11    
 { .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.s0           f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
-}
-;;
-
-{ .mfi
-      cmp.lt           p6,p0 = rExp, rExpBias // Is |x| < 1?
-      fcvt.xf          f8 = fXtruncInt        // Result, assume 1 <= |x| < 2^63
-      cmp.ge           p7,p0 = rExp, rBigexp  // Is |x| >= 2^63?
-}
-;;
-
-// We must correct result if |x| < 1, or |x| >= 2^63
-.pred.rel "mutex",p6,p7
-{ .mfi
       nop.m            0
-(p6)  fmerge.s         f8 = fNormX, f0        // If |x| < 1, result sgn(x)*0
-      nop.i            0
+(p11) fcvt.xf          f8   = f9 
+      nop.b            0 
 }
 { .mfb
       nop.m            0
-(p7)  fma.s0           f8 = fNormX, f1, f0    // If |x| >= 2^63, result x
-      br.ret.sptk      b0                     // Exit main path
-}
-;;
-
-
-TRUNC_UNORM:
-// Here if x unorm
+(p10) fma.s1           f8   = f11,f1,f0 
+(p8)  br.ret.sptk      b0 
+};;
+//    If not an unnorm and not a big int, nan, or +/-inf, convert significand
+//    back to f8.
+//    If not an unnorm and a big int, nan, or +/-inf, return fnorm'd x.
+//    If not an unnorm, return.
+//    If unnorm, get the exponent again - perhaps it wasn't a denorm.
+{ .mfb
+(p7)  getf.exp         TRUNC_GR_signexp  = f11
+(p7)  fcvt.fx.trunc.s1 f12   = f11 
+      nop.b 0
+};;
+{ .mfb
+      and              TRUNC_GR_exponent = TRUNC_GR_signexp, TRUNC_GR_expmask
+      fcmp.lt.unc.s1   p9,p0 = f8,f0
+      nop.b            0
+};;
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     TRUNC_COMMON           // Return to main path
+      cmp.ge.unc       p10,p11 = TRUNC_GR_exponent, TRUNC_GR_bigexp
+      nop.f            0
+      nop.b            0
+};;
+// If a unnorm, check to see if value is already a big int. 
+{ .mfb
+      nop.m 0
+(p11) fcvt.xf          f8   = f12 
+      nop.b 0
 }
-;;
-
-GLOBAL_LIBM_END(truncl)
+{ .mfi
+      nop.m 0
+(p10) fma.s1           f8   = f11,f1,f0 
+      nop.i 0
+};;
+{ .mfb
+      nop.m 0
+(p9)  fmerge.ns        f8   = f1,f8 
+      br.ret.sptk      b0 
+};;
+// If so return it. Otherwise, return (fcvt.xf(fcvt.fx.trunc(x)))
+// Make sure the result is negative if it should be - that is
+// negative(denormal) -> -0.
+.endp truncl
+ASM_SIZE_DIRECTIVE(truncl)
diff --git a/sysdeps/ia64/libc-tls.c b/sysdeps/ia64/libc-tls.c
index 3a6b8e5532..f1df4cd12e 100644
--- a/sysdeps/ia64/libc-tls.c
+++ b/sysdeps/ia64/libc-tls.c
@@ -30,7 +30,7 @@ void *
 __tls_get_addr (size_t m, size_t offset)
 {
   dtv_t *dtv = THREAD_DTV ();
-  return (char *) dtv[1].pointer.val + offset;
+  return (char *) dtv[1].pointer + offset;
 }
 
 #endif
diff --git a/sysdeps/linkmap.h b/sysdeps/linkmap.h
deleted file mode 100644
index 470b4d3e5f..0000000000
--- a/sysdeps/linkmap.h
+++ /dev/null
@@ -1,4 +0,0 @@
-struct link_map_machine
-  {
-    /* empty by default */
-  };
diff --git a/sysdeps/m68k/bits/link.h b/sysdeps/m68k/bits/link.h
deleted file mode 100644
index 9d0a94592f..0000000000
--- a/sysdeps/m68k/bits/link.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef	_LINK_H
-# error "Never include <bits/link.h> directly; use <link.h> instead."
-#endif
-
-
-/* Registers for entry into PLT on M68K.  */
-typedef struct La_m68k_regs
-{
-  uint32_t lr_a0;
-  uint32_t lr_a1;
-  uint32_t lr_sp;
-} La_m68k_regs;
-
-/* Return values for calls from PLT on M68K.  */
-typedef struct La_m68k_retval
-{
-  uint32_t lrv_d0;
-  uint32_t lrv_d1;
-  uint32_t lrv_a0;
-  long double lrv_fp0;
-} La_m68k_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf32_Addr la_m68k_gnu_pltenter (Elf32_Sym *__sym, unsigned int __ndx,
-					uintptr_t *__refcook,
-					uintptr_t *__defcook,
-					La_m68k_regs *__regs,
-					unsigned int *__flags,
-					const char *__symname,
-					long int *__framesizep);
-extern unsigned int la_m68k_gnu_pltexit (Elf32_Sym *__sym, unsigned int __ndx,
-					 uintptr_t *__refcook,
-					 uintptr_t *__defcook,
-					 const La_m68k_regs *__inregs,
-					 La_m68k_retval *__outregs,
-					 const char *symname);
-
-__END_DECLS
diff --git a/sysdeps/m68k/dl-machine.h b/sysdeps/m68k/dl-machine.h
index f31b68772d..146c5866a9 100644
--- a/sysdeps/m68k/dl-machine.h
+++ b/sysdeps/m68k/dl-machine.h
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  m68k version.
-   Copyright (C) 1996-2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1996-2001, 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -85,8 +85,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	{
 	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
 
-	  if (GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), l))
+	  if (_dl_name_match_p (GLRO(dl_profile), l))
 	    {
 	      /* This is the object we are looking for.  Say that we really
 		 want profiling and the timers are started.  */
@@ -102,6 +101,36 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   return lazy;
 }
 
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.  */
+#define TRAMPOLINE_TEMPLATE(tramp_name, fixup_name) \
+"| Trampoline for " #fixup_name "\n\
+	.globl " #tramp_name "\n\
+	.type " #tramp_name ", @function\n\
+" #tramp_name ":\n\
+	| Save %a0 (struct return address) and %a1.\n\
+	move.l %a0, -(%sp)\n\
+	move.l %a1, -(%sp)\n\
+	| Call the real address resolver.\n\
+	jbsr " #fixup_name "\n\
+	| Restore register %a0 and %a1.\n\
+	move.l (%sp)+, %a1\n\
+	move.l (%sp)+, %a0\n\
+	| Pop parameters\n\
+	addq.l #8, %sp\n\
+	| Call real function.\n\
+	jmp (%d0)\n\
+	.size " #tramp_name ", . - " #tramp_name "\n"
+#ifndef PROF
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE \
+asm (TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup) \
+     TRAMPOLINE_TEMPLATE (_dl_runtime_profile, profile_fixup));
+#else
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE \
+asm (TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup) \
+     ".globl _dl_runtime_profile\n" \
+     ".set _dl_runtime_profile, _dl_runtime_resolve");
+#endif
 #define ELF_MACHINE_RUNTIME_FIXUP_ARGS long int save_a0, long int save_a1
 
 
@@ -187,13 +216,9 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc,
   return value;
 }
 
-/* Names of the architecture-specific auditing callback functions.  */
-#define ARCH_LA_PLTENTER m68k_gnu_pltenter
-#define ARCH_LA_PLTEXIT m68k_gnu_pltexit
-
 #endif /* !dl_machine_h */
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 
 /* Perform the relocation specified by RELOC and SYM (which is fully resolved).
    MAP is the object containing the reloc.  */
@@ -211,8 +236,9 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
   else
     {
       const Elf32_Sym *const refsym = sym;
-      struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
-      Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
+      Elf32_Addr value = RESOLVE (&sym, version, r_type);
+      if (sym)
+	value += sym->st_value;
 
       switch (r_type)
 	{
@@ -287,4 +313,4 @@ elf_machine_lazy_rel (struct link_map *map,
     _dl_reloc_bad_type (map, ELF32_R_TYPE (reloc->r_info), 1);
 }
 
-#endif /* RESOLVE_MAP */
+#endif /* RESOLVE */
diff --git a/sysdeps/m68k/dl-trampoline.S b/sysdeps/m68k/dl-trampoline.S
deleted file mode 100644
index 8791280371..0000000000
--- a/sysdeps/m68k/dl-trampoline.S
+++ /dev/null
@@ -1,129 +0,0 @@
-/* PLT trampolines.  m68k version.
-   Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.text
-	.globl _dl_runtime_resolve
-	.type _dl_runtime_resolve, @function
-_dl_runtime_resolve:
-	| Save %a0 (struct return address) and %a1.
-	move.l %a0, -(%sp)
-	move.l %a1, -(%sp)
-	| Call the real address resolver.
-	jbsr _dl_fixup
-	| Restore register %a0 and %a1.
-	move.l (%sp)+, %a1
-	move.l (%sp)+, %a0
-	| Pop parameters
-	addq.l #8, %sp
-	| Call real function.
-	jmp (%d0)
-	.size _dl_runtime_resolve, . - _dl_runtime_resolve
-
-	.text
-	.globl _dl_runtime_profile
-	.type _dl_runtime_profile, @function
-_dl_runtime_profile:
-	pea 8(%sp)
-	move.l %a1, -(%sp)
-	move.l %a0, -(%sp)
-	pea -1.w
-	| Push parameters for _dl_profile_fixup
-	pea (%sp)
-	pea 8(%sp)
-	move.l 32(%sp), -(%sp)
-	move.l 32(%sp), -(%sp)
-	move.l 32(%sp), -(%sp)
-	subq.l #8, %sp
-	| Call the real address resolver.
-	jbsr _dl_profile_fixup
-	| Pop parameters
-	lea 28(%sp), %sp
-	move.l (%sp), %d1
-	jpl 1f
-	addq.l #4, %sp
-	| Restore register %a0 and %a1.
-	move.l (%sp)+, %a0
-	move.l (%sp)+, %a1
-	lea 12(%sp), %sp
-	| Call real function.
-	jmp (%d0)
-
-	/*
-	    +24     return address
-	    +20     PLT1
-	    +16     PLT2
-	    +12     %sp
-	    +8      %a1
-	    +4      %a0
-	   %sp      free
-	*/
-1:	move.l %a2, (%sp)
-	move.l %sp, %a2
-	move.l %sp, %a0
-	lea 28(%sp), %a1
-	| Round framesize up to even
-	addq.l #1, %d1
-	lsr #1, %d1
-	sub.l %d1, %a0
-	sub.l %d1, %a0
-	move.l %a0, %sp
-	jra 2f
-1:	move.w (%a1)+, (%a0)+
-2:	dbra %d1,1b
-	/*
-	   %a2+24  return address
-	   %a2+20  PLT1
-	   %a2+16  PLT2
-	   %a2+12  %sp
-	   %a2+8   %a1
-	   %a2+4   %a0
-	   %a2     %a2
-	   %sp     copied stack frame
-	*/
-
-	move.l 4(%a2), %a0
-	move.l 8(%a2), %a1
-	jsr (%d0)
-	move.l %a2, %sp
-	move.l (%sp)+, %a2
-	/*
-	    +20     return address
-	    +16     PLT1
-	    +12     PLT2
-	    +8      %sp
-	    +4      %a1
-	   %sp      %a0
-	*/
-	fmove.x %fp0, -(%sp)
-	move.l %d1, -(%sp)
-	move.l %d0, -(%sp)
-	pea (%sp)
-	pea 20(%sp)
-	move.l 40(%sp), -(%sp)
-	move.l 40(%sp), -(%sp)
-	jbsr _dl_call_pltexit
-	lea 16(%sp), %sp
-	move.l (%sp)+, %d0
-	move.l (%sp)+, %d1
-	fmove.x (%sp)+, %fp0
-	lea 20(%sp), %sp
-	rts
-	.size _dl_runtime_profile, . - _dl_runtime_profile
diff --git a/sysdeps/mach/hurd/tls.h b/sysdeps/mach/hurd/tls.h
index cce42ef374..bc15b4dab6 100644
--- a/sysdeps/mach/hurd/tls.h
+++ b/sysdeps/mach/hurd/tls.h
@@ -1,5 +1,5 @@
 /* Definitions for thread-local data handling.  Hurd version.
-   Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
 #if defined HAVE_TLS_SUPPORT && !defined ASSEMBLER
 
 # include <stddef.h>
-# include <stdbool.h>
 # include <mach/mig_errors.h>
 # include <mach.h>
 
@@ -32,11 +31,7 @@
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  void *pointer;
 } dtv_t;
 
 
diff --git a/sysdeps/mips/Makefile b/sysdeps/mips/Makefile
index 49ad3e1b91..849785a550 100644
--- a/sysdeps/mips/Makefile
+++ b/sysdeps/mips/Makefile
@@ -6,7 +6,3 @@ endif
 ifeq ($(subdir),setjmp)
 sysdep_routines += setjmp_aux
 endif
-
-ifeq ($(subdir),rt)
-librt-sysdep_routines += rt-sysdep
-endif
diff --git a/sysdeps/powerpc/bits/link.h b/sysdeps/powerpc/bits/link.h
deleted file mode 100644
index f8e6734fe0..0000000000
--- a/sysdeps/powerpc/bits/link.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Machine-specific declarations for dynamic linker interface.  PowerPC version
-   Copyright (C) 2004, 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef	_LINK_H
-# error "Never include <bits/link.h> directly; use <link.h> instead."
-#endif
-
-
-#if __ELF_NATIVE_CLASS == 32
-
-/* Registers for entry into PLT on PPC32.  */
-typedef struct La_ppc32_regs
-{
-  uint32_t lr_reg[8];
-  double lr_fp[8];
-  uint32_t lr_vreg[12][4];
-  uint32_t lr_r1;
-  uint32_t lr_lr;
-} La_ppc32_regs;
-
-/* Return values for calls from PLT on PPC32.  */
-typedef struct La_ppc32_retval
-{
-  uint32_t lrv_r3;
-  uint32_t lrv_r4;
-  double lrv_fp[8];
-  uint32_t lrv_v2[4];
-} La_ppc32_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf32_Addr la_ppc32_gnu_pltenter (Elf32_Sym *__sym,
-					 unsigned int __ndx,
-					 uintptr_t *__refcook,
-					 uintptr_t *__defcook,
-					 La_ppc32_regs *__regs,
-					 unsigned int *__flags,
-					 const char *__symname,
-					 long int *__framesizep);
-extern unsigned int la_ppc32_gnu_pltexit (Elf32_Sym *__sym,
-					  unsigned int __ndx,
-					  uintptr_t *__refcook,
-					  uintptr_t *__defcook,
-					  const La_ppc32_regs *__inregs,
-					  La_ppc32_retval *__outregs,
-					  const char *symname);
-
-__END_DECLS
-
-#else
-
-/* Registers for entry into PLT on PPC64.  */
-typedef struct La_ppc64_regs
-{
-  uint64_t lr_reg[8];
-  double lr_fp[13];
-  uint64_t __padding;
-  uint32_t lr_vreg[12][4];
-  uint64_t lr_r1;
-  uint64_t lr_lr;
-} La_ppc64_regs;
-
-/* Return values for calls from PLT on PPC64.  */
-typedef struct La_ppc64_retval
-{
-  uint64_t lrv_r3;
-  uint64_t lrv_r4;
-  double lrv_fp[8];
-  uint32_t lrv_v2[4];
-} La_ppc64_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf64_Addr la_ppc64_gnu_pltenter (Elf64_Sym *__sym,
-					 unsigned int __ndx,
-					 uintptr_t *__refcook,
-					 uintptr_t *__defcook,
-					 La_ppc64_regs *__regs,
-					 unsigned int *__flags,
-					 const char *__symname,
-					 long int *__framesizep);
-extern unsigned int la_ppc64_gnu_pltexit (Elf64_Sym *__sym,
-					  unsigned int __ndx,
-					  uintptr_t *__refcook,
-					  uintptr_t *__defcook,
-					  const La_ppc64_regs *__inregs,
-					  La_ppc64_retval *__outregs,
-					  const char *symname);
-
-__END_DECLS
-
-#endif
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.c b/sysdeps/powerpc/powerpc32/dl-machine.c
index 4120a02382..06960716b9 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.c
+++ b/sysdeps/powerpc/powerpc32/dl-machine.c
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation functions.  PowerPC version.
-   Copyright (C) 1995-2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -242,8 +242,7 @@ __elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
 					 : _dl_runtime_resolve);
 	  Elf32_Word offset;
 
-	  if (profile && GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), map))
+	  if (profile && _dl_name_match_p (GLRO(dl_profile), map))
 	    /* This is the object we are looking for.  Say that we really
 	       want profiling and the timers are started.  */
 	    GL(dl_profile_map) = map;
diff --git a/sysdeps/powerpc/powerpc32/dl-machine.h b/sysdeps/powerpc/powerpc32/dl-machine.h
index de3b9e923a..a8c1e3e490 100644
--- a/sysdeps/powerpc/powerpc32/dl-machine.h
+++ b/sysdeps/powerpc/powerpc32/dl-machine.h
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  PowerPC version.
-   Copyright (C) 1995-2002, 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -100,6 +100,160 @@ elf_machine_load_address (void)
 /* The PLT uses Elf32_Rela relocs.  */
 #define elf_machine_relplt elf_machine_rela
 
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.  It is called
+   from code built in the PLT by elf_machine_runtime_setup.  */
+#if !defined PROF
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\n\
+	.section \".text\"	\n\
+	.align 2	\n\
+	.globl _dl_runtime_resolve	\n\
+	.type _dl_runtime_resolve,@function	\n\
+_dl_runtime_resolve:	\n\
+ # We need to save the registers used to pass parameters, and register 0,\n\
+ # which is used by _mcount; the registers are saved in a stack frame.\n\
+	stwu 1,-64(1)	\n\
+	stw 0,12(1)	\n\
+	stw 3,16(1)	\n\
+	stw 4,20(1)	\n\
+ # The code that calls this has put parameters for `fixup' in r12 and r11.\n\
+	mr 3,12	\n\
+	stw 5,24(1)	\n\
+	mr 4,11	\n\
+	stw 6,28(1)	\n\
+	mflr 0	\n\
+ # We also need to save some of the condition register fields.\n\
+	stw 7,32(1)	\n\
+	stw 0,48(1)	\n\
+	stw 8,36(1)	\n\
+	mfcr 0	\n\
+	stw 9,40(1)	\n\
+	stw 10,44(1)	\n\
+	stw 0,8(1)	\n\
+	bl fixup@local	\n\
+ # 'fixup' returns the address we want to branch to.\n\
+	mtctr 3	\n\
+ # Put the registers back...\n\
+	lwz 0,48(1)	\n\
+	lwz 10,44(1)	\n\
+	lwz 9,40(1)	\n\
+	mtlr 0	\n\
+	lwz 8,36(1)	\n\
+	lwz 0,8(1)	\n\
+	lwz 7,32(1)	\n\
+	lwz 6,28(1)	\n\
+	mtcrf 0xFF,0	\n\
+	lwz 5,24(1)	\n\
+	lwz 4,20(1)	\n\
+	lwz 3,16(1)	\n\
+	lwz 0,12(1)	\n\
+ # ...unwind the stack frame, and jump to the PLT entry we updated.\n\
+	addi 1,1,64	\n\
+	bctr	\n\
+	.size	 _dl_runtime_resolve,.-_dl_runtime_resolve	\n\
+	\n\
+	.align 2	\n\
+	.globl _dl_prof_resolve	\n\
+	.type _dl_prof_resolve,@function	\n\
+_dl_prof_resolve:	\n\
+ # We need to save the registers used to pass parameters, and register 0,\n\
+ # which is used by _mcount; the registers are saved in a stack frame.\n\
+	stwu 1,-64(1)	\n\
+        stw 0,12(1)	\n\
+	stw 3,16(1)	\n\
+	stw 4,20(1)	\n\
+ # The code that calls this has put parameters for `fixup' in r12 and r11.\n\
+	mr 3,12	\n\
+	stw 5,24(1)	\n\
+	mr 4,11	\n\
+	stw 6,28(1)	\n\
+	mflr 5	\n\
+ # We also need to save some of the condition register fields.\n\
+	stw 7,32(1)	\n\
+	stw 5,48(1)	\n\
+	stw 8,36(1)	\n\
+	mfcr 0	\n\
+	stw 9,40(1)	\n\
+	stw 10,44(1)	\n\
+	stw 0,8(1)	\n\
+	bl profile_fixup@local	\n\
+ # 'fixup' returns the address we want to branch to.\n\
+	mtctr 3	\n\
+ # Put the registers back...\n\
+	lwz 0,48(1)	\n\
+	lwz 10,44(1)	\n\
+	lwz 9,40(1)	\n\
+	mtlr 0	\n\
+	lwz 8,36(1)	\n\
+	lwz 0,8(1)	\n\
+	lwz 7,32(1)	\n\
+	lwz 6,28(1)	\n\
+	mtcrf 0xFF,0	\n\
+	lwz 5,24(1)	\n\
+	lwz 4,20(1)	\n\
+	lwz 3,16(1)	\n\
+        lwz 0,12(1)	\n\
+ # ...unwind the stack frame, and jump to the PLT entry we updated.\n\
+	addi 1,1,64	\n\
+	bctr	\n\
+	.size	 _dl_prof_resolve,.-_dl_prof_resolve	\n\
+ # Undo '.section text'.\n\
+	.previous	\n\
+");
+#else
+# define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\n\
+	.section \".text\"	\n\
+	.align 2	\n\
+	.globl _dl_runtime_resolve	\n\
+	.globl _dl_prof_resolve	\n\
+	.type _dl_runtime_resolve,@function	\n\
+	.type _dl_prof_resolve,@function	\n\
+_dl_runtime_resolve:	\n\
+_dl_prof_resolve:	\n\
+ # We need to save the registers used to pass parameters, and register 0,\n\
+ # which is used by _mcount; the registers are saved in a stack frame.\n\
+	stwu 1,-64(1)	\n\
+	stw 0,12(1)	\n\
+	stw 3,16(1)	\n\
+	stw 4,20(1)	\n\
+ # The code that calls this has put parameters for `fixup' in r12 and r11.\n\
+	mr 3,12	\n\
+	stw 5,24(1)	\n\
+	mr 4,11	\n\
+	stw 6,28(1)	\n\
+	mflr 0	\n\
+ # We also need to save some of the condition register fields.\n\
+	stw 7,32(1)	\n\
+	stw 0,48(1)	\n\
+	stw 8,36(1)	\n\
+	mfcr 0	\n\
+	stw 9,40(1)	\n\
+	stw 10,44(1)	\n\
+	stw 0,8(1)	\n\
+	bl fixup@local	\n\
+ # 'fixup' returns the address we want to branch to.\n\
+	mtctr 3	\n\
+ # Put the registers back...\n\
+	lwz 0,48(1)	\n\
+	lwz 10,44(1)	\n\
+	lwz 9,40(1)	\n\
+	mtlr 0	\n\
+	lwz 8,36(1)	\n\
+	lwz 0,8(1)	\n\
+	lwz 7,32(1)	\n\
+	lwz 6,28(1)	\n\
+	mtcrf 0xFF,0	\n\
+	lwz 5,24(1)	\n\
+	lwz 4,20(1)	\n\
+	lwz 3,16(1)	\n\
+	lwz 0,12(1)	\n\
+ # ...unwind the stack frame, and jump to the PLT entry we updated.\n\
+	addi 1,1,64	\n\
+	bctr	\n\
+	.size	 _dl_runtime_resolve,.-_dl_runtime_resolve	\n\
+");
+#endif
+
 /* Mask identifying addresses reserved for the user program,
    where the dynamic linker should not map anything.  */
 #define ELF_MACHINE_USER_ADDRESS_MASK	0xf0000000UL
@@ -174,14 +328,9 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc,
   return value + reloc->r_addend;
 }
 
-
-/* Names of the architecture-specific auditing callback functions.  */
-#define ARCH_LA_PLTENTER ppc32_gnu_pltenter
-#define ARCH_LA_PLTEXIT ppc32_gnu_pltexit
-
 #endif /* dl_machine_h */
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 
 /* Do the actual processing of a reloc, once its target address
    has been determined.  */
@@ -232,8 +381,16 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
     value = map->l_addr;
   else
     {
+# if defined USE_TLS && !defined RTLD_BOOTSTRAP
       sym_map = RESOLVE_MAP (&sym, version, r_type);
-      value = sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value;
+      value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
+# else
+      value = RESOLVE (&sym, version, r_type);
+#  ifndef RTLD_BOOTSTRAP
+      if (sym != NULL)
+#  endif
+	value += sym->st_value;
+# endif
     }
   value += reloc->r_addend;
 #else
@@ -317,4 +474,4 @@ elf_machine_lazy_rel (struct link_map *map,
    DT_RELA table.  */
 #define ELF_MACHINE_PLTREL_OVERLAP 1
 
-#endif /* RESOLVE_MAP */
+#endif /* RESOLVE */
diff --git a/sysdeps/powerpc/powerpc32/dl-trampoline.S b/sysdeps/powerpc/powerpc32/dl-trampoline.S
deleted file mode 100644
index ea5ce7b45a..0000000000
--- a/sysdeps/powerpc/powerpc32/dl-trampoline.S
+++ /dev/null
@@ -1,174 +0,0 @@
-/* PLT trampolines.  PPC32 version.
-   Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.section ".text"
-	.align 2
-	.globl _dl_runtime_resolve
-	.type _dl_runtime_resolve,@function
-_dl_runtime_resolve:
- # We need to save the registers used to pass parameters, and register 0,
- # which is used by _mcount; the registers are saved in a stack frame.
-	stwu r1,-64(r1)
-	stw r0,12(r1)
-	stw r3,16(r1)
-	stw r4,20(r1)
- # The code that calls this has put parameters for `fixup' in r12 and r11.
-	mr r3,r12
-	stw r5,24(r1)
-	mr r4,r11
-	stw r6,28(r1)
-	mflr r0
- # We also need to save some of the condition register fields
-	stw r7,32(r1)
-	stw r0,48(r1)
-	stw r8,36(r1)
-	mfcr r0
-	stw r9,40(r1)
-	stw r10,44(r1)
-	stw r0,8(r1)
-	bl _dl_fixup@local
- # 'fixup' returns the address we want to branch to.
-	mtctr r3
- # Put the registers back...
-	lwz r0,48(r1)
-	lwz r10,44(r1)
-	lwz r9,40(r1)
-	mtlr r0
-	lwz r8,36(r1)
-	lwz r0,8(r1)
-	lwz r7,32(r1)
-	lwz r6,28(r1)
-	mtcrf 0xFF,r0
-	lwz r5,24(r1)
-	lwz r4,20(r1)
-	lwz r3,16(r1)
-	lwz r0,12(r1)
- # ...unwind the stack frame, and jump to the PLT entry we updated.
-	addi r1,r1,64
-	bctr
-	.size	 _dl_runtime_resolve,.-_dl_runtime_resolve
-
-	.align 2
-	.globl _dl_prof_resolve
-	.type _dl_prof_resolve,@function
-_dl_prof_resolve:
- # We need to save the registers used to pass parameters, and register 0,
- # which is used by _mcount; the registers are saved in a stack frame.
-	stwu r1,-320(r1)
-	/* Stack layout:
-
-	  +312   stackframe
-	  +308   lr
-	  +304   r1
-	  +288   v12
-	  +272   v11
-	  +256   v10
-	  +240   v9
-	  +224   v8
-	  +208   v7
-	  +192   v6
-	  +176   v5
-	  +160   v4
-	  +144   v3
-	  +128   v2
-	  +112   v1
-	  +104   fp8
-	  +96    fp7
-	  +88    fp6
-	  +80    fp5
-	  +72    fp4
-	  +64    fp3
-	  +56    fp2
-	  +48    fp1
-	  +44    r10
-	  +40    r9
-	  +36    r8
-	  +32    r7
-	  +28    r6
-	  +24    r5
-	  +20    r4
-	  +16    r3
-	  +12    r0
-	  +8     cr
-	   r1    link
-	*/
-        stw r0,12(r1)
-	stw r3,16(r1)
-	stw r4,20(r1)
- # The code that calls this has put parameters for `fixup' in r12 and r11.
-	mr r3,r12
-	stw r5,24(r1)
-	mr r4,r11
-	stw r6,28(r1)
-	mflr r5
- # We also need to save some of the condition register fields.
-	stw r7,32(r1)
-	stw r5,308(r1)
-	stw r8,36(r1)
-	mfcr r0
-	stw r9,40(r1)
-	stw r10,44(r1)
-	stw r0,8(r1)
- # Save the floating point registers
-	stfd fp1,48(r1)
-	stfd fp2,56(r1)
-	stfd fp3,64(r1)
-	stfd fp4,72(r1)
-	stfd fp5,80(r1)
-	stfd fp6,88(r1)
-	stfd fp7,96(r1)
-	stfd fp8,104(r1)
- # XXX TODO: store vmx registers
- # Load the extra parameters.
-	addi r6,r1,16
-	addi r7,r1,312
-	li r0,-1
-	stw r0,0(r7)
-	bl _dl_profile_fixup@local
- # 'fixup' returns the address we want to branch to.
-	mtctr r3
- # Put the registers back...
-	lwz r0,308(r1)
-	lwz r10,44(r1)
-	lwz r9,40(r1)
-	mtlr r0
-	lwz r8,36(r1)
-	lwz r0,8(r1)
-	lwz r7,32(r1)
-	lwz r6,28(r1)
-	mtcrf 0xFF,r0
-	lwz r5,24(r1)
-	lwz r4,20(r1)
-	lwz r3,16(r1)
-        lwz r0,12(r1)
- # Load the floating point registers.
-	lfd fp1,48(r1)
-	lfd fp2,56(r1)
-	lfd fp3,64(r1)
-	lfd fp4,72(r1)
-	lfd fp5,80(r1)
-	lfd fp6,88(r1)
-	lfd fp7,96(r1)
-	lfd fp8,104(r1)
- # ...unwind the stack frame, and jump to the PLT entry we updated.
-	addi r1,r1,320
-	bctr
-	.size	 _dl_prof_resolve,.-_dl_prof_resolve
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_ceil.S b/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
index 22cf76e54c..d211314bbf 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_ceil.S
@@ -26,12 +26,20 @@
 TWO52.0:
 	.long 0x43300000
 	.long 0
+	.type	NEGZERO.0,@object
+	.size	NEGZERO.0,8
+NEGZERO.0:
+	.long 0x80000000
+	.long 0
 
 	.section	.rodata.cst8,"aM",@progbits,8
 	.align 3
 .LC0:	/* 2**52 */
 	.long 0x43300000
 	.long 0
+.LC1:	/* -0.0 */
+	.long 0x80000000
+	.long 0
 
 	.section	".text"
 ENTRY (__ceil)
@@ -56,18 +64,27 @@ ENTRY (__ceil)
 	ble-	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+#ifdef SHARED
+	mflr	r11
+	bl	_GLOBAL_OFFSET_TABLE_@local-4
+	mflr	r10
+	lwz	r9,.LC1@got(10)
+	mtlr	r11
+	lfd	fp1,0(r9)
+#else
+	lis	r9,.LC1@ha
+	lfd	fp1,.LC1@l(r9)
+#endif
 	blr
 	END (__ceil)
 
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
index e7a72186c9..4439dc2338 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_ceilf.S
@@ -20,16 +20,26 @@
 #include <sysdep.h>
 
 	.section	.rodata
-	.align 2
-	.type	TWO23.0,@object
-	.size	TWO23.0,4
-TWO23.0:
-	.long 0x4b000000
+	.align 3
+	.type	TWO52.0,@object
+	.size	TWO52.0,8
+TWO52.0:
+	.long 0x43300000
+	.long 0
+	.type	NEGZERO.0,@object
+	.size	NEGZERO.0,8
+NEGZERO.0:
+	.long 0x80000000
+	.long 0
 
-	.section	.rodata.cst4,"aM",@progbits,4
-	.align 2
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align 3
 .LC0:	/* 2**23 */
-	.long 0x4b000000
+	.long 0x41600000
+	.long 0
+.LC1:	/* -0.0 */
+	.long 0x80000000
+	.long 0
 
 	.section	".text"
 ENTRY (__ceilf)
@@ -40,10 +50,10 @@ ENTRY (__ceilf)
 	mflr	r10
 	lwz	r9,.LC0@got(10)
 	mtlr	r11
-	lfs	fp13,0(r9)
+	lfd	fp13,0(r9)
 #else
 	lis	r9,.LC0@ha
-	lfs	fp13,.LC0@l(r9)
+	lfd	fp13,.LC0@l(r9)
 #endif
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
@@ -54,18 +64,27 @@ ENTRY (__ceilf)
 	ble-	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+#ifdef SHARED
+	mflr	r11
+	bl	_GLOBAL_OFFSET_TABLE_@local-4
+	mflr	r10
+	lwz	r9,.LC1@got(10)
+	mtlr	r11
+	lfd	fp1,0(r9)
+#else
+	lis	r9,.LC1@ha
+	lfd	fp1,.LC1@l(r9)
+#endif
 	blr
 	END (__ceilf)
 
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_floor.S b/sysdeps/powerpc/powerpc32/fpu/s_floor.S
index 812ea7ced2..143f907b18 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_floor.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_floor.S
@@ -56,16 +56,15 @@ ENTRY (__floor)
 	ble-	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	fmr	fp1,fp12	/* x must be +0.0 for the 0.0 case.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
 .L9:
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */	
 	blr
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_floorf.S b/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
index ead41d4657..154bc30ff7 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_floorf.S
@@ -20,16 +20,18 @@
 #include <sysdep.h>
 
 	.section	.rodata
-	.align 2
+	.align 3
 	.type	TWO23.0,@object
-	.size	TWO23.0,4
+	.size	TWO23.0,8
 TWO23.0:
-	.long 0x4b000000
+	.long 0x41600000
+	.long 0
 
-	.section	.rodata.cst4,"aM",@progbits,4
-	.align 2
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align 3
 .LC0:	/* 2**23 */
-	.long 0x4b000000
+	.long 0x41600000
+	.long 0
 
 	.section	".text"
 ENTRY (__floorf)
@@ -40,10 +42,10 @@ ENTRY (__floorf)
 	mflr	r10
 	lwz	r9,.LC0@got(10)
 	mtlr	r11
-	lfs	fp13,0(r9)
+	lfd	fp13,0(r9)
 #else
 	lis	r9,.LC0@ha
-	lfs	fp13,.LC0@l(r9)
+	lfd	fp13,.LC0@l(r9)
 #endif
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
@@ -54,16 +56,15 @@ ENTRY (__floorf)
 	ble-	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	fmr	fp1,fp12	/* x must be +0.0 for the 0.0 case.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
 .L9:
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */	
 	blr
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rint.S b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
index fa02dbc59c..dee25f204f 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rint.S
@@ -57,14 +57,13 @@ ENTRY (__rint)
 	bng-	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-	blr			/* x = 0.0; */
+	blr
 .L4:
 	bnllr-	cr6		/* if (x < 0.0)  */
-	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-	blr			/* x = -0.0; */
+	fsub	fp1,fp13,fp1	/* x = TWO52 - x;  */
+	fsub	fp0,fp1,fp13	/* x = - (x - TWO52);  */
+	fneg	fp1,fp0
+	blr
 	END (__rint)
 
 weak_alias (__rint, rint)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
index 7825951268..cebf6423af 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_rintf.S
@@ -21,16 +21,18 @@
 
 
 	.section	.rodata
-	.align 2
+	.align 3
 	.type	TWO23.0,@object
-	.size	TWO23.0,4
+	.size	TWO23.0,8
 TWO23.0:
-	.long 0x4b000000
+	.long 0x41600000
+	.long 0
 
-	.section	.rodata.cst4,"aM",@progbits,4
-	.align 2
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align 3
 .LC0:	/* 2**23 */
-	.long 0x4b000000
+	.long 0x41600000
+	.long 0
 
 	.section	".text"
 ENTRY (__rintf)
@@ -40,10 +42,10 @@ ENTRY (__rintf)
 	mflr	r10
 	lwz	r9,.LC0@got(10)
 	mtlr	r11
-	lfs	fp13,0(r9)
+	lfd	fp13,0(r9)
 #else
 	lis	r9,.LC0@ha
-	lfs	fp13,.LC0@l(r9)
+	lfd	fp13,.LC0@l(r9)
 #endif
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
@@ -53,14 +55,13 @@ ENTRY (__rintf)
 	bng-	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-	blr			/* x = 0.0; */
+	blr
 .L4:
 	bnllr-	cr6		/* if (x < 0.0)  */
-	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-	blr			/* x = -0.0; */
+	fsubs	fp1,fp13,fp1	/* x = TWO23 - x;  */
+	fsubs	fp0,fp1,fp13	/* x = - (x - TWO23);  */
+	fneg	fp1,fp0
+	blr
 	END (__rintf)
 
 weak_alias (__rintf, rintf)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_round.S b/sysdeps/powerpc/powerpc32/fpu/s_round.S
index 39eab232f6..13fc74f001 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_round.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_round.S
@@ -31,6 +31,11 @@ TWO52.0:
 POINTFIVE.0:
 	.long 0x3fe00000
 	.long 0
+	.type	NEGZERO.0,@object
+	.size	NEGZERO.0,8
+NEGZERO.0:
+	.long 0x80000000
+	.long 0
 
 	.section	.rodata.cst8,"aM",@progbits,8
 	.align 3
@@ -40,6 +45,9 @@ POINTFIVE.0:
 .LC1:	/* 0.5 */
 	.long 0x3fe00000
 	.long 0
+.LC2:	/* -0.0 */
+	.long 0x80000000
+	.long 0
 
 /* double [fp1] round (double x [fp1])
    IEEE 1003.1 round function.  IEEE specifies "round to the nearest 
@@ -81,8 +89,7 @@ ENTRY (__round)
 	fadd	fp1,fp1,fp10	/* x+= 0.5;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
 	blr
 .L4:
@@ -90,10 +97,16 @@ ENTRY (__round)
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsub	fp1,fp9,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+#ifdef SHARED
+	lwz	r9,.LC2@got(10)
+	lfd	fp1,0(r9)
+#else
+	lis	r9,.LC2@ha
+	lfd	fp1,.LC2@l(r9)
+#endif
 	blr
 	END (__round)
 
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
index a9b42f0170..ea8aaf3add 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_roundf.S
@@ -20,22 +20,34 @@
 #include <sysdep.h>
 
 	.section	.rodata
-	.align 2
+	.align 3
 	.type	TWO23.0,@object
-	.size	TWO23.0,4
+	.size	TWO23.0,8
 TWO23.0:
-	.long 0x4b000000
+	.long 0x41600000	/* High word of 2**23 as a double, same value as .LC0.  */
+	.long 0	/* Low word of 2**23 as a double.  */
 	.type	POINTFIVE.0,@object
-	.size	POINTFIVE.0,4
+	.size	POINTFIVE.0,8
 POINTFIVE.0:
-	.long 0x3f000000
+	.long 0x3fe00000
+	.long 0
+	.type	NEGZERO.0,@object
+	.size	NEGZERO.0,8
+NEGZERO.0:
+	.long 0x80000000
+	.long 0
 
-	.section	.rodata.cst4,"aM",@progbits,4
-	.align 2
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align 3
 .LC0:	/* 2**23 */
-	.long 0x4b000000
+	.long 0x41600000
+	.long 0
 .LC1:	/* 0.5 */
-	.long 0x3f000000
+	.long 0x3fe00000
+	.long 0
+.LC2:	/* -0.0 */
+	.long 0x80000000
+	.long 0
 
 /* float [fp1] roundf  (float x [fp1])
    IEEE 1003.1 round function.  IEEE specifies "round to the nearest 
@@ -55,10 +67,10 @@ ENTRY (__roundf )
 	mflr	r10
 	lwz	r9,.LC0@got(10)
 	mtlr	r11
-	lfs	fp13,0(r9)
+	lfd	fp13,0(r9)
 #else
 	lis	r9,.LC0@ha
-	lfs	fp13,.LC0@l(r9)
+	lfd	fp13,.LC0@l(r9)
 #endif
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
@@ -68,17 +80,16 @@ ENTRY (__roundf )
 	mtfsfi	7,1		/* Set rounding mode toward 0.  */
 #ifdef SHARED
 	lwz	r9,.LC1@got(10)
-	lfs	fp10,0(r9)
+	lfd	fp10,0(r9)
 #else
 	lis	r9,.LC1@ha
-	lfs	fp10,.LC1@l(r9)
+	lfd	fp10,.LC1@l(r9)
 #endif
 	ble-	cr6,.L4
 	fadds	fp1,fp1,fp10	/* x+= 0.5;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
 	blr
 .L4:
@@ -86,10 +97,16 @@ ENTRY (__roundf )
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsubs	fp1,fp9,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+#ifdef SHARED
+	lwz	r9,.LC2@got(10)
+	lfd	fp1,0(r9)
+#else
+	lis	r9,.LC2@ha
+	lfd	fp1,.LC2@l(r9)
+#endif
 	blr
 	END (__roundf)
 
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_trunc.S b/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
index 08acc00cb2..a4be651f8c 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_trunc.S
@@ -26,12 +26,20 @@
 TWO52.0:
 	.long 0x43300000
 	.long 0
+	.type	NEGZERO.0,@object
+	.size	NEGZERO.0,8
+NEGZERO.0:
+	.long 0x80000000
+	.long 0
 
 	.section	.rodata.cst8,"aM",@progbits,8
 	.align 3
 .LC0:	/* 2**52 */
 	.long 0x43300000
 	.long 0
+.LC1:	/* -0.0 */
+	.long 0x80000000
+	.long 0
 
 /* double [fp1] trunc (double x [fp1])
    IEEE 1003.1 trunc function.  IEEE specifies "trunc to the integer
@@ -62,18 +70,23 @@ ENTRY (__trunc)
 	ble-	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous truncing mode.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+#ifdef SHARED
+	lwz	r9,.LC1@got(10)
+	lfd	fp1,0(r9)
+#else
+	lis	r9,.LC1@ha
+	lfd	fp1,.LC1@l(r9)
+#endif
 	blr
 	END (__trunc)
 
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_truncf.S b/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
index 3b6fe731b4..9a8dae931b 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
+++ b/sysdeps/powerpc/powerpc32/fpu/s_truncf.S
@@ -20,16 +20,26 @@
 #include <sysdep.h>
 
 	.section	.rodata
-	.align 2
+	.align 3
 	.type	TWO23.0,@object
-	.size	TWO23.0,2
+	.size	TWO23.0,8
 TWO23.0:
-	.long 0x4b000000
+	.long 0x41600000
+	.long 0
+	.type	NEGZERO.0,@object
+	.size	NEGZERO.0,8
+NEGZERO.0:
+	.long 0x80000000
+	.long 0
 
-	.section	.rodata.cst4,"aM",@progbits,4
-	.align 2
+	.section	.rodata.cst8,"aM",@progbits,8
+	.align 3
 .LC0:	/* 2**23 */
-	.long 0x4b000000
+	.long 0x41600000
+	.long 0
+.LC1:	/* -0.0 */
+	.long 0x80000000
+	.long 0
 
 /* float [fp1] truncf (float x [fp1])
    IEEE 1003.1 trunc function.  IEEE specifies "trunc to the integer
@@ -46,10 +56,10 @@ ENTRY (__truncf)
 	mflr	r10
 	lwz	r9,.LC0@got(10)
 	mtlr	r11
-	lfs	fp13,0(r9)
+	lfd	fp13,0(r9)
 #else
 	lis	r9,.LC0@ha
-	lfs	fp13,.LC0@l(r9)
+	lfd	fp13,.LC0@l(r9)
 #endif
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
@@ -60,18 +70,23 @@ ENTRY (__truncf)
 	ble-	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous truncing mode.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+#ifdef SHARED
+	lwz	r9,.LC1@got(10)
+	lfd	fp1,0(r9)
+#else
+	lis	r9,.LC1@ha
+	lfd	fp1,.LC1@l(r9)
+#endif
 	blr
 	END (__truncf)
 
diff --git a/bits/sys_errlist.h b/sysdeps/powerpc/powerpc64/dl-lookupcfg.h
index ab839b7073..e502941015 100644
--- a/bits/sys_errlist.h
+++ b/sysdeps/powerpc/powerpc64/dl-lookupcfg.h
@@ -1,4 +1,4 @@
-/* Declare sys_errlist and sys_nerr, or don't.  Don't version.
+/* Configuration of lookup functions.  PowerPC64 version.
    Copyright (C) 2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -17,8 +17,6 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#ifndef _STDIO_H
-# error "Never include <bits/sys_errlist.h> directly; use <stdio.h> instead."
-#endif
+/* Return the symbol map from the symbol lookup function.  */
 
-/* sys_errlist and sys_nerr are deprecated.  Use strerror instead.  */
+#define DL_LOOKUP_RETURNS_MAP 1
diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h
index 5ddc22e3c9..3fcf77df71 100644
--- a/sysdeps/powerpc/powerpc64/dl-machine.h
+++ b/sysdeps/powerpc/powerpc64/dl-machine.h
@@ -1,6 +1,6 @@
 /* Machine-dependent ELF dynamic relocation inline functions.
    PowerPC64 version.
-   Copyright 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+   Copyright 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -107,6 +107,92 @@ elf_machine_dynamic (void)
 /* The PLT uses Elf64_Rela relocs.  */
 #define elf_machine_relplt elf_machine_rela
 
+/* This code gets called via a .glink stub which loads PLT0.  It is
+   used in dl-runtime.c to call the `fixup' function and then branch to
+   the entry point in the function descriptor that `fixup' returns.
+
+   Enter with r0 = plt reloc index,
+   r2 = ld.so tocbase,
+   r11 = ld.so link map.  */
+
+#define TRAMPOLINE_TEMPLATE(tramp_name, fixup_name) \
+  asm (".section \".text\"\n"						\
+"	.align	2\n"							\
+"	.type	" BODY_PREFIX #tramp_name ",@function\n"		\
+"	.section \".opd\",\"aw\"\n"					\
+"	.align	3\n"							\
+"	.globl	" #tramp_name "\n"					\
+"	" ENTRY_2(tramp_name) "\n"					\
+#tramp_name ":\n"							\
+"	" OPD_ENT(tramp_name) "\n"					\
+"	.previous\n"							\
+BODY_PREFIX #tramp_name ":\n"						\
+/* We need to save the registers used to pass parameters, ie. r3 thru	\
+   r10; the registers are saved in a stack frame.  */			\
+"	stdu	1,-128(1)\n"						\
+"	std	3,48(1)\n"						\
+"	mr	3,11\n"							\
+"	std	4,56(1)\n"						\
+"	sldi	4,0,1\n"						\
+"	std	5,64(1)\n"						\
+"	add	4,4,0\n"						\
+"	std	6,72(1)\n"						\
+"	sldi	4,4,3\n"						\
+"	std	7,80(1)\n"						\
+"	mflr	0\n"							\
+"	std	8,88(1)\n"						\
+/* Store the LR in the LR Save area of the previous frame.  */    	\
+"	std	0,128+16(1)\n"						\
+"	mfcr	0\n"							\
+"	std	9,96(1)\n"						\
+"	std	10,104(1)\n"						\
+/* I'm almost certain we don't have to save cr...  be safe.  */    	\
+"	std	0,8(1)\n"						\
+"	bl	" DOT_PREFIX #fixup_name "\n"				\
+/* Put the registers back.  */						\
+"	ld	0,128+16(1)\n"						\
+"	ld	10,104(1)\n"						\
+"	ld	9,96(1)\n"						\
+"	ld	8,88(1)\n"						\
+"	ld	7,80(1)\n"						\
+"	mtlr	0\n"							\
+"	ld	0,8(1)\n"						\
+"	ld	6,72(1)\n"						\
+"	ld	5,64(1)\n"						\
+"	ld	4,56(1)\n"						\
+"	mtcrf	0xFF,0\n"						\
+/* Load the target address, toc and static chain reg from the function  \
+   descriptor returned by fixup.  */					\
+"	ld	0,0(3)\n"						\
+"	ld	2,8(3)\n"						\
+"	mtctr	0\n"							\
+"	ld	11,16(3)\n"						\
+"	ld	3,48(1)\n"						\
+/* Unwind the stack frame, and jump.  */				\
+"	addi	1,1,128\n"						\
+"	bctr\n"								\
+".LT_" #tramp_name ":\n"						\
+"	.long 0\n"							\
+"	.byte 0x00,0x0c,0x24,0x40,0x00,0x00,0x00,0x00\n"		\
+"	.long .LT_" #tramp_name "-" BODY_PREFIX #tramp_name "\n"	\
+"	.short .LT_" #tramp_name "_name_end-.LT_" #tramp_name "_name_start\n" \
+".LT_" #tramp_name "_name_start:\n"					\
+"	.ascii \"" #tramp_name "\"\n"					\
+".LT_" #tramp_name "_name_end:\n"					\
+"	.align 2\n"							\
+"	" END_2(tramp_name) "\n"					\
+"	.previous");
+
+#ifndef PROF
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE			\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup);	\
+  TRAMPOLINE_TEMPLATE (_dl_profile_resolve, profile_fixup);
+#else
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE			\
+  TRAMPOLINE_TEMPLATE (_dl_runtime_resolve, fixup);	\
+  void _dl_runtime_resolve (void);			\
+  strong_alias (_dl_runtime_resolve, _dl_profile_resolve);
+#endif
 
 #ifdef HAVE_INLINED_SYSCALLS
 /* We do not need _dl_starting_up.  */
@@ -334,8 +420,7 @@ elf_machine_runtime_setup (struct link_map *map, int lazy, int profile)
 
 	  resolve_fd = (Elf64_FuncDesc *) (profile ? _dl_profile_resolve
 					   : _dl_runtime_resolve);
-	  if (profile && GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), map))
+	  if (profile && _dl_name_match_p (GLRO(dl_profile), map))
 	    /* This is the object we are looking for.  Say that we really
 	       want profiling and the timers are started.  */
 	    GL(dl_profile_map) = map;
@@ -460,11 +545,6 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
   return value + reloc->r_addend;
 }
 
-
-/* Names of the architecture-specific auditing callback functions.  */
-#define ARCH_LA_PLTENTER ppc64_gnu_pltenter
-#define ARCH_LA_PLTEXIT ppc64_gnu_pltexit
-
 #endif /* dl_machine_h */
 
 #ifdef RESOLVE_MAP
diff --git a/sysdeps/powerpc/powerpc64/dl-trampoline.S b/sysdeps/powerpc/powerpc64/dl-trampoline.S
deleted file mode 100644
index 316f17a405..0000000000
--- a/sysdeps/powerpc/powerpc64/dl-trampoline.S
+++ /dev/null
@@ -1,196 +0,0 @@
-/* PLT trampolines.  PPC64 version.
-   Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.section ".text"
-
-EALIGN(_dl_runtime_resolve, 4, 0)
-/* We need to save the registers used to pass parameters, ie. r3 thru
-   r10; the registers are saved in a stack frame.  */
-	stdu	r1,-128(r1)
-	std	r3,48(r1)
-	mr	r3,r11
-	std	r4,56(r1)
-	sldi	r4,r0,1
-	std	r5,64(r1)
-	add	r4,r4,r0
-	std	r6,72(r1)
-	sldi	r4,r4,3
-	std	r7,80(r1)
-	mflr	r0
-	std	r8,88(r1)
-/* Store the LR in the LR Save area of the previous frame.  */
-	std	r0,128+16(r1)
-	mfcr	r0
-	std	r9,96(r1)
-	std	r10,104(r1)
-/* I'm almost certain we don't have to save cr...  be safe.  */
-	std	r0,8(r1)
-	bl	JUMPTARGET(_dl_fixup)
-/* Put the registers back.  */
-	ld	r0,128+16(r1)
-	ld	r10,104(r1)
-	ld	r9,96(r1)
-	ld	r8,88(r1)
-	ld	r7,80(r1)
-	mtlr	r0
-	ld	r0,8(r1)
-	ld	r6,72(r1)
-	ld	r5,64(r1)
-	ld	r4,56(r1)
-	mtcrf	0xFF,r0
-/* Load the target address, toc and static chain reg from the function
-   descriptor returned by fixup.  */
-	ld	r0,0(r3)
-	ld	r2,8(r3)
-	mtctr	r0
-	ld	r11,16(r3)
-	ld	r3,48(r1)
-/* Unwind the stack frame, and jump.  */
-	addi	r1,r1,128
-	bctr
-END(_dl_runtime_resolve)
-
-
-
-EALIGN(_dl_profile_resolve, 4, 0)
-/* We need to save the registers used to pass parameters, ie. r3 thru
-   r10; the registers are saved in a stack frame.  */
-	stdu	r1,-448(r1)
-	/* Stack layout:
-
-	  +432   stackframe
-	  +424   lr
-	  +416   r1
-	  +400   v12
-	  +384   v11
-	  +368   v10
-	  +362   v9
-	  +336   v8
-	  +320   v7
-	  +304   v6
-	  +288   v5
-	  +272   v4
-	  +256   v3
-	  +240   v2
-	  +224   v1
-	  +216   free
-	  +208   fp13
-	  +200   fp12
-	  +192   fp11
-	  +184   fp10
-	  +176   fp9
-	  +168   fp8
-	  +160   fp7
-	  +152   fp6
-	  +144   fp5
-	  +136   fp4
-	  +128   fp3
-	  +120   fp2
-	  +112   fp1
-	  +104   r10
-	  +96    r9
-	  +88    r8
-	  +80    r7
-	  +72    r6
-	  +64    r5
-	  +56    r4
-	  +48    r3
-	  +8     cr
-	   r1    link
-	*/
-	std	r3,48(r1)
-	mr	r3,r11
-	std	r4,56(r1)
-	sldi	r4,r0,1
-	std	r5,64(r1)
-	add	r4,r4,0
-	std	r6,72(r1)
-	sldi	r4,r4,3
-	std	r7,80(r1)
-	mflr	r5
-	std	r8,88(r1)
-/* Store the LR in the LR Save area of the previous frame.  */
-/* XXX Do we have to do this?  */
-	std	r5,448+16(r1)
-	std	r5,424(r1)
-	mfcr	r0
-	std	r9,96(r1)
-	std	r10,104(r1)
-/* I'm almost certain we don't have to save cr...  be safe.  */
-	std	r0,8(r1)
-/* Save floating registers.  */
-	stfd	fp1,112(r1)
-	stfd	fp2,120(r1)
-	stfd	fp3,128(r1)
-	stfd	fp4,136(r1)
-	stfd	fp5,144(r1)
-	stfd	fp6,152(r1)
-	stfd	fp7,160(r1)
-	stfd	fp8,168(r1)
-	stfd	fp9,176(r1)
-	stfd	fp10,184(r1)
-	stfd	fp11,192(r1)
-	stfd	fp12,200(r1)
-	stfd	fp13,208(r1)
-/* XXX TODO: store vmx registers.  */
-/* Load the extra parameters.  */
-	addi	r6,r1,48
-	addi	r7,r1,432
-	li	r0,-1
-	stdu	r0,0(r7)
-	bl	JUMPTARGET(_dl_profile_fixup)
-/* Put the registers back.  */
-	ld	r0,448+16(r1)
-	ld	r10,104(r1)
-	ld	r9,96(r1)
-	ld	r8,88(r1)
-	ld	r7,80(r1)
-	mtlr	r0
-	ld	r0,8(r1)
-	ld	r6,72(r1)
-	ld	r5,64(r1)
-	ld	r4,56(r1)
-	mtcrf	0xFF,r0
-/* Load the target address, toc and static chain reg from the function
-   descriptor returned by fixup.  */
-	ld	r0,0(r3)
-	ld	r2,8(r3)
-	mtctr	r0
-	ld	r11,16(r3)
-	ld	r3,48(r1)
-/* Load the floating point registers.  */
-	lfd	fp1,112(r1)
-	lfd	fp2,120(r1)
-	lfd	fp3,128(r1)
-	lfd	fp4,136(r1)
-	lfd	fp5,144(r1)
-	lfd	fp6,152(r1)
-	lfd	fp7,160(r1)
-	lfd	fp8,168(r1)
-	lfd	fp9,176(r1)
-	lfd	fp10,184(r1)
-	lfd	fp11,192(r1)
-	lfd	fp12,200(r1)
-	lfd	fp13,208(r1)
-/* Unwind the stack frame, and jump.  */
-	addi	r1,r1,448
-	bctr
-END(_dl_profile_resolve)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceil.S b/sysdeps/powerpc/powerpc64/fpu/s_ceil.S
index 9809e24d26..a1bfaa70c2 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_ceil.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_ceil.S
@@ -22,9 +22,11 @@
 	.section	".toc","aw"
 .LC0:	/* 2**52 */
 	.tc FD_43300000_0[TC],0x4330000000000000
+.LC1:	/* -0.0 */
+	.tc FD_80000000_0[TC],0x8000000000000000
 	.section	".text"
 
-EALIGN (__ceil, 4, 0)
+ENTRY (__ceil)
 	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
@@ -37,18 +39,17 @@ EALIGN (__ceil, 4, 0)
 	ble-	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	lfd	fp1,.LC1@toc(2)	/* x must be -0.0 for the 0.0 case.  */	
 	blr
 	END (__ceil)
 
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S b/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
index 1ccd133b66..42eb274389 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_ceilf.S
@@ -21,13 +21,15 @@
 
 	.section	".toc","aw"
 .LC0:	/* 2**23 */
-	.tc FD_4b000000_0[TC],0x4b00000000000000
+	.tc FD_41600000_0[TC],0x4160000000000000
+.LC1:	/* -0.0 */
+	.tc FD_80000000_0[TC],0x8000000000000000
 	.section	".text"
 
-EALIGN (__ceilf, 4, 0)
+ENTRY (__ceilf)
 	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
-	lfs	fp13,.LC0@toc(2)
+	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
@@ -37,18 +39,17 @@ EALIGN (__ceilf, 4, 0)
 	ble-	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	lfd	fp1,.LC1@toc(2)	/* x must be -0.0 for the 0.0 case.  */	
 	blr
 	END (__ceilf)
 
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_floor.S b/sysdeps/powerpc/powerpc64/fpu/s_floor.S
index 183423c2b3..80cbdc5709 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_floor.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_floor.S
@@ -24,7 +24,7 @@
 	.tc FD_43300000_0[TC],0x4330000000000000
 	.section	".text"
 
-EALIGN (__floor, 4, 0)
+ENTRY (__floor)
 	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
@@ -37,16 +37,15 @@ EALIGN (__floor, 4, 0)
 	ble-	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	fmr	fp1,fp12	/* x must be +0.0 for the 0.0 case.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
 .L9:
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */	
 	blr
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_floorf.S b/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
index bcdbf7823d..20cbb15ebd 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_floorf.S
@@ -21,13 +21,13 @@
 
 	.section	".toc","aw"
 .LC0:	/* 2**23 */
-	.tc FD_4b000000_0[TC],0x4b00000000000000
+	.tc FD_41600000_0[TC],0x4160000000000000
 	.section	".text"
 
-EALIGN (__floorf, 4, 0)
+ENTRY (__floorf)
 	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
-	lfs	fp13,.LC0@toc(2)
+	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
@@ -37,16 +37,15 @@ EALIGN (__floorf, 4, 0)
 	ble-	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+	fcmpu	cr5,fp1,fp12	/* if (x > 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	fmr	fp1,fp12	/* x must be +0.0 for the 0.0 case.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
 .L9:
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */	
 	blr
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rint.S b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
index 0c0e0ba67b..79e807269d 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rint.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rint.S
@@ -27,7 +27,7 @@
 	.tc FD_43300000_0[TC],0x4330000000000000
 	.section	".text"
 
-EALIGN (__rint, 4, 0)
+ENTRY (__rint)
 	CALL_MCOUNT 0
 	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
@@ -38,14 +38,13 @@ EALIGN (__rint, 4, 0)
 	bng-	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-	blr			/* x = 0.0; */
+	blr
 .L4:
 	bnllr-	cr6		/* if (x < 0.0)  */
-	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-	blr			/* x = -0.0; */
+	fsub	fp1,fp13,fp1	/* x = TWO52 - x;  */
+	fsub	fp0,fp1,fp13	/* x = - (x - TWO52);  */
+	fneg	fp1,fp0
+	blr
 	END (__rint)
 
 weak_alias (__rint, rint)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
index e4fa9ba2e6..eb34dd5e77 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_rintf.S
@@ -21,12 +21,12 @@
 
 	.section	".toc","aw"
 .LC0:	/* 2**23 */
-	.tc FD_4b000000_0[TC],0x4b00000000000000
+	.tc FD_41600000_0[TC],0x4160000000000000
 	.section	".text"
 
-EALIGN (__rintf, 4, 0)
+ENTRY (__rintf)
 	CALL_MCOUNT 0
-	lfs	fp13,.LC0@toc(2)
+	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
@@ -35,14 +35,13 @@ EALIGN (__rintf, 4, 0)
 	bng-	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-	blr			/* x = 0.0; */
+	blr
 .L4:
 	bnllr-	cr6		/* if (x < 0.0)  */
-	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-	blr			/* x = -0.0; */
+	fsubs	fp1,fp13,fp1	/* x = TWO23 - x;  */
+	fsubs	fp0,fp1,fp13	/* x = - (x - TWO23);  */
+	fneg	fp1,fp0
+	blr
 	END (__rintf)
 
 weak_alias (__rintf, rintf)
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_round.S b/sysdeps/powerpc/powerpc64/fpu/s_round.S
index b07a7ea15a..c0b6d46fea 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_round.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_round.S
@@ -24,6 +24,8 @@
 	.tc FD_43300000_0[TC],0x4330000000000000
 .LC1:	/* 0.5 */
 	.tc FD_3fe00000_0[TC],0x3fe0000000000000
+.LC2:	/* -0.0 */
+	.tc FD_80000000_0[TC],0x8000000000000000
 	.section	".text"
 	
 /* double [fp1] round (double x [fp1])
@@ -36,7 +38,7 @@
    "Round toward Zero" mode and round by adding +-0.5 before rounding
    to the integer value.  */
 
-EALIGN (__round, 4, 0)
+ENTRY (__round)
 	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
@@ -51,8 +53,7 @@ EALIGN (__round, 4, 0)
 	fadd	fp1,fp1,fp10	/* x+= 0.5;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
 	blr
 .L4:
@@ -60,10 +61,10 @@ EALIGN (__round, 4, 0)
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsub	fp1,fp9,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
-	mtfsf	0x01,fp11	/* restore previous rounding mode.  */	
+	fcmpu	cr5,fp1,fp12	/* if (x != 0.0)  */
+	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	lfd	fp1,.LC2@toc(2)	/* x must be -0.0 for the 0.0 case.  */	
 	blr
 	END (__round)
 
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_roundf.S b/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
index d2e29fdb8f..23ee4c052b 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_roundf.S
@@ -21,9 +21,11 @@
 
 	.section	".toc","aw"
 .LC0:	/* 2**23 */
-	.tc FD_4b000000_0[TC],0x4b00000000000000
+	.tc FD_41600000_0[TC],0x4160000000000000
 .LC1:	/* 0.5 */
-	.tc FD_3f000000_0[TC],0x3f00000000000000
+	.tc FD_3fe00000_0[TC],0x3fe0000000000000
+.LC2:	/* -0.0 */
+	.tc FD_80000000_0[TC],0x8000000000000000
 	.section	".text"
 	
 /* float [fp1] roundf  (float x [fp1])
@@ -36,23 +38,22 @@
    "Round toward Zero" mode and round by adding +-0.5 before rounding
    to the integer value.  */
 
-EALIGN (__roundf, 4, 0)
+ENTRY (__roundf )
 	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
-	lfs	fp13,.LC0@toc(2)
+	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
 	fcmpu	cr6,fp1,fp12	/* if (x > 0.0)  */
 	bnllr-	cr7
 	mtfsfi	7,1		/* Set rounding mode toward 0.  */
-	lfs	fp10,.LC1@toc(2)
+	lfd	fp10,.LC1@toc(2)
 	ble-	cr6,.L4
 	fadds	fp1,fp1,fp10	/* x+= 0.5;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
 	blr
 .L4:
@@ -60,10 +61,10 @@ EALIGN (__roundf, 4, 0)
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsubs	fp1,fp9,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
-	mtfsf	0x01,fp11	/* restore previous rounding mode.  */	
+	fcmpu	cr5,fp1,fp12	/* if (x != 0.0)  */
+	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	lfd	fp1,.LC2@toc(2)	/* x must be -0.0 for the 0.0 case.  */	
 	blr
 	END (__roundf)
 
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_trunc.S b/sysdeps/powerpc/powerpc64/fpu/s_trunc.S
index d69e371b61..3ddd298525 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_trunc.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_trunc.S
@@ -22,6 +22,8 @@
 	.section	".toc","aw"
 .LC0:	/* 2**52 */
 	.tc FD_43300000_0[TC],0x4330000000000000
+.LC2:	/* -0.0 */
+	.tc FD_80000000_0[TC],0x8000000000000000
 	.section	".text"
 	
 /* double [fp1] trunc (double x [fp1])
@@ -31,7 +33,7 @@
    We set "round toward Zero" mode and trunc by adding +-2**52 then
    subtracting +-2**52.  */
 
-EALIGN (__trunc, 4, 0)
+ENTRY (__trunc)
 	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
 	lfd	fp13,.LC0@toc(2)
@@ -44,18 +46,17 @@ EALIGN (__trunc, 4, 0)
 	ble-	cr6,.L4
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous truncing mode.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsub	fp1,fp1,fp13	/* x-= TWO52;  */
 	fadd	fp1,fp1,fp13	/* x+= TWO52;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x != 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	lfd	fp1,.LC2@toc(2)	/* x must be -0.0 for the 0.0 case.  */	
 	blr
 	END (__trunc)
 
diff --git a/sysdeps/powerpc/powerpc64/fpu/s_truncf.S b/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
index 15f53da8ca..b38b722a6f 100644
--- a/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
+++ b/sysdeps/powerpc/powerpc64/fpu/s_truncf.S
@@ -21,7 +21,9 @@
 
 	.section	".toc","aw"
 .LC0:	/* 2**23 */
-	.tc FD_4b000000_0[TC],0x4b00000000000000
+	.tc FD_41600000_0[TC],0x4160000000000000
+.LC2:	/* -0.0 */
+	.tc FD_80000000_0[TC],0x8000000000000000
 	.section	".text"
 	
 /* float [fp1] truncf (float x [fp1])
@@ -31,10 +33,10 @@
    We set "round toward Zero" mode and trunc by adding +-2**23 then
    subtracting +-2**23.  */
 
-EALIGN (__truncf, 4, 0)
+ENTRY (__truncf)
 	CALL_MCOUNT 0
 	mffs	fp11		/* Save current FPU rounding mode.  */
-	lfs	fp13,.LC0@toc(2)
+	lfd	fp13,.LC0@toc(2)
 	fabs	fp0,fp1
 	fsubs	fp12,fp13,fp13	/* generate 0.0  */
 	fcmpu	cr7,fp0,fp13	/* if (fabs(x) > TWO23)  */
@@ -44,18 +46,17 @@ EALIGN (__truncf, 4, 0)
 	ble-	cr6,.L4
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
-	fabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = 0.0; */
+.L9:	
 	mtfsf	0x01,fp11	/* restore previous truncing mode.  */
 	blr
 .L4:
 	bge-	cr6,.L9		/* if (x < 0.0)  */
 	fsubs	fp1,fp1,fp13	/* x-= TWO23;  */
 	fadds	fp1,fp1,fp13	/* x+= TWO23;  */
-	fnabs	fp1,fp1		/* if (x == 0.0)  */
-				/* x = -0.0; */
-.L9:
+	fcmpu	cr5,fp1,fp12	/* if (x != 0.0)  */
 	mtfsf	0x01,fp11	/* restore previous rounding mode.  */
+	bnelr+	cr5
+	lfd	fp1,.LC2@toc(2)	/* x must be -0.0 for the 0.0 case.  */	
 	blr
 	END (__truncf)
 
diff --git a/sysdeps/s390/bits/link.h b/sysdeps/s390/bits/link.h
index 70f0043820..fc1fba363a 100644
--- a/sysdeps/s390/bits/link.h
+++ b/sysdeps/s390/bits/link.h
@@ -1,111 +1,13 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef	_LINK_H
-# error "Never include <bits/link.h> directly; use <link.h> instead."
-#endif
-
-
-#if __ELF_NATIVE_CLASS == 32
-
-/* Registers for entry into PLT on s390-32.  */
-typedef struct La_s390_32_regs
-{
-  uint32_t lr_r2;
-  uint32_t lr_r3;
-  uint32_t lr_r4;
-  uint32_t lr_r5;
-  uint32_t lr_r6;
-  double lr_fp0;
-  double lr_fp2;
-} La_s390_32_regs;
-
-/* Return values for calls from PLT on s390-32.  */
-typedef struct La_s390_32_retval
-{
-  uint32_t lrv_r2;
-  uint32_t lrv_r3;
-  double lrv_fp0;
-} La_s390_32_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf32_Addr la_s390_32_gnu_pltenter (Elf32_Sym *__sym,
-					   unsigned int __ndx,
-					   uintptr_t *__refcook,
-					   uintptr_t *__defcook,
-					   La_s390_32_regs *__regs,
-					   unsigned int *__flags,
-					   const char *__symname,
-					   long int *__framesizep);
-extern unsigned int la_s390_32_gnu_pltexit (Elf32_Sym *__sym,
-					    unsigned int __ndx,
-					    uintptr_t *__refcook,
-					    uintptr_t *__defcook,
-					    const La_s390_32_regs *__inregs,
-					    La_s390_32_retval *__outregs,
-					    const char *symname);
-
-__END_DECLS
-
+#if __WORDSIZE == 64
+struct link_map_machine
+  {
+    Elf64_Addr plt; /* Address of .plt + 0x2e */
+    Elf64_Addr gotplt; /* Address of .got + 0x18 */
+  };
 #else
-
-/* Registers for entry into PLT on s390-64.  */
-typedef struct La_s390_64_regs
-{
-  uint64_t lr_r2;
-  uint64_t lr_r3;
-  uint64_t lr_r4;
-  uint64_t lr_r5;
-  uint64_t lr_r6;
-  double lr_fp0;
-  double lr_fp2;
-  double lr_fp4;
-  double lr_fp6;
-} La_s390_64_regs;
-
-/* Return values for calls from PLT on s390-64.  */
-typedef struct La_s390_64_retval
-{
-  uint64_t lrv_r2;
-  double lrv_fp0;
-} La_s390_64_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf64_Addr la_s390_64_gnu_pltenter (Elf64_Sym *__sym,
-					   unsigned int __ndx,
-					   uintptr_t *__refcook,
-					   uintptr_t *__defcook,
-					   La_s390_64_regs *__regs,
-					   unsigned int *__flags,
-					   const char *__symname,
-					   long int *__framesizep);
-extern unsigned int la_s390_64_gnu_pltexit (Elf64_Sym *__sym,
-					    unsigned int __ndx,
-					    uintptr_t *__refcook,
-					    uintptr_t *__defcook,
-					    const La_s390_64_regs *__inregs,
-					    La_s390_64_retval *__outregs,
-					    const char *symname);
-
-__END_DECLS
-
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt + 0x2c */
+    Elf32_Addr gotplt; /* Address of .got + 0x0c */
+  };
 #endif
diff --git a/sysdeps/s390/bits/linkmap.h b/sysdeps/s390/bits/linkmap.h
deleted file mode 100644
index fc1fba363a..0000000000
--- a/sysdeps/s390/bits/linkmap.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#if __WORDSIZE == 64
-struct link_map_machine
-  {
-    Elf64_Addr plt; /* Address of .plt + 0x2e */
-    Elf64_Addr gotplt; /* Address of .got + 0x18 */
-  };
-#else
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt + 0x2c */
-    Elf32_Addr gotplt; /* Address of .got + 0x0c */
-  };
-#endif
diff --git a/sysdeps/s390/bits/string.h b/sysdeps/s390/bits/string.h
index d83df39bb2..7134827eb0 100644
--- a/sysdeps/s390/bits/string.h
+++ b/sysdeps/s390/bits/string.h
@@ -51,7 +51,7 @@ strlen (__const char *__str)
 			  "0: srst  %0,%1\n"
 			  "   jo    0b\n"
 			  : "+&a" (__ptr), "+&a" (__tmp) : 
-			  : "cc", "memory", "0" );
+			  : "cc", "0" );
     return (size_t) (__ptr - __str);
 }
 #endif
@@ -105,7 +105,7 @@ strncpy (char *__dest, __const char *__src, size_t __n)
 #endif
                             "4:"
                             : "+&a" (__ptr), "+&a" (__n) : "a" (__diff)
-                            : "cc", "memory", "0" );
+                            : "cc", "0" );
     }
     return __ret;
 }
@@ -134,7 +134,7 @@ strcat(char *__dest, const char *__src)
 			  "0: mvst  %0,%1\n"
 			  "   jo    0b"
 			  : "+&a" (__ptr), "+&a" (__src) :
-			  : "cc", "memory", "0" );
+			  : "cc", "0" );
     return __ret;
 }
 #endif
@@ -157,7 +157,7 @@ strncat (char *__dest, __const char *__src, size_t __n)
 			    "0: srst  %0,%1\n"
 			  "   jo    0b\n"
 			    : "+&a" (__ptr), "+&a" (__tmp) :
-			    : "cc", "memory", "0" );
+			    : "cc", "0" );
 
       __diff = (size_t) (__ptr - __src);
       __tmp = (char *) __src;
@@ -175,7 +175,7 @@ strncat (char *__dest, __const char *__src, size_t __n)
                             "   stc   0,1(%2,%0)\n"
 			    "2:"
                             : "+&a" (__tmp), "+&a" (__n) : "a" (__diff)
-                            : "cc", "memory", "0" );
+                            : "cc", "0" );
 
     }
     return __ret;
@@ -200,7 +200,7 @@ memchr (__const void *__str, int __c, size_t __n)
                           "   la    %0,0\n"
                           "1:"
 			  : "+&a" (__ptr), "+&a" (__tmp) : "d" (__c)
-			  : "cc", "memory", "0" );
+			  : "cc", "0" );
     return __ptr;
 }
 #endif
@@ -222,7 +222,7 @@ strcmp (__const char *__s1, __const char *__s2)
 			  "   ipm   %0\n"
 			  "   srl   %0,28"
 			  : "=d" (__ret), "+&a" (__p1), "+&a" (__p2) : 
-			  : "cc", "memory", "0" );
+			  : "cc", "0" );
     __ret = (__ret == 0) ? 0 : (__ret == 1) ? -1 : 1;
     return __ret;
 }
diff --git a/sysdeps/s390/s390-32/dl-machine.h b/sysdeps/s390/s390-32/dl-machine.h
index 8bbf858fbf..52922a813b 100644
--- a/sysdeps/s390/s390-32/dl-machine.h
+++ b/sysdeps/s390/s390-32/dl-machine.h
@@ -1,6 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  S390 Version.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
-   Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
    Contributed by Carl Pederson & Martin Schwidefsky.
    This file is part of the GNU C Library.
 
@@ -22,6 +21,7 @@
 #ifndef dl_machine_h
 #define dl_machine_h
 
+
 #define ELF_MACHINE_NAME "s390"
 
 #include <sys/param.h>
@@ -112,8 +112,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	{
 	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
 
-	  if (GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), l))
+	  if (_dl_name_match_p (GLRO(dl_profile), l))
 	    /* This is the object we are looking for.  Say that we really
 	       want profiling and the timers are started.  */
 	    GL(dl_profile_map) = l;
@@ -127,6 +126,124 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   return lazy;
 }
 
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.  */
+
+/* s390:
+   Arguments are in registers.
+   r2 - r6 hold the original parameters for the function call; fixup
+   and trampoline code use r0-r5 and r14-15.  For the correct function
+   call r2-r5 and r14-15 must be restored.
+   Arguments from the PLT are stored at 24(r15) and 28(r15)
+   and must be moved to r2 and r3 for the fixup call (see elf32-s390.c
+   in the binutils for the PLT code).
+   The fixup function returns the resolved function address in r2.
+*/
+#ifndef PROF
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE \
+  asm ( "\
+    .text\n\
+    .globl _dl_runtime_resolve\n\
+    .type _dl_runtime_resolve, @function\n\
+    .align 16\n\
+    " CFI_STARTPROC "\n\
+_dl_runtime_resolve:\n\
+    # save registers\n\
+    stm    2,5,32(15)\n\
+    st     14,48(15)\n\
+    lr     0,15\n\
+    ahi    15,-96\n\
+    " CFI_ADJUST_CFA_OFFSET(96)"\n\
+    st     0,0(15)\n\
+    # load args saved by PLT\n\
+    lm     2,3,120(15)\n\
+    basr   1,0\n\
+0:  ahi    1,1f-0b\n\
+    l      14,0(1)\n\
+    bas    14,0(14,1)   # call fixup\n\
+    lr     1,2          # function addr returned in r2\n\
+    # restore registers\n\
+    ahi    15,96\n\
+    " CFI_ADJUST_CFA_OFFSET(-96)" \n\
+    l      14,48(15)\n\
+    lm     2,5,32(15)\n\
+    br     1\n\
+1:  .long  fixup-1b\n\
+    " CFI_ENDPROC "\n\
+    .size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+\n\
+    .globl _dl_runtime_profile\n\
+    .type _dl_runtime_profile, @function\n\
+    .align 16\n\
+    " CFI_STARTPROC "\n\
+_dl_runtime_profile:\n\
+    # save registers\n\
+    stm    2,5,32(15)\n\
+    st     14,48(15)\n\
+    lr     0,15\n\
+    ahi    15,-96\n\
+    " CFI_ADJUST_CFA_OFFSET(96)"\n\
+    st     0,0(15)\n\
+    # load args saved by PLT\n\
+    lm     2,3,120(15)\n\
+    # load return address as third parameter\n\
+    lr     4,14\n\
+    basr   1,0\n\
+0:  ahi    1,1f-0b\n\
+    l      14,0(1)\n\
+    bas    14,0(14,1)   # call fixup\n\
+    lr     1,2          # function addr returned in r2\n\
+    # restore registers\n\
+    ahi    15,96\n\
+    " CFI_ADJUST_CFA_OFFSET(-96)" \n\
+    l      14,48(15)\n\
+    lm     2,5,32(15)\n\
+    br     1\n\
+1:  .long  profile_fixup-1b\n\
+    " CFI_ENDPROC "\n\
+    .size _dl_runtime_profile, .-_dl_runtime_profile\n\
+");
+#else
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE \
+  asm ( "\
+    .text\n\
+    .globl _dl_runtime_resolve\n\
+    .globl _dl_runtime_profile\n\
+    .type _dl_runtime_resolve, @function\n\
+    .type _dl_runtime_profile, @function\n\
+    .align 16\n\
+    " CFI_STARTPROC "\n\
+_dl_runtime_resolve:\n\
+_dl_runtime_profile:\n\
+    # save registers\n\
+    stm    2,5,32(15)\n\
+    st     14,48(15)\n\
+    lr     0,15\n\
+    ahi    15,-96\n\
+    " CFI_ADJUST_CFA_OFFSET(96)"\n\
+    st     0,0(15)\n\
+    # load args saved by PLT\n\
+    lm     2,3,120(15)\n\
+    # load return address as third parameter\n\
+    lr     4,14\n\
+    basr   1,0\n\
+0:  ahi    1,1f-0b\n\
+    l      14,0(1)\n\
+    bas    14,0(14,1)   # call fixup\n\
+    lr     1,2          # function addr returned in r2\n\
+    # restore registers\n\
+    ahi    15,96\n\
+    " CFI_ADJUST_CFA_OFFSET(-96)" \n\
+    l      14,48(15)\n\
+    lm     2,5,32(15)\n\
+    br     1\n\
+1:  .long  fixup-1b\n\
+    " CFI_ENDPROC "\n\
+    .size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+    .size _dl_runtime_profile, .-_dl_runtime_profile\n\
+");
+#endif
+
 /* Mask identifying addresses reserved for the user program,
    where the dynamic linker should not map anything.  */
 #define ELF_MACHINE_USER_ADDRESS_MASK   0xf8000000UL
@@ -258,20 +375,15 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc,
   return value;
 }
 
-/* Names of the architecture-specific auditing callback functions.  */
-#define ARCH_LA_PLTENTER s390_32_gnu_pltenter
-#define ARCH_LA_PLTEXIT s390_32_gnu_pltexit
-
 #endif /* !dl_machine_h */
 
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 
 /* Perform the relocation specified by RELOC and SYM (which is fully resolved).
    MAP is the object containing the reloc.  */
 
-auto inline void
-__attribute__ ((always_inline))
+static inline void
 elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 		  const Elf32_Sym *sym, const struct r_found_version *version,
 		  void *const reloc_addr_arg)
@@ -305,8 +417,17 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
 #ifndef RESOLVE_CONFLICT_FIND_MAP
       const Elf32_Sym *const refsym = sym;
 #endif
+#if defined USE_TLS && !defined RTLD_BOOTSTRAP
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
       Elf32_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
+#else
+      Elf32_Addr value = RESOLVE (&sym, version, r_type);
+
+# ifndef RTLD_BOOTSTRAP
+      if (sym)
+# endif
+	value += sym->st_value;
+#endif /* use TLS and !RTLD_BOOTSTRAP */
 
       switch (r_type)
 	{
@@ -418,8 +539,7 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
     }
 }
 
-auto inline void
-__attribute__ ((always_inline))
+static inline void
 elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
 			   void *const reloc_addr_arg)
 {
@@ -427,8 +547,7 @@ elf_machine_rela_relative (Elf32_Addr l_addr, const Elf32_Rela *reloc,
   *reloc_addr = l_addr + reloc->r_addend;
 }
 
-auto inline void
-__attribute__ ((always_inline))
+static inline void
 elf_machine_lazy_rel (struct link_map *map,
 		      Elf32_Addr l_addr, const Elf32_Rela *reloc)
 {
@@ -448,4 +567,4 @@ elf_machine_lazy_rel (struct link_map *map,
     _dl_reloc_bad_type (map, r_type, 1);
 }
 
-#endif /* RESOLVE_MAP */
+#endif /* RESOLVE */
diff --git a/sysdeps/s390/s390-32/dl-trampoline.S b/sysdeps/s390/s390-32/dl-trampoline.S
deleted file mode 100644
index 4ee2295b89..0000000000
--- a/sysdeps/s390/s390-32/dl-trampoline.S
+++ /dev/null
@@ -1,133 +0,0 @@
-/* PLT trampolines.  s390 version.
-   Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/* This code is used in dl-runtime.c to call the `fixup' function
-   and then redirect to the address it returns.  */
-
-/* The PLT stubs will call _dl_runtime_resolve/_dl_runtime_profile
- * with the following linkage:
- *   r2 - r6 : parameter registers
- *   f0, f2 : floating point parameter registers
- *   24(r15), 28(r15) : PLT arguments PLT1, PLT2
- *   96(r15) : additional stack parameters
- * The normal clobber rules for function calls apply:
- *   r0 - r5 : call clobbered
- *   r6 - r13 :	 call saved
- *   r14 : return address (call clobbered)
- *   r15 : stack pointer (call saved)
- *   f4, f6 : call saved
- *   f0 - f3, f5, f7 - f15 : call clobbered
- */
-
-#include <sysdep.h>
-
-	.text
-	.globl _dl_runtime_resolve
-	.type _dl_runtime_resolve, @function
-	cfi_startproc
-	.align 16
-_dl_runtime_resolve:
-	stm    %r2,%r5,32(%r15)		# save registers
-	st     %r14,8(%r15)
-	lr     %r0,%r15			# create stack frame
-	ahi    %r15,-96
-	cfi_adjust_cfa_offset (96)
-	st     0,0(%r15)
-	lm     %r2,%r3,120(%r15)	# load args saved by PLT
-	basr   %r1,0
-0:	l      %r14,1f-0b(%r1)
-	bas    %r14,0(%r14,%r1)		# call resolver
-	lr     %r1,%r2			# function addr returned in r2
-	ahi    %r15,96			# remove stack frame
-	cfi_adjust_cfa_offset (-96)
-	l      %r14,8(15)		# restore registers
-	lm     %r2,%r5,32(%r15)
-	br     %r1
-1:	.long  _dl_fixup - 0b
-	cfi_endproc
-	.size _dl_runtime_resolve, .-_dl_runtime_resolve
-
-
-	.globl _dl_runtime_profile
-	.type _dl_runtime_profile, @function
-	cfi_startproc
-	.align 16
-_dl_runtime_profile:
-	stm    %r2,%r6,32(%r15)		# save registers
-	std    %f0,56(%r15)
-	std    %f2,64(%r15)
-	st     %r6,8(%r15)
-	st     %r12,12(%r15)
-	st     %r14,16(%r15)
-	lr     %r12,%r15		# create stack frame
-	cfi_def_cfa_register (12)
-	ahi    %r15,-96
-	st     %r12,0(%r15)
-	lm     %r2,%r3,24(%r12)		# load arguments saved by PLT
-	lr     %r4,%r14			# return address as third parameter
-	basr   %r1,0
-0:	l      %r14,6f-0b(%r1)
-	la     %r5,32(%r12)		# pointer to struct La_s390_32_regs
-	la     %r6,20(%r12)		# long int * framesize
-	bas    %r14,0(%r14,%r1)		# call resolver
-	lr     %r1,%r2			# function addr returned in r2
-	icm    %r0,15,20(%r12)		# load & test framesize
-	jnm    2f
-	lm     %r2,%r6,32(%r12)
-	ld     %f0,56(%r12)
-	ld     %f2,64(%r12)
-	basr   %r14,%r1			# call resolved function
-1:	lr     %r15,%r12		# remove stack frame
-	cfi_def_cfa_register (15)
-	l      %r14,16(%r15)		# restore registers
-	l      %r12,12(%r15)
-	l      %r6,8(%r15)
-	br     %r14
-	cfi_def_cfa_register (12)
-2:	jz     4f			# framesize == 0 ?
-	ahi    %r0,7			# align framesize to 8
-	lhi    %r2,-8
-	nr     %r0,%r2
-	slr    %r15,%r0			# make room for framesize bytes
-	st     %r12,0(%r15)
-	la     %r2,96(%r15)
-	la     %r3,96(%r12)
-	srl    %r0,3
-3:	mvc    0(8,%r2),0(%r3)		# copy additional parameters
-	la     %r2,8(%r2)
-	la     %r3,8(%r3)
-	brct   %r0,3b
-4:	lm     %r2,%r6,32(%r12)		# load register parameters
-	ld     %f0,56(%r12)
-	ld     %f2,64(%r12)
-	basr   %r14,%r1			# call resolved function
-	stm    %r2,%r3,72(%r12)
-	std    %f0,80(%r12)
-	lm     %r2,%r3,24(%r12)		# load arguments saved by PLT
-	basr   %r1,0
-5:	l      %r14,7f-5b(%r1)
-	la     %r4,32(%r12)		# pointer to struct La_s390_32_regs
-	la     %r5,72(%r12)		# pointer to struct La_s390_32_retval
-	basr   %r14,%r1			# call _dl_call_pltexit	
-	j      1b
-6:	.long  _dl_profile_fixup - 0b
-7:	.long  _dl_call_pltexit - 5b
-	cfi_endproc
-	.size _dl_runtime_profile, .-_dl_runtime_profile
-
diff --git a/sysdeps/s390/s390-64/dl-machine.h b/sysdeps/s390/s390-64/dl-machine.h
index 5026a2edad..82ece0be0f 100644
--- a/sysdeps/s390/s390-64/dl-machine.h
+++ b/sysdeps/s390/s390-64/dl-machine.h
@@ -1,6 +1,6 @@
 /* Machine-dependent ELF dynamic relocation inline functions.
    64 bit S/390 Version.
-   Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
    Contributed by Martin Schwidefsky (schwidefsky@de.ibm.com).
    This file is part of the GNU C Library.
 
@@ -105,8 +105,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	{
 	  got[2] = (Elf64_Addr) &_dl_runtime_profile;
 
-	  if (GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), l))
+	  if (_dl_name_match_p (GLRO(dl_profile), l))
 	    /* This is the object we are looking for.  Say that we really
 	       want profiling and the timers are started.  */
 	    GL(dl_profile_map) = l;
@@ -120,6 +119,112 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   return lazy;
 }
 
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.	 */
+
+/* s390:
+   Arguments are in registers.
+   r2 - r6 hold the original parameters for the function call; fixup
+   and trampoline code use r0-r5 and r14-15.  For the correct function
+   call r2-r5 and r14-15 must be restored.
+   Arguments from the PLT are stored at 48(r15) and 56(r15)
+   and must be moved to r2 and r3 for the fixup call (see elf64-s390.c
+   in the binutils for the PLT code).
+   The fixup function returns the resolved function address in r2.
+*/
+#ifndef PROF
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE \
+  asm ( "\
+    .text\n\
+    .globl _dl_runtime_resolve\n\
+    .type _dl_runtime_resolve, @function\n\
+    .align 16\n\
+    " CFI_STARTPROC "\n\
+_dl_runtime_resolve:\n\
+    # save registers\n\
+    stmg   2,5,64(15)\n\
+    stg	   14,96(15)\n\
+    lgr	   0,15\n\
+    aghi   15,-160\n\
+    " CFI_ADJUST_CFA_OFFSET(160)"\n\
+    stg	   0,0(15)\n\
+    # load args saved by PLT\n\
+    lmg	   2,3,208(15)\n\
+    brasl  14,fixup	# call fixup\n\
+    lgr	   1,2		# function addr returned in r2\n\
+    # restore registers\n\
+    aghi   15,160\n\
+    " CFI_ADJUST_CFA_OFFSET(-160)" \n\
+    lg	   14,96(15)\n\
+    lmg	   2,5,64(15)\n\
+    br	   1\n\
+    " CFI_ENDPROC "\n\
+    .size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+\n\
+    .globl _dl_runtime_profile\n\
+    .type _dl_runtime_profile, @function\n\
+    .align 16\n\
+    " CFI_STARTPROC "\n\
+_dl_runtime_profile:\n\
+    # save registers\n\
+    stmg   2,5,64(15)\n\
+    stg	   14,96(15)\n\
+    lgr	   0,15\n\
+    aghi   15,-160\n\
+    " CFI_ADJUST_CFA_OFFSET(160)"\n\
+    stg	   0,0(15)\n\
+    # load args saved by PLT\n\
+    lmg	   2,3,208(15)\n\
+    # load return address as third parameter\n\
+    lgr	   4,14\n\
+    brasl  14,profile_fixup  # call fixup\n\
+    lgr	   1,2		# function addr returned in r2\n\
+    # restore registers\n\
+    aghi   15,160\n\
+    " CFI_ADJUST_CFA_OFFSET(-160)" \n\
+    lg	   14,96(15)\n\
+    lmg	   2,5,64(15)\n\
+    br	   1\n\
+    " CFI_ENDPROC "\n\
+    .size _dl_runtime_profile, .-_dl_runtime_profile\n\
+");
+#else
+#define ELF_MACHINE_RUNTIME_TRAMPOLINE \
+  asm ( "\
+    .text\n\
+    .globl _dl_runtime_resolve\n\
+    .globl _dl_runtime_profile\n\
+    .type _dl_runtime_resolve, @function\n\
+    .type _dl_runtime_profile, @function\n\
+    .align 16\n\
+    " CFI_STARTPROC "\n\
+_dl_runtime_resolve:\n\
+_dl_runtime_profile:\n\
+    # save registers\n\
+    stmg   2,5,64(15)\n\
+    stg	   14,96(15)\n\
+    lgr	   0,15\n\
+    aghi   15,-160\n\
+    " CFI_ADJUST_CFA_OFFSET(160)"\n\
+    stg	   0,0(15)\n\
+    # load args saved by PLT\n\
+    lmg	   2,3,208(15)\n\
+    # load return address as third parameter\n\
+    lgr	   4,14\n\
+    brasl  14,profile_fixup	 # call fixup\n\
+    lgr	   1,2		# function addr returned in r2\n\
+    # restore registers\n\
+    aghi   15,160\n\
+    " CFI_ADJUST_CFA_OFFSET(-160)" \n\
+    lg	   14,96(15)\n\
+    lmg	   2,5,64(15)\n\
+    br	   1\n\
+    " CFI_ENDPROC "\n\
+    .size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+    .size _dl_runtime_profile, .-_dl_runtime_profile\n\
+");
+#endif
+
 /* Initial entry point code for the dynamic linker.
    The C function `_dl_start' is the real entry point;
    its return value is the user program's entry point.	*/
@@ -238,19 +343,14 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
   return value;
 }
 
-/* Names of the architecture-specific auditing callback functions.  */
-#define ARCH_LA_PLTENTER s390_64_gnu_pltenter
-#define ARCH_LA_PLTEXIT s390_64_gnu_pltexit
-
 #endif /* !dl_machine_h */
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 
 /* Perform the relocation specified by RELOC and SYM (which is fully resolved).
    MAP is the object containing the reloc.  */
 
-auto inline void
-__attribute__ ((always_inline))
+static inline void
 elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
 		  const Elf64_Sym *sym, const struct r_found_version *version,
 		  void *const reloc_addr_arg)
@@ -284,8 +384,17 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
 #ifndef RESOLVE_CONFLICT_FIND_MAP
       const Elf64_Sym *const refsym = sym;
 #endif
+#if defined USE_TLS && !defined RTLD_BOOTSTRAP
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
       Elf64_Addr value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
+#else
+      Elf64_Addr value = RESOLVE (&sym, version, r_type);
+
+# ifndef RTLD_BOOTSTRAP
+      if (sym)
+# endif
+	value += sym->st_value;
+#endif /* use TLS and !RTLD_BOOTSTRAP */
 
       switch (r_type)
 	{
@@ -409,8 +518,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
     }
 }
 
-auto inline void
-__attribute__ ((always_inline))
+static inline void
 elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
 			   void *const reloc_addr_arg)
 {
@@ -418,8 +526,7 @@ elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
   *reloc_addr = l_addr + reloc->r_addend;
 }
 
-auto inline void
-__attribute__ ((always_inline))
+static inline void
 elf_machine_lazy_rel (struct link_map *map,
 		      Elf64_Addr l_addr, const Elf64_Rela *reloc)
 {
@@ -439,4 +546,4 @@ elf_machine_lazy_rel (struct link_map *map,
     _dl_reloc_bad_type (map, r_type, 1);
 }
 
-#endif /* RESOLVE_MAP */
+#endif /* RESOLVE */
diff --git a/sysdeps/s390/s390-64/dl-trampoline.S b/sysdeps/s390/s390-64/dl-trampoline.S
deleted file mode 100644
index 215d869803..0000000000
--- a/sysdeps/s390/s390-64/dl-trampoline.S
+++ /dev/null
@@ -1,126 +0,0 @@
-/* PLT trampolines.  s390 version.
-   Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/* The PLT stubs will call _dl_runtime_resolve/_dl_runtime_profile
- * with the following linkage:
- *   r2 - r6 : parameter registers
- *   f0, f2, f4, f6 : floating point parameter registers
- *   24(r15), 28(r15) : PLT arguments PLT1, PLT2
- *   96(r15) : additional stack parameters
- * The normal clobber rules for function calls apply:
- *   r0 - r5 : call clobbered
- *   r6 - r13 :	 call saved
- *   r14 : return address (call clobbered)
- *   r15 : stack pointer (call saved)
- *   f1, f3, f5, f7 : call saved
- *   f0 - f3, f5, f7 - f15 : call clobbered
- */
-
-#include <sysdep.h>
-
-	.text
-	.globl _dl_runtime_resolve
-	.type _dl_runtime_resolve, @function
-	cfi_startproc
-	.align 16
-_dl_runtime_resolve:
-	stmg   2,5,64(15)	# save registers
-	stg    14,96(15)
-	lgr    0,15		# create stack frame
-	aghi   15,-160
-	cfi_adjust_cfa_offset (160)
-	stg    0,0(15)
-	lmg    2,3,208(15)	# load args saved by PLT
-	brasl  14,_dl_fixup	# call fixup
-	lgr    1,2		# function addr returned in r2
-	aghi   15,160		# remove stack frame
-	cfi_adjust_cfa_offset (-160)
-	lg     14,96(15)	# restore registers
-	lmg    2,5,64(15)
-	br     1
-	cfi_endproc
-	.size _dl_runtime_resolve, .-_dl_runtime_resolve
-
-
-	.globl _dl_runtime_profile
-	.type _dl_runtime_profile, @function
-	cfi_startproc
-	.align 16
-_dl_runtime_profile:
-	stmg   %r2,%r6,64(%r15)		# save registers
-	std    %f0,104(%r15)
-	std    %f2,112(%r15)
-	std    %f4,120(%r15)
-	std    %f6,128(%r15)
-	stg    %r6,16(%r15)
-	stg    %r12,24(%r15)
-	stg    %r14,32(%r15)
-	lgr    %r12,%r15		# create stack frame
-	cfi_def_cfa_register (12)
-	aghi   %r15,-160
-	stg    %r12,0(%r15)
-	lmg    %r2,%r3,48(%r12)		# load arguments saved by PLT
-	lgr    %r4,%r14			# return address as third parameter
-	la     %r5,64(%r12)		# pointer to struct La_s390_32_regs
-	la     %r6,40(%r12)		# long int * framesize
-	brasl  %r14,_dl_profile_fixup	# call resolver
-	lgr    %r1,%r2			# function addr returned in r2
-	lg     %r0,40(%r12)		# load framesize
-	ltgr   %r0,%r0
-	jnm    1f
-	lmg    %r2,%r6,64(%r12)
-	ld     %f0,104(%r12)
-	ld     %f2,112(%r12)
-	ld     %f4,120(%r12)
-	ld     %f6,128(%r12)
-	basr   %r14,%r1			# call resolved function
-0:	lr     %r15,%r12		# remove stack frame
-	cfi_def_cfa_register (15)
-	lg     %r14,32(%r15)		# restore registers
-	lg     %r12,24(%r15)
-	lg     %r6,16(%r15)
-	br     %r14
-	cfi_def_cfa_register (12)
-1:	jz     4f			# framesize == 0 ?
-	aghi   %r0,7			# align framesize to 8
-	nill   %r0,0xfff8
-	slgr   %r15,%r0			# make room for framesize bytes
-	stg    %r12,0(%r15)
-	la     %r2,160(%r15)
-	la     %r3,160(%r12)
-	srlg   %r0,%r0,3
-3:	mvc    0(8,%r2),0(%r3)		# copy additional parameters
-	la     %r2,8(%r2)
-	la     %r3,8(%r3)
-	brctg  %r0,3b
-4:	lmg    %r2,%r6,64(%r12)		# load register parameters
-	ld     %f0,104(%r12)
-	ld     %f2,112(%r12)
-	ld     %f4,120(%r12)
-	ld     %f6,128(%r12)
-	basr   %r14,%r1			# call resolved function
-	stg    %r2,136(%r12)
-	std    %f0,144(%r12)
-	lmg    %r2,%r3,48(%r12)		# load arguments saved by PLT
-	la     %r4,32(%r12)		# pointer to struct La_s390_32_regs
-	la     %r5,72(%r12)		# pointer to struct La_s390_32_retval
-	brasl  %r14,_dl_call_pltexit
-	j      0b
-	cfi_endproc
-	.size _dl_runtime_profile, .-_dl_runtime_profile
diff --git a/sysdeps/sh/bits/link.h b/sysdeps/sh/bits/link.h
index 2826677336..bb2fbb5f16 100644
--- a/sysdeps/sh/bits/link.h
+++ b/sysdeps/sh/bits/link.h
@@ -1,70 +1,5 @@
-/* Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef	_LINK_H
-# error "Never include <bits/link.h> directly; use <link.h> instead."
-#endif
-
-
-/* Registers for entry into PLT on SH.  */
-typedef struct La_sh_regs
-{
-  uint32_t lr_r2;
-  uint32_t lr_r3;
-  uint32_t lr_r4;
-  uint32_t lr_r5;
-  uint32_t lr_r6;
-  uint32_t lr_r7;
-  uint32_t lr_fpscr;
-  float lr_fr4;
-  float lr_fr5;
-  float lr_fr6;
-  float lr_fr7;
-  float lr_fr8;
-  float lr_fr9;
-  float lr_fr10;
-  float lr_fr11;
-} La_sh_regs;
-
-/* Return values for calls from PLT on SH.  */
-typedef struct La_sh_retval
-{
-  uint32_t lrv_r0;
-  uint32_t lrv_r1;
-  float lrv_fr0;
-  float lrv_fr1;
-} La_sh_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf32_Addr la_sh_gnu_pltenter (Elf32_Sym *__sym, unsigned int __ndx,
-				      uintptr_t *__refcook,
-				      uintptr_t *__defcook,
-				      La_sh_regs *__regs,
-				      unsigned int *__flags,
-				      const char *__symname,
-				      long int *__framesizep);
-extern unsigned int la_sh_gnu_pltexit (Elf32_Sym *__sym, unsigned int __ndx,
-				       uintptr_t *__refcook,
-				       uintptr_t *__defcook,
-				       const La_sh_regs *__inregs,
-				       La_sh_retval *__outregs,
-				       const char *symname);
-
-__END_DECLS
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt + 36 */
+    Elf32_Addr gotplt; /* Address of .got + 0x0c */
+  };
diff --git a/sysdeps/sh/bits/linkmap.h b/sysdeps/sh/bits/linkmap.h
deleted file mode 100644
index bb2fbb5f16..0000000000
--- a/sysdeps/sh/bits/linkmap.h
+++ /dev/null
@@ -1,5 +0,0 @@
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt + 36 */
-    Elf32_Addr gotplt; /* Address of .got + 0x0c */
-  };
diff --git a/sysdeps/sh/dl-machine.h b/sysdeps/sh/dl-machine.h
index b66b4f0e42..271666a2a3 100644
--- a/sysdeps/sh/dl-machine.h
+++ b/sysdeps/sh/dl-machine.h
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  SH version.
-   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
+   Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -21,6 +21,8 @@
 #ifndef dl_machine_h
 #define dl_machine_h
 
+/* Only dummy. This doesn't work. */
+
 #define ELF_MACHINE_NAME "SH"
 
 #include <sys/param.h>
@@ -104,9 +106,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	{
 	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
 	  /* Say that we really want profiling and the timers are started.  */
-	  if (GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), l))
-	    GL(dl_profile_map) = l;
+	  GL(dl_profile_map) = l;
 	}
       else
 	/* This function will get called to fix up the GOT entry indicated by
@@ -116,8 +116,273 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   return lazy;
 }
 
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.	 */
+
 #define ELF_MACHINE_RUNTIME_FIXUP_ARGS int plt_type
 
+#ifdef SHARED
+#define FUN_ADDR	"\
+	mov.l 1f,r2\n\
+	mova 1f,r0\n\
+        bra 2f\n\
+	 add r0,r2		! Get GOT address in r2\n\
+0:	.align 2\n\
+1:	.long _GLOBAL_OFFSET_TABLE_\n\
+2:	mov.l 3f,r0\n\
+	add r2,r0"
+#define GOTJMP(x)	#x "@GOTOFF"
+#else
+#define FUN_ADDR	"\
+	mov.l 3f,r0"
+#define GOTJMP(x)	#x
+#endif
+
+#ifdef HAVE_FPU
+#define FGR_SAVE	"\
+	sts.l	fpscr, @-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov	#8,r3\n\
+	swap.w	r3, r3\n\
+	lds	r3, fpscr\n\
+	fmov.s	fr11, @-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	fmov.s	fr10, @-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	fmov.s	fr9, @-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	fmov.s	fr8, @-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	fmov.s	fr7, @-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	fmov.s	fr6, @-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	fmov.s	fr5, @-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	fmov.s	fr4, @-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4)
+#define FGR_LOAD	"\
+	fmov.s	@r15+, fr4\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	fmov.s	@r15+, fr5\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	fmov.s	@r15+, fr6\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	fmov.s	@r15+, fr7\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	fmov.s	@r15+, fr8\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	fmov.s	@r15+, fr9\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	fmov.s	@r15+, fr10\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	fmov.s	@r15+, fr11\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	lds.l	@r15+, fpscr\n\
+	" CFI_ADJUST_CFA_OFFSET (-4)
+#else
+#define FGR_SAVE	""
+#define FGR_LOAD	""
+#endif
+
+#ifndef PROF
+# define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
+	.text\n\
+	.globl _dl_runtime_resolve\n\
+	.type _dl_runtime_resolve, @function\n\
+	" CFI_STARTPROC "\n\
+	.align 5\n\
+_dl_runtime_resolve:\n\
+	mov.l r2,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r3,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r4,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r5,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r6,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r7,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r12,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	movt r3			! Save T flag.\n\
+	mov.l r3,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	" FGR_SAVE "\n\
+	sts.l pr,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	tst r0,r0\n\
+	bt 1f\n\
+	mov r0,r2\n\
+1:\n\
+	mov r0,r4		! PLT type\n\
+	mov r2,r5		! link map address\n\
+	" FUN_ADDR "\n\
+	jsr @r0			! Call resolver.\n\
+	 mov r1,r6		! reloc offset\n\
+	lds.l @r15+,pr		! Get register content back.\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	" FGR_LOAD "\n\
+	mov.l @r15+,r3\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	shal r3			! Lode T flag.\n\
+	mov.l @r15+,r12\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r7\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r6\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r5\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r4\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r3\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	jmp @r0			! Jump to function address.\n\
+	 mov.l @r15+,r2\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	.align 2\n\
+3:\n\
+	.long " GOTJMP (fixup) "\n\
+	" CFI_ENDPROC "\n\
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+\n\
+	.globl _dl_runtime_profile\n\
+	.type _dl_runtime_profile, @function\n\
+	" CFI_STARTPROC "\n\
+	.align 5\n\
+_dl_runtime_profile:\n\
+	mov.l r2,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r3,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r4,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r5,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r6,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r7,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r12,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	movt r3			! Save T flag.\n\
+	mov.l r3,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	" FGR_SAVE "\n\
+	sts.l pr,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	tst r0,r0\n\
+	bt 1f\n\
+	mov r0,r2\n\
+1:\n\
+	mov r0,r4		! PLT type\n\
+	mov r2,r5		! link map address\n\
+	sts pr,r7		! return address\n\
+	" FUN_ADDR "\n\
+	jsr @r0			! Call resolver.\n\
+	 mov r1,r6		! reloc offset\n\
+	lds.l @r15+,pr		! Get register content back.\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	" FGR_LOAD "\n\
+	mov.l @r15+,r3\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	shal r3			! Lode T flag.\n\
+	mov.l @r15+,r12\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r7\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r6\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r5\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r4\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r3\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	jmp @r0			! Jump to function address.\n\
+	 mov.l @r15+,r2\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	.align 2\n\
+3:\n\
+	.long " GOTJMP (profile_fixup) "\n\
+	" CFI_ENDPROC "\n\
+	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
+	.previous\n\
+");
+#else
+# define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\
+	.text\n\
+	.globl _dl_runtime_resolve\n\
+	.globl _dl_runtime_profile\n\
+	.type _dl_runtime_resolve, @function\n\
+	.type _dl_runtime_profile, @function\n\
+	.align 5\n\
+_dl_runtime_resolve:\n\
+_dl_runtime_profile:\n\
+	mov.l r2,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r3,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r4,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r5,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r6,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r7,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	mov.l r12,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	movt r3			! Save T flag.\n\
+	mov.l r3,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	" FGR_SAVE "\n\
+	sts.l pr,@-r15\n\
+	" CFI_ADJUST_CFA_OFFSET (4) "\n\
+	tst r0,r0\n\
+	bt 1f\n\
+	mov r0,r2\n\
+1:\n\
+	mov r0,r4		! PLT type\n\
+	mov r2,r5		! link map address\n\
+	sts pr,r7		! return address\n\
+	" FUN_ADDR "\n\
+	jsr @r0			! Call resolver.\n\
+	 mov r1,r6		! reloc offset\n\
+	lds.l @r15+,pr		! Get register content back.\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	" FGR_LOAD "\n\
+	mov.l @r15+,r3\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	shal r3			! Lode T flag.\n\
+	mov.l @r15+,r12\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r7\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r6\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r5\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r4\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	mov.l @r15+,r3\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	jmp @r0			! Jump to function address.\n\
+	 mov.l @r15+,r2\n\
+	" CFI_ADJUST_CFA_OFFSET (-4) "\n\
+	.align 2\n\
+3:\n\
+	.long " GOTJMP (fixup) "\n\
+	" CFI_ENDPROC "\n\
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
+	.previous\n\
+");
+#endif
+
 /* Mask identifying addresses reserved for the user program,
    where the dynamic linker should not map anything.  */
 #define ELF_MACHINE_USER_ADDRESS_MASK	0x80000000UL
@@ -194,12 +459,6 @@ _dl_start_user:\n\
 	.long _rtld_local@GOT\n\
 .L_dl_fini:\n\
 	.long _dl_fini@GOT\n\
-	.type __fpscr_values,@object\n\
-	.global __fpscr_values\n\
-__fpscr_values:\n\
-	.long   0\n\
-	.long   0x80000\n\
-	.weak __fpscr_values\n\
 .previous\n\
 ");
 
@@ -251,12 +510,9 @@ elf_machine_plt_value (struct link_map *map, const Elf32_Rela *reloc,
   return value + reloc->r_addend;
 }
 
-#define ARCH_LA_PLTENTER sh_gnu_pltenter
-#define ARCH_LA_PLTEXIT sh_gnu_pltexit
-
 #endif /* !dl_machine_h */
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 
 /* SH never uses Elf32_Rel relocations.	 */
 #define ELF_MACHINE_NO_REL 1
@@ -323,9 +579,18 @@ elf_machine_rela (struct link_map *map, const Elf32_Rela *reloc,
   else
     {
       const Elf32_Sym *const refsym = sym;
+#if defined USE_TLS && !defined RTLD_BOOTSTRAP
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
 
-      value = sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value;
+      value = sym == NULL ? 0 : sym_map->l_addr + sym->st_value;
+#else
+
+      value = RESOLVE (&sym, version, r_type);
+# ifndef RTLD_BOOTSTRAP
+      if (sym != NULL)
+# endif
+	value += sym->st_value;
+#endif
       value += reloc->r_addend;
 
       switch (r_type)
@@ -471,4 +736,4 @@ elf_machine_lazy_rel (struct link_map *map,
     _dl_reloc_bad_type (map, ELF32_R_TYPE (reloc->r_info), 1);
 }
 
-#endif /* RESOLVE_MAP */
+#endif /* RESOLVE */
diff --git a/sysdeps/sh/dl-trampoline.S b/sysdeps/sh/dl-trampoline.S
deleted file mode 100644
index 79493d50b9..0000000000
--- a/sysdeps/sh/dl-trampoline.S
+++ /dev/null
@@ -1,431 +0,0 @@
-/* PLT trampolines.  SH version.
-   Copyright (C) 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.text
-	.globl _dl_runtime_resolve
-	.type _dl_runtime_resolve, @function
-	cfi_startproc
-	.align 5
-_dl_runtime_resolve:
-	mov.l r2,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r3,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r4,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r5,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r6,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r7,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r12,@-r15
-	cfi_adjust_cfa_offset (4)
-	sts.l macl,@-r15
-	cfi_adjust_cfa_offset (4)
-	sts.l mach,@-r15
-	cfi_adjust_cfa_offset (4)
-	movt r3			! Save T flag.
-	mov.l r3,@-r15
-	cfi_adjust_cfa_offset (4)
-#ifdef HAVE_FPU
-	sts.l	fpscr,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov	#8,r3
-	swap.w	r3,r3
-	lds	r3,fpscr
-	fmov.s	fr11,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr10,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr9,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr8,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr7,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr6,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr5,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr4,@-r15
-	cfi_adjust_cfa_offset (4)
-#endif
-	sts.l pr,@-r15
-	cfi_adjust_cfa_offset (4)
-	tst r0,r0
-	bt 1f
-	mov r0,r2
-1:
-	mov r0,r4		! PLT type
-	mov r2,r5		! link map address
-#ifdef SHARED
-	mov.l 2f,r2
-	mova 2f,r0
-	add r0,r2		! Get GOT address in r2
-	mov.l 3f,r0
-	add r2,r0
-#else
-	mov.l 3f,r0
-#endif
-	jsr @r0			! Call resolver.
-	 mov r1,r6		! reloc offset
-	lds.l @r15+,pr		! Get register content back.
-	cfi_adjust_cfa_offset (-4)
-#ifdef HAVE_FPU
-	fmov.s	@r15+,fr4
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr5
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr6
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr7
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr8
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr9
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr10
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr11
-	cfi_adjust_cfa_offset (-4)
-	lds.l	@r15+,fpscr
-	cfi_adjust_cfa_offset (-4)
-#endif
-	mov.l @r15+,r3
-	cfi_adjust_cfa_offset (-4)
-	shal r3			! Lode T flag.
-	lds.l @r15+,mach
-	cfi_adjust_cfa_offset (-4)
-	lds.l @r15+,macl
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r12
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r7
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r6
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r5
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r4
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r3
-	cfi_adjust_cfa_offset (-4)
-	jmp @r0			! Jump to function address.
-	 mov.l @r15+,r2
-	cfi_adjust_cfa_offset (-4)
-	.align 2
-#ifdef SHARED
-2:	.long _GLOBAL_OFFSET_TABLE_
-3:	.long _dl_fixup@GOTOFF
-#else
-3:	.long _dl_fixup
-#endif
-	cfi_endproc
-	.size _dl_runtime_resolve, .-_dl_runtime_resolve
-
-
-	.globl _dl_runtime_profile
-	.type _dl_runtime_profile,@function
-	cfi_startproc
-	.align 5
-_dl_runtime_profile:
-	mov.l r12,@-r15
-	cfi_adjust_cfa_offset (4)
-#ifdef HAVE_FPU
-	sts.l	fpscr,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov	#8,r12
-	swap.w	r12,r12
-	lds	r12,fpscr
-	fmov.s	fr11,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr10,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr9,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr8,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr7,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr6,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr5,@-r15
-	cfi_adjust_cfa_offset (4)
-	fmov.s	fr4,@-r15
-	cfi_adjust_cfa_offset (4)
-#else
-	add #-36,r15
-	cfi_adjust_cfa_offset (36)
-#endif
-	mov.l r7,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r6,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r5,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r4,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r3,@-r15
-	cfi_adjust_cfa_offset (4)
-	mov.l r2,@-r15
-	cfi_adjust_cfa_offset (4)
-	sts.l macl,@-r15
-	cfi_adjust_cfa_offset (4)
-	sts.l mach,@-r15
-	cfi_adjust_cfa_offset (4)
-	movt r3			! Save T flag.
-	mov.l r3,@-r15
-	cfi_adjust_cfa_offset (4)
-	sts.l pr,@-r15
-	cfi_adjust_cfa_offset (4)
-	tst r0,r0
-	bt 1f
-	mov r0,r2
-1:
-	mov r0,r4		! PLT type
-	mov r2,r5		! link map address
-	sts pr,r7		! return address
-	add #-24,r15
-	cfi_adjust_cfa_offset (24)
-	mov #40,r0
-	add r15,r0
-	mov.l r0,@r15		! Address of the register structure
-	mov #-1,r0
-	mov.l r0,@(8,r15)
-	mov #8,r0
-	add r15,r0
-	mov.l r0,@(4,r15)
-	mov.l r5,@(12,r15)
-	mov.l r1,@(16,r15)
-#ifdef SHARED
-	mov.l 2f,r12
-	mova 2f,r0
-	add r0,r12		! Get GOT address in r12
-	mov.l 3f,r0
-	add r12,r0
-#else
-	mov.l 3f,r0
-#endif
-	jsr @r0			! Call resolver.
-	 mov r1,r6		! reloc offset
-	mov.l @(8,r15),r1
-	cmp/pz r1
-	bt 4f
-	add #24,r15
-	cfi_adjust_cfa_offset (-24)
-	lds.l @r15+,pr		! Get register content back.
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r3
-	cfi_adjust_cfa_offset (-4)
-	shal r3			! Lode T flag.
-	lds.l @r15+,mach
-	cfi_adjust_cfa_offset (-4)
-	lds.l @r15+,macl
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r2
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r3
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r4
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r5
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r6
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r7
-	cfi_adjust_cfa_offset (-4)
-#ifdef HAVE_FPU
-	fmov.s	@r15+,fr4
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr5
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr6
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr7
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr8
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr9
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr10
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr11
-	cfi_adjust_cfa_offset (-4)
-	lds.l	@r15+,fpscr
-	cfi_adjust_cfa_offset (-4)
-#else
-	add #36,r15
-	cfi_adjust_cfa_offset (-36)
-#endif
-	jmp @r0			! Jump to function address.
-	 mov.l @r15+,r12
-	cfi_adjust_cfa_offset (-4)
-	.align 2
-#ifdef SHARED
-2:	.long _GLOBAL_OFFSET_TABLE_
-3:	.long _dl_profile_fixup@GOTOFF
-#else
-3:	.long _dl_profile_fixup
-#endif
-
-	cfi_adjust_cfa_offset (104)
-4:
-	mov #104,r3
-	add r15,r3		! Original stack
-	mov.l r8,@(20,r15)
-	cfi_rel_offset (r8, 20)
-	mov r15,r8
-	sub r1,r15
-	shlr2 r15
-	shll2 r15
-	mov r15,r4
-	shlr2 r1
-	tst r1,r1
-5:	
-	bt/s 6f
-	 dt r1
-	mov.l @r3+,r2
-	mov.l r2,@r4
-	bra 5b
-	 add #4,r4
-6:
-	mov.l @r8,r12
-	mov.l @r12+,r2
-	mov.l @r12+,r3
-	mov.l @r12+,r4
-	mov.l @r12+,r5
-	mov.l @r12+,r6
-	mov.l @r12+,r7
-#ifdef HAVE_FPU
-	fmov.s	@r12+,fr4
-	fmov.s	@r12+,fr5
-	fmov.s	@r12+,fr6
-	fmov.s	@r12+,fr7
-	fmov.s	@r12+,fr8
-	fmov.s	@r12+,fr9
-	fmov.s	@r12+,fr10
-	fmov.s	@r12+,fr11
-	lds.l	@r12+,fpscr
-#else
-	add #36,r2
-#endif
-	jsr @r0			! Call function.
-	 nop
-	mov r8,r15
-	mov.l @(12,r15),r4	! link map address
-	mov.l @(16,r15),r5	! reloc offset
-	mov.l @r15,r6		! input registers
-#ifdef HAVE_FPU
-	mov #16,r8
-	add r15,r8
-	fmov.s fr1,@-r8
-	fmov.s fr0,@-r8
-#else
-	mov #8,r8
-	add r15,r8
-#endif
-	mov.l r1,@-r8
-	mov.l r0,@-r8
-	mov.l @(20,r15),r8
-	cfi_restore (r8)
-#ifdef SHARED
-	mov.l 7f,r12
-	mova 7f,r0
-	add r0,r12		! Get GOT address in r12
-	mov.l 8f,r0
-	add r12,r0
-#else
-	mov.l 8f,r0
-#endif
-	jsr @r0
-	 mov r15,r7		! output registers
-	mov.l @r15+,r0
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r1
-	cfi_adjust_cfa_offset (-4)
-#ifdef HAVE_FPU
-	fmov.s @r15+,fr0
-	cfi_adjust_cfa_offset (-4)
-	fmov.s @r15+,fr1
-	cfi_adjust_cfa_offset (-4)
-	add #8,r15
-	cfi_adjust_cfa_offset (-8)
-#else
-	add #16,r15
-	cfi_adjust_cfa_offset (-16)
-#endif
-	lds.l @r15+,pr		! Get register content back.
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r3
-	cfi_adjust_cfa_offset (-4)
-	shal r3			! Lode T flag.
-	lds.l @r15+,mach
-	cfi_adjust_cfa_offset (-4)
-	lds.l @r15+,macl
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r2
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r3
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r4
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r5
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r6
-	cfi_adjust_cfa_offset (-4)
-	mov.l @r15+,r7
-	cfi_adjust_cfa_offset (-4)
-#ifdef HAVE_FPU
-	fmov.s	@r15+,fr4
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr5
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr6
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr7
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr8
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr9
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr10
-	cfi_adjust_cfa_offset (-4)
-	fmov.s	@r15+,fr11
-	cfi_adjust_cfa_offset (-4)
-	lds.l	@r15+,fpscr
-	cfi_adjust_cfa_offset (-4)
-#else
-	add #36,r15
-	cfi_adjust_cfa_offset (-36)
-#endif
-	rts			! Jump to function address.
-	 mov.l @r15+,r12
-	cfi_adjust_cfa_offset (-4)
-	cfi_endproc
-	.align 2
-#ifdef SHARED
-7:	.long _GLOBAL_OFFSET_TABLE_
-8:	.long _dl_call_pltexit@GOTOFF
-#else
-8:	.long _dl_call_pltexit
-#endif
-	.size _dl_runtime_profile, .-_dl_runtime_profile
diff --git a/sysdeps/sh/sh4/Versions b/sysdeps/sh/sh4/Versions
deleted file mode 100644
index 8cc1c7b7d4..0000000000
--- a/sysdeps/sh/sh4/Versions
+++ /dev/null
@@ -1,5 +0,0 @@
-ld {
-  GLIBC_PRIVATE {
-    __fpscr_values;
-  }
-}
diff --git a/sysdeps/sh/sh4/dl-machine.h b/sysdeps/sh/sh4/dl-machine.h
new file mode 100644
index 0000000000..ec9f6f7b45
--- /dev/null
+++ b/sysdeps/sh/sh4/dl-machine.h
@@ -0,0 +1,2 @@
+#define HAVE_FPU
+#include <sysdeps/sh/dl-machine.h>
diff --git a/sysdeps/sh/sh4/dl-trampoline.S b/sysdeps/sh/sh4/dl-trampoline.S
deleted file mode 100644
index f9529851a1..0000000000
--- a/sysdeps/sh/sh4/dl-trampoline.S
+++ /dev/null
@@ -1,2 +0,0 @@
-#define HAVE_FPU
-#include <sysdeps/sh/dl-trampoline.S>
diff --git a/sysdeps/unix/alarm.c b/sysdeps/unix/alarm.c
index 84ab5a52fe..ae77782c54 100644
--- a/sysdeps/unix/alarm.c
+++ b/sysdeps/unix/alarm.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,1992,1994,1997,2002,2004 Free Software Foundation, Inc.
+/* Copyright (C) 1991,92,94,97,2002 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -41,10 +41,7 @@ alarm (seconds)
     return 0;
 
   retval = old.it_value.tv_sec;
-  /* Round to the nearest second, but never report zero seconds when
-     the alarm is still set.  */
-  if (old.it_value.tv_usec >= 500000
-      || (retval == 0 && old.it_value.tv_usec > 0))
+  if (old.it_value.tv_usec)
     ++retval;
   return retval;
 }
diff --git a/sysdeps/unix/i386/sysdep.S b/sysdeps/unix/i386/sysdep.S
index 3bc872add8..6056cbeef2 100644
--- a/sysdeps/unix/i386/sysdep.S
+++ b/sysdeps/unix/i386/sysdep.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,1992,1993,1994,1995,1996,1997,2000,2002,2004,2005
+/* Copyright (C) 1991, 92, 93, 94, 95, 96, 97, 2000, 2002, 2004
 	Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -47,12 +47,7 @@ notb:
 #endif
 #ifndef	PIC
 # if USE___THREAD
-#  ifndef NO_TLS_DIRECT_SEG_REFS
 	movl %eax, %gs:C_SYMBOL_NAME(errno@NTPOFF)
-#  else
-	movl %gs:0, %ecx
-	movl %eax, C_SYMBOL_NAME(errno@NTPOFF)(%ecx)
-#  endif
 # elif !defined _LIBC_REENTRANT
 	movl %eax, C_SYMBOL_NAME(errno)
 # else
@@ -71,12 +66,7 @@ notb:
 
 	/* Pop %ebx value saved before jumping here.  */
 	popl %ebx
-#  ifndef NO_TLS_DIRECT_SEG_REFS
-	addl %gs:0, %ecx
-	movl %eax, (%ecx)
-#  else
 	movl %eax, %gs:0(%ecx)
-#  endif
 # elif RTLD_PRIVATE_ERRNO
 	movl %eax, C_SYMBOL_NAME(rtld_errno@GOTOFF)(%ebx)
 
diff --git a/sysdeps/unix/rewinddir.c b/sysdeps/unix/rewinddir.c
index 051e93595e..9f3724fc6a 100644
--- a/sysdeps/unix/rewinddir.c
+++ b/sysdeps/unix/rewinddir.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991, 1995-1998, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -30,7 +30,6 @@ rewinddir (dirp)
 {
   __libc_lock_lock (dirp->lock);
   (void) __lseek (dirp->fd, (off_t) 0, SEEK_SET);
-  dirp->filepos = 0;
   dirp->offset = 0;
   dirp->size = 0;
   __libc_lock_unlock (dirp->lock);
diff --git a/sysdeps/unix/sysv/linux/alpha/bits/termios.h b/sysdeps/unix/sysv/linux/alpha/bits/termios.h
index 966ccf94da..f26e84c9f6 100644
--- a/sysdeps/unix/sysv/linux/alpha/bits/termios.h
+++ b/sysdeps/unix/sysv/linux/alpha/bits/termios.h
@@ -1,5 +1,5 @@
 /* termios type and macro definitions.  Linux version.
-   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1999, 2003, 2005
+   Copyright (C) 1993,1994,1995,1996,1997,1999,2003
 	Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -91,42 +91,34 @@ struct termios
 
 #define OFILL	00000100
 #define OFDEL	00000200
-#if defined __USE_MISC || defined __USE_XOPEN
-# define NLDLY	00001400
-# define   NL0	00000000
-# define   NL1	00000400
-# define   NL2	00001000
-# define   NL3	00001400
-# define TABDLY	00006000
-# define   TAB0	00000000
-# define   TAB1	00002000
-# define   TAB2	00004000
-# define   TAB3	00006000
-# define CRDLY	00030000
-# define   CR0	00000000
-# define   CR1	00010000
-# define   CR2	00020000
-# define   CR3	00030000
-# define FFDLY	00040000
-# define   FF0	00000000
-# define   FF1	00040000
-# define BSDLY	00100000
-# define   BS0	00000000
-# define   BS1	00100000
-#endif
-
+#define NLDLY	00001400
+#define   NL0	00000000
+#define   NL1	00000400
+#define   NL2	00001000
+#define   NL3	00001400
+#define TABDLY	00006000
+#define   TAB0	00000000
+#define   TAB1	00002000
+#define   TAB2	00004000
+#define   TAB3	00006000
+#define CRDLY	00030000
+#define   CR0	00000000
+#define   CR1	00010000
+#define   CR2	00020000
+#define   CR3	00030000
+#define FFDLY	00040000
+#define   FF0	00000000
+#define   FF1	00040000
+#define BSDLY	00100000
+#define   BS0	00000000
+#define   BS1	00100000
 #define VTDLY	00200000
 #define   VT0	00000000
 #define   VT1	00200000
-
-#ifdef __USE_MISC
-# define XTABS	01000000 /* Hmm.. Linux/i386 considers this part of TABDLY.. */
-#endif
+#define XTABS	01000000 /* Hmm.. Linux/i386 considers this part of TABDLY.. */
 
 /* c_cflag bit meaning */
-#ifdef __USE_MISC
-# define CBAUD	0000037
-#endif
+#define CBAUD	0000037
 #define  B0	0000000		/* hang up */
 #define  B50	0000001
 #define  B75	0000002
@@ -143,11 +135,9 @@ struct termios
 #define  B9600	0000015
 #define  B19200	0000016
 #define  B38400	0000017
-#ifdef __USE_MISC
-# define EXTA B19200
-# define EXTB B38400
-# define CBAUDEX 0000000
-#endif
+#define EXTA B19200
+#define EXTB B38400
+#define CBAUDEX 0000000
 #define  B57600   00020
 #define  B115200  00021
 #define  B230400  00022
@@ -179,30 +169,23 @@ struct termios
 #define HUPCL	00040000
 
 #define CLOCAL	00100000
-#ifdef __USE_MISC
-# define CMSPAR	  010000000000		/* mark or space (stick) parity */
-# define CRTSCTS  020000000000		/* flow control */
-#endif
+#define CRTSCTS	  020000000000		/* flow control */
 
 /* c_lflag bits */
 #define ISIG	0x00000080
 #define ICANON	0x00000100
-#if defined __USE_MISC || defined __USE_XOPEN
-# define XCASE	0x00004000
-#endif
+#define XCASE	0x00004000
 #define ECHO	0x00000008
 #define ECHOE	0x00000002
 #define ECHOK	0x00000004
 #define ECHONL	0x00000010
 #define NOFLSH	0x80000000
 #define TOSTOP	0x00400000
-#ifdef __USE_MISC
-# define ECHOCTL	0x00000040
-# define ECHOPRT	0x00000020
-# define ECHOKE	0x00000001
-# define FLUSHO	0x00800000
-# define PENDIN	0x20000000
-#endif
+#define ECHOCTL	0x00000040
+#define ECHOPRT	0x00000020
+#define ECHOKE	0x00000001
+#define FLUSHO	0x00800000
+#define PENDIN	0x20000000
 #define IEXTEN	0x00000400
 
 /* Values for the ACTION argument to `tcflow'.  */
diff --git a/sysdeps/unix/sysv/linux/alpha/oldglob.c b/sysdeps/unix/sysv/linux/alpha/oldglob.c
index 6d9b79f2c3..9d39176f6b 100644
--- a/sysdeps/unix/sysv/linux/alpha/oldglob.c
+++ b/sysdeps/unix/sysv/linux/alpha/oldglob.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998, 2000, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1998, 2000, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -91,7 +91,6 @@ __old_globfree (old_glob_t *pglob)
   /* We only need these two symbols.  */
   correct.gl_pathc = pglob->gl_pathc;
   correct.gl_pathv = pglob->gl_pathv;
-  correct.gl_offs = pglob->gl_offs;
 
   globfree (&correct);
 }
diff --git a/sysdeps/unix/sysv/linux/bits/termios.h b/sysdeps/unix/sysv/linux/bits/termios.h
index c71e4ad1fe..1b71f60faf 100644
--- a/sysdeps/unix/sysv/linux/bits/termios.h
+++ b/sysdeps/unix/sysv/linux/bits/termios.h
@@ -1,5 +1,5 @@
 /* termios type and macro definitions.  Linux version.
-   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2003, 2005
+   Copyright (C) 1993,1994,1995,1996,1997,1998,1999,2003
 	Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -172,7 +172,6 @@ struct termios
 #define __MAX_BAUD B4000000
 #ifdef __USE_MISC
 # define CIBAUD	  002003600000		/* input baud rate (not used) */
-# define CMSPAR   010000000000		/* mark or space (stick) parity */
 # define CRTSCTS  020000000000		/* flow control */
 #endif
 
diff --git a/sysdeps/unix/sysv/linux/bits/waitflags.h b/sysdeps/unix/sysv/linux/bits/waitflags.h
index 464cedb1fc..e3f80f6814 100644
--- a/sysdeps/unix/sysv/linux/bits/waitflags.h
+++ b/sysdeps/unix/sysv/linux/bits/waitflags.h
@@ -1,5 +1,5 @@
 /* Definitions of flag bits for `waitpid' et al.
-   Copyright (C) 1992,1996,1997,2000,2004,2005 Free Software Foundation, Inc.
+   Copyright (C) 1992, 1996, 1997, 2000, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -32,7 +32,5 @@
 #define WCONTINUED	8	/* Report continued child.  */
 #define WNOWAIT		0x01000000 /* Don't reap, just poll status.  */
 
-#define __WNOTHREAD     0x20000000 /* Don't wait on children of other threads
-				      in this group */
 #define __WALL		0x40000000 /* Wait for any child.  */
 #define __WCLONE	0x80000000 /* Wait for cloned process.  */
diff --git a/sysdeps/unix/sysv/linux/dl-execstack.c b/sysdeps/unix/sysv/linux/dl-execstack.c
index b38d0c4238..6ef9679045 100644
--- a/sysdeps/unix/sysv/linux/dl-execstack.c
+++ b/sysdeps/unix/sysv/linux/dl-execstack.c
@@ -1,5 +1,5 @@
 /* Stack executability handling for GNU dynamic linker.  Linux version.
-   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -24,7 +24,6 @@
 #include <stdbool.h>
 #include <stackinfo.h>
 #include <caller.h>
-#include <sysdep.h>
 
 #include "kernel-features.h"
 
@@ -39,7 +38,6 @@ _dl_make_stack_executable (void **stack_endp)
   /* This gives us the highest/lowest page that needs to be changed.  */
   uintptr_t page = ((uintptr_t) *stack_endp
 		    & -(intptr_t) GLRO(dl_pagesize));
-  int result = 0;
 
   /* Challenge the caller.  */
   if (__builtin_expect (__check_caller (RETURN_ADDRESS (0),
@@ -62,10 +60,7 @@ _dl_make_stack_executable (void **stack_endp)
 	no_growsupdown = true;
       else
 # endif
-	{
-	  result = errno;
-	  goto out;
-	}
+	return errno;
     }
 #endif
 
@@ -90,10 +85,7 @@ _dl_make_stack_executable (void **stack_endp)
       else
 	{
 	  if (errno != ENOMEM)	/* Unexpected failure mode.  */
-	    {
-	      result = errno;
-	      goto out;
-	    }
+	    return errno;
 
 	  if (size == GLRO(dl_pagesize))
 	    /* We just tried to mprotect the top hole page and failed.
@@ -116,10 +108,7 @@ _dl_make_stack_executable (void **stack_endp)
       else
 	{
 	  if (errno != ENOMEM)	/* Unexpected failure mode.  */
-	    {
-	      result = errno;
-	      goto out;
-	    }
+	    return errno;
 
 	  if (size == GLRO(dl_pagesize))
 	    /* We just tried to mprotect the lowest hole page and failed.
@@ -144,11 +133,6 @@ _dl_make_stack_executable (void **stack_endp)
   /* Remember that we changed the permission.  */
   GL(dl_stack_flags) |= PF_X;
 
- out:
-#ifdef check_consistency
-  check_consistency ();
-#endif
-
-  return result;
+  return 0;
 }
 rtld_hidden_def (_dl_make_stack_executable)
diff --git a/sysdeps/unix/sysv/linux/futimes.c b/sysdeps/unix/sysv/linux/futimes.c
index 0c4be2b67f..f43f568ec1 100644
--- a/sysdeps/unix/sysv/linux/futimes.c
+++ b/sysdeps/unix/sysv/linux/futimes.c
@@ -1,5 +1,5 @@
 /* futimes -- change access and modification times of open file.  Linux version.
-   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -23,7 +23,6 @@
 #include <utime.h>
 #include <sys/time.h>
 #include <stdio-common/_itoa.h>
-#include <fcntl.h>
 
 #include "kernel-features.h"
 
@@ -41,58 +40,31 @@ __futimes (int fd, const struct timeval tvp[2])
   char *cp = _itoa_word ((unsigned int) fd, fname + sizeof (fname) - 1, 10, 0);
   cp = memcpy (cp - sizeof (selffd) + 1, selffd, sizeof (selffd) - 1);
 
-  int result;
 #ifdef __NR_utimes
-  result = INLINE_SYSCALL (utimes, 2, cp, tvp);
+  int result = INLINE_SYSCALL (utimes, 2, cp, tvp);
 # ifndef __ASSUME_UTIMES
-  if (result == -1 && errno == ENOSYS)
+  if (result != -1 || errno != ENOSYS)
 # endif
+    return result;
 #endif
-    {
-      /* The utimes() syscall does not exist or is not available in the
-	 used kernel.  Use utime().  For this we have to convert to the
-	 data format utime() expects.  */
-#ifndef __ASSUME_UTIMES
-      struct utimbuf buf;
-      struct utimbuf *times;
 
-      if (tvp != NULL)
-	{
-	  times = &buf;
-	  buf.actime = tvp[0].tv_sec + (tvp[0].tv_usec + 500000) / 1000000;
-	  buf.modtime = tvp[1].tv_sec + (tvp[1].tv_usec + 500000) / 1000000;
-	}
-      else
-	times = NULL;
+  /* The utimes() syscall does not exist or is not available in the
+     used kernel.  Use utime().  For this we have to convert to the
+     data format utime() expects.  */
+#ifndef __ASSUME_UTIMES
+  struct utimbuf buf;
+  struct utimbuf *times;
 
-      result = INLINE_SYSCALL (utime, 2, cp, times);
-#endif
+  if (tvp != NULL)
+    {
+      times = &buf;
+      buf.actime = tvp[0].tv_sec + (tvp[0].tv_usec + 500000) / 1000000;
+      buf.modtime = tvp[1].tv_sec + (tvp[1].tv_usec + 500000) / 1000000;
     }
+  else
+    times = NULL;
 
-  if (result == -1)
-    /* Check for errors that result from failing to find /proc.
-       This means we can't do futimes at all, so return ENOSYS
-       rather than some confusing error.  */
-    switch (errno)
-      {
-      case EACCES:
-	if (tvp == NULL)  /* Could be a path problem or a file problem.  */
-	  break;
-	/*FALLTHROUGH*/
-      case ELOOP:
-      case ENAMETOOLONG:
-      case ENOTDIR:
-	__set_errno (ENOSYS);
-	break;
-
-      case ENOENT:
-	/* Validate the file descriptor by letting fcntl set errno to
-	   EBADF if it's bogus.  Otherwise it's a /proc issue.  */
-	if (INLINE_SYSCALL (fcntl, 3, fd, F_GETFD, 0) != -1)
-	  __set_errno (ENOSYS);
-	break;
-      }
-
-  return result;
+  return INLINE_SYSCALL (utime, 2, cp, times);
+#endif
 }
 weak_alias (__futimes, futimes)
diff --git a/sysdeps/unix/sysv/linux/i386/clone.S b/sysdeps/unix/sysv/linux/i386/clone.S
index c7d31f7a32..acd43dfb0b 100644
--- a/sysdeps/unix/sysv/linux/i386/clone.S
+++ b/sysdeps/unix/sysv/linux/i386/clone.S
@@ -67,7 +67,7 @@ ENTRY (BP_SYM (__clone))
 	/* Insert the argument onto the new stack.  Make sure the new
 	   thread is started with an alignment of (mod 16).  */
 	andl	$0xfffffff0, %ecx
-	subl	$28,%ecx
+	subl	$24,%ecx
 	movl	ARG(%esp),%eax		/* no negative argument counts */
 	movl	%eax,12(%ecx)
 
diff --git a/sysdeps/unix/sysv/linux/i386/sysdep.h b/sysdeps/unix/sysv/linux/i386/sysdep.h
index 6bea9d2044..af75d4c51a 100644
--- a/sysdeps/unix/sysv/linux/i386/sysdep.h
+++ b/sysdeps/unix/sysv/linux/i386/sysdep.h
@@ -1,5 +1,5 @@
-/* Copyright (C) 1992,1993,1995,1996,1997,1998,1999,2000,2002,2003,2004,2005
-   	Free Software Foundation, Inc.
+/* Copyright (C) 1992,1993,1995-2000,2002,2003,2004
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper, <drepper@gnu.org>, August 1995.
 
@@ -154,17 +154,9 @@ __i686.get_pc_thunk.reg:						      \
   movl SYSCALL_ERROR_ERRNO@GOTNTPOFF(%ecx), %ecx;			      \
   xorl %edx, %edx;							      \
   subl %eax, %edx;							      \
-  SYSCALL_ERROR_HANDLER_TLS_STORE (%edx, %ecx);				      \
+  movl %edx, %gs:0(%ecx);						      \
   orl $-1, %eax;							      \
   jmp L(pseudo_end);
-#   ifndef NO_TLS_DIRECT_SEG_REFS
-#    define SYSCALL_ERROR_HANDLER_TLS_STORE(src, destoff)		      \
-  movl src, %gs:(destoff)
-#   else
-#    define SYSCALL_ERROR_HANDLER_TLS_STORE(src, destoff)		      \
-  addl %gs:0, destoff;							      \
-  movl src, (destoff)
-#   endif
 #  else
 #   define SYSCALL_ERROR_HANDLER					      \
 0:pushl %ebx;								      \
@@ -540,29 +532,6 @@ asm (".L__X'%ebx = 1\n\t"
 # define EXTRAVAR_5
 #endif
 
-/* Consistency check for position-independent code.  */
-#ifdef __PIC__
-# define check_consistency()						      \
-  ({ int __res;								      \
-     __asm__ __volatile__						      \
-       ("call __i686.get_pc_thunk.cx;"					      \
-	"addl $_GLOBAL_OFFSET_TABLE_, %%ecx;"				      \
-	"subl %%ebx, %%ecx;"						      \
-	"je 1f;"							      \
-	"ud2;"								      \
-	"1:\n"								      \
-	".section .gnu.linkonce.t.__i686.get_pc_thunk.cx,\"ax\",@progbits;"   \
-	".globl __i686.get_pc_thunk.cx;"				      \
-	".hidden __i686.get_pc_thunk.cx;"				      \
-	".type __i686.get_pc_thunk.cx,@function;"			      \
-	"__i686.get_pc_thunk.cx:"					      \
-	"movl (%%esp), %%ecx;"						      \
-	"ret;"								      \
-	".previous"							      \
-	: "=c" (__res));						      \
-     __res; })
-#endif
-
 #endif	/* __ASSEMBLER__ */
 
 #endif /* linux/i386/sysdep.h */
diff --git a/sysdeps/unix/sysv/linux/init-first.c b/sysdeps/unix/sysv/linux/init-first.c
index b061a848c6..f00271255d 100644
--- a/sysdeps/unix/sysv/linux/init-first.c
+++ b/sysdeps/unix/sysv/linux/init-first.c
@@ -1,5 +1,5 @@
 /* Initialization code run first thing by the ELF startup code.  Linux version.
-   Copyright (C) 1995-2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1995-1999,2000,01,02,03,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -29,6 +29,9 @@
 
 #include <ldsodefs.h>
 
+/* The function is called from assembly stubs the compiler can't see.  */
+static void init (int, char **, char **) __attribute__ ((used));
+
 /* Set nonzero if we have to be prepared for more then one libc being
    used in the process.  Safe assumption if initializer never runs.  */
 int __libc_multiple_libcs attribute_hidden = 1;
@@ -39,18 +42,9 @@ int __libc_argc attribute_hidden;
 char **__libc_argv attribute_hidden;
 
 
-void
-__libc_init_first (int argc, char **argv, char **envp)
-{
-#ifdef SHARED
-  /* For DSOs we do not need __libc_init_first but instead _init.  */
-}
-
-void
-attribute_hidden
-_init (int argc, char **argv, char **envp)
+static void
+init (int argc, char **argv, char **envp)
 {
-#endif
 #ifdef USE_NONOPTION_FLAGS
   extern void __getopt_clean_environment (char **);
 #endif
@@ -94,6 +88,27 @@ _init (int argc, char **argv, char **envp)
 #endif
 }
 
+#ifdef SHARED
+
+strong_alias (init, _init);
+
+extern void __libc_init_first (void);
+
+void
+__libc_init_first (void)
+{
+}
+
+#else
+extern void __libc_init_first (int argc, char **argv, char **envp);
+
+void
+__libc_init_first (int argc, char **argv, char **envp)
+{
+  init (argc, argv, envp);
+}
+#endif
+
 
 /* This function is defined here so that if this file ever gets into
    ld.so we will get a link error.  Having this file silently included
diff --git a/sysdeps/unix/sysv/linux/kernel-features.h b/sysdeps/unix/sysv/linux/kernel-features.h
index 83ebe0cf74..f499a712c4 100644
--- a/sysdeps/unix/sysv/linux/kernel-features.h
+++ b/sysdeps/unix/sysv/linux/kernel-features.h
@@ -1,6 +1,6 @@
 /* Set flags signalling availability of kernel features based on given
    kernel version number.
-   Copyright (C) 1999-2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1999-2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -79,11 +79,6 @@
 # define __ASSUME_SIOCGIFNAME		1
 #endif
 
-/* MSG_NOSIGNAL was at least available with Linux 2.2.0.  */
-#if __LINUX_KERNEL_VERSION >= 131584
-# define __ASSUME_MSG_NOSIGNAL		1
-#endif
-
 /* On x86 another `getrlimit' syscall was added in 2.3.25.  */
 #if __LINUX_KERNEL_VERSION >= 131865 && defined __i386__
 # define __ASSUME_NEW_GETRLIMIT_SYSCALL	1
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/termios.h b/sysdeps/unix/sysv/linux/powerpc/bits/termios.h
index 7aac02dc55..8a420cb69c 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/termios.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/termios.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997,1999,2001,2003,2004,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1997,1999,2001,2003,2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -92,41 +92,34 @@ struct termios {
 
 #define OFILL	00000100
 #define OFDEL	00000200
-#if defined __USE_MISC || defined __USE_XOPEN
-# define NLDLY	00001400
-# define   NL0	00000000
-# define   NL1	00000400
-# define   NL2	00001000
-# define   NL3	00001400
-# define TABDLY	00006000
-# define   TAB0	00000000
-# define   TAB1	00002000
-# define   TAB2	00004000
-# define   TAB3	00006000
-# define CRDLY	00030000
-# define   CR0	00000000
-# define   CR1	00010000
-# define   CR2	00020000
-# define   CR3	00030000
-# define FFDLY	00040000
-# define   FF0	00000000
-# define   FF1	00040000
-# define BSDLY	00100000
-# define   BS0	00000000
-# define   BS1	00100000
-#endif
+#define NLDLY	00001400
+#define   NL0	00000000
+#define   NL1	00000400
+#define   NL2	00001000
+#define   NL3	00001400
+#define TABDLY	00006000
+#define   TAB0	00000000
+#define   TAB1	00002000
+#define   TAB2	00004000
+#define   TAB3	00006000
+#define XTABS	00006000	/* Required by POSIX to be == TAB3.  */
+#define CRDLY	00030000
+#define   CR0	00000000
+#define   CR1	00010000
+#define   CR2	00020000
+#define   CR3	00030000
+#define FFDLY	00040000
+#define   FF0	00000000
+#define   FF1	00040000
+#define BSDLY	00100000
+#define   BS0	00000000
+#define   BS1	00100000
 #define VTDLY	00200000
 #define   VT0	00000000
 #define   VT1	00200000
 
-#ifdef __USE_MISC
-# define XTABS	00006000
-#endif
-
 /* c_cflag bit meaning */
-#ifdef __USE_MISC
-# define CBAUD	0000377
-#endif
+#define CBAUD	0000377
 #define  B0	0000000		/* hang up */
 #define  B50	0000001
 #define  B75	0000002
@@ -143,11 +136,9 @@ struct termios {
 #define  B9600	0000015
 #define  B19200	0000016
 #define  B38400	0000017
-#ifdef __USE_MISC
-# define EXTA B19200
-# define EXTB B38400
-# define CBAUDEX 0000020
-#endif
+#define EXTA B19200
+#define EXTB B38400
+#define CBAUDEX 0000020
 #define  B57600   00020
 #define  B115200  00021
 #define  B230400  00022
@@ -178,30 +169,23 @@ struct termios {
 #define HUPCL	00040000
 
 #define CLOCAL	00100000
-#ifdef __USE_MISC
-# define CMSPAR   010000000000		/* mark or space (stick) parity */
-# define CRTSCTS  020000000000		/* flow control */
-#endif
+#define CRTSCTS	  020000000000		/* flow control */
 
 /* c_lflag bits */
 #define ISIG	0x00000080
 #define ICANON	0x00000100
-#if defined __USE_MISC || defined __USE_XOPEN
-# define XCASE	0x00004000
-#endif
+#define XCASE	0x00004000
 #define ECHO	0x00000008
 #define ECHOE	0x00000002
 #define ECHOK	0x00000004
 #define ECHONL	0x00000010
 #define NOFLSH	0x80000000
 #define TOSTOP	0x00400000
-#ifdef __USE_MISC
-# define ECHOCTL	0x00000040
-# define ECHOPRT	0x00000020
-# define ECHOKE	0x00000001
-# define FLUSHO	0x00800000
-# define PENDIN	0x20000000
-#endif
+#define ECHOCTL	0x00000040
+#define ECHOPRT	0x00000020
+#define ECHOKE	0x00000001
+#define FLUSHO	0x00800000
+#define PENDIN	0x20000000
 #define IEXTEN	0x00000400
 
 /* Values for the ACTION argument to `tcflow'.  */
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S
index 7eaaad20a4..6514f442a6 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/setcontext.S
@@ -1,5 +1,5 @@
 /* Switch to context.
-   Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -47,7 +47,7 @@ ENTRY(__novec_setcontext)
  * of a procedure call (makecontext), so we don't need to restore
  * msr and ctr.  We don't restore r13 since it will be used as
  * the TLS pointer.  */
-  ld	  r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
+  lwz	  r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
   cmpdi r0,0
   bne	  L(nv_do_sigret)
 
@@ -104,7 +104,7 @@ ENTRY(__novec_setcontext)
   ld   r4,(SIGCONTEXT_GP_REGS+(PT_R4*8))(r31)
   ld   r0,(SIGCONTEXT_GP_REGS+(PT_CCR*8))(r31)
   ld   r5,(SIGCONTEXT_GP_REGS+(PT_R5*8))(r31)
-  mtcr r0
+  mfcr r0
   ld   r6,(SIGCONTEXT_GP_REGS+(PT_R6*8))(r31)
   ld   r7,(SIGCONTEXT_GP_REGS+(PT_R7*8))(r31)
   ld   r8,(SIGCONTEXT_GP_REGS+(PT_R8*8))(r31)
@@ -213,7 +213,7 @@ ENTRY(__setcontext)
  * of a procedure call (makecontext), so we don't need to restore
  * msr and ctr.  We don't restore r13 since it will be used as
  * the TLS pointer.  */
-  ld	  r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
+  lwz	  r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
   cmpdi r0,0
   bne	  L(do_sigret)
 
@@ -380,11 +380,11 @@ L(has_no_vec):
   ld   r4,(SIGCONTEXT_GP_REGS+(PT_R4*8))(r31)
   ld   r0,(SIGCONTEXT_GP_REGS+(PT_CCR*8))(r31)
   ld   r5,(SIGCONTEXT_GP_REGS+(PT_R5*8))(r31)
+  mfcr r0
   ld   r6,(SIGCONTEXT_GP_REGS+(PT_R6*8))(r31)
   ld   r7,(SIGCONTEXT_GP_REGS+(PT_R7*8))(r31)
   ld   r8,(SIGCONTEXT_GP_REGS+(PT_R8*8))(r31)
   ld   r9,(SIGCONTEXT_GP_REGS+(PT_R9*8))(r31)
-  mtcr r0
   ld   r10,(SIGCONTEXT_GP_REGS+(PT_R10*8))(r31)
   ld   r11,(SIGCONTEXT_GP_REGS+(PT_R11*8))(r31)
   ld   r12,(SIGCONTEXT_GP_REGS+(PT_R12*8))(r31)
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S b/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S
index 772adacfe4..f99df951a2 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/swapcontext.S
@@ -1,5 +1,5 @@
 /* Save current context and install the given one.
-   Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -150,7 +150,7 @@ ENTRY(__novec_swapcontext)
  * of a procedure call (makecontext), so we don't need to restore
  * msr and ctr.  We don't restore r13 since it will be used as
  * the TLS pointer.  */
-  ld	  r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
+  lwz	  r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
   cmpdi r0,0
   bne	  L(nv_do_sigret)
 
@@ -199,7 +199,7 @@ ENTRY(__novec_swapcontext)
   ld   r4,(SIGCONTEXT_GP_REGS+(PT_R4*8))(r31)
   ld   r0,(SIGCONTEXT_GP_REGS+(PT_CCR*8))(r31)
   ld   r5,(SIGCONTEXT_GP_REGS+(PT_R5*8))(r31)
-  mtcr r0
+  mfcr r0
   ld   r6,(SIGCONTEXT_GP_REGS+(PT_R6*8))(r31)
   ld   r7,(SIGCONTEXT_GP_REGS+(PT_R7*8))(r31)
   ld   r8,(SIGCONTEXT_GP_REGS+(PT_R8*8))(r31)
@@ -521,7 +521,7 @@ L(has_no_vec):
  * of a procedure call (makecontext), so we don't need to restore
  * msr and ctr.  We don't restore r13 since it will be used as
  * the TLS pointer.  */
-  ld	  r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
+  lwz	  r0,(SIGCONTEXT_GP_REGS+(PT_MSR*8))(r31)
   cmpdi r0,0
   bne	  L(do_sigret)
 
@@ -681,11 +681,11 @@ L(has_no_vec2):
   ld   r4,(SIGCONTEXT_GP_REGS+(PT_R4*8))(r31)
   ld   r0,(SIGCONTEXT_GP_REGS+(PT_CCR*8))(r31)
   ld   r5,(SIGCONTEXT_GP_REGS+(PT_R5*8))(r31)
+  mfcr r0
   ld   r6,(SIGCONTEXT_GP_REGS+(PT_R6*8))(r31)
   ld   r7,(SIGCONTEXT_GP_REGS+(PT_R7*8))(r31)
   ld   r8,(SIGCONTEXT_GP_REGS+(PT_R8*8))(r31)
   ld   r9,(SIGCONTEXT_GP_REGS+(PT_R9*8))(r31)
-  mtcr r0
   ld   r10,(SIGCONTEXT_GP_REGS+(PT_R10*8))(r31)
   ld   r11,(SIGCONTEXT_GP_REGS+(PT_R11*8))(r31)
   ld   r12,(SIGCONTEXT_GP_REGS+(PT_R12*8))(r31)
diff --git a/sysdeps/unix/sysv/linux/sparc/bits/termios.h b/sysdeps/unix/sysv/linux/sparc/bits/termios.h
index cea13227f8..16f76dfa54 100644
--- a/sysdeps/unix/sysv/linux/sparc/bits/termios.h
+++ b/sysdeps/unix/sysv/linux/sparc/bits/termios.h
@@ -1,6 +1,5 @@
 /* termios type and macro definitions.  Linux/SPARC version.
-   Copyright (C) 1993, 1994, 1995, 1996, 1997, 2000, 2005
-       Free Software Foundation, Inc.
+   Copyright (C) 1993, 1994, 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -85,41 +84,34 @@ struct termios
 #define ONLRET	0x00000020
 #define OFILL	0x00000040
 #define OFDEL	0x00000080
-#if defined __USE_MISC || defined __USE_XOPEN
-# define NLDLY	0x00000100
-# define   NL0	0x00000000
-# define   NL1	0x00000100
-# define CRDLY	0x00000600
-# define   CR0	0x00000000
-# define   CR1	0x00000200
-# define   CR2	0x00000400
-# define   CR3	0x00000600
-# define TABDLY	0x00001800
-# define   TAB0	0x00000000
-# define   TAB1	0x00000800
-# define   TAB2	0x00001000
-# define   TAB3	0x00001800
-# define BSDLY	0x00002000
-# define   BS0	0x00000000
-# define   BS1	0x00002000
-#define FFDLY	0x00008000
-#define   FF0	0x00000000
-#define   FF1	0x00008000
-#endif
+#define NLDLY	0x00000100
+#define   NL0	0x00000000
+#define   NL1	0x00000100
+#define CRDLY	0x00000600
+#define   CR0	0x00000000
+#define   CR1	0x00000200
+#define   CR2	0x00000400
+#define   CR3	0x00000600
+#define TABDLY	0x00001800
+#define   TAB0	0x00000000
+#define   TAB1	0x00000800
+#define   TAB2	0x00001000
+#define   TAB3	0x00001800
+#define   XTABS	0x00001800
+#define BSDLY	0x00002000
+#define   BS0	0x00000000
+#define   BS1	0x00002000
 #define VTDLY	0x00004000
 #define   VT0	0x00000000
 #define   VT1	0x00004000
+#define FFDLY	0x00008000
+#define   FF0	0x00000000
+#define   FF1	0x00008000
 #define PAGEOUT 0x00010000	/* SUNOS specific */
 #define WRAP    0x00020000	/* SUNOS specific */
 
-#ifdef __USE_MISC
-# define   XTABS	0x00001800
-#endif
-
 /* c_cflag bit meaning */
-#ifdef __USE_MISC
-# define CBAUD	0x0000100f
-#endif
+#define CBAUD	0x0000100f
 #define  B0	0x00000000	/* hang up */
 #define  B50	0x00000001
 #define  B75	0x00000002
@@ -136,10 +128,8 @@ struct termios
 #define  B9600	0x0000000d
 #define  B19200	0x0000000e
 #define  B38400	0x0000000f
-#ifdef __USE_MISC
-# define EXTA    B19200
-# define EXTB    B38400
-#endif
+#define EXTA    B19200
+#define EXTB    B38400
 #define  CSIZE  0x00000030
 #define   CS5	0x00000000
 #define   CS6	0x00000010
@@ -151,9 +141,7 @@ struct termios
 #define PARODD	0x00000200
 #define HUPCL	0x00000400
 #define CLOCAL	0x00000800
-#ifdef __USE_MISC
-# define CBAUDEX 0x00001000
-#endif
+#define CBAUDEX 0x00001000
 #define  B57600  0x00001001
 #define  B115200 0x00001002
 #define  B230400 0x00001003
@@ -171,32 +159,26 @@ struct termios
 #define B2000000 0x0000100f
 #define __MAX_BAUD B2000000
 
-#ifdef __USE_MISC
-# define CIBAUD	 0x100f0000	/* input baud rate (not used) */
-# define CMSPAR	 0x40000000	/* mark or space (stick) parity */
-# define CRTSCTS 0x80000000	/* flow control */
-#endif
+#define CIBAUD	0x100f0000	/* input baud rate (not used) */
+#define CMSPAR	0x40000000	/* mark or space (stick) parity */
+#define CRTSCTS	0x80000000	/* flow control */
 
 /* c_lflag bits */
 #define ISIG	0x00000001
 #define ICANON	0x00000002
-#if defined __USE_MISC || defined __USE_XOPEN
-# define XCASE	0x00000004
-#endif
+#define XCASE	0x00000004
 #define ECHO	0x00000008
 #define ECHOE	0x00000010
 #define ECHOK	0x00000020
 #define ECHONL	0x00000040
 #define NOFLSH	0x00000080
 #define TOSTOP	0x00000100
-#ifdef __USE_MISC
-# define ECHOCTL	0x00000200
-# define ECHOPRT	0x00000400
-# define ECHOKE		0x00000800
-# define DEFECHO 	0x00001000	/* SUNOS thing, what is it? */
-# define FLUSHO		0x00002000
-# define PENDIN		0x00004000
-#endif
+#define ECHOCTL	0x00000200
+#define ECHOPRT	0x00000400
+#define ECHOKE	0x00000800
+#define DEFECHO 0x00001000	/* SUNOS thing, what is it? */
+#define FLUSHO	0x00002000
+#define PENDIN	0x00004000
 #define IEXTEN	0x00008000
 
 /* modem lines */
diff --git a/sysdeps/x86_64/bits/link.h b/sysdeps/x86_64/bits/link.h
index 2890c2d88d..8ea7157156 100644
--- a/sysdeps/x86_64/bits/link.h
+++ b/sysdeps/x86_64/bits/link.h
@@ -1,117 +1,14 @@
-/* Copyright (C) 2004, 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef	_LINK_H
-# error "Never include <bits/link.h> directly; use <link.h> instead."
-#endif
-
-
-#if __ELF_NATIVE_CLASS == 32
-/* Registers for entry into PLT on IA-32.  */
-typedef struct La_i86_regs
-{
-  uint32_t lr_edx;
-  uint32_t lr_ecx;
-  uint32_t lr_eax;
-  uint32_t lr_ebp;
-  uint32_t lr_esp;
-} La_i86_regs;
-
-/* Return values for calls from PLT on IA-32.  */
-typedef struct La_i86_retval
-{
-  uint32_t lrv_eax;
-  uint32_t lrv_edx;
-  long double lrv_st0;
-  long double lrv_st1;
-} La_i86_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf32_Addr la_i86_gnu_pltenter (Elf32_Sym *__sym, unsigned int __ndx,
-				       uintptr_t *__refcook,
-				       uintptr_t *__defcook,
-				       La_i86_regs *__regs,
-				       unsigned int *__flags,
-				       const char *__symname,
-				       long int *__framesizep);
-extern unsigned int la_i86_gnu_pltexit (Elf32_Sym *__sym, unsigned int __ndx,
-					uintptr_t *__refcook,
-					uintptr_t *__defcook,
-					const La_i86_regs *__inregs,
-					La_i86_retval *__outregs,
-					const char *symname);
-
-__END_DECLS
+#if __WORDSIZE == 64
+struct link_map_machine
+  {
+    Elf64_Addr plt; /* Address of .plt + 0x16 */
+    Elf64_Addr gotplt; /* Address of .got + 0x18 */
+  };
 
 #else
-
-/* Registers for entry into PLT on x86-64.  */
-# if __GNUC_PREREQ (4,0)
-typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16)));
-# else
-typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__)));
-# endif
-
-typedef struct La_x86_64_regs
-{
-  uint64_t lr_rdx;
-  uint64_t lr_r8;
-  uint64_t lr_r9;
-  uint64_t lr_rcx;
-  uint64_t lr_rsi;
-  uint64_t lr_rdi;
-  uint64_t lr_rbp;
-  uint64_t lr_rsp;
-  La_x86_64_xmm lr_xmm[8];
-} La_x86_64_regs;
-
-/* Return values for calls from PLT on x86-64.  */
-typedef struct La_x86_64_retval
-{
-  uint64_t lrv_rax;
-  uint64_t lrv_rdx;
-  La_x86_64_xmm lrv_xmm0;
-  La_x86_64_xmm lrv_xmm1;
-  long double lrv_st0;
-  long double lrv_st1;
-} La_x86_64_retval;
-
-
-__BEGIN_DECLS
-
-extern Elf64_Addr la_x86_64_gnu_pltenter (Elf64_Sym *__sym,
-					  unsigned int __ndx,
-					  uintptr_t *__refcook,
-					  uintptr_t *__defcook,
-					  La_x86_64_regs *__regs,
-					  unsigned int *__flags,
-					  const char *__symname,
-					  long int *__framesizep);
-extern unsigned int la_x86_64_gnu_pltexit (Elf64_Sym *__sym,
-					   unsigned int __ndx,
-					   uintptr_t *__refcook,
-					   uintptr_t *__defcook,
-					   const La_x86_64_regs *__inregs,
-					   La_x86_64_retval *__outregs,
-					   const char *symname);
-
-__END_DECLS
-
+struct link_map_machine
+  {
+    Elf32_Addr plt; /* Address of .plt + 0x16 */
+    Elf32_Addr gotplt; /* Address of .got + 0x0c */
+  };
 #endif
diff --git a/sysdeps/x86_64/bits/linkmap.h b/sysdeps/x86_64/bits/linkmap.h
deleted file mode 100644
index 8ea7157156..0000000000
--- a/sysdeps/x86_64/bits/linkmap.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#if __WORDSIZE == 64
-struct link_map_machine
-  {
-    Elf64_Addr plt; /* Address of .plt + 0x16 */
-    Elf64_Addr gotplt; /* Address of .got + 0x18 */
-  };
-
-#else
-struct link_map_machine
-  {
-    Elf32_Addr plt; /* Address of .plt + 0x16 */
-    Elf32_Addr gotplt; /* Address of .got + 0x0c */
-  };
-#endif
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index bb0c77fd0b..b932f51d15 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  x86-64 version.
-   Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>.
 
@@ -116,8 +116,7 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	{
 	  got[2] = (Elf64_Addr) &_dl_runtime_profile;
 
-	  if (GLRO(dl_profile) != NULL
-	      && _dl_name_match_p (GLRO(dl_profile), l))
+	  if (_dl_name_match_p (GLRO(dl_profile), l))
 	    /* This is the object we are looking for.  Say that we really
 	       want profiling and the timers are started.  */
 	    GL(dl_profile_map) = l;
@@ -131,6 +130,128 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
   return lazy;
 }
 
+/* This code is used in dl-runtime.c to call the `fixup' function
+   and then redirect to the address it returns.  */
+#ifndef PROF
+# define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\n\
+	.text\n\
+	.globl _dl_runtime_resolve\n\
+	.type _dl_runtime_resolve, @function\n\
+	.align 16\n\
+	" CFI_STARTPROC "\n\
+_dl_runtime_resolve:\n\
+	subq $56,%rsp\n\
+	" CFI_ADJUST_CFA_OFFSET(72)" # Incorporate PLT\n\
+	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.\n\
+	movq %rcx,8(%rsp)\n\
+	movq %rdx,16(%rsp)\n\
+	movq %rsi,24(%rsp)\n\
+	movq %rdi,32(%rsp)\n\
+	movq %r8,40(%rsp)\n\
+	movq %r9,48(%rsp)\n\
+	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.\n\
+	movq %rsi,%r11		# Multiply by 24\n\
+	addq %r11,%rsi\n\
+	addq %r11,%rsi\n\
+	shlq $3, %rsi\n\
+	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset\n\
+	call fixup		# Call resolver.\n\
+	movq %rax, %r11		# Save return value\n\
+	movq 48(%rsp),%r9	# Get register content back.\n\
+	movq 40(%rsp),%r8\n\
+	movq 32(%rsp),%rdi\n\
+	movq 24(%rsp),%rsi\n\
+	movq 16(%rsp),%rdx\n\
+	movq 8(%rsp),%rcx\n\
+	movq (%rsp),%rax\n\
+	addq $72,%rsp		# Adjust stack(PLT did 2 pushes)\n\
+	" CFI_ADJUST_CFA_OFFSET(-72)" \n\
+	jmp *%r11		# Jump to function address.\n\
+	" CFI_ENDPROC "\n\
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+\n\
+	.globl _dl_runtime_profile\n\
+	.type _dl_runtime_profile, @function\n\
+	.align 16\n\
+	" CFI_STARTPROC "\n\
+_dl_runtime_profile:\n\
+	subq $56,%rsp\n\
+	" CFI_ADJUST_CFA_OFFSET(72)" # Incorporate PLT\n\
+	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.\n\
+	movq %rcx,8(%rsp)\n\
+	movq %rdx,16(%rsp)\n\
+	movq %rsi,24(%rsp)\n\
+	movq %rdi,32(%rsp)\n\
+	movq %r8,40(%rsp)\n\
+	movq %r9,48(%rsp)\n\
+	movq 72(%rsp), %rdx	# Load return address if needed\n\
+	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.\n\
+	movq %rsi,%r11		# Multiply by 24\n\
+	addq %r11,%rsi\n\
+	addq %r11,%rsi\n\
+	shlq $3, %rsi\n\
+	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset\n\
+	call profile_fixup	# Call resolver.\n\
+	movq %rax, %r11		# Save return value\n\
+	movq 48(%rsp),%r9	# Get register content back.\n\
+	movq 40(%rsp),%r8\n\
+	movq 32(%rsp),%rdi\n\
+	movq 24(%rsp),%rsi\n\
+	movq 16(%rsp),%rdx\n\
+	movq 8(%rsp),%rcx\n\
+	movq (%rsp),%rax\n\
+	addq $72,%rsp		# Adjust stack\n\
+	" CFI_ADJUST_CFA_OFFSET(-72)"\n\
+	jmp *%r11		# Jump to function address.\n\
+	" CFI_ENDPROC "\n\
+	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
+	.previous\n\
+");
+#else
+# define ELF_MACHINE_RUNTIME_TRAMPOLINE asm ("\n\
+	.text\n\
+	.globl _dl_runtime_resolve\n\
+	.globl _dl_runtime_profile\n\
+	.type _dl_runtime_resolve, @function\n\
+	.type _dl_runtime_profile, @function\n\
+	.align 16\n\
+	" CFI_STARTPROC "\n\
+_dl_runtime_resolve:\n\
+_dl_runtime_profile:\n\
+	subq $56,%rsp\n\
+	" CFI_ADJUST_CFA_OFFSET(72)" # Incorporate PLT\n\
+	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.\n\
+	movq %rcx,8(%rsp)\n\
+	movq %rdx,16(%rsp)\n\
+	movq %rsi,24(%rsp)\n\
+	movq %rdi,32(%rsp)\n\
+	movq %r8,40(%rsp)\n\
+	movq %r9,48(%rsp)\n\
+	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.\n\
+	movq %rsi,%r11		# Multiply by 24\n\
+	addq %r11,%rsi\n\
+	addq %r11,%rsi\n\
+	shlq $3, %rsi\n\
+	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset\n\
+	call fixup		# Call resolver.\n\
+	movq %rax, %r11		# Save return value\n\
+	movq 48(%rsp),%r9	# Get register content back.\n\
+	movq 40(%rsp),%r8\n\
+	movq 32(%rsp),%rdi\n\
+	movq 24(%rsp),%rsi\n\
+	movq 16(%rsp),%rdx\n\
+	movq 8(%rsp),%rcx\n\
+	movq (%rsp),%rax\n\
+	addq $72,%rsp		# Adjust stack\n\
+	" CFI_ADJUST_CFA_OFFSET(-72)"\n\
+	jmp *%r11		# Jump to function address.\n\
+	" CFI_ENDPROC "\n\
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve\n\
+	.size _dl_runtime_profile, .-_dl_runtime_profile\n\
+	.previous\n\
+");
+#endif
+
 /* Initial entry point code for the dynamic linker.
    The C function `_dl_start' is the real entry point;
    its return value is the user program's entry point.  */
@@ -159,24 +280,16 @@ _dl_start_user:\n\
 	# Call _dl_init (struct link_map *main_map, int argc, char **argv, char **env)\n\
 	# argc -> rsi\n\
 	movq %rdx, %rsi\n\
-	# Save %rsp value in %r13.\n\
-	movq %rsp, %r13\n\
-	# And align stack for the _dl_init_internal call. \n\
-	andq $-16, %rsp\n\
 	# _dl_loaded -> rdi\n\
 	movq _rtld_local(%rip), %rdi\n\
 	# env -> rcx\n\
-	leaq 16(%r13,%rdx,8), %rcx\n\
+	leaq 16(%rsp,%rdx,8), %rcx\n\
 	# argv -> rdx\n\
-	leaq 8(%r13), %rdx\n\
-	# Clear %rbp to mark outermost frame obviously even for constructors.\n\
-	xorq %rbp, %rbp\n\
+	leaq 8(%rsp), %rdx\n\
 	# Call the function to run the initializers.\n\
 	call _dl_init_internal@PLT\n\
 	# Pass our finalizer function to the user in %rdx, as per ELF ABI.\n\
 	leaq _dl_fini(%rip), %rdx\n\
-	# And make sure %rsp points to argc stored on the stack.\n\
-	movq %r13, %rsp\n\
 	# Jump to the user's entry point.\n\
 	jmp *%r12\n\
 .previous\n\
@@ -235,14 +348,9 @@ elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
   return value;
 }
 
-
-/* Names of the architecture-specific auditing callback functions.  */
-#define ARCH_LA_PLTENTER x86_64_gnu_pltenter
-#define ARCH_LA_PLTEXIT x86_64_gnu_pltexit
-
 #endif /* !dl_machine_h */
 
-#ifdef RESOLVE_MAP
+#ifdef RESOLVE
 
 /* Perform the relocation specified by RELOC and SYM (which is fully resolved).
    MAP is the object containing the reloc.  */
@@ -282,9 +390,18 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc,
 #ifndef RTLD_BOOTSTRAP
       const Elf64_Sym *const refsym = sym;
 #endif
+#if defined USE_TLS && !defined RTLD_BOOTSTRAP
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
       Elf64_Addr value = (sym == NULL ? 0
 			  : (Elf64_Addr) sym_map->l_addr + sym->st_value);
+#else
+      Elf64_Addr value = RESOLVE (&sym, version, r_type);
+
+# ifndef RTLD_BOOTSTRAP
+      if (sym != NULL)
+# endif
+	value += sym->st_value;
+#endif
 
 #if defined RTLD_BOOTSTRAP && !USE___THREAD
       assert (r_type == R_X86_64_GLOB_DAT || r_type == R_X86_64_JUMP_SLOT);
@@ -436,4 +553,4 @@ elf_machine_lazy_rel (struct link_map *map,
     _dl_reloc_bad_type (map, r_type, 1);
 }
 
-#endif /* RESOLVE_MAP */
+#endif /* RESOLVE */
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
deleted file mode 100644
index eb46f29cf2..0000000000
--- a/sysdeps/x86_64/dl-trampoline.S
+++ /dev/null
@@ -1,188 +0,0 @@
-/* PLT trampolines.  x86-64 version.
-   Copyright (C) 2004, 2005 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#include <sysdep.h>
-
-	.text
-	.globl _dl_runtime_resolve
-	.type _dl_runtime_resolve, @function
-	.align 16
-	cfi_startproc
-_dl_runtime_resolve:
-	subq $56,%rsp
-	cfi_adjust_cfa_offset(72) # Incorporate PLT
-	movq %rax,(%rsp)	# Preserve registers otherwise clobbered.
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.
-	movq %rsi, %r11		# Multiply by 24
-	addq %r11, %rsi
-	addq %r11, %rsi
-	shlq $3, %rsi
-	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset
-	call _dl_fixup		# Call resolver.
-	movq %rax, %r11		# Save return value
-	movq 48(%rsp), %r9	# Get register content back.
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
-	movq (%rsp), %rax
-	addq $72, %rsp		# Adjust stack(PLT did 2 pushes)
-	cfi_adjust_cfa_offset(-72)
-	jmp *%r11		# Jump to function address.
-	cfi_endproc
-	.size _dl_runtime_resolve, .-_dl_runtime_resolve
-
-
-
-	.globl _dl_runtime_profile
-	.type _dl_runtime_profile, @function
-	.align 16
-	cfi_startproc
-_dl_runtime_profile:
-	subq $80, %rsp
-	cfi_adjust_cfa_offset(96) # Incorporate PLT
-	movq %rax, (%rsp)	# Preserve registers otherwise clobbered.
-	movq %rdx, 8(%rsp)
-	movq %r8, 16(%rsp)
-	movq %r9, 24(%rsp)
-	movq %rcx, 32(%rsp)
-	movq %rsi, 40(%rsp)
-	movq %rdi, 48(%rsp)
-	movq %rbp, 56(%rsp)	# Information for auditors.
-	leaq 96(%rsp), %rax
-	movq %rax, 64(%rsp)
-	leaq 8(%rsp), %rcx
-	movq 96(%rsp), %rdx	# Load return address if needed
-	movq 88(%rsp), %rsi	# Copy args pushed by PLT in register.
-	movq %rsi,%r11		# Multiply by 24
-	addq %r11,%rsi
-	addq %r11,%rsi
-	shlq $3, %rsi
-	movq 80(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset
-	leaq 72(%rsp), %r8
-	call _dl_profile_fixup	# Call resolver.
-	movq %rax, %r11		# Save return value
-	movq 8(%rsp), %rdx	# Get back register content.
-	movq 16(%rsp), %r8
-	movq 24(%rsp), %r9
-	movq (%rsp),%rax
-	movq 72(%rsp), %r10
-	testq %r10, %r10
-	jns 1f
-	movq 32(%rsp), %rcx
-	movq 40(%rsp), %rsi
-	movq 48(%rsp), %rdi
-	addq $96,%rsp		# Adjust stack
-	cfi_adjust_cfa_offset (-96)
-	jmp *%r11		# Jump to function address.
-
-	/*
-	    +96     return address
-	    +88     PLT2
-	    +80     PLT1
-	    +72     free
-	    +64     %rsp
-	    +56     %rbp
-	    +48     %rdi
-	    +40     %rsi
-	    +32     %rcx
-	    +24     %r9
-	    +16     %r8
-	    +8      %rdx
-	   %esp     %rax
-	*/
-	cfi_adjust_cfa_offset (96)
-1:	movq %rbx, 72(%rsp)
-	cfi_rel_offset (1, 72)
-	leaq 104(%rsp), %rsi
-	movq %rsp, %rbx
-	cfi_def_cfa_register (1)
-	subq %r10, %rsp
-	movq %rsp, %rdi
-	movq %r10, %rcx
-	shrq $3, %rcx
-	rep
-	movsq
-	andq $0xfffffffffffffff0, %rsp
-	movq 32(%rbx), %rcx
-	movq 40(%rbx), %rsi
-	movq 48(%rbx), %rdi
-	call *%r11
-	movq %rbx, %rsp
-	cfi_def_cfa_register (7)
-	subq $72, %rsp
-	cfi_adjust_cfa_offset (72)
-	movq %rsp, %rcx
-	movq %rax, (%rcx)
-	movq %rdx, 8(%rcx)
-	/* Even though the stack is correctly aligned to allow using movaps
-	   we use movups.  Some callers might provide an incorrectly aligned
-	   stack and we do not want to have it blow up here.  */
-	movups %xmm0, 16(%rcx)
-	movups %xmm1, 32(%rcx)
-	fstpt 48(%rcx)
-	fstpt 64(%rcx)
-	/*
-	    +168    return address
-	    +160    PLT2
-	    +152    PLT1
-	    +144    free
-	    +136    %rsp
-	    +128    %rbp
-	    +120    %rdi
-	    +112    %rsi
-	    +104    %rcx
-	    +96     %r9
-	    +88     %r8
-	    +80     %rdx
-	    +64     %st1 result
-	    +48     %st result
-	    +32     %xmm1 result
-	    +16     %xmm0 result
-	    +8      %rdx result
-	   %esp     %rax result
-	*/
-	leaq 80(%rsp), %rdx
-	movq 144(%rsp), %rbx
-	cfi_restore (1)
-	movq 160(%rsp), %rsi	# Copy args pushed by PLT in register.
-	movq %rsi,%r11		# Multiply by 24
-	addq %r11,%rsi
-	addq %r11,%rsi
-	shlq $3, %rsi
-	movq 152(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_offset
-	call _dl_call_pltexit
-	movq (%rsp), %rax
-	movq 8(%rsp), %rdx
-	movups 16(%rsp), %xmm0
-	movups 32(%rsp), %xmm1
-	fldt 64(%rsp)
-	fldt 48(%rsp)
-	addq $168, %rsp
-	cfi_adjust_cfa_offset (-168)
-	retq
-	cfi_endproc
-	.size _dl_runtime_profile, .-_dl_runtime_profile
diff --git a/time/strptime_l.c b/time/strptime_l.c
index 01c4f8282a..df98099f0a 100644
--- a/time/strptime_l.c
+++ b/time/strptime_l.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 2002, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -539,12 +539,10 @@ __strptime_internal (rp, fmt, tm, decided, era_cnt LOCALE_PARAM)
 	    }
 #endif
 	  if (!match_string (HERE_AM_STR, rp))
-	    {
-	      if (match_string (HERE_PM_STR, rp))
-		is_pm = 1;
-	      else
-		return NULL;
-	    }
+	    if (match_string (HERE_PM_STR, rp))
+	      is_pm = 1;
+	    else
+	      return NULL;
 	  break;
 	case 'r':
 #ifdef _NL_CURRENT
diff --git a/time/tst-strptime.c b/time/tst-strptime.c
index 6356aa0d41..a0b2ebedd2 100644
--- a/time/tst-strptime.c
+++ b/time/tst-strptime.c
@@ -42,10 +42,6 @@ static const struct
   { "C", "19990502123412", "%Y%m%d%H%M%S", 0, 121, 4, 2 },
   { "C", "2001 20 Mon", "%Y %U %a", 1, 140, 4, 21 },
   { "C", "2001 21 Mon", "%Y %W %a", 1, 140, 4, 21 },
-  { "ja_JP.EUC-JP", "2000-01-01 08:12:21 AM", "%Y-%m-%d %I:%M:%S %p",
-    6, 0, 0, 1 },
-  { "en_US.ISO-8859-1", "2000-01-01 08:12:21 PM", "%Y-%m-%d %I:%M:%S %p",
-    6, 0, 0, 1 },
   { "ja_JP.EUC-JP", "2001 20 \xb7\xee", "%Y %U %a", 1, 140, 4, 21 },
   { "ja_JP.EUC-JP", "2001 21 \xb7\xee", "%Y %W %a", 1, 140, 4, 21 },
 };
@@ -77,14 +73,7 @@ test_tm (void)
     {
       memset (&tm, '\0', sizeof (tm));
 
-      char *ret = strptime (tm_tests[i].input, tm_tests[i].format, &tm);
-      if (ret == NULL)
-	{
-	  printf ("strptime returned NULL for `%s'\n", tm_tests[i].input);
-	  result = 1;
-	  continue;
-	}
-      else if (*ret != '\0')
+      if (*strptime (tm_tests[i].input, tm_tests[i].format, &tm) != '\0')
 	{
 	  printf ("not all of `%s' read\n", tm_tests[i].input);
 	  result = 1;
@@ -138,14 +127,7 @@ main (int argc, char *argv[])
 	  exit (EXIT_FAILURE);
 	}
 
-      char *ret = strptime (day_tests[i].input, day_tests[i].format, &tm);
-      if (ret == NULL)
-	{
-	  printf ("strptime returned NULL for `%s'\n", day_tests[i].input);
-	  result = 1;
-	  continue;
-	}
-      else if (*ret != '\0')
+      if (*strptime (day_tests[i].input, day_tests[i].format, &tm) != '\0')
 	{
 	  printf ("not all of `%s' read\n", day_tests[i].input);
 	  result = 1;
diff --git a/timezone/asia b/timezone/asia
index 32e6e3c45f..3c2c1a1868 100644
--- a/timezone/asia
+++ b/timezone/asia
@@ -1,4 +1,4 @@
-# @(#)asia	7.78
+# @(#)asia	7.77
 
 # This data is by no means authoritative; if you think you know better,
 # go ahead and edit the file (and please send any changes to
@@ -639,7 +639,7 @@ Rule	Zion	1988	only	-	Apr	 9	0:00	1:00	D
 Rule	Zion	1988	only	-	Sep	 3	0:00	0	S
 
 # From Ephraim Silverberg <ephraim@cs.huji.ac.il>
-# (1997-03-04, 1998-03-16, 1998-12-28, 2000-01-17, 2000-07-25, and 2004-12-22):
+# (1997-03-04, 1998-03-16, 1998-12-28, 2000-01-17 and 2000-07-25):
 
 # According to the Office of the Secretary General of the Ministry of
 # Interior, there is NO set rule for Daylight-Savings/Standard time changes.
@@ -690,13 +690,13 @@ Rule	Zion	1995	only	-	Sep	 3	0:00	0	S
 # time, Haim Ramon.  The official announcement regarding 1996-1998
 # (with the dates for 1997-1998 no longer being relevant) can be viewed at:
 #
-#   ftp://ftp.cs.huji.ac.il/pub/tz/announcements/1996-1998.ramon.ps.gz
+#   ftp://ftp.huji.ac.il/pub/tz/announcements/1996-1998.ramon.ps.gz
 #
 # The dates for 1997-1998 were altered by his successor, Rabbi Eli Suissa.
 #
 # The official announcements for the years 1997-1999 can be viewed at:
 #
-#   ftp://ftp.cs.huji.ac.il/pub/tz/announcements/YYYY.ps.gz
+#   ftp://ftp.huji.ac.il/pub/tz/announcements/YYYY.ps.gz
 #
 #       where YYYY is the relevant year.
 
@@ -716,12 +716,12 @@ Rule	Zion	1999	only	-	Sep	 3	2:00	0	S
 #
 # The official announcement for the start date of 2000 can be viewed at:
 #
-#	ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2000-start.ps.gz
+#	ftp://ftp.huji.ac.il/pub/tz/announcements/2000-start.ps.gz
 #
 # The official announcement for the end date of 2000 and the dates
 # for the years 2001-2004 can be viewed at:
 #
-#	ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2000-2004.ps.gz
+#	ftp://ftp.huji.ac.il/pub/tz/announcements/2000-2004.ps.gz
 
 # Rule	NAME	FROM	TO	TYPE	IN	ON	AT	SAVE	LETTER/S
 Rule	Zion	2000	only	-	Apr	14	2:00	1:00	D
@@ -735,97 +735,52 @@ Rule	Zion	2003	only	-	Oct	 3	1:00	0	S
 Rule	Zion	2004	only	-	Apr	 7	1:00	1:00	D
 Rule	Zion	2004	only	-	Sep	22	1:00	0	S
 
-# Yesterday, the Knesset Interior Committee passed a proposed (originally
-# in March 2004) change to the Time Setting Law that would make the dates
-# for DST from 2005 and beyond so that DST starts on the night _after_ the
-# first night of the Passover holiday at midnight until midnight of the
-# Saturday night _before_ the fast of Yom Kippur.
-#
-# Those who can read Hebrew can view the proposal at:
-#
-#	ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2005+.ps
-#
-# The proposal still has to be passed by the Knesset (three readings) for
-# it to become law....
-
-# From Paul Eggert (2004-12-22):
-# For now, guess that the rules proposed on 2004-12-20 will be adopted.
-# This is quite possibly wrong, but it is more likely than no DST at all.
-# I used Ed Reingold's cal-hebrew in GNU Emacs 21.3, along with code
-# written by Ephraim Silverberg, to generate this list.
+# From Paul Eggert (2000-07-25):
+# Here are guesses for rules after 2004.
+# They are probably wrong, but they are more likely than no DST at all.
 # Rule	NAME    FROM    TO      TYPE    IN      ON      AT      SAVE    LETTER/S
-Rule	Zion	2005	only	-	Apr	25	0:00	1:00	D
-Rule	Zion	2005	only	-	Oct	 9	0:00	0	S
-Rule	Zion	2006	only	-	Apr	14	0:00	1:00	D
-Rule	Zion	2006	only	-	Oct	 1	0:00	0	S
-Rule	Zion	2007	only	-	Apr	 4	0:00	1:00	D
-Rule	Zion	2007	only	-	Sep	16	0:00	0	S
-Rule	Zion	2008	only	-	Apr	21	0:00	1:00	D
-Rule	Zion	2008	only	-	Oct	 5	0:00	0	S
-Rule	Zion	2009	only	-	Apr	10	0:00	1:00	D
-Rule	Zion	2009	only	-	Sep	27	0:00	0	S
-Rule	Zion	2010	only	-	Mar	31	0:00	1:00	D
-Rule	Zion	2010	only	-	Sep	12	0:00	0	S
-Rule	Zion	2011	only	-	Apr	20	0:00	1:00	D
-Rule	Zion	2011	only	-	Oct	 2	0:00	0	S
-Rule	Zion	2012	only	-	Apr	 8	0:00	1:00	D
-Rule	Zion	2012	only	-	Sep	23	0:00	0	S
-Rule	Zion	2013	only	-	Mar	27	0:00	1:00	D
-Rule	Zion	2013	only	-	Sep	 8	0:00	0	S
-Rule	Zion	2014	only	-	Apr	16	0:00	1:00	D
-Rule	Zion	2014	only	-	Sep	28	0:00	0	S
-Rule	Zion	2015	only	-	Apr	 5	0:00	1:00	D
-Rule	Zion	2015	only	-	Sep	20	0:00	0	S
-Rule	Zion	2016	only	-	Apr	24	0:00	1:00	D
-Rule	Zion	2016	only	-	Oct	 9	0:00	0	S
-Rule	Zion	2017	only	-	Apr	12	0:00	1:00	D
-Rule	Zion	2017	only	-	Sep	24	0:00	0	S
-Rule	Zion	2018	only	-	Apr	 1	0:00	1:00	D
-Rule	Zion	2018	only	-	Sep	16	0:00	0	S
-Rule	Zion	2019	only	-	Apr	21	0:00	1:00	D
-Rule	Zion	2019	only	-	Oct	 6	0:00	0	S
-Rule	Zion	2020	only	-	Apr	10	0:00	1:00	D
-Rule	Zion	2020	only	-	Sep	27	0:00	0	S
-Rule	Zion	2021	only	-	Mar	29	0:00	1:00	D
-Rule	Zion	2021	only	-	Sep	12	0:00	0	S
-Rule	Zion	2022	only	-	Apr	17	0:00	1:00	D
-Rule	Zion	2022	only	-	Oct	 2	0:00	0	S
-Rule	Zion	2023	only	-	Apr	 7	0:00	1:00	D
-Rule	Zion	2023	only	-	Sep	24	0:00	0	S
-Rule	Zion	2024	only	-	Apr	24	0:00	1:00	D
-Rule	Zion	2024	only	-	Oct	 6	0:00	0	S
-Rule	Zion	2025	only	-	Apr	14	0:00	1:00	D
-Rule	Zion	2025	only	-	Sep	28	0:00	0	S
-Rule	Zion	2026	only	-	Apr	 3	0:00	1:00	D
-Rule	Zion	2026	only	-	Sep	20	0:00	0	S
-Rule	Zion	2027	only	-	Apr	23	0:00	1:00	D
-Rule	Zion	2027	only	-	Oct	10	0:00	0	S
-Rule	Zion	2028	only	-	Apr	12	0:00	1:00	D
-Rule	Zion	2028	only	-	Sep	24	0:00	0	S
-Rule	Zion	2029	only	-	Apr	 1	0:00	1:00	D
-Rule	Zion	2029	only	-	Sep	16	0:00	0	S
-Rule	Zion	2030	only	-	Apr	19	0:00	1:00	D
-Rule	Zion	2030	only	-	Oct	 6	0:00	0	S
-Rule	Zion	2031	only	-	Apr	 9	0:00	1:00	D
-Rule	Zion	2031	only	-	Sep	21	0:00	0	S
-Rule	Zion	2032	only	-	Mar	28	0:00	1:00	D
-Rule	Zion	2032	only	-	Sep	12	0:00	0	S
-Rule	Zion	2033	only	-	Apr	15	0:00	1:00	D
-Rule	Zion	2033	only	-	Oct	 2	0:00	0	S
-Rule	Zion	2034	only	-	Apr	 5	0:00	1:00	D
-Rule	Zion	2034	only	-	Sep	17	0:00	0	S
-Rule	Zion	2035	only	-	Apr	25	0:00	1:00	D
-Rule	Zion	2035	only	-	Oct	 7	0:00	0	S
-Rule	Zion	2036	only	-	Apr	13	0:00	1:00	D
-Rule	Zion	2036	only	-	Sep	28	0:00	0	S
-Rule	Zion	2037	only	-	Apr	 1	0:00	1:00	D
-Rule	Zion	2037	only	-	Sep	13	0:00	0	S
+Rule	Zion	2005	max	-	Apr	 1	1:00	1:00	D
+Rule	Zion	2005	max	-	Oct	 1	1:00	0	S
 
 # Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
 Zone	Asia/Jerusalem	2:20:56 -	LMT	1880
 			2:20:40	-	JMT	1918	# Jerusalem Mean Time?
 			2:00	Zion	I%sT
 
+# From Ephraim Silverberg (2003-03-23):
+#
+# Minister of Interior Poraz has announced that he will respect the law
+# passed in July 2000 (proposed at the time jointly by himself and
+# then-MK David Azulai [Shas]) fixing the dates for 2000-2004.  Hence,
+# the dates for 2003 and 2004 remain unchanged....
+#
+# As far as 2005 and beyond, no dates have been set.  However, the
+# minister has mentioned that he wishes to propose to move Israel's
+# timezone in 2005 from GMT+2 to GMT+3 and upon that have DST during
+# the summer months (i.e. GMT+4).  However, no legislation in this
+# direction is expected until the latter part of 2004 which is a long
+# time off in terms of Israeli politics.
+
+# (2004-09-20):
+# The latest rumour, however, is that in 2005, when the clock changes to
+# Daylight Saving Time (date as yet unknown), the move will be a two-hour leap
+# forward (from UTC+0200 to UTC+0400) and then, in the fall, the clock will
+# move back only an hour to UTC+0300 thus effectively moving Israel's timezone
+# from UTC+0200 to UTC+0300.  However, no actual draft has been put before the
+# Knesset (Israel's Parliament) though the intention is to do so this
+# month [2004-09].
+
+# (2004-09-26):
+# Even though the draft law for the above did pass the Ministerial Committee
+# for Legislative Matters three months ago, it was voted down in today's
+# Cabinet meeting.  The current suggestion is to keep the current timezone at
+# UTC+0200 but have an extended period of Daylight Saving Time (UTC+0300) from
+# the beginning of Passover holiday in the spring to after the Tabernacle
+# holiday in the fall (i.e. the dates of which are governed by the Hebrew
+# calendar but this means at least 184 days of DST).  However, this is only a
+# suggestion that was raised in today's cabinet meeting and has not yet been
+# drafted.
+
 
 
 ###############################################################################
diff --git a/timezone/backward b/timezone/backward
index 9fbab94d79..6e118c2726 100644
--- a/timezone/backward
+++ b/timezone/backward
@@ -1,4 +1,4 @@
-# @(#)backward	7.27
+# @(#)backward	7.26
 
 # This file provides links between current names for time zones
 # and their old names.  Many names changed in late 1993.
@@ -13,7 +13,7 @@ Link	America/Argentina/Jujuy	America/Jujuy
 Link	America/Indiana/Knox	America/Knox_IN
 Link	America/Argentina/Mendoza	America/Mendoza
 Link	America/Rio_Branco	America/Porto_Acre
-Link	America/Argentina/Cordoba	America/Rosario
+Link	America/Cordoba		America/Rosario
 Link	America/St_Thomas	America/Virgin
 Link	Asia/Ashgabat		Asia/Ashkhabad
 Link	Asia/Chongqing		Asia/Chungking
@@ -34,7 +34,7 @@ Link	Australia/Hobart	Australia/Tasmania
 Link	Australia/Melbourne	Australia/Victoria
 Link	Australia/Perth		Australia/West
 Link	Australia/Broken_Hill	Australia/Yancowinna
-Link	America/Rio_Branco	Brazil/Acre
+Link	America/Porto_Acre	Brazil/Acre
 Link	America/Noronha		Brazil/DeNoronha
 Link	America/Sao_Paulo	Brazil/East
 Link	America/Manaus		Brazil/West
@@ -55,10 +55,10 @@ Link	Europe/Dublin		Eire
 Link	Europe/Chisinau		Europe/Tiraspol
 Link	Europe/London		GB
 Link	Europe/London		GB-Eire
-Link	Etc/GMT			GMT+0
-Link	Etc/GMT			GMT-0
-Link	Etc/GMT			GMT0
-Link	Etc/GMT			Greenwich
+Link	Etc/GMT+0		GMT+0
+Link	Etc/GMT-0		GMT-0
+Link	Etc/GMT0		GMT0
+Link	Etc/Greenwich		Greenwich
 Link	Asia/Hong_Kong		Hongkong
 Link	Atlantic/Reykjavik	Iceland
 Link	Asia/Tehran		Iran
@@ -70,7 +70,7 @@ Link	Africa/Tripoli		Libya
 Link	America/Tijuana		Mexico/BajaNorte
 Link	America/Mazatlan	Mexico/BajaSur
 Link	America/Mexico_City	Mexico/General
-Link	America/Denver		Navajo
+Link	America/Shiprock	Navajo
 Link	Pacific/Auckland	NZ
 Link	Pacific/Chatham		NZ-CHAT
 Link	Pacific/Pago_Pago	Pacific/Samoa
@@ -95,6 +95,6 @@ Link	America/Denver		US/Mountain
 Link	America/Los_Angeles	US/Pacific
 Link	Pacific/Pago_Pago	US/Samoa
 Link	Etc/UTC			UTC
-Link	Etc/UTC			Universal
+Link	Etc/Universal		Universal
 Link	Europe/Moscow		W-SU
-Link	Etc/UTC			Zulu
+Link	Etc/Zulu		Zulu
diff --git a/timezone/europe b/timezone/europe
index 2da2df16db..eeb114f881 100644
--- a/timezone/europe
+++ b/timezone/europe
@@ -1,4 +1,4 @@
-# @(#)europe	7.91
+# @(#)europe	7.88
 
 # This data is by no means authoritative; if you think you know better,
 # go ahead and edit the file (and please send any changes to
@@ -708,7 +708,7 @@ Zone	Europe/Sofia	1:33:16 -	LMT	1880
 # see Serbia and Montenegro
 
 # Cyprus
-# Please see the `asia' file for Asia/Nicosia.
+# See the `asia' file.
 
 # Czech Republic
 # Rule	NAME	FROM	TO	TYPE	IN	ON	AT	SAVE	LETTER/S
@@ -754,11 +754,6 @@ Zone Atlantic/Faeroe	-0:27:04 -	LMT	1908 Jan 11	# Torshavn
 			 0:00	-	WET	1981
 			 0:00	EU	WE%sT
 #
-# From Paul Eggert (2004-10-31):
-# During World War II, Germany maintained secret manned weather stations in
-# East Greenland and Franz Josef Land, but we don't know their time zones.
-# My source for this is Wilhelm Dege's book mentioned under Svalbard.
-#
 # From Paul Eggert (1996-11-22):
 # Greenland joined the EU as part of Denmark, obtained home rule on 1979-05-01,
 # and left the EU on 1985-02-01.  It therefore should have been using EU
@@ -1060,11 +1055,6 @@ Zone	Europe/Berlin	0:53:28 -	LMT	1893 Apr
 			1:00	Germany	CE%sT	1980
 			1:00	EU	CE%sT
 
-# Georgia
-# Please see the "asia" file for Asia/Tbilisi.
-# Herodotus (Histories, IV.45) says Georgia north of the Phasis (now Rioni)
-# is in Europe.  Our reference location Tbilisi is in the Asian part.
-
 # Gibraltar
 # Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
 Zone Europe/Gibraltar	-0:21:24 -	LMT	1880 Aug  2
@@ -1621,7 +1611,7 @@ Zone	Europe/Oslo	0:43:00 -	LMT	1895 Jan  1
 # From Paul Eggert (2001-05-01):
 #
 # Actually, Jan Mayen was never occupied by Germany during World War II,
-# so it must have diverged from Oslo time during the war, as Oslo was
+# so it must have diverged from Oslo time during the war, as Oslo was
 # keeping Berlin time.
 #
 # <http://home.no.net/janmayen/history.htm> says that the meteorologists
@@ -1638,7 +1628,7 @@ Zone	Europe/Oslo	0:43:00 -	LMT	1895 Jan  1
 # <http://www.svalbard.com/SvalbardFAQ.html> says that the Germans were
 # expelled on 1942-05-14.  However, small parties of Germans did return,
 # and according to Wilhelm Dege's book "War North of 80" (1954)
-# <http://www.ucalgary.ca/UofC/departments/UP/1-55238/1-55238-110-2.html>
+# <http://www.utpress.utoronto.ca/publishing/rights/dege_warnorthof80.htm>
 # the German armed forces at the Svalbard weather station code-named
 # Haudegen did not surrender to the Allies until September 1945.
 #
diff --git a/timezone/leapseconds b/timezone/leapseconds
index 385ee66ebd..7add3303f2 100644
--- a/timezone/leapseconds
+++ b/timezone/leapseconds
@@ -1,4 +1,4 @@
-# @(#)leapseconds	7.18
+# @(#)leapseconds	7.17
 
 # Allowance for leapseconds added to each timezone file.
 
@@ -45,6 +45,7 @@ Leap	1997	Jun	30	23:59:60	+	S
 Leap	1998	Dec	31	23:59:60	+	S
 
 # 	INTERNATIONAL EARTH ROTATION AND REFERENCE SYSTEMS SERVICE (IERS)
+# 
 # SERVICE INTERNATIONAL DE LA ROTATION TERRESTRE ET DES SYSTEMES DE REFERENCE
 # 
 # SERVICE DE LA ROTATION TERRESTRE
@@ -54,10 +55,9 @@ Leap	1998	Dec	31	23:59:60	+	S
 # FAX       : 33 (0) 1 40 51 22 91
 # Internet  : services.iers@obspm.fr
 # 
-# 						Paris, 21 July 2004
-# 
+# 						Paris, 15 January 2004
 # 
-# 						Bulletin C 28
+# 						Bulletin C 27
 # 
 # 						To authorities responsible
 # 						for the measurement and
@@ -65,7 +65,7 @@ Leap	1998	Dec	31	23:59:60	+	S
 # 
 # 			INFORMATION ON UTC - TAI
 # 
-# NO positive leap second will be introduced at the end of December 2004.
+# NO positive leap second will be introduced at the end of June 2004.
 # The difference between UTC and the International Atomic Time TAI is:
 # 
 # 	from 1999 January 1, 0h UTC, until further notice : UTC-TAI = -32 s
@@ -78,3 +78,4 @@ Leap	1998	Dec	31	23:59:60	+	S
 # 					Daniel GAMBIS
 # 					Director
 # 					Earth Orientation Center of IERS
+# 					Observatoire de Paris, France
diff --git a/timezone/northamerica b/timezone/northamerica
index f9d2f6a447..6e755b04b3 100644
--- a/timezone/northamerica
+++ b/timezone/northamerica
@@ -1,4 +1,4 @@
-# @(#)northamerica	7.71
+# @(#)northamerica	7.69
 # also includes Central America and the Caribbean
 
 # This data is by no means authoritative; if you think you know better,
@@ -208,13 +208,6 @@ Rule	US	1987	max	-	Apr	Sun>=1	2:00	1:00	D
 # Pennsylvania, Rhode Island, South Carolina, eastern Tennessee,
 # Vermont, Virginia, West Virginia
 
-# From Dave Cantor (2004-11-02):
-# Early this summer I had the occasion to visit the Mount Washington
-# Observatory weather station atop (of course!) Mount Washington [, NH]....
-# One of the staff members said that the station was on Eastern Standard Time
-# and didn't change their clocks for Daylight Saving ... so that their
-# reports will always have times which are 5 hours behind UTC.
-
 # Rule	NAME	FROM	TO	TYPE	IN	ON	AT	SAVE	LETTER
 Rule	NYC	1920	only	-	Mar	lastSun	2:00	1:00	D
 Rule	NYC	1920	only	-	Oct	lastSun	2:00	0	S
@@ -362,26 +355,10 @@ Zone America/Adak	 12:13:21 -	LMT	1867 Oct 18
 			-11:00	US	B%sT	1983 Oct 30 2:00
 			-10:00	US	AH%sT	1983 Nov 30
 			-10:00	US	HA%sT
-# The following switches don't quite make our 1970 cutoff.
-#
 # Shanks writes that part of southwest Alaska (e.g. Aniak)
 # switched from -11:00 to -10:00 on 1968-09-22 at 02:00,
 # and another part (e.g. Akiak) made the same switch five weeks later.
-#
-# From David Flater (2004-11-09):
-# In e-mail, 2004-11-02, Ray Hudson, historian/liaison to the Unalaska
-# Historic Preservation Commission, provided this information, which
-# suggests that Unalaska deviated from statutory time from early 1967
-# possibly until 1983:
-#
-#  Minutes of the Unalaska City Council Meeting, January 10, 1967:
-#  "Except for St. Paul and Akutan, Unalaska is the only important
-#  location not on Alaska Standard Time.  The following resolution was
-#  made by William Robinson and seconded by Henry Swanson:  Be it
-#  resolved that the City of Unalaska hereby goes to Alaska Standard
-#  Time as of midnight Friday, January 13, 1967 (1 A.M. Saturday,
-#  January 14, Alaska Standard Time.)  This resolution was passed with
-#  three votes for and one against."
+# These switches don't quite make our 1970 cutoff.
 
 # Hawaii
 #
@@ -1754,15 +1731,6 @@ Zone America/Costa_Rica	-5:36:20 -	LMT	1890		# San Jose
 # to DST--and one more hour on 1999-04-04--when the announcers will have
 # returned to Baltimore, which switches on that date.)
 
-# From Evert van der Veer via Steffen Thorsen (2004-10-28):
-# Cuba is not going back to standard time this year.
-# From Paul Eggert (2004-10-28):
-# http://www.granma.cu/ingles/2004/septiembre/juev30/41medid-i.html
-# says that it's due to a problem at the Antonio Guiteras
-# thermoelectric plant, and says "This October there will be no return
-# to normal hours (after daylight saving time)".
-# For now, let's assume that it's a one-year temporary measure.
-
 # Rule	NAME	FROM	TO	TYPE	IN	ON	AT	SAVE	LETTER/S
 Rule	Cuba	1928	only	-	Jun	10	0:00	1:00	D
 Rule	Cuba	1928	only	-	Oct	10	0:00	0	S
@@ -1791,9 +1759,8 @@ Rule	Cuba	1991	1995	-	Oct	Sun>=8	0:00s	0	S
 Rule	Cuba	1996	only	-	Oct	 6	0:00s	0	S
 Rule	Cuba	1997	only	-	Oct	12	0:00s	0	S
 Rule	Cuba	1998	1999	-	Mar	lastSun	0:00s	1:00	D
-Rule	Cuba	1998	2003	-	Oct	lastSun	0:00s	0	S
+Rule	Cuba	1998	max	-	Oct	lastSun	0:00s	0	S
 Rule	Cuba	2000	max	-	Apr	Sun>=1	0:00s	1:00	D
-Rule	Cuba	2005	max	-	Oct	lastSun	0:00s	0	S
 
 # Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
 Zone	America/Havana	-5:29:28 -	LMT	1890
diff --git a/timezone/private.h b/timezone/private.h
index 5de2f7dfe4..57663052f0 100644
--- a/timezone/private.h
+++ b/timezone/private.h
@@ -21,7 +21,7 @@
 
 #ifndef lint
 #ifndef NOID
-static char	privatehid[] = "@(#)private.h	7.55";
+static char	privatehid[] = "@(#)private.h	7.54";
 #endif /* !defined NOID */
 #endif /* !defined lint */
 
@@ -208,7 +208,6 @@ extern char *	asctime_r();
 /*
 ** Private function declarations.
 */
-
 char *	icalloc P((int nelem, int elsize));
 char *	icatalloc P((char * old, const char * new));
 char *	icpyalloc P((const char * string));
@@ -218,6 +217,7 @@ void	icfree P((char * pointer));
 void	ifree P((char * pointer));
 char *	scheck P((const char *string, const char *format));
 
+
 /*
 ** Finally, some convenience items.
 */
@@ -238,15 +238,6 @@ char *	scheck P((const char *string, const char *format));
 #define TYPE_SIGNED(type) (((type) -1) < 0)
 #endif /* !defined TYPE_SIGNED */
 
-/*
-** Since the definition of TYPE_INTEGRAL contains floating point numbers,
-** it cannot be used in preprocessor directives.
-*/
-
-#ifndef TYPE_INTEGRAL
-#define TYPE_INTEGRAL(type) (((type) 0.5) != 0.5)
-#endif /* !defined TYPE_INTEGRAL */
-
 #ifndef INT_STRLEN_MAXIMUM
 /*
 ** 302 / 1000 is log10(2.0) rounded up.
diff --git a/timezone/southamerica b/timezone/southamerica
index 2e9faf4934..912491049b 100644
--- a/timezone/southamerica
+++ b/timezone/southamerica
@@ -1,4 +1,4 @@
-# @(#)southamerica	7.57
+# @(#)southamerica	7.54
 
 # This data is by no means authoritative; if you think you know better,
 # go ahead and edit the file (and please send any changes to
@@ -671,7 +671,7 @@ Zone America/Campo_Grande -3:38:28 -	LMT	1914
 # Mato Grosso (MT)
 Zone America/Cuiaba	-3:44:20 -	LMT	1914
 			-4:00	Brazil	AM%sT	2003 Sep 24
-			-4:00	-	AMT	2004 Oct  1
+			-4:00	-	AMT	2004 Oct  4
 			-4:00	Brazil	AM%sT
 #
 # west Para (PA), Rondonia (RO)
@@ -942,16 +942,9 @@ Rule	Para	1998	2001	-	Mar	Sun>=1	0:00	0	-
 # A decree was issued in Paraguay (no. 16350) on 2002-02-26 that changed the
 # dst method to be from the first Sunday in September to the first Sunday in
 # April.
-Rule	Para	2002	2004	-	Apr	Sun>=1	0:00	0	-
-Rule	Para	2002	2003	-	Sep	Sun>=1	0:00	1:00	S
-#
-# From Jesper Norgaard Welen (2005-01-02):
-# There are several sources that claim that Paraguay made
-# a timezone rule change in autumn 2004.
-# From Steffen Thorsen (2005-01-05):
-# Decree 1,867 (2004-03-05) <http://www.labor.com.py/noticias.asp?id=27>
-Rule	Para	2004	max	-	Oct	Sun>=15	0:00	1:00	S
-Rule	Para	2005	max	-	Mar	Sun>=8	0:00	0	-
+Rule	Para	2002	max	-	Apr	Sun>=1	0:00	0	-
+Rule	Para	2002	max	-	Sep	Sun>=1	0:00	1:00	S
+
 
 # Zone	NAME		GMTOFF	RULES	FORMAT	[UNTIL]
 Zone America/Asuncion	-3:50:40 -	LMT	1890
diff --git a/timezone/tzfile.h b/timezone/tzfile.h
index 0e9966a950..0921c3c339 100644
--- a/timezone/tzfile.h
+++ b/timezone/tzfile.h
@@ -21,7 +21,7 @@
 
 #ifndef lint
 #ifndef NOID
-static char	tzfilehid[] = "@(#)tzfile.h	7.16";
+static char	tzfilehid[] = "@(#)tzfile.h	7.14";
 #endif /* !defined NOID */
 #endif /* !defined lint */
 
@@ -156,21 +156,12 @@ struct tzhead {
 #define EPOCH_YEAR	1970
 #define EPOCH_WDAY	TM_THURSDAY
 
-#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
-
 /*
-** Since everything in isleap is modulo 400 (or a factor of 400), we know that
-**	isleap(y) == isleap(y % 400)
-** and so
-**	isleap(a + b) == isleap((a + b) % 400)
-** or
-**	isleap(a + b) == isleap(a % 400 + b % 400)
-** This is true even if % means modulo rather than Fortran remainder
-** (which is allowed by C89 but not C99).
-** We use this to avoid addition overflow problems.
+** Accurate only for the past couple of centuries;
+** that will probably do.
 */
 
-#define isleap_sum(a, b)	isleap((a) % 400 + (b) % 400)
+#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
 
 #ifndef USG
 
diff --git a/timezone/zdump.c b/timezone/zdump.c
index bd7132698f..20bb916822 100644
--- a/timezone/zdump.c
+++ b/timezone/zdump.c
@@ -1,4 +1,4 @@
-static char	elsieid[] = "@(#)zdump.c	7.61";
+static char	elsieid[] = "@(#)zdump.c	7.40";
 
 /*
 ** This code has been made independent of the rest of the time
@@ -11,15 +11,6 @@ static char	elsieid[] = "@(#)zdump.c	7.61";
 #include "sys/types.h"	/* for time_t */
 #include "time.h"	/* for struct tm */
 #include "stdlib.h"	/* for exit, malloc, atoi */
-#include "float.h"	/* for FLT_MAX and DBL_MAX */
-
-#ifndef ZDUMP_LO_YEAR
-#define ZDUMP_LO_YEAR	(-500)
-#endif /* !defined ZDUMP_LO_YEAR */
-
-#ifndef ZDUMP_HI_YEAR
-#define ZDUMP_HI_YEAR	2500
-#endif /* !defined ZDUMP_HI_YEAR */
 
 #ifndef MAX_STRING_LENGTH
 #define MAX_STRING_LENGTH	1024
@@ -70,20 +61,9 @@ static char	elsieid[] = "@(#)zdump.c	7.61";
 #endif /* !defined DAYSPERNYEAR */
 
 #ifndef isleap
-#define isleap(y) (((y) % 4) == 0 && (((y) % 100) != 0 || ((y) % 400) == 0))
+#define isleap(y) ((((y) % 4) == 0 && ((y) % 100) != 0) || ((y) % 400) == 0)
 #endif /* !defined isleap */
 
-#ifndef isleap_sum
-/*
-** See tzfile.h for details on isleap_sum.
-*/
-#define isleap_sum(a, b)	isleap((a) % 400 + (b) % 400)
-#endif /* !defined isleap_sum */
-
-#define SECSPERDAY	((long) SECSPERHOUR * HOURSPERDAY)
-#define SECSPERNYEAR	(SECSPERDAY * DAYSPERNYEAR)
-#define SECSPERLYEAR	(SECSPERNYEAR + SECSPERDAY)
-
 #if HAVE_GETTEXT
 #include "locale.h"	/* for setlocale */
 #include "libintl.h"
@@ -135,60 +115,18 @@ static char	elsieid[] = "@(#)zdump.c	7.61";
 
 extern char **	environ;
 extern int	getopt P((int argc, char * const argv[],
-			const char * options));
+			  const char * options));
 extern char *	optarg;
 extern int	optind;
 extern char *	tzname[2];
 
-static time_t	absolute_min_time;
-static time_t	absolute_max_time;
-static size_t	longest;
-static char *	progname;
-
 static char *	abbr P((struct tm * tmp));
 static long	delta P((struct tm * newp, struct tm * oldp));
-static void	dumptime P((const struct tm * tmp));
 static time_t	hunt P((char * name, time_t lot, time_t	hit));
-static void	setabsolutes P((void));
+static size_t	longest;
+static char *	progname;
 static void	show P((char * zone, time_t t, int v));
-static const char *	tformat P((void));
-static time_t	yeartot P((long y));
-
-#ifndef TYPECHECK
-#define my_localtime	localtime
-#else /* !defined TYPECHECK */
-static struct tm *
-my_localtime(tp)
-time_t *	tp;
-{
-	register struct tm *	tmp;
-
-	tmp = localtime(tp);
-	if (tp != NULL && tmp != NULL) {
-		struct tm	tm;
-		register time_t	t;
-
-		tm = *tmp;
-		t = mktime(&tm);
-		if (t - *tp >= 1 || *tp - t >= 1) {
-			(void) fflush(stdout);
-			(void) fprintf(stderr, "\n%s: ", progname);
-			(void) fprintf(stderr, tformat(), *tp);
-			(void) fprintf(stderr, " ->");
-			(void) fprintf(stderr, " sec %d", tmp->tm_sec);
-			(void) fprintf(stderr, " min %d", tmp->tm_min);
-			(void) fprintf(stderr, " hour %d", tmp->tm_hour);
-			(void) fprintf(stderr, " mday %d", tmp->tm_mday);
-			(void) fprintf(stderr, " mon %d", tmp->tm_mon);
-			(void) fprintf(stderr, " year %d", tmp->tm_year);
-			(void) fprintf(stderr, " -> ");
-			(void) fprintf(stderr, tformat(), t);
-			(void) fprintf(stderr, "\n");
-		}
-	}
-	return tmp;
-}
-#endif /* !defined TYPECHECK */
+static void	dumptime P((const struct tm * tmp));
 
 int
 main(argc, argv)
@@ -198,22 +136,18 @@ char *	argv[];
 	register int		i;
 	register int		c;
 	register int		vflag;
-	register char *		cutarg;
-	register long		cutloyear = ZDUMP_LO_YEAR;
-	register long		cuthiyear = ZDUMP_HI_YEAR;
-	register time_t		cutlotime;
-	register time_t		cuthitime;
-	register char **	fakeenv;
+	register char *		cutoff;
+	register int		cutyear;
+	register long		cuttime;
+	char **			fakeenv;
 	time_t			now;
 	time_t			t;
 	time_t			newt;
+	time_t			hibit;
 	struct tm		tm;
 	struct tm		newtm;
-	register struct tm *	tmp;
-	register struct tm *	newtmp;
 
-	INITIALIZE(cutlotime);
-	INITIALIZE(cuthitime);
+	INITIALIZE(cuttime);
 #if HAVE_GETTEXT
 	(void) setlocale(LC_MESSAGES, "");
 #ifdef TZ_DOMAINDIR
@@ -228,50 +162,39 @@ char *	argv[];
 			(void) exit(EXIT_SUCCESS);
 		}
 	vflag = 0;
-	cutarg = NULL;
+	cutoff = NULL;
 	while ((c = getopt(argc, argv, "c:v")) == 'c' || c == 'v')
 		if (c == 'v')
 			vflag = 1;
-		else	cutarg = optarg;
+		else	cutoff = optarg;
 	if ((c != EOF && c != -1) ||
 		(optind == argc - 1 && strcmp(argv[optind], "=") == 0)) {
 			(void) fprintf(stderr,
-_("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"),
-				progname, progname);
+_("%s: usage is %s [ --version ] [ -v ] [ -c cutoff ] zonename ...\n"),
+				argv[0], argv[0]);
 			(void) exit(EXIT_FAILURE);
 	}
-	if (vflag) {
-		if (cutarg != NULL) {
-			long	lo;
-			long	hi;
-			char	dummy;
-
-			if (sscanf(cutarg, "%ld%c", &hi, &dummy) == 1) {
-				cuthiyear = hi;
-			} else if (sscanf(cutarg, "%ld,%ld%c",
-				&lo, &hi, &dummy) == 2) {
-					cutloyear = lo;
-					cuthiyear = hi;
-			} else {
-(void) fprintf(stderr, _("%s: wild -c argument %s\n"),
-					progname, cutarg);
-				(void) exit(EXIT_FAILURE);
-			}
-		}
-		setabsolutes();
-		cutlotime = yeartot(cutloyear);
-		cuthitime = yeartot(cuthiyear);
+	if (cutoff != NULL) {
+		int	y;
+
+		cutyear = atoi(cutoff);
+		cuttime = 0;
+		for (y = EPOCH_YEAR; y < cutyear; ++y)
+			cuttime += DAYSPERNYEAR + isleap(y);
+		cuttime *= SECSPERHOUR * HOURSPERDAY;
 	}
 	(void) time(&now);
 	longest = 0;
 	for (i = optind; i < argc; ++i)
 		if (strlen(argv[i]) > longest)
 			longest = strlen(argv[i]);
+	for (hibit = 1; (hibit << 1) != 0; hibit <<= 1)
+		continue;
 	{
 		register int	from;
 		register int	to;
 
-		for (i = 0; environ[i] != NULL; ++i)
+		for (i = 0;  environ[i] != NULL;  ++i)
 			continue;
 		fakeenv = (char **) malloc((size_t) ((i + 2) *
 			sizeof *fakeenv));
@@ -296,129 +219,58 @@ _("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"),
 			show(argv[i], now, FALSE);
 			continue;
 		}
-		t = absolute_min_time;
+		/*
+		** Get lowest value of t.
+		*/
+		t = hibit;
+		if (t > 0)		/* time_t is unsigned */
+			t = 0;
 		show(argv[i], t, TRUE);
 		t += SECSPERHOUR * HOURSPERDAY;
 		show(argv[i], t, TRUE);
-		if (t < cutlotime)
-			t = cutlotime;
-		tmp = my_localtime(&t);
-		if (tmp != NULL) {
-			tm = *tmp;
-			(void) strncpy(buf, abbr(&tm), (sizeof buf) - 1);
-		}
+		tm = *localtime(&t);
+		(void) strncpy(buf, abbr(&tm), (sizeof buf) - 1);
 		for ( ; ; ) {
-			if (t >= cuthitime)
+			if (cutoff != NULL && t >= cuttime)
 				break;
 			newt = t + SECSPERHOUR * 12;
-			if (newt >= cuthitime)
+			if (cutoff != NULL && newt >= cuttime)
 				break;
 			if (newt <= t)
 				break;
-			newtmp = localtime(&newt);
-			if (newtmp != NULL)
-				newtm = *newtmp;
-			if ((tmp == NULL || newtmp == NULL) ?  (tmp != newtmp) :
-				(delta(&newtm, &tm) != (newt - t) ||
+			newtm = *localtime(&newt);
+			if (delta(&newtm, &tm) != (newt - t) ||
 				newtm.tm_isdst != tm.tm_isdst ||
-				strcmp(abbr(&newtm), buf) != 0)) {
+				strcmp(abbr(&newtm), buf) != 0) {
 					newt = hunt(argv[i], t, newt);
-					newtmp = localtime(&newt);
-					if (newtmp != NULL) {
-						newtm = *newtmp;
-						(void) strncpy(buf,
-							abbr(&newtm),
-							(sizeof buf) - 1);
-					}
+					newtm = *localtime(&newt);
+					(void) strncpy(buf, abbr(&newtm),
+						(sizeof buf) - 1);
 			}
 			t = newt;
 			tm = newtm;
-			tmp = newtmp;
 		}
-		t = absolute_max_time;
+		/*
+		** Get highest value of t.
+		*/
+		t = ~((time_t) 0);
+		if (t < 0)		/* time_t is signed */
+			t &= ~hibit;
 		t -= SECSPERHOUR * HOURSPERDAY;
 		show(argv[i], t, TRUE);
 		t += SECSPERHOUR * HOURSPERDAY;
 		show(argv[i], t, TRUE);
 	}
 	if (fflush(stdout) || ferror(stdout)) {
-		(void) fprintf(stderr, "%s: ", progname);
+		(void) fprintf(stderr, "%s: ", argv[0]);
 		(void) perror(_("Error writing standard output"));
 		(void) exit(EXIT_FAILURE);
 	}
 	exit(EXIT_SUCCESS);
-	/* If exit fails to exit... */
-	return EXIT_FAILURE;
-}
-
-static void
-setabsolutes()
-{
-	if (0.5 == (time_t) 0.5) {
-		/*
-		** time_t is floating.
-		*/
-		if (sizeof (time_t) == sizeof (float)) {
-			absolute_min_time = (time_t) -FLT_MAX;
-			absolute_max_time = (time_t) FLT_MAX;
-		} else if (sizeof (time_t) == sizeof (double)) {
-			absolute_min_time = (time_t) -DBL_MAX;
-			absolute_max_time = (time_t) DBL_MAX;
-		} else {
-			(void) fprintf(stderr,
-_("%s: use of -v on system with floating time_t other than float or double\n"),
-				progname);
-			(void) exit(EXIT_FAILURE);
-		}
-	} else if (0 > (time_t) -1) {
-		/*
-		** time_t is signed.
-		*/
-		register time_t	hibit;
 
-		for (hibit = 1; (hibit * 2) != 0; hibit *= 2)
-			continue;
-		absolute_min_time = hibit;
-		absolute_max_time = -(hibit + 1);
-	} else {
-		/*
-		** time_t is unsigned.
-		*/
-		absolute_min_time = 0;
-		absolute_max_time = absolute_min_time - 1;
-	}
-}
-
-static time_t
-yeartot(y)
-const long	y;
-{
-	register long	myy;
-	register long	seconds;
-	register time_t	t;
-
-	myy = EPOCH_YEAR;
-	t = 0;
-	while (myy != y) {
-		if (myy < y) {
-			seconds = isleap(myy) ? SECSPERLYEAR : SECSPERNYEAR;
-			++myy;
-			if (t > absolute_max_time - seconds) {
-				t = absolute_max_time;
-				break;
-			}
-			t += seconds;
-		} else {
-			--myy;
-			seconds = isleap(myy) ? SECSPERLYEAR : SECSPERNYEAR;
-			if (t < absolute_min_time + seconds) {
-				t = absolute_min_time;
-				break;
-			}
-			t -= seconds;
-		}
-	}
-	return t;
+	/* gcc -Wall pacifier */
+	for ( ; ; )
+		continue;
 }
 
 static time_t
@@ -427,39 +279,25 @@ char *	name;
 time_t	lot;
 time_t	hit;
 {
-	time_t			t;
-	long			diff;
-	struct tm		lotm;
-	register struct tm *	lotmp;
-	struct tm		tm;
-	register struct tm *	tmp;
-	char			loab[MAX_STRING_LENGTH];
-
-	lotmp = my_localtime(&lot);
-	if (lotmp != NULL) {
-		lotm = *lotmp;
-		(void) strncpy(loab, abbr(&lotm), (sizeof loab) - 1);
-	}
-	for ( ; ; ) {
-		diff = (long) (hit - lot);
-		if (diff < 2)
-			break;
-		t = lot;
-		t += diff / 2;
+	time_t		t;
+	struct tm	lotm;
+	struct tm	tm;
+	static char	loab[MAX_STRING_LENGTH];
+
+	lotm = *localtime(&lot);
+	(void) strncpy(loab, abbr(&lotm), (sizeof loab) - 1);
+	while ((hit - lot) >= 2) {
+		t = lot / 2 + hit / 2;
 		if (t <= lot)
 			++t;
 		else if (t >= hit)
 			--t;
-		tmp = my_localtime(&t);
-		if (tmp != NULL)
-			tm = *tmp;
-		if ((lotmp == NULL || tmp == NULL) ? (lotmp == tmp) :
-			(delta(&tm, &lotm) == (t - lot) &&
+		tm = *localtime(&t);
+		if (delta(&tm, &lotm) == (t - lot) &&
 			tm.tm_isdst == lotm.tm_isdst &&
-			strcmp(abbr(&tm), loab) == 0)) {
+			strcmp(abbr(&tm), loab) == 0) {
 				lot = t;
 				lotm = tm;
-				lotmp = tmp;
 		} else	hit = t;
 	}
 	show(name, lot, TRUE);
@@ -476,14 +314,14 @@ delta(newp, oldp)
 struct tm *	newp;
 struct tm *	oldp;
 {
-	register long	result;
-	register int	tmy;
+	long	result;
+	int	tmy;
 
 	if (newp->tm_year < oldp->tm_year)
 		return -delta(oldp, newp);
 	result = 0;
 	for (tmy = oldp->tm_year; tmy < newp->tm_year; ++tmy)
-		result += DAYSPERNYEAR + isleap_sum(tmy, TM_YEAR_BASE);
+		result += DAYSPERNYEAR + isleap(tmy + (long) TM_YEAR_BASE);
 	result += newp->tm_yday - oldp->tm_yday;
 	result *= HOURSPERDAY;
 	result += newp->tm_hour - oldp->tm_hour;
@@ -500,30 +338,22 @@ char *	zone;
 time_t	t;
 int	v;
 {
-	register struct tm *	tmp;
+	struct tm *	tmp;
 
 	(void) printf("%-*s  ", (int) longest, zone);
 	if (v) {
-		tmp = gmtime(&t);
-		if (tmp == NULL) {
-			(void) printf(tformat(), t);
-		} else {
-			dumptime(tmp);
-			(void) printf(" UTC");
-		}
-		(void) printf(" = ");
+		dumptime(gmtime(&t));
+		(void) printf(" UTC = ");
 	}
-	tmp = my_localtime(&t);
+	tmp = localtime(&t);
 	dumptime(tmp);
-	if (tmp != NULL) {
-		if (*abbr(tmp) != '\0')
-			(void) printf(" %s", abbr(tmp));
-		if (v) {
-			(void) printf(" isdst=%d", tmp->tm_isdst);
+	if (*abbr(tmp) != '\0')
+		(void) printf(" %s", abbr(tmp));
+	if (v) {
+		(void) printf(" isdst=%d", tmp->tm_isdst);
 #ifdef TM_GMTOFF
-			(void) printf(" gmtoff=%ld", tmp->TM_GMTOFF);
+		(void) printf(" gmtoff=%ld", tmp->TM_GMTOFF);
 #endif /* defined TM_GMTOFF */
-		}
 	}
 	(void) printf("\n");
 }
@@ -541,33 +371,6 @@ struct tm *	tmp;
 	return (result == NULL) ? &nada : result;
 }
 
-/*
-** The code below can fail on certain theoretical systems;
-** it works on all known real-world systems as of 2004-12-30.
-*/
-
-static const char *
-tformat()
-{
-	if (0.5 == (time_t) 0.5) {	/* floating */
-		if (sizeof (time_t) > sizeof (double))
-			return "%Lg";
-		return "%g";
-	}
-	if (0 > (time_t) -1) {		/* signed */
-		if (sizeof (time_t) > sizeof (long))
-			return "%lld";
-		if (sizeof (time_t) > sizeof (int))
-			return "%ld";
-		return "%d";
-	}
-	if (sizeof (time_t) > sizeof (unsigned long))
-		return "%llu";
-	if (sizeof (time_t) > sizeof (unsigned int))
-		return "%lu";
-	return "%u";
-}
-
 static void
 dumptime(timeptr)
 register const struct tm *	timeptr;
@@ -581,13 +384,7 @@ register const struct tm *	timeptr;
 	};
 	register const char *	wn;
 	register const char *	mn;
-	register int		lead;
-	register int		trail;
 
-	if (timeptr == NULL) {
-		(void) printf("NULL");
-		return;
-	}
 	/*
 	** The packaged versions of localtime and gmtime never put out-of-range
 	** values in tm_wday or tm_mon, but since this code might be compiled
@@ -601,23 +398,9 @@ register const struct tm *	timeptr;
 		(int) (sizeof mon_name / sizeof mon_name[0]))
 			mn = "???";
 	else		mn = mon_name[timeptr->tm_mon];
-	(void) printf("%.3s %.3s%3d %.2d:%.2d:%.2d ",
+	(void) printf("%.3s %.3s%3d %.2d:%.2d:%.2d %ld",
 		wn, mn,
 		timeptr->tm_mday, timeptr->tm_hour,
-		timeptr->tm_min, timeptr->tm_sec);
-#define DIVISOR	10
-	trail = timeptr->tm_year % DIVISOR + TM_YEAR_BASE % DIVISOR;
-	lead = timeptr->tm_year / DIVISOR + TM_YEAR_BASE / DIVISOR +
-		trail / DIVISOR;
-	trail %= DIVISOR;
-	if (trail < 0 && lead > 0) {
-		trail += DIVISOR;
-		--lead;
-	} else if (lead < 0 && trail > 0) {
-		trail -= DIVISOR;
-		++lead;
-	}
-	if (lead == 0)
-		(void) printf("%d", trail);
-	else	(void) printf("%d%d", lead, ((trail < 0) ? -trail : trail));
+		timeptr->tm_min, timeptr->tm_sec,
+		timeptr->tm_year + (long) TM_YEAR_BASE);
 }
diff --git a/timezone/zic.c b/timezone/zic.c
index d855475f9f..9bb8662e55 100644
--- a/timezone/zic.c
+++ b/timezone/zic.c
@@ -1,10 +1,4 @@
-static char	elsieid[] = "@(#)zic.c	7.118";
-
-/*
-** Regardless of the type of time_t, we do our work using this type.
-*/
-
-typedef int	zic_t;
+static char	elsieid[] = "@(#)zic.c	7.116";
 
 #include "private.h"
 #include "locale.h"
@@ -56,7 +50,7 @@ struct rule {
 	const char *	r_abbrvar;	/* variable part of abbreviation */
 
 	int		r_todo;		/* a rule to do (used in outzone) */
-	zic_t		r_temp;		/* used in outzone */
+	time_t		r_temp;		/* used in outzone */
 };
 
 /*
@@ -82,7 +76,7 @@ struct zone {
 	int		z_nrules;
 
 	struct rule	z_untilrule;
-	zic_t		z_untiltime;
+	time_t		z_untiltime;
 };
 
 extern int	getopt P((int argc, char * const argv[],
@@ -91,10 +85,10 @@ extern int	link P((const char * fromname, const char * toname));
 extern char *	optarg;
 extern int	optind;
 
-static void	addtt P((zic_t starttime, int type));
+static void	addtt P((time_t starttime, int type));
 static int	addtype P((long gmtoff, const char * abbr, int isdst,
 				int ttisstd, int ttisgmt));
-static void	leapadd P((zic_t t, int positive, int rolling, int count));
+static void	leapadd P((time_t t, int positive, int rolling, int count));
 static void	adjleap P((void));
 static void	associate P((void));
 static int	ciequal P((const char * ap, const char * bp));
@@ -127,13 +121,13 @@ static long	oadd P((long t1, long t2));
 static void	outzone P((const struct zone * zp, int ntzones));
 static void	puttzcode P((long code, FILE * fp));
 static int	rcomp P((const void * leftp, const void * rightp));
-static zic_t	rpytime P((const struct rule * rp, int wantedy));
+static time_t	rpytime P((const struct rule * rp, int wantedy));
 static void	rulesub P((struct rule * rp,
 			const char * loyearp, const char * hiyearp,
 			const char * typep, const char * monthp,
 			const char * dayp, const char * timep));
 static void	setboundaries P((void));
-static zic_t	tadd P((zic_t t1, long t2));
+static time_t	tadd P((time_t t1, long t2));
 static void	usage P((void));
 static void	writezone P((const char * name));
 static int	yearistype P((int year, const char * type));
@@ -147,10 +141,10 @@ static int		errors;
 static const char *	filename;
 static int		leapcnt;
 static int		linenum;
-static zic_t		max_time;
+static time_t		max_time;
 static int		max_year;
 static int		max_year_representable;
-static zic_t		min_time;
+static time_t		min_time;
 static int		min_year;
 static int		min_year_representable;
 static int		noise;
@@ -340,7 +334,7 @@ static const int	len_years[2] = {
 };
 
 static struct attype {
-	zic_t		at;
+	time_t		at;
 	unsigned char	type;
 }			attypes[TZ_MAX_TIMES];
 static long		gmtoffs[TZ_MAX_TYPES];
@@ -349,7 +343,7 @@ static unsigned char	abbrinds[TZ_MAX_TYPES];
 static char		ttisstds[TZ_MAX_TYPES];
 static char		ttisgmts[TZ_MAX_TYPES];
 static char		chars[TZ_MAX_CHARS];
-static zic_t		trans[TZ_MAX_LEAPS];
+static time_t		trans[TZ_MAX_LEAPS];
 static long		corr[TZ_MAX_LEAPS];
 static char		roll[TZ_MAX_LEAPS];
 
@@ -635,7 +629,7 @@ const char * const	tofile;
 		        register char * symlinkcontents = NULL;
 		        while ((s = strchr(s+1, '/')) != NULL)
 			        symlinkcontents = ecatalloc(symlinkcontents, "../");
-			symlinkcontents = ecatalloc(symlinkcontents, fromfile);
+			symlinkcontents = ecatalloc(symlinkcontents, fromname);
 
 			result = unlink(toname);
 			if (result != 0 && errno != ENOENT) {
@@ -682,36 +676,25 @@ warning(_("hard link failed, symbolic link used"));
 */
 
 #define MAX_BITS_IN_FILE	32
-#define TIME_T_BITS_IN_FILE	((TYPE_BIT(zic_t) < MAX_BITS_IN_FILE) ? \
-					TYPE_BIT(zic_t) : MAX_BITS_IN_FILE)
+#define TIME_T_BITS_IN_FILE	((TYPE_BIT(time_t) < MAX_BITS_IN_FILE) ? TYPE_BIT(time_t) : MAX_BITS_IN_FILE)
 
 static void
 setboundaries P((void))
 {
-	register int	i;
-
-	if (TYPE_SIGNED(zic_t)) {
-		min_time = -1;
-		for (i = 0; i < TIME_T_BITS_IN_FILE - 1; ++i)
-			min_time *= 2;
-		max_time = -(min_time + 1);
+	if (TYPE_SIGNED(time_t)) {
+		min_time = ~ (time_t) 0;
+		min_time <<= TIME_T_BITS_IN_FILE - 1;
+		max_time = ~ (time_t) 0 - min_time;
 		if (sflag)
 			min_time = 0;
 	} else {
 		min_time = 0;
 		max_time = 2 - sflag;
-		for (i = 0; i < TIME_T_BITS_IN_FILE - 1; ++i)
-			max_time *= 2;
+		max_time <<= TIME_T_BITS_IN_FILE - 1;
 		--max_time;
 	}
-	{
-		time_t	t;
-
-		t = (time_t) min_time;
-		min_year = TM_YEAR_BASE + gmtime(&t)->tm_year;
-		t = (time_t) max_time;
-		max_year = TM_YEAR_BASE + gmtime(&t)->tm_year;
-	}
+	min_year = TM_YEAR_BASE + gmtime(&min_time)->tm_year;
+	max_year = TM_YEAR_BASE + gmtime(&max_time)->tm_year;
 	min_year_representable = min_year;
 	max_year_representable = max_year;
 }
@@ -1137,7 +1120,7 @@ const int		nfields;
 	register int			i, j;
 	int				year, month, day;
 	long				dayoff, tod;
-	zic_t				t;
+	time_t				t;
 
 	if (nfields != LEAP_FIELDS) {
 		error(_("wrong number of fields on Leap line"));
@@ -1181,7 +1164,7 @@ const int		nfields;
 			return;
 	}
 	dayoff = oadd(dayoff, eitol(day - 1));
-	if (dayoff < 0 && !TYPE_SIGNED(zic_t)) {
+	if (dayoff < 0 && !TYPE_SIGNED(time_t)) {
 		error(_("time before zero"));
 		return;
 	}
@@ -1193,7 +1176,7 @@ const int		nfields;
 		error(_("time too large"));
 		return;
 	}
-	t = (zic_t) dayoff * SECSPERDAY;
+	t = (time_t) dayoff * SECSPERDAY;
 	tod = gethms(fields[LP_TIME], _("invalid time of day"), FALSE);
 	cp = fields[LP_CORR];
 	{
@@ -1455,7 +1438,7 @@ const char * const	name;
 	register int		i, j;
 	static char *		fullname;
 	static struct tzhead	tzh;
-	zic_t			ats[TZ_MAX_TIMES];
+	time_t			ats[TZ_MAX_TIMES];
 	unsigned char		types[TZ_MAX_TIMES];
 
 	/*
@@ -1620,7 +1603,7 @@ const int			zonecount;
 	register struct rule *		rp;
 	register int			i, j;
 	register int			usestart, useuntil;
-	register zic_t			starttime, untiltime;
+	register time_t			starttime, untiltime;
 	register long			gmtoff;
 	register long			stdoff;
 	register int			year;
@@ -1689,7 +1672,7 @@ const int			zonecount;
 			}
 			for ( ; ; ) {
 				register int	k;
-				register zic_t	jtime, ktime;
+				register time_t	jtime, ktime;
 				register long	offset;
 				char		buf[BUFSIZ];
 
@@ -1801,7 +1784,7 @@ error(_("can't determine time zone abbreviation to use just after until time"));
 
 static void
 addtt(starttime, type)
-const zic_t	starttime;
+const time_t	starttime;
 int		type;
 {
 	if (starttime <= min_time ||
@@ -1885,7 +1868,7 @@ const int		ttisgmt;
 
 static void
 leapadd(t, positive, rolling, count)
-const zic_t	t;
+const time_t	t;
 const int	positive;
 const int	rolling;
 int		count;
@@ -2073,12 +2056,12 @@ const long	t2;
 	return t;
 }
 
-static zic_t
+static time_t
 tadd(t1, t2)
-const zic_t	t1;
+const time_t	t1;
 const long	t2;
 {
-	register zic_t	t;
+	register time_t	t;
 
 	if (t1 == max_time && t2 > 0)
 		return max_time;
@@ -2097,14 +2080,14 @@ const long	t2;
 ** 1970, 00:00 LOCAL time - in that year that the rule refers to.
 */
 
-static zic_t
+static time_t
 rpytime(rp, wantedy)
 register const struct rule * const	rp;
 register const int			wantedy;
 {
 	register int	y, m, i;
 	register long	dayoff;			/* with a nod to Margaret O. */
-	register zic_t	t;
+	register time_t	t;
 
 	if (wantedy == INT_MIN)
 		return min_time;
@@ -2171,13 +2154,15 @@ register const int			wantedy;
 				warning(_("rule goes past start/end of month--will not work with pre-2004 versions of zic"));
 		}
 	}
-	if (dayoff < 0 && !TYPE_SIGNED(zic_t))
+	if (dayoff < 0 && !TYPE_SIGNED(time_t))
 		return min_time;
 	if (dayoff < min_time / SECSPERDAY)
 		return min_time;
 	if (dayoff > max_time / SECSPERDAY)
 		return max_time;
-	t = (zic_t) dayoff * SECSPERDAY;
+	t = (time_t) dayoff * SECSPERDAY;
+	if (t > 0 && max_time - t < rp->r_tod)
+		return max_time;
 	return tadd(t, rp->r_tod);
 }
 
diff --git a/version.h b/version.h
index 2c6a016515..4eeb1cceb3 100644
--- a/version.h
+++ b/version.h
@@ -1,4 +1,4 @@
 /* This file just defines the current version number of libc.  */
 
-#define RELEASE "development"
+#define RELEASE "stable"
 #define VERSION "2.3.4"
author	Roland McGrath <roland@gnu.org>	2005-02-16 12:31:10 +0000
committer	Roland McGrath <roland@gnu.org>	2005-02-16 12:31:10 +0000
commit	833861be818bb5d45ab0c47370b84068dfb2fedf (patch)
tree	2f1754a415c378f6b067f9158cc42df24d4641d2
parent	c397a0064061e28a00eea873669e59f3983db791 (diff)