117 files changed, 10189 insertions, 0 deletions
diff --git a/sysdeps/aarch64/Implies b/sysdeps/aarch64/Implies
new file mode 100644
index 0000000000..e5adf4d63c
--- /dev/null
+++ b/sysdeps/aarch64/Implies
@@ -0,0 +1,6 @@
+wordsize-64
+ieee754/ldbl-128
+ieee754/dbl-64/wordsize-64
+ieee754/dbl-64
+ieee754/flt-32
+aarch64/soft-fp
diff --git a/sysdeps/aarch64/Makefile b/sysdeps/aarch64/Makefile
new file mode 100644
index 0000000000..06323550e9
--- /dev/null
+++ b/sysdeps/aarch64/Makefile
@@ -0,0 +1,14 @@
+long-double-fcts = yes
+
+ifeq ($(subdir),debug)
+CFLAGS-backtrace.c += -funwind-tables
+endif
+
+ifeq ($(subdir),elf)
+sysdep-dl-routines += tlsdesc dl-tlsdesc
+gen-as-const-headers += dl-link.sym
+endif
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += tlsdesc.sym
+endif
diff --git a/sysdeps/aarch64/Versions b/sysdeps/aarch64/Versions
new file mode 100644
index 0000000000..e1aa44f33d
--- /dev/null
+++ b/sysdeps/aarch64/Versions
@@ -0,0 +1,5 @@
+libc {
+  GLIBC_2.18 {
+    _mcount;
+  }
+}
diff --git a/sysdeps/aarch64/__longjmp.S b/sysdeps/aarch64/__longjmp.S
new file mode 100644
index 0000000000..5e0d3e0889
--- /dev/null
+++ b/sysdeps/aarch64/__longjmp.S
@@ -0,0 +1,116 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <jmpbuf-offsets.h>
+#include <stap-probe.h>
+
+/* __longjmp(jmpbuf, val) */
+
+ENTRY (__longjmp)
+	cfi_def_cfa(x0, 0)
+	cfi_offset(x19, JB_X19<<3)
+	cfi_offset(x20, JB_X20<<3)
+	cfi_offset(x21, JB_X21<<3)
+	cfi_offset(x22, JB_X22<<3)
+	cfi_offset(x23, JB_X23<<3)
+	cfi_offset(x24, JB_X24<<3)
+	cfi_offset(x25, JB_X25<<3)
+	cfi_offset(x26, JB_X26<<3)
+	cfi_offset(x27, JB_X27<<3)
+	cfi_offset(x28, JB_X28<<3)
+	cfi_offset(x29, JB_X29<<3)
+	cfi_offset(x30, JB_LR<<3)
+
+	cfi_offset( d8, JB_D8<<3)
+	cfi_offset( d9, JB_D9<<3)
+	cfi_offset(d10, JB_D10<<3)
+	cfi_offset(d11, JB_D11<<3)
+	cfi_offset(d12, JB_D12<<3)
+	cfi_offset(d13, JB_D13<<3)
+	cfi_offset(d14, JB_D14<<3)
+	cfi_offset(d15, JB_D15<<3)
+
+	ldp	x19, x20, [x0, #JB_X19<<3]
+	ldp	x21, x22, [x0, #JB_X21<<3]
+	ldp	x23, x24, [x0, #JB_X23<<3]
+	ldp	x25, x26, [x0, #JB_X25<<3]
+	ldp	x27, x28, [x0, #JB_X27<<3]
+#ifdef PTR_DEMANGLE
+	ldp	x29,  x4, [x0, #JB_X29<<3]
+	PTR_DEMANGLE (x30, x4, x3, x2)
+#else
+	ldp	x29, x30, [x0, #JB_X29<<3]
+#endif
+	/* longjmp probe takes 3 arguments, address of jump buffer as
+	   first argument (8@x0), return value as second argument (-4@x1),
+	   and target address (8@x30), respectively.  */
+	LIBC_PROBE (longjmp, 3, 8@x0, -4@x1, 8@x30)
+	ldp	 d8,  d9, [x0, #JB_D8<<3]
+	ldp	d10, d11, [x0, #JB_D10<<3]
+	ldp	d12, d13, [x0, #JB_D12<<3]
+	ldp	d14, d15, [x0, #JB_D14<<3]
+
+        /* Originally this was implemented with a series of
+	   .cfi_restore() directives.
+
+           The theory was that cfi_restore should revert to previous
+           frame value is the same as the current value.  In practice
+           this doesn't work, even after cfi_restore() gdb continues
+           to try to recover a previous frame value offset from x0,
+           which gets stuffed after a few more instructions.  The
+           cfi_same_value() mechanism appears to work fine.  */
+
+	cfi_same_value(x19)
+	cfi_same_value(x20)
+	cfi_same_value(x21)
+	cfi_same_value(x22)
+	cfi_same_value(x23)
+	cfi_same_value(x24)
+	cfi_same_value(x25)
+	cfi_same_value(x26)
+	cfi_same_value(x27)
+	cfi_same_value(x28)
+	cfi_same_value(x29)
+	cfi_same_value(x30)
+	cfi_same_value(d8)
+	cfi_same_value(d9)
+	cfi_same_value(d10)
+	cfi_same_value(d11)
+	cfi_same_value(d12)
+	cfi_same_value(d13)
+	cfi_same_value(d14)
+	cfi_same_value(d15)
+#ifdef PTR_DEMANGLE
+	ldr	x4, [x0, #JB_SP<<3]
+	PTR_DEMANGLE (x5, x4, x3, x2)
+#else
+	ldr	x5, [x0, #JB_SP<<3]
+#endif
+	mov	sp, x5
+
+	/* longjmp_target probe takes 3 arguments, address of jump buffer
+	   as first argument (8@x0), return value as second argument (-4@x1),
+	   and target address (8@x30), respectively.  */
+	LIBC_PROBE (longjmp_target, 3, 8@x0, -4@x1, 8@x30)
+	cmp	x1, #0
+	mov	x0, #1
+	csel	x0, x1, x0, ne
+	/* Use br instead of ret because ret is guaranteed to mispredict */
+	br	x30
+END (__longjmp)
diff --git a/sysdeps/aarch64/abort-instr.h b/sysdeps/aarch64/abort-instr.h
new file mode 100644
index 0000000000..8b1c40b57d
--- /dev/null
+++ b/sysdeps/aarch64/abort-instr.h
@@ -0,0 +1 @@
+#define ABORT_INSTRUCTION asm ("brk\t#1000")
diff --git a/sysdeps/aarch64/backtrace.c b/sysdeps/aarch64/backtrace.c
new file mode 100644
index 0000000000..27ce597b39
--- /dev/null
+++ b/sysdeps/aarch64/backtrace.c
@@ -0,0 +1 @@
+#include <sysdeps/x86_64/backtrace.c>
diff --git a/sysdeps/aarch64/bits/atomic.h b/sysdeps/aarch64/bits/atomic.h
new file mode 100644
index 0000000000..fdd5eaa517
--- /dev/null
+++ b/sysdeps/aarch64/bits/atomic.h
@@ -0,0 +1,173 @@
+/* Copyright (C) 2003-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_BITS_ATOMIC_H
+#define _AARCH64_BITS_ATOMIC_H	1
+
+#include <stdint.h>
+
+typedef int8_t  atomic8_t;
+typedef int16_t atomic16_t;
+typedef int32_t atomic32_t;
+typedef int64_t atomic64_t;
+
+typedef uint8_t  uatomic8_t;
+typedef uint16_t uatomic16_t;
+typedef uint32_t uatomic32_t;
+typedef uint64_t uatomic64_t;
+
+typedef intptr_t atomicptr_t;
+typedef uintptr_t uatomicptr_t;
+typedef intmax_t atomic_max_t;
+typedef uintmax_t uatomic_max_t;
+
+#define __HAVE_64B_ATOMICS 1
+#define USE_ATOMIC_COMPILER_BUILTINS 1
+
+/* Compare and exchange.
+   For all "bool" routines, we return FALSE if exchange succesful.  */
+
+# define __arch_compare_and_exchange_bool_8_int(mem, newval, oldval, model) \
+  ({									\
+    typeof (*mem) __oldval = (oldval);					\
+    !__atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0,	\
+				  model, __ATOMIC_RELAXED);		\
+  })
+
+# define __arch_compare_and_exchange_bool_16_int(mem, newval, oldval, model) \
+  ({									\
+    typeof (*mem) __oldval = (oldval);					\
+    !__atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0,	\
+				  model, __ATOMIC_RELAXED);		\
+  })
+
+# define __arch_compare_and_exchange_bool_32_int(mem, newval, oldval, model) \
+  ({									\
+    typeof (*mem) __oldval = (oldval);					\
+    !__atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0,	\
+				  model, __ATOMIC_RELAXED);		\
+  })
+
+#  define __arch_compare_and_exchange_bool_64_int(mem, newval, oldval, model) \
+  ({									\
+    typeof (*mem) __oldval = (oldval);					\
+    !__atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0,	\
+				  model, __ATOMIC_RELAXED);		\
+  })
+
+# define __arch_compare_and_exchange_val_8_int(mem, newval, oldval, model) \
+  ({									\
+    typeof (*mem) __oldval = (oldval);					\
+    __atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0,	\
+				 model, __ATOMIC_RELAXED);		\
+    __oldval;								\
+  })
+
+# define __arch_compare_and_exchange_val_16_int(mem, newval, oldval, model) \
+  ({									\
+    typeof (*mem) __oldval = (oldval);					\
+    __atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0,	\
+				 model, __ATOMIC_RELAXED);		\
+    __oldval;								\
+  })
+
+# define __arch_compare_and_exchange_val_32_int(mem, newval, oldval, model) \
+  ({									\
+    typeof (*mem) __oldval = (oldval);					\
+    __atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0,	\
+				 model, __ATOMIC_RELAXED);		\
+    __oldval;								\
+  })
+
+#  define __arch_compare_and_exchange_val_64_int(mem, newval, oldval, model) \
+  ({									\
+    typeof (*mem) __oldval = (oldval);					\
+    __atomic_compare_exchange_n (mem, (void *) &__oldval, newval, 0,	\
+				 model, __ATOMIC_RELAXED);		\
+    __oldval;								\
+  })
+
+
+/* Compare and exchange with "acquire" semantics, ie barrier after.  */
+
+# define atomic_compare_and_exchange_bool_acq(mem, new, old)	\
+  __atomic_bool_bysize (__arch_compare_and_exchange_bool, int,	\
+			mem, new, old, __ATOMIC_ACQUIRE)
+
+# define atomic_compare_and_exchange_val_acq(mem, new, old)	\
+  __atomic_val_bysize (__arch_compare_and_exchange_val, int,	\
+		       mem, new, old, __ATOMIC_ACQUIRE)
+
+/* Compare and exchange with "release" semantics, ie barrier before.  */
+
+# define atomic_compare_and_exchange_bool_rel(mem, new, old)	\
+  __atomic_bool_bysize (__arch_compare_and_exchange_bool, int,	\
+			mem, new, old, __ATOMIC_RELEASE)
+
+# define atomic_compare_and_exchange_val_rel(mem, new, old)	 \
+  __atomic_val_bysize (__arch_compare_and_exchange_val, int,    \
+                       mem, new, old, __ATOMIC_RELEASE)
+
+
+/* Atomic exchange (without compare).  */
+
+# define __arch_exchange_8_int(mem, newval, model)	\
+  __atomic_exchange_n (mem, newval, model)
+
+# define __arch_exchange_16_int(mem, newval, model)	\
+  __atomic_exchange_n (mem, newval, model)
+
+# define __arch_exchange_32_int(mem, newval, model)	\
+  __atomic_exchange_n (mem, newval, model)
+
+#  define __arch_exchange_64_int(mem, newval, model)	\
+  __atomic_exchange_n (mem, newval, model)
+
+# define atomic_exchange_acq(mem, value)				\
+  __atomic_val_bysize (__arch_exchange, int, mem, value, __ATOMIC_ACQUIRE)
+
+# define atomic_exchange_rel(mem, value)				\
+  __atomic_val_bysize (__arch_exchange, int, mem, value, __ATOMIC_RELEASE)
+
+
+/* Atomically add value and return the previous (unincremented) value.  */
+
+# define __arch_exchange_and_add_8_int(mem, value, model)	\
+  __atomic_fetch_add (mem, value, model)
+
+# define __arch_exchange_and_add_16_int(mem, value, model)	\
+  __atomic_fetch_add (mem, value, model)
+
+# define __arch_exchange_and_add_32_int(mem, value, model)	\
+  __atomic_fetch_add (mem, value, model)
+
+#  define __arch_exchange_and_add_64_int(mem, value, model)	\
+  __atomic_fetch_add (mem, value, model)
+
+# define atomic_exchange_and_add_acq(mem, value)			\
+  __atomic_val_bysize (__arch_exchange_and_add, int, mem, value,	\
+		       __ATOMIC_ACQUIRE)
+
+# define atomic_exchange_and_add_rel(mem, value)			\
+  __atomic_val_bysize (__arch_exchange_and_add, int, mem, value,	\
+		       __ATOMIC_RELEASE)
+
+/* Barrier macro. */
+#define atomic_full_barrier() __sync_synchronize()
+
+#endif
diff --git a/sysdeps/aarch64/bits/endian.h b/sysdeps/aarch64/bits/endian.h
new file mode 100644
index 0000000000..33d09f7fa1
--- /dev/null
+++ b/sysdeps/aarch64/bits/endian.h
@@ -0,0 +1,30 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _ENDIAN_H
+# error "Never use <bits/endian.h> directly; include <endian.h> instead."
+#endif
+
+/* AArch64 can be either big or little endian.  */
+#ifdef __AARCH64EB__
+# define __BYTE_ORDER __BIG_ENDIAN
+#else
+# define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
+
+#define __FLOAT_WORD_ORDER __BYTE_ORDER
diff --git a/sysdeps/aarch64/bits/fenv.h b/sysdeps/aarch64/bits/fenv.h
new file mode 100644
index 0000000000..2ee36ab827
--- /dev/null
+++ b/sysdeps/aarch64/bits/fenv.h
@@ -0,0 +1,74 @@
+/* Copyright (C) 2004-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _FENV_H
+# error "Never use <bits/fenv.h> directly; include <fenv.h> instead."
+#endif
+
+/* Define bits representing exceptions in the FPSR status word.  */
+enum
+  {
+    FE_INVALID =
+#define FE_INVALID	1
+      FE_INVALID,
+    FE_DIVBYZERO =
+#define FE_DIVBYZERO	2
+      FE_DIVBYZERO,
+    FE_OVERFLOW =
+#define FE_OVERFLOW	4
+      FE_OVERFLOW,
+    FE_UNDERFLOW =
+#define FE_UNDERFLOW	8
+      FE_UNDERFLOW,
+    FE_INEXACT =
+#define FE_INEXACT	16
+      FE_INEXACT,
+  };
+
+/* Amount to shift by to convert an exception bit in FPSR to a an
+   exception bit mask in FPCR.  */
+#define FE_EXCEPT_SHIFT	8
+
+/* All supported exceptions.  */
+#define FE_ALL_EXCEPT	\
+	(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INEXACT)
+
+/* Define bits representing rounding modes in the FPCR Rmode field.  */
+#define FE_TONEAREST  0x000000
+#define FE_UPWARD     0x400000
+#define FE_DOWNWARD   0x800000
+#define FE_TOWARDZERO 0xc00000
+
+/* Type representing exception flags. */
+typedef unsigned int fexcept_t;
+
+/* Type representing floating-point environment.  */
+typedef struct
+  {
+    unsigned int __fpcr;
+    unsigned int __fpsr;
+  }
+fenv_t;
+
+/* If the default argument is used we use this value.  */
+#define FE_DFL_ENV	((const fenv_t *) -1l)
+
+#ifdef __USE_GNU
+/* Floating-point environment where none of the exceptions are masked.  */
+# define FE_NOMASK_ENV  ((const fenv_t *) -2)
+#endif
diff --git a/sysdeps/aarch64/bits/link.h b/sysdeps/aarch64/bits/link.h
new file mode 100644
index 0000000000..0927671a10
--- /dev/null
+++ b/sysdeps/aarch64/bits/link.h
@@ -0,0 +1,60 @@
+/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef	_LINK_H
+# error "Never include <bits/link.h> directly; use <link.h> instead."
+#endif
+
+/* Registers for entry into PLT on AArch64.  */
+typedef struct La_aarch64_regs
+{
+  uint64_t lr_xreg[8];
+  uint64_t lr_dreg[8];
+  uint64_t lr_sp;
+  uint64_t lr_lr;
+} La_aarch64_regs;
+
+/* Return values for calls from PLT on AArch64.  */
+typedef struct La_aarch64_retval
+{
+  /* Up to two integer registers can be used for a return value.  */
+  uint64_t lrv_xreg[2];
+  /* Up to four D registers can be used for a return value.  */
+  uint64_t lrv_dreg[4];
+
+} La_aarch64_retval;
+__BEGIN_DECLS
+
+extern ElfW(Addr)
+la_aarch64_gnu_pltenter (ElfW(Sym) *__sym, unsigned int __ndx,
+			 uintptr_t *__refcook,
+			 uintptr_t *__defcook,
+			 La_aarch64_regs *__regs,
+			 unsigned int *__flags,
+			 const char *__symname,
+			 long int *__framesizep);
+
+extern unsigned int
+la_aarch64_gnu_pltexit (ElfW(Sym) *__sym, unsigned int __ndx,
+			uintptr_t *__refcook,
+			uintptr_t *__defcook,
+			const La_aarch64_regs *__inregs,
+			La_aarch64_retval *__outregs,
+			const char *__symname);
+
+__END_DECLS
diff --git a/sysdeps/aarch64/bits/linkmap.h b/sysdeps/aarch64/bits/linkmap.h
new file mode 100644
index 0000000000..9fed9b3872
--- /dev/null
+++ b/sysdeps/aarch64/bits/linkmap.h
@@ -0,0 +1,23 @@
+/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+struct link_map_machine
+{
+  ElfW(Addr) plt;	  /* Address of .plt */
+  void *tlsdesc_table;	  /* Address of TLS descriptor hash table.  */
+};
diff --git a/sysdeps/aarch64/bits/mathdef.h b/sysdeps/aarch64/bits/mathdef.h
new file mode 100644
index 0000000000..839306119f
--- /dev/null
+++ b/sysdeps/aarch64/bits/mathdef.h
@@ -0,0 +1,39 @@
+/* Copyright (C) 1999-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if !defined _MATH_H && !defined _COMPLEX_H
+# error "Never use <bits/mathdef.h> directly; include <math.h> instead"
+#endif
+
+#if defined  __USE_ISOC99 && defined _MATH_H && !defined _MATH_H_MATHDEF
+# define _MATH_H_MATHDEF	1
+
+/* GCC does not promote `float' values to `double'.  */
+typedef float float_t;		/* `float' expressions are evaluated as
+				   `float'.  */
+typedef double double_t;	/* `double' expressions are evaluated as
+				   `double'.  */
+
+/* The values returned by `ilogb' for 0 and NaN respectively.  */
+# define FP_ILOGB0	(-2147483647)
+# define FP_ILOGBNAN	(2147483647)
+
+# define FP_FAST_FMA 1
+# define FP_FAST_FMAF 1
+
+#endif	/* ISO C99 */
diff --git a/sysdeps/aarch64/bits/setjmp.h b/sysdeps/aarch64/bits/setjmp.h
new file mode 100644
index 0000000000..3c477f10d4
--- /dev/null
+++ b/sysdeps/aarch64/bits/setjmp.h
@@ -0,0 +1,33 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _BITS_SETJMP_H
+#define _BITS_SETJMP_H 1
+
+#if !defined _SETJMP_H && !defined _PTHREAD_H
+# error "Never include <bits/setjmp.h> directly; use <setjmp.h> instead."
+#endif
+
+#ifndef _ASM
+/* Jump buffer contains:
+   x19-x28, x29(fp), x30(lr), (x31)sp, d8-d15.  Other registers are not
+   saved.  */
+__extension__ typedef unsigned long long __jmp_buf [22];
+
+#endif
+#endif
diff --git a/sysdeps/aarch64/bsd-_setjmp.S b/sysdeps/aarch64/bsd-_setjmp.S
new file mode 100644
index 0000000000..4e6a2da560
--- /dev/null
+++ b/sysdeps/aarch64/bsd-_setjmp.S
@@ -0,0 +1 @@
+/* _setjmp is in setjmp.S  */
diff --git a/sysdeps/aarch64/bsd-setjmp.S b/sysdeps/aarch64/bsd-setjmp.S
new file mode 100644
index 0000000000..1da848d2f1
--- /dev/null
+++ b/sysdeps/aarch64/bsd-setjmp.S
@@ -0,0 +1 @@
+/* setjmp is in setjmp.S  */
diff --git a/sysdeps/aarch64/bzero.S b/sysdeps/aarch64/bzero.S
new file mode 100644
index 0000000000..7343896392
--- /dev/null
+++ b/sysdeps/aarch64/bzero.S
@@ -0,0 +1,27 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.text
+ENTRY(__bzero)
+	mov	x2, x1
+	mov	x1, xzr
+	b	__memset
+END(__bzero)
+weak_alias (__bzero, bzero)
diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure
new file mode 100644
index 0000000000..5bd355a691
--- /dev/null
+++ b/sysdeps/aarch64/configure
@@ -0,0 +1,174 @@
+# This file is generated from configure.ac by Autoconf.  DO NOT EDIT!
+ # Local configure fragment for sysdeps/aarch64.
+
+# We check to see if the compiler and flags are
+# selecting the big endian ABI and if they are then
+# we set libc_cv_aarch64_be to yes which causes
+# HAVE_AARCH64_BE to be defined in config.h and
+# in include/libc-symbols.h and thus available to
+# shlib-versions to select the appropriate name for
+# the dynamic linker via %ifdef.
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
+$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
+if ${ac_cv_path_GREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test -z "$GREP"; then
+  ac_path_GREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in grep ggrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+  # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'GREP' >> "conftest.nl"
+    "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_GREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_GREP="$ac_path_GREP"
+      ac_path_GREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_GREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_GREP"; then
+    as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+$as_echo "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+$as_echo_n "checking for egrep... " >&6; }
+if ${ac_cv_path_EGREP+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+   then ac_cv_path_EGREP="$GREP -E"
+   else
+     if test -z "$EGREP"; then
+  ac_path_EGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_prog in egrep; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+  ac_count=0
+  $as_echo_n 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    $as_echo 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_EGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_EGREP"; then
+    as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_EGREP=$EGREP
+fi
+
+   fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+$as_echo "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for big endian" >&5
+$as_echo_n "checking for big endian... " >&6; }
+if ${libc_cv_aarch64_be+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifdef __AARCH64EB__
+                      yes
+                     #endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "yes" >/dev/null 2>&1; then :
+  libc_cv_aarch64_be=yes
+else
+  libc_cv_aarch64_be=no
+fi
+rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_aarch64_be" >&5
+$as_echo "$libc_cv_aarch64_be" >&6; }
+if test $libc_cv_aarch64_be = yes; then
+  $as_echo "#define HAVE_AARCH64_BE 1" >>confdefs.h
+
+  config_vars="$config_vars
+default-abi = lp64_be"
+else
+  config_vars="$config_vars
+default-abi = lp64"
+fi
diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac
new file mode 100644
index 0000000000..7851dd4dac
--- /dev/null
+++ b/sysdeps/aarch64/configure.ac
@@ -0,0 +1,22 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/aarch64.
+
+# We check to see if the compiler and flags are
+# selecting the big endian ABI and if they are then
+# we set libc_cv_aarch64_be to yes which causes
+# HAVE_AARCH64_BE to be defined in config.h and
+# in include/libc-symbols.h and thus available to
+# shlib-versions to select the appropriate name for
+# the dynamic linker via %ifdef.
+AC_CACHE_CHECK([for big endian],
+  [libc_cv_aarch64_be],
+  [AC_EGREP_CPP(yes,[#ifdef __AARCH64EB__
+                      yes
+                     #endif
+  ], libc_cv_aarch64_be=yes, libc_cv_aarch64_be=no)])
+if test $libc_cv_aarch64_be = yes; then
+  AC_DEFINE(HAVE_AARCH64_BE)
+  LIBC_CONFIG_VAR([default-abi], [lp64_be])
+else
+  LIBC_CONFIG_VAR([default-abi], [lp64])
+fi
diff --git a/sysdeps/aarch64/crti.S b/sysdeps/aarch64/crti.S
new file mode 100644
index 0000000000..1dba606f41
--- /dev/null
+++ b/sysdeps/aarch64/crti.S
@@ -0,0 +1,90 @@
+/* Special .init and .fini section support for AArch64.
+   Copyright (C) 1995-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file with other
+   programs, and to distribute those programs without any restriction
+   coming from the use of this file. (The GNU Lesser General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into another program.)
+
+   Note that people who make modified versions of this file are not
+   obligated to grant this special exception for their modified
+   versions; it is their choice whether to do so. The GNU Lesser
+   General Public License gives permission to release a modified
+   version without this exception; this exception also makes it
+   possible to release a modified version which carries forward this
+   exception.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* crti.S puts a function prologue at the beginning of the .init and
+   .fini sections and defines global symbols for those addresses, so
+   they can be called as functions.  The symbols _init and _fini are
+   magic and cause the linker to emit DT_INIT and DT_FINI.  */
+
+#include <libc-symbols.h>
+
+#ifndef PREINIT_FUNCTION
+# define PREINIT_FUNCTION __gmon_start__
+#endif
+
+#ifndef PREINIT_FUNCTION_WEAK
+# define PREINIT_FUNCTION_WEAK 1
+#endif
+
+#if PREINIT_FUNCTION_WEAK
+	weak_extern (PREINIT_FUNCTION)
+#else
+	.hidden PREINIT_FUNCTION
+#endif
+
+#if PREINIT_FUNCTION_WEAK
+	.align	2
+	.type	call_weak_fn, %function
+call_weak_fn:
+	adrp	x0, :got:PREINIT_FUNCTION
+	ldr	x0, [x0, #:got_lo12:PREINIT_FUNCTION]
+	cbz	x0, 1f
+	b	PREINIT_FUNCTION
+1:
+	RET
+	.size	call_weak_fn, .-call_weak_fn
+#endif
+
+	.section .init,"ax",%progbits
+	.align	2
+	.global	_init
+	.type	_init, %function
+_init:
+	stp	x29, x30, [sp, -16]!
+	mov	x29, sp
+#if PREINIT_FUNCTION_WEAK
+	bl	call_weak_fn
+#else
+	bl	PREINIT_FUNCTION
+#endif
+
+	.section	.fini,"ax",%progbits
+	.align	2
+	.global	_fini
+	.type	_fini, %function
+_fini:
+	stp	x29, x30, [sp, -16]!
+	mov	x29, sp
diff --git a/sysdeps/aarch64/crtn.S b/sysdeps/aarch64/crtn.S
new file mode 100644
index 0000000000..a356658978
--- /dev/null
+++ b/sysdeps/aarch64/crtn.S
@@ -0,0 +1,46 @@
+/* Special .init and .fini section support for AArch64.
+   Copyright (C) 1995-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file with other
+   programs, and to distribute those programs without any restriction
+   coming from the use of this file. (The GNU Lesser General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into another program.)
+
+   Note that people who make modified versions of this file are not
+   obligated to grant this special exception for their modified
+   versions; it is their choice whether to do so. The GNU Lesser
+   General Public License gives permission to release a modified
+   version without this exception; this exception also makes it
+   possible to release a modified version which carries forward this
+   exception.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* crtn.S puts function epilogues in the .init and .fini sections
+   corresponding to the prologues in crti.S. */
+
+	.section .init,"ax",%progbits
+	ldp	x29, x30, [sp], 16
+	RET
+
+	.section .fini,"ax",%progbits
+	ldp	x29, x30, [sp], 16
+	RET
diff --git a/sysdeps/aarch64/dl-irel.h b/sysdeps/aarch64/dl-irel.h
new file mode 100644
index 0000000000..1a717e1dde
--- /dev/null
+++ b/sysdeps/aarch64/dl-irel.h
@@ -0,0 +1,52 @@
+/* Machine-dependent ELF indirect relocation inline functions.
+   AArch64 version.
+   Copyright (C) 2012-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _DL_IREL_H
+#define _DL_IREL_H
+
+#include <stdio.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+
+#define ELF_MACHINE_IRELA	1
+
+static inline ElfW(Addr)
+__attribute ((always_inline))
+elf_ifunc_invoke (ElfW(Addr) addr)
+{
+  return ((ElfW(Addr) (*) (unsigned long int)) (addr)) (GLRO(dl_hwcap));
+}
+
+static inline void
+__attribute ((always_inline))
+elf_irela (const ElfW(Rela) *reloc)
+{
+  ElfW(Addr) *const reloc_addr = (void *) reloc->r_offset;
+  const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info);
+
+  if (__glibc_likely (r_type == R_AARCH64_IRELATIVE))
+    {
+      ElfW(Addr) value = elf_ifunc_invoke (reloc->r_addend);
+      *reloc_addr = value;
+    }
+  else
+    __libc_fatal ("unexpected reloc type in static binary");
+}
+
+#endif
diff --git a/sysdeps/aarch64/dl-link.sym b/sysdeps/aarch64/dl-link.sym
new file mode 100644
index 0000000000..d67d28b40c
--- /dev/null
+++ b/sysdeps/aarch64/dl-link.sym
@@ -0,0 +1,15 @@
+#include <stddef.h>
+#include <sysdep.h>
+#include <link.h>
+#include <dl-tlsdesc.h>
+
+DL_SIZEOF_RG		sizeof(struct La_aarch64_regs)
+DL_SIZEOF_RV		sizeof(struct La_aarch64_retval)
+
+DL_OFFSET_RG_X0		offsetof(struct La_aarch64_regs, lr_xreg)
+DL_OFFSET_RG_D0		offsetof(struct La_aarch64_regs, lr_dreg)
+DL_OFFSET_RG_SP		offsetof(struct La_aarch64_regs, lr_sp)
+DL_OFFSET_RG_LR		offsetof(struct La_aarch64_regs, lr_lr)
+
+DL_OFFSET_RV_X0		offsetof(struct La_aarch64_retval, lrv_xreg)
+DL_OFFSET_RV_D0		offsetof(struct La_aarch64_retval, lrv_dreg)
diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
new file mode 100644
index 0000000000..217e1798c4
--- /dev/null
+++ b/sysdeps/aarch64/dl-machine.h
@@ -0,0 +1,406 @@
+/* Copyright (C) 1995-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef dl_machine_h
+#define dl_machine_h
+
+#define ELF_MACHINE_NAME "aarch64"
+
+#include <tls.h>
+#include <dl-tlsdesc.h>
+#include <dl-irel.h>
+
+/* Return nonzero iff ELF header is compatible with the running host.  */
+static inline int __attribute__ ((unused))
+elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
+{
+  return ehdr->e_machine == EM_AARCH64;
+}
+
+/* Return the link-time address of _DYNAMIC.  Conveniently, this is the
+   first element of the GOT. */
+static inline ElfW(Addr) __attribute__ ((unused))
+elf_machine_dynamic (void)
+{
+  extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
+  return _GLOBAL_OFFSET_TABLE_[0];
+}
+
+/* Return the run-time load address of the shared object.  */
+
+static inline ElfW(Addr) __attribute__ ((unused))
+elf_machine_load_address (void)
+{
+  /* To figure out the load address we use the definition that for any symbol:
+     dynamic_addr(symbol) = static_addr(symbol) + load_addr
+
+     The choice of symbol is arbitrary. The static address we obtain
+     by constructing a non GOT reference to the symbol, the dynamic
+     address of the symbol we compute using adrp/add to compute the
+     symbol's address relative to the PC.
+     This depends on 32bit relocations being resolved at link time
+     and that the static address fits in the 32bits.  */
+
+  ElfW(Addr) static_addr;
+  ElfW(Addr) dynamic_addr;
+
+  asm ("					\n"
+"	adrp	%1, _dl_start;			\n"
+"	add	%1, %1, #:lo12:_dl_start	\n"
+"	ldr	%w0, 1f				\n"
+"	b	2f				\n"
+"1:						\n"
+"	.word	_dl_start			\n"
+"2:						\n"
+    : "=r" (static_addr),  "=r" (dynamic_addr));
+  return dynamic_addr - static_addr;
+}
+
+/* Set up the loaded object described by L so its unrelocated PLT
+   entries will jump to the on-demand fixup code in dl-runtime.c.  */
+
+static inline int __attribute__ ((unused))
+elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
+{
+  if (l->l_info[DT_JMPREL] && lazy)
+    {
+      ElfW(Addr) *got;
+      extern void _dl_runtime_resolve (ElfW(Word));
+      extern void _dl_runtime_profile (ElfW(Word));
+
+      got = (ElfW(Addr) *) D_PTR (l, l_info[DT_PLTGOT]);
+      if (got[1])
+	{
+	  l->l_mach.plt = got[1] + l->l_addr;
+	}
+      got[1] = (ElfW(Addr)) l;
+
+      /* The got[2] entry contains the address of a function which gets
+	 called to get the address of a so far unresolved function and
+	 jump to it.  The profiling extension of the dynamic linker allows
+	 to intercept the calls to collect information.  In this case we
+	 don't store the address in the GOT so that all future calls also
+	 end in this function.  */
+      if ( profile)
+	{
+	   got[2] = (ElfW(Addr)) &_dl_runtime_profile;
+
+	  if (GLRO(dl_profile) != NULL
+	      && _dl_name_match_p (GLRO(dl_profile), l))
+	    /* Say that we really want profiling and the timers are
+	       started.  */
+	    GL(dl_profile_map) = l;
+	}
+      else
+	{
+	  /* This function will get called to fix up the GOT entry
+	     indicated by the offset on the stack, and then jump to
+	     the resolved address.  */
+	  got[2] = (ElfW(Addr)) &_dl_runtime_resolve;
+	}
+    }
+
+  if (l->l_info[ADDRIDX (DT_TLSDESC_GOT)] && lazy)
+    *(ElfW(Addr)*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_GOT)]) + l->l_addr)
+      = (ElfW(Addr)) &_dl_tlsdesc_resolve_rela;
+
+  return lazy;
+}
+
+/* Initial entry point for the dynamic linker. The C function
+   _dl_start is the real entry point, its return value is the user
+   program's entry point */
+
+#define RTLD_START asm ("\
+.text								\n\
+.globl _start							\n\
+.type _start, %function						\n\
+.globl _dl_start_user						\n\
+.type _dl_start_user, %function					\n\
+_start:								\n\
+	mov	x0,	sp					\n\
+	bl	_dl_start					\n\
+	// returns user entry point in x0			\n\
+	mov	x21, x0						\n\
+_dl_start_user:							\n\
+	// get the original arg count				\n\
+	ldr	x1, [sp]					\n\
+	// get the argv address					\n\
+	add	x2, sp, #8					\n\
+	// get _dl_skip_args to see if we were			\n\
+	// invoked as an executable				\n\
+	adrp	x4, _dl_skip_args				\n\
+        ldr	w4, [x4, #:lo12:_dl_skip_args]			\n\
+	// do we need to adjust argc/argv			\n\
+        cmp	w4, 0						\n\
+	beq	.L_done_stack_adjust				\n\
+	// subtract _dl_skip_args from original arg count	\n\
+	sub	x1, x1, x4					\n\
+	// store adjusted argc back to stack			\n\
+	str	x1, [sp]					\n\
+	// find the first unskipped argument			\n\
+	mov	x3, x2						\n\
+	add	x4, x2, x4, lsl #3				\n\
+	// shuffle argv down					\n\
+1:	ldr	x5, [x4], #8					\n\
+	str	x5, [x3], #8					\n\
+	cmp	x5, #0						\n\
+	bne	1b						\n\
+	// shuffle envp down					\n\
+1:	ldr	x5, [x4], #8					\n\
+	str	x5, [x3], #8					\n\
+	cmp	x5, #0						\n\
+	bne	1b						\n\
+	// shuffle auxv down					\n\
+1:	ldp	x0, x5, [x4, #16]!				\n\
+	stp	x0, x5, [x3], #16				\n\
+	cmp	x0, #0						\n\
+	bne	1b						\n\
+	// Update _dl_argv					\n\
+	adrp	x3, _dl_argv					\n\
+	str	x2, [x3, #:lo12:_dl_argv]			\n\
+.L_done_stack_adjust:						\n\
+	// compute envp						\n\
+	add	x3, x2, x1, lsl #3				\n\
+	add	x3, x3, #8					\n\
+	adrp	x16, _rtld_local				\n\
+        add	x16, x16, #:lo12:_rtld_local			\n\
+        ldr	x0, [x16]					\n\
+	bl	_dl_init					\n\
+	// load the finalizer function				\n\
+	adrp	x0, _dl_fini					\n\
+	add	x0, x0, #:lo12:_dl_fini				\n\
+	// jump to the user_s entry point			\n\
+	br      x21						\n\
+");
+
+#define elf_machine_type_class(type)					\
+  ((((type) == R_AARCH64_JUMP_SLOT ||					\
+     (type) == R_AARCH64_TLS_DTPMOD ||					\
+     (type) == R_AARCH64_TLS_DTPREL ||					\
+     (type) == R_AARCH64_TLS_TPREL ||					\
+     (type) == R_AARCH64_TLSDESC) * ELF_RTYPE_CLASS_PLT)		\
+   | (((type) == R_AARCH64_COPY) * ELF_RTYPE_CLASS_COPY)		\
+   | (((type) == R_AARCH64_GLOB_DAT) * ELF_RTYPE_CLASS_EXTERN_PROTECTED_DATA))
+
+#define ELF_MACHINE_JMP_SLOT	R_AARCH64_JUMP_SLOT
+
+/* AArch64 uses RELA not REL */
+#define ELF_MACHINE_NO_REL 1
+#define ELF_MACHINE_NO_RELA 0
+
+static inline ElfW(Addr)
+elf_machine_fixup_plt (struct link_map *map, lookup_t t,
+		       const ElfW(Rela) *reloc,
+		       ElfW(Addr) *reloc_addr,
+		       ElfW(Addr) value)
+{
+  return *reloc_addr = value;
+}
+
+/* Return the final value of a plt relocation.  */
+static inline ElfW(Addr)
+elf_machine_plt_value (struct link_map *map,
+		       const ElfW(Rela) *reloc,
+		       ElfW(Addr) value)
+{
+  return value;
+}
+
+#endif
+
+/* Names of the architecture-specific auditing callback functions.  */
+#define ARCH_LA_PLTENTER aarch64_gnu_pltenter
+#define ARCH_LA_PLTEXIT  aarch64_gnu_pltexit
+
+#ifdef RESOLVE_MAP
+
+auto inline void
+__attribute__ ((always_inline))
+elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
+		  const ElfW(Sym) *sym, const struct r_found_version *version,
+		  void *const reloc_addr_arg, int skip_ifunc)
+{
+  ElfW(Addr) *const reloc_addr = reloc_addr_arg;
+  const unsigned int r_type = ELF64_R_TYPE (reloc->r_info);
+
+  if (__builtin_expect (r_type == R_AARCH64_RELATIVE, 0))
+      *reloc_addr = map->l_addr + reloc->r_addend;
+  else if (__builtin_expect (r_type == R_AARCH64_NONE, 0))
+      return;
+  else
+    {
+      const ElfW(Sym) *const refsym = sym;
+      struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
+      ElfW(Addr) value = sym_map == NULL ? 0 : sym_map->l_addr + sym->st_value;
+
+      if (sym != NULL
+	  && __glibc_unlikely (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC)
+	  && __glibc_likely (sym->st_shndx != SHN_UNDEF)
+	  && __glibc_likely (!skip_ifunc))
+	value = elf_ifunc_invoke (value);
+
+      switch (r_type)
+	{
+	case R_AARCH64_COPY:
+	  if (sym == NULL)
+	      break;
+
+	  if (sym->st_size > refsym->st_size
+	      || (GLRO(dl_verbose) && sym->st_size < refsym->st_size))
+	    {
+	      const char *strtab;
+
+	      strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
+	      _dl_error_printf ("\
+%s: Symbol `%s' has different size in shared object, consider re-linking\n",
+				RTLD_PROGNAME, strtab + refsym->st_name);
+	    }
+	  memcpy (reloc_addr_arg, (void *) value,
+		  MIN (sym->st_size, refsym->st_size));
+	  break;
+
+	case R_AARCH64_RELATIVE:
+	case R_AARCH64_GLOB_DAT:
+	case R_AARCH64_JUMP_SLOT:
+	case R_AARCH64_ABS32:
+	case R_AARCH64_ABS64:
+	  *reloc_addr = value + reloc->r_addend;
+	  break;
+
+	case R_AARCH64_TLSDESC:
+	  {
+	    struct tlsdesc volatile *td =
+	      (struct tlsdesc volatile *)reloc_addr;
+#ifndef RTLD_BOOTSTRAP
+	    if (! sym)
+	      {
+		td->arg = (void*)reloc->r_addend;
+		td->entry = _dl_tlsdesc_undefweak;
+	      }
+	    else
+#endif
+	      {
+#ifndef RTLD_BOOTSTRAP
+# ifndef SHARED
+		CHECK_STATIC_TLS (map, sym_map);
+# else
+		if (!TRY_STATIC_TLS (map, sym_map))
+		  {
+		    td->arg = _dl_make_tlsdesc_dynamic
+		      (sym_map, sym->st_value + reloc->r_addend);
+		    td->entry = _dl_tlsdesc_dynamic;
+		  }
+		else
+# endif
+#endif
+		  {
+		    td->arg = (void*)(sym->st_value + sym_map->l_tls_offset
+				      + reloc->r_addend);
+		    td->entry = _dl_tlsdesc_return;
+		  }
+	      }
+	    break;
+	  }
+
+	case R_AARCH64_TLS_DTPMOD:
+#ifdef RTLD_BOOTSTRAP
+	  *reloc_addr = 1;
+#else
+	  if (sym_map != NULL)
+	    {
+	      *reloc_addr = sym_map->l_tls_modid;
+	    }
+#endif
+	  break;
+
+	case R_AARCH64_TLS_DTPREL:
+	  if (sym)
+	    *reloc_addr = sym->st_value + reloc->r_addend;
+	  break;
+
+	case R_AARCH64_TLS_TPREL:
+	  if (sym)
+	    {
+	      CHECK_STATIC_TLS (map, sym_map);
+	      *reloc_addr =
+		sym->st_value + reloc->r_addend + sym_map->l_tls_offset;
+	    }
+	  break;
+
+	case R_AARCH64_IRELATIVE:
+	  value = map->l_addr + reloc->r_addend;
+	  value = elf_ifunc_invoke (value);
+	  *reloc_addr = value;
+	  break;
+
+	default:
+	  _dl_reloc_bad_type (map, r_type, 0);
+	  break;
+	}
+    }
+}
+
+inline void
+__attribute__ ((always_inline))
+elf_machine_rela_relative (ElfW(Addr) l_addr,
+			   const ElfW(Rela) *reloc,
+			   void *const reloc_addr_arg)
+{
+  ElfW(Addr) *const reloc_addr = reloc_addr_arg;
+  *reloc_addr = l_addr + reloc->r_addend;
+}
+
+inline void
+__attribute__ ((always_inline))
+elf_machine_lazy_rel (struct link_map *map,
+		      ElfW(Addr) l_addr,
+		      const ElfW(Rela) *reloc,
+		      int skip_ifunc)
+{
+  ElfW(Addr) *const reloc_addr = (void *) (l_addr + reloc->r_offset);
+  const unsigned int r_type = ELF64_R_TYPE (reloc->r_info);
+  /* Check for unexpected PLT reloc type.  */
+  if (__builtin_expect (r_type == R_AARCH64_JUMP_SLOT, 1))
+    {
+      if (__builtin_expect (map->l_mach.plt, 0) == 0)
+	*reloc_addr += l_addr;
+      else
+	*reloc_addr = map->l_mach.plt;
+    }
+  else if (__builtin_expect (r_type == R_AARCH64_TLSDESC, 1))
+    {
+      struct tlsdesc volatile *td =
+	(struct tlsdesc volatile *)reloc_addr;
+
+      td->arg = (void*)reloc;
+      td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
+			  + map->l_addr);
+    }
+  else if (__glibc_unlikely (r_type == R_AARCH64_IRELATIVE))
+    {
+      ElfW(Addr) value = map->l_addr + reloc->r_addend;
+      if (__glibc_likely (!skip_ifunc))
+	value = elf_ifunc_invoke (value);
+      *reloc_addr = value;
+    }
+  else
+    _dl_reloc_bad_type (map, r_type, 1);
+}
+
+#endif
diff --git a/sysdeps/aarch64/dl-sysdep.h b/sysdeps/aarch64/dl-sysdep.h
new file mode 100644
index 0000000000..0bdbea651e
--- /dev/null
+++ b/sysdeps/aarch64/dl-sysdep.h
@@ -0,0 +1,25 @@
+/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include_next <dl-sysdep.h>
+
+/* _dl_argv cannot be attribute_relro, because _dl_start_user
+   might write into it after _dl_start returns.  */
+#define DL_ARGV_NOT_RELRO 1
+
+#define DL_EXTERN_PROTECTED_DATA
diff --git a/sysdeps/aarch64/dl-tls.h b/sysdeps/aarch64/dl-tls.h
new file mode 100644
index 0000000000..2465716542
--- /dev/null
+++ b/sysdeps/aarch64/dl-tls.h
@@ -0,0 +1,30 @@
+/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Type used for the representation of TLS information in the GOT.  */
+typedef struct
+{
+  unsigned long int ti_module;
+  unsigned long int ti_offset;
+} tls_index;
+
+
+extern void *__tls_get_addr (tls_index *ti);
+
+/* Value used for dtv entries for which the allocation is delayed.  */
+#define TLS_DTV_UNALLOCATED ((void *) -1l)
diff --git a/sysdeps/aarch64/dl-tlsdesc.S b/sysdeps/aarch64/dl-tlsdesc.S
new file mode 100644
index 0000000000..64719eb5e1
--- /dev/null
+++ b/sysdeps/aarch64/dl-tlsdesc.S
@@ -0,0 +1,366 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+   AArch64 version.
+   Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+#define NSAVEDQREGPAIRS	16
+#define SAVE_Q_REGISTERS				\
+	stp	q0, q1,	[sp, #-32*NSAVEDQREGPAIRS]!;	\
+	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\
+	stp	 q2,  q3, [sp, #32*1];			\
+	stp	 q4,  q5, [sp, #32*2];			\
+	stp	 q6,  q7, [sp, #32*3];			\
+	stp	 q8,  q9, [sp, #32*4];			\
+	stp	q10, q11, [sp, #32*5];			\
+	stp	q12, q13, [sp, #32*6];			\
+	stp	q14, q15, [sp, #32*7];			\
+	stp	q16, q17, [sp, #32*8];			\
+	stp	q18, q19, [sp, #32*9];			\
+	stp	q20, q21, [sp, #32*10];			\
+	stp	q22, q23, [sp, #32*11];			\
+	stp	q24, q25, [sp, #32*12];			\
+	stp	q26, q27, [sp, #32*13];			\
+	stp	q28, q29, [sp, #32*14];			\
+	stp	q30, q31, [sp, #32*15];
+
+#define RESTORE_Q_REGISTERS				\
+	ldp	 q2,  q3, [sp, #32*1];			\
+	ldp	 q4,  q5, [sp, #32*2];			\
+	ldp	 q6,  q7, [sp, #32*3];			\
+	ldp	 q8,  q9, [sp, #32*4];			\
+	ldp	q10, q11, [sp, #32*5];			\
+	ldp	q12, q13, [sp, #32*6];			\
+	ldp	q14, q15, [sp, #32*7];			\
+	ldp	q16, q17, [sp, #32*8];			\
+	ldp	q18, q19, [sp, #32*9];			\
+	ldp	q20, q21, [sp, #32*10];			\
+	ldp	q22, q23, [sp, #32*11];			\
+	ldp	q24, q25, [sp, #32*12];			\
+	ldp	q26, q27, [sp, #32*13];			\
+	ldp	q28, q29, [sp, #32*14];			\
+	ldp	q30, q31, [sp, #32*15];			\
+	ldp	 q0,  q1, [sp], #32*NSAVEDQREGPAIRS;	\
+	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
+
+	.text
+
+	/* Compute the thread pointer offset for symbols in the static
+	   TLS block. The offset is the same for all threads.
+	   Prototype:
+	   _dl_tlsdesc_return (tlsdesc *) ;
+	 */
+	.hidden _dl_tlsdesc_return
+	.global	_dl_tlsdesc_return
+	.type	_dl_tlsdesc_return,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return:
+	ldr	x0, [x0, #8]
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+	/* Same as _dl_tlsdesc_return but with synchronization for
+	   lazy relocation.
+	   Prototype:
+	   _dl_tlsdesc_return_lazy (tlsdesc *) ;
+	 */
+	.hidden _dl_tlsdesc_return_lazy
+	.global	_dl_tlsdesc_return_lazy
+	.type	_dl_tlsdesc_return_lazy,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return_lazy:
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
+	ldr	x0, [x0, #8]
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
+
+	/* Handler for undefined weak TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_undefweak (tlsdesc *);
+
+	   The second word of the descriptor contains the addend.
+	   Return the addend minus the thread pointer. This ensures
+	   that when the caller adds on the thread pointer it gets back
+	   the addend.  */
+
+	.hidden _dl_tlsdesc_undefweak
+	.global	_dl_tlsdesc_undefweak
+	.type	_dl_tlsdesc_undefweak,%function
+	cfi_startproc
+	.align  2
+_dl_tlsdesc_undefweak:
+	str	x1, [sp, #-16]!
+	cfi_adjust_cfa_offset (16)
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
+	ldr	x0, [x0, #8]
+	mrs	x1, tpidr_el0
+	sub	x0, x0, x1
+	ldr	x1, [sp], #16
+	cfi_adjust_cfa_offset (-16)
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+#ifdef SHARED
+	/* Handler for dynamic TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_dynamic (tlsdesc *) ;
+
+	   The second word of the descriptor points to a
+	   tlsdesc_dynamic_arg structure.
+
+	   Returns the offset between the thread pointer and the
+	   object referenced by the argument.
+
+	   ptrdiff_t
+	   __attribute__ ((__regparm__ (1)))
+	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
+	   {
+	     struct tlsdesc_dynamic_arg *td = tdp->arg;
+	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET);
+	     if (__builtin_expect (td->gen_count <= dtv[0].counter
+		&& (dtv[td->tlsinfo.ti_module].pointer.val
+		    != TLS_DTV_UNALLOCATED),
+		1))
+	       return dtv[td->tlsinfo.ti_module].pointer.val
+		+ td->tlsinfo.ti_offset
+		- __thread_pointer;
+
+	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
+	   }
+	 */
+
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_dynamic:
+# define NSAVEXREGPAIRS 2
+	stp	x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+
+	/* Save just enough registers to support fast path, if we fall
+	   into slow path we will save additional registers.  */
+
+	stp	x1,  x2, [sp, #32+16*0]
+	stp	x3,  x4, [sp, #32+16*1]
+
+	mrs	x4, tpidr_el0
+	/* The ldar here happens after the load from [x0] at the call site
+	   (that is generated by the compiler as part of the TLS access ABI),
+	   so it reads the same value (this function is the final value of
+	   td->entry) and thus it synchronizes with the release store to
+	   td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
+	   from [x0,#8] here happens after the initialization of td->arg.  */
+	ldar	xzr, [x0]
+	ldr	x1, [x0,#8]
+	ldr	x0, [x4]
+	ldr	x3, [x1,#16]
+	ldr	x2, [x0]
+	cmp	x3, x2
+	b.hi	2f
+	ldr	x2, [x1]
+	add	x0, x0, x2, lsl #4
+	ldr	x0, [x0]
+	cmn	x0, #0x1
+	b.eq	2f
+	ldr	x1, [x1,#8]
+	add	x0, x0, x1
+	sub	x0, x0, x4
+1:
+	ldp	 x1,  x2, [sp, #32+16*0]
+	ldp	 x3,  x4, [sp, #32+16*1]
+
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
+# undef NSAVEXREGPAIRS
+	RET
+2:
+	/* This is the slow path. We need to call __tls_get_addr() which
+	   means we need to save and restore all the register that the
+	   callee will trash.  */
+
+	/* Save the remaining registers that we must treat as caller save.  */
+# define NSAVEXREGPAIRS 7
+	stp	 x5,  x6, [sp, #-16*NSAVEXREGPAIRS]!
+	cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
+	stp	 x7,  x8, [sp, #16*1]
+	stp	 x9, x10, [sp, #16*2]
+	stp	x11, x12, [sp, #16*3]
+	stp	x13, x14, [sp, #16*4]
+	stp	x15, x16, [sp, #16*5]
+	stp	x17, x18, [sp, #16*6]
+
+	SAVE_Q_REGISTERS
+
+	mov	x0, x1
+	bl	__tls_get_addr
+
+	mrs	x1, tpidr_el0
+	sub	x0, x0, x1
+
+	RESTORE_Q_REGISTERS
+
+	ldp	 x7,  x8, [sp, #16*1]
+	ldp	 x9, x10, [sp, #16*2]
+	ldp	x11, x12, [sp, #16*3]
+	ldp	x13, x14, [sp, #16*4]
+	ldp	x15, x16, [sp, #16*5]
+	ldp	x17, x18, [sp, #16*6]
+	ldp	 x5,  x6, [sp], #16*NSAVEXREGPAIRS
+	cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
+	b	1b
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# undef NSAVEXREGPAIRS
+#endif
+
+	/* This function is a wrapper for a lazy resolver for TLS_DESC
+	   RELA relocations.
+	   When the actual resolver returns, it will have adjusted the
+	   TLS descriptor such that we can tail-call it for it to return
+	   the TP offset of the symbol.  */
+
+	.hidden _dl_tlsdesc_resolve_rela
+	.global	_dl_tlsdesc_resolve_rela
+	.type	_dl_tlsdesc_resolve_rela,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_resolve_rela:
+#define	NSAVEXREGPAIRS 9
+	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+	stp	 x1,  x4, [sp, #32+16*0]
+	stp	 x5,  x6, [sp, #32+16*1]
+	stp	 x7,  x8, [sp, #32+16*2]
+	stp	 x9, x10, [sp, #32+16*3]
+	stp	x11, x12, [sp, #32+16*4]
+	stp	x13, x14, [sp, #32+16*5]
+	stp	x15, x16, [sp, #32+16*6]
+	stp	x17, x18, [sp, #32+16*7]
+	str	x0,       [sp, #32+16*8]
+
+	SAVE_Q_REGISTERS
+
+	ldr	x1, [x3, #8]
+	bl	_dl_tlsdesc_resolve_rela_fixup
+
+	RESTORE_Q_REGISTERS
+
+	ldr	x0, [sp, #32+16*8]
+	ldr	x1, [x0]
+	blr	x1
+
+	ldp	 x1,  x4, [sp, #32+16*0]
+	ldp	 x5,  x6, [sp, #32+16*1]
+	ldp	 x7,  x8, [sp, #32+16*2]
+	ldp	 x9, x10, [sp, #32+16*3]
+	ldp	x11, x12, [sp, #32+16*4]
+	ldp	x13, x14, [sp, #32+16*5]
+	ldp	x15, x16, [sp, #32+16*6]
+	ldp	x17, x18, [sp, #32+16*7]
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
+	ldp	x2, x3, [sp], #16
+	cfi_adjust_cfa_offset (-16)
+	RET
+#undef NSAVEXREGPAIRS
+	cfi_endproc
+	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
+
+	/* This function is a placeholder for lazy resolving of TLS
+	relocations.  Once some thread starts resolving a TLS
+	relocation, it sets up the TLS descriptor to use this
+	resolver, such that other threads that would attempt to
+	resolve it concurrently may skip the call to the original lazy
+	resolver and go straight to a condition wait.
+
+	When the actual resolver returns, it will have adjusted the
+	TLS descriptor such that we can tail-call it for it to return
+	the TP offset of the symbol.  */
+
+	.hidden _dl_tlsdesc_resolve_hold
+	.global	_dl_tlsdesc_resolve_hold
+	.type	_dl_tlsdesc_resolve_hold,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_resolve_hold:
+#define	NSAVEXREGPAIRS 10
+1:
+	stp	x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
+	cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
+	mov	x29, sp
+	stp	 x1,  x2, [sp, #32+16*0]
+	stp	 x3,  x4, [sp, #32+16*1]
+	stp	 x5,  x6, [sp, #32+16*2]
+	stp	 x7,  x8, [sp, #32+16*3]
+	stp	 x9, x10, [sp, #32+16*4]
+	stp	x11, x12, [sp, #32+16*5]
+	stp	x13, x14, [sp, #32+16*6]
+	stp	x15, x16, [sp, #32+16*7]
+	stp	x17, x18, [sp, #32+16*8]
+	str	x0,       [sp, #32+16*9]
+
+	SAVE_Q_REGISTERS
+
+	adr	x1, 1b
+	bl	_dl_tlsdesc_resolve_hold_fixup
+
+	RESTORE_Q_REGISTERS
+
+	ldr	x0, [sp, #32+16*9]
+	ldr	x1, [x0]
+	blr	x1
+
+	ldp	 x1,  x2, [sp, #32+16*0]
+	ldp	 x3,  x4, [sp, #32+16*1]
+	ldp	 x5,  x6, [sp, #32+16*2]
+	ldp	 x7,  x8, [sp, #32+16*3]
+	ldp	 x9, x10, [sp, #32+16*4]
+	ldp	x11, x12, [sp, #32+16*5]
+	ldp	x13, x14, [sp, #32+16*6]
+	ldp	x15, x16, [sp, #32+16*7]
+	ldp	x17, x18, [sp, #32+16*8]
+	ldp	x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
+	cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
+#undef NSAVEXREGPAIRS
diff --git a/sysdeps/aarch64/dl-tlsdesc.h b/sysdeps/aarch64/dl-tlsdesc.h
new file mode 100644
index 0000000000..e6c0078eb6
--- /dev/null
+++ b/sysdeps/aarch64/dl-tlsdesc.h
@@ -0,0 +1,68 @@
+/* Thread-local storage descriptor handling in the ELF dynamic linker.
+   AArch64 version.
+   Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_DL_TLSDESC_H
+#define _AARCH64_DL_TLSDESC_H 1
+
+/* Type used to represent a TLS descriptor in the GOT.  */
+struct tlsdesc
+{
+  ptrdiff_t (*entry) (struct tlsdesc *);
+  void *arg;
+};
+
+typedef struct dl_tls_index
+{
+  unsigned long int ti_module;
+  unsigned long int ti_offset;
+} tls_index;
+
+/* Type used as the argument in a TLS descriptor for a symbol that
+   needs dynamic TLS offsets.  */
+struct tlsdesc_dynamic_arg
+{
+  tls_index tlsinfo;
+  size_t gen_count;
+};
+
+extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_return (struct tlsdesc *);
+
+extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_return_lazy (struct tlsdesc *);
+
+extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_undefweak (struct tlsdesc *);
+
+extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_resolve_rela (struct tlsdesc *);
+
+extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_resolve_hold (struct tlsdesc *);
+
+# ifdef SHARED
+extern void *internal_function _dl_make_tlsdesc_dynamic (struct link_map *,
+							 size_t);
+
+extern ptrdiff_t attribute_hidden
+_dl_tlsdesc_dynamic (struct tlsdesc *);
+#endif
+
+#endif
diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
new file mode 100644
index 0000000000..f95b452467
--- /dev/null
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -0,0 +1,296 @@
+/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <libc-symbols.h>
+
+#include "dl-link.h"
+
+#define ip0 x16
+#define ip1 x17
+#define lr  x30
+
+	.text
+	.globl _dl_runtime_resolve
+	.type _dl_runtime_resolve, #function
+	cfi_startproc
+	.align 2
+_dl_runtime_resolve:
+	/* AArch64 we get called with:
+	   ip0		&PLTGOT[2]
+	   ip1		temp(dl resolver entry point)
+	   [sp, #8]	lr
+	   [sp, #0]	&PLTGOT[n]
+	 */
+
+	cfi_rel_offset (lr, 8)
+
+	/* Save arguments.  */
+	stp	x8, x9, [sp, #-(80+8*16)]!
+	cfi_adjust_cfa_offset (80+8*16)
+	cfi_rel_offset (x8, 0)
+	cfi_rel_offset (x9, 8)
+
+	stp	x6, x7, [sp,  #16]
+	cfi_rel_offset (x6, 16)
+	cfi_rel_offset (x7, 24)
+
+	stp	x4, x5, [sp,  #32]
+	cfi_rel_offset (x4, 32)
+	cfi_rel_offset (x5, 40)
+
+	stp	x2, x3, [sp,  #48]
+	cfi_rel_offset (x2, 48)
+	cfi_rel_offset (x3, 56)
+
+	stp	x0, x1, [sp,  #64]
+	cfi_rel_offset (x0, 64)
+	cfi_rel_offset (x1, 72)
+
+	stp	q0, q1, [sp, #(80+0*16)]
+	cfi_rel_offset (q0, 80+0*16)
+	cfi_rel_offset (q1, 80+1*16)
+
+	stp	q2, q3, [sp, #(80+2*16)]
+	cfi_rel_offset (q0, 80+2*16)
+	cfi_rel_offset (q1, 80+3*16)
+
+	stp	q4, q5, [sp, #(80+4*16)]
+	cfi_rel_offset (q0, 80+4*16)
+	cfi_rel_offset (q1, 80+5*16)
+
+	stp	q6, q7, [sp, #(80+6*16)]
+	cfi_rel_offset (q0, 80+6*16)
+	cfi_rel_offset (q1, 80+7*16)
+
+	/* Get pointer to linker struct.  */
+	ldr	x0, [ip0, #-8]
+
+	/* Prepare to call _dl_fixup().  */
+	ldr	x1, [sp, 80+8*16]	/* Recover &PLTGOT[n] */
+
+	sub     x1, x1, ip0
+	add     x1, x1, x1, lsl #1
+	lsl     x1, x1, #3
+	sub     x1, x1, #192
+	lsr     x1, x1, #3
+
+	/* Call fixup routine.  */
+	bl	_dl_fixup
+
+	/* Save the return.  */
+	mov	ip0, x0
+
+	/* Get arguments and return address back.  */
+	ldp	q0, q1, [sp, #(80+0*16)]
+	ldp	q2, q3, [sp, #(80+2*16)]
+	ldp	q4, q5, [sp, #(80+4*16)]
+	ldp	q6, q7, [sp, #(80+6*16)]
+	ldp	x0, x1, [sp, #64]
+	ldp	x2, x3, [sp, #48]
+	ldp	x4, x5, [sp, #32]
+	ldp	x6, x7, [sp, #16]
+	ldp	x8, x9, [sp], #(80+8*16)
+	cfi_adjust_cfa_offset (-(80+8*16))
+
+	ldp	ip1, lr, [sp], #16
+	cfi_adjust_cfa_offset (-16)
+
+	/* Jump to the newly found address.  */
+	br	ip0
+
+	cfi_endproc
+	.size _dl_runtime_resolve, .-_dl_runtime_resolve
+#ifndef PROF
+	.globl _dl_runtime_profile
+	.type _dl_runtime_profile, #function
+	cfi_startproc
+	.align 2
+_dl_runtime_profile:
+	/* AArch64 we get called with:
+	   ip0		&PLTGOT[2]
+	   ip1		temp(dl resolver entry point)
+	   [sp, #8]	lr
+	   [sp, #0]	&PLTGOT[n]
+
+	   Stack frame layout:
+	   [sp,   #...] lr
+	   [sp,   #...] &PLTGOT[n]
+	   [sp,    #96] La_aarch64_regs
+	   [sp,    #48] La_aarch64_retval
+	   [sp,    #40] frame size return from pltenter
+	   [sp,    #32] dl_profile_call saved x1
+	   [sp,    #24] dl_profile_call saved x0
+	   [sp,    #16] t1
+	   [sp,     #0] x29, lr   <- x29
+	 */
+
+# define OFFSET_T1		16
+# define OFFSET_SAVED_CALL_X0	OFFSET_T1 + 8
+# define OFFSET_FS		OFFSET_SAVED_CALL_X0 + 16
+# define OFFSET_RV		OFFSET_FS + 8
+# define OFFSET_RG		OFFSET_RV + DL_SIZEOF_RV
+
+# define SF_SIZE		OFFSET_RG + DL_SIZEOF_RG
+
+# define OFFSET_PLTGOTN		SF_SIZE
+# define OFFSET_LR		OFFSET_PLTGOTN + 8
+
+	/* Save arguments.  */
+	sub	sp, sp, #SF_SIZE
+	cfi_adjust_cfa_offset (SF_SIZE)
+	stp	x29, x30, [SP, #0]
+	mov	x29, sp
+	cfi_def_cfa_register (x29)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (lr, 8)
+
+	stp	x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
+	cfi_rel_offset (x0, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 0)
+	cfi_rel_offset (x1, OFFSET_RG + DL_OFFSET_RG_X0 + 16*0 + 8)
+	stp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
+	cfi_rel_offset (x2, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 0)
+	cfi_rel_offset (x3, OFFSET_RG + DL_OFFSET_RG_X0 + 16*1 + 8)
+	stp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
+	cfi_rel_offset (x4, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 0)
+	cfi_rel_offset (x5, OFFSET_RG + DL_OFFSET_RG_X0 + 16*2 + 8)
+	stp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
+	cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0)
+	cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8)
+
+	stp	d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
+	cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0)
+	cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8)
+	stp	d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1]
+	cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0)
+	cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8)
+	stp	d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
+	cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0)
+	cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8)
+	stp	d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0)
+	cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8)
+
+	add     x0, x29, #SF_SIZE + 16
+	ldr	x1, [x29, #OFFSET_LR]
+	stp	x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_SP]
+
+	/* Get pointer to linker struct.  */
+	ldr	x0, [ip0, #-8]
+
+	/* Prepare to call _dl_profile_fixup().  */
+	ldr	x1, [x29, OFFSET_PLTGOTN]	/* Recover &PLTGOT[n] */
+
+	sub     x1, x1, ip0
+	add     x1, x1, x1, lsl #1
+	lsl     x1, x1, #3
+	sub     x1, x1, #192
+	lsr     x1, x1, #3
+
+	stp	x0, x1, [x29, #OFFSET_SAVED_CALL_X0]
+
+	/* Set up extra args for _dl_profile_fixup */
+	ldr	x2, [x29, #OFFSET_LR]		/* load saved LR */
+	add	x3, x29, #OFFSET_RG		/* address of La_aarch64_reg */
+	add	x4, x29, #OFFSET_FS		/* address of framesize */
+	bl	_dl_profile_fixup
+
+	ldr	ip0, [x29, #OFFSET_FS]		/* framesize == 0 */
+	cmp	ip0, #0
+	bge	1f
+	cfi_remember_state
+
+	/* Save the return.  */
+	mov	ip0, x0
+
+	/* Get arguments and return address back.  */
+	ldp	x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
+	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
+	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
+	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
+	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
+	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
+	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
+	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+
+	cfi_def_cfa_register (sp)
+	ldp	x29, x30, [x29, #0]
+	cfi_restore(x29)
+	cfi_restore(x30)
+
+	add	sp, sp, SF_SIZE + 16
+	cfi_adjust_cfa_offset (- SF_SIZE - 16)
+
+	/* Jump to the newly found address.  */
+	br	ip0
+
+	cfi_restore_state
+1:
+	/* The new frame size is in ip0.  */
+
+	sub	x1, x29, ip0
+	and	sp, x1, #0xfffffffffffffff0
+
+	str	x0, [x29, #OFFSET_T1]
+
+	mov	x0, sp
+	add	x1, x29, #SF_SIZE + 16
+	mov	x2, ip0
+	bl	memcpy
+
+	ldr	ip0, [x29, #OFFSET_T1]
+
+	/* Call the function.  */
+	ldp	x0, x1, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*0]
+	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
+	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
+	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
+	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
+	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
+	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
+	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	blr	ip0
+	stp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
+	stp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
+	stp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+
+	/* Setup call to pltexit  */
+	ldp	x0, x1, [x29, #OFFSET_SAVED_CALL_X0]
+	add	x2, x29, #OFFSET_RG
+	add	x3, x29, #OFFSET_RV
+	bl	_dl_call_pltexit
+
+	ldp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
+	ldp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
+	ldp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+	/* LR from within La_aarch64_reg */
+	ldr	lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR]
+	cfi_restore(lr)
+	mov	sp, x29
+	cfi_def_cfa_register (sp)
+	ldr	x29, [x29, #0]
+	cfi_restore(x29)
+	add	sp, sp, SF_SIZE + 16
+	cfi_adjust_cfa_offset (- SF_SIZE - 16)
+
+	br	lr
+
+	cfi_endproc
+	.size _dl_runtime_profile, .-_dl_runtime_profile
+#endif
+	.previous
diff --git a/sysdeps/aarch64/fpu/e_sqrt.c b/sysdeps/aarch64/fpu/e_sqrt.c
new file mode 100644
index 0000000000..4f11ca2e48
--- /dev/null
+++ b/sysdeps/aarch64/fpu/e_sqrt.c
@@ -0,0 +1,28 @@
+/* Square root of floating point number.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math_private.h>
+
+double
+__ieee754_sqrt (double d)
+{
+  double res;
+  asm ("fsqrt   %d0, %d1" : "=w" (res) : "w" (d));
+  return res;
+}
+strong_alias (__ieee754_sqrt, __sqrt_finite)
diff --git a/sysdeps/aarch64/fpu/e_sqrtf.c b/sysdeps/aarch64/fpu/e_sqrtf.c
new file mode 100644
index 0000000000..a2e99e1f75
--- /dev/null
+++ b/sysdeps/aarch64/fpu/e_sqrtf.c
@@ -0,0 +1,28 @@
+/* Single-precision floating point square root.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math_private.h>
+
+float
+__ieee754_sqrtf (float s)
+{
+  float res;
+  asm ("fsqrt   %s0, %s1" : "=w" (res) : "w" (s));
+  return res;
+}
+strong_alias (__ieee754_sqrtf, __sqrtf_finite)
diff --git a/sysdeps/aarch64/fpu/fclrexcpt.c b/sysdeps/aarch64/fpu/fclrexcpt.c
new file mode 100644
index 0000000000..f6f758f16f
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fclrexcpt.c
@@ -0,0 +1,38 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+int
+feclearexcept (int excepts)
+{
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t fpsr_new;
+
+  excepts &= FE_ALL_EXCEPT;
+
+  _FPU_GETFPSR (fpsr);
+  fpsr_new = fpsr & ~excepts;
+
+  if (fpsr != fpsr_new)
+    _FPU_SETFPSR (fpsr_new);
+
+  return 0;
+}
+libm_hidden_def (feclearexcept)
diff --git a/sysdeps/aarch64/fpu/fedisblxcpt.c b/sysdeps/aarch64/fpu/fedisblxcpt.c
new file mode 100644
index 0000000000..cdf4bd176a
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fedisblxcpt.c
@@ -0,0 +1,36 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+int
+fedisableexcept (int excepts)
+{
+  fpu_control_t fpcr;
+  fpu_control_t fpcr_new;
+
+  _FPU_GETCW (fpcr);
+  excepts &= FE_ALL_EXCEPT;
+  fpcr_new = fpcr & ~(excepts << FE_EXCEPT_SHIFT);
+
+  if (fpcr != fpcr_new)
+    _FPU_SETCW (fpcr_new);
+
+  return (fpcr >> FE_EXCEPT_SHIFT) & FE_ALL_EXCEPT;
+}
diff --git a/sysdeps/aarch64/fpu/feenablxcpt.c b/sysdeps/aarch64/fpu/feenablxcpt.c
new file mode 100644
index 0000000000..82ed0b623c
--- /dev/null
+++ b/sysdeps/aarch64/fpu/feenablxcpt.c
@@ -0,0 +1,49 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+int
+feenableexcept (int excepts)
+{
+  fpu_control_t fpcr;
+  fpu_control_t fpcr_new;
+
+  _FPU_GETCW (fpcr);
+  excepts &= FE_ALL_EXCEPT;
+  fpcr_new = fpcr | (excepts << FE_EXCEPT_SHIFT);
+
+  if (fpcr != fpcr_new)
+    _FPU_SETCW (fpcr_new);
+
+  /* Trapping exceptions are optional in AArch64 the relevant enable
+     bits in FPCR are RES0 hence the absence of support can be
+     detected by reading back the FPCR and comparing with the required
+     value.  */
+  if (excepts)
+    {
+      fpu_control_t updated_fpcr;
+
+      _FPU_GETCW (updated_fpcr);
+      if (((updated_fpcr >> FE_EXCEPT_SHIFT) & excepts) != excepts)
+	return -1;
+    }
+
+  return (fpcr >> FE_EXCEPT_SHIFT) & FE_ALL_EXCEPT;
+}
diff --git a/sysdeps/aarch64/fpu/fegetenv.c b/sysdeps/aarch64/fpu/fegetenv.c
new file mode 100644
index 0000000000..cf32ea0079
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fegetenv.c
@@ -0,0 +1,35 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+int
+__fegetenv (fenv_t *envp)
+{
+  fpu_control_t fpcr;
+  fpu_fpsr_t fpsr;
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  envp->__fpcr = fpcr;
+  envp->__fpsr = fpsr;
+  return 0;
+}
+libm_hidden_def (__fegetenv)
+weak_alias (__fegetenv, fegetenv)
+libm_hidden_weak (fegetenv)
diff --git a/sysdeps/aarch64/fpu/fegetexcept.c b/sysdeps/aarch64/fpu/fegetexcept.c
new file mode 100644
index 0000000000..4a35beab31
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fegetexcept.c
@@ -0,0 +1,28 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+int
+fegetexcept (void)
+{
+  fpu_control_t fpcr;
+  _FPU_GETCW (fpcr);
+  return (fpcr >> FE_EXCEPT_SHIFT) & FE_ALL_EXCEPT;
+}
diff --git a/sysdeps/aarch64/fpu/fegetround.c b/sysdeps/aarch64/fpu/fegetround.c
new file mode 100644
index 0000000000..5af36f7715
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fegetround.c
@@ -0,0 +1,29 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <get-rounding-mode.h>
+
+int
+__fegetround (void)
+{
+  return get_rounding_mode ();
+}
+libm_hidden_def (__fegetround)
+weak_alias (__fegetround, fegetround)
+libm_hidden_weak (fegetround)
diff --git a/sysdeps/aarch64/fpu/feholdexcpt.c b/sysdeps/aarch64/fpu/feholdexcpt.c
new file mode 100644
index 0000000000..6c2e3f90d1
--- /dev/null
+++ b/sysdeps/aarch64/fpu/feholdexcpt.c
@@ -0,0 +1,30 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <math_private.h>
+
+int
+__feholdexcept (fenv_t *envp)
+{
+  libc_feholdexcept_aarch64 (envp);
+  return 0;
+}
+libm_hidden_def (__feholdexcept)
+weak_alias (__feholdexcept, feholdexcept)
+libm_hidden_weak (feholdexcept)
diff --git a/sysdeps/aarch64/fpu/fesetenv.c b/sysdeps/aarch64/fpu/fesetenv.c
new file mode 100644
index 0000000000..f47115fd91
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fesetenv.c
@@ -0,0 +1,72 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+int
+__fesetenv (const fenv_t *envp)
+{
+  fpu_control_t fpcr;
+  fpu_control_t fpcr_new;
+  fpu_control_t updated_fpcr;
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t fpsr_new;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+
+  fpcr_new = fpcr & _FPU_RESERVED;
+  fpsr_new = fpsr & _FPU_FPSR_RESERVED;
+
+  if (envp == FE_DFL_ENV)
+    {
+      fpcr_new |= _FPU_DEFAULT;
+      fpsr_new |= _FPU_FPSR_DEFAULT;
+    }
+  else if (envp == FE_NOMASK_ENV)
+    {
+      fpcr_new |= _FPU_FPCR_IEEE;
+      fpsr_new |= _FPU_FPSR_IEEE;
+    }
+  else
+    {
+      fpcr_new |= envp->__fpcr & ~_FPU_RESERVED;
+      fpsr_new |= envp->__fpsr & ~_FPU_FPSR_RESERVED;
+    }
+
+  if (fpsr != fpsr_new)
+    _FPU_SETFPSR (fpsr_new);
+
+  if (fpcr != fpcr_new)
+    _FPU_SETCW (fpcr_new);
+
+  /* Trapping exceptions are optional in AArch64 the relevant enable
+     bits in FPCR are RES0 hence the absence of support can be
+     detected by reading back the FPCR and comparing with the required
+     value.  */
+
+  _FPU_GETCW (updated_fpcr);
+  if ((updated_fpcr & fpcr_new) != fpcr_new)
+    return 1;
+
+  return 0;
+}
+libm_hidden_def (__fesetenv)
+weak_alias (__fesetenv, fesetenv)
+libm_hidden_weak (fesetenv)
diff --git a/sysdeps/aarch64/fpu/fesetround.c b/sysdeps/aarch64/fpu/fesetround.c
new file mode 100644
index 0000000000..257dba13db
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fesetround.c
@@ -0,0 +1,34 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <math_private.h>
+#include <fpu_control.h>
+
+int
+__fesetround (int round)
+{
+  if (round & ~_FPU_FPCR_RM_MASK)
+    return 1;
+
+  libc_fesetround_aarch64 (round);
+  return 0;
+}
+libm_hidden_def (__fesetround)
+weak_alias (__fesetround, fesetround)
+libm_hidden_weak (fesetround)
diff --git a/sysdeps/aarch64/fpu/feupdateenv.c b/sysdeps/aarch64/fpu/feupdateenv.c
new file mode 100644
index 0000000000..12e799ec77
--- /dev/null
+++ b/sysdeps/aarch64/fpu/feupdateenv.c
@@ -0,0 +1,89 @@
+/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+int
+__feupdateenv (const fenv_t *envp)
+{
+  fpu_control_t fpcr;
+  fpu_control_t fpcr_new;
+  fpu_control_t updated_fpcr;
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t fpsr_new;
+  int excepts;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  excepts = fpsr & FE_ALL_EXCEPT;
+
+  if ((envp != FE_DFL_ENV) && (envp != FE_NOMASK_ENV))
+    {
+      fpcr_new = envp->__fpcr;
+      fpsr_new = envp->__fpsr | excepts;
+
+      if (fpcr != fpcr_new)
+        _FPU_SETCW (fpcr_new);
+
+      if (fpsr != fpsr_new)
+        _FPU_SETFPSR (fpsr_new);
+
+      if (excepts & (fpcr_new >> FE_EXCEPT_SHIFT))
+        return __feraiseexcept (excepts);
+
+      return 0;
+    }
+
+  fpcr_new = fpcr & _FPU_RESERVED;
+  fpsr_new = fpsr & (_FPU_FPSR_RESERVED | FE_ALL_EXCEPT);
+
+  if (envp == FE_DFL_ENV)
+    {
+      fpcr_new |= _FPU_DEFAULT;
+      fpsr_new |= _FPU_FPSR_DEFAULT;
+    }
+  else
+    {
+      fpcr_new |= _FPU_FPCR_IEEE;
+      fpsr_new |= _FPU_FPSR_IEEE;
+    }
+
+  _FPU_SETFPSR (fpsr_new);
+
+  if (fpcr != fpcr_new)
+    {
+      _FPU_SETCW (fpcr_new);
+
+      /* Trapping exceptions are optional in AArch64; the relevant enable
+	 bits in FPCR are RES0 hence the absence of support can be detected
+	 by reading back the FPCR and comparing with the required value.  */
+      _FPU_GETCW (updated_fpcr);
+
+      if (fpcr_new & ~updated_fpcr)
+        return 1;
+    }
+
+  if (excepts & (fpcr_new >> FE_EXCEPT_SHIFT))
+    return __feraiseexcept (excepts);
+
+  return 0;
+}
+libm_hidden_def (__feupdateenv)
+weak_alias (__feupdateenv, feupdateenv)
+libm_hidden_weak (feupdateenv)
diff --git a/sysdeps/aarch64/fpu/fgetexcptflg.c b/sysdeps/aarch64/fpu/fgetexcptflg.c
new file mode 100644
index 0000000000..f7991f2889
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fgetexcptflg.c
@@ -0,0 +1,27 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <math_private.h>
+
+int
+fegetexceptflag (fexcept_t *flagp, int excepts)
+{
+  *flagp = libc_fetestexcept_aarch64 (excepts);
+  return 0;
+}
diff --git a/sysdeps/aarch64/fpu/fpu_control.h b/sysdeps/aarch64/fpu/fpu_control.h
new file mode 100644
index 0000000000..1e9d28bb26
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fpu_control.h
@@ -0,0 +1,81 @@
+/* Copyright (C) 1996-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_FPU_CONTROL_H
+#define _AARCH64_FPU_CONTROL_H
+
+/* Macros for accessing the FPCR and FPSR.  */
+
+#define _FPU_GETCW(fpcr) \
+  __asm__ __volatile__ ("mrs	%0, fpcr" : "=r" (fpcr))
+
+#define _FPU_SETCW(fpcr) \
+  __asm__ __volatile__ ("msr	fpcr, %0" : : "r" (fpcr))
+
+#define _FPU_GETFPSR(fpsr) \
+  __asm__ __volatile__ ("mrs	%0, fpsr" : "=r" (fpsr))
+
+#define _FPU_SETFPSR(fpsr) \
+  __asm__ __volatile__ ("msr	fpsr, %0" : : "r" (fpsr))
+
+/* Reserved bits should be preserved when modifying register
+   contents. These two masks indicate which bits in each of FPCR and
+   FPSR should not be changed.  */
+
+#define _FPU_RESERVED		0xfe0fe0ff
+#define _FPU_FPSR_RESERVED	0x0fffffe0
+
+#define _FPU_DEFAULT		0x00000000
+#define _FPU_FPSR_DEFAULT	0x00000000
+
+/* Layout of FPCR and FPSR:
+
+   |       |       |       |       |       |       |       |
+   0 0 0 0 1 1 1 0 0 0 0 0 1 0 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0
+   s s s s s                                       s     s s s s s
+             c c c c c c c               c c c c c
+   N Z C V Q A D F R R S S S L L L I U U I U O D I I U U I U O D I
+           C H N Z M M T T B E E E D N N X F F Z O D N N X F F Z O
+             P     O O R R Z N N N E K K E E E E E C K K C C C C C
+                   D D I I P
+                   E E D D
+                       E E
+ */
+
+#define _FPU_FPCR_RM_MASK  0xc00000
+
+#define _FPU_FPCR_MASK_IXE 0x1000
+#define _FPU_FPCR_MASK_UFE 0x0800
+#define _FPU_FPCR_MASK_OFE 0x0400
+#define _FPU_FPCR_MASK_DZE 0x0200
+#define _FPU_FPCR_MASK_IOE 0x0100
+
+#define _FPU_FPCR_IEEE                       \
+  (_FPU_DEFAULT  | _FPU_FPCR_MASK_IXE |	     \
+   _FPU_FPCR_MASK_UFE | _FPU_FPCR_MASK_OFE | \
+   _FPU_FPCR_MASK_DZE | _FPU_FPCR_MASK_IOE)
+
+#define _FPU_FPSR_IEEE 0
+
+typedef unsigned int fpu_control_t;
+typedef unsigned int fpu_fpsr_t;
+
+/* Default control word set at startup.  */
+extern fpu_control_t __fpu_control;
+
+#endif
diff --git a/sysdeps/aarch64/fpu/fraiseexcpt.c b/sysdeps/aarch64/fpu/fraiseexcpt.c
new file mode 100644
index 0000000000..b9dd1aec3f
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fraiseexcpt.c
@@ -0,0 +1,93 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+#include <float.h>
+
+int
+__feraiseexcept (int excepts)
+{
+  int fpsr;
+  const float fp_zero = 0.0;
+  const float fp_one = 1.0;
+  const float fp_max = FLT_MAX;
+  const float fp_min = FLT_MIN;
+  const float fp_1e32 = 1.0e32f;
+  const float fp_two = 2.0;
+  const float fp_three = 3.0;
+
+  /* Raise exceptions represented by EXCEPTS.  But we must raise only
+     one signal at a time.  It is important that if the OVERFLOW or
+     UNDERFLOW exception and the inexact exception are given at the
+     same time, the OVERFLOW or UNDERFLOW exception precedes the
+     INEXACT exception.
+
+     After each exception we read from the FPSR, to force the
+     exception to be raised immediately.  */
+
+  if (FE_INVALID & excepts)
+    __asm__ __volatile__ (
+			  "ldr	s0, %1\n\t"
+			  "fdiv	s0, s0, s0\n\t"
+			  "mrs	%0, fpsr" : "=r" (fpsr)
+			  : "m" (fp_zero)
+			  : "d0");
+
+  if (FE_DIVBYZERO & excepts)
+    __asm__ __volatile__ (
+			  "ldr	s0, %1\n\t"
+			  "ldr	s1, %2\n\t"
+			  "fdiv	s0, s0, s1\n\t"
+			  "mrs	%0, fpsr" : "=r" (fpsr)
+			  : "m" (fp_one), "m" (fp_zero)
+			  : "d0", "d1");
+
+  if (FE_OVERFLOW & excepts)
+    /* There's no way to raise overflow without also raising inexact.  */
+    __asm__ __volatile__ (
+			  "ldr	s0, %1\n\t"
+			  "ldr	s1, %2\n\t"
+			  "fadd s0, s0, s1\n\t"
+			  "mrs	%0, fpsr" : "=r" (fpsr)
+			  : "m" (fp_max), "m" (fp_1e32)
+			  : "d0", "d1");
+
+  if (FE_UNDERFLOW & excepts)
+    __asm__ __volatile__ (
+			  "ldr	s0, %1\n\t"
+			  "ldr	s1, %2\n\t"
+			  "fdiv s0, s0, s1\n\t"
+			  "mrs	%0, fpsr" : "=r" (fpsr)
+			  : "m" (fp_min), "m" (fp_three)
+			  : "d0", "d1");
+
+  if (FE_INEXACT & excepts)
+    __asm__ __volatile__ (
+			  "ldr	s0, %1\n\t"
+			  "ldr	s1, %2\n\t"
+			  "fdiv s0, s0, s1\n\t"
+			  "mrs	%0, fpsr" : "=r" (fpsr)
+			  : "m" (fp_two), "m" (fp_three)
+			  : "d0", "d1");
+
+  return 0;
+}
+libm_hidden_def (__feraiseexcept)
+weak_alias (__feraiseexcept, feraiseexcept)
+libm_hidden_weak (feraiseexcept)
diff --git a/sysdeps/aarch64/fpu/fsetexcptflg.c b/sysdeps/aarch64/fpu/fsetexcptflg.c
new file mode 100644
index 0000000000..299d3de6b6
--- /dev/null
+++ b/sysdeps/aarch64/fpu/fsetexcptflg.c
@@ -0,0 +1,41 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+int
+fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t fpsr_new;
+
+  /* Get the current environment.  */
+  _FPU_GETFPSR (fpsr);
+  excepts &= FE_ALL_EXCEPT;
+
+  /* Set the desired exception mask.  */
+  fpsr_new = fpsr & ~excepts;
+  fpsr_new |= *flagp & excepts;
+
+  /* Save state back to the FPU.  */
+  if (fpsr != fpsr_new)
+    _FPU_SETFPSR (fpsr_new);
+
+  return 0;
+}
diff --git a/sysdeps/aarch64/fpu/ftestexcept.c b/sysdeps/aarch64/fpu/ftestexcept.c
new file mode 100644
index 0000000000..83d59bf661
--- /dev/null
+++ b/sysdeps/aarch64/fpu/ftestexcept.c
@@ -0,0 +1,27 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <math_private.h>
+
+int
+fetestexcept (int excepts)
+{
+  return libc_fetestexcept_aarch64 (excepts);
+}
+libm_hidden_def (fetestexcept)
diff --git a/sysdeps/aarch64/fpu/get-rounding-mode.h b/sysdeps/aarch64/fpu/get-rounding-mode.h
new file mode 100644
index 0000000000..18979bc372
--- /dev/null
+++ b/sysdeps/aarch64/fpu/get-rounding-mode.h
@@ -0,0 +1,38 @@
+/* Determine floating-point rounding mode within libc.  AArch64 version.
+
+   Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_GET_ROUNDING_MODE_H
+#define _AARCH64_GET_ROUNDING_MODE_H	1
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+/* Return the floating-point rounding mode.  */
+
+static inline int
+get_rounding_mode (void)
+{
+  fpu_control_t fpcr;
+
+  _FPU_GETCW (fpcr);
+  return fpcr & _FPU_FPCR_RM_MASK;
+}
+
+#endif /* get-rounding-mode.h */
diff --git a/sysdeps/aarch64/fpu/math_private.h b/sysdeps/aarch64/fpu/math_private.h
new file mode 100644
index 0000000000..1f02ddb05a
--- /dev/null
+++ b/sysdeps/aarch64/fpu/math_private.h
@@ -0,0 +1,328 @@
+/* Private floating point rounding and exceptions handling.  AArch64 version.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef AARCH64_MATH_PRIVATE_H
+#define AARCH64_MATH_PRIVATE_H 1
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+#define math_opt_barrier(x) \
+({ __typeof (x) __x = (x); __asm ("" : "+w" (__x)); __x; })
+#define math_force_eval(x) \
+({ __typeof (x) __x = (x); __asm __volatile__ ("" : : "w" (__x)); })
+
+extern __always_inline double
+__ieee754_sqrt (double d)
+{
+  double res;
+  asm __volatile__ ("fsqrt   %d0, %d1" : "=w" (res) : "w" (d));
+  return res;
+}
+
+extern __always_inline float
+__ieee754_sqrtf (float s)
+{
+  float res;
+  asm __volatile__ ("fsqrt   %s0, %s1" : "=w" (res) : "w" (s));
+  return res;
+}
+
+static __always_inline void
+libc_feholdexcept_aarch64 (fenv_t *envp)
+{
+  fpu_control_t fpcr;
+  fpu_control_t new_fpcr;
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t new_fpsr;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  envp->__fpcr = fpcr;
+  envp->__fpsr = fpsr;
+
+  /* Clear exception flags and set all exceptions to non-stop.  */
+  new_fpcr = fpcr & ~(FE_ALL_EXCEPT << FE_EXCEPT_SHIFT);
+  new_fpsr = fpsr & ~FE_ALL_EXCEPT;
+
+  if (__glibc_unlikely (new_fpcr != fpcr))
+    _FPU_SETCW (new_fpcr);
+
+  if (new_fpsr != fpsr)
+    _FPU_SETFPSR (new_fpsr);
+}
+
+#define libc_feholdexcept  libc_feholdexcept_aarch64
+#define libc_feholdexceptf libc_feholdexcept_aarch64
+#define libc_feholdexceptl libc_feholdexcept_aarch64
+
+static __always_inline void
+libc_fesetround_aarch64 (int round)
+{
+  fpu_control_t fpcr;
+
+  _FPU_GETCW (fpcr);
+
+  /* Check whether rounding modes are different.  */
+  round = (fpcr ^ round) & _FPU_FPCR_RM_MASK;
+
+  /* Set new rounding mode if different.  */
+  if (__glibc_unlikely (round != 0))
+    _FPU_SETCW (fpcr ^ round);
+}
+
+#define libc_fesetround  libc_fesetround_aarch64
+#define libc_fesetroundf libc_fesetround_aarch64
+#define libc_fesetroundl libc_fesetround_aarch64
+
+static __always_inline void
+libc_feholdexcept_setround_aarch64 (fenv_t *envp, int round)
+{
+  fpu_control_t fpcr;
+  fpu_control_t new_fpcr;
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t new_fpsr;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  envp->__fpcr = fpcr;
+  envp->__fpsr = fpsr;
+
+  /* Clear exception flags, set all exceptions to non-stop,
+     and set new rounding mode.  */
+  new_fpcr = fpcr & ~((FE_ALL_EXCEPT << FE_EXCEPT_SHIFT) | _FPU_FPCR_RM_MASK);
+  new_fpcr |= round;
+  new_fpsr = fpsr & ~FE_ALL_EXCEPT;
+
+  if (__glibc_unlikely (new_fpcr != fpcr))
+    _FPU_SETCW (new_fpcr);
+
+  if (new_fpsr != fpsr)
+    _FPU_SETFPSR (new_fpsr);
+}
+
+#define libc_feholdexcept_setround  libc_feholdexcept_setround_aarch64
+#define libc_feholdexcept_setroundf libc_feholdexcept_setround_aarch64
+#define libc_feholdexcept_setroundl libc_feholdexcept_setround_aarch64
+
+static __always_inline int
+libc_fetestexcept_aarch64 (int ex)
+{
+  fpu_fpsr_t fpsr;
+
+  _FPU_GETFPSR (fpsr);
+  return fpsr & ex & FE_ALL_EXCEPT;
+}
+
+#define libc_fetestexcept  libc_fetestexcept_aarch64
+#define libc_fetestexceptf libc_fetestexcept_aarch64
+#define libc_fetestexceptl libc_fetestexcept_aarch64
+
+static __always_inline void
+libc_fesetenv_aarch64 (const fenv_t *envp)
+{
+  fpu_control_t fpcr;
+  fpu_control_t new_fpcr;
+
+  _FPU_GETCW (fpcr);
+  new_fpcr = envp->__fpcr;
+
+  if (__glibc_unlikely (fpcr != new_fpcr))
+    _FPU_SETCW (new_fpcr);
+
+  _FPU_SETFPSR (envp->__fpsr);
+}
+
+#define libc_fesetenv  libc_fesetenv_aarch64
+#define libc_fesetenvf libc_fesetenv_aarch64
+#define libc_fesetenvl libc_fesetenv_aarch64
+#define libc_feresetround_noex  libc_fesetenv_aarch64
+#define libc_feresetround_noexf libc_fesetenv_aarch64
+#define libc_feresetround_noexl libc_fesetenv_aarch64
+
+static __always_inline int
+libc_feupdateenv_test_aarch64 (const fenv_t *envp, int ex)
+{
+  fpu_control_t fpcr;
+  fpu_control_t new_fpcr;
+  fpu_fpsr_t fpsr;
+  fpu_fpsr_t new_fpsr;
+  int excepts;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+
+  /* Merge current exception flags with the saved fenv.  */
+  excepts = fpsr & FE_ALL_EXCEPT;
+  new_fpcr = envp->__fpcr;
+  new_fpsr = envp->__fpsr | excepts;
+
+  if (__glibc_unlikely (fpcr != new_fpcr))
+    _FPU_SETCW (new_fpcr);
+
+  if (fpsr != new_fpsr)
+    _FPU_SETFPSR (new_fpsr);
+
+  /* Raise the exceptions if enabled in the new FP state.  */
+  if (__glibc_unlikely (excepts & (new_fpcr >> FE_EXCEPT_SHIFT)))
+    __feraiseexcept (excepts);
+
+  return excepts & ex;
+}
+
+#define libc_feupdateenv_test  libc_feupdateenv_test_aarch64
+#define libc_feupdateenv_testf libc_feupdateenv_test_aarch64
+#define libc_feupdateenv_testl libc_feupdateenv_test_aarch64
+
+static __always_inline void
+libc_feupdateenv_aarch64 (const fenv_t *envp)
+{
+  libc_feupdateenv_test_aarch64 (envp, 0);
+}
+
+#define libc_feupdateenv  libc_feupdateenv_aarch64
+#define libc_feupdateenvf libc_feupdateenv_aarch64
+#define libc_feupdateenvl libc_feupdateenv_aarch64
+
+static __always_inline void
+libc_feholdsetround_aarch64 (fenv_t *envp, int round)
+{
+  fpu_control_t fpcr;
+  fpu_fpsr_t fpsr;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  envp->__fpcr = fpcr;
+  envp->__fpsr = fpsr;
+
+  /* Check whether rounding modes are different.  */
+  round = (fpcr ^ round) & _FPU_FPCR_RM_MASK;
+
+  /* Set new rounding mode if different.  */
+  if (__glibc_unlikely (round != 0))
+    _FPU_SETCW (fpcr ^ round);
+}
+
+#define libc_feholdsetround  libc_feholdsetround_aarch64
+#define libc_feholdsetroundf libc_feholdsetround_aarch64
+#define libc_feholdsetroundl libc_feholdsetround_aarch64
+
+static __always_inline void
+libc_feresetround_aarch64 (fenv_t *envp)
+{
+  fpu_control_t fpcr;
+  int round;
+
+  _FPU_GETCW (fpcr);
+
+  /* Check whether rounding modes are different.  */
+  round = (envp->__fpcr ^ fpcr) & _FPU_FPCR_RM_MASK;
+
+  /* Restore the rounding mode if it was changed.  */
+  if (__glibc_unlikely (round != 0))
+    _FPU_SETCW (fpcr ^ round);
+}
+
+#define libc_feresetround  libc_feresetround_aarch64
+#define libc_feresetroundf libc_feresetround_aarch64
+#define libc_feresetroundl libc_feresetround_aarch64
+
+/* We have support for rounding mode context.  */
+#define HAVE_RM_CTX 1
+
+static __always_inline void
+libc_feholdsetround_aarch64_ctx (struct rm_ctx *ctx, int r)
+{
+  fpu_control_t fpcr;
+  int round;
+
+  _FPU_GETCW (fpcr);
+
+  /* Check whether rounding modes are different.  */
+  round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
+  ctx->updated_status = round != 0;
+
+  /* Set the rounding mode if changed.  */
+  if (__glibc_unlikely (round != 0))
+    {
+      ctx->env.__fpcr = fpcr;
+      _FPU_SETCW (fpcr ^ round);
+    }
+}
+
+#define libc_feholdsetround_ctx		libc_feholdsetround_aarch64_ctx
+#define libc_feholdsetroundf_ctx	libc_feholdsetround_aarch64_ctx
+#define libc_feholdsetroundl_ctx	libc_feholdsetround_aarch64_ctx
+
+static __always_inline void
+libc_feresetround_aarch64_ctx (struct rm_ctx *ctx)
+{
+  /* Restore the rounding mode if updated.  */
+  if (__glibc_unlikely (ctx->updated_status))
+    _FPU_SETCW (ctx->env.__fpcr);
+}
+
+#define libc_feresetround_ctx		libc_feresetround_aarch64_ctx
+#define libc_feresetroundf_ctx		libc_feresetround_aarch64_ctx
+#define libc_feresetroundl_ctx		libc_feresetround_aarch64_ctx
+
+static __always_inline void
+libc_feholdsetround_noex_aarch64_ctx (struct rm_ctx *ctx, int r)
+{
+  fpu_control_t fpcr;
+  fpu_fpsr_t fpsr;
+  int round;
+
+  _FPU_GETCW (fpcr);
+  _FPU_GETFPSR (fpsr);
+  ctx->env.__fpsr = fpsr;
+
+  /* Check whether rounding modes are different.  */
+  round = (fpcr ^ r) & _FPU_FPCR_RM_MASK;
+  ctx->updated_status = round != 0;
+
+  /* Set the rounding mode if changed.  */
+  if (__glibc_unlikely (round != 0))
+    {
+      ctx->env.__fpcr = fpcr;
+      _FPU_SETCW (fpcr ^ round);
+    }
+}
+
+#define libc_feholdsetround_noex_ctx	libc_feholdsetround_noex_aarch64_ctx
+#define libc_feholdsetround_noexf_ctx	libc_feholdsetround_noex_aarch64_ctx
+#define libc_feholdsetround_noexl_ctx	libc_feholdsetround_noex_aarch64_ctx
+
+static __always_inline void
+libc_feresetround_noex_aarch64_ctx (struct rm_ctx *ctx)
+{
+  /* Restore the rounding mode if updated.  */
+  if (__glibc_unlikely (ctx->updated_status))
+    _FPU_SETCW (ctx->env.__fpcr);
+
+  /* Write new FPSR to restore exception flags.  */
+  _FPU_SETFPSR (ctx->env.__fpsr);
+}
+
+#define libc_feresetround_noex_ctx	libc_feresetround_noex_aarch64_ctx
+#define libc_feresetround_noexf_ctx	libc_feresetround_noex_aarch64_ctx
+#define libc_feresetround_noexl_ctx	libc_feresetround_noex_aarch64_ctx
+
+#include_next <math_private.h>
+
+#endif
diff --git a/sysdeps/aarch64/fpu/s_ceil.c b/sysdeps/aarch64/fpu/s_ceil.c
new file mode 100644
index 0000000000..b5ea4ab943
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_ceil.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define	FUNC ceil
+#define INSN "frintp"
+#include <s_frint.c>
diff --git a/sysdeps/aarch64/fpu/s_ceilf.c b/sysdeps/aarch64/fpu/s_ceilf.c
new file mode 100644
index 0000000000..ea4883ad37
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_ceilf.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define	FUNC ceilf
+#define INSN "frintp"
+#include <s_frintf.c>
diff --git a/sysdeps/aarch64/fpu/s_floor.c b/sysdeps/aarch64/fpu/s_floor.c
new file mode 100644
index 0000000000..5217b80026
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_floor.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC floor
+#define INSN "frintm"
+#include <s_frint.c>
diff --git a/sysdeps/aarch64/fpu/s_floorf.c b/sysdeps/aarch64/fpu/s_floorf.c
new file mode 100644
index 0000000000..0ca0112458
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_floorf.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC floorf
+#define INSN "frintm"
+#include <s_frintf.c>
diff --git a/sysdeps/aarch64/fpu/s_fma.c b/sysdeps/aarch64/fpu/s_fma.c
new file mode 100644
index 0000000000..3d2247a47e
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_fma.c
@@ -0,0 +1,45 @@
+/* Copyright (C) 1996-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+
+#ifndef FUNC
+# define FUNC fma
+#endif
+
+#ifndef TYPE
+# define TYPE double
+# define REGS "d"
+#else
+# ifndef REGS
+#  error REGS not defined
+# endif
+#endif
+
+#define __CONCATX(a,b) __CONCAT(a,b)
+
+TYPE
+__CONCATX(__,FUNC) (TYPE x, TYPE y, TYPE z)
+{
+  TYPE result;
+  asm ( "fmadd" "\t%" REGS "0, %" REGS "1, %" REGS "2, %" REGS "3"
+        : "=w" (result) : "w" (x), "w" (y), "w" (z) );
+  return result;
+}
+
+weak_alias (__CONCATX(__,FUNC), FUNC)
diff --git a/sysdeps/aarch64/fpu/s_fmaf.c b/sysdeps/aarch64/fpu/s_fmaf.c
new file mode 100644
index 0000000000..c8d82af4ba
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_fmaf.c
@@ -0,0 +1,22 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC fmaf
+#define TYPE float
+#define REGS "s"
+#include <s_fma.c>
diff --git a/sysdeps/aarch64/fpu/s_fmax.c b/sysdeps/aarch64/fpu/s_fmax.c
new file mode 100644
index 0000000000..3d6f041f8e
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_fmax.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC fmax
+#define INSN "fmaxnm"
+#include <s_fmin.c>
diff --git a/sysdeps/aarch64/fpu/s_fmaxf.c b/sysdeps/aarch64/fpu/s_fmaxf.c
new file mode 100644
index 0000000000..31d3bcaf29
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_fmaxf.c
@@ -0,0 +1,23 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC fmaxf
+#define INSN "fmaxnm"
+#define TYPE float
+#define REGS "s"
+#include <s_fmin.c>
diff --git a/sysdeps/aarch64/fpu/s_fmin.c b/sysdeps/aarch64/fpu/s_fmin.c
new file mode 100644
index 0000000000..9132589a96
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_fmin.c
@@ -0,0 +1,49 @@
+/* Copyright (C) 1996-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+
+#ifndef FUNC
+# define FUNC fmin
+#endif
+
+#ifndef INSN
+# define INSN "fminnm"
+#endif
+
+#ifndef TYPE
+# define TYPE double
+# define REGS "d"
+#else
+# ifndef REGS
+#  error REGS not defined
+# endif
+#endif
+
+#define __CONCATX(a,b) __CONCAT(a,b)
+
+TYPE
+__CONCATX(__,FUNC) (TYPE x, TYPE y)
+{
+  TYPE result;
+  asm ( INSN "\t%" REGS "0, %" REGS "1, %" REGS "2"
+        : "=w" (result) : "w" (x), "w" (y) );
+  return result;
+}
+
+weak_alias (__CONCATX(__,FUNC), FUNC)
diff --git a/sysdeps/aarch64/fpu/s_fminf.c b/sysdeps/aarch64/fpu/s_fminf.c
new file mode 100644
index 0000000000..0763bd81d9
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_fminf.c
@@ -0,0 +1,22 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC fminf
+#define TYPE float
+#define REGS "s"
+#include <s_fmin.c>
diff --git a/sysdeps/aarch64/fpu/s_frint.c b/sysdeps/aarch64/fpu/s_frint.c
new file mode 100644
index 0000000000..4cc8b63487
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_frint.c
@@ -0,0 +1,49 @@
+/* Copyright (C) 1996-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+
+#ifndef FUNC
+# error FUNC not defined
+#endif
+
+#ifndef TYPE
+# define TYPE double
+# define REGS "d"
+#else
+# ifndef REGS
+#  error REGS not defined
+# endif
+#endif
+
+#ifndef INSN
+# error INSN not defined
+#endif
+
+#define __CONCATX(a,b) __CONCAT(a,b)
+
+TYPE
+__CONCATX(__,FUNC) (TYPE x)
+{
+  TYPE result;
+  asm ( INSN "\t%" REGS "0, %" REGS "1" :
+	"=w" (result) : "w" (x) );
+  return result;
+}
+
+weak_alias (__CONCATX(__,FUNC), FUNC)
diff --git a/sysdeps/aarch64/fpu/s_frintf.c b/sysdeps/aarch64/fpu/s_frintf.c
new file mode 100644
index 0000000000..cd779de3d6
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_frintf.c
@@ -0,0 +1,24 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef FUNC
+# error FUNC not defined
+#endif
+#define TYPE float
+#define REGS "s"
+#include <s_frint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrint.c b/sysdeps/aarch64/fpu/s_llrint.c
new file mode 100644
index 0000000000..df41eef281
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_llrint.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC llrint
+#define OTYPE long long int
+#include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llrintf.c b/sysdeps/aarch64/fpu/s_llrintf.c
new file mode 100644
index 0000000000..9aca4f3cb5
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_llrintf.c
@@ -0,0 +1,23 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC llrintf
+#define ITYPE float
+#define IREGS "s"
+#define OTYPE long long int
+#include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_llround.c b/sysdeps/aarch64/fpu/s_llround.c
new file mode 100644
index 0000000000..68e17137f3
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_llround.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC llround
+#define OTYPE long long int
+#include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_llroundf.c b/sysdeps/aarch64/fpu/s_llroundf.c
new file mode 100644
index 0000000000..c12cbfb9ef
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_llroundf.c
@@ -0,0 +1,23 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC llroundf
+#define ITYPE float
+#define IREGS "s"
+#define OTYPE long long int
+#include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_lrint.c b/sysdeps/aarch64/fpu/s_lrint.c
new file mode 100644
index 0000000000..c45c070cb0
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_lrint.c
@@ -0,0 +1,53 @@
+/* Copyright (C) 1996-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+
+#ifndef FUNC
+# define FUNC lrint
+#endif
+
+#ifndef ITYPE
+# define ITYPE double
+# define IREGS "d"
+#else
+# ifndef IREGS
+#  error IREGS not defined
+# endif
+#endif
+
+#ifndef OTYPE
+# define OTYPE long int
+#endif
+
+#define OREGS "x"
+
+#define __CONCATX(a,b) __CONCAT(a,b)
+
+OTYPE
+__CONCATX(__,FUNC) (ITYPE x)
+{
+  OTYPE result;
+  ITYPE temp;
+  asm ( "frintx" "\t%" IREGS "1, %" IREGS "2\n\t"
+        "fcvtzs" "\t%" OREGS "0, %" IREGS "1"
+        : "=r" (result), "=w" (temp) : "w" (x) );
+  return result;
+}
+
+weak_alias (__CONCATX(__,FUNC), FUNC)
diff --git a/sysdeps/aarch64/fpu/s_lrintf.c b/sysdeps/aarch64/fpu/s_lrintf.c
new file mode 100644
index 0000000000..09fea443af
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_lrintf.c
@@ -0,0 +1,22 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC lrintf
+#define ITYPE float
+#define IREGS "s"
+#include <s_lrint.c>
diff --git a/sysdeps/aarch64/fpu/s_lround.c b/sysdeps/aarch64/fpu/s_lround.c
new file mode 100644
index 0000000000..379eb34092
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_lround.c
@@ -0,0 +1,51 @@
+/* Copyright (C) 1996-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+
+#ifndef FUNC
+# define FUNC lround
+#endif
+
+#ifndef ITYPE
+# define ITYPE double
+# define IREGS "d"
+#else
+# ifndef IREGS
+#  error IREGS not defined
+# endif
+#endif
+
+#ifndef OTYPE
+# define OTYPE long int
+#endif
+
+#define OREGS "x"
+
+#define __CONCATX(a,b) __CONCAT(a,b)
+
+OTYPE
+__CONCATX(__,FUNC) (ITYPE x)
+{
+  OTYPE result;
+  asm ( "fcvtas" "\t%" OREGS "0, %" IREGS "1"
+        : "=r" (result) : "w" (x) );
+  return result;
+}
+
+weak_alias (__CONCATX(__,FUNC), FUNC)
diff --git a/sysdeps/aarch64/fpu/s_lroundf.c b/sysdeps/aarch64/fpu/s_lroundf.c
new file mode 100644
index 0000000000..01dd1cc6be
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_lroundf.c
@@ -0,0 +1,22 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC lroundf
+#define ITYPE float
+#define IREGS "s"
+#include <s_lround.c>
diff --git a/sysdeps/aarch64/fpu/s_nearbyint.c b/sysdeps/aarch64/fpu/s_nearbyint.c
new file mode 100644
index 0000000000..f3b0b4a90d
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_nearbyint.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define	FUNC nearbyint
+#define INSN "frinti"
+#include <s_frint.c>
diff --git a/sysdeps/aarch64/fpu/s_nearbyintf.c b/sysdeps/aarch64/fpu/s_nearbyintf.c
new file mode 100644
index 0000000000..43af61dbb3
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_nearbyintf.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC nearbyintf
+#define INSN "frinti"
+#include <s_frintf.c>
diff --git a/sysdeps/aarch64/fpu/s_rint.c b/sysdeps/aarch64/fpu/s_rint.c
new file mode 100644
index 0000000000..3f0469b519
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_rint.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC rint
+#define INSN "frintx"
+#include <s_frint.c>
diff --git a/sysdeps/aarch64/fpu/s_rintf.c b/sysdeps/aarch64/fpu/s_rintf.c
new file mode 100644
index 0000000000..257c4f02a9
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_rintf.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC rintf
+#define INSN "frintx"
+#include <s_frintf.c>
diff --git a/sysdeps/aarch64/fpu/s_round.c b/sysdeps/aarch64/fpu/s_round.c
new file mode 100644
index 0000000000..758668fc03
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_round.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC round
+#define INSN "frinta"
+#include <s_frint.c>
diff --git a/sysdeps/aarch64/fpu/s_roundf.c b/sysdeps/aarch64/fpu/s_roundf.c
new file mode 100644
index 0000000000..a868b8711b
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_roundf.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC roundf
+#define INSN "frinta"
+#include <s_frintf.c>
diff --git a/sysdeps/aarch64/fpu/s_trunc.c b/sysdeps/aarch64/fpu/s_trunc.c
new file mode 100644
index 0000000000..e8a6d2916e
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_trunc.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC trunc
+#define INSN "frintz"
+#include <s_frint.c>
diff --git a/sysdeps/aarch64/fpu/s_truncf.c b/sysdeps/aarch64/fpu/s_truncf.c
new file mode 100644
index 0000000000..86ccc47575
--- /dev/null
+++ b/sysdeps/aarch64/fpu/s_truncf.c
@@ -0,0 +1,21 @@
+/* Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define FUNC truncf
+#define INSN "frintz"
+#include <s_frintf.c>
diff --git a/sysdeps/aarch64/jmpbuf-offsets.h b/sysdeps/aarch64/jmpbuf-offsets.h
new file mode 100644
index 0000000000..3e6b11622c
--- /dev/null
+++ b/sysdeps/aarch64/jmpbuf-offsets.h
@@ -0,0 +1,60 @@
+/* Copyright (C) 2006-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define JB_X19            0
+#define JB_X20            1
+#define JB_X21            2
+#define JB_X22            3
+#define JB_X23            4
+#define JB_X24            5
+#define JB_X25            6
+#define JB_X26            7
+#define JB_X27            8
+#define JB_X28            9
+#define JB_X29           10
+#define JB_LR            11
+#define JB_SP		 13
+
+#define JB_D8		 14
+#define JB_D9		 15
+#define JB_D10		 16
+#define JB_D11		 17
+#define JB_D12		 18
+#define JB_D13		 19
+#define JB_D14		 20
+#define JB_D15		 21
+
+#ifndef  __ASSEMBLER__
+#include <setjmp.h>
+#include <stdint.h>
+#include <sysdep.h>
+
+static inline uintptr_t __attribute__ ((unused))
+_jmpbuf_sp (__jmp_buf jmpbuf)
+{
+  uintptr_t sp = jmpbuf[JB_SP];
+#ifdef PTR_DEMANGLE
+  PTR_DEMANGLE (sp);
+#endif
+  return sp;
+}
+#endif
+
+/* Helper for generic ____longjmp_chk(). */
+#define JB_FRAME_ADDRESS(buf) \
+  ((void *) _jmpbuf_sp (buf))
diff --git a/sysdeps/aarch64/jmpbuf-unwind.h b/sysdeps/aarch64/jmpbuf-unwind.h
new file mode 100644
index 0000000000..a27ad223f1
--- /dev/null
+++ b/sysdeps/aarch64/jmpbuf-unwind.h
@@ -0,0 +1,36 @@
+/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <setjmp.h>
+#include <jmpbuf-offsets.h>
+#include <stdint.h>
+#include <unwind.h>
+
+/* Test if longjmp to JMPBUF would unwind the frame
+   containing a local variable at ADDRESS.  */
+#define _JMPBUF_UNWINDS(jmpbuf, address, demangle) \
+  ((void *) (address) < (void *) demangle (jmpbuf[JB_SP]))
+
+#define _JMPBUF_CFA_UNWINDS_ADJ(jmpbuf, context, adj) \
+  _JMPBUF_UNWINDS_ADJ (jmpbuf, (void *) _Unwind_GetCFA (context), adj)
+
+#define _JMPBUF_UNWINDS_ADJ(_jmpbuf, _address, _adj) \
+  ((uintptr_t) (_address) - (_adj) < _jmpbuf_sp (_jmpbuf) - (_adj))
+
+/* We use the normal longjmp for unwinding.  */
+#define __libc_unwind_longjmp(buf, val) __libc_longjmp (buf, val)
diff --git a/sysdeps/aarch64/ldsodefs.h b/sysdeps/aarch64/ldsodefs.h
new file mode 100644
index 0000000000..d7b431d469
--- /dev/null
+++ b/sysdeps/aarch64/ldsodefs.h
@@ -0,0 +1,48 @@
+/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_LDSODEFS_H
+#define _AARCH64_LDSODEFS_H 1
+
+#include <elf.h>
+
+struct La_aarch64_regs;
+struct La_aarch64_retval;
+
+#define ARCH_PLTENTER_MEMBERS \
+    ElfW(Addr) (*aarch64_gnu_pltenter) (ElfW(Sym) *,                    \
+					unsigned int,			\
+                                        uintptr_t *,			\
+					uintptr_t *,                    \
+					struct La_aarch64_regs *,	\
+					unsigned int *,			\
+					const char *,			\
+					long int *)
+
+#define ARCH_PLTEXIT_MEMBERS \
+    ElfW(Addr) (*aarch64_gnu_pltexit) (ElfW(Sym) *,                     \
+				       unsigned int,                    \
+				       uintptr_t *,			\
+				       uintptr_t *,			\
+				       const struct La_aarch64_regs *,	\
+				       struct La_aarch64_retval *,	\
+				       const char *)
+
+#include_next <ldsodefs.h>
+
+#endif
diff --git a/sysdeps/aarch64/libc-tls.c b/sysdeps/aarch64/libc-tls.c
new file mode 100644
index 0000000000..13f21ed8a1
--- /dev/null
+++ b/sysdeps/aarch64/libc-tls.c
@@ -0,0 +1,32 @@
+/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <csu/libc-tls.c>
+#include <dl-tls.h>
+
+/* On AArch64, linker optimizations are not required, so __tls_get_addr
+   can be called even in statically linked binaries.  In this case module
+   must be always 1 and PT_TLS segment exist in the binary, otherwise it
+   would not link.  */
+
+void *
+__tls_get_addr (tls_index *ti)
+{
+  dtv_t *dtv = THREAD_DTV ();
+  return (char *) dtv[1].pointer.val + ti->ti_offset;
+}
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps
new file mode 100644
index 0000000000..232730b8c3
--- /dev/null
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -0,0 +1,2238 @@
+# Begin of automatic generation
+
+# Maximal error of functions:
+Function: "acos":
+float: 1
+ifloat: 1
+
+Function: "acos_downward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acos_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acosh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "acosh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "acosh_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "acosh_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "asin":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asin_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "asinh_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "asinh_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "asinh_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "atan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan2_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "atan2_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "atan2_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "atan_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "atan_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "atan_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "atanh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "atanh_downward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "atanh_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "atanh_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "cabs":
+double: 1
+idouble: 1
+
+Function: "cabs_downward":
+double: 1
+idouble: 1
+
+Function: "cabs_towardzero":
+double: 1
+idouble: 1
+
+Function: "cabs_upward":
+double: 1
+idouble: 1
+
+Function: Real part of "cacos":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacos_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "cacos_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacos_towardzero":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "cacos_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "cacos_upward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "cacosh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cacosh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacosh_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "cacosh_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacosh_towardzero":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "cacosh_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cacosh_upward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "cacosh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "carg":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "carg_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "carg_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "carg_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "casin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casin":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "casin_downward":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "casin_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "casin_towardzero":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "casin_towardzero":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "casin_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "casin_upward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "casinh":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "casinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "casinh_downward":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "casinh_downward":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "casinh_towardzero":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "casinh_towardzero":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "casinh_upward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "casinh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "catan":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catan_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "catan_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "catan_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "catan_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "catan_upward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catan_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "catanh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "catanh":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "catanh_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "catanh_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "catanh_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "catanh_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "catanh_upward":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "catanh_upward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cbrt":
+double: 3
+float: 1
+idouble: 3
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cbrt_downward":
+double: 4
+float: 1
+idouble: 4
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cbrt_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cbrt_upward":
+double: 4
+float: 1
+idouble: 4
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccos":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccos_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ccos_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccos_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ccos_towardzero":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccos_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccos_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "ccosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "ccosh_downward":
+double: 1
+float: 3
+idouble: 1
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ccosh_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccosh_towardzero":
+double: 1
+float: 3
+idouble: 1
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "ccosh_towardzero":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ccosh_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ccosh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cexp":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "cexp":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cexp_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cexp_downward":
+double: 1
+float: 3
+idouble: 1
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cexp_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "cexp_towardzero":
+double: 1
+float: 3
+idouble: 1
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cexp_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "cexp_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "clog":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "clog10":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "clog10":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog10_downward":
+double: 6
+float: 6
+idouble: 6
+ifloat: 6
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "clog10_downward":
+double: 2
+float: 4
+idouble: 2
+ifloat: 4
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog10_towardzero":
+double: 5
+float: 4
+idouble: 5
+ifloat: 4
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "clog10_towardzero":
+double: 2
+float: 4
+idouble: 2
+ifloat: 4
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog10_upward":
+double: 8
+float: 5
+idouble: 8
+ifloat: 5
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "clog10_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "clog_downward":
+double: 7
+float: 5
+idouble: 7
+ifloat: 5
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "clog_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog_towardzero":
+double: 7
+float: 5
+idouble: 7
+ifloat: 5
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "clog_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "clog_upward":
+double: 8
+float: 5
+idouble: 8
+ifloat: 5
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "clog_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "cos":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cos_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "cos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cos_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "cosh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "cosh_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 2
+
+Function: "cosh_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 2
+
+Function: "cosh_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 3
+
+Function: Real part of "cpow":
+double: 2
+float: 5
+idouble: 2
+ifloat: 5
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "cpow":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "cpow_downward":
+double: 4
+float: 8
+idouble: 4
+ifloat: 8
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "cpow_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cpow_towardzero":
+double: 4
+float: 8
+idouble: 4
+ifloat: 8
+ildouble: 6
+ldouble: 6
+
+Function: Imaginary part of "cpow_towardzero":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "cpow_upward":
+double: 4
+float: 1
+idouble: 4
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "cpow_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csin":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csin":
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csin_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csin_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csin_towardzero":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csin_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csin_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csin_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "csinh":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csinh":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csinh_downward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csinh_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csinh_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csinh_towardzero":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csinh_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csinh_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csqrt":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Imaginary part of "csqrt":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: Real part of "csqrt_downward":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csqrt_downward":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csqrt_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Imaginary part of "csqrt_towardzero":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "csqrt_upward":
+double: 5
+float: 4
+idouble: 5
+ifloat: 4
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "csqrt_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: Real part of "ctan":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ctan":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctan_downward":
+double: 6
+float: 5
+idouble: 6
+ifloat: 5
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "ctan_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "ctan_towardzero":
+double: 5
+float: 3
+idouble: 5
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: Imaginary part of "ctan_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "ctan_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "ctan_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Real part of "ctanh":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Imaginary part of "ctanh":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctanh_downward":
+double: 4
+float: 1
+idouble: 4
+ifloat: 1
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "ctanh_downward":
+double: 6
+float: 5
+idouble: 6
+ifloat: 5
+ildouble: 4
+ldouble: 4
+
+Function: Real part of "ctanh_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "ctanh_towardzero":
+double: 5
+float: 2
+idouble: 5
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: Real part of "ctanh_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: Imaginary part of "ctanh_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "erf":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erf_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "erf_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "erf_upward":
+float: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "erfc":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "erfc_downward":
+double: 3
+float: 4
+idouble: 3
+ifloat: 4
+ildouble: 3
+ldouble: 3
+
+Function: "erfc_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "erfc_upward":
+double: 3
+float: 4
+idouble: 3
+ifloat: 4
+ildouble: 3
+ldouble: 3
+
+Function: "exp10":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+
+Function: "exp10_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "exp10_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "exp10_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "exp2":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp2_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp2_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp2_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "exp_downward":
+double: 1
+idouble: 1
+
+Function: "exp_towardzero":
+double: 1
+idouble: 1
+
+Function: "exp_upward":
+double: 1
+idouble: 1
+
+Function: "expm1":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "expm1_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "expm1_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "expm1_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "gamma":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "gamma_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: "gamma_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: "gamma_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "hypot":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "hypot_downward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "hypot_towardzero":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "hypot_upward":
+double: 1
+idouble: 1
+ildouble: 1
+ldouble: 1
+
+Function: "j0":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "j0_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "j0_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "j0_upward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "j1":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "j1_downward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "j1_towardzero":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "j1_upward":
+double: 3
+float: 4
+idouble: 3
+ifloat: 4
+ildouble: 3
+ldouble: 3
+
+Function: "jn":
+double: 4
+float: 4
+idouble: 4
+ifloat: 4
+ildouble: 7
+ldouble: 7
+
+Function: "jn_downward":
+double: 4
+float: 5
+idouble: 4
+ifloat: 5
+ildouble: 8
+ldouble: 8
+
+Function: "jn_towardzero":
+double: 4
+float: 5
+idouble: 4
+ifloat: 5
+ildouble: 8
+ldouble: 8
+
+Function: "jn_upward":
+double: 5
+float: 4
+idouble: 5
+ifloat: 4
+ildouble: 7
+ldouble: 7
+
+Function: "lgamma":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "lgamma_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: "lgamma_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: "lgamma_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "log":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log10":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log10_downward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 1
+ldouble: 1
+
+Function: "log10_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log10_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log1p":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log1p_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log1p_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log1p_upward":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log2":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log2_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 1
+ldouble: 1
+
+Function: "log2_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log2_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 1
+ldouble: 1
+
+Function: "log_downward":
+float: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "log_towardzero":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "log_upward":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow10":
+double: 2
+idouble: 2
+ildouble: 1
+ldouble: 1
+
+Function: "pow10_downward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "pow10_towardzero":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "pow10_upward":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "pow_downward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "pow_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sin":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sin_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "sin_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sin_upward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "sincos":
+float: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "sincos_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 3
+ldouble: 3
+
+Function: "sincos_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sincos_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 2
+ldouble: 2
+
+Function: "sinh":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "sinh_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "sinh_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: "tan":
+float: 1
+ifloat: 1
+
+Function: "tan_downward":
+double: 1
+float: 2
+idouble: 1
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "tan_towardzero":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "tan_upward":
+double: 1
+float: 1
+idouble: 1
+ifloat: 1
+ildouble: 1
+ldouble: 1
+
+Function: "tanh":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 1
+ldouble: 1
+
+Function: "tanh_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 2
+ldouble: 2
+
+Function: "tanh_towardzero":
+double: 2
+float: 2
+idouble: 2
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "tanh_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "tgamma":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "tgamma_downward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "tgamma_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "tgamma_upward":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 4
+ldouble: 4
+
+Function: "y0":
+double: 2
+float: 1
+idouble: 2
+ifloat: 1
+ildouble: 3
+ldouble: 3
+
+Function: "y0_downward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "y0_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "y0_upward":
+double: 2
+float: 3
+idouble: 2
+ifloat: 3
+ildouble: 3
+ldouble: 3
+
+Function: "y1":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "y1_downward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 4
+ldouble: 4
+
+Function: "y1_towardzero":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 2
+ldouble: 2
+
+Function: "y1_upward":
+double: 5
+float: 2
+idouble: 5
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "yn":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "yn_downward":
+double: 3
+float: 2
+idouble: 3
+ifloat: 2
+ildouble: 5
+ldouble: 5
+
+Function: "yn_towardzero":
+double: 3
+float: 3
+idouble: 3
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+Function: "yn_upward":
+double: 4
+float: 3
+idouble: 4
+ifloat: 3
+ildouble: 5
+ldouble: 5
+
+# end of automatic generation
diff --git a/sysdeps/aarch64/machine-gmon.h b/sysdeps/aarch64/machine-gmon.h
new file mode 100644
index 0000000000..84057176cc
--- /dev/null
+++ b/sysdeps/aarch64/machine-gmon.h
@@ -0,0 +1,34 @@
+/* AArch64 definitions for profiling support.
+   Copyright (C) 1996-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Accept 'frompc' address as argument from the function that calls
+   __mcount for profiling.  Use  __builtin_return_address (0)
+   for the 'selfpc' address.  */
+
+#include <sysdep.h>
+
+static void mcount_internal (u_long frompc, u_long selfpc);
+
+#define _MCOUNT_DECL(frompc, selfpc) \
+static inline void mcount_internal (u_long frompc, u_long selfpc)
+
+#define MCOUNT                                                    \
+void __mcount (void *frompc)                                      \
+{                                                                 \
+  mcount_internal ((u_long) frompc, (u_long) RETURN_ADDRESS (0)); \
+}
diff --git a/sysdeps/aarch64/math-tests.h b/sysdeps/aarch64/math-tests.h
new file mode 100644
index 0000000000..0a3fd820f2
--- /dev/null
+++ b/sysdeps/aarch64/math-tests.h
@@ -0,0 +1,22 @@
+/* Configuration for math tests.  AArch64 version.
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Trapping exceptions are optional on AArch64.  */
+#define EXCEPTION_ENABLE_SUPPORTED(EXCEPT)	((EXCEPT) == 0)
+
+#include_next <math-tests.h>
diff --git a/sysdeps/aarch64/mcount.c b/sysdeps/aarch64/mcount.c
new file mode 100644
index 0000000000..ca1e817f14
--- /dev/null
+++ b/sysdeps/aarch64/mcount.c
@@ -0,0 +1,33 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <shlib-compat.h>
+
+#include <gmon/mcount.c>
+
+/* We forgot to add _mcount in glibc 2.17.  We added it in 2.18
+   therefore we want it to be added with version GLIBC_2_18.  However,
+   setting the version is not straight forward because a generic
+   Version file includes an earlier 2.xx version for each this symbol
+   and the linker uses the first version it sees.  */
+
+#if SHLIB_COMPAT (libc, GLIBC_2_17, GLIBC_2_18)
+versioned_symbol (libc, __mcount, _mcount, GLIBC_2_18);
+#else
+strong_alias (__mcount, _mcount);
+#endif
diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S
new file mode 100644
index 0000000000..b0b34fa77c
--- /dev/null
+++ b/sysdeps/aarch64/memcmp.S
@@ -0,0 +1,151 @@
+/* memcmp - compare memory
+
+   Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+/* Parameters and result.  */
+#define src1		x0
+#define src2		x1
+#define limit		x2
+#define result		x0
+
+/* Internal variables.  */
+#define data1		x3
+#define data1w		w3
+#define data2		x4
+#define data2w		w4
+#define has_nul		x5
+#define diff		x6
+#define endloop		x7
+#define tmp1		x8
+#define tmp2		x9
+#define tmp3		x10
+#define pos		x11
+#define limit_wd	x12
+#define mask		x13
+
+ENTRY_ALIGN (memcmp, 6)
+	cbz	limit, L(ret0)
+	eor	tmp1, src1, src2
+	tst	tmp1, #7
+	b.ne	L(misaligned8)
+	ands	tmp1, src1, #7
+	b.ne	L(mutual_align)
+	add	limit_wd, limit, #7
+	lsr	limit_wd, limit_wd, #3
+	/* Start of performance-critical section  -- one 64B cache line.  */
+L(loop_aligned):
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+L(start_realigned):
+	subs	limit_wd, limit_wd, #1
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	csinv	endloop, diff, xzr, ne	/* Last Dword or differences.  */
+	cbz	endloop, L(loop_aligned)
+	/* End of performance-critical section  -- one 64B cache line.  */
+
+	/* Not reached the limit, must have found a diff.  */
+	cbnz	limit_wd, L(not_limit)
+
+	/* Limit % 8 == 0 => all bytes significant.  */
+	ands	limit, limit, #7
+	b.eq	L(not_limit)
+
+	lsl	limit, limit, #3	/* Bits -> bytes.  */
+	mov	mask, #~0
+#ifdef __AARCH64EB__
+	lsr	mask, mask, limit
+#else
+	lsl	mask, mask, limit
+#endif
+	bic	data1, data1, mask
+	bic	data2, data2, mask
+
+	orr	diff, diff, mask
+L(not_limit):
+
+#ifndef	__AARCH64EB__
+	rev	diff, diff
+	rev	data1, data1
+	rev	data2, data2
+#endif
+	/* The MS-non-zero bit of DIFF marks either the first bit
+	   that is different, or the end of the significant data.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	clz	pos, diff
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	RET
+
+L(mutual_align):
+	/* Sources are mutually aligned, but are not currently at an
+	   alignment boundary.  Round down the addresses and then mask off
+	   the bytes that precede the start point.  */
+	bic	src1, src1, #7
+	bic	src2, src2, #7
+	add	limit, limit, tmp1	/* Adjust the limit for the extra.  */
+	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
+	ldr	data1, [src1], #8
+	neg	tmp1, tmp1		/* Bits to alignment -64.  */
+	ldr	data2, [src2], #8
+	mov	tmp2, #~0
+#ifdef __AARCH64EB__
+	/* Big-endian.  Early bytes are at MSB.  */
+	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#endif
+	add	limit_wd, limit, #7
+	orr	data1, data1, tmp2
+	orr	data2, data2, tmp2
+	lsr	limit_wd, limit_wd, #3
+	b	L(start_realigned)
+
+L(ret0):
+	mov	result, #0
+	RET
+
+	.p2align 6
+L(misaligned8):
+	sub	limit, limit, #1
+1:
+	/* Perhaps we can do better than this.  */
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	subs	limit, limit, #1
+	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	b.eq	1b
+	sub	result, data1, data2
+	RET
+END (memcmp)
+#undef bcmp
+weak_alias (memcmp, bcmp)
+libc_hidden_builtin_def (memcmp)
diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S
new file mode 100644
index 0000000000..b3d550e2b7
--- /dev/null
+++ b/sysdeps/aarch64/memcpy.S
@@ -0,0 +1,176 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ *
+ */
+
+#define dstin	x0
+#define src	x1
+#define count	x2
+#define tmp1	x3
+#define tmp1w	w3
+#define tmp2	x4
+#define tmp2w	w4
+#define tmp3	x5
+#define tmp3w	w5
+#define dst	x6
+
+#define A_l	x7
+#define A_h	x8
+#define B_l	x9
+#define B_h	x10
+#define C_l	x11
+#define C_h	x12
+#define D_l	x13
+#define D_h	x14
+
+#include <sysdep.h>
+
+ENTRY_ALIGN (memcpy, 6)
+
+	mov	dst, dstin
+	cmp	count, #64
+	b.ge	L(cpy_not_short)
+	cmp	count, #15
+	b.le	L(tail15tiny)
+
+	/* Deal with small copies quickly by dropping straight into the
+	 * exit block.  */
+L(tail63):
+	/* Copy up to 48 bytes of data.  At this point we only need the
+	 * bottom 6 bits of count to be accurate.  */
+	ands	tmp1, count, #0x30
+	b.eq	L(tail15)
+	add	dst, dst, tmp1
+	add	src, src, tmp1
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	ldp	A_l, A_h, [src, #-48]
+	stp	A_l, A_h, [dst, #-48]
+1:
+	ldp	A_l, A_h, [src, #-32]
+	stp	A_l, A_h, [dst, #-32]
+2:
+	ldp	A_l, A_h, [src, #-16]
+	stp	A_l, A_h, [dst, #-16]
+
+L(tail15):
+	ands	count, count, #15
+	beq	1f
+	add	src, src, count
+	ldp	A_l, A_h, [src, #-16]
+	add	dst, dst, count
+	stp	A_l, A_h, [dst, #-16]
+1:
+	RET
+
+L(tail15tiny):
+	/* Copy up to 15 bytes of data.  Does not assume additional data
+	   being copied.  */
+	tbz	count, #3, 1f
+	ldr	tmp1, [src], #8
+	str	tmp1, [dst], #8
+1:
+	tbz	count, #2, 1f
+	ldr	tmp1w, [src], #4
+	str	tmp1w, [dst], #4
+1:
+	tbz	count, #1, 1f
+	ldrh	tmp1w, [src], #2
+	strh	tmp1w, [dst], #2
+1:
+	tbz	count, #0, 1f
+	ldrb	tmp1w, [src]
+	strb	tmp1w, [dst]
+1:
+	RET
+
+L(cpy_not_short):
+	/* We don't much care about the alignment of DST, but we want SRC
+	 * to be 128-bit (16 byte) aligned so that we don't cross cache line
+	 * boundaries on both loads and stores.  */
+	neg	tmp2, src
+	ands	tmp2, tmp2, #15		/* Bytes to reach alignment.  */
+	b.eq	2f
+	sub	count, count, tmp2
+	/* Copy more data than needed; it's faster than jumping
+	 * around copying sub-Quadword quantities.  We know that
+	 * it can't overrun.  */
+	ldp	A_l, A_h, [src]
+	add	src, src, tmp2
+	stp	A_l, A_h, [dst]
+	add	dst, dst, tmp2
+	/* There may be less than 63 bytes to go now.  */
+	cmp	count, #63
+	b.le	L(tail63)
+2:
+	subs	count, count, #128
+	b.ge	L(cpy_body_large)
+	/* Less than 128 bytes to copy, so handle 64 here and then jump
+	 * to the tail.  */
+	ldp	A_l, A_h, [src]
+	ldp	B_l, B_h, [src, #16]
+	ldp	C_l, C_h, [src, #32]
+	ldp	D_l, D_h, [src, #48]
+	stp	A_l, A_h, [dst]
+	stp	B_l, B_h, [dst, #16]
+	stp	C_l, C_h, [dst, #32]
+	stp	D_l, D_h, [dst, #48]
+	tst	count, #0x3f
+	add	src, src, #64
+	add	dst, dst, #64
+	b.ne	L(tail63)
+	RET
+
+	/* Critical loop.  Start at a new cache line boundary.  Assuming
+	 * 64 bytes per line this ensures the entire loop is in one line.  */
+	.p2align 6
+L(cpy_body_large):
+	/* There are at least 128 bytes to copy.  */
+	ldp	A_l, A_h, [src, #0]
+	sub	dst, dst, #16		/* Pre-bias.  */
+	ldp	B_l, B_h, [src, #16]
+	ldp	C_l, C_h, [src, #32]
+	ldp	D_l, D_h, [src, #48]!	/* src += 64 - Pre-bias.  */
+1:
+	stp	A_l, A_h, [dst, #16]
+	ldp	A_l, A_h, [src, #16]
+	stp	B_l, B_h, [dst, #32]
+	ldp	B_l, B_h, [src, #32]
+	stp	C_l, C_h, [dst, #48]
+	ldp	C_l, C_h, [src, #48]
+	stp	D_l, D_h, [dst, #64]!
+	ldp	D_l, D_h, [src, #64]!
+	subs	count, count, #64
+	b.ge	1b
+	stp	A_l, A_h, [dst, #16]
+	stp	B_l, B_h, [dst, #32]
+	stp	C_l, C_h, [dst, #48]
+	stp	D_l, D_h, [dst, #64]
+	add	src, src, #16
+	add	dst, dst, #64 + 16
+	tst	count, #0x3f
+	b.ne	L(tail63)
+	RET
+END (memcpy)
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/aarch64/memmove.S b/sysdeps/aarch64/memmove.S
new file mode 100644
index 0000000000..8d0b32847e
--- /dev/null
+++ b/sysdeps/aarch64/memmove.S
@@ -0,0 +1,312 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ */
+
+/* Parameters and result.  */
+#define dstin	x0
+#define src	x1
+#define count	x2
+#define tmp1	x3
+#define tmp1w	w3
+#define tmp2	x4
+#define tmp2w	w4
+#define tmp3	x5
+#define tmp3w	w5
+#define dst	x6
+
+#define A_l	x7
+#define A_h	x8
+#define B_l	x9
+#define B_h	x10
+#define C_l	x11
+#define C_h	x12
+#define D_l	x13
+#define D_h	x14
+
+ENTRY_ALIGN (memmove, 6)
+
+	cmp	dstin, src
+	b.lo	L(downwards)
+	add	tmp1, src, count
+	cmp	dstin, tmp1
+	b.hs	memcpy		/* No overlap.  */
+
+	/* Upwards move with potential overlap.
+	 * Need to move from the tail backwards.  SRC and DST point one
+	 * byte beyond the remaining data to move.  */
+	add	dst, dstin, count
+	add	src, src, count
+	cmp	count, #64
+	b.ge	L(mov_not_short_up)
+
+	/* Deal with small moves quickly by dropping straight into the
+	 * exit block.  */
+L(tail63up):
+	/* Move up to 48 bytes of data.  At this point we only need the
+	 * bottom 6 bits of count to be accurate.  */
+	ands	tmp1, count, #0x30
+	b.eq	L(tail15up)
+	sub	dst, dst, tmp1
+	sub	src, src, tmp1
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	ldp	A_l, A_h, [src, #32]
+	stp	A_l, A_h, [dst, #32]
+1:
+	ldp	A_l, A_h, [src, #16]
+	stp	A_l, A_h, [dst, #16]
+2:
+	ldp	A_l, A_h, [src]
+	stp	A_l, A_h, [dst]
+L(tail15up):
+	/* Move up to 15 bytes of data.  Does not assume additional data
+	 * being moved.  */
+	tbz	count, #3, 1f
+	ldr	tmp1, [src, #-8]!
+	str	tmp1, [dst, #-8]!
+1:
+	tbz	count, #2, 1f
+	ldr	tmp1w, [src, #-4]!
+	str	tmp1w, [dst, #-4]!
+1:
+	tbz	count, #1, 1f
+	ldrh	tmp1w, [src, #-2]!
+	strh	tmp1w, [dst, #-2]!
+1:
+	tbz	count, #0, 1f
+	ldrb	tmp1w, [src, #-1]
+	strb	tmp1w, [dst, #-1]
+1:
+	RET
+
+L(mov_not_short_up):
+	/* We don't much care about the alignment of DST, but we want SRC
+	 * to be 128-bit (16 byte) aligned so that we don't cross cache line
+	 * boundaries on both loads and stores.  */
+	ands	tmp2, src, #15		/* Bytes to reach alignment.  */
+	b.eq	2f
+	sub	count, count, tmp2
+	/* Move enough data to reach alignment; unlike memcpy, we have to
+	 * be aware of the overlap, which means we can't move data twice.  */
+	tbz	tmp2, #3, 1f
+	ldr	tmp1, [src, #-8]!
+	str	tmp1, [dst, #-8]!
+1:
+	tbz	tmp2, #2, 1f
+	ldr	tmp1w, [src, #-4]!
+	str	tmp1w, [dst, #-4]!
+1:
+	tbz	tmp2, #1, 1f
+	ldrh	tmp1w, [src, #-2]!
+	strh	tmp1w, [dst, #-2]!
+1:
+	tbz	tmp2, #0, 1f
+	ldrb	tmp1w, [src, #-1]!
+	strb	tmp1w, [dst, #-1]!
+1:
+
+	/* There may be less than 63 bytes to go now.  */
+	cmp	count, #63
+	b.le	L(tail63up)
+2:
+	subs	count, count, #128
+	b.ge	L(mov_body_large_up)
+	/* Less than 128 bytes to move, so handle 64 here and then jump
+	 * to the tail.  */
+	ldp	A_l, A_h, [src, #-64]!
+	ldp	B_l, B_h, [src, #16]
+	ldp	C_l, C_h, [src, #32]
+	ldp	D_l, D_h, [src, #48]
+	stp	A_l, A_h, [dst, #-64]!
+	stp	B_l, B_h, [dst, #16]
+	stp	C_l, C_h, [dst, #32]
+	stp	D_l, D_h, [dst, #48]
+	tst	count, #0x3f
+	b.ne	L(tail63up)
+	RET
+
+	/* Critical loop.  Start at a new Icache line boundary.  Assuming
+	 * 64 bytes per line this ensures the entire loop is in one line.  */
+	.p2align 6
+L(mov_body_large_up):
+	/* There are at least 128 bytes to move.  */
+	ldp	A_l, A_h, [src, #-16]
+	ldp	B_l, B_h, [src, #-32]
+	ldp	C_l, C_h, [src, #-48]
+	ldp	D_l, D_h, [src, #-64]!
+1:
+	stp	A_l, A_h, [dst, #-16]
+	ldp	A_l, A_h, [src, #-16]
+	stp	B_l, B_h, [dst, #-32]
+	ldp	B_l, B_h, [src, #-32]
+	stp	C_l, C_h, [dst, #-48]
+	ldp	C_l, C_h, [src, #-48]
+	stp	D_l, D_h, [dst, #-64]!
+	ldp	D_l, D_h, [src, #-64]!
+	subs	count, count, #64
+	b.ge	1b
+	stp	A_l, A_h, [dst, #-16]
+	stp	B_l, B_h, [dst, #-32]
+	stp	C_l, C_h, [dst, #-48]
+	stp	D_l, D_h, [dst, #-64]!
+	tst	count, #0x3f
+	b.ne	L(tail63up)
+	RET
+
+L(downwards):
+	/* For a downwards move we can safely use memcpy provided that
+	 * DST is more than 16 bytes away from SRC.  */
+	sub	tmp1, src, #16
+	cmp	dstin, tmp1
+	b.ls	memcpy		/* May overlap, but not critically.  */
+
+	mov	dst, dstin	/* Preserve DSTIN for return value.  */
+	cmp	count, #64
+	b.ge	L(mov_not_short_down)
+
+	/* Deal with small moves quickly by dropping straight into the
+	 * exit block.  */
+L(tail63down):
+	/* Move up to 48 bytes of data.  At this point we only need the
+	 * bottom 6 bits of count to be accurate.  */
+	ands	tmp1, count, #0x30
+	b.eq	L(tail15down)
+	add	dst, dst, tmp1
+	add	src, src, tmp1
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	ldp	A_l, A_h, [src, #-48]
+	stp	A_l, A_h, [dst, #-48]
+1:
+	ldp	A_l, A_h, [src, #-32]
+	stp	A_l, A_h, [dst, #-32]
+2:
+	ldp	A_l, A_h, [src, #-16]
+	stp	A_l, A_h, [dst, #-16]
+L(tail15down):
+	/* Move up to 15 bytes of data.  Does not assume additional data
+	   being moved.  */
+	tbz	count, #3, 1f
+	ldr	tmp1, [src], #8
+	str	tmp1, [dst], #8
+1:
+	tbz	count, #2, 1f
+	ldr	tmp1w, [src], #4
+	str	tmp1w, [dst], #4
+1:
+	tbz	count, #1, 1f
+	ldrh	tmp1w, [src], #2
+	strh	tmp1w, [dst], #2
+1:
+	tbz	count, #0, 1f
+	ldrb	tmp1w, [src]
+	strb	tmp1w, [dst]
+1:
+	RET
+
+L(mov_not_short_down):
+	/* We don't much care about the alignment of DST, but we want SRC
+	 * to be 128-bit (16 byte) aligned so that we don't cross cache line
+	 * boundaries on both loads and stores.  */
+	neg	tmp2, src
+	ands	tmp2, tmp2, #15		/* Bytes to reach alignment.  */
+	b.eq	2f
+	sub	count, count, tmp2
+	/* Move enough data to reach alignment; unlike memcpy, we have to
+	 * be aware of the overlap, which means we can't move data twice.  */
+	tbz	tmp2, #3, 1f
+	ldr	tmp1, [src], #8
+	str	tmp1, [dst], #8
+1:
+	tbz	tmp2, #2, 1f
+	ldr	tmp1w, [src], #4
+	str	tmp1w, [dst], #4
+1:
+	tbz	tmp2, #1, 1f
+	ldrh	tmp1w, [src], #2
+	strh	tmp1w, [dst], #2
+1:
+	tbz	tmp2, #0, 1f
+	ldrb	tmp1w, [src], #1
+	strb	tmp1w, [dst], #1
+1:
+
+	/* There may be less than 63 bytes to go now.  */
+	cmp	count, #63
+	b.le	L(tail63down)
+2:
+	subs	count, count, #128
+	b.ge	L(mov_body_large_down)
+	/* Less than 128 bytes to move, so handle 64 here and then jump
+	 * to the tail.  */
+	ldp	A_l, A_h, [src]
+	ldp	B_l, B_h, [src, #16]
+	ldp	C_l, C_h, [src, #32]
+	ldp	D_l, D_h, [src, #48]
+	stp	A_l, A_h, [dst]
+	stp	B_l, B_h, [dst, #16]
+	stp	C_l, C_h, [dst, #32]
+	stp	D_l, D_h, [dst, #48]
+	tst	count, #0x3f
+	add	src, src, #64
+	add	dst, dst, #64
+	b.ne	L(tail63down)
+	RET
+
+	/* Critical loop.  Start at a new cache line boundary.  Assuming
+	 * 64 bytes per line this ensures the entire loop is in one line.  */
+	.p2align 6
+L(mov_body_large_down):
+	/* There are at least 128 bytes to move.  */
+	ldp	A_l, A_h, [src, #0]
+	sub	dst, dst, #16		/* Pre-bias.  */
+	ldp	B_l, B_h, [src, #16]
+	ldp	C_l, C_h, [src, #32]
+	ldp	D_l, D_h, [src, #48]!	/* src += 64 - Pre-bias.  */
+1:
+	stp	A_l, A_h, [dst, #16]
+	ldp	A_l, A_h, [src, #16]
+	stp	B_l, B_h, [dst, #32]
+	ldp	B_l, B_h, [src, #32]
+	stp	C_l, C_h, [dst, #48]
+	ldp	C_l, C_h, [src, #48]
+	stp	D_l, D_h, [dst, #64]!
+	ldp	D_l, D_h, [src, #64]!
+	subs	count, count, #64
+	b.ge	1b
+	stp	A_l, A_h, [dst, #16]
+	stp	B_l, B_h, [dst, #32]
+	stp	C_l, C_h, [dst, #48]
+	stp	D_l, D_h, [dst, #64]
+	add	src, src, #16
+	add	dst, dst, #64 + 16
+	tst	count, #0x3f
+	b.ne	L(tail63down)
+	RET
+END (memmove)
+
+libc_hidden_builtin_def (memmove)
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
new file mode 100644
index 0000000000..816640a129
--- /dev/null
+++ b/sysdeps/aarch64/memset.S
@@ -0,0 +1,229 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Unaligned accesses
+ *
+ */
+
+#include <sysdep.h>
+
+/* By default we assume that the DC instruction can be used to zero
+   data blocks more efficiently.  In some circumstances this might be
+   unsafe, for example in an asymmetric multiprocessor environment with
+   different DC clear lengths (neither the upper nor lower lengths are
+   safe to use).  The feature can be disabled by defining DONT_USE_DC.
+
+   If code may be run in a virtualized environment, then define
+   MAYBE_VIRT.  This will cause the code to cache the system register
+   values rather than re-reading them each call.  */
+
+#define dstin		x0
+#define val		w1
+#define count		x2
+#define tmp1		x3
+#define tmp1w		w3
+#define tmp2		x4
+#define tmp2w		w4
+#define zva_len_x	x5
+#define zva_len		w5
+#define zva_bits_x	x6
+
+#define A_l		x7
+#define A_lw		w7
+#define dst		x8
+#define tmp3w		w9
+
+ENTRY_ALIGN (__memset, 6)
+
+	mov	dst, dstin		/* Preserve return value.  */
+	ands	A_lw, val, #255
+#ifndef DONT_USE_DC
+	b.eq	L(zero_mem)
+#endif
+	orr	A_lw, A_lw, A_lw, lsl #8
+	orr	A_lw, A_lw, A_lw, lsl #16
+	orr	A_l, A_l, A_l, lsl #32
+L(tail_maybe_long):
+	cmp	count, #64
+	b.ge	L(not_short)
+L(tail_maybe_tiny):
+	cmp	count, #15
+	b.le	L(tail15tiny)
+L(tail63):
+	ands	tmp1, count, #0x30
+	b.eq	L(tail15)
+	add	dst, dst, tmp1
+	cmp	tmp1w, #0x20
+	b.eq	1f
+	b.lt	2f
+	stp	A_l, A_l, [dst, #-48]
+1:
+	stp	A_l, A_l, [dst, #-32]
+2:
+	stp	A_l, A_l, [dst, #-16]
+
+L(tail15):
+	and	count, count, #15
+	add	dst, dst, count
+	stp	A_l, A_l, [dst, #-16]	/* Repeat some/all of last store. */
+	RET
+
+L(tail15tiny):
+	/* Set up to 15 bytes.  Does not assume earlier memory
+	   being set.  */
+	tbz	count, #3, 1f
+	str	A_l, [dst], #8
+1:
+	tbz	count, #2, 1f
+	str	A_lw, [dst], #4
+1:
+	tbz	count, #1, 1f
+	strh	A_lw, [dst], #2
+1:
+	tbz	count, #0, 1f
+	strb	A_lw, [dst]
+1:
+	RET
+
+	/* Critical loop.  Start at a new cache line boundary.  Assuming
+	 * 64 bytes per line, this ensures the entire loop is in one line.  */
+	.p2align 6
+L(not_short):
+	neg	tmp2, dst
+	ands	tmp2, tmp2, #15
+	b.eq	2f
+	/* Bring DST to 128-bit (16-byte) alignment.  We know that there's
+	 * more than that to set, so we simply store 16 bytes and advance by
+	 * the amount required to reach alignment.  */
+	sub	count, count, tmp2
+	stp	A_l, A_l, [dst]
+	add	dst, dst, tmp2
+	/* There may be less than 63 bytes to go now.  */
+	cmp	count, #63
+	b.le	L(tail63)
+2:
+	sub	dst, dst, #16		/* Pre-bias.  */
+	sub	count, count, #64
+1:
+	stp	A_l, A_l, [dst, #16]
+	stp	A_l, A_l, [dst, #32]
+	stp	A_l, A_l, [dst, #48]
+	stp	A_l, A_l, [dst, #64]!
+	subs	count, count, #64
+	b.ge	1b
+	tst	count, #0x3f
+	add	dst, dst, #16
+	b.ne	L(tail63)
+	RET
+
+#ifndef DONT_USE_DC
+	/* For zeroing memory, check to see if we can use the ZVA feature to
+	 * zero entire 'cache' lines.  */
+L(zero_mem):
+	mov	A_l, #0
+	cmp	count, #63
+	b.le	L(tail_maybe_tiny)
+	neg	tmp2, dst
+	ands	tmp2, tmp2, #15
+	b.eq	1f
+	sub	count, count, tmp2
+	stp	A_l, A_l, [dst]
+	add	dst, dst, tmp2
+	cmp	count, #63
+	b.le	L(tail63)
+1:
+	/* For zeroing small amounts of memory, it's not worth setting up
+	 * the line-clear code.  */
+	cmp	count, #128
+	b.lt	L(not_short)
+#ifdef MAYBE_VIRT
+	/* For efficiency when virtualized, we cache the ZVA capability.  */
+	adrp	tmp2, L(cache_clear)
+	ldr	zva_len, [tmp2, #:lo12:L(cache_clear)]
+	tbnz	zva_len, #31, L(not_short)
+	cbnz	zva_len, L(zero_by_line)
+	mrs	tmp1, dczid_el0
+	tbz	tmp1, #4, 1f
+	/* ZVA not available.  Remember this for next time.  */
+	mov	zva_len, #~0
+	str	zva_len, [tmp2, #:lo12:L(cache_clear)]
+	b	L(not_short)
+1:
+	mov	tmp3w, #4
+	and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
+	lsl	zva_len, tmp3w, zva_len
+	str	zva_len, [tmp2, #:lo12:L(cache_clear)]
+#else
+	mrs	tmp1, dczid_el0
+	tbnz	tmp1, #4, L(not_short)
+	mov	tmp3w, #4
+	and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
+	lsl	zva_len, tmp3w, zva_len
+#endif
+
+L(zero_by_line):
+	/* Compute how far we need to go to become suitably aligned.  We're
+	 * already at quad-word alignment.  */
+	cmp	count, zva_len_x
+	b.lt	L(not_short)		/* Not enough to reach alignment.  */
+	sub	zva_bits_x, zva_len_x, #1
+	neg	tmp2, dst
+	ands	tmp2, tmp2, zva_bits_x
+	b.eq	1f			/* Already aligned.  */
+	/* Not aligned, check that there's enough to copy after alignment.  */
+	sub	tmp1, count, tmp2
+	cmp	tmp1, #64
+	ccmp	tmp1, zva_len_x, #8, ge	/* NZCV=0b1000 */
+	b.lt	L(not_short)
+	/* We know that there's at least 64 bytes to zero and that it's safe
+	 * to overrun by 64 bytes.  */
+	mov	count, tmp1
+2:
+	stp	A_l, A_l, [dst]
+	stp	A_l, A_l, [dst, #16]
+	stp	A_l, A_l, [dst, #32]
+	subs	tmp2, tmp2, #64
+	stp	A_l, A_l, [dst, #48]
+	add	dst, dst, #64
+	b.ge	2b
+	/* We've overrun a bit, so adjust dst downwards.  */
+	add	dst, dst, tmp2
+1:
+	sub	count, count, zva_len_x
+3:
+	dc	zva, dst
+	add	dst, dst, zva_len_x
+	subs	count, count, zva_len_x
+	b.ge	3b
+	ands	count, count, zva_bits_x
+	b.ne	L(tail_maybe_long)
+	RET
+#ifdef MAYBE_VIRT
+	.bss
+	.p2align 2
+L(cache_clear):
+	.space 4
+#endif
+#endif /* DONT_USE_DC */
+
+END (__memset)
+weak_alias (__memset, memset)
+libc_hidden_builtin_def (memset)
diff --git a/sysdeps/aarch64/memusage.h b/sysdeps/aarch64/memusage.h
new file mode 100644
index 0000000000..3696449354
--- /dev/null
+++ b/sysdeps/aarch64/memusage.h
@@ -0,0 +1,21 @@
+/* Copyright (C) 2000-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define GETSP() ({ register uintptr_t stack_ptr asm ("sp"); stack_ptr; })
+
+#include <sysdeps/generic/memusage.h>
diff --git a/sysdeps/aarch64/nptl/Makefile b/sysdeps/aarch64/nptl/Makefile
new file mode 100644
index 0000000000..f012c2b9f4
--- /dev/null
+++ b/sysdeps/aarch64/nptl/Makefile
@@ -0,0 +1,21 @@
+# Copyright (C) 2005-2015 Free Software Foundation, Inc.
+#
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+ifeq ($(subdir),csu)
+gen-as-const-headers += tcb-offsets.sym
+endif
diff --git a/sysdeps/aarch64/nptl/bits/pthreadtypes.h b/sysdeps/aarch64/nptl/bits/pthreadtypes.h
new file mode 100644
index 0000000000..0e4795e9f3
--- /dev/null
+++ b/sysdeps/aarch64/nptl/bits/pthreadtypes.h
@@ -0,0 +1,174 @@
+/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _BITS_PTHREADTYPES_H
+#define _BITS_PTHREADTYPES_H	1
+
+#include <endian.h>
+
+#define __SIZEOF_PTHREAD_ATTR_T        64
+#define __SIZEOF_PTHREAD_MUTEX_T       48
+#define __SIZEOF_PTHREAD_MUTEXATTR_T    8
+#define __SIZEOF_PTHREAD_COND_T        48
+#define __SIZEOF_PTHREAD_COND_COMPAT_T 48
+#define __SIZEOF_PTHREAD_CONDATTR_T     8
+#define __SIZEOF_PTHREAD_RWLOCK_T      56
+#define __SIZEOF_PTHREAD_RWLOCKATTR_T   8
+#define __SIZEOF_PTHREAD_BARRIER_T     32
+#define __SIZEOF_PTHREAD_BARRIERATTR_T  8
+
+
+/* Thread identifiers.  The structure of the attribute type is not
+   exposed on purpose.  */
+typedef unsigned long int pthread_t;
+
+
+union pthread_attr_t
+{
+  char __size[__SIZEOF_PTHREAD_ATTR_T];
+  long int __align;
+};
+#ifndef __have_pthread_attr_t
+typedef union pthread_attr_t pthread_attr_t;
+# define __have_pthread_attr_t1
+#endif
+
+typedef struct __pthread_internal_list
+{
+  struct __pthread_internal_list *__prev;
+  struct __pthread_internal_list *__next;
+} __pthread_list_t;
+
+
+/* Data structures for mutex handling.  The structure of the attribute
+   type is not exposed on purpose.  */
+typedef union
+{
+  struct __pthread_mutex_s
+  {
+    int __lock;
+    unsigned int __count;
+    int __owner;
+    unsigned int __nusers;
+    int __kind;
+    int __spins;
+    __pthread_list_t __list;
+#define __PTHREAD_MUTEX_HAVE_PREV	1
+  } __data;
+  char __size[__SIZEOF_PTHREAD_MUTEX_T];
+  long int __align;
+} pthread_mutex_t;
+
+/* Mutex __spins initializer used by PTHREAD_MUTEX_INITIALIZER.  */
+#define __PTHREAD_SPINS 0
+
+typedef union
+{
+  char __size[__SIZEOF_PTHREAD_MUTEXATTR_T];
+  long int __align;
+} pthread_mutexattr_t;
+
+
+/* Data structure for conditional variable handling.  The structure of
+   the attribute type is not exposed on purpose.  */
+typedef union
+{
+  struct
+  {
+    int __lock;
+    unsigned int __futex;
+    __extension__ unsigned long long int __total_seq;
+    __extension__ unsigned long long int __wakeup_seq;
+    __extension__ unsigned long long int __woken_seq;
+    void *__mutex;
+    unsigned int __nwaiters;
+    unsigned int __broadcast_seq;
+  } __data;
+  char __size[__SIZEOF_PTHREAD_COND_T];
+  long int __align;
+} pthread_cond_t;
+
+typedef union
+{
+  char __size[__SIZEOF_PTHREAD_CONDATTR_T];
+  int __align;
+} pthread_condattr_t;
+
+
+/* Keys for thread-specific data */
+typedef unsigned int pthread_key_t;
+
+
+/* Once-only execution */
+typedef int pthread_once_t;
+
+
+#if defined __USE_UNIX98 || defined __USE_XOPEN2K
+/* Data structure for read-write lock variable handling.  The
+   structure of the attribute type is not exposed on purpose.  */
+typedef union
+{
+  struct
+  {
+    int __lock;
+    unsigned int __nr_readers;
+    unsigned int __readers_wakeup;
+    unsigned int __writer_wakeup;
+    unsigned int __nr_readers_queued;
+    unsigned int __nr_writers_queued;
+    int __writer;
+    int __shared;
+    unsigned long int __pad1;
+    unsigned long int __pad2;
+    unsigned int __flags;
+  } __data;
+  char __size[__SIZEOF_PTHREAD_RWLOCK_T];
+  long int __align;
+} pthread_rwlock_t;
+
+#define __PTHREAD_RWLOCK_ELISION_EXTRA 0
+
+typedef union
+{
+  char __size[__SIZEOF_PTHREAD_RWLOCKATTR_T];
+  long int __align;
+} pthread_rwlockattr_t;
+#endif
+
+
+#ifdef __USE_XOPEN2K
+/* POSIX spinlock data type.  */
+typedef volatile int pthread_spinlock_t;
+
+
+/* POSIX barriers data type.  The structure of the type is
+   deliberately not exposed.  */
+typedef union
+{
+  char __size[__SIZEOF_PTHREAD_BARRIER_T];
+  long int __align;
+} pthread_barrier_t;
+
+typedef union
+{
+  char __size[__SIZEOF_PTHREAD_BARRIERATTR_T];
+  int __align;
+} pthread_barrierattr_t;
+#endif
+
+#endif	/* bits/pthreadtypes.h */
diff --git a/sysdeps/aarch64/nptl/bits/semaphore.h b/sysdeps/aarch64/nptl/bits/semaphore.h
new file mode 100644
index 0000000000..047dd4e586
--- /dev/null
+++ b/sysdeps/aarch64/nptl/bits/semaphore.h
@@ -0,0 +1,35 @@
+/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _SEMAPHORE_H
+# error "Never use <bits/semaphore.h> directly; include <semaphore.h> instead."
+#endif
+
+
+#define __SIZEOF_SEM_T	32
+
+
+/* Value returned if `sem_open' failed.  */
+#define SEM_FAILED      ((sem_t *) 0)
+
+
+typedef union
+{
+  char __size[__SIZEOF_SEM_T];
+  long int __align;
+} sem_t;
diff --git a/sysdeps/aarch64/nptl/pthread_spin_lock.c b/sysdeps/aarch64/nptl/pthread_spin_lock.c
new file mode 100644
index 0000000000..6ddd60f451
--- /dev/null
+++ b/sysdeps/aarch64/nptl/pthread_spin_lock.c
@@ -0,0 +1,24 @@
+/* Copyright (C) 2008-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SPIN_LOCK_READS_BETWEEN_CMPXCHG 1000
+
+/* We can't use the normal "#include <nptl/pthread_spin_lock.c>" because
+   it will resolve to this very file.  Using "sysdeps/.." as reference to the
+   top level directory does the job.  */
+#include <sysdeps/../nptl/pthread_spin_lock.c>
diff --git a/sysdeps/aarch64/nptl/pthreaddef.h b/sysdeps/aarch64/nptl/pthreaddef.h
new file mode 100644
index 0000000000..79f2107e65
--- /dev/null
+++ b/sysdeps/aarch64/nptl/pthreaddef.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2002-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Default stack size.  */
+#define ARCH_STACK_DEFAULT_SIZE	(2 * 1024 * 1024)
+
+/* Required stack pointer alignment at beginning.  */
+#define STACK_ALIGN 16
+
+/* Minimal stack size after allocating thread descriptor and guard size.  */
+#define MINIMAL_REST_STACK 2048
+
+/* Alignment requirement for TCB.  */
+#define TCB_ALIGNMENT 16
+
+/* Location of current stack frame.  */
+#define CURRENT_STACK_FRAME	__builtin_frame_address (0)
diff --git a/sysdeps/aarch64/nptl/tcb-offsets.sym b/sysdeps/aarch64/nptl/tcb-offsets.sym
new file mode 100644
index 0000000000..0677aeabff
--- /dev/null
+++ b/sysdeps/aarch64/nptl/tcb-offsets.sym
@@ -0,0 +1,7 @@
+#include <sysdep.h>
+#include <tls.h>
+
+PTHREAD_MULTIPLE_THREADS_OFFSET		offsetof (struct pthread, header.multiple_threads)
+PTHREAD_PID_OFFSET			offsetof (struct pthread, pid)
+PTHREAD_TID_OFFSET			offsetof (struct pthread, tid)
+PTHREAD_SIZEOF				sizeof (struct pthread)
diff --git a/sysdeps/aarch64/nptl/tls.h b/sysdeps/aarch64/nptl/tls.h
new file mode 100644
index 0000000000..1e3904eafb
--- /dev/null
+++ b/sysdeps/aarch64/nptl/tls.h
@@ -0,0 +1,146 @@
+/* Copyright (C) 2005-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _TLS_H
+#define _TLS_H	1
+
+#include <dl-sysdep.h>
+
+#ifndef __ASSEMBLER__
+# include <stdbool.h>
+# include <stddef.h>
+# include <stdint.h>
+
+/* Type for the dtv.  */
+typedef union dtv
+{
+  size_t counter;
+  struct
+  {
+    void *val;
+    bool is_static;
+  } pointer;
+} dtv_t;
+
+#else /* __ASSEMBLER__ */
+# include <tcb-offsets.h>
+#endif /* __ASSEMBLER__ */
+
+#ifndef __ASSEMBLER__
+
+/* Get system call information.  */
+# include <sysdep.h>
+
+/* The TP points to the start of the thread blocks.  */
+# define TLS_DTV_AT_TP	1
+# define TLS_TCB_AT_TP	0
+
+/* Get the thread descriptor definition.  */
+# include <nptl/descr.h>
+
+typedef struct
+{
+  dtv_t *dtv;
+  void *private;
+} tcbhead_t;
+
+/* This is the size of the initial TCB.  */
+# define TLS_INIT_TCB_SIZE	sizeof (tcbhead_t)
+
+/* Alignment requirements for the initial TCB.  */
+# define TLS_INIT_TCB_ALIGN	__alignof__ (struct pthread)
+
+/* This is the size of the TCB.  */
+# define TLS_TCB_SIZE		sizeof (tcbhead_t)
+
+/* This is the size we need before TCB.  */
+# define TLS_PRE_TCB_SIZE	sizeof (struct pthread)
+
+/* Alignment requirements for the TCB.  */
+# define TLS_TCB_ALIGN		__alignof__ (struct pthread)
+
+/* Install the dtv pointer.  The pointer passed is to the element with
+   index -1 which contain the length.  */
+# define INSTALL_DTV(tcbp, dtvp) \
+  (((tcbhead_t *) (tcbp))->dtv = (dtvp) + 1)
+
+/* Install new dtv for current thread.  */
+# define INSTALL_NEW_DTV(dtv) \
+  (THREAD_DTV() = (dtv))
+
+/* Return dtv of given thread descriptor.  */
+# define GET_DTV(tcbp) \
+  (((tcbhead_t *) (tcbp))->dtv)
+
+/* Code to initially initialize the thread pointer.  This might need
+   special attention since 'errno' is not yet available and if the
+   operation can cause a failure 'errno' must not be touched.  */
+# define TLS_INIT_TP(tcbp) \
+  ({ __asm __volatile ("msr tpidr_el0, %0" : : "r" (tcbp)); NULL; })
+
+/* Value passed to 'clone' for initialization of the thread register.  */
+# define TLS_DEFINE_INIT_TP(tp, pd) void *tp = (pd) + 1
+
+/* Return the address of the dtv for the current thread.  */
+# define THREAD_DTV() \
+  (((tcbhead_t *) __builtin_thread_pointer ())->dtv)
+
+/* Return the thread descriptor for the current thread.  */
+# define THREAD_SELF \
+ ((struct pthread *)__builtin_thread_pointer () - 1)
+
+/* Magic for libthread_db to know how to do THREAD_SELF.  */
+# define DB_THREAD_SELF \
+  CONST_THREAD_AREA (64, sizeof (struct pthread))
+
+/* Access to data in the thread descriptor is easy.  */
+# define THREAD_GETMEM(descr, member) \
+  descr->member
+# define THREAD_GETMEM_NC(descr, member, idx) \
+  descr->member[idx]
+# define THREAD_SETMEM(descr, member, value) \
+  descr->member = (value)
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
+  descr->member[idx] = (value)
+
+/* Get and set the global scope generation counter in struct pthread.  */
+# define THREAD_GSCOPE_FLAG_UNUSED 0
+# define THREAD_GSCOPE_FLAG_USED   1
+# define THREAD_GSCOPE_FLAG_WAIT   2
+# define THREAD_GSCOPE_RESET_FLAG() \
+  do									     \
+    { int __res								     \
+	= atomic_exchange_rel (&THREAD_SELF->header.gscope_flag,	     \
+			       THREAD_GSCOPE_FLAG_UNUSED);		     \
+      if (__res == THREAD_GSCOPE_FLAG_WAIT)				     \
+	lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE);   \
+    }									     \
+  while (0)
+# define THREAD_GSCOPE_SET_FLAG() \
+  do									     \
+    {									     \
+      THREAD_SELF->header.gscope_flag = THREAD_GSCOPE_FLAG_USED;	     \
+      atomic_write_barrier ();						     \
+    }									     \
+  while (0)
+# define THREAD_GSCOPE_WAIT() \
+  GL(dl_wait_lookup_done) ()
+
+# endif /* __ASSEMBLER__ */
+
+#endif	/* tls.h */
diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure
new file mode 100644
index 0000000000..d9bd1f8558
--- /dev/null
+++ b/sysdeps/aarch64/preconfigure
@@ -0,0 +1,6 @@
+case "$machine" in
+aarch64*)
+	base_machine=aarch64
+	machine=aarch64
+	;;
+esac
diff --git a/sysdeps/aarch64/setjmp.S b/sysdeps/aarch64/setjmp.S
new file mode 100644
index 0000000000..eb84a11c4b
--- /dev/null
+++ b/sysdeps/aarch64/setjmp.S
@@ -0,0 +1,74 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <jmpbuf-offsets.h>
+#include <stap-probe.h>
+
+        /* Keep traditional entry points in with sigsetjmp(). */
+ENTRY (setjmp)
+	mov	x1, #1
+	b	1f
+END (setjmp)
+
+ENTRY (_setjmp)
+	mov	x1, #0
+	b	1f
+END (_setjmp)
+libc_hidden_def (_setjmp)
+
+ENTRY (__sigsetjmp)
+
+1:
+	stp	x19, x20, [x0, #JB_X19<<3]
+	stp	x21, x22, [x0, #JB_X21<<3]
+	stp	x23, x24, [x0, #JB_X23<<3]
+	stp	x25, x26, [x0, #JB_X25<<3]
+	stp	x27, x28, [x0, #JB_X27<<3]
+
+#ifdef PTR_MANGLE
+	PTR_MANGLE (x4, x30, x3, x2)
+	stp	x29,  x4, [x0, #JB_X29<<3]
+#else
+	stp	x29, x30, [x0, #JB_X29<<3]
+#endif
+	/* setjmp probe takes 3 arguments, address of jump buffer
+	   first argument (8@x0), return value second argument (-4@x1),
+	   and target address (8@x30), respectively.  */
+	LIBC_PROBE (setjmp, 3, 8@x0, -4@x1, 8@x30)
+	stp	 d8,  d9, [x0, #JB_D8<<3]
+	stp	d10, d11, [x0, #JB_D10<<3]
+	stp	d12, d13, [x0, #JB_D12<<3]
+	stp	d14, d15, [x0, #JB_D14<<3]
+#ifdef PTR_MANGLE
+	mov	x4, sp
+	PTR_MANGLE (x5, x4, x3, x2)
+	str	x5, [x0, #JB_SP<<3]
+#else
+	mov	x2,  sp
+	str	x2,  [x0, #JB_SP<<3]
+#endif
+#if IS_IN (rtld)
+	/* In ld.so we never save the signal mask */
+	mov	w0, #0
+	RET
+#else
+	b	C_SYMBOL_NAME(__sigjmp_save)
+#endif
+END (__sigsetjmp)
+hidden_def (__sigsetjmp)
diff --git a/sysdeps/aarch64/soft-fp/Makefile b/sysdeps/aarch64/soft-fp/Makefile
new file mode 100644
index 0000000000..ada13e8b70
--- /dev/null
+++ b/sysdeps/aarch64/soft-fp/Makefile
@@ -0,0 +1,3 @@
+ifeq ($(subdir),math)
+CPPFLAGS += -I../soft-fp
+endif
diff --git a/sysdeps/aarch64/soft-fp/e_sqrtl.c b/sysdeps/aarch64/soft-fp/e_sqrtl.c
new file mode 100644
index 0000000000..8ed02917f9
--- /dev/null
+++ b/sysdeps/aarch64/soft-fp/e_sqrtl.c
@@ -0,0 +1,39 @@
+/* long double square root in software floating-point emulation.
+   Copyright (C) 1997-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com) and
+		  Jakub Jelinek (jj@ultra.linux.cz).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <soft-fp.h>
+#include <quad.h>
+
+long double
+__ieee754_sqrtl (const long double a)
+{
+  FP_DECL_EX;
+  FP_DECL_Q(A); FP_DECL_Q(C);
+  long double c;
+
+  FP_INIT_ROUNDMODE;
+  FP_UNPACK_Q(A, a);
+  FP_SQRT_Q(C, A);
+  FP_PACK_Q(c, C);
+  FP_HANDLE_EXCEPTIONS;
+  return c;
+}
+strong_alias (__ieee754_sqrtl, __sqrtl_finite)
diff --git a/sysdeps/aarch64/soft-fp/sfp-machine.h b/sysdeps/aarch64/soft-fp/sfp-machine.h
new file mode 100644
index 0000000000..3e969952fa
--- /dev/null
+++ b/sysdeps/aarch64/soft-fp/sfp-machine.h
@@ -0,0 +1,120 @@
+#include <fenv.h>
+#include <fpu_control.h>
+
+#define _FP_W_TYPE_SIZE		64
+#define _FP_W_TYPE		unsigned long long
+#define _FP_WS_TYPE		signed long long
+#define _FP_I_TYPE		long long
+
+#define _FP_MUL_MEAT_S(R,X,Y)					\
+  _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y)
+#define _FP_MUL_MEAT_D(R,X,Y)					\
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y)					\
+  _FP_MUL_MEAT_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
+#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_2_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1)
+#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1
+#define _FP_NANSIGN_S		0
+#define _FP_NANSIGN_D		0
+#define _FP_NANSIGN_Q		0
+
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 0
+
+/* From my experiments it seems X is chosen unless one of the
+   NaNs is sNaN,  in which case the result is NANSIGN/NANFRAC.  */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+  do {								\
+    if ((_FP_FRAC_HIGH_RAW_##fs(X) |				\
+	 _FP_FRAC_HIGH_RAW_##fs(Y)) & _FP_QNANBIT_##fs)		\
+      {								\
+	R##_s = _FP_NANSIGN_##fs;				\
+        _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs);			\
+      }								\
+    else							\
+      {								\
+	R##_s = X##_s;						\
+        _FP_FRAC_COPY_##wc(R,X);				\
+      }								\
+    R##_c = FP_CLS_NAN;						\
+  } while (0)
+
+#define _FP_DECL_EX		fpu_control_t _fcw
+
+#define FP_ROUNDMODE		(_fcw & _FPU_FPCR_RM_MASK)
+
+#define FP_RND_NEAREST		FE_TONEAREST
+#define FP_RND_ZERO		FE_TOWARDZERO
+#define FP_RND_PINF		FE_UPWARD
+#define FP_RND_MINF		FE_DOWNWARD
+
+#define FP_EX_INVALID		FE_INVALID
+#define FP_EX_OVERFLOW		FE_OVERFLOW
+#define FP_EX_UNDERFLOW		FE_UNDERFLOW
+#define FP_EX_DIVZERO		FE_DIVBYZERO
+#define FP_EX_INEXACT		FE_INEXACT
+
+#define _FP_TININESS_AFTER_ROUNDING 0
+
+#define FP_INIT_ROUNDMODE			\
+do {						\
+  _FPU_GETCW (_fcw);				\
+} while (0)
+
+#define FP_HANDLE_EXCEPTIONS						\
+  do {									\
+    const float fp_max = __FLT_MAX__;					\
+    const float fp_min = __FLT_MIN__;					\
+    const float fp_1e32 = 1.0e32f;					\
+    const float fp_zero = 0.0;						\
+    const float fp_one = 1.0;						\
+    unsigned fpsr;							\
+    if (_fex & FP_EX_INVALID)						\
+      {									\
+        __asm__ __volatile__ ("fdiv\ts0, %s0, %s0"			\
+			      :						\
+			      : "w" (fp_zero)				\
+			      : "s0");					\
+	__asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr));		\
+      }									\
+    if (_fex & FP_EX_DIVZERO)						\
+      {									\
+	__asm__ __volatile__ ("fdiv\ts0, %s0, %s1"			\
+			      :						\
+			      : "w" (fp_one), "w" (fp_zero)		\
+			      : "s0");					\
+	__asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr));		\
+      }									\
+    if (_fex & FP_EX_OVERFLOW)						\
+      {									\
+        __asm__ __volatile__ ("fadd\ts0, %s0, %s1"			\
+			      :						\
+			      : "w" (fp_max), "w" (fp_1e32)		\
+			      : "s0");					\
+        __asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr));		\
+      }									\
+    if (_fex & FP_EX_UNDERFLOW)						\
+      {									\
+	__asm__ __volatile__ ("fmul\ts0, %s0, %s0"			\
+			      :						\
+			      : "w" (fp_min)				\
+			      : "s0");					\
+	__asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr));		\
+      }									\
+    if (_fex & FP_EX_INEXACT)						\
+      {									\
+	__asm__ __volatile__ ("fsub\ts0, %s0, %s1"			\
+			      :						\
+			      : "w" (fp_max), "w" (fp_one)		\
+			      : "s0");					\
+	__asm__ __volatile__ ("mrs\t%0, fpsr" : "=r" (fpsr));		\
+      }									\
+  } while (0)
+
+#define FP_TRAPPING_EXCEPTIONS ((_fcw >> FE_EXCEPT_SHIFT) & FE_ALL_EXCEPT)
diff --git a/sysdeps/aarch64/sotruss-lib.c b/sysdeps/aarch64/sotruss-lib.c
new file mode 100644
index 0000000000..2464c2a24f
--- /dev/null
+++ b/sysdeps/aarch64/sotruss-lib.c
@@ -0,0 +1,51 @@
+/* Override generic sotruss-lib.c to define actual functions for AArch64.
+   Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define HAVE_ARCH_PLTENTER
+#define HAVE_ARCH_PLTEXIT
+
+#include <elf/sotruss-lib.c>
+
+ElfW(Addr)
+la_aarch64_gnu_pltenter (ElfW(Sym) *sym __attribute__ ((unused)),
+			 unsigned int ndx __attribute__ ((unused)),
+			 uintptr_t *refcook, uintptr_t *defcook,
+			 La_aarch64_regs *regs, unsigned int *flags,
+			 const char *symname, long int *framesizep)
+{
+  print_enter (refcook, defcook, symname,
+	       regs->lr_xreg[0], regs->lr_xreg[1], regs->lr_xreg[2],
+	       *flags);
+
+  /* No need to copy anything, we will not need the parameters in any case.  */
+  *framesizep = 0;
+
+  return sym->st_value;
+}
+
+unsigned int
+la_aarch64_gnu_pltexit (ElfW(Sym) *sym, unsigned int ndx, uintptr_t *refcook,
+			uintptr_t *defcook,
+			const struct La_aarch64_regs *inregs,
+			struct La_aarch64_retval *outregs, const char *symname)
+{
+  print_exit (refcook, defcook, symname, outregs->lrv_xreg[0]);
+
+  return 0;
+}
diff --git a/sysdeps/aarch64/stackinfo.h b/sysdeps/aarch64/stackinfo.h
new file mode 100644
index 0000000000..c5f536f3dd
--- /dev/null
+++ b/sysdeps/aarch64/stackinfo.h
@@ -0,0 +1,33 @@
+/* Copyright (C) 2001-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains a bit of information about the stack allocation
+   of the processor.  */
+
+#ifndef _STACKINFO_H
+#define _STACKINFO_H	1
+
+#include <elf.h>
+
+/* On AArch64 the stack grows down.  */
+#define _STACK_GROWS_DOWN	1
+
+/* Default to a non-executable stack. */
+#define DEFAULT_STACK_PERMS (PF_R|PF_W)
+
+#endif	/* stackinfo.h */
diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S
new file mode 100644
index 0000000000..073b99d1ba
--- /dev/null
+++ b/sysdeps/aarch64/start.S
@@ -0,0 +1,92 @@
+/* Copyright (C) 1995-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This is the canonical entry point, usually the first thing in the text
+   segment.
+
+   Note that the code in the .init section has already been run.
+   This includes _init and _libc_init
+
+
+   At this entry point, most registers' values are unspecified, except:
+
+   x0		Contains a function pointer to be registered with `atexit'.
+		This is how the dynamic linker arranges to have DT_FINI
+		functions called for shared libraries that have been loaded
+		before this code runs.
+
+   sp		The stack contains the arguments and environment:
+		0(sp)			argc
+		8(sp)			argv[0]
+		...
+		(8*argc)(sp)		NULL
+		(8*(argc+1))(sp)	envp[0]
+		...
+					NULL
+ */
+
+	.text
+	.globl _start
+	.type _start,#function
+_start:
+	/* Create an initial frame with 0 LR and FP */
+	mov	x29, #0
+	mov	x30, #0
+
+	/* Setup rtld_fini in argument register */
+	mov	x5, x0
+
+	/* Load argc and a pointer to argv */
+	ldr	x1, [sp, #0]
+	add	x2, sp, #8
+
+	/* Setup stack limit in argument register */
+	mov	x6, sp
+
+#ifdef SHARED
+        adrp    x0, :got:main
+	ldr     x0, [x0, #:got_lo12:main]
+
+        adrp    x3, :got:__libc_csu_init
+	ldr     x3, [x3, #:got_lo12:__libc_csu_init]
+
+        adrp    x4, :got:__libc_csu_fini
+	ldr     x4, [x4, #:got_lo12:__libc_csu_fini]
+#else
+	/* Set up the other arguments in registers */
+	ldr	x0, =main
+	ldr	x3, =__libc_csu_init
+	ldr	x4, =__libc_csu_fini
+#endif
+
+	/* __libc_start_main (main, argc, argv, init, fini, rtld_fini,
+			      stack_end) */
+
+	/* Let the libc call main and exit with its return code.  */
+	bl	__libc_start_main
+
+	/* should never get here....*/
+	bl	abort
+
+	/* Define a symbol for the first piece of initialized data.  */
+	.data
+	.globl __data_start
+__data_start:
+	.long 0
+	.weak data_start
+	data_start = __data_start
diff --git a/sysdeps/aarch64/stpcpy.S b/sysdeps/aarch64/stpcpy.S
new file mode 100644
index 0000000000..6576a7560d
--- /dev/null
+++ b/sysdeps/aarch64/stpcpy.S
@@ -0,0 +1,20 @@
+/* stpcpy - copy a string returning pointer to end.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define BUILD_STPCPY
+#include "strcpy.S"
diff --git a/sysdeps/aarch64/strchr.S b/sysdeps/aarch64/strchr.S
new file mode 100644
index 0000000000..6fe5fa1d3d
--- /dev/null
+++ b/sysdeps/aarch64/strchr.S
@@ -0,0 +1,138 @@
+/* strchr - find a character in a string
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+/* Arguments and results.  */
+#define srcin		x0
+#define chrin		w1
+
+#define result		x0
+
+#define src		x2
+#define tmp1		x3
+#define wtmp2		w4
+#define tmp3		x5
+
+#define vrepchr		v0
+#define vdata1		v1
+#define vdata2		v2
+#define vhas_nul1	v3
+#define vhas_nul2	v4
+#define vhas_chr1	v5
+#define vhas_chr2	v6
+#define vrepmask_0	v7
+#define vrepmask_c	v16
+#define vend1		v17
+#define vend2		v18
+
+	/* Core algorithm.
+	   For each 32-byte hunk we calculate a 64-bit syndrome value, with
+	   two bits per byte (LSB is always in bits 0 and 1, for both big
+	   and little-endian systems).  Bit 0 is set iff the relevant byte
+	   matched the requested character.  Bit 1 is set iff the
+	   relevant byte matched the NUL end of string (we trigger off bit0
+	   for the special case of looking for NUL).  Since the bits
+	   in the syndrome reflect exactly the order in which things occur
+	   in the original string a count_trailing_zeros() operation will
+	   identify exactly which byte is causing the termination, and why.  */
+
+/* Locals and temporaries.  */
+
+ENTRY (strchr)
+	mov	wtmp2, #0x0401
+	movk	wtmp2, #0x4010, lsl #16
+	dup	vrepchr.16b, chrin
+	bic	src, srcin, #31
+	dup	vrepmask_c.4s, wtmp2
+	ands	tmp1, srcin, #31
+	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s // lsl #1
+	b.eq	L(loop)
+
+	/* Input string is not 32-byte aligned.  Rather than forcing
+	   the padding bytes to a safe value, we calculate the syndrome
+	   for all the bytes, but then mask off those bits of the
+	   syndrome that are related to the padding.  */
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	neg	tmp1, tmp1
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
+	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
+	lsl	tmp1, tmp1, #1
+	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
+	mov	tmp3, #~0
+	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
+	lsr	tmp1, tmp3, tmp1
+
+	mov	tmp3, vend1.2d[0]
+	bic	tmp1, tmp3, tmp1	// Mask padding bits.
+	cbnz	tmp1, L(tail)
+
+L(loop):
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	/* Use a fast check for the termination condition.  */
+	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
+	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
+	orr	vend1.16b, vend1.16b, vend2.16b
+	addp	vend1.2d, vend1.2d, vend1.2d
+	mov	tmp1, vend1.2d[0]
+	cbz	tmp1, L(loop)
+
+	/* Termination condition found.  Now need to establish exactly why
+	   we terminated.  */
+	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b
+	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b
+	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128
+	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64
+
+	mov	tmp1, vend1.2d[0]
+L(tail):
+	sub	src, src, #32
+	rbit	tmp1, tmp1
+	clz	tmp1, tmp1
+	/* Tmp1 is even if the target charager was found first.  Otherwise
+	   we've found the end of string and we weren't looking for NUL.  */
+	tst	tmp1, #1
+	add	result, src, tmp1, lsr #1
+	csel	result, result, xzr, eq
+	ret
+END (strchr)
+libc_hidden_builtin_def (strchr)
+weak_alias (strchr, index)
diff --git a/sysdeps/aarch64/strchrnul.S b/sysdeps/aarch64/strchrnul.S
new file mode 100644
index 0000000000..8d2894719a
--- /dev/null
+++ b/sysdeps/aarch64/strchrnul.S
@@ -0,0 +1,130 @@
+/* strchrnul - find a character or nul in a string
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+/* Arguments and results.  */
+#define srcin		x0
+#define chrin		w1
+
+#define result		x0
+
+/* Locals and temporaries.  */
+
+#define src		x2
+#define tmp1		x3
+#define wtmp2		w4
+#define tmp3		x5
+
+#define vrepchr		v0
+#define vdata1		v1
+#define vdata2		v2
+#define vhas_nul1	v3
+#define vhas_nul2	v4
+#define vhas_chr1	v5
+#define vhas_chr2	v6
+#define vrepmask	v7
+#define vend1		v16
+
+/* Core algorithm.
+
+   For each 32-byte hunk we calculate a 64-bit syndrome value, with
+   two bits per byte (LSB is always in bits 0 and 1, for both big
+   and little-endian systems).  For each tuple, bit 0 is set iff
+   the relevant byte matched the requested character or nul.  Since the
+   bits in the syndrome reflect exactly the order in which things occur
+   in the original string a count_trailing_zeros() operation will
+   identify exactly which byte is causing the termination.  */
+
+ENTRY (__strchrnul)
+	/* Magic constant 0x40100401 to allow us to identify which lane
+	   matches the termination condition.  */
+	mov	wtmp2, #0x0401
+	movk	wtmp2, #0x4010, lsl #16
+	dup	vrepchr.16b, chrin
+	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
+	dup	vrepmask.4s, wtmp2
+	ands	tmp1, srcin, #31
+	b.eq	L(loop)
+
+	/* Input string is not 32-byte aligned.  Rather than forcing
+	   the padding bytes to a safe value, we calculate the syndrome
+	   for all the bytes, but then mask off those bits of the
+	   syndrome that are related to the padding.  */
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	neg	tmp1, tmp1
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	orr	vhas_chr1.16b, vhas_chr1.16b, vhas_nul1.16b
+	orr	vhas_chr2.16b, vhas_chr2.16b, vhas_nul2.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+	lsl	tmp1, tmp1, #1
+	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
+	mov	tmp3, #~0
+	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
+	lsr	tmp1, tmp3, tmp1
+
+	mov	tmp3, vend1.2d[0]
+	bic	tmp1, tmp3, tmp1	// Mask padding bits.
+	cbnz	tmp1, L(tail)
+
+L(loop):
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	/* Use a fast check for the termination condition.  */
+	orr	vhas_chr1.16b, vhas_nul1.16b, vhas_chr1.16b
+	orr	vhas_chr2.16b, vhas_nul2.16b, vhas_chr2.16b
+	orr	vend1.16b, vhas_chr1.16b, vhas_chr2.16b
+	addp	vend1.2d, vend1.2d, vend1.2d
+	mov	tmp1, vend1.2d[0]
+	cbz	tmp1, L(loop)
+
+	/* Termination condition found.  Now need to establish exactly why
+	   we terminated.  */
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
+	addp	vend1.16b, vhas_chr1.16b, vhas_chr2.16b		// 256->128
+	addp	vend1.16b, vend1.16b, vend1.16b		// 128->64
+
+	mov	tmp1, vend1.2d[0]
+L(tail):
+	/* Count the trailing zeros, by bit reversing...  */
+	rbit	tmp1, tmp1
+	/* Re-bias source.  */
+	sub	src, src, #32
+	clz	tmp1, tmp1	/* ... and counting the leading zeros.  */
+	/* tmp1 is twice the offset into the fragment.  */
+	add	result, src, tmp1, lsr #1
+	ret
+
+END(__strchrnul)
+weak_alias (__strchrnul, strchrnul)
diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S
new file mode 100644
index 0000000000..63701b7b66
--- /dev/null
+++ b/sysdeps/aarch64/strcmp.S
@@ -0,0 +1,155 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+#include <sysdep.h>
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result.  */
+#define src1		x0
+#define src2		x1
+#define result		x0
+
+/* Internal variables.  */
+#define data1		x2
+#define data1w		w2
+#define data2		x3
+#define data2w		w3
+#define has_nul		x4
+#define diff		x5
+#define syndrome	x6
+#define tmp1		x7
+#define tmp2		x8
+#define tmp3		x9
+#define zeroones	x10
+#define pos		x11
+
+	/* Start of performance-critical section  -- one 64B cache line.  */
+ENTRY_ALIGN(strcmp, 6)
+
+	eor	tmp1, src1, src2
+	mov	zeroones, #REP8_01
+	tst	tmp1, #7
+	b.ne	L(misaligned8)
+	ands	tmp1, src1, #7
+	b.ne	L(mutual_align)
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+L(loop_aligned):
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+L(start_realigned):
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	orr	syndrome, diff, has_nul
+	cbz	syndrome, L(loop_aligned)
+	/* End of performance-critical section  -- one 64B cache line.  */
+
+#ifndef	__AARCH64EB__
+	rev	syndrome, syndrome
+	rev	data1, data1
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	clz	pos, syndrome
+	rev	data2, data2
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	RET
+#else
+	/* For big-endian we cannot use the trick with the syndrome value
+	   as carry-propagation can corrupt the upper bits if the trailing
+	   bytes in the string contain 0x01.  */
+	/* However, if there is no NUL byte in the dword, we can generate
+	   the result directly.  We can't just subtract the bytes as the
+	   MSB might be significant.  */
+	cbnz	has_nul, 1f
+	cmp	data1, data2
+	cset	result, ne
+	cneg	result, result, lo
+	RET
+1:
+	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
+	rev	tmp3, data1
+	sub	tmp1, tmp3, zeroones
+	orr	tmp2, tmp3, #REP8_7f
+	bic	has_nul, tmp1, tmp2
+	rev	has_nul, has_nul
+	orr	syndrome, diff, has_nul
+	clz	pos, syndrome
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	RET
+#endif
+
+L(mutual_align):
+	/* Sources are mutually aligned, but are not currently at an
+	   alignment boundary.  Round down the addresses and then mask off
+	   the bytes that preceed the start point.  */
+	bic	src1, src1, #7
+	bic	src2, src2, #7
+	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
+	ldr	data1, [src1], #8
+	neg	tmp1, tmp1		/* Bits to alignment -64.  */
+	ldr	data2, [src2], #8
+	mov	tmp2, #~0
+#ifdef __AARCH64EB__
+	/* Big-endian.  Early bytes are at MSB.  */
+	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#endif
+	orr	data1, data1, tmp2
+	orr	data2, data2, tmp2
+	b	L(start_realigned)
+
+L(misaligned8):
+	/* We can do better than this.  */
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	cmp	data1w, #1
+	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	b.eq	L(misaligned8)
+	sub	result, data1, data2
+	RET
+END(strcmp)
+libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
new file mode 100644
index 0000000000..28846fbf0e
--- /dev/null
+++ b/sysdeps/aarch64/strcpy.S
@@ -0,0 +1,326 @@
+/* strcpy/stpcpy - copy a string returning pointer to start/end.
+   Copyright (C) 2013-2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* To build as stpcpy, define BUILD_STPCPY before compiling this file.
+
+   To test the page crossing code path more thoroughly, compile with
+   -DSTRCPY_TEST_PAGE_CROSS - this will force all unaligned copies through
+   the slower entry path.  This option is not intended for production use.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
+ */
+
+/* Arguments and results.  */
+#define dstin		x0
+#define srcin		x1
+
+/* Locals and temporaries.  */
+#define src		x2
+#define dst		x3
+#define data1		x4
+#define data1w		w4
+#define data2		x5
+#define data2w		w5
+#define has_nul1	x6
+#define has_nul2	x7
+#define tmp1		x8
+#define tmp2		x9
+#define tmp3		x10
+#define tmp4		x11
+#define zeroones	x12
+#define data1a		x13
+#define data2a		x14
+#define pos		x15
+#define len		x16
+#define to_align	x17
+
+#ifdef BUILD_STPCPY
+#define STRCPY __stpcpy
+#else
+#define STRCPY strcpy
+#endif
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+	/* AArch64 systems have a minimum page size of 4k.  We can do a quick
+	   page size check for crossing this boundary on entry and if we
+	   do not, then we can short-circuit much of the entry code.  We
+	   expect early page-crossing strings to be rare (probability of
+	   16/MIN_PAGE_SIZE ~= 0.4%), so the branch should be quite
+	   predictable, even with random strings.
+
+	   We don't bother checking for larger page sizes, the cost of setting
+	   up the correct page size is just not worth the extra gain from
+	   a small reduction in the cases taking the slow path.  Note that
+	   we only care about whether the first fetch, which may be
+	   misaligned, crosses a page boundary - after that we move to aligned
+	   fetches for the remainder of the string.  */
+
+#ifdef STRCPY_TEST_PAGE_CROSS
+	/* Make everything that isn't Qword aligned look like a page cross.  */
+#define MIN_PAGE_P2 4
+#else
+#define MIN_PAGE_P2 12
+#endif
+
+#define MIN_PAGE_SIZE (1 << MIN_PAGE_P2)
+
+ENTRY_ALIGN (STRCPY, 6)
+	/* For moderately short strings, the fastest way to do the copy is to
+	   calculate the length of the string in the same way as strlen, then
+	   essentially do a memcpy of the result.  This avoids the need for
+	   multiple byte copies and further means that by the time we
+	   reach the bulk copy loop we know we can always use DWord
+	   accesses.  We expect strcpy to rarely be called repeatedly
+	   with the same source string, so branch prediction is likely to
+	   always be difficult - we mitigate against this by preferring
+	   conditional select operations over branches whenever this is
+	   feasible.  */
+	and	tmp2, srcin, #(MIN_PAGE_SIZE - 1)
+	mov	zeroones, #REP8_01
+	and	to_align, srcin, #15
+	cmp	tmp2, #(MIN_PAGE_SIZE - 16)
+	neg	tmp1, to_align
+	/* The first fetch will straddle a (possible) page boundary iff
+	   srcin + 15 causes bit[MIN_PAGE_P2] to change value.  A 16-byte
+	   aligned string will never fail the page align check, so will
+	   always take the fast path.  */
+	b.gt	L(page_cross)
+
+L(page_cross_ok):
+	ldp	data1, data2, [srcin]
+#ifdef __AARCH64EB__
+	/* Because we expect the end to be found within 16 characters
+	   (profiling shows this is the most common case), it's worth
+	   swapping the bytes now to save having to recalculate the
+	   termination syndrome later.  We preserve data1 and data2
+	   so that we can re-use the values later on.  */
+	rev	tmp2, data1
+	sub	tmp1, tmp2, zeroones
+	orr	tmp2, tmp2, #REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	b.ne	L(fp_le8)
+	rev	tmp4, data2
+	sub	tmp3, tmp4, zeroones
+	orr	tmp4, tmp4, #REP8_7f
+#else
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	b.ne	L(fp_le8)
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+#endif
+	bics	has_nul2, tmp3, tmp4
+	b.eq	L(bulk_entry)
+
+	/* The string is short (<=16 bytes).  We don't know exactly how
+	   short though, yet.  Work out the exact length so that we can
+	   quickly select the optimal copy strategy.  */
+L(fp_gt8):
+	rev	has_nul2, has_nul2
+	clz	pos, has_nul2
+	mov	tmp2, #56
+	add	dst, dstin, pos, lsr #3		/* Bits to bytes.  */
+	sub	pos, tmp2, pos
+#ifdef __AARCH64EB__
+	lsr	data2, data2, pos
+#else
+	lsl	data2, data2, pos
+#endif
+	str	data2, [dst, #1]
+	str	data1, [dstin]
+#ifdef BUILD_STPCPY
+	add	dstin, dst, #8
+#endif
+	ret
+
+L(fp_le8):
+	rev	has_nul1, has_nul1
+	clz	pos, has_nul1
+	add	dst, dstin, pos, lsr #3		/* Bits to bytes.  */
+	subs	tmp2, pos, #24			/* Pos in bits. */
+	b.lt	L(fp_lt4)
+#ifdef __AARCH64EB__
+	mov	tmp2, #56
+	sub	pos, tmp2, pos
+	lsr	data2, data1, pos
+	lsr	data1, data1, #32
+#else
+	lsr	data2, data1, tmp2
+#endif
+	/* 4->7 bytes to copy.  */
+	str	data2w, [dst, #-3]
+	str	data1w, [dstin]
+#ifdef BUILD_STPCPY
+	mov	dstin, dst
+#endif
+	ret
+L(fp_lt4):
+	cbz	pos, L(fp_lt2)
+	/* 2->3 bytes to copy.  */
+#ifdef __AARCH64EB__
+	lsr	data1, data1, #48
+#endif
+	strh	data1w, [dstin]
+	/* Fall-through, one byte (max) to go.  */
+L(fp_lt2):
+	/* Null-terminated string.  Last character must be zero!  */
+	strb	wzr, [dst]
+#ifdef BUILD_STPCPY
+	mov	dstin, dst
+#endif
+	ret
+
+	.p2align 6
+	/* Aligning here ensures that the entry code and main loop all lies
+	   within one 64-byte cache line.  */
+L(bulk_entry):
+	sub	to_align, to_align, #16
+	stp	data1, data2, [dstin]
+	sub	src, srcin, to_align
+	sub	dst, dstin, to_align
+	b	L(entry_no_page_cross)
+
+	/* The inner loop deals with two Dwords at a time.  This has a
+	   slightly higher start-up cost, but we should win quite quickly,
+	   especially on cores with a high number of issue slots per
+	   cycle, as we get much better parallelism out of the operations.  */
+L(main_loop):
+	stp	data1, data2, [dst], #16
+L(entry_no_page_cross):
+	ldp	data1, data2, [src], #16
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+	bics	has_nul2, tmp3, tmp4
+	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
+	b.eq	L(main_loop)
+
+	/* Since we know we are copying at least 16 bytes, the fastest way
+	   to deal with the tail is to determine the location of the
+	   trailing NUL, then (re)copy the 16 bytes leading up to that.  */
+	cmp	has_nul1, #0
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul directly.  The
+	   easiest way to get the correct byte is to byte-swap the data
+	   and calculate the syndrome a second time.  */
+	csel	data1, data1, data2, ne
+	rev	data1, data1
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+#else
+	csel	has_nul1, has_nul1, has_nul2, ne
+#endif
+	rev	has_nul1, has_nul1
+	clz	pos, has_nul1
+	add	tmp1, pos, #72
+	add	pos, pos, #8
+	csel	pos, pos, tmp1, ne
+	add	src, src, pos, lsr #3
+	add	dst, dst, pos, lsr #3
+	ldp	data1, data2, [src, #-32]
+	stp	data1, data2, [dst, #-16]
+#ifdef BUILD_STPCPY
+	sub	dstin, dst, #1
+#endif
+	ret
+
+L(page_cross):
+	bic	src, srcin, #15
+	/* Start by loading two words at [srcin & ~15], then forcing the
+	   bytes that precede srcin to 0xff.  This means they never look
+	   like termination bytes.  */
+	ldp	data1, data2, [src]
+	lsl	tmp1, tmp1, #3	/* Bytes beyond alignment -> bits.  */
+	tst	to_align, #7
+	csetm	tmp2, ne
+#ifdef __AARCH64EB__
+	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#else
+	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#endif
+	orr	data1, data1, tmp2
+	orr	data2a, data2, tmp2
+	cmp	to_align, #8
+	csinv	data1, data1, xzr, lt
+	csel	data2, data2, data2a, lt
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+	bics	has_nul2, tmp3, tmp4
+	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
+	b.eq	L(page_cross_ok)
+	/* We now need to make data1 and data2 look like they've been
+	   loaded directly from srcin.  Do a rotate on the 128-bit value.  */
+	lsl	tmp1, to_align, #3	/* Bytes->bits.  */
+	neg	tmp2, to_align, lsl #3
+#ifdef __AARCH64EB__
+	lsl	data1a, data1, tmp1
+	lsr	tmp4, data2, tmp2
+	lsl	data2, data2, tmp1
+	orr	tmp4, tmp4, data1a
+	cmp	to_align, #8
+	csel	data1, tmp4, data2, lt
+	rev	tmp2, data1
+	rev	tmp4, data2
+	sub	tmp1, tmp2, zeroones
+	orr	tmp2, tmp2, #REP8_7f
+	sub	tmp3, tmp4, zeroones
+	orr	tmp4, tmp4, #REP8_7f
+#else
+	lsr	data1a, data1, tmp1
+	lsl	tmp4, data2, tmp2
+	lsr	data2, data2, tmp1
+	orr	tmp4, tmp4, data1a
+	cmp	to_align, #8
+	csel	data1, tmp4, data2, lt
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+#endif
+	bic	has_nul1, tmp1, tmp2
+	cbnz	has_nul1, L(fp_le8)
+	bic	has_nul2, tmp3, tmp4
+	b	L(fp_gt8)
+END (STRCPY)
+
+#ifdef BUILD_STPCPY
+weak_alias (__stpcpy, stpcpy)
+libc_hidden_def (__stpcpy)
+libc_hidden_builtin_def (stpcpy)
+#else
+libc_hidden_builtin_def (strcpy)
+#endif
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
new file mode 100644
index 0000000000..feb9e48abc
--- /dev/null
+++ b/sysdeps/aarch64/strlen.S
@@ -0,0 +1,217 @@
+/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
+ */
+
+/* To test the page crossing code path more thoroughly, compile with
+   -DTEST_PAGE_CROSS - this will force all calls through the slower
+   entry path.  This option is not intended for production use.  */
+
+/* Arguments and results.  */
+#define srcin		x0
+#define len		x0
+
+/* Locals and temporaries.  */
+#define src		x1
+#define data1		x2
+#define data2		x3
+#define has_nul1	x4
+#define has_nul2	x5
+#define tmp1		x4
+#define tmp2		x5
+#define tmp3		x6
+#define tmp4		x7
+#define zeroones	x8
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word. A faster check
+	   (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
+	   false hits for characters 129..255.	*/
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+#ifdef TEST_PAGE_CROSS
+# define MIN_PAGE_SIZE 15
+#else
+# define MIN_PAGE_SIZE 4096
+#endif
+
+	/* Since strings are short on average, we check the first 16 bytes
+	   of the string for a NUL character.  In order to do an unaligned ldp
+	   safely we have to do a page cross check first.  If there is a NUL
+	   byte we calculate the length from the 2 8-byte words using
+	   conditional select to reduce branch mispredictions (it is unlikely
+	   strlen will be repeatedly called on strings with the same length).
+
+	   If the string is longer than 16 bytes, we align src so don't need
+	   further page cross checks, and process 32 bytes per iteration
+	   using the fast NUL check.  If we encounter non-ASCII characters,
+	   fallback to a second loop using the full NUL check.
+
+	   If the page cross check fails, we read 16 bytes from an aligned
+	   address, remove any characters before the string, and continue
+	   in the main loop using aligned loads.  Since strings crossing a
+	   page in the first 16 bytes are rare (probability of
+	   16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
+
+	   AArch64 systems have a minimum page size of 4k.  We don't bother
+	   checking for larger page sizes - the cost of setting up the correct
+	   page size is just not worth the extra gain from a small reduction in
+	   the cases taking the slow path.  Note that we only care about
+	   whether the first fetch, which may be misaligned, crosses a page
+	   boundary.  */
+
+ENTRY_ALIGN (strlen, 6)
+	and	tmp1, srcin, MIN_PAGE_SIZE - 1
+	mov	zeroones, REP8_01
+	cmp	tmp1, MIN_PAGE_SIZE - 16
+	b.gt	L(page_cross)
+	ldp	data1, data2, [srcin]
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul1/2 directly.
+	   Since we expect strings to be small and early-exit,
+	   byte-swap the data now so has_null1/2 will be correct.  */
+	rev	data1, data1
+	rev	data2, data2
+#endif
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(main_loop_entry)
+
+	/* Enter with C = has_nul1 == 0.  */
+	csel	has_nul1, has_nul1, has_nul2, cc
+	mov	len, 8
+	rev	has_nul1, has_nul1
+	clz	tmp1, has_nul1
+	csel	len, xzr, len, cc
+	add	len, len, tmp1, lsr 3
+	ret
+
+	/* The inner loop processes 32 bytes per iteration and uses the fast
+	   NUL check.  If we encounter non-ASCII characters, use a second
+	   loop with the accurate NUL check.  */
+	.p2align 4
+L(main_loop_entry):
+	bic	src, srcin, 15
+	sub	src, src, 16
+L(main_loop):
+	ldp	data1, data2, [src, 32]!
+L(page_cross_entry):
+	sub	tmp1, data1, zeroones
+	sub	tmp3, data2, zeroones
+	orr	tmp2, tmp1, tmp3
+	tst	tmp2, zeroones, lsl 7
+	bne	1f
+	ldp	data1, data2, [src, 16]
+	sub	tmp1, data1, zeroones
+	sub	tmp3, data2, zeroones
+	orr	tmp2, tmp1, tmp3
+	tst	tmp2, zeroones, lsl 7
+	beq	L(main_loop)
+	add	src, src, 16
+1:
+	/* The fast check failed, so do the slower, accurate NUL check.	 */
+	orr	tmp2, data1, REP8_7f
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(nonascii_loop)
+
+	/* Enter with C = has_nul1 == 0.  */
+L(tail):
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul1/2 directly.  The
+	   easiest way to get the correct byte is to byte-swap the data
+	   and calculate the syndrome a second time.  */
+	csel	data1, data1, data2, cc
+	rev	data1, data1
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	bic	has_nul1, tmp1, tmp2
+#else
+	csel	has_nul1, has_nul1, has_nul2, cc
+#endif
+	sub	len, src, srcin
+	rev	has_nul1, has_nul1
+	add	tmp2, len, 8
+	clz	tmp1, has_nul1
+	csel	len, len, tmp2, cc
+	add	len, len, tmp1, lsr 3
+	ret
+
+L(nonascii_loop):
+	ldp	data1, data2, [src, 16]!
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	bne	L(tail)
+	ldp	data1, data2, [src, 16]!
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(nonascii_loop)
+	b	L(tail)
+
+	/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
+	   srcin to 0x7f, so we ignore any NUL bytes before the string.
+	   Then continue in the aligned loop.  */
+L(page_cross):
+	bic	src, srcin, 15
+	ldp	data1, data2, [src]
+	lsl	tmp1, srcin, 3
+	mov	tmp4, -1
+#ifdef __AARCH64EB__
+	/* Big-endian.	Early bytes are at MSB.	 */
+	lsr	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsl	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
+#endif
+	orr	tmp1, tmp1, REP8_80
+	orn	data1, data1, tmp1
+	orn	tmp2, data2, tmp1
+	tst	srcin, 8
+	csel	data1, data1, tmp4, eq
+	csel	data2, data2, tmp2, eq
+	b	L(page_cross_entry)
+END (strlen)
+libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/aarch64/strncmp.S b/sysdeps/aarch64/strncmp.S
new file mode 100644
index 0000000000..483b6fdbca
--- /dev/null
+++ b/sysdeps/aarch64/strncmp.S
@@ -0,0 +1,204 @@
+/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+/* Parameters and result.  */
+#define src1		x0
+#define src2		x1
+#define limit		x2
+#define result		x0
+
+/* Internal variables.  */
+#define data1		x3
+#define data1w		w3
+#define data2		x4
+#define data2w		w4
+#define has_nul		x5
+#define diff		x6
+#define syndrome	x7
+#define tmp1		x8
+#define tmp2		x9
+#define tmp3		x10
+#define zeroones	x11
+#define pos		x12
+#define limit_wd	x13
+#define mask		x14
+#define endloop		x15
+
+ENTRY_ALIGN_AND_PAD (strncmp, 6, 7)
+	cbz	limit, L(ret0)
+	eor	tmp1, src1, src2
+	mov	zeroones, #REP8_01
+	tst	tmp1, #7
+	b.ne	L(misaligned8)
+	ands	tmp1, src1, #7
+	b.ne	L(mutual_align)
+	/* Calculate the number of full and partial words -1.  */
+	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
+	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+	/* Start of performance-critical section  -- one 64B cache line.  */
+L(loop_aligned):
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+L(start_realigned):
+	subs	limit_wd, limit_wd, #1
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.  */
+	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	ccmp	endloop, #0, #0, eq
+	b.eq	L(loop_aligned)
+	/* End of performance-critical section  -- one 64B cache line.  */
+
+	/* Not reached the limit, must have found the end or a diff.  */
+	tbz	limit_wd, #63, L(not_limit)
+
+	/* Limit % 8 == 0 => all bytes significant.  */
+	ands	limit, limit, #7
+	b.eq	L(not_limit)
+
+	lsl	limit, limit, #3	/* Bits -> bytes.  */
+	mov	mask, #~0
+#ifdef __AARCH64EB__
+	lsr	mask, mask, limit
+#else
+	lsl	mask, mask, limit
+#endif
+	bic	data1, data1, mask
+	bic	data2, data2, mask
+
+	/* Make sure that the NUL byte is marked in the syndrome.  */
+	orr	has_nul, has_nul, mask
+
+L(not_limit):
+	orr	syndrome, diff, has_nul
+
+#ifndef	__AARCH64EB__
+	rev	syndrome, syndrome
+	rev	data1, data1
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	clz	pos, syndrome
+	rev	data2, data2
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	RET
+#else
+	/* For big-endian we cannot use the trick with the syndrome value
+	   as carry-propagation can corrupt the upper bits if the trailing
+	   bytes in the string contain 0x01.  */
+	/* However, if there is no NUL byte in the dword, we can generate
+	   the result directly.  We can't just subtract the bytes as the
+	   MSB might be significant.  */
+	cbnz	has_nul, 1f
+	cmp	data1, data2
+	cset	result, ne
+	cneg	result, result, lo
+	RET
+1:
+	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
+	rev	tmp3, data1
+	sub	tmp1, tmp3, zeroones
+	orr	tmp2, tmp3, #REP8_7f
+	bic	has_nul, tmp1, tmp2
+	rev	has_nul, has_nul
+	orr	syndrome, diff, has_nul
+	clz	pos, syndrome
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	RET
+#endif
+
+L(mutual_align):
+	/* Sources are mutually aligned, but are not currently at an
+	   alignment boundary.  Round down the addresses and then mask off
+	   the bytes that precede the start point.
+	   We also need to adjust the limit calculations, but without
+	   overflowing if the limit is near ULONG_MAX.  */
+	bic	src1, src1, #7
+	bic	src2, src2, #7
+	ldr	data1, [src1], #8
+	neg	tmp3, tmp1, lsl #3	/* 64 - bits(bytes beyond align). */
+	ldr	data2, [src2], #8
+	mov	tmp2, #~0
+	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
+#ifdef __AARCH64EB__
+	/* Big-endian.  Early bytes are at MSB.  */
+	lsl	tmp2, tmp2, tmp3	/* Shift (tmp1 & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsr	tmp2, tmp2, tmp3	/* Shift (tmp1 & 63).  */
+#endif
+	and	tmp3, limit_wd, #7
+	lsr	limit_wd, limit_wd, #3
+	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
+	add	limit, limit, tmp1
+	add	tmp3, tmp3, tmp1
+	orr	data1, data1, tmp2
+	orr	data2, data2, tmp2
+	add	limit_wd, limit_wd, tmp3, lsr #3
+	b	L(start_realigned)
+
+L(ret0):
+	mov	result, #0
+	RET
+
+	.p2align 6
+L(misaligned8):
+	sub	limit, limit, #1
+1:
+	/* Perhaps we can do better than this.  */
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	subs	limit, limit, #1
+	ccmp	data1w, #1, #0, cs	/* NZCV = 0b0000.  */
+	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	b.eq	1b
+	sub	result, data1, data2
+	RET
+END (strncmp)
+libc_hidden_builtin_def (strncmp)
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
new file mode 100644
index 0000000000..9d6b19f685
--- /dev/null
+++ b/sysdeps/aarch64/strnlen.S
@@ -0,0 +1,162 @@
+/* strnlen - calculate the length of a string with limit.
+
+   Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+/* Arguments and results.  */
+#define srcin		x0
+#define len		x0
+#define limit		x1
+
+/* Locals and temporaries.  */
+#define src		x2
+#define data1		x3
+#define data2		x4
+#define data2a		x5
+#define has_nul1	x6
+#define has_nul2	x7
+#define tmp1		x8
+#define tmp2		x9
+#define tmp3		x10
+#define tmp4		x11
+#define zeroones	x12
+#define pos		x13
+#define limit_wd	x14
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+#define REP8_80 0x8080808080808080
+
+ENTRY_ALIGN_AND_PAD (__strnlen, 6, 9)
+	cbz	limit, L(hit_limit)
+	mov	zeroones, #REP8_01
+	bic	src, srcin, #15
+	ands	tmp1, srcin, #15
+	b.ne	L(misaligned)
+	/* Calculate the number of full and partial words -1.  */
+	sub	limit_wd, limit, #1	/* Limit != 0, so no underflow.  */
+	lsr	limit_wd, limit_wd, #4	/* Convert to Qwords.  */
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+	/* The inner loop deals with two Dwords at a time.  This has a
+	   slightly higher start-up cost, but we should win quite quickly,
+	   especially on cores with a high number of issue slots per
+	   cycle, as we get much better parallelism out of the operations.  */
+
+	/* Start of critial section -- keep to one 64Byte cache line.  */
+L(loop):
+	ldp	data1, data2, [src], #16
+L(realigned):
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, #REP8_7f
+	bic	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	subs	limit_wd, limit_wd, #1
+	orr	tmp1, has_nul1, has_nul2
+	ccmp	tmp1, #0, #0, pl	/* NZCV = 0000  */
+	b.eq	L(loop)
+	/* End of critical section -- keep to one 64Byte cache line.  */
+
+	orr	tmp1, has_nul1, has_nul2
+	cbz	tmp1, L(hit_limit)	/* No null in final Qword.  */
+
+	/* We know there's a null in the final Qword.  The easiest thing
+	   to do now is work out the length of the string and return
+	   MIN (len, limit).  */
+
+	sub	len, src, srcin
+	cbz	has_nul1, L(nul_in_data2)
+#ifdef __AARCH64EB__
+	mov	data2, data1
+#endif
+	sub	len, len, #8
+	mov	has_nul2, has_nul1
+L(nul_in_data2):
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul directly.  The
+	   easiest way to get the correct byte is to byte-swap the data
+	   and calculate the syndrome a second time.  */
+	rev	data2, data2
+	sub	tmp1, data2, zeroones
+	orr	tmp2, data2, #REP8_7f
+	bic	has_nul2, tmp1, tmp2
+#endif
+	sub	len, len, #8
+	rev	has_nul2, has_nul2
+	clz	pos, has_nul2
+	add	len, len, pos, lsr #3		/* Bits to bytes.  */
+	cmp	len, limit
+	csel	len, len, limit, ls		/* Return the lower value.  */
+	RET
+
+L(misaligned):
+	/* Deal with a partial first word.
+	   We're doing two things in parallel here;
+	   1) Calculate the number of words (but avoiding overflow if
+	      limit is near ULONG_MAX) - to do this we need to work out
+	      limit + tmp1 - 1 as a 65-bit value before shifting it;
+	   2) Load and mask the initial data words - we force the bytes
+	      before the ones we are interested in to 0xff - this ensures
+	      early bytes will not hit any zero detection.  */
+	sub	limit_wd, limit, #1
+	neg	tmp4, tmp1
+	cmp	tmp1, #8
+
+	and	tmp3, limit_wd, #15
+	lsr	limit_wd, limit_wd, #4
+	mov	tmp2, #~0
+
+	ldp	data1, data2, [src], #16
+	lsl	tmp4, tmp4, #3		/* Bytes beyond alignment -> bits.  */
+	add	tmp3, tmp3, tmp1
+
+#ifdef __AARCH64EB__
+	/* Big-endian.  Early bytes are at MSB.  */
+	lsl	tmp2, tmp2, tmp4	/* Shift (tmp1 & 63).  */
+#else
+	/* Little-endian.  Early bytes are at LSB.  */
+	lsr	tmp2, tmp2, tmp4	/* Shift (tmp1 & 63).  */
+#endif
+	add	limit_wd, limit_wd, tmp3, lsr #4
+
+	orr	data1, data1, tmp2
+	orr	data2a, data2, tmp2
+
+	csinv	data1, data1, xzr, le
+	csel	data2, data2, data2a, le
+	b	L(realigned)
+
+L(hit_limit):
+	mov	len, limit
+	RET
+END (__strnlen)
+libc_hidden_def (__strnlen)
+weak_alias (__strnlen, strnlen)
+libc_hidden_def (strnlen)
diff --git a/sysdeps/aarch64/strrchr.S b/sysdeps/aarch64/strrchr.S
new file mode 100644
index 0000000000..b49e81dd19
--- /dev/null
+++ b/sysdeps/aarch64/strrchr.S
@@ -0,0 +1,165 @@
+/* strrchr: find the last instance of a character in a string.
+
+   Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * Neon Available.
+ */
+
+/* Arguments and results.  */
+#define srcin		x0
+#define chrin		w1
+
+#define result		x0
+
+#define src		x2
+#define	tmp1		x3
+#define wtmp2		w4
+#define tmp3		x5
+#define src_match	x6
+#define src_offset	x7
+#define const_m1	x8
+#define tmp4		x9
+#define nul_match	x10
+#define chr_match	x11
+
+#define vrepchr		v0
+#define vdata1		v1
+#define vdata2		v2
+#define vhas_nul1	v3
+#define vhas_nul2	v4
+#define vhas_chr1	v5
+#define vhas_chr2	v6
+#define vrepmask_0	v7
+#define vrepmask_c	v16
+#define vend1		v17
+#define vend2		v18
+
+/* Core algorithm.
+
+   For each 32-byte hunk we calculate a 64-bit syndrome value, with
+   two bits per byte (LSB is always in bits 0 and 1, for both big
+   and little-endian systems).  For each tuple, bit 0 is set iff
+   the relevant byte matched the requested character; bit 1 is set
+   iff the relevant byte matched the NUL end of string (we trigger
+   off bit0 for the special case of looking for NUL).  Since the bits
+   in the syndrome reflect exactly the order in which things occur
+   in the original string a count_trailing_zeros() operation will
+   identify exactly which byte is causing the termination, and why.  */
+
+ENTRY(strrchr)
+	cbz	x1, L(null_search)
+	/* Magic constant 0x40100401 to allow us to identify which lane
+	   matches the requested byte.  Magic constant 0x80200802 used
+	   similarly for NUL termination.  */
+	mov	wtmp2, #0x0401
+	movk	wtmp2, #0x4010, lsl #16
+	dup	vrepchr.16b, chrin
+	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */
+	dup	vrepmask_c.4s, wtmp2
+	mov	src_offset, #0
+	ands	tmp1, srcin, #31
+	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */
+	b.eq	L(aligned)
+
+	/* Input string is not 32-byte aligned.  Rather than forcing
+	   the padding bytes to a safe value, we calculate the syndrome
+	   for all the bytes, but then mask off those bits of the
+	   syndrome that are related to the padding.  */
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	neg	tmp1, tmp1
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
+	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
+	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b	// 128->64
+	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
+	mov	nul_match, vhas_nul1.2d[0]
+	lsl	tmp1, tmp1, #1
+	mov	const_m1, #~0
+	mov	chr_match, vhas_chr1.2d[0]
+	lsr	tmp3, const_m1, tmp1
+
+	bic	nul_match, nul_match, tmp3	// Mask padding bits.
+	bic	chr_match, chr_match, tmp3	// Mask padding bits.
+	cbnz	nul_match, L(tail)
+
+L(loop):
+	cmp	chr_match, #0
+	csel	src_match, src, src_match, ne
+	csel	src_offset, chr_match, src_offset, ne
+L(aligned):
+	ld1	{vdata1.16b, vdata2.16b}, [src], #32
+	cmeq	vhas_nul1.16b, vdata1.16b, #0
+	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b
+	cmeq	vhas_nul2.16b, vdata2.16b, #0
+	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b
+	addp	vend1.16b, vhas_nul1.16b, vhas_nul2.16b	// 256->128
+	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b
+	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b
+	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr2.16b	// 256->128
+	addp	vend1.16b, vend1.16b, vend1.16b	// 128->64
+	addp	vhas_chr1.16b, vhas_chr1.16b, vhas_chr1.16b	// 128->64
+	mov	nul_match, vend1.2d[0]
+	mov	chr_match, vhas_chr1.2d[0]
+	cbz	nul_match, L(loop)
+
+	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b
+	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b
+	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul2.16b
+	addp	vhas_nul1.16b, vhas_nul1.16b, vhas_nul1.16b
+	mov	nul_match, vhas_nul1.2d[0]
+
+L(tail):
+	/* Work out exactly where the string ends.  */
+	sub	tmp4, nul_match, #1
+	eor	tmp4, tmp4, nul_match
+	ands	chr_match, chr_match, tmp4
+	/* And pick the values corresponding to the last match.  */
+	csel	src_match, src, src_match, ne
+	csel	src_offset, chr_match, src_offset, ne
+
+	/* Count down from the top of the syndrome to find the last match.  */
+	clz	tmp3, src_offset
+	/* Src_match points beyond the word containing the match, so we can
+	   simply subtract half the bit-offset into the syndrome.  Because
+	   we are counting down, we need to go back one more character.  */
+	add	tmp3, tmp3, #2
+	sub	result, src_match, tmp3, lsr #1
+	/* But if the syndrome shows no match was found, then return NULL.  */
+	cmp	src_offset, #0
+	csel	result, result, xzr, ne
+
+	ret
+L(null_search):
+	b	__strchrnul
+
+END(strrchr)
+weak_alias (strrchr, rindex)
+libc_hidden_builtin_def (strrchr)
diff --git a/sysdeps/aarch64/sysdep.h b/sysdeps/aarch64/sysdep.h
new file mode 100644
index 0000000000..af1af7092f
--- /dev/null
+++ b/sysdeps/aarch64/sysdep.h
@@ -0,0 +1,98 @@
+/* Copyright (C) 1997-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdeps/generic/sysdep.h>
+
+#ifdef	__ASSEMBLER__
+
+/* Syntactic details of assembler.  */
+
+#define ASM_SIZE_DIRECTIVE(name) .size name,.-name
+
+/* Define an entry point visible from C.  */
+#define ENTRY(name)						\
+  .globl C_SYMBOL_NAME(name);					\
+  .type C_SYMBOL_NAME(name),%function;				\
+  .align 4;							\
+  C_LABEL(name)							\
+  cfi_startproc;						\
+  CALL_MCOUNT
+
+/* Define an entry point visible from C.  */
+#define ENTRY_ALIGN(name, align)				\
+  .globl C_SYMBOL_NAME(name);					\
+  .type C_SYMBOL_NAME(name),%function;				\
+  .p2align align;						\
+  C_LABEL(name)							\
+  cfi_startproc;						\
+  CALL_MCOUNT
+
+/* Define an entry point visible from C with a specified alignment and
+   pre-padding with NOPs.  This can be used to ensure that a critical
+   loop within a function is cache line aligned.  Note this version
+   does not adjust the padding if CALL_MCOUNT is defined. */
+
+#define ENTRY_ALIGN_AND_PAD(name, align, padding)		\
+  .globl C_SYMBOL_NAME(name);					\
+  .type C_SYMBOL_NAME(name),%function;				\
+  .p2align align;						\
+  .rep padding;							\
+  nop;								\
+  .endr;							\
+  C_LABEL(name)							\
+  cfi_startproc;						\
+  CALL_MCOUNT
+
+#undef	END
+#define END(name)						\
+  cfi_endproc;							\
+  ASM_SIZE_DIRECTIVE(name)
+
+/* If compiled for profiling, call `mcount' at the start of each function.  */
+#ifdef	PROF
+# define CALL_MCOUNT						\
+	str	x30, [sp, #-16]!;				\
+	bl	mcount;						\
+	ldr	x30, [sp], #16	;
+#else
+# define CALL_MCOUNT		/* Do nothing.  */
+#endif
+
+/* Local label name for asm code.  */
+#ifndef L
+# define L(name)         .L##name
+#endif
+
+/* Load or store to/from a pc-relative EXPR into/from R, using T.  */
+#define LDST_PCREL(OP, R, T, EXPR)  \
+	adrp	T, EXPR;	    \
+	OP	R, [T, #:lo12:EXPR];\
+
+/* Load or store to/from a got-relative EXPR into/from R, using T.  */
+#define LDST_GLOBAL(OP, R, T, EXPR)     \
+	adrp	T, :got:EXPR;		\
+	ldr	T, [T, #:got_lo12:EXPR];\
+	OP	R, [T];
+
+/* Since C identifiers are not normally prefixed with an underscore
+   on this system, the asm identifier `syscall_error' intrudes on the
+   C name space.  Make sure we use an innocuous name.  */
+#define syscall_error	__syscall_error
+#define mcount		_mcount
+
+#endif	/* __ASSEMBLER__ */
diff --git a/sysdeps/aarch64/tls-macros.h b/sysdeps/aarch64/tls-macros.h
new file mode 100644
index 0000000000..81421851e7
--- /dev/null
+++ b/sysdeps/aarch64/tls-macros.h
@@ -0,0 +1,51 @@
+/* Copyright (C) 2009-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define TLS_LD(x) TLS_GD(x)
+
+#define TLS_GD(x)					\
+  ({ register unsigned long __result asm ("x0");	\
+     asm ("adrp	%0, :tlsgd:" #x "; "			\
+	  "add	%0, %0, #:tlsgd_lo12:" #x "; "		\
+	  "bl	__tls_get_addr;"			\
+	  "nop"						\
+	  : "=r" (__result)				\
+	  :						\
+	  : "x1", "x2", "x3", "x4", "x5", "x6",		\
+	    "x7", "x8", "x9", "x10", "x11", "x12",	\
+	    "x13", "x14", "x15", "x16", "x17", "x18",	\
+	    "x30", "memory", "cc");			\
+     (int *) (__result); })
+
+#define TLS_IE(x)					\
+  ({ register unsigned long __result asm ("x0");	\
+     register unsigned long __t;			\
+     asm ("mrs	%1, tpidr_el0; "			\
+	  "adrp	%0, :gottprel:" #x "; "			\
+	  "ldr	%0, [%0, #:gottprel_lo12:" #x "]; "	\
+	  "add	%0, %0, %1"				\
+	  : "=r" (__result), "=r" (__t));		\
+     (int *) (__result); })
+
+#define TLS_LE(x)					\
+  ({ register unsigned long __result asm ("x0");	\
+     asm ("mrs	%0, tpidr_el0; "			\
+	  "add	%0, %0, :tprel_hi12:" #x "; "		\
+	  "add	%0, %0, :tprel_lo12_nc:" #x		\
+	  : "=r" (__result));				\
+     (int *) (__result); })
diff --git a/sysdeps/aarch64/tlsdesc.c b/sysdeps/aarch64/tlsdesc.c
new file mode 100644
index 0000000000..9f3ff9b662
--- /dev/null
+++ b/sysdeps/aarch64/tlsdesc.c
@@ -0,0 +1,166 @@
+/* Manage TLS descriptors.  AArch64 version.
+
+   Copyright (C) 2011-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <link.h>
+#include <ldsodefs.h>
+#include <elf/dynamic-link.h>
+#include <tls.h>
+#include <dl-tlsdesc.h>
+#include <dl-unmap-segments.h>
+#include <tlsdeschtab.h>
+#include <atomic.h>
+
+/* The following functions take an entry_check_offset argument.  It's
+   computed by the caller as an offset between its entry point and the
+   call site, such that by adding the built-in return address that is
+   implicitly passed to the function with this offset, we can easily
+   obtain the caller's entry point to compare with the entry point
+   given in the TLS descriptor.  If it's changed, we want to return
+   immediately.  */
+
+/* This function is used to lazily resolve TLS_DESC RELA relocations.
+   The argument location is used to hold a pointer to the relocation.  */
+
+void
+attribute_hidden
+_dl_tlsdesc_resolve_rela_fixup (struct tlsdesc *td, struct link_map *l)
+{
+  const ElfW(Rela) *reloc = atomic_load_relaxed (&td->arg);
+
+  /* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
+     initial state in _dl_tlsdesc_resolve_early_return_p, other concurrent
+     callers will return and retry calling td->entry.  The updated td->entry
+     synchronizes with the single writer so all read accesses here can use
+     relaxed order.  */
+  if (_dl_tlsdesc_resolve_early_return_p
+      (td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
+    return;
+
+  /* The code below was borrowed from _dl_fixup(),
+     except for checking for STB_LOCAL.  */
+  const ElfW(Sym) *const symtab
+    = (const void *) D_PTR (l, l_info[DT_SYMTAB]);
+  const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
+  const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
+  lookup_t result;
+
+   /* Look up the target symbol.  If the normal lookup rules are not
+      used don't look in the global scope.  */
+  if (ELFW(ST_BIND) (sym->st_info) != STB_LOCAL
+      && __builtin_expect (ELFW(ST_VISIBILITY) (sym->st_other), 0) == 0)
+    {
+      const struct r_found_version *version = NULL;
+
+      if (l->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+	{
+	  const ElfW(Half) *vernum =
+	    (const void *) D_PTR (l, l_info[VERSYMIDX (DT_VERSYM)]);
+	  ElfW(Half) ndx = vernum[ELFW(R_SYM) (reloc->r_info)] & 0x7fff;
+	  version = &l->l_versions[ndx];
+	  if (version->hash == 0)
+	    version = NULL;
+	}
+
+      result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym,
+				    l->l_scope, version, ELF_RTYPE_CLASS_PLT,
+				    DL_LOOKUP_ADD_DEPENDENCY, NULL);
+    }
+  else
+    {
+      /* We already found the symbol.  The module (and therefore its load
+	 address) is also known.  */
+      result = l;
+    }
+
+  if (!sym)
+    {
+      atomic_store_relaxed (&td->arg, (void *) reloc->r_addend);
+      /* This release store synchronizes with the ldar acquire load
+	 instruction in _dl_tlsdesc_undefweak.  */
+      atomic_store_release (&td->entry, _dl_tlsdesc_undefweak);
+    }
+  else
+    {
+#  ifndef SHARED
+      CHECK_STATIC_TLS (l, result);
+#  else
+      if (!TRY_STATIC_TLS (l, result))
+	{
+	  void *p = _dl_make_tlsdesc_dynamic (result, sym->st_value
+					      + reloc->r_addend);
+	  atomic_store_relaxed (&td->arg, p);
+	  /* This release store synchronizes with the ldar acquire load
+	     instruction in _dl_tlsdesc_dynamic.  */
+	  atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
+	}
+      else
+#  endif
+	{
+	  void *p = (void*) (sym->st_value + result->l_tls_offset
+			     + reloc->r_addend);
+	  atomic_store_relaxed (&td->arg, p);
+	  /* This release store synchronizes with the ldar acquire load
+	     instruction in _dl_tlsdesc_return_lazy.  */
+	  atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
+	}
+    }
+
+  _dl_tlsdesc_wake_up_held_fixups ();
+}
+
+/* This function is used to avoid busy waiting for other threads to
+   complete the lazy relocation.  Once another thread wins the race to
+   relocate a TLS descriptor, it sets the descriptor up such that this
+   function is called to wait until the resolver releases the
+   lock.  */
+
+void
+attribute_hidden
+_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
+{
+  /* Maybe we're lucky and can return early.  */
+  if (caller != atomic_load_relaxed (&td->entry))
+    return;
+
+  /* Locking here will stop execution until the running resolver runs
+     _dl_tlsdesc_wake_up_held_fixups(), releasing the lock.
+
+     FIXME: We'd be better off waiting on a condition variable, such
+     that we didn't have to hold the lock throughout the relocation
+     processing.  */
+  __rtld_lock_lock_recursive (GL(dl_load_lock));
+  __rtld_lock_unlock_recursive (GL(dl_load_lock));
+}
+
+
+/* Unmap the dynamic object, but also release its TLS descriptor table
+   if there is one.  */
+
+void
+internal_function
+_dl_unmap (struct link_map *map)
+{
+  _dl_unmap_segments (map);
+
+#ifdef SHARED
+  if (map->l_mach.tlsdesc_table)
+    htab_delete (map->l_mach.tlsdesc_table);
+#endif
+}
diff --git a/sysdeps/aarch64/tlsdesc.sym b/sysdeps/aarch64/tlsdesc.sym
new file mode 100644
index 0000000000..63766af612
--- /dev/null
+++ b/sysdeps/aarch64/tlsdesc.sym
@@ -0,0 +1,15 @@
+#include <stddef.h>
+#include <sysdep.h>
+#include <tls.h>
+#include <link.h>
+#include <dl-tlsdesc.h>
+
+--
+
+-- Abuse tls.h macros to derive offsets relative to the thread register.
+
+TLSDESC_ARG			offsetof(struct tlsdesc, arg)
+
+TLSDESC_GEN_COUNT	offsetof(struct tlsdesc_dynamic_arg, gen_count)
+TLSDESC_MODID		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
+TLSDESC_MODOFF		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
diff --git a/sysdeps/aarch64/tst-audit.h b/sysdeps/aarch64/tst-audit.h
new file mode 100644
index 0000000000..dbb5ac46bb
--- /dev/null
+++ b/sysdeps/aarch64/tst-audit.h
@@ -0,0 +1,25 @@
+/* Definitions for testing PLT entry/exit auditing.  AArch64 version.
+
+   Copyright (C) 2005-2015 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define pltenter la_aarch64_gnu_pltenter
+#define pltexit la_aarch64_gnu_pltexit
+#define La_regs La_aarch64_regs
+#define La_retval La_aarch64_retval
+#define int_retval lrv_xreg[0]