724 files changed, 16693 insertions, 8898 deletions
diff --git a/sysdeps/x86_64/Implies b/sysdeps/x86_64/Implies
index 811c19a8f2..3d7ded70d2 100644
--- a/sysdeps/x86_64/Implies
+++ b/sysdeps/x86_64/Implies
@@ -1,4 +1,5 @@
 x86
+ieee754/float128
 ieee754/ldbl-96
 ieee754/dbl-64/wordsize-64
 ieee754/dbl-64
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 67ed5ba213..9f1562f1b2 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -7,6 +7,10 @@ endif
 
 ifeq ($(subdir),gmon)
 sysdep_routines += _mcount
+# We cannot compile _mcount.S with -pg because that would create
+# recursive calls when ENTRY is used.  Just copy the normal static
+# object.
+sysdep_noprof += _mcount
 endif
 
 ifeq ($(subdir),malloc)
@@ -23,7 +27,7 @@ ifeq ($(subdir),elf)
 CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
 		   -mno-mmx)
 
-sysdep-dl-routines += tlsdesc dl-tlsdesc
+sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr
 
 tests += ifuncmain8
 modules-names += ifuncmod8
@@ -39,28 +43,66 @@ $(objpfx)tst-quad2: $(objpfx)tst-quadmod2.so
 quad-pie-test += tst-quad1pie tst-quad2pie
 tests += $(quad-pie-test)
 tests-pie += $(quad-pie-test)
+test-extras += tst-quadmod1pie tst-quadmod2pie
+extra-test-objs += tst-quadmod1pie.o tst-quadmod2pie.o
 
 $(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
 $(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
 
-tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 tst-audit10
+CFLAGS-tst-quad1pie.c = $(PIE-ccflag)
+CFLAGS-tst-quad2pie.c = $(PIE-ccflag)
 
+tests += tst-x86_64-1
+modules-names += x86_64/tst-x86_64mod-1
+LDFLAGS-tst-x86_64mod-1.so = -Wl,-soname,tst-x86_64mod-1.so
+ifneq (no,$(have-tunables))
+# Test the state size for XSAVE when XSAVEC is disabled.
+tst-x86_64-1-ENV = GLIBC_TUNABLES=glibc.tune.hwcaps=-XSAVEC_Usable
+endif
+
+$(objpfx)tst-x86_64-1: $(objpfx)x86_64/tst-x86_64mod-1.so
+
+ifneq (no,$(have-tunables))
+tests += tst-platform-1
+modules-names += tst-platformmod-1 x86_64/tst-platformmod-2
+CFLAGS-tst-platform-1.c = -mno-avx
+CFLAGS-tst-platformmod-1.c = -mno-avx
+CFLAGS-tst-platformmod-2.c = -mno-avx
+LDFLAGS-tst-platformmod-2.so = -Wl,-soname,tst-platformmod-2.so
+$(objpfx)tst-platform-1: $(objpfx)tst-platformmod-1.so
+$(objpfx)tst-platform-1.out: $(objpfx)x86_64/tst-platformmod-2.so
+# Turn off AVX512F_Usable and AVX2_Usable so that GLRO(dl_platform) is
+# always set to x86_64.
+tst-platform-1-ENV = LD_PRELOAD=$(objpfx)\$$PLATFORM/tst-platformmod-2.so \
+	GLIBC_TUNABLES=glibc.tune.hwcaps=-AVX512F_Usable,-AVX2_Usable
+endif
+
+tests += tst-audit3 tst-audit4 tst-audit5 tst-audit6 tst-audit7 \
+	 tst-audit10 tst-sse tst-avx tst-avx512
+test-extras += tst-audit4-aux tst-audit10-aux \
+	       tst-avx-aux tst-avx512-aux
+extra-test-objs += tst-audit4-aux.o tst-audit10-aux.o \
+		   tst-avx-aux.o tst-avx512-aux.o
+
+ifeq ($(have-insert),yes)
 tests += tst-split-dynreloc
 LDFLAGS-tst-split-dynreloc = -Wl,-T,$(..)sysdeps/x86_64/tst-split-dynreloc.lds
 tst-split-dynreloc-ENV = LD_BIND_NOW=1
+endif
 
 modules-names += tst-auditmod3a tst-auditmod3b \
 		tst-auditmod4a tst-auditmod4b \
 		tst-auditmod5a tst-auditmod5b \
 		tst-auditmod6a tst-auditmod6b tst-auditmod6c \
 		tst-auditmod7a tst-auditmod7b \
-		tst-auditmod10a tst-auditmod10b
+		tst-auditmod10a tst-auditmod10b \
+		tst-ssemod tst-avxmod tst-avx512mod
 
 $(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so
 $(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so
 tst-audit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod3b.so
 
-$(objpfx)tst-audit4: $(objpfx)tst-auditmod4a.so
+$(objpfx)tst-audit4: $(objpfx)tst-audit4-aux.o $(objpfx)tst-auditmod4a.so
 $(objpfx)tst-audit4.out: $(objpfx)tst-auditmod4b.so
 tst-audit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod4b.so
 
@@ -77,25 +119,49 @@ $(objpfx)tst-audit7: $(objpfx)tst-auditmod7a.so
 $(objpfx)tst-audit7.out: $(objpfx)tst-auditmod7b.so
 tst-audit7-ENV = LD_AUDIT=$(objpfx)tst-auditmod7b.so
 
-$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so
+$(objpfx)tst-audit10: $(objpfx)tst-audit10-aux.o $(objpfx)tst-auditmod10a.so
 $(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so
 tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so
 
+$(objpfx)tst-sse: $(objpfx)tst-ssemod.so
+$(objpfx)tst-avx: $(objpfx)tst-avx-aux.o $(objpfx)tst-avxmod.so
+$(objpfx)tst-avx512: $(objpfx)tst-avx512-aux.o $(objpfx)tst-avx512mod.so
+
 AVX-CFLAGS=-mavx -mno-vzeroupper
-CFLAGS-tst-audit4.c += $(AVX-CFLAGS)
+CFLAGS-tst-audit4-aux.c += $(AVX-CFLAGS)
 CFLAGS-tst-auditmod4a.c += $(AVX-CFLAGS)
 CFLAGS-tst-auditmod4b.c += $(AVX-CFLAGS)
 CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS)
 CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS)
 CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS)
+CFLAGS-tst-avx-aux.c += $(AVX-CFLAGS)
+CFLAGS-tst-avxmod.c += $(AVX-CFLAGS)
 ifeq (yes,$(config-cflags-avx512))
 AVX512-CFLAGS = -mavx512f
-CFLAGS-tst-audit10.c += $(AVX512-CFLAGS)
+CFLAGS-tst-audit10-aux.c += $(AVX512-CFLAGS)
 CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS)
 CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS)
+CFLAGS-tst-avx512-aux.c += $(AVX512-CFLAGS)
+CFLAGS-tst-avx512mod.c += $(AVX512-CFLAGS)
 endif
 endif
 
 ifeq ($(subdir),csu)
-gen-as-const-headers += tlsdesc.sym
+gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
 endif
+
+$(objpfx)x86_64/tst-x86_64mod-1.os: $(objpfx)tst-x86_64mod-1.os
+	$(make-target-directory)
+	rm -f $@
+	ln $< $@
+
+do-tests-clean common-mostlyclean: tst-x86_64-1-clean
+
+.PHONY: tst-x86_64-1-clean
+tst-x86_64-1-clean:
+	-rm -rf $(objpfx)x86_64
+
+$(objpfx)x86_64/tst-platformmod-2.os: $(objpfx)tst-platformmod-2.os
+	$(make-target-directory)
+	rm -f $@
+	ln $< $@
diff --git a/sysdeps/x86_64/__longjmp.S b/sysdeps/x86_64/__longjmp.S
index c164626577..d7d123e4bc 100644
--- a/sysdeps/x86_64/__longjmp.S
+++ b/sysdeps/x86_64/__longjmp.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,9 +17,18 @@
 
 #include <sysdep.h>
 #include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
 #include <asm-syntax.h>
 #include <stap-probe.h>
 
+/* Don't restore shadow stack register if
+   1. Shadow stack isn't enabled.  Or
+   2. __longjmp is defined for __longjmp_cancel.
+ */
+#if !SHSTK_ENABLED || defined __longjmp
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
 /* Jump to the position specified by ENV, causing the
    setjmp call there to return VAL, or 1 if VAL is 0.
    void __longjmp (__jmp_buf env, int val).  */
@@ -42,6 +51,41 @@ ENTRY(__longjmp)
 	orq %rax, %r9
 # endif
 #endif
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+	/* Check if Shadow Stack is enabled.  */
+	testl $X86_FEATURE_1_SHSTK, %fs:FEATURE_1_OFFSET
+	jz L(skip_ssp)
+# else
+	xorl %eax, %eax
+# endif
+	/* Check and adjust the Shadow-Stack-Pointer.  */
+	/* Get the current ssp.  */
+	rdsspq %rax
+	/* And compare it with the saved ssp value.  */
+	subq SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
+	je L(skip_ssp)
+	/* Compute the number of shadow stack frames to unwind and
+	   pop them with the incssp instruction.  incssp can only
+	   adjust the ssp by a value in [0..255], so use a loop if
+	   the number of frames is bigger than 255.  */
+	negq %rax
+	shrq $3, %rax
+	/* NB: We saved the Shadow-Stack-Pointer of setjmp.  Since we
+	       are restoring the Shadow-Stack-Pointer of setjmp's caller,
+	       we need to unwind the shadow stack by one more frame.  */
+	addq $1, %rax
+
+	movl $255, %ebx
+L(loop):
+	cmpq %rbx, %rax
+	cmovb %rax, %rbx
+	incsspq %rbx
+	subq %rbx, %rax
+	ja L(loop)
+
+L(skip_ssp):
+#endif
 	LIBC_PROBE (longjmp, 3, LP_SIZE@%RDI_LP, -4@%esi, LP_SIZE@%RDX_LP)
 	/* We add unwind information for the target here.  */
 	cfi_def_cfa(%rdi, 0)
diff --git a/sysdeps/x86_64/_mcount.S b/sysdeps/x86_64/_mcount.S
index 5d7edd2a29..a2f4068b61 100644
--- a/sysdeps/x86_64/_mcount.S
+++ b/sysdeps/x86_64/_mcount.S
@@ -1,5 +1,5 @@
 /* Machine-specific calling sequence for `mcount' profiling function.  x86-64 version.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    Contributed by Andreas Jaeger <aj@suse.de>.
    This file is part of the GNU C Library.
 
@@ -24,81 +24,102 @@
 
 #include <sysdep.h>
 
-	.globl C_SYMBOL_NAME(_mcount)
-	.type C_SYMBOL_NAME(_mcount), @function
-	.align ALIGNARG(4)
-C_LABEL(_mcount)
+ENTRY(_mcount)
 	/* Allocate space for 7 registers.  */
 	subq	$56,%rsp
+	cfi_adjust_cfa_offset (56)
 	movq	%rax,(%rsp)
+	cfi_rel_offset (rax, 0)
 	movq	%rcx,8(%rsp)
+	cfi_rel_offset (rcx, 8)
 	movq	%rdx,16(%rsp)
+	cfi_rel_offset (rdx, 16)
 	movq	%rsi,24(%rsp)
+	cfi_rel_offset (rsi, 24)
 	movq	%rdi,32(%rsp)
+	cfi_rel_offset (rdi, 32)
 	movq	%r8,40(%rsp)
+	cfi_rel_offset (r8, 40)
 	movq	%r9,48(%rsp)
+	cfi_rel_offset (r9, 48)
 
 	/* Setup parameter for __mcount_internal.  */
 	/* selfpc is the return address on the stack.  */
 	movq	56(%rsp),%rsi
 	/* Get frompc via the frame pointer.  */
 	movq	8(%rbp),%rdi
-#ifdef PIC
-	call C_SYMBOL_NAME(__mcount_internal)@PLT
-#else
 	call C_SYMBOL_NAME(__mcount_internal)
-#endif
 	/* Pop the saved registers.  Please note that `mcount' has no
 	   return value.  */
 	movq	48(%rsp),%r9
+	cfi_restore (r9)
 	movq	40(%rsp),%r8
+	cfi_restore (r8)
 	movq	32(%rsp),%rdi
+	cfi_restore (rdi)
 	movq	24(%rsp),%rsi
+	cfi_restore (rsi)
 	movq	16(%rsp),%rdx
+	cfi_restore (rdx)
 	movq	8(%rsp),%rcx
+	cfi_restore (rcx)
 	movq	(%rsp),%rax
+	cfi_restore (rax)
 	addq	$56,%rsp
+	cfi_adjust_cfa_offset (-56)
 	ret
-
-	ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(_mcount))
+END(_mcount)
 
 #undef mcount
 weak_alias (_mcount, mcount)
 
-	.globl C_SYMBOL_NAME(__fentry__)
-	.type C_SYMBOL_NAME(__fentry__), @function
-	.align ALIGNARG(4)
-C_LABEL(__fentry__)
-	/* Allocate space for 7 registers.  */
+/* __fentry__ differs from _mcount in that it is called before the
+   function prologue.  This means (among other things) that it has
+   non-standard stack alignment on entry: (%RSP & 0xF) == 0.  */
+
+ENTRY(__fentry__)
+	/* Allocate space for 7 registers
+	   (+8 bytes for proper stack alignment).  */
 	subq	$64,%rsp
+	cfi_adjust_cfa_offset (64)
 	movq	%rax,(%rsp)
+	cfi_rel_offset (rax, 0)
 	movq	%rcx,8(%rsp)
+	cfi_rel_offset (rcx, 8)
 	movq	%rdx,16(%rsp)
+	cfi_rel_offset (rdx, 16)
 	movq	%rsi,24(%rsp)
+	cfi_rel_offset (rsi, 24)
 	movq	%rdi,32(%rsp)
+	cfi_rel_offset (rdi, 32)
 	movq	%r8,40(%rsp)
+	cfi_rel_offset (r8, 40)
 	movq	%r9,48(%rsp)
+	cfi_rel_offset (r9, 48)
 
 	/* Setup parameter for __mcount_internal.  */
 	/* selfpc is the return address on the stack.  */
 	movq	64(%rsp),%rsi
 	/* caller is the return address above it */
 	movq	72(%rsp),%rdi
-#ifdef PIC
-	call C_SYMBOL_NAME(__mcount_internal)@PLT
-#else
 	call C_SYMBOL_NAME(__mcount_internal)
-#endif
 	/* Pop the saved registers.  Please note that `__fentry__' has no
 	   return value.  */
 	movq	48(%rsp),%r9
+	cfi_restore (r9)
 	movq	40(%rsp),%r8
+	cfi_restore (r8)
 	movq	32(%rsp),%rdi
+	cfi_restore (rdi)
 	movq	24(%rsp),%rsi
+	cfi_restore (rsi)
 	movq	16(%rsp),%rdx
+	cfi_restore (rdx)
 	movq	8(%rsp),%rcx
+	cfi_restore (rcx)
 	movq	(%rsp),%rax
+	cfi_restore (rax)
 	addq	$64,%rsp
+	cfi_adjust_cfa_offset (-64)
 	ret
-
-	ASM_SIZE_DIRECTIVE(C_SYMBOL_NAME(__fentry__))
+END(__fentry__)
diff --git a/sysdeps/x86_64/add_n.S b/sysdeps/x86_64/add_n.S
index fc99811476..ba9699a2f0 100644
--- a/sysdeps/x86_64/add_n.S
+++ b/sysdeps/x86_64/add_n.S
@@ -1,6 +1,6 @@
 /* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
    sum in a third limb vector.
-   Copyright (C) 2006-2016 Free Software Foundation, Inc.
+   Copyright (C) 2006-2018 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/addmul_1.S b/sysdeps/x86_64/addmul_1.S
index ab7c2fa701..eefe8004b0 100644
--- a/sysdeps/x86_64/addmul_1.S
+++ b/sysdeps/x86_64/addmul_1.S
@@ -1,6 +1,6 @@
 /* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
    the result to a second limb vector.
-   Copyright (C) 2003-2016 Free Software Foundation, Inc.
+   Copyright (C) 2003-2018 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/atomic-machine.h b/sysdeps/x86_64/atomic-machine.h
index a5b86eb3ce..9d31c64962 100644
--- a/sysdeps/x86_64/atomic-machine.h
+++ b/sysdeps/x86_64/atomic-machine.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -16,10 +16,12 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <stdint.h>
-#include <tls.h>	/* For tcbhead_t.  */
-#include <libc-internal.h>
+#ifndef _X86_64_ATOMIC_MACHINE_H
+#define _X86_64_ATOMIC_MACHINE_H 1
 
+#include <stdint.h>
+#include <tls.h>                   /* For tcbhead_t.  */
+#include <libc-pointer-arith.h>    /* For cast_to_integer.  */
 
 typedef int8_t atomic8_t;
 typedef uint8_t uatomic8_t;
@@ -57,6 +59,7 @@ typedef uintmax_t uatomic_max_t;
 
 #define __HAVE_64B_ATOMICS 1
 #define USE_ATOMIC_COMPILER_BUILTINS 1
+#define ATOMIC_EXCHANGE_USES_CAS 0
 
 #define atomic_compare_and_exchange_val_acq(mem, newval, oldval) \
   __sync_val_compare_and_swap (mem, oldval, newval)
@@ -475,3 +478,5 @@ typedef uintmax_t uatomic_max_t;
     __asm __volatile (LOCK_PREFIX "orl $0, (%%rsp)" ::: "memory")
 #define atomic_read_barrier() __asm ("" ::: "memory")
 #define atomic_write_barrier() __asm ("" ::: "memory")
+
+#endif /* atomic-machine.h */
diff --git a/sysdeps/x86_64/backtrace.c b/sysdeps/x86_64/backtrace.c
deleted file mode 100644
index e04407c516..0000000000
--- a/sysdeps/x86_64/backtrace.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/* Return backtrace of current program state.
-   Copyright (C) 2003-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <libc-lock.h>
-#include <dlfcn.h>
-#include <execinfo.h>
-#include <stdlib.h>
-#include <unwind.h>
-
-struct trace_arg
-{
-  void **array;
-  _Unwind_Word cfa;
-  int cnt;
-  int size;
-};
-
-#ifdef SHARED
-static _Unwind_Reason_Code (*unwind_backtrace) (_Unwind_Trace_Fn, void *);
-static _Unwind_Ptr (*unwind_getip) (struct _Unwind_Context *);
-static _Unwind_Word (*unwind_getcfa) (struct _Unwind_Context *);
-static void *libgcc_handle;
-
-
-/* Dummy version in case libgcc_s does not contain the real code.  */
-static _Unwind_Word
-dummy_getcfa (struct _Unwind_Context *ctx __attribute__ ((unused)))
-{
-  return 0;
-}
-
-
-static void
-init (void)
-{
-  libgcc_handle = __libc_dlopen ("libgcc_s.so.1");
-
-  if (libgcc_handle == NULL)
-    return;
-
-  unwind_backtrace = __libc_dlsym (libgcc_handle, "_Unwind_Backtrace");
-  unwind_getip = __libc_dlsym (libgcc_handle, "_Unwind_GetIP");
-  if (unwind_getip == NULL)
-    unwind_backtrace = NULL;
-  unwind_getcfa = (__libc_dlsym (libgcc_handle, "_Unwind_GetCFA")
-		  ?: dummy_getcfa);
-}
-#else
-# define unwind_backtrace _Unwind_Backtrace
-# define unwind_getip _Unwind_GetIP
-# define unwind_getcfa _Unwind_GetCFA
-#endif
-
-static _Unwind_Reason_Code
-backtrace_helper (struct _Unwind_Context *ctx, void *a)
-{
-  struct trace_arg *arg = a;
-
-  /* We are first called with address in the __backtrace function.
-     Skip it.  */
-  if (arg->cnt != -1)
-    {
-      arg->array[arg->cnt] = (void *) unwind_getip (ctx);
-
-      /* Check whether we make any progress.  */
-      _Unwind_Word cfa = unwind_getcfa (ctx);
-
-      if (arg->cnt > 0 && arg->array[arg->cnt - 1] == arg->array[arg->cnt]
-	 && cfa == arg->cfa)
-       return _URC_END_OF_STACK;
-      arg->cfa = cfa;
-    }
-  if (++arg->cnt == arg->size)
-    return _URC_END_OF_STACK;
-  return _URC_NO_REASON;
-}
-
-int
-__backtrace (void **array, int size)
-{
-  struct trace_arg arg = { .array = array, .cfa = 0, .size = size, .cnt = -1 };
-
-  if (size <= 0)
-    return 0;
-
-#ifdef SHARED
-  __libc_once_define (static, once);
-
-  __libc_once (once, init);
-  if (unwind_backtrace == NULL)
-    return 0;
-#endif
-
-  unwind_backtrace (backtrace_helper, &arg);
-
-  /* _Unwind_Backtrace seems to put NULL address above
-     _start.  Fix it up here.  */
-  if (arg.cnt > 1 && arg.array[arg.cnt - 1] == NULL)
-    --arg.cnt;
-  return arg.cnt != -1 ? arg.cnt : 0;
-}
-weak_alias (__backtrace, backtrace)
-libc_hidden_def (__backtrace)
-
-
-#ifdef SHARED
-/* Free all resources if necessary.  */
-libc_freeres_fn (free_mem)
-{
-  unwind_backtrace = NULL;
-  if (libgcc_handle != NULL)
-    {
-      __libc_dlclose (libgcc_handle);
-      libgcc_handle = NULL;
-    }
-}
-#endif
diff --git a/sysdeps/x86_64/bsd-_setjmp.S b/sysdeps/x86_64/bsd-_setjmp.S
index 1a2a94f1a6..58c997de59 100644
--- a/sysdeps/x86_64/bsd-_setjmp.S
+++ b/sysdeps/x86_64/bsd-_setjmp.S
@@ -1,5 +1,5 @@
 /* BSD `_setjmp' entry point to `sigsetjmp (..., 0)'.  x86-64 version.
-   Copyright (C) 1994-2016 Free Software Foundation, Inc.
+   Copyright (C) 1994-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/bsd-setjmp.S b/sysdeps/x86_64/bsd-setjmp.S
index 11d9d8daa0..8e3c430dac 100644
--- a/sysdeps/x86_64/bsd-setjmp.S
+++ b/sysdeps/x86_64/bsd-setjmp.S
@@ -1,5 +1,5 @@
 /* BSD `setjmp' entry point to `sigsetjmp (..., 1)'.  x86-64 version.
-   Copyright (C) 1994-2016 Free Software Foundation, Inc.
+   Copyright (C) 1994-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/cacheinfo.c b/sysdeps/x86_64/cacheinfo.c
deleted file mode 100644
index 96463df064..0000000000
--- a/sysdeps/x86_64/cacheinfo.c
+++ /dev/null
@@ -1,665 +0,0 @@
-/* x86_64 cache info.
-   Copyright (C) 2003-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <cpuid.h>
-#include <init-arch.h>
-
-#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
-#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
-#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
-
-static const struct intel_02_cache_info
-{
-  unsigned char idx;
-  unsigned char assoc;
-  unsigned char linesize;
-  unsigned char rel_name;
-  unsigned int size;
-} intel_02_known [] =
-  {
-#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
-    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
-    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
-    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
-    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
-    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
-    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
-    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
-    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
-    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
-    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
-    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
-    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
-    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
-    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
-    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
-    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
-    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
-    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
-    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
-    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
-    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
-    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
-    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
-    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
-    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
-    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
-    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
-    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
-    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
-    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
-    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
-    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
-    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
-    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
-    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
-    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
-    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
-    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
-    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
-    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
-    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
-    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
-    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
-    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
-    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
-    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
-    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
-    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
-    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
-    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
-    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
-    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
-    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
-    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
-    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
-    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
-    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
-    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
-    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
-    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
-    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
-    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
-    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
-    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
-    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
-    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
-    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
-    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
-  };
-
-#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
-
-static int
-intel_02_known_compare (const void *p1, const void *p2)
-{
-  const struct intel_02_cache_info *i1;
-  const struct intel_02_cache_info *i2;
-
-  i1 = (const struct intel_02_cache_info *) p1;
-  i2 = (const struct intel_02_cache_info *) p2;
-
-  if (i1->idx == i2->idx)
-    return 0;
-
-  return i1->idx < i2->idx ? -1 : 1;
-}
-
-
-static long int
-__attribute__ ((noinline))
-intel_check_word (int name, unsigned int value, bool *has_level_2,
-		  bool *no_level_2_or_3)
-{
-  if ((value & 0x80000000) != 0)
-    /* The register value is reserved.  */
-    return 0;
-
-  /* Fold the name.  The _SC_ constants are always in the order SIZE,
-     ASSOC, LINESIZE.  */
-  int folded_rel_name = (M(name) / 3) * 3;
-
-  while (value != 0)
-    {
-      unsigned int byte = value & 0xff;
-
-      if (byte == 0x40)
-	{
-	  *no_level_2_or_3 = true;
-
-	  if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
-	    /* No need to look further.  */
-	    break;
-	}
-      else if (byte == 0xff)
-	{
-	  /* CPUID leaf 0x4 contains all the information.  We need to
-	     iterate over it.  */
-	  unsigned int eax;
-	  unsigned int ebx;
-	  unsigned int ecx;
-	  unsigned int edx;
-
-	  unsigned int round = 0;
-	  while (1)
-	    {
-	      __cpuid_count (4, round, eax, ebx, ecx, edx);
-
-	      enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
-	      if (type == null)
-		/* That was the end.  */
-		break;
-
-	      unsigned int level = (eax >> 5) & 0x7;
-
-	      if ((level == 1 && type == data
-		   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
-		  || (level == 1 && type == inst
-		      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
-		  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
-		  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
-		  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
-		{
-		  unsigned int offset = M(name) - folded_rel_name;
-
-		  if (offset == 0)
-		    /* Cache size.  */
-		    return (((ebx >> 22) + 1)
-			    * (((ebx >> 12) & 0x3ff) + 1)
-			    * ((ebx & 0xfff) + 1)
-			    * (ecx + 1));
-		  if (offset == 1)
-		    return (ebx >> 22) + 1;
-
-		  assert (offset == 2);
-		  return (ebx & 0xfff) + 1;
-		}
-
-	      ++round;
-	    }
-	  /* There is no other cache information anywhere else.  */
-	  break;
-	}
-      else
-	{
-	  if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
-	    {
-	      /* Intel reused this value.  For family 15, model 6 it
-		 specifies the 3rd level cache.  Otherwise the 2nd
-		 level cache.  */
-	      unsigned int family = GLRO(dl_x86_cpu_features).family;
-	      unsigned int model = GLRO(dl_x86_cpu_features).model;
-
-	      if (family == 15 && model == 6)
-		{
-		  /* The level 3 cache is encoded for this model like
-		     the level 2 cache is for other models.  Pretend
-		     the caller asked for the level 2 cache.  */
-		  name = (_SC_LEVEL2_CACHE_SIZE
-			  + (name - _SC_LEVEL3_CACHE_SIZE));
-		  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
-		}
-	    }
-
-	  struct intel_02_cache_info *found;
-	  struct intel_02_cache_info search;
-
-	  search.idx = byte;
-	  found = bsearch (&search, intel_02_known, nintel_02_known,
-			   sizeof (intel_02_known[0]), intel_02_known_compare);
-	  if (found != NULL)
-	    {
-	      if (found->rel_name == folded_rel_name)
-		{
-		  unsigned int offset = M(name) - folded_rel_name;
-
-		  if (offset == 0)
-		    /* Cache size.  */
-		    return found->size;
-		  if (offset == 1)
-		    return found->assoc;
-
-		  assert (offset == 2);
-		  return found->linesize;
-		}
-
-	      if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
-		*has_level_2 = true;
-	    }
-	}
-
-      /* Next byte for the next round.  */
-      value >>= 8;
-    }
-
-  /* Nothing found.  */
-  return 0;
-}
-
-
-static long int __attribute__ ((noinline))
-handle_intel (int name, unsigned int maxidx)
-{
-  assert (maxidx >= 2);
-
-  /* OK, we can use the CPUID instruction to get all info about the
-     caches.  */
-  unsigned int cnt = 0;
-  unsigned int max = 1;
-  long int result = 0;
-  bool no_level_2_or_3 = false;
-  bool has_level_2 = false;
-
-  while (cnt++ < max)
-    {
-      unsigned int eax;
-      unsigned int ebx;
-      unsigned int ecx;
-      unsigned int edx;
-      __cpuid (2, eax, ebx, ecx, edx);
-
-      /* The low byte of EAX in the first round contain the number of
-	 rounds we have to make.  At least one, the one we are already
-	 doing.  */
-      if (cnt == 1)
-	{
-	  max = eax & 0xff;
-	  eax &= 0xffffff00;
-	}
-
-      /* Process the individual registers' value.  */
-      result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
-      if (result != 0)
-	return result;
-
-      result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
-      if (result != 0)
-	return result;
-
-      result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
-      if (result != 0)
-	return result;
-
-      result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
-      if (result != 0)
-	return result;
-    }
-
-  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
-      && no_level_2_or_3)
-    return -1;
-
-  return 0;
-}
-
-
-static long int __attribute__ ((noinline))
-handle_amd (int name)
-{
-  unsigned int eax;
-  unsigned int ebx;
-  unsigned int ecx;
-  unsigned int edx;
-  __cpuid (0x80000000, eax, ebx, ecx, edx);
-
-  /* No level 4 cache (yet).  */
-  if (name > _SC_LEVEL3_CACHE_LINESIZE)
-    return 0;
-
-  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
-  if (eax < fn)
-    return 0;
-
-  __cpuid (fn, eax, ebx, ecx, edx);
-
-  if (name < _SC_LEVEL1_DCACHE_SIZE)
-    {
-      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
-      ecx = edx;
-    }
-
-  switch (name)
-    {
-    case _SC_LEVEL1_DCACHE_SIZE:
-      return (ecx >> 14) & 0x3fc00;
-
-    case _SC_LEVEL1_DCACHE_ASSOC:
-      ecx >>= 16;
-      if ((ecx & 0xff) == 0xff)
-	/* Fully associative.  */
-	return (ecx << 2) & 0x3fc00;
-      return ecx & 0xff;
-
-    case _SC_LEVEL1_DCACHE_LINESIZE:
-      return ecx & 0xff;
-
-    case _SC_LEVEL2_CACHE_SIZE:
-      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
-
-    case _SC_LEVEL2_CACHE_ASSOC:
-      switch ((ecx >> 12) & 0xf)
-	{
-	case 0:
-	case 1:
-	case 2:
-	case 4:
-	  return (ecx >> 12) & 0xf;
-	case 6:
-	  return 8;
-	case 8:
-	  return 16;
-	case 10:
-	  return 32;
-	case 11:
-	  return 48;
-	case 12:
-	  return 64;
-	case 13:
-	  return 96;
-	case 14:
-	  return 128;
-	case 15:
-	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
-	default:
-	  return 0;
-	}
-      /* NOTREACHED */
-
-    case _SC_LEVEL2_CACHE_LINESIZE:
-      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
-
-    case _SC_LEVEL3_CACHE_SIZE:
-      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;
-
-    case _SC_LEVEL3_CACHE_ASSOC:
-      switch ((edx >> 12) & 0xf)
-	{
-	case 0:
-	case 1:
-	case 2:
-	case 4:
-	  return (edx >> 12) & 0xf;
-	case 6:
-	  return 8;
-	case 8:
-	  return 16;
-	case 10:
-	  return 32;
-	case 11:
-	  return 48;
-	case 12:
-	  return 64;
-	case 13:
-	  return 96;
-	case 14:
-	  return 128;
-	case 15:
-	  return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
-	default:
-	  return 0;
-	}
-      /* NOTREACHED */
-
-    case _SC_LEVEL3_CACHE_LINESIZE:
-      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
-
-    default:
-      assert (! "cannot happen");
-    }
-  return -1;
-}
-
-
-/* Get the value of the system variable NAME.  */
-long int
-attribute_hidden
-__cache_sysconf (int name)
-{
-  if (is_intel)
-    return handle_intel (name, max_cpuid);
-
-  if (is_amd)
-    return handle_amd (name);
-
-  // XXX Fill in more vendors.
-
-  /* CPU not known, we have no information.  */
-  return 0;
-}
-
-
-/* Data cache size for use in memory and string routines, typically
-   L1 size, rounded to multiple of 256 bytes.  */
-long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
-long int __x86_data_cache_size attribute_hidden = 32 * 1024;
-/* Similar to __x86_data_cache_size_half, but not rounded.  */
-long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
-/* Similar to __x86_data_cache_size, but not rounded.  */
-long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
-/* Shared cache size for use in memory and string routines, typically
-   L2 or L3 size, rounded to multiple of 256 bytes.  */
-long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
-long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
-/* Similar to __x86_shared_cache_size_half, but not rounded.  */
-long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
-/* Similar to __x86_shared_cache_size, but not rounded.  */
-long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
-
-#ifndef DISABLE_PREFETCHW
-/* PREFETCHW support flag for use in memory and string routines.  */
-int __x86_prefetchw attribute_hidden;
-#endif
-
-
-static void
-__attribute__((constructor))
-init_cacheinfo (void)
-{
-  /* Find out what brand of processor.  */
-  unsigned int eax;
-  unsigned int ebx;
-  unsigned int ecx;
-  unsigned int edx;
-  int max_cpuid_ex;
-  long int data = -1;
-  long int shared = -1;
-  unsigned int level;
-  unsigned int threads = 0;
-
-  if (is_intel)
-    {
-      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
-
-      /* Try L3 first.  */
-      level  = 3;
-      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
-
-      if (shared <= 0)
-	{
-	  /* Try L2 otherwise.  */
-	  level  = 2;
-	  shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
-	}
-
-      /* Figure out the number of logical threads that share the
-	 highest cache level.  */
-      if (max_cpuid >= 4)
-	{
-	  unsigned int family = GLRO(dl_x86_cpu_features).family;
-	  unsigned int model = GLRO(dl_x86_cpu_features).model;
-
-	  int i = 0;
-
-	  /* Query until desired cache level is enumerated.  */
-	  do
-	    {
-	      __cpuid_count (4, i++, eax, ebx, ecx, edx);
-
-	      /* There seems to be a bug in at least some Pentium Ds
-		 which sometimes fail to iterate all cache parameters.
-		 Do not loop indefinitely here, stop in this case and
-		 assume there is no such information.  */
-	      if ((eax & 0x1f) == 0)
-		goto intel_bug_no_cache_info;
-	    }
-	  while (((eax >> 5) & 0x7) != level);
-
-	  threads = (eax >> 14) & 0x3ff;
-
-	  /* If max_cpuid >= 11, THREADS is the maximum number of
-	      addressable IDs for logical processors sharing the
-	      cache, instead of the maximum number of threads
-	      sharing the cache.  */
-	  if (threads && max_cpuid >= 11)
-	    {
-	      /* Find the number of logical processors shipped in
-		 one core and apply count mask.  */
-	      i = 0;
-	      while (1)
-		{
-		  __cpuid_count (11, i++, eax, ebx, ecx, edx);
-
-		  int shipped = ebx & 0xff;
-		  int type = ecx & 0xff0;
-		  if (shipped == 0 || type == 0)
-		    break;
-		  else if (type == 0x200)
-		    {
-		      int count_mask;
-
-		      /* Compute count mask.  */
-		      asm ("bsr %1, %0"
-			   : "=r" (count_mask) : "g" (threads));
-		      count_mask = ~(-1 << (count_mask + 1));
-		      threads = (shipped - 1) & count_mask;
-		      break;
-		    }
-		}
-	    }
-	  threads += 1;
-	  if (threads > 2 && level == 2 && family == 6)
-	    {
-	      switch (model)
-		{
-		case 0x57:
-		  /* Knights Landing has L2 cache shared by 2 cores.  */
-		case 0x37:
-		case 0x4a:
-		case 0x4d:
-		case 0x5a:
-		case 0x5d:
-		  /* Silvermont has L2 cache shared by 2 cores.  */
-		  threads = 2;
-		  break;
-		default:
-		  break;
-		}
-	    }
-	}
-      else
-	{
-	intel_bug_no_cache_info:
-	  /* Assume that all logical threads share the highest cache level.  */
-
-	  threads
-	    = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
-		>> 16) & 0xff);
-	}
-
-      /* Cap usage of highest cache level to the number of supported
-	 threads.  */
-      if (shared > 0 && threads > 0)
-	shared /= threads;
-    }
-  /* This spells out "AuthenticAMD".  */
-  else if (is_amd)
-    {
-      data   = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
-      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
-      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
-
-      /* Get maximum extended function. */
-      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
-
-      if (shared <= 0)
-	/* No shared L3 cache.  All we have is the L2 cache.  */
-	shared = core;
-      else
-	{
-	  /* Figure out the number of logical threads that share L3.  */
-	  if (max_cpuid_ex >= 0x80000008)
-	    {
-	      /* Get width of APIC ID.  */
-	      __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
-	      threads = 1 << ((ecx >> 12) & 0x0f);
-	    }
-
-	  if (threads == 0)
-	    {
-	      /* If APIC ID width is not available, use logical
-		 processor count.  */
-	      __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
-
-	      if ((edx & (1 << 28)) != 0)
-		threads = (ebx >> 16) & 0xff;
-	    }
-
-	  /* Cap usage of highest cache level to the number of
-	     supported threads.  */
-	  if (threads > 0)
-	    shared /= threads;
-
-	  /* Account for exclusive L2 and L3 caches.  */
-	  shared += core;
-	}
-
-#ifndef DISABLE_PREFETCHW
-      if (max_cpuid_ex >= 0x80000001)
-	{
-	  __cpuid (0x80000001, eax, ebx, ecx, edx);
-	  /*  PREFETCHW     || 3DNow!  */
-	  if ((ecx & 0x100) || (edx & 0x80000000))
-	    __x86_prefetchw = -1;
-	}
-#endif
-    }
-
-  if (data > 0)
-    {
-      __x86_raw_data_cache_size_half = data / 2;
-      __x86_raw_data_cache_size = data;
-      /* Round data cache size to multiple of 256 bytes.  */
-      data = data & ~255L;
-      __x86_data_cache_size_half = data / 2;
-      __x86_data_cache_size = data;
-    }
-
-  if (shared > 0)
-    {
-      __x86_raw_shared_cache_size_half = shared / 2;
-      __x86_raw_shared_cache_size = shared;
-      /* Round shared cache size to multiple of 256 bytes.  */
-      shared = shared & ~255L;
-      __x86_shared_cache_size_half = shared / 2;
-      __x86_shared_cache_size = shared;
-    }
-}
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index c72b9d3184..8674d14569 100644
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -1,13 +1,12 @@
 # This file is generated from configure.ac by Autoconf.  DO NOT EDIT!
  # Local configure fragment for sysdeps/x86_64.
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512 support in assembler" >&5
-$as_echo_n "checking for AVX512 support in assembler... " >&6; }
-if ${libc_cv_asm_avx512+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX512DQ support in assembler" >&5
+$as_echo_n "checking for AVX512DQ support in assembler... " >&6; }
+if ${libc_cv_asm_avx512dq+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat > conftest.s <<\EOF
-        vmovdqu64 %zmm0, (%rsp)
         vandpd (%rax), %zmm6, %zmm1
 EOF
 if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
@@ -16,16 +15,16 @@ if { ac_try='${CC-cc} -c $ASFLAGS conftest.s 1>&5'
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; }; then
-  libc_cv_asm_avx512=yes
+  libc_cv_asm_avx512dq=yes
 else
-  libc_cv_asm_avx512=no
+  libc_cv_asm_avx512dq=no
 fi
 rm -f conftest*
 fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512" >&5
-$as_echo "$libc_cv_asm_avx512" >&6; }
-if test $libc_cv_asm_avx512 == yes; then
-  $as_echo "#define HAVE_AVX512_ASM_SUPPORT 1" >>confdefs.h
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_avx512dq" >&5
+$as_echo "$libc_cv_asm_avx512dq" >&6; }
+if test $libc_cv_asm_avx512dq = yes; then
+  $as_echo "#define HAVE_AVX512DQ_ASM_SUPPORT 1" >>confdefs.h
 
 fi
 
@@ -40,7 +39,7 @@ else
   ac_status=$?
   $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
   test $ac_status = 0; }; }; then :
-  libc_cv_cc_avx512=$libc_cv_asm_avx512
+  libc_cv_cc_avx512=$libc_cv_asm_avx512dq
 else
   libc_cv_cc_avx512=no
 fi
@@ -77,7 +76,7 @@ rm -f conftest*
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_asm_mpx" >&5
 $as_echo "$libc_cv_asm_mpx" >&6; }
-if test $libc_cv_asm_mpx == yes; then
+if test $libc_cv_asm_mpx = yes; then
   $as_echo "#define HAVE_MPX_SUPPORT 1" >>confdefs.h
 
 fi
@@ -86,6 +85,41 @@ if test x"$build_mathvec" = xnotset; then
   build_mathvec=yes
 fi
 
+if test "$static_pie" = yes; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5
+$as_echo_n "checking for linker static PIE support... " >&6; }
+if ${libc_cv_ld_static_pie+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.s <<\EOF
+	.text
+	.global _start
+	.weak foo
+_start:
+	leaq	foo(%rip), %rax
+EOF
+  libc_cv_pie_option="-Wl,-pie"
+  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+    libc_cv_ld_static_pie=yes
+  else
+    libc_cv_ld_static_pie=no
+  fi
+rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5
+$as_echo "$libc_cv_ld_static_pie" >&6; }
+  if test "$libc_cv_ld_static_pie" != yes; then
+    as_fn_error $? "linker support for static PIE needed" "$LINENO" 5
+  fi
+fi
+
 $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
 
-# work around problem with autoconf and empty lines at the end of files
+
+test -n "$critic_missing" && as_fn_error $? "
+*** $critic_missing" "$LINENO" 5
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index 37b1059af3..b7d2c0124f 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -1,25 +1,24 @@
 GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
 # Local configure fragment for sysdeps/x86_64.
 
-dnl Check if asm supports AVX512.
-AC_CACHE_CHECK(for AVX512 support in assembler, libc_cv_asm_avx512, [dnl
+dnl Check if asm supports AVX512DQ.
+AC_CACHE_CHECK(for AVX512DQ support in assembler, libc_cv_asm_avx512dq, [dnl
 cat > conftest.s <<\EOF
-        vmovdqu64 %zmm0, (%rsp)
         vandpd (%rax), %zmm6, %zmm1
 EOF
 if AC_TRY_COMMAND(${CC-cc} -c $ASFLAGS conftest.s 1>&AS_MESSAGE_LOG_FD); then
-  libc_cv_asm_avx512=yes
+  libc_cv_asm_avx512dq=yes
 else
-  libc_cv_asm_avx512=no
+  libc_cv_asm_avx512dq=no
 fi
 rm -f conftest*])
-if test $libc_cv_asm_avx512 == yes; then
-  AC_DEFINE(HAVE_AVX512_ASM_SUPPORT)
+if test $libc_cv_asm_avx512dq = yes; then
+  AC_DEFINE(HAVE_AVX512DQ_ASM_SUPPORT)
 fi
 
 dnl Check if -mavx512f works.
 AC_CACHE_CHECK(for AVX512 support, libc_cv_cc_avx512, [dnl
-LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=$libc_cv_asm_avx512], [libc_cv_cc_avx512=no])
+LIBC_TRY_CC_OPTION([-mavx512f], [libc_cv_cc_avx512=$libc_cv_asm_avx512dq], [libc_cv_cc_avx512=no])
 ])
 if test $libc_cv_cc_avx512 = yes; then
   AC_DEFINE(HAVE_AVX512_SUPPORT)
@@ -37,7 +36,7 @@ else
   libc_cv_asm_mpx=no
 fi
 rm -f conftest*])
-if test $libc_cv_asm_mpx == yes; then
+if test $libc_cv_asm_mpx = yes; then
   AC_DEFINE(HAVE_MPX_SUPPORT)
 fi
 
@@ -45,7 +44,34 @@ if test x"$build_mathvec" = xnotset; then
   build_mathvec=yes
 fi
 
+dnl Check if linker supports static PIE with the fix for
+dnl
+dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782
+dnl
+if test "$static_pie" = yes; then
+  AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl
+cat > conftest.s <<\EOF
+	.text
+	.global _start
+	.weak foo
+_start:
+	leaq	foo(%rip), %rax
+EOF
+  libc_cv_pie_option="-Wl,-pie"
+  if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then
+    libc_cv_ld_static_pie=yes
+  else
+    libc_cv_ld_static_pie=no
+  fi
+rm -f conftest*])
+  if test "$libc_cv_ld_static_pie" != yes; then
+    AC_MSG_ERROR([linker support for static PIE needed])
+  fi
+fi
+
 dnl It is always possible to access static and hidden symbols in an
 dnl position independent way.
 AC_DEFINE(PI_STATIC_AND_HIDDEN)
-# work around problem with autoconf and empty lines at the end of files
+
+test -n "$critic_missing" && AC_MSG_ERROR([
+*** $critic_missing])
diff --git a/sysdeps/x86_64/crti.S b/sysdeps/x86_64/crti.S
index a34525974a..067ac14884 100644
--- a/sysdeps/x86_64/crti.S
+++ b/sysdeps/x86_64/crti.S
@@ -1,5 +1,5 @@
 /* Special .init and .fini section support for x86-64.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -58,15 +58,17 @@
 	.section .init,"ax",@progbits
 	.p2align 2
 	.globl _init
+	.hidden	_init
 	.type _init, @function
 _init:
+	_CET_ENDBR
 	/* Maintain 16-byte stack alignment for called functions.  */
 	subq $8, %rsp
 #if PREINIT_FUNCTION_WEAK
 	movq PREINIT_FUNCTION@GOTPCREL(%rip), %rax
 	testq %rax, %rax
 	je .Lno_weak_fn
-	call PREINIT_FUNCTION@PLT
+	call *%rax
 .Lno_weak_fn:
 #else
 	call PREINIT_FUNCTION
@@ -75,6 +77,8 @@ _init:
 	.section .fini,"ax",@progbits
 	.p2align 2
 	.globl _fini
+	.hidden	_fini
 	.type _fini, @function
 _fini:
+	_CET_ENDBR
 	subq $8, %rsp
diff --git a/sysdeps/x86_64/crtn.S b/sysdeps/x86_64/crtn.S
index b2fa0c6765..2463d742fd 100644
--- a/sysdeps/x86_64/crtn.S
+++ b/sysdeps/x86_64/crtn.S
@@ -1,5 +1,5 @@
 /* Special .init and .fini section support for x86-64.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/dl-irel.h b/sysdeps/x86_64/dl-irel.h
index 80d7d1dd78..6ecc50fb42 100644
--- a/sysdeps/x86_64/dl-irel.h
+++ b/sysdeps/x86_64/dl-irel.h
@@ -1,6 +1,6 @@
 /* Machine-dependent ELF indirect relocation inline functions.
    x86-64 version.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/dl-lookupcfg.h b/sysdeps/x86_64/dl-lookupcfg.h
index 033b475889..5399cf25ab 100644
--- a/sysdeps/x86_64/dl-lookupcfg.h
+++ b/sysdeps/x86_64/dl-lookupcfg.h
@@ -1,5 +1,5 @@
 /* Configuration of lookup functions.
-   Copyright (C) 2005-2016 Free Software Foundation, Inc.
+   Copyright (C) 2005-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -26,7 +26,6 @@
 
 struct link_map;
 
-extern void _dl_unmap (struct link_map *map)
-  internal_function attribute_hidden;
+extern void _dl_unmap (struct link_map *map) attribute_hidden;
 
 #define DL_UNMAP(map) _dl_unmap (map)
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 980ca73cf2..1942ed5061 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  x86-64 version.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>.
 
@@ -66,9 +66,9 @@ static inline int __attribute__ ((unused, always_inline))
 elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 {
   Elf64_Addr *got;
-  extern void _dl_runtime_resolve_sse (ElfW(Word)) attribute_hidden;
-  extern void _dl_runtime_resolve_avx (ElfW(Word)) attribute_hidden;
-  extern void _dl_runtime_resolve_avx512 (ElfW(Word)) attribute_hidden;
+  extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden;
+  extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden;
+  extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden;
   extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden;
   extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden;
   extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden;
@@ -117,12 +117,14 @@ elf_machine_runtime_setup (struct link_map *l, int lazy, int profile)
 	  /* This function will get called to fix up the GOT entry
 	     indicated by the offset on the stack, and then jump to
 	     the resolved address.  */
-	  if (HAS_ARCH_FEATURE (AVX512F_Usable))
-	    *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_avx512;
-	  else if (HAS_ARCH_FEATURE (AVX_Usable))
-	    *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_avx;
+	  if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+	    *(ElfW(Addr) *) (got + 2)
+	      = (HAS_ARCH_FEATURE (XSAVEC_Usable)
+		 ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
+		 : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
 	  else
-	    *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_resolve_sse;
+	    *(ElfW(Addr) *) (got + 2)
+	      = (ElfW(Addr)) &_dl_runtime_resolve_fxsave;
 	}
     }
 
@@ -220,15 +222,20 @@ _dl_start_user:\n\
 static inline void __attribute__ ((unused))
 dl_platform_init (void)
 {
+#if IS_IN (rtld)
+  /* In a static executable, init_cpu_features has already been called
+     early from __libc_start_main, so it is only called here for rtld.  */
+  init_cpu_features (&GLRO(dl_x86_cpu_features));
+#else
   if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
     /* Avoid an empty string which would disturb us.  */
     GLRO(dl_platform) = NULL;
-
-  init_cpu_features (&GLRO(dl_x86_cpu_features));
+#endif
 }
 
 static inline ElfW(Addr)
 elf_machine_fixup_plt (struct link_map *map, lookup_t t,
+		       const ElfW(Sym) *refsym, const ElfW(Sym) *sym,
 		       const ElfW(Rela) *reloc,
 		       ElfW(Addr) *reloc_addr, ElfW(Addr) value)
 {
@@ -299,15 +306,29 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
       const ElfW(Sym) *const refsym = sym;
 # endif
       struct link_map *sym_map = RESOLVE_MAP (&sym, version, r_type);
-      ElfW(Addr) value = (sym == NULL ? 0
-			  : (ElfW(Addr)) sym_map->l_addr + sym->st_value);
+      ElfW(Addr) value = SYMBOL_ADDRESS (sym_map, sym, true);
 
       if (sym != NULL
-	  && __builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC,
-			       0)
-	  && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1)
-	  && __builtin_expect (!skip_ifunc, 1))
-	value = ((ElfW(Addr) (*) (void)) value) ();
+	  && __glibc_unlikely (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC)
+	  && __glibc_likely (sym->st_shndx != SHN_UNDEF)
+	  && __glibc_likely (!skip_ifunc))
+	{
+# ifndef RTLD_BOOTSTRAP
+	  if (sym_map != map
+	      && sym_map->l_type != lt_executable
+	      && !sym_map->l_relocated)
+	    {
+	      const char *strtab
+		= (const char *) D_PTR (map, l_info[DT_STRTAB]);
+	      _dl_error_printf ("\
+%s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
+				RTLD_PROGNAME, map->l_name,
+				sym_map->l_name,
+				strtab + refsym->st_name);
+	    }
+# endif
+	  value = ((ElfW(Addr) (*) (void)) value) ();
+	}
 
       switch (r_type)
 	{
@@ -477,8 +498,8 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 	    break;
 	  memcpy (reloc_addr_arg, (void *) value,
 		  MIN (sym->st_size, refsym->st_size));
-	  if (__builtin_expect (sym->st_size > refsym->st_size, 0)
-	      || (__builtin_expect (sym->st_size < refsym->st_size, 0)
+	  if (__glibc_unlikely (sym->st_size > refsym->st_size)
+	      || (__glibc_unlikely (sym->st_size < refsym->st_size)
 		  && GLRO(dl_verbose)))
 	    {
 	      fmt = "\
@@ -531,7 +552,8 @@ elf_machine_lazy_rel (struct link_map *map,
   /* Check for unexpected PLT reloc type.  */
   if (__glibc_likely (r_type == R_X86_64_JUMP_SLOT))
     {
-      if (__builtin_expect (map->l_mach.plt, 0) == 0)
+      /* Prelink has been deprecated.  */
+      if (__glibc_likely (map->l_mach.plt == 0))
 	*reloc_addr += l_addr;
       else
 	*reloc_addr =
diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c
index 4625695dfb..269ce7e87d 100644
--- a/sysdeps/x86_64/dl-procinfo.c
+++ b/sysdeps/x86_64/dl-procinfo.c
@@ -1,5 +1,5 @@
 /* Data for x86-64 version of processor capability information.
-   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -39,19 +39,7 @@
 # define PROCINFO_CLASS
 #endif
 
-#if !defined PROCINFO_DECL && defined SHARED
-  ._dl_x86_cpu_features
-#else
-PROCINFO_CLASS struct cpu_features _dl_x86_cpu_features
-#endif
-#ifndef PROCINFO_DECL
-= { }
-#endif
-#if !defined SHARED || defined PROCINFO_DECL
-;
-#else
-,
-#endif
+#include <sysdeps/x86/dl-procinfo.c>
 
 #undef PROCINFO_DECL
 #undef PROCINFO_CLASS
diff --git a/sysdeps/x86_64/dl-tls.c b/sysdeps/x86_64/dl-tls.c
new file mode 100644
index 0000000000..533ee2b3a6
--- /dev/null
+++ b/sysdeps/x86_64/dl-tls.c
@@ -0,0 +1,53 @@
+/* Thread-local storage handling in the ELF dynamic linker.  x86-64 version.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef SHARED
+/* Work around GCC PR58066, due to which __tls_get_addr may be called
+   with an unaligned stack.  The compat implementation is in
+   tls_get_addr-compat.S.  */
+
+# include <dl-tls.h>
+
+/* Define __tls_get_addr within elf/dl-tls.c under a different
+   name.  */
+extern __typeof__ (__tls_get_addr) ___tls_get_addr;
+
+# define __tls_get_addr ___tls_get_addr
+# include <elf/dl-tls.c>
+# undef __tls_get_addr
+
+hidden_ver (___tls_get_addr, __tls_get_addr)
+
+/* Only handle slow paths for __tls_get_addr.  */
+attribute_hidden
+void *
+__tls_get_addr_slow (GET_ADDR_ARGS)
+{
+  dtv_t *dtv = THREAD_DTV ();
+
+  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
+    return update_get_addr (GET_ADDR_PARAM);
+
+  return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
+}
+#else
+
+/* No compatibility symbol needed.  */
+# include <elf/dl-tls.c>
+
+#endif
diff --git a/sysdeps/x86_64/dl-tls.h b/sysdeps/x86_64/dl-tls.h
index 0f101e6ac6..bc18e70b23 100644
--- a/sysdeps/x86_64/dl-tls.h
+++ b/sysdeps/x86_64/dl-tls.h
@@ -1,5 +1,5 @@
 /* Thread-local storage handling in the ELF dynamic linker.  x86-64 version.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,6 +16,9 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#ifndef _X86_64_DL_TLS_H
+#define _X86_64_DL_TLS_H
+
 #include <stdint.h>
 
 /* Type used for the representation of TLS information in the GOT.  */
@@ -28,5 +31,4 @@ typedef struct dl_tls_index
 
 extern void *__tls_get_addr (tls_index *ti);
 
-/* Value used for dtv entries for which the allocation is delayed.  */
-#define TLS_DTV_UNALLOCATED	((void *) -1l)
+#endif /* _X86_64_DL_TLS_H */
diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index 3cb7c3d031..80d771cd88 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -1,5 +1,5 @@
 /* Thread-local storage handling in the ELF dynamic linker.  x86_64 version.
-   Copyright (C) 2004-2016 Free Software Foundation, Inc.
+   Copyright (C) 2004-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -37,6 +37,7 @@
 	cfi_startproc
 	.align 16
 _dl_tlsdesc_return:
+	_CET_ENDBR
 	movq	8(%rax), %rax
 	ret
 	cfi_endproc
@@ -58,6 +59,7 @@ _dl_tlsdesc_return:
 	cfi_startproc
 	.align 16
 _dl_tlsdesc_undefweak:
+	_CET_ENDBR
 	movq	8(%rax), %rax
 	subq	%fs:0, %rax
 	ret
@@ -96,6 +98,7 @@ _dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
 	cfi_startproc
 	.align 16
 _dl_tlsdesc_dynamic:
+	_CET_ENDBR
 	/* Preserve call-clobbered registers that we modify.
 	   We need two scratch regs anyway.  */
 	movq	%rsi, -16(%rsp)
@@ -128,7 +131,11 @@ _dl_tlsdesc_dynamic:
 	movq	%r10, 40(%rsp)
 	movq	%r11, 48(%rsp)
 	/* %rdi already points to the tlsinfo data structure.  */
+#ifdef NO_RTLD_HIDDEN
+	call	JUMPTARGET (__tls_get_addr)
+#else
 	call	HIDDEN_JUMPTARGET (__tls_get_addr)
+#endif
 	movq	8(%rsp), %rdx
 	movq	16(%rsp), %rcx
 	movq	24(%rsp), %r8
@@ -162,15 +169,17 @@ _dl_tlsdesc_dynamic:
 	.align 16
 	/* The PLT entry will have pushed the link_map pointer.  */
 _dl_tlsdesc_resolve_rela:
+	_CET_ENDBR
 	cfi_adjust_cfa_offset (8)
-	/* Save all call-clobbered registers.  */
-	subq	$72, %rsp
-	cfi_adjust_cfa_offset (72)
+	/* Save all call-clobbered registers.  Add 8 bytes for push in
+	   the PLT entry to align the stack.  */
+	subq	$80, %rsp
+	cfi_adjust_cfa_offset (80)
 	movq	%rax, (%rsp)
 	movq	%rdi, 8(%rsp)
 	movq	%rax, %rdi	/* Pass tlsdesc* in %rdi.  */
 	movq	%rsi, 16(%rsp)
-	movq	72(%rsp), %rsi	/* Pass link_map* in %rsi.  */
+	movq	80(%rsp), %rsi	/* Pass link_map* in %rsi.  */
 	movq	%r8, 24(%rsp)
 	movq	%r9, 32(%rsp)
 	movq	%r10, 40(%rsp)
@@ -187,8 +196,8 @@ _dl_tlsdesc_resolve_rela:
 	movq	48(%rsp), %r11
 	movq	56(%rsp), %rdx
 	movq	64(%rsp), %rcx
-	addq	$80, %rsp
-	cfi_adjust_cfa_offset (-80)
+	addq	$88, %rsp
+	cfi_adjust_cfa_offset (-88)
 	jmp	*(%rax)
 	cfi_endproc
 	.size	_dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela
@@ -211,6 +220,7 @@ _dl_tlsdesc_resolve_rela:
 	.align 16
 _dl_tlsdesc_resolve_hold:
 0:
+	_CET_ENDBR
 	/* Save all call-clobbered registers.  */
 	subq	$72, %rsp
 	cfi_adjust_cfa_offset (72)
diff --git a/sysdeps/x86_64/dl-tlsdesc.h b/sysdeps/x86_64/dl-tlsdesc.h
index 11e1a50b8f..66e659bb5c 100644
--- a/sysdeps/x86_64/dl-tlsdesc.h
+++ b/sysdeps/x86_64/dl-tlsdesc.h
@@ -1,6 +1,6 @@
 /* Thread-local storage descriptor handling in the ELF dynamic linker.
    x86_64 version.
-   Copyright (C) 2005-2016 Free Software Foundation, Inc.
+   Copyright (C) 2005-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -62,7 +62,7 @@ extern ptrdiff_t attribute_hidden
 # ifdef SHARED
 extern void *_dl_make_tlsdesc_dynamic (struct link_map *map,
 				       size_t ti_offset)
-  internal_function attribute_hidden;
+  attribute_hidden;
 
 extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic(struct tlsdesc *);
 # endif
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index 9fb6b13983..ef1425cbb9 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -1,5 +1,5 @@
 /* PLT trampolines.  x86-64 version.
-   Copyright (C) 2004-2016 Free Software Foundation, Inc.
+   Copyright (C) 2004-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,6 +18,7 @@
 
 #include <config.h>
 #include <sysdep.h>
+#include <cpu-features.h>
 #include <link-defines.h>
 
 #ifndef DL_STACK_ALIGNMENT
@@ -33,37 +34,24 @@
 # define DL_STACK_ALIGNMENT 8
 #endif
 
-#ifndef DL_RUNIME_UNALIGNED_VEC_SIZE
-/* The maximum size of unaligned vector load and store.  */
-# define DL_RUNIME_UNALIGNED_VEC_SIZE 16
-#endif
-
-/* True if _dl_runtime_resolve should align stack to VEC_SIZE bytes.  */
-#define DL_RUNIME_RESOLVE_REALIGN_STACK \
-  (VEC_SIZE > DL_STACK_ALIGNMENT \
-   && VEC_SIZE > DL_RUNIME_UNALIGNED_VEC_SIZE)
-
-/* Align vector register save area to 16 bytes.  */
-#define REGISTER_SAVE_VEC_OFF	0
+/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align
+   stack to 16 bytes before calling _dl_fixup.  */
+#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
+  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
+   || 16 > DL_STACK_ALIGNMENT)
 
 /* Area on stack to save and restore registers used for parameter
    passing when calling _dl_fixup.  */
 #ifdef __ILP32__
-# define REGISTER_SAVE_RAX	(REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
 # define PRESERVE_BND_REGS_PREFIX
 #else
-/* Align bound register save area to 16 bytes.  */
-# define REGISTER_SAVE_BND0	(REGISTER_SAVE_VEC_OFF + VEC_SIZE * 8)
-# define REGISTER_SAVE_BND1	(REGISTER_SAVE_BND0 + 16)
-# define REGISTER_SAVE_BND2	(REGISTER_SAVE_BND1 + 16)
-# define REGISTER_SAVE_BND3	(REGISTER_SAVE_BND2 + 16)
-# define REGISTER_SAVE_RAX	(REGISTER_SAVE_BND3 + 16)
 # ifdef HAVE_MPX_SUPPORT
 #  define PRESERVE_BND_REGS_PREFIX bnd
 # else
 #  define PRESERVE_BND_REGS_PREFIX .byte 0xf2
 # endif
 #endif
+#define REGISTER_SAVE_RAX	0
 #define REGISTER_SAVE_RCX	(REGISTER_SAVE_RAX + 8)
 #define REGISTER_SAVE_RDX	(REGISTER_SAVE_RCX + 8)
 #define REGISTER_SAVE_RSI	(REGISTER_SAVE_RDX + 8)
@@ -73,59 +61,58 @@
 
 #define RESTORE_AVX
 
-#ifdef HAVE_AVX512_ASM_SUPPORT
-# define VEC_SIZE		64
-# define VMOVA			vmovdqa64
-# if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-#  define VMOV			vmovdqa64
-# else
-#  define VMOV			vmovdqu64
-# endif
-# define VEC(i)			zmm##i
-# define _dl_runtime_resolve	_dl_runtime_resolve_avx512
-# define _dl_runtime_profile	_dl_runtime_profile_avx512
-# include "dl-trampoline.h"
-# undef _dl_runtime_resolve
-# undef _dl_runtime_profile
-# undef VEC
-# undef VMOV
-# undef VMOVA
-# undef VEC_SIZE
-#else
-strong_alias (_dl_runtime_resolve_avx, _dl_runtime_resolve_avx512)
-	.hidden _dl_runtime_resolve_avx512
-strong_alias (_dl_runtime_profile_avx, _dl_runtime_profile_avx512)
-	.hidden _dl_runtime_profile_avx512
-#endif
+#define VEC_SIZE		64
+#define VMOVA			vmovdqa64
+#define VEC(i)			zmm##i
+#define _dl_runtime_profile	_dl_runtime_profile_avx512
+#include "dl-trampoline.h"
+#undef _dl_runtime_profile
+#undef VEC
+#undef VMOVA
+#undef VEC_SIZE
 
 #define VEC_SIZE		32
 #define VMOVA			vmovdqa
-#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-# define VMOV			vmovdqa
-#else
-# define VMOV			vmovdqu
-#endif
 #define VEC(i)			ymm##i
-#define _dl_runtime_resolve	_dl_runtime_resolve_avx
 #define _dl_runtime_profile	_dl_runtime_profile_avx
 #include "dl-trampoline.h"
-#undef _dl_runtime_resolve
 #undef _dl_runtime_profile
 #undef VEC
-#undef VMOV
 #undef VMOVA
 #undef VEC_SIZE
 
 /* movaps/movups is 1-byte shorter.  */
 #define VEC_SIZE		16
 #define VMOVA			movaps
-#if DL_RUNIME_RESOLVE_REALIGN_STACK || VEC_SIZE <= DL_STACK_ALIGNMENT
-# define VMOV			movaps
-#else
-# define VMOV			movups
-#endif
 #define VEC(i)			xmm##i
-#define _dl_runtime_resolve	_dl_runtime_resolve_sse
 #define _dl_runtime_profile	_dl_runtime_profile_sse
 #undef RESTORE_AVX
 #include "dl-trampoline.h"
+#undef _dl_runtime_profile
+#undef VEC
+#undef VMOVA
+#undef VEC_SIZE
+
+#define USE_FXSAVE
+#define STATE_SAVE_ALIGNMENT	16
+#define _dl_runtime_resolve	_dl_runtime_resolve_fxsave
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_FXSAVE
+#undef STATE_SAVE_ALIGNMENT
+
+#define USE_XSAVE
+#define STATE_SAVE_ALIGNMENT	64
+#define _dl_runtime_resolve	_dl_runtime_resolve_xsave
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_XSAVE
+#undef STATE_SAVE_ALIGNMENT
+
+#define USE_XSAVEC
+#define STATE_SAVE_ALIGNMENT	64
+#define _dl_runtime_resolve	_dl_runtime_resolve_xsavec
+#include "dl-trampoline.h"
+#undef _dl_runtime_resolve
+#undef USE_XSAVEC
+#undef STATE_SAVE_ALIGNMENT
diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h
index f4191833ab..a28b1e73a4 100644
--- a/sysdeps/x86_64/dl-trampoline.h
+++ b/sysdeps/x86_64/dl-trampoline.h
@@ -1,5 +1,5 @@
 /* PLT trampolines.  x86-64 version.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,40 +16,47 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#undef REGISTER_SAVE_AREA_RAW
-#ifdef __ILP32__
-/* X32 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as VEC0 to
-   VEC7.  */
-# define REGISTER_SAVE_AREA_RAW	(8 * 7 + VEC_SIZE * 8)
-#else
-/* X86-64 saves RCX, RDX, RSI, RDI, R8 and R9 plus RAX as well as
-   BND0, BND1, BND2, BND3 and VEC0 to VEC7. */
-# define REGISTER_SAVE_AREA_RAW	(8 * 7 + 16 * 4 + VEC_SIZE * 8)
-#endif
+	.text
+#ifdef _dl_runtime_resolve
 
-#undef REGISTER_SAVE_AREA
-#undef LOCAL_STORAGE_AREA
-#undef BASE
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
-# define REGISTER_SAVE_AREA	(REGISTER_SAVE_AREA_RAW + 8)
-/* Local stack area before jumping to function address: RBX.  */
-# define LOCAL_STORAGE_AREA	8
-# define BASE			rbx
-# if (REGISTER_SAVE_AREA % VEC_SIZE) != 0
-#  error REGISTER_SAVE_AREA must be multples of VEC_SIZE
+# undef REGISTER_SAVE_AREA
+# undef LOCAL_STORAGE_AREA
+# undef BASE
+
+# if (STATE_SAVE_ALIGNMENT % 16) != 0
+#  error STATE_SAVE_ALIGNMENT must be a multiple of 16
+# endif
+
+# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
+#  error STATE_SAVE_OFFSET must be a multiple of STATE_SAVE_ALIGNMENT
 # endif
-#else
-# define REGISTER_SAVE_AREA	REGISTER_SAVE_AREA_RAW
+
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+/* Local stack area before jumping to function address: RBX.  */
+#  define LOCAL_STORAGE_AREA	8
+#  define BASE			rbx
+#  ifdef USE_FXSAVE
+/* Use fxsave to save XMM registers.  */
+#   define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
+#   if (REGISTER_SAVE_AREA % 16) != 0
+#    error REGISTER_SAVE_AREA must be a multiple of 16
+#   endif
+#  endif
+# else
+#  ifndef USE_FXSAVE
+#   error USE_FXSAVE must be defined
+#  endif
+/* Use fxsave to save XMM registers.  */
+#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
 /* Local stack area before jumping to function address:  All saved
    registers.  */
-# define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
-# define BASE			rsp
-# if (REGISTER_SAVE_AREA % 16) != 8
-#  error REGISTER_SAVE_AREA must be odd multples of 8
+#  define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
+#  define BASE			rsp
+#  if (REGISTER_SAVE_AREA % 16) != 8
+#   error REGISTER_SAVE_AREA must be an odd multiple of 8
+#  endif
 # endif
-#endif
 
-	.text
 	.globl _dl_runtime_resolve
 	.hidden _dl_runtime_resolve
 	.type _dl_runtime_resolve, @function
@@ -57,19 +64,31 @@
 	cfi_startproc
 _dl_runtime_resolve:
 	cfi_adjust_cfa_offset(16) # Incorporate PLT
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
-# if LOCAL_STORAGE_AREA != 8
-#  error LOCAL_STORAGE_AREA must be 8
-# endif
+	_CET_ENDBR
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
+#  if LOCAL_STORAGE_AREA != 8
+#   error LOCAL_STORAGE_AREA must be 8
+#  endif
 	pushq %rbx			# push subtracts stack by 8.
 	cfi_adjust_cfa_offset(8)
 	cfi_rel_offset(%rbx, 0)
 	mov %RSP_LP, %RBX_LP
 	cfi_def_cfa_register(%rbx)
-	and $-VEC_SIZE, %RSP_LP
-#endif
+	and $-STATE_SAVE_ALIGNMENT, %RSP_LP
+# endif
+# ifdef REGISTER_SAVE_AREA
 	sub $REGISTER_SAVE_AREA, %RSP_LP
+#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
 	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
+#  endif
+# else
+	# Allocate stack space of the required size to save the state.
+#  if IS_IN (rtld)
+	sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+#  else
+	sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+#  endif
+# endif
 	# Preserve registers otherwise clobbered.
 	movq %rax, REGISTER_SAVE_RAX(%rsp)
 	movq %rcx, REGISTER_SAVE_RCX(%rsp)
@@ -78,59 +97,42 @@ _dl_runtime_resolve:
 	movq %rdi, REGISTER_SAVE_RDI(%rsp)
 	movq %r8, REGISTER_SAVE_R8(%rsp)
 	movq %r9, REGISTER_SAVE_R9(%rsp)
-	VMOV %VEC(0), (REGISTER_SAVE_VEC_OFF)(%rsp)
-	VMOV %VEC(1), (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp)
-	VMOV %VEC(2), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp)
-	VMOV %VEC(3), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp)
-	VMOV %VEC(4), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp)
-	VMOV %VEC(5), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp)
-	VMOV %VEC(6), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp)
-	VMOV %VEC(7), (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp)
-#ifndef __ILP32__
-	# We also have to preserve bound registers.  These are nops if
-	# Intel MPX isn't available or disabled.
-# ifdef HAVE_MPX_SUPPORT
-	bndmov %bnd0, REGISTER_SAVE_BND0(%rsp)
-	bndmov %bnd1, REGISTER_SAVE_BND1(%rsp)
-	bndmov %bnd2, REGISTER_SAVE_BND2(%rsp)
-	bndmov %bnd3, REGISTER_SAVE_BND3(%rsp)
+# ifdef USE_FXSAVE
+	fxsave STATE_SAVE_OFFSET(%rsp)
 # else
-#  if REGISTER_SAVE_BND0 == 0
-	.byte 0x66,0x0f,0x1b,0x04,0x24
+	movl $STATE_SAVE_MASK, %eax
+	xorl %edx, %edx
+	# Clear the XSAVE Header.
+#  ifdef USE_XSAVE
+	movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
+#  endif
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
+	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
+#  ifdef USE_XSAVE
+	xsave STATE_SAVE_OFFSET(%rsp)
 #  else
-	.byte 0x66,0x0f,0x1b,0x44,0x24,REGISTER_SAVE_BND0
+	xsavec STATE_SAVE_OFFSET(%rsp)
 #  endif
-	.byte 0x66,0x0f,0x1b,0x4c,0x24,REGISTER_SAVE_BND1
-	.byte 0x66,0x0f,0x1b,0x54,0x24,REGISTER_SAVE_BND2
-	.byte 0x66,0x0f,0x1b,0x5c,0x24,REGISTER_SAVE_BND3
 # endif
-#endif
 	# Copy args pushed by PLT in register.
 	# %rdi: link_map, %rsi: reloc_index
 	mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
 	mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
 	call _dl_fixup		# Call resolver.
 	mov %RAX_LP, %R11_LP	# Save return value
-#ifndef __ILP32__
-	# Restore bound registers.  These are nops if Intel MPX isn't
-	# avaiable or disabled.
-# ifdef HAVE_MPX_SUPPORT
-	bndmov REGISTER_SAVE_BND3(%rsp), %bnd3
-	bndmov REGISTER_SAVE_BND2(%rsp), %bnd2
-	bndmov REGISTER_SAVE_BND1(%rsp), %bnd1
-	bndmov REGISTER_SAVE_BND0(%rsp), %bnd0
+	# Get register content back.
+# ifdef USE_FXSAVE
+	fxrstor STATE_SAVE_OFFSET(%rsp)
 # else
-	.byte 0x66,0x0f,0x1a,0x5c,0x24,REGISTER_SAVE_BND3
-	.byte 0x66,0x0f,0x1a,0x54,0x24,REGISTER_SAVE_BND2
-	.byte 0x66,0x0f,0x1a,0x4c,0x24,REGISTER_SAVE_BND1
-#  if REGISTER_SAVE_BND0 == 0
-	.byte 0x66,0x0f,0x1a,0x04,0x24
-#  else
-	.byte 0x66,0x0f,0x1a,0x44,0x24,REGISTER_SAVE_BND0
-#  endif
+	movl $STATE_SAVE_MASK, %eax
+	xorl %edx, %edx
+	xrstor STATE_SAVE_OFFSET(%rsp)
 # endif
-#endif
-	# Get register content back.
 	movq REGISTER_SAVE_R9(%rsp), %r9
 	movq REGISTER_SAVE_R8(%rsp), %r8
 	movq REGISTER_SAVE_RDI(%rsp), %rdi
@@ -138,20 +140,12 @@ _dl_runtime_resolve:
 	movq REGISTER_SAVE_RDX(%rsp), %rdx
 	movq REGISTER_SAVE_RCX(%rsp), %rcx
 	movq REGISTER_SAVE_RAX(%rsp), %rax
-	VMOV (REGISTER_SAVE_VEC_OFF)(%rsp), %VEC(0)
-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE)(%rsp), %VEC(1)
-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 2)(%rsp), %VEC(2)
-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 3)(%rsp), %VEC(3)
-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 4)(%rsp), %VEC(4)
-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 5)(%rsp), %VEC(5)
-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 6)(%rsp), %VEC(6)
-	VMOV (REGISTER_SAVE_VEC_OFF + VEC_SIZE * 7)(%rsp), %VEC(7)
-#if DL_RUNIME_RESOLVE_REALIGN_STACK
+# if DL_RUNTIME_RESOLVE_REALIGN_STACK
 	mov %RBX_LP, %RSP_LP
 	cfi_def_cfa_register(%rsp)
 	movq (%rsp), %rbx
 	cfi_restore(%rbx)
-#endif
+# endif
 	# Adjust stack(PLT did 2 pushes)
 	add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
 	cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
@@ -160,9 +154,10 @@ _dl_runtime_resolve:
 	jmp *%r11		# Jump to function address.
 	cfi_endproc
 	.size _dl_runtime_resolve, .-_dl_runtime_resolve
+#endif
 
 
-#ifndef PROF
+#if !defined PROF && defined _dl_runtime_profile
 # if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
 #  error LR_VECTOR_OFFSET must be multples of VEC_SIZE
 # endif
@@ -174,6 +169,7 @@ _dl_runtime_resolve:
 _dl_runtime_profile:
 	cfi_startproc
 	cfi_adjust_cfa_offset(16) # Incorporate PLT
+	_CET_ENDBR
 	/* The La_x86_64_regs data structure pointed to by the
 	   fourth paramater must be VEC_SIZE-byte aligned.  This must
 	   be explicitly enforced.  We have the set up a dynamically
@@ -446,8 +442,16 @@ _dl_runtime_profile:
 # ifdef RESTORE_AVX
 	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
 	   registers to detect if xmm0/xmm1 registers are changed
-	   by audit module.  */
-	sub $(LRV_SIZE + XMM_SIZE*2), %RSP_LP
+	   by audit module.  Since rsp is aligned to VEC_SIZE, we
+	   need to make sure that the address of La_x86_64_retval +
+	   LRV_VECTOR0_OFFSET is aligned to VEC_SIZE.  */
+#  define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
+#  define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
+#  if LRV_MISALIGNED == 0
+	sub $LRV_SPACE, %RSP_LP
+#  else
+	sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
+#  endif
 # else
 	sub $LRV_SIZE, %RSP_LP	# sizeof(La_x86_64_retval)
 # endif
diff --git a/sysdeps/x86_64/ffs.c b/sysdeps/x86_64/ffs.c
index be5b6c8589..fa5b20544d 100644
--- a/sysdeps/x86_64/ffs.c
+++ b/sysdeps/x86_64/ffs.c
@@ -1,7 +1,7 @@
 /* ffs -- find first set bit in a word, counted from least significant end.
    For AMD x86-64.
    This file is part of the GNU C Library.
-   Copyright (C) 1991-2016 Free Software Foundation, Inc.
+   Copyright (C) 1991-2018 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@cygnus.com>.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/ffsll.c b/sysdeps/x86_64/ffsll.c
index c0f5abc446..206deb6810 100644
--- a/sysdeps/x86_64/ffsll.c
+++ b/sysdeps/x86_64/ffsll.c
@@ -1,7 +1,7 @@
 /* ffsll -- find first set bit in a word, counted from least significant end.
    For AMD x86-64.
    This file is part of the GNU C Library.
-   Copyright (C) 1991-2016 Free Software Foundation, Inc.
+   Copyright (C) 1991-2018 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@cygnus.com>.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/Makefile b/sysdeps/x86_64/fpu/Makefile
index 88742faff1..2b7d69bb50 100644
--- a/sysdeps/x86_64/fpu/Makefile
+++ b/sysdeps/x86_64/fpu/Makefile
@@ -30,11 +30,171 @@ ifeq ($(subdir),math)
 ifeq ($(build-mathvec),yes)
 libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \
 		 float-vlen4 float-vlen8 float-vlen8-avx2
+tests += test-double-libmvec-alias test-double-libmvec-alias-avx \
+	 test-double-libmvec-alias-avx2 test-double-libmvec-alias-main \
+	 test-double-libmvec-alias-avx-main test-double-libmvec-alias-avx2-main \
+	 test-float-libmvec-alias test-float-libmvec-alias-avx \
+	 test-float-libmvec-alias-avx2 test-float-libmvec-alias-main \
+	 test-float-libmvec-alias-avx-main test-float-libmvec-alias-avx2-main \
+	 test-double-libmvec-sincos test-double-libmvec-sincos-avx \
+	 test-double-libmvec-sincos-avx2 test-float-libmvec-sincosf \
+	 test-float-libmvec-sincosf-avx test-float-libmvec-sincosf-avx2
+modules-names += test-double-libmvec-alias-mod \
+		 test-double-libmvec-alias-avx-mod \
+		 test-double-libmvec-alias-avx2-mod \
+		 test-float-libmvec-alias-mod \
+		 test-float-libmvec-alias-avx-mod \
+		 test-float-libmvec-alias-avx2-mod
+modules-names-tests += test-double-libmvec-alias-mod \
+		 test-double-libmvec-alias-avx-mod \
+		 test-double-libmvec-alias-avx2-mod \
+		 test-float-libmvec-alias-mod \
+		 test-float-libmvec-alias-avx-mod \
+		 test-float-libmvec-alias-avx2-mod
+extra-test-objs += test-double-libmvec-sincos-avx-main.o \
+		   test-double-libmvec-sincos-avx2-main.o \
+		   test-double-libmvec-sincos-main.o \
+		   test-float-libmvec-sincosf-avx-main.o \
+		   test-float-libmvec-sincosf-avx2-main.o\
+		   test-float-libmvec-sincosf-main.o
+test-double-libmvec-alias-mod.so-no-z-defs = yes
+test-double-libmvec-alias-avx-mod.so-no-z-defs = yes
+test-double-libmvec-alias-avx2-mod.so-no-z-defs = yes
+test-float-libmvec-alias-mod.so-no-z-defs = yes
+test-float-libmvec-alias-avx-mod.so-no-z-defs = yes
+test-float-libmvec-alias-avx2-mod.so-no-z-defs = yes
+
+$(objpfx)test-double-libmvec-alias: \
+  $(objpfx)test-double-libmvec-alias-mod.so
+$(objpfx)test-double-libmvec-alias-mod.so: \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx: \
+  $(objpfx)test-double-libmvec-alias-avx-mod.so
+$(objpfx)test-double-libmvec-alias-avx-mod.so: \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx2: \
+  $(objpfx)test-double-libmvec-alias-avx2-mod.so
+$(objpfx)test-double-libmvec-alias-avx2-mod.so: \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-main: \
+  $(objpfx)test-double-libmvec-alias-mod.os \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx-main: \
+  $(objpfx)test-double-libmvec-alias-avx-mod.os \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx2-main: \
+  $(objpfx)test-double-libmvec-alias-avx2-mod.os \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias: \
+  $(objpfx)test-float-libmvec-alias-mod.so
+$(objpfx)test-float-libmvec-alias-mod.so: \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx: \
+  $(objpfx)test-float-libmvec-alias-avx-mod.so
+$(objpfx)test-float-libmvec-alias-avx-mod.so: \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx2: \
+  $(objpfx)test-float-libmvec-alias-avx2-mod.so
+$(objpfx)test-float-libmvec-alias-avx2-mod.so: \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-main: \
+  $(objpfx)test-float-libmvec-alias-mod.os \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx-main: \
+  $(objpfx)test-float-libmvec-alias-avx-mod.os \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx2-main: \
+  $(objpfx)test-float-libmvec-alias-avx2-mod.os \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-sincos: \
+  $(objpfx)test-double-libmvec-sincos.o \
+  $(objpfx)test-double-libmvec-sincos-main.o $(libmvec)
+
+$(objpfx)test-double-libmvec-sincos-avx: \
+  $(objpfx)test-double-libmvec-sincos-avx.o \
+  $(objpfx)test-double-libmvec-sincos-avx-main.o $(libmvec)
+
+$(objpfx)test-double-libmvec-sincos-avx2: \
+  $(objpfx)test-double-libmvec-sincos-avx2.o \
+  $(objpfx)test-double-libmvec-sincos-avx2-main.o $(libmvec)
+
+$(objpfx)test-float-libmvec-sincosf: \
+  $(objpfx)test-float-libmvec-sincosf.o \
+  $(objpfx)test-float-libmvec-sincosf-main.o $(libmvec)
+
+$(objpfx)test-float-libmvec-sincosf-avx: \
+  $(objpfx)test-float-libmvec-sincosf-avx.o \
+  $(objpfx)test-float-libmvec-sincosf-avx-main.o $(libmvec)
+
+$(objpfx)test-float-libmvec-sincosf-avx2: \
+  $(objpfx)test-float-libmvec-sincosf-avx2.o \
+  $(objpfx)test-float-libmvec-sincosf-avx2-main.o $(libmvec)
 
 ifeq (yes,$(config-cflags-avx512))
 libmvec-tests += double-vlen8 float-vlen16
+tests += test-double-libmvec-alias-avx512 \
+	 test-float-libmvec-alias-avx512 \
+	 test-double-libmvec-alias-avx512-main \
+	 test-float-libmvec-alias-avx512-main \
+	 test-double-libmvec-sincos-avx512 \
+	 test-float-libmvec-sincosf-avx512
+modules-names += test-double-libmvec-alias-avx512-mod \
+		 test-float-libmvec-alias-avx512-mod
+modules-names-tests += test-double-libmvec-alias-avx512-mod \
+		 test-float-libmvec-alias-avx512-mod
+extra-test-objs += test-double-libmvec-sincos-avx512-main.o \
+		   test-float-libmvec-sincosf-avx512-main.o
+test-double-libmvec-alias-avx512-mod.so-no-z-defs = yes
+test-float-libmvec-alias-avx512-mod.so-no-z-defs = yes
+
+$(objpfx)test-double-libmvec-alias-avx512: \
+  $(objpfx)test-double-libmvec-alias-avx512-mod.so
+$(objpfx)test-double-libmvec-alias-avx512-mod.so: \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-alias-avx512-main: \
+  $(objpfx)test-double-libmvec-alias-avx512-mod.os \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx512: \
+  $(objpfx)test-float-libmvec-alias-avx512-mod.so
+$(objpfx)test-float-libmvec-alias-avx512-mod.so: \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-float-libmvec-alias-avx512-main: \
+  $(objpfx)test-float-libmvec-alias-avx512-mod.os \
+  $(objpfx)../mathvec/libmvec_nonshared.a $(libmvec)
+
+$(objpfx)test-double-libmvec-sincos-avx512: \
+  $(objpfx)test-double-libmvec-sincos-avx512.o \
+  $(objpfx)test-double-libmvec-sincos-avx512-main.o $(libmvec)
+
+$(objpfx)test-float-libmvec-sincosf-avx512: \
+  $(objpfx)test-float-libmvec-sincosf-avx512.o \
+  $(objpfx)test-float-libmvec-sincosf-avx512-main.o $(libmvec)
 endif
 
+double-vlen2-funcs = cos exp log pow sin sincos
+double-vlen4-funcs = cos exp log pow sin sincos
+double-vlen4-avx2-funcs = cos exp log pow sin sincos
+double-vlen8-funcs = cos exp log pow sin sincos
+float-vlen4-funcs = cos exp log pow sin sincos
+float-vlen8-funcs = cos exp log pow sin sincos
+float-vlen8-avx2-funcs = cos exp log pow sin sincos
+float-vlen16-funcs = cos exp log pow sin sincos
+
 double-vlen4-arch-ext-cflags = -mavx
 double-vlen4-arch-ext2-cflags = -mavx2
 double-vlen8-arch-ext-cflags = -mavx512f
@@ -43,11 +203,37 @@ float-vlen8-arch-ext-cflags = -mavx
 float-vlen8-arch-ext2-cflags = -mavx2
 float-vlen16-arch-ext-cflags = -mavx512f
 
-CFLAGS-test-double-vlen4-avx2.c = $(libm-test-vec-cflags)
+libmvec-sincos-cflags = $(libm-test-fast-math-cflags) -fno-inline -fopenmp -Wno-unknown-pragmas
+libmvec-alias-cflags = $(libmvec-sincos-cflags) -ffloat-store -ffinite-math-only
+
+CFLAGS-test-double-libmvec-alias-mod.c = $(libmvec-alias-cflags)
+CFLAGS-test-double-libmvec-alias-avx-mod.c = $(double-vlen4-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX
+CFLAGS-test-double-libmvec-alias-avx2-mod.c = $(double-vlen4-arch-ext2-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX2
+CFLAGS-test-double-libmvec-alias-avx512-mod.c = $(double-vlen8-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX512F
+
+CFLAGS-test-float-libmvec-alias-mod.c = $(libmvec-alias-cflags)
+CFLAGS-test-float-libmvec-alias-avx-mod.c = $(float-vlen8-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX
+CFLAGS-test-float-libmvec-alias-avx2-mod.c = $(float-vlen8-arch-ext2-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX2
+CFLAGS-test-float-libmvec-alias-avx512-mod.c = $(float-vlen16-arch-ext-cflags) $(libmvec-alias-cflags) -DREQUIRE_AVX512F
+
 CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags)
 
-CFLAGS-test-float-vlen8-avx2.c = $(libm-test-vec-cflags)
 CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags)
 
+CFLAGS-test-double-libmvec-sincos-main.c = $(libmvec-sincos-cflags)
+CFLAGS-test-double-libmvec-sincos-avx.c = -DREQUIRE_AVX
+CFLAGS-test-double-libmvec-sincos-avx-main.c = $(libmvec-sincos-cflags) $(double-vlen4-arch-ext-cflags)
+CFLAGS-test-double-libmvec-sincos-avx2.c = -DREQUIRE_AVX2
+CFLAGS-test-double-libmvec-sincos-avx2-main.c = $(libmvec-sincos-cflags) $(double-vlen4-arch-ext2-cflags)
+CFLAGS-test-double-libmvec-sincos-avx512.c = -DREQUIRE_AVX512F
+CFLAGS-test-double-libmvec-sincos-avx512-main.c = $(libmvec-sincos-cflags) $(double-vlen8-arch-ext-cflags)
+
+CFLAGS-test-float-libmvec-sincosf-main.c = $(libmvec-sincos-cflags)
+CFLAGS-test-float-libmvec-sincosf-avx.c = -DREQUIRE_AVX
+CFLAGS-test-float-libmvec-sincosf-avx-main.c = $(libmvec-sincos-cflags) $(float-vlen8-arch-ext-cflags)
+CFLAGS-test-float-libmvec-sincosf-avx2.c = -DREQUIRE_AVX2
+CFLAGS-test-float-libmvec-sincosf-avx2-main.c = $(libmvec-sincos-cflags) $(float-vlen8-arch-ext2-cflags)
+CFLAGS-test-float-libmvec-sincosf-avx512.c = -DREQUIRE_AVX512F
+CFLAGS-test-float-libmvec-sincosf-avx512-main.c = $(libmvec-sincos-cflags) $(float-vlen16-arch-ext-cflags)
 endif
 endif
diff --git a/sysdeps/x86_64/fpu/dla.h b/sysdeps/x86_64/fpu/dla.h
deleted file mode 100644
index 688efa0f5b..0000000000
--- a/sysdeps/x86_64/fpu/dla.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <features.h>
-
-#ifdef __FMA4__
-# define DLA_FMS(x,y,z) \
-  __builtin_fma (x, y, -(z))
-#endif
-
-#include "sysdeps/ieee754/dbl-64/dla.h"
diff --git a/sysdeps/x86_64/fpu/e_expf.S b/sysdeps/x86_64/fpu/e_expf.S
deleted file mode 100644
index d4b63a8d8e..0000000000
--- a/sysdeps/x86_64/fpu/e_expf.S
+++ /dev/null
@@ -1,339 +0,0 @@
-/* Optimized __ieee754_expf function.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* Short algorithm description:
- *
- *  Let K = 64 (table size).
- *       e^x  = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
- *  where
- *       x = m*log(2)/K + y,    y in [0.0..log(2)/K]
- *       m = n*K + j,           m,n,j - signed integer, j in [0..K-1]
- *       values of 2^(j/K) are tabulated as T[j].
- *
- *       P(y) is a minimax polynomial approximation of expf(x)-1
- *       on small interval [0.0..log(2)/K].
- *
- *       P(y) = P3*y*y*y*y + P2*y*y*y + P1*y*y + P0*y, calculated as
- *       z = y*y;    P(y) = (P3*z + P1)*z + (P2*z + P0)*y
- *
- * Special cases:
- *  expf(NaN) = NaN
- *  expf(+INF) = +INF
- *  expf(-INF) = 0
- *  expf(x) = 1 for subnormals
- *  for finite argument, only expf(0)=1 is exact
- *  expf(x) overflows if x>88.7228317260742190
- *  expf(x) underflows if x<-103.972076416015620
- */
-
-	.text
-ENTRY(__ieee754_expf)
-	/* Input: single precision x in %xmm0 */
-	cvtss2sd	%xmm0, %xmm1	/* Convert x to double precision */
-	movd	%xmm0, %ecx		/* Copy x */
-	movsd	L(DP_KLN2)(%rip), %xmm2	/* DP K/log(2) */
-	movsd	L(DP_P2)(%rip), %xmm3	/* DP P2 */
-	movl	%ecx, %eax		/* x */
-	mulsd	%xmm1, %xmm2		/* DP x*K/log(2) */
-	andl	$0x7fffffff, %ecx	/* |x| */
-	lea	L(DP_T)(%rip), %rsi	/* address of table T[j] */
-	cmpl	$0x42ad496b, %ecx	/* |x|<125*log(2) ? */
-	movsd	L(DP_P3)(%rip), %xmm4	/* DP P3 */
-	addsd	L(DP_RS)(%rip), %xmm2	/* DP x*K/log(2)+RS */
-	jae	L(special_paths)
-
-	/* Here if |x|<125*log(2) */
-	cmpl	$0x31800000, %ecx	/* |x|<2^(-28) ? */
-	jb	L(small_arg)
-
-	/* Main path: here if 2^(-28)<=|x|<125*log(2) */
-	cvtsd2ss	%xmm2, %xmm2	/* SP x*K/log(2)+RS */
-	movd	%xmm2, %eax		/* bits of n*K+j with trash */
-	subss	L(SP_RS)(%rip), %xmm2	/* SP t=round(x*K/log(2)) */
-	movl	%eax, %edx		/* n*K+j with trash */
-	cvtss2sd	%xmm2, %xmm2	/* DP t */
-	andl	$0x3f, %eax		/* bits of j */
-	mulsd	L(DP_NLN2K)(%rip), %xmm2/* DP -t*log(2)/K */
-	andl	$0xffffffc0, %edx	/* bits of n */
-#ifdef __AVX__
-	vaddsd	%xmm1, %xmm2, %xmm0	/* DP y=x-t*log(2)/K */
-	vmulsd	%xmm0, %xmm0, %xmm2	/* DP z=y*y */
-#else
-	addsd	%xmm1, %xmm2		/* DP y=x-t*log(2)/K */
-	movaps	%xmm2, %xmm0		/* DP y */
-	mulsd	%xmm2, %xmm2		/* DP z=y*y */
-#endif
-	mulsd	%xmm2, %xmm4		/* DP P3*z */
-	addl	$0x1fc0, %edx		/* bits of n + SP exponent bias */
-	mulsd	%xmm2, %xmm3		/* DP P2*z */
-	shll	$17, %edx		/* SP 2^n */
-	addsd	L(DP_P1)(%rip), %xmm4	/* DP P3*z+P1 */
-	addsd	L(DP_P0)(%rip), %xmm3	/* DP P2*z+P0 */
-	movd	%edx, %xmm1		/* SP 2^n */
-	mulsd	%xmm2, %xmm4		/* DP (P3*z+P1)*z */
-	mulsd	%xmm3, %xmm0		/* DP (P2*z+P0)*y */
-	addsd	%xmm4, %xmm0		/* DP P(y) */
-	mulsd	(%rsi,%rax,8), %xmm0	/* DP P(y)*T[j] */
-	addsd	(%rsi,%rax,8), %xmm0	/* DP T[j]*(P(y)+1) */
-	cvtsd2ss	%xmm0, %xmm0	/* SP T[j]*(P(y)+1) */
-	mulss	%xmm1, %xmm0		/* SP result=2^n*(T[j]*(P(y)+1)) */
-	ret
-
-	.p2align	4
-L(small_arg):
-	/* Here if 0<=|x|<2^(-28) */
-	addss	L(SP_ONE)(%rip), %xmm0	/* 1.0 + x */
-	/* Return 1.0 with inexact raised, except for x==0 */
-	ret
-
-	.p2align	4
-L(special_paths):
-	/* Here if 125*log(2)<=|x| */
-	shrl	$31, %eax		/* Get sign bit of x, and depending on it: */
-	lea	L(SP_RANGE)(%rip), %rdx	/* load over/underflow bound */
-	cmpl	(%rdx,%rax,4), %ecx	/* |x|<under/overflow bound ? */
-	jbe	L(near_under_or_overflow)
-
-	/* Here if |x|>under/overflow bound */
-	cmpl	$0x7f800000, %ecx	/* |x| is finite ? */
-	jae	L(arg_inf_or_nan)
-
-	/* Here if |x|>under/overflow bound, and x is finite */
-	testq	%rax, %rax		/* sign of x nonzero ? */
-	je	L(res_overflow)
-
-	/* Here if -inf<x<underflow bound (x<0) */
-	movss	L(SP_SMALL)(%rip), %xmm0/* load small value 2^(-100) */
-	mulss	%xmm0, %xmm0		/* Return underflowed result (zero or subnormal) */
-	ret
-
-	.p2align	4
-L(res_overflow):
-	/* Here if overflow bound<x<inf (x>0) */
-	movss	L(SP_LARGE)(%rip), %xmm0/* load large value 2^100 */
-	mulss	%xmm0, %xmm0		/* Return overflowed result (Inf or max normal) */
-	ret
-
-	.p2align	4
-L(arg_inf_or_nan):
-	/* Here if |x| is Inf or NAN */
-	jne	L(arg_nan)	/* |x| is Inf ? */
-
-	/* Here if |x| is Inf */
-	lea	L(SP_INF_0)(%rip), %rdx	/* depending on sign of x: */
-	movss	(%rdx,%rax,4), %xmm0	/* return zero or Inf */
-	ret
-
-	.p2align	4
-L(arg_nan):
-	/* Here if |x| is NaN */
-	addss	%xmm0, %xmm0		/* Return x+x (raise invalid) */
-	ret
-
-	.p2align	4
-L(near_under_or_overflow):
-	/* Here if 125*log(2)<=|x|<under/overflow bound */
-	cvtsd2ss	%xmm2, %xmm2	/* SP x*K/log(2)+RS */
-	movd	%xmm2, %eax		/* bits of n*K+j with trash */
-	subss	L(SP_RS)(%rip), %xmm2	/* SP t=round(x*K/log(2)) */
-	movl	%eax, %edx		/* n*K+j with trash */
-	cvtss2sd	%xmm2, %xmm2	/* DP t */
-	andl	$0x3f, %eax		/* bits of j */
-	mulsd	L(DP_NLN2K)(%rip), %xmm2/* DP -t*log(2)/K */
-	andl	$0xffffffc0, %edx	/* bits of n */
-#ifdef __AVX__
-	vaddsd	%xmm1, %xmm2, %xmm0	/* DP y=x-t*log(2)/K */
-	vmulsd	%xmm0, %xmm0, %xmm2	/* DP z=y*y */
-#else
-	addsd	%xmm1, %xmm2		/* DP y=x-t*log(2)/K */
-	movaps	%xmm2, %xmm0		/* DP y */
-	mulsd	%xmm2, %xmm2		/* DP z=y*y */
-#endif
-	mulsd	%xmm2, %xmm4		/* DP P3*z */
-	addl	$0xffc0, %edx		/* bits of n + DP exponent bias */
-	mulsd	%xmm2, %xmm3		/* DP P2*z */
-	shlq	$46, %rdx		/* DP 2^n */
-	addsd	L(DP_P1)(%rip), %xmm4	/* DP P3*z+P1 */
-	addsd	L(DP_P0)(%rip), %xmm3	/* DP P2*z+P0 */
-	movd	%rdx, %xmm1		/* DP 2^n */
-	mulsd	%xmm2, %xmm4		/* DP (P3*z+P1)*z */
-	mulsd	%xmm3, %xmm0		/* DP (P2*z+P0)*y */
-	addsd	%xmm4, %xmm0		/* DP P(y) */
-	mulsd	(%rsi,%rax,8), %xmm0	/* DP P(y)*T[j] */
-	addsd	(%rsi,%rax,8), %xmm0	/* DP T[j]*(P(y)+1) */
-	mulsd	%xmm1, %xmm0		/* DP result=2^n*(T[j]*(P(y)+1)) */
-	cvtsd2ss	%xmm0, %xmm0	/* convert result to single precision */
-	ret
-END(__ieee754_expf)
-
-	.section .rodata, "a"
-	.p2align 3
-L(DP_T): /* table of double precision values 2^(j/K) for j=[0..K-1] */
-	.long	0x00000000, 0x3ff00000
-	.long	0x3e778061, 0x3ff02c9a
-	.long	0xd3158574, 0x3ff059b0
-	.long	0x18759bc8, 0x3ff08745
-	.long	0x6cf9890f, 0x3ff0b558
-	.long	0x32d3d1a2, 0x3ff0e3ec
-	.long	0xd0125b51, 0x3ff11301
-	.long	0xaea92de0, 0x3ff1429a
-	.long	0x3c7d517b, 0x3ff172b8
-	.long	0xeb6fcb75, 0x3ff1a35b
-	.long	0x3168b9aa, 0x3ff1d487
-	.long	0x88628cd6, 0x3ff2063b
-	.long	0x6e756238, 0x3ff2387a
-	.long	0x65e27cdd, 0x3ff26b45
-	.long	0xf51fdee1, 0x3ff29e9d
-	.long	0xa6e4030b, 0x3ff2d285
-	.long	0x0a31b715, 0x3ff306fe
-	.long	0xb26416ff, 0x3ff33c08
-	.long	0x373aa9cb, 0x3ff371a7
-	.long	0x34e59ff7, 0x3ff3a7db
-	.long	0x4c123422, 0x3ff3dea6
-	.long	0x21f72e2a, 0x3ff4160a
-	.long	0x6061892d, 0x3ff44e08
-	.long	0xb5c13cd0, 0x3ff486a2
-	.long	0xd5362a27, 0x3ff4bfda
-	.long	0x769d2ca7, 0x3ff4f9b2
-	.long	0x569d4f82, 0x3ff5342b
-	.long	0x36b527da, 0x3ff56f47
-	.long	0xdd485429, 0x3ff5ab07
-	.long	0x15ad2148, 0x3ff5e76f
-	.long	0xb03a5585, 0x3ff6247e
-	.long	0x82552225, 0x3ff66238
-	.long	0x667f3bcd, 0x3ff6a09e
-	.long	0x3c651a2f, 0x3ff6dfb2
-	.long	0xe8ec5f74, 0x3ff71f75
-	.long	0x564267c9, 0x3ff75feb
-	.long	0x73eb0187, 0x3ff7a114
-	.long	0x36cf4e62, 0x3ff7e2f3
-	.long	0x994cce13, 0x3ff82589
-	.long	0x9b4492ed, 0x3ff868d9
-	.long	0x422aa0db, 0x3ff8ace5
-	.long	0x99157736, 0x3ff8f1ae
-	.long	0xb0cdc5e5, 0x3ff93737
-	.long	0x9fde4e50, 0x3ff97d82
-	.long	0x82a3f090, 0x3ff9c491
-	.long	0x7b5de565, 0x3ffa0c66
-	.long	0xb23e255d, 0x3ffa5503
-	.long	0x5579fdbf, 0x3ffa9e6b
-	.long	0x995ad3ad, 0x3ffae89f
-	.long	0xb84f15fb, 0x3ffb33a2
-	.long	0xf2fb5e47, 0x3ffb7f76
-	.long	0x904bc1d2, 0x3ffbcc1e
-	.long	0xdd85529c, 0x3ffc199b
-	.long	0x2e57d14b, 0x3ffc67f1
-	.long	0xdcef9069, 0x3ffcb720
-	.long	0x4a07897c, 0x3ffd072d
-	.long	0xdcfba487, 0x3ffd5818
-	.long	0x03db3285, 0x3ffda9e6
-	.long	0x337b9b5f, 0x3ffdfc97
-	.long	0xe78b3ff6, 0x3ffe502e
-	.long	0xa2a490da, 0x3ffea4af
-	.long	0xee615a27, 0x3ffefa1b
-	.long	0x5b6e4540, 0x3fff5076
-	.long	0x819e90d8, 0x3fffa7c1
-	.type L(DP_T), @object
-	ASM_SIZE_DIRECTIVE(L(DP_T))
-
-	.section .rodata.cst8,"aM",@progbits,8
-	.p2align 3
-L(DP_KLN2): /* double precision K/log(2) */
-	.long	0x652b82fe, 0x40571547
-	.type L(DP_KLN2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_KLN2))
-
-	.p2align 3
-L(DP_NLN2K): /* double precision -log(2)/K */
-	.long	0xfefa39ef, 0xbf862e42
-	.type L(DP_NLN2K), @object
-	ASM_SIZE_DIRECTIVE(L(DP_NLN2K))
-
-	.p2align 3
-L(DP_RS): /* double precision 2^23+2^22 */
-	.long	0x00000000, 0x41680000
-	.type L(DP_RS), @object
-	ASM_SIZE_DIRECTIVE(L(DP_RS))
-
-	.p2align 3
-L(DP_P3): /* double precision polynomial coefficient P3 */
-	.long	0xeb78fa85, 0x3fa56420
-	.type L(DP_P3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_P3))
-
-	.p2align 3
-L(DP_P1): /* double precision polynomial coefficient P1 */
-	.long	0x008d6118, 0x3fe00000
-	.type L(DP_P1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_P1))
-
-	.p2align 3
-L(DP_P2): /* double precision polynomial coefficient P2 */
-	.long	0xda752d4f, 0x3fc55550
-	.type L(DP_P2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_P2))
-
-	.p2align 3
-L(DP_P0): /* double precision polynomial coefficient P0 */
-	.long	0xffffe7c6, 0x3fefffff
-	.type L(DP_P0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_P0))
-
-	.p2align 2
-L(SP_RANGE): /* single precision overflow/underflow bounds */
-	.long	0x42b17217	/* if x>this bound, then result overflows */
-	.long	0x42cff1b4	/* if x<this bound, then result underflows */
-	.type L(SP_RANGE), @object
-	ASM_SIZE_DIRECTIVE(L(SP_RANGE))
-
-	.p2align 2
-L(SP_INF_0):
-	.long	0x7f800000	/* single precision Inf */
-	.long	0		/* single precision zero */
-	.type L(SP_INF_0), @object
-	ASM_SIZE_DIRECTIVE(L(SP_INF_0))
-
-	.section .rodata.cst4,"aM",@progbits,4
-	.p2align 2
-L(SP_RS): /* single precision 2^23+2^22 */
-	.long	0x4b400000
-	.type L(SP_RS), @object
-	ASM_SIZE_DIRECTIVE(L(SP_RS))
-
-	.p2align 2
-L(SP_SMALL): /* single precision small value 2^(-100) */
-	.long	0x0d800000
-	.type L(SP_SMALL), @object
-	ASM_SIZE_DIRECTIVE(L(SP_SMALL))
-
-	.p2align 2
-L(SP_LARGE): /* single precision large value 2^100 */
-	.long	0x71800000
-	.type L(SP_LARGE), @object
-	ASM_SIZE_DIRECTIVE(L(SP_LARGE))
-
-	.p2align 2
-L(SP_ONE): /* single precision 1.0 */
-	.long	0x3f800000
-	.type L(SP_ONE), @object
-	ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-strong_alias (__ieee754_expf, __expf_finite)
diff --git a/sysdeps/x86_64/fpu/e_expl.S b/sysdeps/x86_64/fpu/e_expl.S
index 8b3ddaec59..b75a103803 100644
--- a/sysdeps/x86_64/fpu/e_expl.S
+++ b/sysdeps/x86_64/fpu/e_expl.S
@@ -22,6 +22,7 @@
  * -- moshier@na-net.ornl.gov
  */
 
+#include <libm-alias-ldouble.h>
 #include <machine/asm.h>
 #include <x86_64-math-asm.h>
 
@@ -99,7 +100,7 @@ ENTRY(IEEE754_EXPL)
 	/* Below -64.0 (may be -NaN or -Inf). */
 	andb	%ah, %dh
 	cmpb	$0x01, %dh
-	je	2f		/* Is +-NaN, jump.  */
+	je	6f		/* Is +-NaN, jump.  */
 	jmp	1f		/* -large, possibly -Inf.  */
 
 4:	/* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf).  */
@@ -141,7 +142,7 @@ ENTRY(IEEE754_EXPL)
 	cmpb	$0x05, %dh
 	je	1f		/* Is +-Inf, jump.    */
 	cmpb	$0x01, %dh
-	je	2f		/* Is +-NaN, jump.    */
+	je	6f		/* Is +-NaN, jump.    */
 	/* Overflow or underflow; saturate.  */
 	fstp	%st
 	fldt	MO(csat)
@@ -207,10 +208,13 @@ ENTRY(IEEE754_EXPL)
 	fldz			/* Set result to 0.  */
 #endif
 2:	ret
+6:	/* NaN argument.  */
+	fadd	%st
+	ret
 END(IEEE754_EXPL)
 #ifdef USE_AS_EXPM1L
 libm_hidden_def (__expm1l)
-weak_alias (__expm1l, expm1l)
+libm_alias_ldouble (__expm1, expm1)
 #else
 strong_alias (IEEE754_EXPL, EXPL_FINITE)
 #endif
diff --git a/sysdeps/x86_64/fpu/e_log10l.S b/sysdeps/x86_64/fpu/e_log10l.S
index 8fa61644c1..e0cb88e32e 100644
--- a/sysdeps/x86_64/fpu/e_log10l.S
+++ b/sysdeps/x86_64/fpu/e_log10l.S
@@ -64,6 +64,7 @@ ENTRY(__ieee754_log10l)
 	jnz	4b		// in case x is �Inf
 	fstp	%st(1)
 	fstp	%st(1)
+	fadd	%st(0)
 	ret
 END(__ieee754_log10l)
 
diff --git a/sysdeps/x86_64/fpu/e_log2l.S b/sysdeps/x86_64/fpu/e_log2l.S
index a063255ddd..023ec29164 100644
--- a/sysdeps/x86_64/fpu/e_log2l.S
+++ b/sysdeps/x86_64/fpu/e_log2l.S
@@ -63,6 +63,7 @@ ENTRY(__ieee754_log2l)
 	jnz	4b		// in case x is �Inf
 	fstp	%st(1)
 	fstp	%st(1)
+	fadd	%st(0)
 	ret
 END (__ieee754_log2l)
 
diff --git a/sysdeps/x86_64/fpu/e_logl.S b/sysdeps/x86_64/fpu/e_logl.S
index dbe6fd59dc..0d3576f48b 100644
--- a/sysdeps/x86_64/fpu/e_logl.S
+++ b/sysdeps/x86_64/fpu/e_logl.S
@@ -66,6 +66,7 @@ ENTRY(__ieee754_logl)
 	jnz	4b		// in case x is +-Inf
 	fstp	%st(1)
 	fstp	%st(1)
+	fadd	%st(0)
 	ret
 END (__ieee754_logl)
 
diff --git a/sysdeps/x86_64/fpu/e_powl.S b/sysdeps/x86_64/fpu/e_powl.S
index 1f68cf0102..f32228104e 100644
--- a/sysdeps/x86_64/fpu/e_powl.S
+++ b/sysdeps/x86_64/fpu/e_powl.S
@@ -1,5 +1,5 @@
 /* ix87 specific implementation of pow function.
-   Copyright (C) 1996-2016 Free Software Foundation, Inc.
+   Copyright (C) 1996-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
 
@@ -26,9 +26,9 @@
 	.type one,@object
 one:	.double 1.0
 	ASM_SIZE_DIRECTIVE(one)
-	.type p3,@object
-p3:	.byte 0, 0, 0, 0, 0, 0, 0x20, 0x40
-	ASM_SIZE_DIRECTIVE(p3)
+	.type p2,@object
+p2:	.byte 0, 0, 0, 0, 0, 0, 0x10, 0x40
+	ASM_SIZE_DIRECTIVE(p2)
 	.type p63,@object
 p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
 	ASM_SIZE_DIRECTIVE(p63)
@@ -136,12 +136,12 @@ ENTRY(__ieee754_powl)
 	jmp	3f
 
 9:	/* OK, we have an integer value for y.  Unless very small
-	   (we use < 8), use the algorithm for real exponent to avoid
+	   (we use < 4), use the algorithm for real exponent to avoid
 	   accumulation of errors.  */
-	fldl	MO(p3)		// 8 : y : x
-	fld	%st(1)		// y : 8 : y : x
-	fabs			// |y| : 8 : y : x
-	fcomip	%st(1), %st	// 8 : y : x
+	fldl	MO(p2)		// 4 : y : x
+	fld	%st(1)		// y : 4 : y : x
+	fabs			// |y| : 4 : y : x
+	fcomip	%st(1), %st	// 4 : y : x
 	fstp	%st(0)		// y : x
 	jnc	3f
 	mov	-8(%rsp),%eax
@@ -184,9 +184,15 @@ ENTRY(__ieee754_powl)
 30:	fldt	8(%rsp)		// x : y
 	fldl	MO(one)		// 1.0 : x : y
 	fucomip	%st(1),%st	// x : y
-	je	31f
-	fxch			// y : x
-31:	fstp	%st(1)
+	je	32f
+31:	/* At least one argument NaN, and result should be NaN.  */
+	faddp
+	ret
+32:	jc	31b
+	/* pow (1, NaN); check if the NaN signaling.  */
+	testb	$0x40, 31(%rsp)
+	jz	31b
+	fstp	%st(1)
 	ret
 
 	.align ALIGNARG(4)
@@ -217,12 +223,24 @@ ENTRY(__ieee754_powl)
 	cfi_adjust_cfa_offset (-40)
 	ret
 
-	// pow(x,�0) = 1
+	// pow(x,+-0) = 1, unless x is sNaN
 	.align ALIGNARG(4)
 11:	fstp	%st(0)		// pop y
+	fldt	8(%rsp)		// x
+	fxam
+	fnstsw
+	andb	$0x45, %ah
+	cmpb	$0x01, %ah
+	je	112f		// x is NaN
+111:	fstp	%st(0)
 	fldl	MO(one)
 	ret
 
+112:	testb	$0x40, 15(%rsp)
+	jnz	111b
+	fadd	%st(0)
+	ret
+
 	// y == �inf
 	.align ALIGNARG(4)
 12:	fstp	%st(0)		// pop y
@@ -255,6 +273,7 @@ ENTRY(__ieee754_powl)
 
 	.align ALIGNARG(4)
 13:	fldt	8(%rsp)		// load x == NaN
+	fadd	%st(0)
 	ret
 
 	.align ALIGNARG(4)
diff --git a/sysdeps/x86_64/fpu/e_scalbl.S b/sysdeps/x86_64/fpu/e_scalbl.S
index 331bee580c..2982dc3b9e 100644
--- a/sysdeps/x86_64/fpu/e_scalbl.S
+++ b/sysdeps/x86_64/fpu/e_scalbl.S
@@ -44,7 +44,7 @@ ENTRY(__ieee754_scalbl)
 	fnstsw
 	andl	$0x4500, %eax
 	cmpl	$0x0100, %eax
-	je	3f
+	je	2f
 	fld	%st(1)
 	frndint
 	fcomip	%st(2), %st
@@ -75,15 +75,8 @@ ENTRY(__ieee754_scalbl)
 #endif
 	ret
 
-	/* The result is NaN, but we must not raise an exception.
-	   So use a variable.  */
-2:	fstp	%st
-	fstp	%st
-	fldl	MO(nan)
-	ret
-
-	/* The first parameter is a NaN.  Return it.  */
-3:	fstp	%st(1)
+	/* The result is NaN; raise an exception for sNaN arguments.  */
+2:	faddp
 	ret
 
 	/* Return NaN and raise the invalid exception.  */
diff --git a/sysdeps/x86_64/fpu/e_sqrt.c b/sysdeps/x86_64/fpu/e_sqrt.c
index 4b86434913..f4c2e5fd7c 100644
--- a/sysdeps/x86_64/fpu/e_sqrt.c
+++ b/sysdeps/x86_64/fpu/e_sqrt.c
@@ -1,5 +1,5 @@
 /* Square root of floating point number.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/e_sqrtf.c b/sysdeps/x86_64/fpu/e_sqrtf.c
index 639137b735..8f76ccb530 100644
--- a/sysdeps/x86_64/fpu/e_sqrtf.c
+++ b/sysdeps/x86_64/fpu/e_sqrtf.c
@@ -1,5 +1,5 @@
 /* Square root of floating point number.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fclrexcpt.c b/sysdeps/x86_64/fpu/fclrexcpt.c
index a8e00c0141..e7f6aa341f 100644
--- a/sysdeps/x86_64/fpu/fclrexcpt.c
+++ b/sysdeps/x86_64/fpu/fclrexcpt.c
@@ -1,5 +1,5 @@
 /* Clear given exceptions in current floating-point environment.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fedisblxcpt.c b/sysdeps/x86_64/fpu/fedisblxcpt.c
index f1ea6cfa97..9153f997ed 100644
--- a/sysdeps/x86_64/fpu/fedisblxcpt.c
+++ b/sysdeps/x86_64/fpu/fedisblxcpt.c
@@ -1,5 +1,5 @@
 /* Disable floating-point exceptions.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2001.
 
diff --git a/sysdeps/x86_64/fpu/feenablxcpt.c b/sysdeps/x86_64/fpu/feenablxcpt.c
index df4c628b8d..7a3e26b3f9 100644
--- a/sysdeps/x86_64/fpu/feenablxcpt.c
+++ b/sysdeps/x86_64/fpu/feenablxcpt.c
@@ -1,5 +1,5 @@
 /* Enable floating-point exceptions.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2001.
 
diff --git a/sysdeps/x86_64/fpu/fegetenv.c b/sysdeps/x86_64/fpu/fegetenv.c
index a28efb36f3..9461af7575 100644
--- a/sysdeps/x86_64/fpu/fegetenv.c
+++ b/sysdeps/x86_64/fpu/fegetenv.c
@@ -1,5 +1,5 @@
 /* Store current floating-point environment.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fegetexcept.c b/sysdeps/x86_64/fpu/fegetexcept.c
index 8acd0382a0..ce54c251ba 100644
--- a/sysdeps/x86_64/fpu/fegetexcept.c
+++ b/sysdeps/x86_64/fpu/fegetexcept.c
@@ -1,5 +1,5 @@
 /* Get enabled floating-point exceptions.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2001.
 
diff --git a/sysdeps/x86_64/fpu/fegetmode.c b/sysdeps/x86_64/fpu/fegetmode.c
new file mode 100644
index 0000000000..cc4f12649b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/fegetmode.c
@@ -0,0 +1,28 @@
+/* Store current floating-point control modes.  x86_64 version.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+/* Fill *MODEP with the _FPU_GETCW control word and MXCSR; returns 0.  */
+int
+fegetmode (femode_t *modep)
+{
+  _FPU_GETCW (modep->__control_word);  /* Presumably from <fpu_control.h>.  */
+  __asm__ ("stmxcsr %0" : "=m" (modep->__mxcsr));  /* MXCSR -> memory.  */
+  return 0;  /* No failure path.  */
+}
diff --git a/sysdeps/x86_64/fpu/fegetround.c b/sysdeps/x86_64/fpu/fegetround.c
index 296d366560..0f31cafedd 100644
--- a/sysdeps/x86_64/fpu/fegetround.c
+++ b/sysdeps/x86_64/fpu/fegetround.c
@@ -1,5 +1,5 @@
 /* Return current rounding direction.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+   Copyright (C) 1997-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
diff --git a/sysdeps/x86_64/fpu/feholdexcpt.c b/sysdeps/x86_64/fpu/feholdexcpt.c
index a040c3dea5..dec689beb2 100644
--- a/sysdeps/x86_64/fpu/feholdexcpt.c
+++ b/sysdeps/x86_64/fpu/feholdexcpt.c
@@ -1,5 +1,5 @@
 /* Store current floating-point environment and clear exceptions.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fesetenv.c b/sysdeps/x86_64/fpu/fesetenv.c
index 355d02aaa6..c12dba5101 100644
--- a/sysdeps/x86_64/fpu/fesetenv.c
+++ b/sysdeps/x86_64/fpu/fesetenv.c
@@ -1,5 +1,5 @@
 /* Install given floating-point environment.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fesetexcept.c b/sysdeps/x86_64/fpu/fesetexcept.c
new file mode 100644
index 0000000000..122a7629dc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/fesetexcept.c
@@ -0,0 +1,31 @@
+/* Set given exception flags.  x86_64 version.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+/* OR the EXCEPTS bits within FE_ALL_EXCEPT into MXCSR; returns 0.  */
+int
+fesetexcept (int excepts)
+{
+  unsigned int mxcsr;
+
+  __asm__ ("stmxcsr %0" : "=m" (*&mxcsr));  /* Read current MXCSR.  */
+  mxcsr |= excepts & FE_ALL_EXCEPT;  /* Bits are only added, unshifted.  */
+  __asm__ ("ldmxcsr %0" : : "m" (*&mxcsr));  /* Write MXCSR back.  */
+
+  return 0;
+}
diff --git a/sysdeps/x86_64/fpu/fesetmode.c b/sysdeps/x86_64/fpu/fesetmode.c
new file mode 100644
index 0000000000..0771e4c10a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/fesetmode.c
@@ -0,0 +1,50 @@
+/* Install given floating-point control modes.  x86_64 version.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <fpu_control.h>
+
+/* All exceptions, including the x86-specific "denormal operand"
+   exception.  */
+#define FE_ALL_EXCEPT_X86 (FE_ALL_EXCEPT | __FE_DENORM)
+/* Install the modes in MODEP, keeping current MXCSR exception flags.  */
+int
+fesetmode (const femode_t *modep)
+{
+  fpu_control_t cw;
+  unsigned int mxcsr;
+  __asm__ ("stmxcsr %0" : "=m" (mxcsr));  /* Read current MXCSR.  */
+  /* Preserve SSE exception flags but restore other state in
+     MXCSR.  */
+  mxcsr &= FE_ALL_EXCEPT_X86;
+  if (modep == FE_DFL_MODE)
+    {
+      cw = _FPU_DEFAULT;
+      /* Default MXCSR state has all bits zero except for those
+	 masking exceptions.  */
+      mxcsr |= FE_ALL_EXCEPT_X86 << 7;
+    }
+  else
+    {
+      cw = modep->__control_word;
+      mxcsr |= modep->__mxcsr & ~FE_ALL_EXCEPT_X86;  /* Saved flags unused.  */
+    }
+  _FPU_SETCW (cw);
+  __asm__ ("ldmxcsr %0" : : "m" (mxcsr));  /* Write merged MXCSR.  */
+  return 0;
+}
diff --git a/sysdeps/x86_64/fpu/fesetround.c b/sysdeps/x86_64/fpu/fesetround.c
index 475d63f4db..e5afc1d57a 100644
--- a/sysdeps/x86_64/fpu/fesetround.c
+++ b/sysdeps/x86_64/fpu/fesetround.c
@@ -1,5 +1,5 @@
 /* Set current rounding direction.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/feupdateenv.c b/sysdeps/x86_64/fpu/feupdateenv.c
index f035d57ca8..00da535e64 100644
--- a/sysdeps/x86_64/fpu/feupdateenv.c
+++ b/sysdeps/x86_64/fpu/feupdateenv.c
@@ -1,5 +1,5 @@
 /* Install given floating-point environment and raise exceptions.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+   Copyright (C) 1997-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
diff --git a/sysdeps/x86_64/fpu/fgetexcptflg.c b/sysdeps/x86_64/fpu/fgetexcptflg.c
index 938cf3e62b..16719ceb5e 100644
--- a/sysdeps/x86_64/fpu/fgetexcptflg.c
+++ b/sysdeps/x86_64/fpu/fgetexcptflg.c
@@ -1,5 +1,5 @@
 /* Store current representation for exceptions.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fraiseexcpt.c b/sysdeps/x86_64/fpu/fraiseexcpt.c
index e2abbbec33..ca1c223053 100644
--- a/sysdeps/x86_64/fpu/fraiseexcpt.c
+++ b/sysdeps/x86_64/fpu/fraiseexcpt.c
@@ -1,5 +1,5 @@
 /* Raise given exceptions.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/fsetexcptflg.c b/sysdeps/x86_64/fpu/fsetexcptflg.c
index 76f7bad9a8..821dd9d786 100644
--- a/sysdeps/x86_64/fpu/fsetexcptflg.c
+++ b/sysdeps/x86_64/fpu/fsetexcptflg.c
@@ -1,5 +1,5 @@
 /* Set floating-point environment exception handling.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/ftestexcept.c b/sysdeps/x86_64/fpu/ftestexcept.c
index c8f2c01c67..63167c68df 100644
--- a/sysdeps/x86_64/fpu/ftestexcept.c
+++ b/sysdeps/x86_64/fpu/ftestexcept.c
@@ -1,5 +1,5 @@
 /* Test exception in current environment.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/k_rem_pio2l.c b/sysdeps/x86_64/fpu/k_rem_pio2l.c
deleted file mode 100644
index eea55a98d2..0000000000
--- a/sysdeps/x86_64/fpu/k_rem_pio2l.c
+++ /dev/null
@@ -1 +0,0 @@
-/*  Not needed.  */
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index 445b47527d..912db318b6 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -3,1015 +3,1293 @@
 # Maximal error of functions:
 Function: "acos":
 float: 1
+float128: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "acos_downward":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "acos_towardzero":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "acos_upward":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "acosh":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: "acosh_downward":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
 Function: "acosh_towardzero":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: "acosh_upward":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: "asin":
 float: 1
+float128: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "asin_downward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: "asin_towardzero":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "asin_upward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "asinh":
 double: 1
 float: 1
+float128: 3
 idouble: 1
 ifloat: 1
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: "asinh_downward":
 double: 3
 float: 3
+float128: 4
 idouble: 3
 ifloat: 3
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: "asinh_towardzero":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: "asinh_upward":
 double: 3
 float: 3
+float128: 4
 idouble: 3
 ifloat: 3
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: "atan":
 float: 1
+float128: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "atan2":
 float: 1
+float128: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "atan2_downward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "atan2_towardzero":
 double: 1
 float: 2
+float128: 3
 idouble: 1
 ifloat: 2
+ifloat128: 3
 ildouble: 1
 ldouble: 1
 
 Function: "atan2_upward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "atan_downward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "atan_towardzero":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "atan_upward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "atanh":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: "atanh_downward":
 double: 3
 float: 3
+float128: 4
 idouble: 3
 ifloat: 3
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: "atanh_towardzero":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: "atanh_upward":
 double: 3
 float: 3
+float128: 4
 idouble: 3
 ifloat: 3
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: "cabs":
 double: 1
+float128: 1
 idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "cabs_downward":
 double: 1
+float128: 1
 idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "cabs_towardzero":
 double: 1
+float128: 1
 idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "cabs_upward":
 double: 1
+float128: 1
 idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "cacos":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "cacos":
-double: 1
+double: 2
 float: 2
-idouble: 1
+float128: 2
+idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "cacos_downward":
-double: 2
+double: 3
 float: 2
-idouble: 2
+float128: 3
+idouble: 3
 ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "cacos_downward":
 double: 5
 float: 3
+float128: 6
 idouble: 5
 ifloat: 3
-ildouble: 5
-ldouble: 5
+ifloat128: 6
+ildouble: 6
+ldouble: 6
 
 Function: Real part of "cacos_towardzero":
-double: 2
+double: 3
 float: 2
-idouble: 2
+float128: 3
+idouble: 3
 ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "cacos_towardzero":
 double: 5
 float: 3
+float128: 5
 idouble: 5
 ifloat: 3
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: Real part of "cacos_upward":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "cacos_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
+double: 5
+float: 7
+float128: 7
+idouble: 5
+ifloat: 7
+ifloat128: 7
+ildouble: 7
+ldouble: 7
 
 Function: Real part of "cacosh":
-double: 1
+double: 2
 float: 2
-idouble: 1
+float128: 2
+idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "cacosh":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "cacosh_downward":
 double: 5
 float: 3
+float128: 5
 idouble: 5
 ifloat: 3
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "cacosh_downward":
-double: 2
-float: 2
-idouble: 2
-ifloat: 2
-ildouble: 2
-ldouble: 2
+double: 3
+float: 3
+float128: 4
+idouble: 3
+ifloat: 3
+ifloat128: 4
+ildouble: 3
+ldouble: 3
 
 Function: Real part of "cacosh_towardzero":
 double: 5
 float: 3
+float128: 5
 idouble: 5
 ifloat: 3
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "cacosh_towardzero":
-double: 2
+double: 3
 float: 2
-idouble: 2
+float128: 3
+idouble: 3
 ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "cacosh_upward":
 double: 4
 float: 4
+float128: 6
 idouble: 4
 ifloat: 4
+ifloat128: 6
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "cacosh_upward":
-double: 2
+double: 3
 float: 2
-idouble: 2
+float128: 4
+idouble: 3
 ifloat: 2
-ildouble: 2
-ldouble: 2
+ifloat128: 4
+ildouble: 3
+ldouble: 3
 
 Function: "carg":
 float: 1
+float128: 2
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "carg_downward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "carg_towardzero":
 double: 1
 float: 2
+float128: 3
 idouble: 1
 ifloat: 2
+ifloat128: 3
 ildouble: 1
 ldouble: 1
 
 Function: "carg_upward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "casin":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "casin":
-double: 1
+double: 2
 float: 2
-idouble: 1
+float128: 2
+idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "casin_downward":
 double: 3
-float: 1
+float: 2
+float128: 3
 idouble: 3
-ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat: 2
+ifloat128: 3
+ildouble: 3
+ldouble: 3
 
 Function: Imaginary part of "casin_downward":
 double: 5
 float: 3
+float128: 6
 idouble: 5
 ifloat: 3
-ildouble: 5
-ldouble: 5
+ifloat128: 6
+ildouble: 6
+ldouble: 6
 
 Function: Real part of "casin_towardzero":
 double: 3
 float: 1
+float128: 3
 idouble: 3
 ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat128: 3
+ildouble: 3
+ldouble: 3
 
 Function: Imaginary part of "casin_towardzero":
 double: 5
 float: 3
+float128: 5
 idouble: 5
 ifloat: 3
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: Real part of "casin_upward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
+double: 3
+float: 2
+float128: 3
+idouble: 3
+ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "casin_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
+double: 5
+float: 7
+float128: 7
+idouble: 5
+ifloat: 7
+ifloat128: 7
+ildouble: 7
+ldouble: 7
 
 Function: Real part of "casinh":
-double: 1
+double: 2
 float: 2
-idouble: 1
+float128: 2
+idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "casinh":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "casinh_downward":
 double: 5
 float: 3
+float128: 6
 idouble: 5
 ifloat: 3
-ildouble: 5
-ldouble: 5
+ifloat128: 6
+ildouble: 6
+ldouble: 6
 
 Function: Imaginary part of "casinh_downward":
 double: 3
-float: 1
+float: 2
+float128: 3
 idouble: 3
-ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat: 2
+ifloat128: 3
+ildouble: 3
+ldouble: 3
 
 Function: Real part of "casinh_towardzero":
 double: 5
 float: 3
+float128: 5
 idouble: 5
 ifloat: 3
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "casinh_towardzero":
 double: 3
 float: 1
+float128: 3
 idouble: 3
 ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat128: 3
+ildouble: 3
+ldouble: 3
 
 Function: Real part of "casinh_upward":
-double: 4
-float: 4
-idouble: 4
-ifloat: 4
-ildouble: 5
-ldouble: 5
+double: 5
+float: 7
+float128: 7
+idouble: 5
+ifloat: 7
+ifloat128: 7
+ildouble: 7
+ldouble: 7
 
 Function: Imaginary part of "casinh_upward":
-double: 2
+double: 3
 float: 2
-idouble: 2
+float128: 3
+idouble: 3
 ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "catan":
+double: 1
 float: 1
+float128: 1
+idouble: 1
 ifloat: 1
+ifloat128: 1
+ildouble: 1
+ldouble: 1
 
 Function: Imaginary part of "catan":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "catan_downward":
 double: 1
-float: 1
+float: 2
+float128: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "catan_downward":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: Real part of "catan_towardzero":
 double: 1
-float: 1
+float: 2
+float128: 2
 idouble: 1
-ifloat: 1
+ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "catan_towardzero":
 double: 2
-float: 1
+float: 2
+float128: 2
 idouble: 2
-ifloat: 1
+ifloat: 2
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: Real part of "catan_upward":
+double: 1
 float: 1
+float128: 2
+idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "catan_upward":
 double: 3
 float: 3
+float128: 3
 idouble: 3
 ifloat: 3
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "catanh":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "catanh":
+double: 1
 float: 1
+float128: 1
+idouble: 1
 ifloat: 1
+ifloat128: 1
+ildouble: 1
+ldouble: 1
 
 Function: Real part of "catanh_downward":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "catanh_downward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "catanh_towardzero":
 double: 2
-float: 1
+float: 2
+float128: 2
 idouble: 2
-ifloat: 1
+ifloat: 2
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "catanh_towardzero":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "catanh_upward":
 double: 4
-float: 3
+float: 4
+float128: 4
 idouble: 4
-ifloat: 3
+ifloat: 4
+ifloat128: 4
 ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "catanh_upward":
+double: 1
 float: 1
+float128: 2
+idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "cbrt":
 double: 3
 float: 1
+float128: 1
 idouble: 3
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "cbrt_downward":
 double: 4
 float: 1
+float128: 1
 idouble: 4
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "cbrt_towardzero":
 double: 3
 float: 1
+float128: 1
 idouble: 3
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "cbrt_upward":
 double: 5
 float: 1
+float128: 1
 idouble: 5
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "ccos":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "ccos":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "ccos_downward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ccos_downward":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "ccos_towardzero":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ccos_towardzero":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "ccos_upward":
 double: 1
 float: 2
+float128: 3
 idouble: 1
 ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "ccos_upward":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "ccosh":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "ccosh":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "ccosh_downward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ccosh_downward":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "ccosh_towardzero":
 double: 1
 float: 3
+float128: 2
 idouble: 1
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ccosh_towardzero":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "ccosh_upward":
 double: 1
 float: 2
+float128: 3
 idouble: 1
 ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "ccosh_upward":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "cexp":
 double: 2
 float: 1
+float128: 1
 idouble: 2
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "cexp":
 double: 1
 float: 2
+float128: 1
 idouble: 1
 ifloat: 2
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "cexp_downward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "cexp_downward":
 double: 1
 float: 3
+float128: 2
 idouble: 1
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "cexp_towardzero":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "cexp_towardzero":
 double: 1
 float: 3
+float128: 2
 idouble: 1
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "cexp_upward":
 double: 1
 float: 2
+float128: 3
 idouble: 1
 ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "cexp_upward":
 double: 1
 float: 2
+float128: 3
 idouble: 1
 ifloat: 2
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "clog":
 double: 3
 float: 3
+float128: 2
 idouble: 3
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "clog":
 float: 1
+float128: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "clog10":
 double: 3
 float: 4
+float128: 2
 idouble: 3
 ifloat: 4
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "clog10":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "clog10_downward":
 double: 5
-float: 4
+float: 5
+float128: 3
 idouble: 5
-ifloat: 4
+ifloat: 5
+ifloat128: 3
 ildouble: 8
 ldouble: 8
 
 Function: Imaginary part of "clog10_downward":
 double: 2
 float: 4
+float128: 3
 idouble: 2
 ifloat: 4
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "clog10_towardzero":
 double: 5
 float: 5
+float128: 4
 idouble: 5
 ifloat: 5
+ifloat128: 4
 ildouble: 8
 ldouble: 8
 
 Function: Imaginary part of "clog10_towardzero":
 double: 2
 float: 4
+float128: 3
 idouble: 2
 ifloat: 4
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "clog10_upward":
 double: 6
 float: 5
+float128: 4
 idouble: 6
 ifloat: 5
+ifloat128: 4
 ildouble: 8
 ldouble: 8
 
 Function: Imaginary part of "clog10_upward":
 double: 2
 float: 4
+float128: 3
 idouble: 2
 ifloat: 4
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "clog_downward":
 double: 4
 float: 3
+float128: 3
 idouble: 4
 ifloat: 3
+ifloat128: 3
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "clog_downward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "clog_towardzero":
 double: 4
 float: 4
+float128: 3
 idouble: 4
 ifloat: 4
+ifloat128: 3
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "clog_towardzero":
 double: 1
 float: 3
+float128: 2
 idouble: 1
 ifloat: 3
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "clog_upward":
 double: 4
 float: 3
+float128: 4
 idouble: 4
 ifloat: 3
+ifloat128: 4
 ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "clog_upward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "cos":
+double: 1
+float128: 1
+idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "cos_downward":
 double: 1
+float128: 3
 idouble: 1
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: "cos_towardzero":
 double: 1
+float128: 1
 idouble: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "cos_upward":
 double: 1
+float128: 2
 idouble: 1
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
@@ -1029,7 +1307,7 @@ Function: "cos_vlen4_avx2":
 double: 2
 
 Function: "cos_vlen8":
-double: 1
+double: 2
 float: 1
 
 Function: "cos_vlen8_avx2":
@@ -1038,546 +1316,690 @@ float: 1
 Function: "cosh":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "cosh_downward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 3
 
 Function: "cosh_towardzero":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "cosh_upward":
 double: 1
 float: 2
+float128: 3
 idouble: 1
 ifloat: 2
+ifloat128: 1
 ildouble: 2
 ldouble: 3
 
 Function: Real part of "cpow":
 double: 2
 float: 5
+float128: 4
 idouble: 2
 ifloat: 5
+ifloat128: 4
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "cpow":
 float: 2
+float128: 1
 ifloat: 2
+ifloat128: 1
 ildouble: 4
 ldouble: 4
 
 Function: Real part of "cpow_downward":
 double: 4
 float: 8
+float128: 6
 idouble: 4
 ifloat: 8
+ifloat128: 6
 ildouble: 7
 ldouble: 7
 
 Function: Imaginary part of "cpow_downward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "cpow_towardzero":
 double: 4
 float: 8
+float128: 6
 idouble: 4
 ifloat: 8
+ifloat128: 6
 ildouble: 7
 ldouble: 7
 
 Function: Imaginary part of "cpow_towardzero":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "cpow_upward":
 double: 4
 float: 1
+float128: 3
 idouble: 4
 ifloat: 1
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "cpow_upward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "csin":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
+Function: Imaginary part of "csin":
+float128: 1
+ifloat128: 1
+
 Function: Real part of "csin_downward":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "csin_downward":
 double: 1
 float: 2
+float128: 2
 idouble: 1
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "csin_towardzero":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "csin_towardzero":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "csin_upward":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "csin_upward":
 double: 1
 float: 3
+float128: 3
 idouble: 1
 ifloat: 3
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "csinh":
 float: 1
+float128: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "csinh":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "csinh_downward":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "csinh_downward":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "csinh_towardzero":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "csinh_towardzero":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "csinh_upward":
 double: 1
 float: 3
+float128: 3
 idouble: 1
 ifloat: 3
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "csinh_upward":
 double: 2
 float: 3
+float128: 2
 idouble: 2
 ifloat: 3
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "csqrt":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "csqrt":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "csqrt_downward":
 double: 5
 float: 4
+float128: 4
 idouble: 5
 ifloat: 4
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "csqrt_downward":
 double: 4
 float: 3
+float128: 3
 idouble: 4
 ifloat: 3
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
 Function: Real part of "csqrt_towardzero":
 double: 4
 float: 3
+float128: 3
 idouble: 4
 ifloat: 3
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "csqrt_towardzero":
 double: 4
 float: 3
+float128: 3
 idouble: 4
 ifloat: 3
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
 Function: Real part of "csqrt_upward":
 double: 5
 float: 4
+float128: 4
 idouble: 5
 ifloat: 4
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "csqrt_upward":
 double: 3
 float: 3
+float128: 3
 idouble: 3
 ifloat: 3
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
 Function: Real part of "ctan":
 double: 1
 float: 1
+float128: 3
 idouble: 1
 ifloat: 1
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Imaginary part of "ctan":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 1
 ldouble: 1
 
 Function: Real part of "ctan_downward":
 double: 6
 float: 5
+float128: 4
 idouble: 6
 ifloat: 5
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "ctan_downward":
 double: 2
 float: 2
+float128: 5
 idouble: 2
 ifloat: 2
+ifloat128: 5
 ildouble: 4
 ldouble: 4
 
 Function: Real part of "ctan_towardzero":
 double: 5
 float: 3
+float128: 4
 idouble: 5
 ifloat: 3
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: Imaginary part of "ctan_towardzero":
 double: 2
 float: 2
+float128: 5
 idouble: 2
 ifloat: 2
+ifloat128: 5
 ildouble: 4
 ldouble: 4
 
 Function: Real part of "ctan_upward":
 double: 2
 float: 4
+float128: 5
 idouble: 2
 ifloat: 4
+ifloat128: 5
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ctan_upward":
 double: 2
-float: 1
+float: 2
+float128: 5
 idouble: 2
-ifloat: 1
+ifloat: 2
+ifloat128: 5
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "ctanh":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 1
 ldouble: 1
 
 Function: Imaginary part of "ctanh":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: Real part of "ctanh_downward":
 double: 4
 float: 2
+float128: 5
 idouble: 4
 ifloat: 2
+ifloat128: 5
 ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "ctanh_downward":
 double: 6
 float: 5
+float128: 4
 idouble: 6
 ifloat: 5
+ifloat128: 4
 ildouble: 4
 ldouble: 4
 
 Function: Real part of "ctanh_towardzero":
 double: 2
 float: 2
+float128: 5
 idouble: 2
 ifloat: 2
+ifloat128: 5
 ildouble: 4
 ldouble: 4
 
 Function: Imaginary part of "ctanh_towardzero":
 double: 5
 float: 3
+float128: 3
 idouble: 5
 ifloat: 3
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: Real part of "ctanh_upward":
 double: 2
 float: 2
+float128: 5
 idouble: 2
 ifloat: 2
+ifloat128: 5
 ildouble: 3
 ldouble: 3
 
 Function: Imaginary part of "ctanh_upward":
 double: 2
 float: 3
+float128: 5
 idouble: 2
 ifloat: 3
+ifloat128: 5
 ildouble: 3
 ldouble: 3
 
 Function: "erf":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "erf_downward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "erf_towardzero":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "erf_upward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "erfc":
 double: 3
 float: 2
+float128: 2
 idouble: 3
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: "erfc_downward":
 double: 5
 float: 6
+float128: 5
 idouble: 5
 ifloat: 6
+ifloat128: 5
 ildouble: 4
 ldouble: 4
 
 Function: "erfc_towardzero":
 double: 3
 float: 4
+float128: 4
 idouble: 3
 ifloat: 4
+ifloat128: 4
 ildouble: 4
 ldouble: 4
 
 Function: "erfc_upward":
 double: 5
 float: 6
+float128: 5
 idouble: 5
 ifloat: 6
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: "exp":
+float128: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "exp10":
 double: 2
+float128: 2
 idouble: 2
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "exp10_downward":
 double: 2
 float: 1
+float128: 3
 idouble: 2
 ifloat: 1
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: "exp10_towardzero":
 double: 2
 float: 1
+float128: 3
 idouble: 2
 ifloat: 1
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: "exp10_upward":
 double: 2
 float: 1
+float128: 3
 idouble: 2
 ifloat: 1
+ifloat128: 3
 ildouble: 2
 ldouble: 2
 
 Function: "exp2":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "exp2_downward":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "exp2_towardzero":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "exp2_upward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "exp_downward":
 double: 1
+float: 1
 idouble: 1
+ifloat: 1
 ildouble: 1
 ldouble: 1
 
 Function: "exp_towardzero":
 double: 1
+float: 1
 idouble: 1
+ifloat: 1
 ildouble: 2
 ldouble: 2
 
@@ -1612,32 +2034,40 @@ float: 1
 Function: "expm1":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "expm1_downward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: "expm1_towardzero":
 double: 1
 float: 2
+float128: 4
 idouble: 1
 ifloat: 2
+ifloat128: 4
 ildouble: 4
 ldouble: 4
 
 Function: "expm1_upward":
 double: 1
 float: 1
+float128: 3
 idouble: 1
 ifloat: 1
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
@@ -1675,275 +2105,347 @@ ldouble: 6
 
 Function: "hypot":
 double: 1
+float128: 1
 idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "hypot_downward":
 double: 1
+float128: 1
 idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "hypot_towardzero":
 double: 1
+float128: 1
 idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "hypot_upward":
 double: 1
+float128: 1
 idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "j0":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: "j0_downward":
 double: 2
 float: 4
+float128: 4
 idouble: 2
 ifloat: 4
+ifloat128: 4
 ildouble: 4
 ldouble: 4
 
 Function: "j0_towardzero":
 double: 3
 float: 2
+float128: 2
 idouble: 3
 ifloat: 2
+ifloat128: 2
 ildouble: 5
 ldouble: 5
 
 Function: "j0_upward":
 double: 3
 float: 2
+float128: 5
 idouble: 3
 ifloat: 2
+ifloat128: 5
 ildouble: 4
 ldouble: 4
 
 Function: "j1":
 double: 1
 float: 2
+float128: 4
 idouble: 1
 ifloat: 2
+ifloat128: 4
 ildouble: 1
 ldouble: 1
 
 Function: "j1_downward":
 double: 3
 float: 3
+float128: 4
 idouble: 3
 ifloat: 3
+ifloat128: 4
 ildouble: 4
 ldouble: 4
 
 Function: "j1_towardzero":
 double: 3
 float: 2
+float128: 4
 idouble: 3
 ifloat: 2
+ifloat128: 4
 ildouble: 4
 ldouble: 4
 
 Function: "j1_upward":
 double: 3
 float: 5
+float128: 3
 idouble: 3
 ifloat: 5
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: "jn":
 double: 4
 float: 4
+float128: 7
 idouble: 4
 ifloat: 4
+ifloat128: 7
 ildouble: 4
 ldouble: 4
 
 Function: "jn_downward":
 double: 5
 float: 5
+float128: 8
 idouble: 5
 ifloat: 5
+ifloat128: 8
 ildouble: 4
 ldouble: 4
 
 Function: "jn_towardzero":
 double: 5
 float: 5
+float128: 8
 idouble: 5
 ifloat: 5
+ifloat128: 8
 ildouble: 5
 ldouble: 5
 
 Function: "jn_upward":
 double: 5
 float: 5
+float128: 7
 idouble: 5
 ifloat: 5
+ifloat128: 7
 ildouble: 5
 ldouble: 5
 
 Function: "lgamma":
 double: 4
 float: 4
+float128: 5
 idouble: 4
 ifloat: 4
+ifloat128: 5
 ildouble: 4
 ldouble: 4
 
 Function: "lgamma_downward":
 double: 5
 float: 4
+float128: 8
 idouble: 5
 ifloat: 4
+ifloat128: 8
 ildouble: 7
 ldouble: 7
 
 Function: "lgamma_towardzero":
 double: 5
 float: 4
+float128: 5
 idouble: 5
 ifloat: 4
+ifloat128: 5
 ildouble: 7
 ldouble: 7
 
 Function: "lgamma_upward":
 double: 5
 float: 5
+float128: 8
 idouble: 5
 ifloat: 5
+ifloat128: 8
 ildouble: 6
 ldouble: 6
 
 Function: "log":
 float: 1
+float128: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "log10":
 double: 2
 float: 2
+float128: 1
 idouble: 2
 ifloat: 2
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "log10_downward":
 double: 2
 float: 3
+float128: 1
 idouble: 2
 ifloat: 3
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "log10_towardzero":
 double: 2
 float: 2
+float128: 1
 idouble: 2
 ifloat: 2
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "log10_upward":
 double: 2
 float: 2
+float128: 1
 idouble: 2
 ifloat: 2
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "log1p":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: "log1p_downward":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
 Function: "log1p_towardzero":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
 Function: "log1p_upward":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: "log2":
 double: 2
 float: 1
+float128: 2
 idouble: 2
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
 Function: "log2_downward":
 double: 3
 float: 3
+float128: 3
 idouble: 3
 ifloat: 3
+ifloat128: 3
 ildouble: 1
 ldouble: 1
 
 Function: "log2_towardzero":
 double: 2
 float: 2
+float128: 1
 idouble: 2
 ifloat: 2
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "log2_upward":
 double: 3
 float: 3
+float128: 1
 idouble: 3
 ifloat: 3
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "log_downward":
 float: 2
+float128: 1
 ifloat: 2
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "log_towardzero":
 float: 2
+float128: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: "log_upward":
 double: 1
 float: 2
+float128: 1
 idouble: 1
 ifloat: 2
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
@@ -1965,67 +2467,47 @@ double: 1
 float: 3
 
 Function: "log_vlen8_avx2":
-float: 2
+float: 3
 
 Function: "pow":
+double: 1
 float: 1
+float128: 2
+idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 1
 ldouble: 1
 
-Function: "pow10":
-double: 2
-idouble: 2
-ildouble: 1
-ldouble: 1
-
-Function: "pow10_downward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow10_towardzero":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
-Function: "pow10_upward":
-double: 2
-float: 1
-idouble: 2
-ifloat: 1
-ildouble: 2
-ldouble: 2
-
 Function: "pow_downward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
+ifloat128: 2
 ildouble: 4
 ldouble: 4
 
 Function: "pow_towardzero":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
-ildouble: 1
-ldouble: 1
+ifloat128: 2
+ildouble: 4
+ldouble: 4
 
 Function: "pow_upward":
 double: 1
 float: 1
+float128: 2
 idouble: 1
 ifloat: 1
-ildouble: 2
-ldouble: 2
+ifloat128: 2
+ildouble: 4
+ldouble: 4
 
 Function: "pow_vlen16":
 float: 3
@@ -2048,24 +2530,34 @@ Function: "pow_vlen8_avx2":
 float: 3
 
 Function: "sin":
+double: 1
+float128: 1
+idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "sin_downward":
 double: 1
+float128: 3
 idouble: 1
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: "sin_towardzero":
 double: 1
+float128: 2
 idouble: 1
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: "sin_upward":
 double: 1
+float128: 3
 idouble: 1
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
@@ -2090,24 +2582,34 @@ Function: "sin_vlen8_avx2":
 float: 1
 
 Function: "sincos":
+double: 1
+float128: 1
+idouble: 1
+ifloat128: 1
 ildouble: 1
 ldouble: 1
 
 Function: "sincos_downward":
 double: 1
+float128: 3
 idouble: 1
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: "sincos_towardzero":
 double: 1
+float128: 2
 idouble: 1
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: "sincos_upward":
 double: 1
+float128: 3
 idouble: 1
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
@@ -2125,7 +2627,7 @@ Function: "sincos_vlen4_avx2":
 double: 2
 
 Function: "sincos_vlen8":
-double: 1
+double: 2
 float: 1
 
 Function: "sincos_vlen8_avx2":
@@ -2134,222 +2636,278 @@ float: 1
 Function: "sinh":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: "sinh_downward":
 double: 3
 float: 3
+float128: 3
 idouble: 3
 ifloat: 3
+ifloat128: 3
 ildouble: 5
 ldouble: 5
 
 Function: "sinh_towardzero":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
 Function: "sinh_upward":
 double: 3
 float: 3
+float128: 4
 idouble: 3
 ifloat: 3
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: "tan":
 float: 1
+float128: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "tan_downward":
 double: 1
 float: 2
+float128: 1
 idouble: 1
 ifloat: 2
+ifloat128: 1
 ildouble: 3
 ldouble: 3
 
 Function: "tan_towardzero":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 3
 ldouble: 3
 
 Function: "tan_upward":
 double: 1
 float: 1
+float128: 1
 idouble: 1
 ifloat: 1
+ifloat128: 1
 ildouble: 2
 ldouble: 2
 
 Function: "tanh":
 double: 2
 float: 2
+float128: 2
 idouble: 2
 ifloat: 2
+ifloat128: 2
 ildouble: 3
 ldouble: 3
 
 Function: "tanh_downward":
 double: 3
 float: 3
+float128: 4
 idouble: 3
 ifloat: 3
+ifloat128: 4
 ildouble: 4
 ldouble: 4
 
 Function: "tanh_towardzero":
 double: 2
 float: 2
+float128: 3
 idouble: 2
 ifloat: 2
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: "tanh_upward":
 double: 3
 float: 3
+float128: 3
 idouble: 3
 ifloat: 3
+ifloat128: 3
 ildouble: 4
 ldouble: 4
 
 Function: "tgamma":
 double: 5
 float: 5
+float128: 4
 idouble: 5
 ifloat: 5
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: "tgamma_downward":
 double: 5
 float: 5
+float128: 5
 idouble: 5
 ifloat: 5
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: "tgamma_towardzero":
 double: 5
 float: 5
+float128: 5
 idouble: 5
 ifloat: 5
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: "tgamma_upward":
 double: 5
 float: 5
+float128: 4
 idouble: 5
 ifloat: 5
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: "y0":
 double: 2
 float: 1
+float128: 3
 idouble: 2
 ifloat: 1
+ifloat128: 3
 ildouble: 1
 ldouble: 1
 
 Function: "y0_downward":
 double: 3
 float: 4
+float128: 4
 idouble: 3
 ifloat: 4
+ifloat128: 4
 ildouble: 5
 ldouble: 5
 
 Function: "y0_towardzero":
 double: 3
 float: 3
+float128: 3
 idouble: 3
 ifloat: 3
+ifloat128: 3
 ildouble: 5
 ldouble: 5
 
 Function: "y0_upward":
 double: 3
 float: 5
+float128: 3
 idouble: 3
 ifloat: 5
+ifloat128: 3
 ildouble: 3
 ldouble: 3
 
 Function: "y1":
 double: 3
 float: 2
+float128: 2
 idouble: 3
 ifloat: 2
+ifloat128: 2
 ildouble: 2
 ldouble: 2
 
 Function: "y1_downward":
 double: 3
 float: 2
+float128: 4
 idouble: 3
 ifloat: 2
+ifloat128: 4
 ildouble: 7
 ldouble: 7
 
 Function: "y1_towardzero":
 double: 3
 float: 2
+float128: 2
 idouble: 3
 ifloat: 2
+ifloat128: 2
 ildouble: 5
 ldouble: 5
 
 Function: "y1_upward":
 double: 7
 float: 2
+float128: 5
 idouble: 7
 ifloat: 2
+ifloat128: 5
 ildouble: 7
 ldouble: 7
 
 Function: "yn":
 double: 3
 float: 3
+float128: 5
 idouble: 3
 ifloat: 3
+ifloat128: 5
 ildouble: 4
 ldouble: 4
 
 Function: "yn_downward":
 double: 3
 float: 4
+float128: 5
 idouble: 3
 ifloat: 4
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: "yn_towardzero":
 double: 3
 float: 3
+float128: 5
 idouble: 3
 ifloat: 3
+ifloat128: 5
 ildouble: 5
 ldouble: 5
 
 Function: "yn_upward":
 double: 4
 float: 5
+float128: 5
 idouble: 4
 ifloat: 5
+ifloat128: 5
 ildouble: 4
 ldouble: 4
 
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps-name b/sysdeps/x86_64/fpu/libm-test-ulps-name
new file mode 100644
index 0000000000..1c09346681
--- /dev/null
+++ b/sysdeps/x86_64/fpu/libm-test-ulps-name
@@ -0,0 +1 @@
+x86_64
diff --git a/sysdeps/x86_64/fpu/math-tests-arch.h b/sysdeps/x86_64/fpu/math-tests-arch.h
index 867152046e..a5df133292 100644
--- a/sysdeps/x86_64/fpu/math-tests-arch.h
+++ b/sysdeps/x86_64/fpu/math-tests-arch.h
@@ -1,5 +1,5 @@
 /* Runtime architecture check for math tests. x86_64 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,11 +16,11 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <cpu-features.h>
+
 #if defined REQUIRE_AVX
-# include <init-arch.h>
 
 # define INIT_ARCH_EXT
-
 # define CHECK_ARCH_EXT                                        \
   do                                                           \
     {                                                          \
@@ -29,10 +29,8 @@
   while (0)
 
 #elif defined REQUIRE_AVX2
-# include <init-arch.h>
 
 # define INIT_ARCH_EXT
-
 # define CHECK_ARCH_EXT                                        \
   do                                                           \
     {                                                          \
@@ -41,10 +39,8 @@
   while (0)
 
 #elif defined REQUIRE_AVX512F
-# include <init-arch.h>
 
 # define INIT_ARCH_EXT
-
 # define CHECK_ARCH_EXT                                        \
   do                                                           \
     {                                                          \
diff --git a/sysdeps/x86_64/fpu/math_ldbl.h b/sysdeps/x86_64/fpu/math_ldbl.h
index b9ff8dadaf..27f8fce904 100644
--- a/sysdeps/x86_64/fpu/math_ldbl.h
+++ b/sysdeps/x86_64/fpu/math_ldbl.h
@@ -1,6 +1,25 @@
-#ifndef _MATH_PRIVATE_H_
-#error "Never use <math_ldbl.h> directly; include <math_private.h> instead."
-#endif
+/* Manipulation of the bit representation of 'long double' quantities.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _MATH_LDBL_H_
+#define _MATH_LDBL_H_ 1
+
+#include <stdint.h>
 
 /* A union which permits us to convert between a long double and
    three 32 bit ints.  */
@@ -10,8 +29,8 @@ typedef union
   long double value;
   struct
   {
-    u_int32_t lsw;
-    u_int32_t msw;
+    uint32_t lsw;
+    uint32_t msw;
     int sign_exponent:16;
     unsigned int empty1:16;
     unsigned int empty0:32;
@@ -77,3 +96,5 @@ do {								\
   se_u.parts.sign_exponent = (exp);				\
   (d) = se_u.value;						\
 } while (0)
+
+#endif /* math_ldbl.h */
diff --git a/sysdeps/x86_64/fpu/math_private.h b/sysdeps/x86_64/fpu/math_private.h
index 027a6a3a4d..13052893ef 100644
--- a/sysdeps/x86_64/fpu/math_private.h
+++ b/sysdeps/x86_64/fpu/math_private.h
@@ -48,38 +48,6 @@
 #include <sysdeps/i386/fpu/fenv_private.h>
 #include_next <math_private.h>
 
-extern __always_inline double
-__ieee754_sqrt (double d)
-{
-  double res;
-#if defined __AVX__ || defined SSE2AVX
-  asm ("vsqrtsd %1, %0, %0" : "=x" (res) : "xm" (d));
-#else
-  asm ("sqrtsd %1, %0" : "=x" (res) : "xm" (d));
-#endif
-  return res;
-}
-
-extern __always_inline float
-__ieee754_sqrtf (float d)
-{
-  float res;
-#if defined __AVX__ || defined SSE2AVX
-  asm ("vsqrtss %1, %0, %0" : "=x" (res) : "xm" (d));
-#else
-  asm ("sqrtss %1, %0" : "=x" (res) : "xm" (d));
-#endif
-  return res;
-}
-
-extern __always_inline long double
-__ieee754_sqrtl (long double d)
-{
-  long double res;
-  asm ("fsqrt" : "=t" (res) : "0" (d));
-  return res;
-}
-
 #ifdef __SSE4_1__
 extern __always_inline double
 __rint (double d)
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 34542155aa..9f387248aa 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -1,12 +1,54 @@
 ifeq ($(subdir),math)
 libm-sysdep_routines += s_floor-c s_ceil-c s_floorf-c s_ceilf-c \
-			s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c
+			s_rint-c s_rintf-c s_nearbyint-c s_nearbyintf-c \
+			s_trunc-c s_truncf-c
+
+libm-sysdep_routines += s_ceil-sse4_1 s_ceilf-sse4_1 s_floor-sse4_1 \
+			s_floorf-sse4_1 s_nearbyint-sse4_1 \
+			s_nearbyintf-sse4_1 s_rint-sse4_1 s_rintf-sse4_1 \
+			s_trunc-sse4_1 s_truncf-sse4_1
+
+libm-sysdep_routines += e_exp-fma e_log-fma e_pow-fma s_atan-fma \
+			e_asin-fma e_atan2-fma s_sin-fma s_tan-fma \
+			mpa-fma \
+			sincos32-fma doasin-fma dosincos-fma \
+			mpatan2-fma mpatan-fma mpsqrt-fma mptan-fma
+
+CFLAGS-doasin-fma.c = -mfma -mavx2
+CFLAGS-dosincos-fma.c = -mfma -mavx2
+CFLAGS-e_asin-fma.c = -mfma -mavx2
+CFLAGS-e_atan2-fma.c = -mfma -mavx2
+CFLAGS-e_exp-fma.c = -mfma -mavx2
+CFLAGS-e_log-fma.c = -mfma -mavx2
+CFLAGS-e_pow-fma.c = -mfma -mavx2 $(config-cflags-nofma)
+CFLAGS-mpa-fma.c = -mfma -mavx2
+CFLAGS-mpatan-fma.c = -mfma -mavx2
+CFLAGS-mpatan2-fma.c = -mfma -mavx2
+CFLAGS-mpsqrt-fma.c = -mfma -mavx2
+CFLAGS-mptan-fma.c = -mfma -mavx2
+CFLAGS-s_atan-fma.c = -mfma -mavx2
+CFLAGS-sincos32-fma.c = -mfma -mavx2
+CFLAGS-s_sin-fma.c = -mfma -mavx2
+CFLAGS-s_tan-fma.c = -mfma -mavx2
+
+libm-sysdep_routines += s_sinf-sse2 s_cosf-sse2 s_sincosf-sse2
+
+libm-sysdep_routines += e_exp2f-fma e_expf-fma e_log2f-fma e_logf-fma \
+			e_powf-fma s_sinf-fma s_cosf-fma s_sincosf-fma
+
+CFLAGS-e_exp2f-fma.c = -mfma -mavx2
+CFLAGS-e_expf-fma.c = -mfma -mavx2
+CFLAGS-e_log2f-fma.c = -mfma -mavx2
+CFLAGS-e_logf-fma.c = -mfma -mavx2
+CFLAGS-e_powf-fma.c = -mfma -mavx2
+CFLAGS-s_sinf-fma.c = -mfma -mavx2
+CFLAGS-s_cosf-fma.c = -mfma -mavx2
+CFLAGS-s_sincosf-fma.c = -mfma -mavx2
 
 libm-sysdep_routines += e_exp-fma4 e_log-fma4 e_pow-fma4 s_atan-fma4 \
 			e_asin-fma4 e_atan2-fma4 s_sin-fma4 s_tan-fma4 \
-			mplog-fma4 mpa-fma4 slowexp-fma4 slowpow-fma4 \
+			mpa-fma4 \
 			sincos32-fma4 doasin-fma4 dosincos-fma4 \
-			halfulp-fma4 mpexp-fma4 \
 			mpatan2-fma4 mpatan-fma4 mpsqrt-fma4 mptan-fma4
 
 CFLAGS-doasin-fma4.c = -mfma4
@@ -16,35 +58,26 @@ CFLAGS-e_atan2-fma4.c = -mfma4
 CFLAGS-e_exp-fma4.c = -mfma4
 CFLAGS-e_log-fma4.c = -mfma4
 CFLAGS-e_pow-fma4.c = -mfma4 $(config-cflags-nofma)
-CFLAGS-halfulp-fma4.c = -mfma4
 CFLAGS-mpa-fma4.c = -mfma4
 CFLAGS-mpatan-fma4.c = -mfma4
 CFLAGS-mpatan2-fma4.c = -mfma4
-CFLAGS-mpexp-fma4.c = -mfma4
-CFLAGS-mplog-fma4.c = -mfma4
 CFLAGS-mpsqrt-fma4.c = -mfma4
 CFLAGS-mptan-fma4.c = -mfma4
 CFLAGS-s_atan-fma4.c = -mfma4
 CFLAGS-sincos32-fma4.c = -mfma4
-CFLAGS-slowexp-fma4.c = -mfma4
-CFLAGS-slowpow-fma4.c = -mfma4
 CFLAGS-s_sin-fma4.c = -mfma4
 CFLAGS-s_tan-fma4.c = -mfma4
 
 libm-sysdep_routines += e_exp-avx e_log-avx s_atan-avx \
 			e_atan2-avx s_sin-avx s_tan-avx \
-			mplog-avx mpa-avx slowexp-avx \
-			mpexp-avx
+			mpa-avx
 
 CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-mpa-avx.c = -msse2avx -DSSE2AVX
-CFLAGS-mpexp-avx.c = -msse2avx -DSSE2AVX
-CFLAGS-mplog-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
-CFLAGS-slowexp-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
 endif
 
@@ -66,5 +99,35 @@ libmvec-sysdep_routines += svml_d_cos2_core_sse4 svml_d_cos4_core_avx2 \
 			   svml_d_pow4_core_avx2 svml_d_pow8_core_avx512 \
 			   svml_s_powf4_core_sse4 svml_s_powf8_core_avx2 \
 			   svml_s_powf16_core_avx512 svml_s_sincosf4_core_sse4 \
-			   svml_s_sincosf8_core_avx2 svml_s_sincosf16_core_avx512
+			   svml_s_sincosf8_core_avx2 \
+			   svml_s_sincosf16_core_avx512 \
+			   svml_d_cos2_core-sse2 svml_d_cos4_core-sse \
+			   svml_d_cos8_core-avx2 svml_d_exp2_core-sse2 \
+			   svml_d_exp4_core-sse svml_d_exp8_core-avx2 \
+			   svml_d_log2_core-sse2 svml_d_log4_core-sse \
+			   svml_d_log8_core-avx2 svml_d_pow2_core-sse2 \
+			   svml_d_pow4_core-sse svml_d_pow8_core-avx2 \
+			   svml_d_sin2_core-sse2 svml_d_sin4_core-sse \
+			   svml_d_sin8_core-avx2 \
+			   svml_d_sincos2_core-sse2 \
+			   svml_d_sincos4_core-sse \
+			   svml_d_sincos8_core-avx2 \
+			   svml_s_cosf16_core-avx2 \
+			   svml_s_cosf4_core-sse2 \
+			   svml_s_cosf8_core-sse \
+			   svml_s_expf16_core-avx2 \
+			   svml_s_expf4_core-sse2 \
+			   svml_s_expf8_core-sse \
+			   svml_s_logf16_core-avx2 \
+			   svml_s_logf4_core-sse2 \
+			   svml_s_logf8_core-sse \
+			   svml_s_powf16_core-avx2 \
+			   svml_s_powf4_core-sse2 \
+			   svml_s_powf8_core-sse \
+			   svml_s_sincosf16_core-avx2 \
+			   svml_s_sincosf4_core-sse2 \
+			   svml_s_sincosf8_core-sse \
+			   svml_s_sinf16_core-avx2 \
+			   svml_s_sinf4_core-sse2 \
+			   svml_s_sinf8_core-sse
 endif
diff --git a/sysdeps/x86_64/fpu/multiarch/doasin-fma.c b/sysdeps/x86_64/fpu/multiarch/doasin-fma.c
new file mode 100644
index 0000000000..7a09865fca
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/doasin-fma.c
@@ -0,0 +1,4 @@
+#define __doasin __doasin_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/doasin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c b/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c
new file mode 100644
index 0000000000..5744586bdb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/dosincos-fma.c
@@ -0,0 +1,6 @@
+#define __docos __docos_fma
+#define __dubcos __dubcos_fma
+#define __dubsin __dubsin_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/dosincos.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c b/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c
new file mode 100644
index 0000000000..50e9c64247
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_asin-fma.c
@@ -0,0 +1,11 @@
+#define __ieee754_acos __ieee754_acos_fma
+#define __ieee754_asin __ieee754_asin_fma
+#define __cos32 __cos32_fma
+#define __doasin __doasin_fma
+#define __docos __docos_fma
+#define __dubcos __dubcos_fma
+#define __dubsin __dubsin_fma
+#define __sin32 __sin32_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_asin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c
index 111a5b99bd..8d47004e4f 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_asin.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c
@@ -1,26 +1,40 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
-
-extern double __ieee754_acos_sse2 (double);
-extern double __ieee754_asin_sse2 (double);
-extern double __ieee754_acos_fma4 (double);
-extern double __ieee754_asin_fma4 (double);
-
-libm_ifunc (__ieee754_acos,
-	    HAS_ARCH_FEATURE (FMA4_Usable)
-	    ? __ieee754_acos_fma4
-	    : __ieee754_acos_sse2);
-strong_alias (__ieee754_acos, __acos_finite)
+/* Multiple versions of IEEE 754 asin and acos.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+extern double __redirect_ieee754_asin (double);
+extern double __redirect_ieee754_acos (double);
+
+#define SYMBOL_NAME ieee754_asin
+#include "ifunc-fma4.h"
 
-libm_ifunc (__ieee754_asin,
-	    HAS_ARCH_FEATURE (FMA4_Usable)
-	    ? __ieee754_asin_fma4
-	    : __ieee754_asin_sse2);
+libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin,
+		       IFUNC_SELECTOR ());
 strong_alias (__ieee754_asin, __asin_finite)
 
-#define __ieee754_acos __ieee754_acos_sse2
-#define __ieee754_asin __ieee754_asin_sse2
+#undef SYMBOL_NAME
+#define SYMBOL_NAME ieee754_acos
+#include "ifunc-fma4.h"
+
+libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos,
+		       IFUNC_SELECTOR ());
+strong_alias (__ieee754_acos, __acos_finite)
 
 
+#define __ieee754_acos __ieee754_acos_sse2
+#define __ieee754_asin __ieee754_asin_sse2
 #include <sysdeps/ieee754/dbl-64/e_asin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c
new file mode 100644
index 0000000000..caba686496
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2-fma.c
@@ -0,0 +1,10 @@
+#define __ieee754_atan2 __ieee754_atan2_fma
+#define __add __add_fma
+#define __dbl_mp __dbl_mp_fma
+#define __dvd __dvd_fma
+#define __mpatan2 __mpatan2_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
index 9ca3c02a44..6c2dd5af37 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c
@@ -1,18 +1,29 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
+/* Multiple versions of IEEE 754 atan2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-extern double __ieee754_atan2_sse2 (double, double);
-extern double __ieee754_atan2_avx (double, double);
-extern double __ieee754_atan2_fma4 (double, double);
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-libm_ifunc (__ieee754_atan2,
-	    HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_atan2_fma4
-	    : (HAS_ARCH_FEATURE (AVX_Usable)
-	       ? __ieee754_atan2_avx : __ieee754_atan2_sse2));
-strong_alias (__ieee754_atan2, __atan2_finite)
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-#define __ieee754_atan2 __ieee754_atan2_sse2
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+extern double __redirect_ieee754_atan2 (double, double);
 
+#define SYMBOL_NAME ieee754_atan2
+#include "ifunc-avx-fma4.h"
 
+libc_ifunc_redirected (__redirect_ieee754_atan2,
+		       __ieee754_atan2, IFUNC_SELECTOR ());
+strong_alias (__ieee754_atan2, __atan2_finite)
+
+#define __ieee754_atan2 __ieee754_atan2_sse2
 #include <sysdeps/ieee754/dbl-64/e_atan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c
index ee5dd6d2dc..afd917442a 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp-avx.c
@@ -1,6 +1,5 @@
 #define __ieee754_exp __ieee754_exp_avx
 #define __exp1 __exp1_avx
-#define __slowexp __slowexp_avx
 #define SECTION __attribute__ ((section (".text.avx")))
 
 #include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c
new file mode 100644
index 0000000000..765b1b9dd3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma.c
@@ -0,0 +1,5 @@
+#define __ieee754_exp __ieee754_exp_fma
+#define __exp1 __exp1_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
index ae6eb67603..9ac7acad28 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp-fma4.c
@@ -1,6 +1,5 @@
 #define __ieee754_exp __ieee754_exp_fma4
 #define __exp1 __exp1_fma4
-#define __slowexp __slowexp_fma4
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c
index b7d7b5ff27..7cd7d1729c 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c
@@ -1,18 +1,29 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
+/* Multiple versions of IEEE 754 exp.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-extern double __ieee754_exp_sse2 (double);
-extern double __ieee754_exp_avx (double);
-extern double __ieee754_exp_fma4 (double);
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-libm_ifunc (__ieee754_exp,
-	    HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_exp_fma4
-	    : (HAS_ARCH_FEATURE (AVX_Usable)
-	       ? __ieee754_exp_avx : __ieee754_exp_sse2));
-strong_alias (__ieee754_exp, __exp_finite)
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-#define __ieee754_exp __ieee754_exp_sse2
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+extern double __redirect_ieee754_exp (double);
 
+#define SYMBOL_NAME ieee754_exp
+#include "ifunc-avx-fma4.h"
 
+libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp,
+		       IFUNC_SELECTOR ());
+strong_alias (__ieee754_exp, __exp_finite)
+
+#define __ieee754_exp __ieee754_exp_sse2
 #include <sysdeps/ieee754/dbl-64/e_exp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c
new file mode 100644
index 0000000000..c915a50794
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f-fma.c
@@ -0,0 +1,3 @@
+#define __exp2f __exp2f_fma
+
+#include <sysdeps/ieee754/flt-32/e_exp2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
new file mode 100644
index 0000000000..e3a0706839
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c
@@ -0,0 +1,40 @@
+/* Multiple versions of exp2f.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_exp2f (float);
+
+#define SYMBOL_NAME exp2f
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+# include <shlib-compat.h>
+versioned_symbol (libm, __exp2f, exp2f, GLIBC_2_27);
+libm_alias_float_other (__exp2, exp2)
+#else
+libm_alias_float (__exp2, exp2)
+#endif
+
+strong_alias (__exp2f, __ieee754_exp2f)
+strong_alias (__exp2f, __exp2f_finite)
+
+#define __exp2f __exp2f_sse2
+#include <sysdeps/ieee754/flt-32/e_exp2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c
new file mode 100644
index 0000000000..4e01cd6a82
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_expf-fma.c
@@ -0,0 +1,3 @@
+#define __expf __expf_fma
+
+#include <sysdeps/ieee754/flt-32/e_expf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c
new file mode 100644
index 0000000000..2b7c7ccbd0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c
@@ -0,0 +1,43 @@
+/* Multiple versions of expf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_expf (float);
+
+#define SYMBOL_NAME expf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__expf, __GI___expf, __redirect_expf)
+  __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __expf, expf, GLIBC_2_27);
+libm_alias_float_other (__exp, exp)
+#else
+libm_alias_float (__exp, exp)
+#endif
+
+strong_alias (__expf, __ieee754_expf)
+strong_alias (__expf, __expf_finite)
+
+#define __expf __expf_sse2
+#include <sysdeps/ieee754/flt-32/e_expf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-avx.c b/sysdeps/x86_64/fpu/multiarch/e_log-avx.c
index c669019bc2..b22a5767be 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log-avx.c
@@ -1,8 +1,4 @@
 #define __ieee754_log __ieee754_log_avx
-#define __mplog __mplog_avx
-#define __add __add_avx
-#define __dbl_mp __dbl_mp_avx
-#define __sub __sub_avx
 #define SECTION __attribute__ ((section (".text.avx")))
 
 #include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma.c
new file mode 100644
index 0000000000..bce0ee03c2
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma.c
@@ -0,0 +1,4 @@
+#define __ieee754_log __ieee754_log_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
index a2346cc618..f458f9c23c 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log-fma4.c
@@ -1,8 +1,4 @@
 #define __ieee754_log __ieee754_log_fma4
-#define __mplog __mplog_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __sub __sub_fma4
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c
index cf9533d6c0..e0a1b02fae 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_log.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_log.c
@@ -1,18 +1,29 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
+/* Multiple versions of IEEE 754 log.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-extern double __ieee754_log_sse2 (double);
-extern double __ieee754_log_avx (double);
-extern double __ieee754_log_fma4 (double);
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-libm_ifunc (__ieee754_log,
-	    HAS_ARCH_FEATURE (FMA4_Usable) ? __ieee754_log_fma4
-	    : (HAS_ARCH_FEATURE (AVX_Usable)
-	       ? __ieee754_log_avx : __ieee754_log_sse2));
-strong_alias (__ieee754_log, __log_finite)
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-#define __ieee754_log __ieee754_log_sse2
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+extern double __redirect_ieee754_log (double);
 
+#define SYMBOL_NAME ieee754_log
+#include "ifunc-avx-fma4.h"
 
+libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log,
+		       IFUNC_SELECTOR ());
+strong_alias (__ieee754_log, __log_finite)
+
+#define __ieee754_log __ieee754_log_sse2
 #include <sysdeps/ieee754/dbl-64/e_log.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c b/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c
new file mode 100644
index 0000000000..8a76b836fb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_log2f-fma.c
@@ -0,0 +1,3 @@
+#define __log2f __log2f_fma
+
+#include <sysdeps/ieee754/flt-32/e_log2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
new file mode 100644
index 0000000000..12d0c30dd3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c
@@ -0,0 +1,43 @@
+/* Multiple versions of log2f.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_log2f (float);
+
+#define SYMBOL_NAME log2f
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f)
+  __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __log2f, log2f, GLIBC_2_27);
+libm_alias_float_other (__log2, log2)
+#else
+libm_alias_float (__log2, log2)
+#endif
+
+strong_alias (__log2f, __ieee754_log2f)
+strong_alias (__log2f, __log2f_finite)
+
+#define __log2f __log2f_sse2
+#include <sysdeps/ieee754/flt-32/e_log2f.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c
new file mode 100644
index 0000000000..a47fd8195f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_logf-fma.c
@@ -0,0 +1,3 @@
+#define __logf __logf_fma
+
+#include <sysdeps/ieee754/flt-32/e_logf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c
new file mode 100644
index 0000000000..224d40a1e4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c
@@ -0,0 +1,43 @@
+/* Multiple versions of logf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_logf (float);
+
+#define SYMBOL_NAME logf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__logf, __GI___logf, __redirect_logf)
+  __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __logf, logf, GLIBC_2_27);
+libm_alias_float_other (__log, log)
+#else
+libm_alias_float (__log, log)
+#endif
+
+strong_alias (__logf, __ieee754_logf)
+strong_alias (__logf, __logf_finite)
+
+#define __logf __logf_sse2
+#include <sysdeps/ieee754/flt-32/e_logf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
new file mode 100644
index 0000000000..73c1e7fb89
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma.c
@@ -0,0 +1,5 @@
+#define __ieee754_pow __ieee754_pow_fma
+#define __exp1 __exp1_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
index 5b3ea8e103..8971b655ca 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow-fma4.c
@@ -1,6 +1,5 @@
 #define __ieee754_pow __ieee754_pow_fma4
 #define __exp1 __exp1_fma4
-#define __slowpow __slowpow_fma4
 #define SECTION __attribute__ ((section (".text.fma4")))
 
 #include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c
index a5c5d89c3e..084073c936 100644
--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
+++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
@@ -1,17 +1,29 @@
-#include <init-arch.h>
-#include <math.h>
-#include <math_private.h>
+/* Multiple versions of IEEE 754 pow.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-extern double __ieee754_pow_sse2 (double, double);
-extern double __ieee754_pow_fma4 (double, double);
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-libm_ifunc (__ieee754_pow,
-	    HAS_ARCH_FEATURE (FMA4_Usable)
-	    ? __ieee754_pow_fma4
-	    : __ieee754_pow_sse2);
-strong_alias (__ieee754_pow, __pow_finite)
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-#define __ieee754_pow __ieee754_pow_sse2
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+extern double __redirect_ieee754_pow (double, double);
 
+#define SYMBOL_NAME ieee754_pow
+#include "ifunc-fma4.h"
 
+libc_ifunc_redirected (__redirect_ieee754_pow,
+		       __ieee754_pow, IFUNC_SELECTOR ());
+strong_alias (__ieee754_pow, __pow_finite)
+
+#define __ieee754_pow __ieee754_pow_sse2
 #include <sysdeps/ieee754/dbl-64/e_pow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c b/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c
new file mode 100644
index 0000000000..fdf5dcc56a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_powf-fma.c
@@ -0,0 +1,3 @@
+#define __powf __powf_fma
+
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c
new file mode 100644
index 0000000000..a185006f40
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c
@@ -0,0 +1,46 @@
+/* Multiple versions of powf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+
+#define powf __redirect_powf
+#define __DECL_SIMD___redirect_powf
+#include <math.h>
+#undef powf
+
+#define SYMBOL_NAME powf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (__powf, __GI___powf, __redirect_powf)
+  __attribute__ ((visibility ("hidden")));
+
+# include <shlib-compat.h>
+versioned_symbol (libm, __powf, powf, GLIBC_2_27);
+libm_alias_float_other (__pow, pow)
+#else
+libm_alias_float (__pow, pow)
+#endif
+
+strong_alias (__powf, __ieee754_powf)
+strong_alias (__powf, __powf_finite)
+
+#define __powf __powf_sse2
+#include <sysdeps/ieee754/flt-32/e_powf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c b/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
deleted file mode 100644
index a00c17c016..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/halfulp-fma4.c
+++ /dev/null
@@ -1,4 +0,0 @@
-#define __halfulp __halfulp_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/halfulp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h
new file mode 100644
index 0000000000..a5f9375afc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-avx-fma4.h
@@ -0,0 +1,43 @@
+/* Common definition for ifunc selections optimized with AVX, AVX2/FMA
+   and FMA4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+    return OPTIMIZE (fma);
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable))
+    return OPTIMIZE (fma4);
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Usable))
+    return OPTIMIZE (avx);
+
+  return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
index 9a06a5c174..63a8cd221f 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.S
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma.h
@@ -1,6 +1,6 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Common definition for ifunc selections optimized with AVX2/FMA.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -16,23 +16,19 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <machine/asm.h>
 #include <init-arch.h>
 
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
 
-ENTRY(__ceilf)
-	.type	__ceilf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__ceilf_sse41(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-	jnz	2f
-	leaq	__ceilf_c(%rip), %rax
-2:	ret
-END(__ceilf)
-weak_alias (__ceilf, ceilf)
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
 
+  if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+    return OPTIMIZE (fma);
 
-ENTRY(__ceilf_sse41)
-	roundss	$2, %xmm0, %xmm0
-	ret
-END(__ceilf_sse41)
+  return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
new file mode 100644
index 0000000000..a2526a2ee0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
@@ -0,0 +1,39 @@
+/* Common definition for ifunc selections optimized with AVX2/FMA and
+   FMA4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (fma4) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+    return OPTIMIZE (fma);
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, FMA4_Usable))
+    return OPTIMIZE (fma4);
+
+  return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h
new file mode 100644
index 0000000000..bd2d32e418
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx2.h
@@ -0,0 +1,39 @@
+/* Common definition for libmathvec ifunc selections optimized with
+   AVX2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+#undef PASTER2
+#define PASTER2(x,y)	x##_##y
+
+extern void REDIRECT_NAME (void);
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse_wrapper) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+/* Resolve to the avx2 kernel only when both FMA and AVX2 are usable;
+   otherwise fall back to the sse_wrapper variant.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+  if (!CPU_FEATURES_ARCH_P (cpu_features, FMA_Usable)
+      || !CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+    return OPTIMIZE (sse_wrapper);
+  return OPTIMIZE (avx2);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h
new file mode 100644
index 0000000000..174e462cfb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-avx512.h
@@ -0,0 +1,45 @@
+/* Common definition for libmathvec ifunc selections optimized with
+   AVX512.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+#undef PASTER2
+#define PASTER2(x,y)	x##_##y
+
+extern void REDIRECT_NAME (void);
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_wrapper) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (knl) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (skx) attribute_hidden;
+
+/* Resolver: unless MathVec_Prefer_No_AVX512 is set, take skx when
+   AVX512DQ is usable, else knl when AVX512F is usable; in every
+   other case use the avx2_wrapper variant.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, MathVec_Prefer_No_AVX512))
+    return OPTIMIZE (avx2_wrapper);
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX512DQ_Usable))
+    return OPTIMIZE (skx);
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable))
+    return OPTIMIZE (knl);
+  return OPTIMIZE (avx2_wrapper);
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h
new file mode 100644
index 0000000000..c1e70ebfc1
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-mathvec-sse4_1.h
@@ -0,0 +1,38 @@
+/* Common definition for libmathvec ifunc selections optimized with
+   SSE4.1.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+#undef PASTER2
+#define PASTER2(x,y)	x##_##y
+
+extern void REDIRECT_NAME (void);
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4) attribute_hidden;
+
+/* Resolver: sse4 variant when the CPU reports SSE4_1, else sse2.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  return (CPU_FEATURES_CPU_P (cpu_features, SSE4_1)
+          ? OPTIMIZE (sse4)
+          : OPTIMIZE (sse2));
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.S b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h
index 40fa729955..a8710ba802 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_ceil.S
+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-sse4_1.h
@@ -1,6 +1,6 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Common definition for ifunc selections optimized with SSE4.1.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -16,23 +16,18 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <machine/asm.h>
 #include <init-arch.h>
 
+extern __typeof (REDIRECT_NAME) OPTIMIZE (c) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse41) attribute_hidden;
 
-ENTRY(__ceil)
-	.type	__ceil, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__ceil_sse41(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-	jnz	2f
-	leaq	__ceil_c(%rip), %rax
-2:	ret
-END(__ceil)
-weak_alias (__ceil, ceil)
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
 
+  if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+    return OPTIMIZE (sse41);	/* CPU reports SSE4_1.  */
 
-ENTRY(__ceil_sse41)
-	roundsd	$2, %xmm0, %xmm0
-	ret
-END(__ceil_sse41)
+  return OPTIMIZE (c);		/* Fallback when SSE4_1 is not reported.  */
+}
diff --git a/sysdeps/x86_64/fpu/multiarch/mpa-fma.c b/sysdeps/x86_64/fpu/multiarch/mpa-fma.c
new file mode 100644
index 0000000000..177cc2517f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpa-fma.c
@@ -0,0 +1,14 @@
+#define __add __add_fma
+#define __mul __mul_fma
+#define __sqr __sqr_fma
+#define __sub __sub_fma
+#define __dbl_mp __dbl_mp_fma
+#define __dvd __dvd_fma
+/* NOTE(review): NO_* presumably make mpa.c leave those definitions out -- confirm.  */
+#define NO___CPY 1
+#define NO___MP_DBL 1
+#define NO___ACR 1
+#define NO__CONST 1
+#define SECTION __attribute__ ((section (".text.fma")))
+/* Compile the generic mpa.c with the symbols above renamed to their _fma forms.  */
+#include <sysdeps/ieee754/dbl-64/mpa.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c b/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c
new file mode 100644
index 0000000000..d216f9142d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpatan-fma.c
@@ -0,0 +1,10 @@
+#define __mpatan __mpatan_fma
+#define __add __add_fma
+#define __dvd __dvd_fma
+#define __mpsqrt __mpsqrt_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define AVOID_MPATAN_H 1
+#define SECTION __attribute__ ((section (".text.fma")))
+/* Compile the generic mpatan.c with the symbols above renamed to their _fma forms.  */
+#include <sysdeps/ieee754/dbl-64/mpatan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c
new file mode 100644
index 0000000000..98df336f79
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpatan2-fma.c
@@ -0,0 +1,9 @@
+#define __mpatan2 __mpatan2_fma
+#define __add __add_fma
+#define __dvd __dvd_fma
+#define __mpatan __mpatan_fma
+#define __mpsqrt __mpsqrt_fma
+#define __mul __mul_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+/* Compile the generic mpatan2.c with the symbols above renamed to their _fma forms.  */
+#include <sysdeps/ieee754/dbl-64/mpatan2.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c b/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c
deleted file mode 100644
index 87f29c96c9..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/mpexp-avx.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#define __mpexp __mpexp_avx
-#define __add __add_avx
-#define __dbl_mp __dbl_mp_avx
-#define __dvd __dvd_avx
-#define __mul __mul_avx
-#define AVOID_MPEXP_H 1
-#define SECTION __attribute__ ((section (".text.avx")))
-
-#include <sysdeps/ieee754/dbl-64/mpexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
deleted file mode 100644
index 07ca6e9ad0..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/mpexp-fma4.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#define __mpexp __mpexp_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __dvd __dvd_fma4
-#define __mul __mul_fma4
-#define AVOID_MPEXP_H 1
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/mpexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-avx.c b/sysdeps/x86_64/fpu/multiarch/mplog-avx.c
deleted file mode 100644
index fd783d9a67..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/mplog-avx.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define __mplog __mplog_avx
-#define __add __add_avx
-#define __mpexp __mpexp_avx
-#define __mul __mul_avx
-#define __sub __sub_avx
-#define SECTION __attribute__ ((section (".text.avx")))
-
-#include <sysdeps/ieee754/dbl-64/mplog.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c b/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
deleted file mode 100644
index b4733118d7..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/mplog-fma4.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#define __mplog __mplog_fma4
-#define __add __add_fma4
-#define __mpexp __mpexp_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/mplog.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c
new file mode 100644
index 0000000000..44d7a23ae3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mpsqrt-fma.c
@@ -0,0 +1,8 @@
+#define __mpsqrt __mpsqrt_fma
+#define __dbl_mp __dbl_mp_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define AVOID_MPSQRT_H 1
+#define SECTION __attribute__ ((section (".text.fma")))
+/* Compile the generic mpsqrt.c with the symbols above renamed to their _fma forms.  */
+#include <sysdeps/ieee754/dbl-64/mpsqrt.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/mptan-fma.c b/sysdeps/x86_64/fpu/multiarch/mptan-fma.c
new file mode 100644
index 0000000000..d1a691413c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/mptan-fma.c
@@ -0,0 +1,7 @@
+#define __mptan __mptan_fma
+#define __c32 __c32_fma
+#define __dvd __dvd_fma
+#define __mpranred __mpranred_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+/* Compile the generic mptan.c with the symbols above renamed to their _fma forms.  */
+#include <sysdeps/ieee754/dbl-64/mptan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c b/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c
index b5cb9c3a75..41816bfe6c 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan-avx.c
@@ -1,4 +1,4 @@
-#define atan __atan_avx
+#define __atan __atan_avx
 #define __add __add_avx
 #define __dbl_mp __dbl_mp_avx
 #define __mul __mul_avx
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c
new file mode 100644
index 0000000000..363e32bcbd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma.c
@@ -0,0 +1,9 @@
+#define __atan __atan_fma
+#define __add __add_fma
+#define __dbl_mp __dbl_mp_fma
+#define __mpatan __mpatan_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+/* Compile the generic s_atan.c with the symbols above renamed to their _fma forms.  */
+#include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
index 9e83e6cdab..ad8d3af579 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan-fma4.c
@@ -1,4 +1,4 @@
-#define atan __atan_fma4
+#define __atan __atan_fma4
 #define __add __add_fma4
 #define __dbl_mp __dbl_mp_fma4
 #define __mpatan __mpatan_fma4
diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c
index 742e95cb96..f9ce8549ab 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c
@@ -1,15 +1,30 @@
-#include <init-arch.h>
-#include <math.h>
+/* Multiple versions of atan.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-extern double __atan_sse2 (double);
-extern double __atan_avx (double);
-extern double __atan_fma4 (double);
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-libm_ifunc (atan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __atan_fma4 :
-		   HAS_ARCH_FEATURE (AVX_Usable)
-		   ? __atan_avx : __atan_sse2));
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-#define atan __atan_sse2
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
+#include <libm-alias-double.h>
 
+extern double __redirect_atan (double);
+/* NOTE(review): SYMBOL_NAME presumably feeds OPTIMIZE in the header below -- confirm.  */
+#define SYMBOL_NAME atan
+#include "ifunc-avx-fma4.h"
+
+libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ());
+libm_alias_double (__atan, atan)
+/* The generic s_atan.c included next is built under the name __atan_sse2.  */
+#define __atan __atan_sse2
 #include <sysdeps/ieee754/dbl-64/s_atan.c>
diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
index fcc0945ea7..e90f05b42f 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S
@@ -1,6 +1,6 @@
-/* mempcpy optimized with AVX512 for KNL hardware.
-   Copyright (C) 2016 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -16,7 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#define USE_AS_MEMPCPY
-#define MEMCPY		__mempcpy_avx512_no_vzeroupper
-#define MEMCPY_CHK	__mempcpy_chk_avx512_no_vzeroupper
-#include "memcpy-avx512-no-vzeroupper.S"
+#include <sysdep.h>
+/* ROUNDSD imm8 10 (0b1010): round toward +inf, inexact suppressed.  */
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__ceil_sse41)
+	roundsd	$10, %xmm0, %xmm0
+	ret
+END(__ceil_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
new file mode 100644
index 0000000000..070fcdddea
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __ceil.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-double.h>
+/* While math.h is read, ceil/__ceil are spelled __redirect_ceil/__redirect___ceil.  */
+#define ceil __redirect_ceil
+#define __ceil __redirect___ceil
+#include <math.h>
+#undef ceil
+#undef __ceil
+/* NOTE(review): SYMBOL_NAME presumably feeds OPTIMIZE in the header below -- confirm.  */
+#define SYMBOL_NAME ceil
+#include "ifunc-sse4_1.h"
+/* NOTE(review): presumably defines __ceil as an ifunc resolved by IFUNC_SELECTOR -- confirm.  */
+libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ());
+libm_alias_double (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
new file mode 100644
index 0000000000..c3bd24c5ae
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+/* ROUNDSS imm8 10 (0b1010): round toward +inf, inexact suppressed.  */
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__ceilf_sse41)
+	roundss	$10, %xmm0, %xmm0
+	ret
+END(__ceilf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
new file mode 100644
index 0000000000..db0c6c4bc3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __ceilf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+/* While math.h is read, ceilf/__ceilf are spelled __redirect_ceilf/__redirect___ceilf.  */
+#define ceilf __redirect_ceilf
+#define __ceilf __redirect___ceilf
+#include <math.h>
+#undef ceilf
+#undef __ceilf
+/* NOTE(review): SYMBOL_NAME presumably feeds OPTIMIZE in the header below -- confirm.  */
+#define SYMBOL_NAME ceilf
+#include "ifunc-sse4_1.h"
+/* NOTE(review): presumably defines __ceilf as an ifunc resolved by IFUNC_SELECTOR -- confirm.  */
+libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ());
+libm_alias_float (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c
new file mode 100644
index 0000000000..5f9191aef9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_cosf-fma.c
@@ -0,0 +1,2 @@
+#define COSF __cosf_fma	/* NOTE(review): presumably names the function s_cosf.c defines -- confirm.  */
+#include <sysdeps/ieee754/flt-32/s_cosf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c b/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c
new file mode 100644
index 0000000000..87cf42a82a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_cosf-sse2.c
@@ -0,0 +1,2 @@
+#define COSF __cosf_sse2	/* NOTE(review): presumably names the function s_cosf.c defines -- confirm.  */
+#include <sysdeps/ieee754/flt-32/s_cosf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
new file mode 100644
index 0000000000..33959d3d01
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c
@@ -0,0 +1,28 @@
+/* Multiple versions of cosf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+/* Prototype under a private name; it is passed to libc_ifunc_redirected below.  */
+extern float __redirect_cosf (float);
+/* NOTE(review): SYMBOL_NAME presumably feeds OPTIMIZE in the header below -- confirm.  */
+#define SYMBOL_NAME cosf
+#include "ifunc-fma.h"
+/* NOTE(review): presumably defines __cosf as an ifunc resolved by IFUNC_SELECTOR -- confirm.  */
+libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ());
+/* NOTE(review): the alias macro presumably appends the 'f' suffix to both names -- confirm.  */
+libm_alias_float (__cos, cos)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
new file mode 100644
index 0000000000..b3c7aa29ff
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+/* ROUNDSD imm8 9 (0b1001): round toward -inf, inexact suppressed.  */
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__floor_sse41)
+	roundsd	$9, %xmm0, %xmm0
+	ret
+END(__floor_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c
new file mode 100644
index 0000000000..58f8ed8eaf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __floor.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-double.h>
+/* While math.h is read, floor/__floor are spelled __redirect_floor/__redirect___floor.  */
+#define floor __redirect_floor
+#define __floor __redirect___floor
+#include <math.h>
+#undef floor
+#undef __floor
+/* NOTE(review): SYMBOL_NAME presumably feeds OPTIMIZE in the header below -- confirm.  */
+#define SYMBOL_NAME floor
+#include "ifunc-sse4_1.h"
+/* NOTE(review): presumably defines __floor as an ifunc resolved by IFUNC_SELECTOR -- confirm.  */
+libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ());
+libm_alias_double (__floor, floor)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
new file mode 100644
index 0000000000..43461d3e6b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+/* ROUNDSS imm8 9 (0b1001): round toward -inf, inexact suppressed.  */
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__floorf_sse41)
+	roundss	$9, %xmm0, %xmm0
+	ret
+END(__floorf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
new file mode 100644
index 0000000000..5ef2fec2e3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __floorf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+/* While math.h is read, floorf/__floorf are spelled __redirect_floorf/__redirect___floorf.  */
+#define floorf __redirect_floorf
+#define __floorf __redirect___floorf
+#include <math.h>
+#undef floorf
+#undef __floorf
+/* NOTE(review): SYMBOL_NAME presumably feeds OPTIMIZE in the header below -- confirm.  */
+#define SYMBOL_NAME floorf
+#include "ifunc-sse4_1.h"
+/* NOTE(review): presumably defines __floorf as an ifunc resolved by IFUNC_SELECTOR -- confirm.  */
+libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ());
+libm_alias_float (__floor, floor)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fma.c b/sysdeps/x86_64/fpu/multiarch/s_fma.c
index 1de1a84cbe..875c76d372 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fma.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fma.c
@@ -1,5 +1,5 @@
 /* FMA version of fma.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -20,6 +20,7 @@
 #include <config.h>
 #include <math.h>
 #include <init-arch.h>
+#include <libm-alias-double.h>
 
 extern double __fma_sse2 (double x, double y, double z) attribute_hidden;
 
@@ -43,7 +44,7 @@ __fma_fma4 (double x, double y, double z)
 libm_ifunc (__fma, HAS_ARCH_FEATURE (FMA_Usable)
 	    ? __fma_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
 			    ? __fma_fma4 : __fma_sse2));
-weak_alias (__fma, fma)
+libm_alias_double (__fma, fma)
 
 #define __fma __fma_sse2
 
diff --git a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
index 8905e4b54f..5f4c2ec0be 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_fmaf.c
@@ -1,5 +1,5 @@
 /* FMA version of fmaf.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,6 +19,7 @@
 #include <config.h>
 #include <math.h>
 #include <init-arch.h>
+#include <libm-alias-float.h>
 
 extern float __fmaf_sse2 (float x, float y, float z) attribute_hidden;
 
@@ -42,7 +43,7 @@ __fmaf_fma4 (float x, float y, float z)
 libm_ifunc (__fmaf, HAS_ARCH_FEATURE (FMA_Usable)
 	    ? __fmaf_fma3 : (HAS_ARCH_FEATURE (FMA4_Usable)
 			     ? __fmaf_fma4 : __fmaf_sse2));
-weak_alias (__fmaf, fmaf)
+libm_alias_float (__fma, fma)
 
 #define __fmaf __fmaf_sse2
 
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
new file mode 100644
index 0000000000..f9ac36e4f0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+/* ROUNDSD imm8 0xc (0b1100): use the MXCSR rounding mode, inexact suppressed.  */
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__nearbyint_sse41)
+	roundsd	$0xc, %xmm0, %xmm0
+	ret
+END(__nearbyint_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S
deleted file mode 100644
index 5091cf5813..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.S
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <machine/asm.h>
-#include <init-arch.h>
-
-
-ENTRY(__nearbyint)
-	.type	__nearbyint, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__nearbyint_sse41(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-	jnz	2f
-	leaq	__nearbyint_c(%rip), %rax
-2:	ret
-END(__nearbyint)
-weak_alias (__nearbyint, nearbyint)
-
-
-ENTRY(__nearbyint_sse41)
-	roundsd	$0xc, %xmm0, %xmm0
-	ret
-END(__nearbyint_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
new file mode 100644
index 0000000000..d92945fd14
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c
@@ -0,0 +1,32 @@
+/* Multiple versions of __nearbyint.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-double.h>
+/* While math.h is read, nearbyint/__nearbyint are spelled with a __redirect_ prefix.  */
+#define nearbyint __redirect_nearbyint
+#define __nearbyint __redirect___nearbyint
+#include <math.h>
+#undef nearbyint
+#undef __nearbyint
+/* NOTE(review): SYMBOL_NAME presumably feeds OPTIMIZE in the header below -- confirm.  */
+#define SYMBOL_NAME nearbyint
+#include "ifunc-sse4_1.h"
+/* NOTE(review): presumably defines __nearbyint as an ifunc resolved by IFUNC_SELECTOR -- confirm.  */
+libc_ifunc_redirected (__redirect_nearbyint, __nearbyint,
+		       IFUNC_SELECTOR ());
+libm_alias_double (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
new file mode 100644
index 0000000000..2f427da778
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__nearbyintf_sse41)	/* SSE4.1 nearbyintf: round the float in %xmm0 to an integral value, in place.  */
+	roundss	$0xc, %xmm0, %xmm0	/* imm 0xc: bit 2 = round per MXCSR.RC, bit 3 = suppress the precision (inexact) exception.  */
+	ret
+END(__nearbyintf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S
deleted file mode 100644
index 4a13700001..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.S
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <machine/asm.h>
-#include <init-arch.h>
-
-
-ENTRY(__nearbyintf)
-	.type	__nearbyintf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__nearbyintf_sse41(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-	jnz	2f
-	leaq	__nearbyintf_c(%rip), %rax
-2:	ret
-END(__nearbyintf)
-weak_alias (__nearbyintf, nearbyintf)
-
-
-ENTRY(__nearbyintf_sse41)
-	roundss	$0xc, %xmm0, %xmm0
-	ret
-END(__nearbyintf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
index 1c0d1e14b7..ba7be27956 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_rint.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c
@@ -1,6 +1,6 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Multiple versions of __nearbyintf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -16,23 +16,17 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <machine/asm.h>
-#include <init-arch.h>
+#include <libm-alias-float.h>
 
+#define nearbyintf __redirect_nearbyintf
+#define __nearbyintf __redirect___nearbyintf
+#include <math.h>
+#undef nearbyintf
+#undef __nearbyintf
 
-ENTRY(__rint)
-	.type	__rint, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__rint_sse41(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-	jnz	2f
-	leaq	__rint_c(%rip), %rax
-2:	ret
-END(__rint)
-weak_alias (__rint, rint)
+#define SYMBOL_NAME nearbyintf
+#include "ifunc-sse4_1.h"
 
-
-ENTRY(__rint_sse41)
-	roundsd	$4, %xmm0, %xmm0
-	ret
-END(__rint_sse41)
+libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf,
+		       IFUNC_SELECTOR ());
+libm_alias_float (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
new file mode 100644
index 0000000000..7d7568a1a0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__rint_sse41)	/* SSE4.1 rint: round the double in %xmm0 to an integral value, in place.  */
+	roundsd	$4, %xmm0, %xmm0	/* imm 4: bit 2 = round per MXCSR.RC; bit 3 clear, so the precision (inexact) exception is not suppressed.  */
+	ret
+END(__rint_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c
new file mode 100644
index 0000000000..f1cb2fed0c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __rint.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-double.h>
+
+#define rint __redirect_rint
+#define __rint __redirect___rint
+#include <math.h>
+#undef rint
+#undef __rint
+
+#define SYMBOL_NAME rint
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ());
+libm_alias_double (__rint, rint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
new file mode 100644
index 0000000000..ef5d896f55
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S
@@ -0,0 +1,25 @@
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__rintf_sse41)	/* SSE4.1 rintf: round the float in %xmm0 to an integral value, in place.  */
+	roundss	$4, %xmm0, %xmm0	/* imm 4: bit 2 = round per MXCSR.RC; bit 3 clear, so the precision (inexact) exception is not suppressed.  */
+	ret
+END(__rintf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.S b/sysdeps/x86_64/fpu/multiarch/s_rintf.S
deleted file mode 100644
index 8e42fa561f..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/s_rintf.S
+++ /dev/null
@@ -1,38 +0,0 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <machine/asm.h>
-#include <init-arch.h>
-
-
-ENTRY(__rintf)
-	.type	__rintf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__rintf_sse41(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-	jnz	2f
-	leaq	__rintf_c(%rip), %rax
-2:	ret
-END(__rintf)
-weak_alias (__rintf, rintf)
-
-
-ENTRY(__rintf_sse41)
-	roundss	$4, %xmm0, %xmm0
-	ret
-END(__rintf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
new file mode 100644
index 0000000000..41323b3b5b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __rintf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+
+#define rintf __redirect_rintf
+#define __rintf __redirect___rintf
+#include <math.h>
+#undef rintf
+#undef __rintf
+
+#define SYMBOL_NAME rintf
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ());
+libm_alias_float (__rint, rint)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c
new file mode 100644
index 0000000000..15f3c394d5
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin-fma.c
@@ -0,0 +1,11 @@
+#define __cos __cos_fma
+#define __sin __sin_fma
+#define __docos __docos_fma
+#define __dubsin __dubsin_fma
+#define __mpcos __mpcos_fma
+#define __mpcos1 __mpcos1_fma
+#define __mpsin __mpsin_fma
+#define __mpsin1 __mpsin1_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/s_sin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c
index 8ffd3e7125..b289269240 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c
@@ -1,26 +1,39 @@
-#include <init-arch.h>
-#include <math.h>
-#undef NAN
-
-extern double __cos_sse2 (double);
-extern double __sin_sse2 (double);
-extern double __cos_avx (double);
-extern double __sin_avx (double);
-extern double __cos_fma4 (double);
-extern double __sin_fma4 (double);
-
-libm_ifunc (__cos, (HAS_ARCH_FEATURE (FMA4_Usable) ? __cos_fma4 :
-		    HAS_ARCH_FEATURE (AVX_Usable)
-		    ? __cos_avx : __cos_sse2));
-weak_alias (__cos, cos)
-
-libm_ifunc (__sin, (HAS_ARCH_FEATURE (FMA4_Usable) ? __sin_fma4 :
-		    HAS_ARCH_FEATURE (AVX_Usable)
-		    ? __sin_avx : __sin_sse2));
-weak_alias (__sin, sin)
+/* Multiple versions of sin and cos.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-#define __cos __cos_sse2
-#define __sin __sin_sse2
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-double.h>
+
+extern double __redirect_sin (double);
+extern double __redirect_cos (double);
 
+#define SYMBOL_NAME sin
+#include "ifunc-avx-fma4.h"
 
+libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ());
+libm_alias_double (__sin, sin)
+
+#undef SYMBOL_NAME
+#define SYMBOL_NAME cos
+#include "ifunc-avx-fma4.h"
+
+libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ());
+libm_alias_double (__cos, cos)
+
+#define __cos __cos_sse2
+#define __sin __sin_sse2
 #include <sysdeps/ieee754/dbl-64/s_sin.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
new file mode 100644
index 0000000000..64abe7abca
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c
@@ -0,0 +1,240 @@
+/* Compute sine and cosine of argument optimized with vector.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <math.h>
+#include <math_private.h>
+#include <x86intrin.h>
+#include <libm-alias-float.h>
+#include "s_sincosf.h"
+
+#define SINCOSF __sincosf_fma
+
+#ifndef SINCOSF
+# define SINCOSF_FUNC __sincosf
+#else
+# define SINCOSF_FUNC SINCOSF
+#endif
+
+/* Chebyshev constants for sin and cos, range -PI/4 - PI/4.  */
+static const __v2df V0 = { -0x1.5555555551cd9p-3, -0x1.ffffffffe98aep-2};
+static const __v2df V1 = { 0x1.1111110c2688bp-7, 0x1.55555545c50c7p-5 };
+static const __v2df V2 = { -0x1.a019f8b4bd1f9p-13, -0x1.6c16b348b6874p-10 };
+static const __v2df V3 = { 0x1.71d7264e6b5b4p-19, 0x1.a00eb9ac43ccp-16 };
+static const __v2df V4 = { -0x1.a947e1674b58ap-26, -0x1.23c97dd8844d7p-22 };
+
+/* Chebyshev constants for sin and cos, range 2^-27 - 2^-5.  */
+static const __v2df VC0 = { -0x1.555555543d49dp-3, -0x1.fffffff5cc6fdp-2 };
+static const __v2df VC1 = { 0x1.110f475cec8c5p-7, 0x1.55514b178dac5p-5 };
+
+static const __v2df v2ones = { 1.0, 1.0 };
+
+/* Compute the sine and cosine values using Chebyshev polynomials where
+   THETA is the range reduced absolute value of the input
+   and it is less than Pi/4,
+   N is calculated as trunc(|x|/(Pi/4)) + 1 and it is used to decide
+   whether a sine or cosine approximation is more accurate and
+   SIGNBIT is used to add the correct sign after the Chebyshev
+   polynomial is computed.  */
+static void
+reduced_sincos (const double theta, const unsigned int n,
+		const unsigned int signbit, float *sinx, float *cosx)
+{
+  __v2df v2x, v2sx, v2cx;
+  const __v2df v2theta = { theta, theta };
+  const __v2df v2theta2 = v2theta * v2theta;
+  /* Lane 0 uses the sin and lane 1 the cos coefficients; the sin form is
+     x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).  */
+  v2x = V3 + v2theta2 * V4;    /* S3+x^2*S4.  */
+  v2x = V2 + v2theta2 * v2x;   /* S2+x^2*(S3+x^2*S4).  */
+  v2x = V1 + v2theta2 * v2x;   /* S1+x^2*(S2+x^2*(S3+x^2*S4)).  */
+  v2x = V0 + v2theta2 * v2x;   /* S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4))).  */
+  v2x = v2theta2 * v2x;        /* x^2*P(x^2) in each lane.  */
+  v2cx = v2ones + v2x;         /* 1+x^2*P: cos form, only lane 1 is used.  */
+  v2sx = v2theta + v2theta * v2x; /* x+x^3*P: sin form, only lane 0 is used.  */
+  /* We are operating on |x|, so we need to add back the original
+     signbit for sinf.  */
+  /* Determine positive or negative primary interval.  */
+  /* Are we in the primary interval of sin or cos?  */
+  if ((n & 2) == 0)
+    {
+      const __v2df v2sign = /* NOTE(review): ones[] is not defined here; presumably { 1.0, -1.0 } -- confirm.  */
+	{
+	  ones[((n >> 2) & 1) ^ signbit],
+	  ones[((n + 2) >> 2) & 1]
+	};
+      v2cx[0] = v2sx[0];        /* Pack { sin form, cos form }.  */
+      v2cx *= v2sign;           /* Apply the per-lane sign factors.  */
+      __v4sf v4sx = _mm_cvtpd_ps (v2cx); /* Narrow both doubles to float.  */
+      *sinx = v4sx[0];          /* sin result comes from the sin form.  */
+      *cosx = v4sx[1];          /* cos result comes from the cos form.  */
+    }
+  else
+    {
+      const __v2df v2sign = /* Same sign factors as above, lanes swapped.  */
+	{
+	  ones[((n + 2) >> 2) & 1],
+	  ones[((n >> 2) & 1) ^ signbit]
+	};
+      v2cx[0] = v2sx[0];        /* Pack { sin form, cos form }.  */
+      v2cx *= v2sign;           /* Apply the per-lane sign factors.  */
+      __v4sf v4sx = _mm_cvtpd_ps (v2cx); /* Narrow both doubles to float.  */
+      *sinx = v4sx[1];          /* Swapped: sin result comes from the cos form.  */
+      *cosx = v4sx[0];          /* Swapped: cos result comes from the sin form.  */
+    }
+}
+
+void
+SINCOSF_FUNC (float x, float *sinx, float *cosx)
+{
+  double theta = x;
+  double abstheta = fabs (theta);
+  uint32_t ix, xi;
+  GET_FLOAT_WORD (xi, x);
+  /* |x| */
+  ix = xi & 0x7fffffff;
+  /* If |x|< Pi/4.  */
+  if (ix < 0x3f490fdb)
+    {
+      if (ix >= 0x3d000000) /* |x| >= 2^-5.  */
+	{
+	  __v2df v2x, v2sx, v2cx;
+	  const __v2df v2theta = { theta, theta };
+	  const __v2df v2theta2 = v2theta * v2theta;
+	  /* Chebyshev polynomial of the form for sin and cos.  */
+	  v2x = V3 + v2theta2 * V4;
+	  v2x = V2 + v2theta2 * v2x;
+	  v2x = V1 + v2theta2 * v2x;
+	  v2x = V0 + v2theta2 * v2x;
+	  v2x = v2theta2 * v2x;
+	  v2cx = v2ones + v2x;
+	  v2sx = v2theta + v2theta * v2x;
+	  v2cx[0] = v2sx[0];
+	  __v4sf v4sx = _mm_cvtpd_ps (v2cx);
+	  *sinx = v4sx[0];
+	  *cosx = v4sx[1];
+	}
+      else if (ix >= 0x32000000)     /* |x| >= 2^-27.  */
+	{
+	  /* A simpler Chebyshev approximation is close enough for this range.
+	     Both lanes scale the second coefficient by x^3, which gives
+	     sin: x+x^3*(SS0+x^3*SS1) and cos: 1.0+x^2*(CC0+x^3*CC1).  */
+	  __v2df v2x, v2sx, v2cx;
+	  const __v2df v2theta = { theta, theta };
+	  const __v2df v2theta2 = v2theta * v2theta;
+	  v2x = VC0 + v2theta * v2theta2 * VC1;
+	  v2x = v2theta2 * v2x;
+	  v2cx = v2ones + v2x;
+	  v2sx = v2theta + v2theta * v2x;
+	  v2cx[0] = v2sx[0];
+	  __v4sf v4sx = _mm_cvtpd_ps (v2cx);
+	  *sinx = v4sx[0];
+	  *cosx = v4sx[1];
+	}
+      else
+	{
+	  /* Handle some special cases.  */
+	  if (ix)
+	    *sinx = theta - (theta * SMALL);
+	  else
+	    *sinx = theta;
+	  *cosx = 1.0 - abstheta;
+	}
+    }
+  else                          /* |x| >= Pi/4.  */
+    {
+      unsigned int signbit = xi >> 31;
+      if (ix < 0x40e231d6) /* |x| < 9*Pi/4.  */
+	{
+	  /* There are cases where FE_UPWARD rounding mode can
+	     produce a result of abstheta * inv_PI_4 == 9,
+	     where abstheta < 9pi/4, so the domain for
+	     pio2_table must go to 5 (9 / 2 + 1).  */
+	  unsigned int n = (abstheta * inv_PI_4) + 1;
+	  theta = abstheta - pio2_table[n / 2];
+	  reduced_sincos (theta, n, signbit, sinx, cosx);
+	}
+      else if (ix < 0x7f800000)
+	{
+	  if (ix < 0x4b000000)     /* |x| < 2^23.  */
+	    {
+	      unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + 1;
+	      double x = n / 2;
+	      theta = (abstheta - x * PI_2_hi) - x * PI_2_lo;
+	      /* Argument reduction needed.  */
+	      reduced_sincos (theta, n, signbit, sinx, cosx);
+	    }
+	  else                  /* |x| >= 2^23.  */
+	    {
+	      x = fabsf (x);
+	      int exponent
+	        = (ix >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS;
+	      exponent += 3;
+	      exponent /= 28;
+	      double a = invpio4_table[exponent] * x;
+	      double b = invpio4_table[exponent + 1] * x;
+	      double c = invpio4_table[exponent + 2] * x;
+	      double d = invpio4_table[exponent + 3] * x;
+	      uint64_t l = a;
+	      l &= ~0x7;
+	      a -= l;
+	      double e = a + b;
+	      l = e;
+	      e = a - l;
+	      if (l & 1)
+	        {
+	          e -= 1.0;
+	          e += b;
+	          e += c;
+	          e += d;
+	          e *= M_PI_4;
+		  reduced_sincos (e, l + 1, signbit, sinx, cosx);
+	        }
+	      else
+		{
+		  e += b;
+		  e += c;
+		  e += d;
+		  if (e <= 1.0)
+		    {
+		      e *= M_PI_4;
+		      reduced_sincos (e, l + 1, signbit, sinx, cosx);
+		    }
+		  else
+		    {
+		      l++;
+		      e -= 2.0;
+		      e *= M_PI_4;
+		      reduced_sincos (e, l + 1, signbit, sinx, cosx);
+		    }
+		}
+	    }
+	}
+      else
+	{
+	  if (ix == 0x7f800000)
+	    __set_errno (EDOM);
+	  /* sin/cos(Inf or NaN) is NaN.  */
+	  *sinx = *cosx = x - x;
+	}
+    }
+}
+
+#ifndef SINCOSF
+libm_alias_float (__sincos, sincos)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S b/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S
new file mode 100644
index 0000000000..51d012bb12
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf-sse2.S
@@ -0,0 +1,2 @@
+#define __sincosf __sincosf_sse2
+#include <sysdeps/x86_64/fpu/s_sincosf.S>
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
index f6a4cf5c1e..6cb4295558 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen4.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c
@@ -1,5 +1,5 @@
-/* Tests for SSE ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* Multiple versions of sincosf.
+   Copyright (C) 2017 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,13 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include "test-float-vlen4.h"
+#include <libm-alias-float.h>
 
-#define TEST_VECTOR_cosf 1
-#define TEST_VECTOR_sinf 1
-#define TEST_VECTOR_sincosf 1
-#define TEST_VECTOR_logf 1
-#define TEST_VECTOR_expf 1
-#define TEST_VECTOR_powf 1
+extern void __redirect_sincosf (float, float *, float *);
 
-#include "libm-test.c"
+#define SYMBOL_NAME sincosf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ());
+
+libm_alias_float (__sincos, sincos)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c
new file mode 100644
index 0000000000..34440ebf4a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf-fma.c
@@ -0,0 +1,2 @@
+#define SINF __sinf_fma
+#include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c b/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c
new file mode 100644
index 0000000000..74e32c98db
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf-sse2.c
@@ -0,0 +1,2 @@
+#define SINF __sinf_sse2
+#include <sysdeps/ieee754/flt-32/s_sinf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
new file mode 100644
index 0000000000..4fdfbd8d3e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c
@@ -0,0 +1,28 @@
+/* Multiple versions of sinf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+
+extern float __redirect_sinf (float);
+
+#define SYMBOL_NAME sinf
+#include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ());
+
+libm_alias_float (__sin, sin)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c b/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c
index 53de5d3c98..5ee29a9a06 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan-avx.c
@@ -1,4 +1,4 @@
-#define tan __tan_avx
+#define __tan __tan_avx
 #define __dbl_mp __dbl_mp_avx
 #define __sub __sub_avx
 #define SECTION __attribute__ ((section (".text.avx")))
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c
new file mode 100644
index 0000000000..1a1b9d2490
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma.c
@@ -0,0 +1,8 @@
+#define __tan __tan_fma
+#define __dbl_mp __dbl_mp_fma
+#define __mpranred __mpranred_fma
+#define __mptan __mptan_fma
+#define __sub __sub_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
index a805440b46..e4e9f6cb85 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan-fma4.c
@@ -1,4 +1,4 @@
-#define tan __tan_fma4
+#define __tan __tan_fma4
 #define __dbl_mp __dbl_mp_fma4
 #define __mpranred __mpranred_fma4
 #define __mptan __mptan_fma4
diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c
index 25f3bca07e..bb75d8d0bc 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c
@@ -1,15 +1,30 @@
-#include <init-arch.h>
-#include <math.h>
+/* Multiple versions of tan.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
 
-extern double __tan_sse2 (double);
-extern double __tan_avx (double);
-extern double __tan_fma4 (double);
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-libm_ifunc (tan, (HAS_ARCH_FEATURE (FMA4_Usable) ? __tan_fma4 :
-		  HAS_ARCH_FEATURE (AVX_Usable)
-		  ? __tan_avx : __tan_sse2));
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-#define tan __tan_sse2
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
+#include <libm-alias-double.h>
 
+extern double __redirect_tan (double);
+
+#define SYMBOL_NAME tan
+#include "ifunc-avx-fma4.h"
+
+libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ());
+libm_alias_double (__tan, tan)
+
+#define __tan __tan_sse2
 #include <sysdeps/ieee754/dbl-64/s_tan.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c
new file mode 100644
index 0000000000..6204ae3c77
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-c.c
@@ -0,0 +1,2 @@
+#define __trunc __trunc_c
+#include <sysdeps/ieee754/dbl-64/wordsize-64/s_trunc.c>
diff --git a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
index 241378e770..b8046bfa0c 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-avx-unaligned.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S
@@ -1,5 +1,5 @@
-/* mempcpy with AVX
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* trunc for SSE4.1.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,7 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#define USE_AS_MEMPCPY
-#define MEMCPY		__mempcpy_avx_unaligned
-#define MEMCPY_CHK	__mempcpy_chk_avx_unaligned
-#include "memcpy-avx-unaligned.S"
+#include <sysdep.h>
+
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__trunc_sse41)	/* SSE4.1 trunc: round the double in %xmm0 toward zero, in place.  */
+	roundsd	$11, %xmm0, %xmm0	/* imm 11 (0b1011): bits 1:0 = 3 truncate, bit 2 clear = use the immediate's mode, bit 3 = suppress the precision (inexact) exception.  */
+	ret
+END(__trunc_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
new file mode 100644
index 0000000000..a1b0c60630
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __trunc.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-double.h>
+
+#define trunc __redirect_trunc
+#define __trunc __redirect___trunc
+#include <math.h>
+#undef trunc
+#undef __trunc
+
+#define SYMBOL_NAME trunc
+#include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ());
+libm_alias_double (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c
new file mode 100644
index 0000000000..7a5ac7da1f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-c.c
@@ -0,0 +1,2 @@
+#define __truncf __truncf_c
+#include <sysdeps/ieee754/flt-32/s_truncf.c>
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
index 75e35f2957..2dabc0be57 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned.S
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S
@@ -1,5 +1,5 @@
-/* memmove with AVX
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* truncf for SSE4.1.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,7 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#define USE_AS_MEMMOVE
-#define MEMCPY		__memmove_avx_unaligned
-#define MEMCPY_CHK	__memmove_chk_avx_unaligned
-#include "memcpy-avx-unaligned.S"
+#include <sysdep.h>
+
+	.section .text.sse4.1,"ax",@progbits
+ENTRY(__truncf_sse41)	/* SSE4.1 truncf: round the float in %xmm0 toward zero, in place.  */
+	roundss	$11, %xmm0, %xmm0	/* imm 11 (0b1011): bits 1:0 = 3 truncate, bit 2 clear = use the immediate's mode, bit 3 = suppress the precision (inexact) exception.  */
+	ret
+END(__truncf_sse41)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
new file mode 100644
index 0000000000..a7e220bd0c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __truncf.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <libm-alias-float.h>
+
+#define truncf __redirect_truncf  /* Divert the <math.h> prototypes.  */
+#define __truncf __redirect___truncf
+#include <math.h>
+#undef truncf
+#undef __truncf
+
+#define SYMBOL_NAME truncf  /* Presumably read by ifunc-sse4_1.h.  */
+#include "ifunc-sse4_1.h"  /* Expected to provide IFUNC_SELECTOR.  */
+
+libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ());
+libm_alias_float (__trunc, trunc)  /* "f" suffix added by macro (verify).  */
diff --git a/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c b/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c
new file mode 100644
index 0000000000..dcd44bc5e8
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/sincos32-fma.c
@@ -0,0 +1,15 @@
+#define __cos32 __cos32_fma  /* _fma-suffixed names for this copy.  */
+#define __sin32 __sin32_fma
+#define __c32 __c32_fma
+#define __mpsin __mpsin_fma
+#define __mpsin1 __mpsin1_fma
+#define __mpcos __mpcos_fma
+#define __mpcos1 __mpcos1_fma
+#define __mpranred __mpranred_fma
+#define __add __add_fma
+#define __dbl_mp __dbl_mp_fma
+#define __mul __mul_fma
+#define __sub __sub_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/sincos32.c>  /* Built with the renames.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c b/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c
deleted file mode 100644
index d01c6d71a4..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/slowexp-avx.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#define __slowexp __slowexp_avx
-#define __add __add_avx
-#define __dbl_mp __dbl_mp_avx
-#define __mpexp __mpexp_avx
-#define __mul __mul_avx
-#define __sub __sub_avx
-#define SECTION __attribute__ ((section (".text.avx")))
-
-#include <sysdeps/ieee754/dbl-64/slowexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
deleted file mode 100644
index 3bcde84233..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/slowexp-fma4.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#define __slowexp __slowexp_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __mpexp __mpexp_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/slowexp.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c b/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
deleted file mode 100644
index 69d69823bb..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/slowpow-fma4.c
+++ /dev/null
@@ -1,11 +0,0 @@
-#define __slowpow __slowpow_fma4
-#define __add __add_fma4
-#define __dbl_mp __dbl_mp_fma4
-#define __mpexp __mpexp_fma4
-#define __mplog __mplog_fma4
-#define __mul __mul_fma4
-#define __sub __sub_fma4
-#define __halfulp __halfulp_fma4
-#define SECTION __attribute__ ((section (".text.fma4")))
-
-#include <sysdeps/ieee754/dbl-64/slowpow.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S
new file mode 100644
index 0000000000..a85729807f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized cos, vector length is 2.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2v_cos _ZGVbN2v_cos_sse2  /* Rename the included entry.  */
+#include "../svml_d_cos2_core.S"  /* Assembled under the name above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
deleted file mode 100644
index 7d720e2fcb..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized cos, vector length is 2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN2v_cos)
-        .type   _ZGVbN2v_cos, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN2v_cos_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN2v_cos_sse2(%rip), %rax
-        ret
-END (_ZGVbN2v_cos)
-libmvec_hidden_def (_ZGVbN2v_cos)
-
-#define _ZGVbN2v_cos _ZGVbN2v_cos_sse2
-#include "../svml_d_cos2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c
new file mode 100644
index 0000000000..3ff39eecd7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized cos, vector length is 2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2v_cos  /* Symbol defined by this file.  */
+#include "ifunc-mathvec-sse4_1.h"  /* Selector macros (not shown).  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED  /* Hidden __GI_ alias; presumably for internal callers.  */
+__hidden_ver1 (_ZGVbN2v_cos, __GI__ZGVbN2v_cos, __redirect__ZGVbN2v_cos)
+  __attribute__ ((visibility ("hidden")));
+#endif  /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
index 088fcae067..10be76e207 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos2_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function cos vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -205,7 +205,7 @@ ENTRY (_ZGVbN2v_cos_sse4)
         shlq      $4, %r15
         movsd     200(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         movsd     %xmm0, 264(%rsp,%r15)
         jmp       .LBL_1_8
@@ -215,7 +215,7 @@ ENTRY (_ZGVbN2v_cos_sse4)
         shlq      $4, %r15
         movsd     192(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         movsd     %xmm0, 256(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S
new file mode 100644
index 0000000000..9f406ea7c9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized cos, vector length is 4.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN4v_cos _ZGVdN4v_cos_sse_wrapper  /* Rename included entry.  */
+#include "../svml_d_cos4_core.S"  /* Assembled under the name above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
deleted file mode 100644
index 65a3570d2e..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized cos, vector length is 4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN4v_cos)
-        .type   _ZGVdN4v_cos, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN4v_cos_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN4v_cos_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN4v_cos)
-libmvec_hidden_def (_ZGVdN4v_cos)
-
-#define _ZGVdN4v_cos _ZGVdN4v_cos_sse_wrapper
-#include "../svml_d_cos4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c
new file mode 100644
index 0000000000..cb8405201a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized cos, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN4v_cos  /* Symbol defined by this file.  */
+#include "ifunc-mathvec-avx2.h"  /* Selector macros (not shown).  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED  /* Hidden __GI_ alias; presumably for internal callers.  */
+__hidden_ver1 (_ZGVdN4v_cos, __GI__ZGVdN4v_cos, __redirect__ZGVdN4v_cos)
+  __attribute__ ((visibility ("hidden")));
+#endif  /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
index 4e653216d9..38cdc6bb03 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos4_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function cos vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -188,7 +188,7 @@ ENTRY (_ZGVdN4v_cos_avx2)
         vmovsd    328(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 392(%rsp,%r15)
         jmp       .LBL_1_8
@@ -199,7 +199,7 @@ ENTRY (_ZGVdN4v_cos_avx2)
         vmovsd    320(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 384(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S
new file mode 100644
index 0000000000..081baeeff5
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized cos, vector length is 8.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN8v_cos _ZGVeN8v_cos_avx2_wrapper  /* Rename included entry.  */
+#include "../svml_d_cos8_core.S"  /* Assembled under the name above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
deleted file mode 100644
index 3e7f16d44e..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized cos, vector length is 8.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN8v_cos)
-        .type   _ZGVeN8v_cos, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-1:      leaq    _ZGVeN8v_cos_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN8v_cos_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN8v_cos_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN8v_cos)
-
-#define _ZGVeN8v_cos _ZGVeN8v_cos_avx2_wrapper
-#include "../svml_d_cos8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c
new file mode 100644
index 0000000000..4aa12595bc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized cos, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN8v_cos  /* Symbol defined by this file.  */
+#include "ifunc-mathvec-avx512.h"  /* Selector macros (not shown).  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED  /* Hidden __GI_ alias; presumably for internal callers.  */
+__hidden_ver1 (_ZGVeN8v_cos, __GI__ZGVeN8v_cos, __redirect__ZGVeN8v_cos)
+  __attribute__ ((visibility ("hidden")));
+#endif  /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
index 1cac1d827a..24e3b36357 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_cos8_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function cos vectorized with AVX-512, KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (_ZGVeN8v_cos_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
 #else
 /*
@@ -221,7 +221,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
         movzbl    %r12b, %r15d
         shlq      $4, %r15
         vmovsd    1160(%rsp,%r15), %xmm0
-        call      cos@PLT
+        call      JUMPTARGET(cos)
         vmovsd    %xmm0, 1224(%rsp,%r15)
         jmp       .LBL_1_8
 
@@ -229,14 +229,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
         movzbl    %r12b, %r15d
         shlq      $4, %r15
         vmovsd    1152(%rsp,%r15), %xmm0
-        call      cos@PLT
+        call      JUMPTARGET(cos)
         vmovsd    %xmm0, 1216(%rsp,%r15)
         jmp       .LBL_1_7
 #endif
 END (_ZGVeN8v_cos_knl)
 
 ENTRY (_ZGVeN8v_cos_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
 #else
 /*
@@ -438,7 +438,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
         vzeroupper
         vmovsd    1160(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 1224(%rsp,%r15)
         jmp       .LBL_2_8
@@ -450,7 +450,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_cos
         vzeroupper
         vmovsd    1152(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 1216(%rsp,%r15)
         jmp       .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S
new file mode 100644
index 0000000000..3591eb1f19
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized exp, vector length is 2.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2v_exp _ZGVbN2v_exp_sse2  /* Rename the included entry.  */
+#include "../svml_d_exp2_core.S"  /* Assembled under the name above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
deleted file mode 100644
index 136c67a550..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized exp.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN2v_exp)
-        .type   _ZGVbN2v_exp, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN2v_exp_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN2v_exp_sse2(%rip), %rax
-        ret
-END (_ZGVbN2v_exp)
-libmvec_hidden_def (_ZGVbN2v_exp)
-
-#define _ZGVbN2v_exp _ZGVbN2v_exp_sse2
-#include "../svml_d_exp2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c
new file mode 100644
index 0000000000..2cfe8937c9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized exp, vector length is 2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2v_exp  /* Symbol defined by this file.  */
+#include "ifunc-mathvec-sse4_1.h"  /* Selector macros (not shown).  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED  /* Hidden __GI_ alias; presumably for internal callers.  */
+__hidden_ver1 (_ZGVbN2v_exp, __GI__ZGVbN2v_exp, __redirect__ZGVbN2v_exp)
+  __attribute__ ((visibility ("hidden")));
+#endif  /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
index 445b230152..e98d11b311 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp2_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function exp vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -207,7 +207,7 @@ ENTRY (_ZGVbN2v_exp_sse4)
         shlq      $4, %r15
         movsd     200(%rsp,%r15), %xmm0
 
-        call      exp@PLT
+        call      JUMPTARGET(__exp_finite)
 
         movsd     %xmm0, 264(%rsp,%r15)
         jmp       .LBL_1_8
@@ -217,7 +217,7 @@ ENTRY (_ZGVbN2v_exp_sse4)
         shlq      $4, %r15
         movsd     192(%rsp,%r15), %xmm0
 
-        call      exp@PLT
+        call      JUMPTARGET(__exp_finite)
 
         movsd     %xmm0, 256(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S
new file mode 100644
index 0000000000..f8e0b5517a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized exp, vector length is 4.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN4v_exp _ZGVdN4v_exp_sse_wrapper  /* Rename included entry.  */
+#include "../svml_d_exp4_core.S"  /* Assembled under the name above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
deleted file mode 100644
index 9d6a47be0a..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized exp.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN4v_exp)
-        .type   _ZGVdN4v_exp, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN4v_exp_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN4v_exp_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN4v_exp)
-libmvec_hidden_def (_ZGVdN4v_exp)
-
-#define _ZGVdN4v_exp _ZGVdN4v_exp_sse_wrapper
-#include "../svml_d_exp4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c
new file mode 100644
index 0000000000..59bb36984a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized exp, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN4v_exp  /* Symbol defined by this file.  */
+#include "ifunc-mathvec-avx2.h"  /* Selector macros (not shown).  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED  /* Hidden __GI_ alias; presumably for internal callers.  */
+__hidden_ver1 (_ZGVdN4v_exp, __GI__ZGVdN4v_exp, __redirect__ZGVdN4v_exp)
+  __attribute__ ((visibility ("hidden")));
+#endif  /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
index 25f9e28941..87990f8ad7 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp4_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function exp vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -193,7 +193,7 @@ ENTRY (_ZGVdN4v_exp_avx2)
         vmovsd    328(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      exp@PLT
+        call      JUMPTARGET(__exp_finite)
 
         vmovsd    %xmm0, 392(%rsp,%r15)
         jmp       .LBL_1_8
@@ -204,7 +204,7 @@ ENTRY (_ZGVdN4v_exp_avx2)
         vmovsd    320(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      exp@PLT
+        call      JUMPTARGET(__exp_finite)
 
         vmovsd    %xmm0, 384(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S
new file mode 100644
index 0000000000..b1d3cad0e1
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized exp, vector length is 8.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper  /* Rename included entry.  */
+#include "../svml_d_exp8_core.S"  /* Assembled under the name above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
deleted file mode 100644
index 317ee36e61..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized exp.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN8v_exp)
-        .type   _ZGVeN8v_exp, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN8v_exp_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN8v_exp_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN8v_exp_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN8v_exp)
-
-#define _ZGVeN8v_exp _ZGVeN8v_exp_avx2_wrapper
-#include "../svml_d_exp8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c
new file mode 100644
index 0000000000..cfdc96ec86
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized exp, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN8v_exp
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8v_exp, __GI__ZGVeN8v_exp, __redirect__ZGVeN8v_exp)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
index 74f1d2ce7b..8dd8a03e4b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_exp8_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function exp vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (_ZGVeN8v_exp_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
 #else
 /*
@@ -223,7 +223,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
         movzbl    %r12b, %r15d
         shlq      $4, %r15
         vmovsd    1160(%rsp,%r15), %xmm0
-        call      exp@PLT
+        call      JUMPTARGET(__exp_finite)
         vmovsd    %xmm0, 1224(%rsp,%r15)
         jmp       .LBL_1_8
 
@@ -231,14 +231,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
         movzbl    %r12b, %r15d
         shlq      $4, %r15
         vmovsd    1152(%rsp,%r15), %xmm0
-        call      exp@PLT
+        call      JUMPTARGET(__exp_finite)
         vmovsd    %xmm0, 1216(%rsp,%r15)
         jmp       .LBL_1_7
 #endif
 END (_ZGVeN8v_exp_knl)
 
 ENTRY (_ZGVeN8v_exp_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
 #else
 /*
@@ -438,7 +438,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
         vmovsd    1160(%rsp,%r15), %xmm0
         vzeroupper
         vmovsd    1160(%rsp,%r15), %xmm0
-        call      exp@PLT
+        call      JUMPTARGET(__exp_finite)
         vmovsd    %xmm0, 1224(%rsp,%r15)
         jmp       .LBL_2_8
 
@@ -448,7 +448,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_exp
         vmovsd    1152(%rsp,%r15), %xmm0
         vzeroupper
         vmovsd    1152(%rsp,%r15), %xmm0
-        call      exp@PLT
+        call      JUMPTARGET(__exp_finite)
         vmovsd    %xmm0, 1216(%rsp,%r15)
         jmp       .LBL_2_7
 
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S
new file mode 100644
index 0000000000..761a1a537d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized log, vector length is 2.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2v_log _ZGVbN2v_log_sse2
+#include "../svml_d_log2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
deleted file mode 100644
index 03d86a3e63..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized log.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-        .text
-ENTRY (_ZGVbN2v_log)
-        .type   _ZGVbN2v_log, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN2v_log_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN2v_log_sse2(%rip), %rax
-        ret
-END (_ZGVbN2v_log)
-libmvec_hidden_def (_ZGVbN2v_log)
-
-#define _ZGVbN2v_log _ZGVbN2v_log_sse2
-#include "../svml_d_log2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c
new file mode 100644
index 0000000000..c24437a3be
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized log, vector length is 2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2v_log
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_log, __GI__ZGVbN2v_log, __redirect__ZGVbN2v_log)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
index 5d254288f6..eb854c68d6 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log2_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function log vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -211,7 +211,7 @@ ENTRY (_ZGVbN2v_log_sse4)
         shlq      $4, %r15
         movsd     200(%rsp,%r15), %xmm0
 
-        call      log@PLT
+        call      JUMPTARGET(__log_finite)
 
         movsd     %xmm0, 264(%rsp,%r15)
         jmp       .LBL_1_8
@@ -221,7 +221,7 @@ ENTRY (_ZGVbN2v_log_sse4)
         shlq      $4, %r15
         movsd     192(%rsp,%r15), %xmm0
 
-        call      log@PLT
+        call      JUMPTARGET(__log_finite)
 
         movsd     %xmm0, 256(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S
new file mode 100644
index 0000000000..2460512f78
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized log, vector length is 4.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN4v_log _ZGVdN4v_log_sse_wrapper
+#include "../svml_d_log4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
deleted file mode 100644
index 9f6ddbef15..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized log.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN4v_log)
-        .type   _ZGVdN4v_log, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN4v_log_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN4v_log_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN4v_log)
-libmvec_hidden_def (_ZGVdN4v_log)
-
-#define _ZGVdN4v_log _ZGVdN4v_log_sse_wrapper
-#include "../svml_d_log4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c
new file mode 100644
index 0000000000..5751370d65
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized log, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN4v_log
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4v_log, __GI__ZGVdN4v_log, __redirect__ZGVdN4v_log)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
index 5da298747d..81515850e1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log4_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function log vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -191,7 +191,7 @@ ENTRY (_ZGVdN4v_log_avx2)
         vmovsd    328(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      log@PLT
+        call      JUMPTARGET(__log_finite)
 
         vmovsd    %xmm0, 392(%rsp,%r15)
         jmp       .LBL_1_8
@@ -202,7 +202,7 @@ ENTRY (_ZGVdN4v_log_avx2)
         vmovsd    320(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      log@PLT
+        call      JUMPTARGET(__log_finite)
 
         vmovsd    %xmm0, 384(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S
new file mode 100644
index 0000000000..ecfbeafb23
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized log, vector length is 8.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper
+#include "../svml_d_log8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
deleted file mode 100644
index 2e1a1da1a5..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized log.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN8v_log)
-        .type   _ZGVeN8v_log, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN8v_log_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN8v_log_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN8v_log_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN8v_log)
-
-#define _ZGVeN8v_log _ZGVeN8v_log_avx2_wrapper
-#include "../svml_d_log8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c
new file mode 100644
index 0000000000..1e796dcfdd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized log, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN8v_log
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8v_log, __GI__ZGVeN8v_log, __redirect__ZGVeN8v_log)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
index dca8e61f34..ae8af8d861 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_log8_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function log vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (_ZGVeN8v_log_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN4v_log
 #else
 /*
@@ -222,7 +222,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log
         movzbl    %r12b, %r15d
         shlq      $4, %r15
         vmovsd    1160(%rsp,%r15), %xmm0
-        call      log@PLT
+        call      JUMPTARGET(__log_finite)
         vmovsd    %xmm0, 1224(%rsp,%r15)
         jmp       .LBL_1_8
 
@@ -230,14 +230,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log
         movzbl    %r12b, %r15d
         shlq      $4, %r15
         vmovsd    1152(%rsp,%r15), %xmm0
-        call      log@PLT
+        call      JUMPTARGET(__log_finite)
         vmovsd    %xmm0, 1216(%rsp,%r15)
         jmp       .LBL_1_7
 #endif
 END (_ZGVeN8v_log_knl)
 
 ENTRY (_ZGVeN8v_log_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN4v_log
 #else
 /*
@@ -443,7 +443,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log
         vzeroupper
         vmovsd    1160(%rsp,%r15), %xmm0
 
-        call      log@PLT
+        call      JUMPTARGET(__log_finite)
 
         vmovsd    %xmm0, 1224(%rsp,%r15)
         jmp       .LBL_2_8
@@ -455,7 +455,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_log
         vzeroupper
         vmovsd    1152(%rsp,%r15), %xmm0
 
-        call      log@PLT
+        call      JUMPTARGET(__log_finite)
 
         vmovsd    %xmm0, 1216(%rsp,%r15)
         jmp       .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S
new file mode 100644
index 0000000000..2d8ad50681
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized pow, vector length is 2.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2vv_pow _ZGVbN2vv_pow_sse2
+#include "../svml_d_pow2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
deleted file mode 100644
index 4a50246889..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized pow.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN2vv_pow)
-        .type   _ZGVbN2vv_pow, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN2vv_pow_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN2vv_pow_sse2(%rip), %rax
-        ret
-END (_ZGVbN2vv_pow)
-libmvec_hidden_def (_ZGVbN2vv_pow)
-
-#define _ZGVbN2vv_pow _ZGVbN2vv_pow_sse2
-#include "../svml_d_pow2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c
new file mode 100644
index 0000000000..3424c0e326
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized pow, vector length is 2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2vv_pow
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2vv_pow, __GI__ZGVbN2vv_pow,
+	       __redirect__ZGVbN2vv_pow)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S
index 064d170878..77828b44d5 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow2_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function pow vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -413,7 +413,7 @@ ENTRY (_ZGVbN2vv_pow_sse4)
         movsd     72(%rsp,%r15), %xmm0
         movsd     136(%rsp,%r15), %xmm1
 
-        call      pow@PLT
+        call      JUMPTARGET(__pow_finite)
 
         movsd     %xmm0, 200(%rsp,%r15)
         jmp       .LBL_1_8
@@ -424,7 +424,7 @@ ENTRY (_ZGVbN2vv_pow_sse4)
         movsd     64(%rsp,%r15), %xmm0
         movsd     128(%rsp,%r15), %xmm1
 
-        call      pow@PLT
+        call      JUMPTARGET(__pow_finite)
 
         movsd     %xmm0, 192(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S
new file mode 100644
index 0000000000..4dcd14ff20
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized pow, vector length is 4.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN4vv_pow _ZGVdN4vv_pow_sse_wrapper
+#include "../svml_d_pow4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c
new file mode 100644
index 0000000000..447be39401
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized pow, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN4vv_pow
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4vv_pow, __GI__ZGVdN4vv_pow,
+	       __redirect__ZGVdN4vv_pow)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S
index f2a73ffe1e..c43d62f202 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function pow vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -367,7 +367,7 @@ ENTRY (_ZGVdN4vv_pow_avx2)
         vmovsd    264(%rsp,%r15), %xmm1
         vzeroupper
 
-        call      pow@PLT
+        call      JUMPTARGET(__pow_finite)
 
         vmovsd    %xmm0, 328(%rsp,%r15)
         jmp       .LBL_1_8
@@ -379,7 +379,7 @@ ENTRY (_ZGVdN4vv_pow_avx2)
         vmovsd    256(%rsp,%r15), %xmm1
         vzeroupper
 
-        call      pow@PLT
+        call      JUMPTARGET(__pow_finite)
 
         vmovsd    %xmm0, 320(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S
new file mode 100644
index 0000000000..8acf700e76
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized pow, vector length is 8.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN8vv_pow _ZGVeN8vv_pow_avx2_wrapper
+#include "../svml_d_pow8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
deleted file mode 100644
index 30bc53f2f7..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized pow.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN8vv_pow)
-        .type   _ZGVeN8vv_pow, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN8vv_pow_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN8vv_pow_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN8vv_pow_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN8vv_pow)
-
-#define _ZGVeN8vv_pow _ZGVeN8vv_pow_avx2_wrapper
-#include "../svml_d_pow8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c
new file mode 100644
index 0000000000..62f96965bb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized pow, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN8vv_pow
+#include "ifunc-mathvec-avx512.h"
+/* Make SYMBOL_NAME an ifunc; selector presumably from the header above.  */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8vv_pow, __GI__ZGVeN8vv_pow,
+	       __redirect__ZGVeN8vv_pow)
+  __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S
index 4a515233fc..a28c39b73d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_pow8_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function pow vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -82,7 +82,7 @@
 
 	.text
 ENTRY (_ZGVeN8vv_pow_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
 #else
         pushq     %rbp
@@ -392,7 +392,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
         shlq      $4, %r15
         vmovsd    1160(%rsp,%r15), %xmm0
         vmovsd    1224(%rsp,%r15), %xmm1
-        call      pow@PLT
+        call      JUMPTARGET(__pow_finite)
         vmovsd    %xmm0, 1288(%rsp,%r15)
         jmp       .LBL_1_8
 
@@ -401,7 +401,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
         shlq      $4, %r15
         vmovsd    1152(%rsp,%r15), %xmm0
         vmovsd    1216(%rsp,%r15), %xmm1
-        call      pow@PLT
+        call      JUMPTARGET(__pow_finite)
         vmovsd    %xmm0, 1280(%rsp,%r15)
         jmp       .LBL_1_7
 
@@ -409,7 +409,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
 END (_ZGVeN8vv_pow_knl)
 
 ENTRY (_ZGVeN8vv_pow_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
 #else
         pushq     %rbp
@@ -720,7 +720,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
         vzeroupper
         vmovsd    1160(%rsp,%r15), %xmm0
 
-        call      pow@PLT
+        call      JUMPTARGET(__pow_finite)
 
         vmovsd    %xmm0, 1288(%rsp,%r15)
         jmp       .LBL_2_8
@@ -732,7 +732,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN4vv_pow
         vzeroupper
         vmovsd    1152(%rsp,%r15), %xmm0
 
-        call      pow@PLT
+        call      JUMPTARGET(__pow_finite)
 
         vmovsd    %xmm0, 1280(%rsp,%r15)
         jmp       .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S
new file mode 100644
index 0000000000..cb7b31aa1c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized sin, vector length is 2.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2v_sin _ZGVbN2v_sin_sse2
+#include "../svml_d_sin2_core.S" /* Included under the rename above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
deleted file mode 100644
index 112bec2224..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sin.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN2v_sin)
-        .type   _ZGVbN2v_sin, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN2v_sin_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN2v_sin_sse2(%rip), %rax
-        ret
-END (_ZGVbN2v_sin)
-libmvec_hidden_def (_ZGVbN2v_sin)
-
-#define _ZGVbN2v_sin _ZGVbN2v_sin_sse2
-#include "../svml_d_sin2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c
new file mode 100644
index 0000000000..1c5788f205
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized sin, vector length is 2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2v_sin
+#include "ifunc-mathvec-sse4_1.h"
+/* Make SYMBOL_NAME an ifunc; selector presumably from the header above.  */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2v_sin, __GI__ZGVbN2v_sin, __redirect__ZGVbN2v_sin)
+  __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
index 5755ce6f74..15980e9eeb 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin2_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function sin vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -211,7 +211,7 @@ ENTRY (_ZGVbN2v_sin_sse4)
         shlq      $4, %r15
         movsd     200(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         movsd     %xmm0, 264(%rsp,%r15)
         jmp       .LBL_1_8
@@ -221,7 +221,7 @@ ENTRY (_ZGVbN2v_sin_sse4)
         shlq      $4, %r15
         movsd     192(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         movsd     %xmm0, 256(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S
new file mode 100644
index 0000000000..07fae6f3b4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized sin, vector length is 4.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN4v_sin _ZGVdN4v_sin_sse_wrapper
+#include "../svml_d_sin4_core.S" /* Included under the rename above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
deleted file mode 100644
index 700a1c629d..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sin, vector length is 4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN4v_sin)
-        .type   _ZGVdN4v_sin, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN4v_sin_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN4v_sin_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN4v_sin)
-libmvec_hidden_def (_ZGVdN4v_sin)
-
-#define _ZGVdN4v_sin _ZGVdN4v_sin_sse_wrapper
-#include "../svml_d_sin4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c
new file mode 100644
index 0000000000..b5933914aa
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized sin, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN4v_sin
+#include "ifunc-mathvec-avx2.h"
+/* Make SYMBOL_NAME an ifunc; selector presumably from the header above.  */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4v_sin, __GI__ZGVdN4v_sin, __redirect__ZGVdN4v_sin)
+  __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
index 46b557158a..4f0917c56d 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin4_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function sin vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -191,7 +191,7 @@ ENTRY (_ZGVdN4v_sin_avx2)
         vmovsd    328(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 392(%rsp,%r15)
         jmp       .LBL_1_8
@@ -202,7 +202,7 @@ ENTRY (_ZGVdN4v_sin_avx2)
         vmovsd    320(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 384(%rsp,%r15)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S
new file mode 100644
index 0000000000..b64c3390d6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core-avx2.S
@@ -0,0 +1,23 @@
+/* AVX2 version of vectorized sin, vector length is 8.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <init-arch.h> /* NOTE(review): sibling wrappers omit these.  */
+
+#define _ZGVeN8v_sin _ZGVeN8v_sin_avx2_wrapper
+#include "../svml_d_sin8_core.S" /* Included under the rename above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
deleted file mode 100644
index 5afce0ed88..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized sin.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN8v_sin)
-        .type   _ZGVeN8v_sin, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN8v_sin_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN8v_sin_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN8v_sin_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN8v_sin)
-
-#define _ZGVeN8v_sin _ZGVeN8v_sin_avx2_wrapper
-#include "../svml_d_sin8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c
new file mode 100644
index 0000000000..57023d8494
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core.c
@@ -0,0 +1,27 @@
+/* Multiple versions of vectorized sin, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN8v_sin
+#include "ifunc-mathvec-avx512.h"
+/* Make SYMBOL_NAME an ifunc; selector presumably from the header above.  */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8v_sin, __GI__ZGVeN8v_sin, __redirect__ZGVeN8v_sin)
+  __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
index 6c565f3861..2d4b14fd1b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sin8_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function sin vectorized with AVX-512, KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (_ZGVeN8v_sin_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
 #else
 /*
@@ -222,7 +222,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
         movzbl    %r12b, %r15d
         shlq      $4, %r15
         vmovsd    1160(%rsp,%r15), %xmm0
-        call      sin@PLT
+        call      JUMPTARGET(sin)
         vmovsd    %xmm0, 1224(%rsp,%r15)
         jmp       .LBL_1_8
 
@@ -230,14 +230,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
         movzbl    %r12b, %r15d
         shlq      $4, %r15
         vmovsd    1152(%rsp,%r15), %xmm0
-        call      sin@PLT
+        call      JUMPTARGET(sin)
         vmovsd    %xmm0, 1216(%rsp,%r15)
         jmp       .LBL_1_7
 #endif
 END (_ZGVeN8v_sin_knl)
 
 ENTRY (_ZGVeN8v_sin_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
 #else
 /*
@@ -440,7 +440,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
         vzeroupper
         vmovsd    1160(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 1224(%rsp,%r15)
         jmp       .LBL_2_8
@@ -452,7 +452,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN4v_sin
         vzeroupper
         vmovsd    1152(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 1216(%rsp,%r15)
         jmp       .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S
new file mode 100644
index 0000000000..ab7f9c500d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized sincos, vector length is 2.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN2vvv_sincos _ZGVbN2vvv_sincos_sse2
+#include "../svml_d_sincos2_core.S" /* Included under the rename above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
deleted file mode 100644
index 883d7d33a4..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sincos.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN2vvv_sincos)
-        .type   _ZGVbN2vvv_sincos, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN2vvv_sincos_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN2vvv_sincos_sse2(%rip), %rax
-        ret
-END (_ZGVbN2vvv_sincos)
-libmvec_hidden_def (_ZGVbN2vvv_sincos)
-
-#define _ZGVbN2vvv_sincos _ZGVbN2vvv_sincos_sse2
-#include "../svml_d_sincos2_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c
new file mode 100644
index 0000000000..f373bb40a3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincos, vector length is 2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN2vvv_sincos
+#include "ifunc-mathvec-sse4_1.h"
+/* Make SYMBOL_NAME an ifunc; selector presumably from the header above.  */
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN2vvv_sincos, __GI__ZGVbN2vvv_sincos,
+	       __redirect__ZGVbN2vvv_sincos)
+  __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
index 65ad540122..b4dfa37898 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function sincos vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
 #include "svml_d_trig_data.h"
 
 	.text
-ENTRY (_ZGVbN2vvv_sincos_sse4)
+ENTRY (_ZGVbN2vl8l8_sincos_sse4)
 /*
    ALGORITHM DESCRIPTION:
 
@@ -287,12 +287,12 @@ ENTRY (_ZGVbN2vvv_sincos_sse4)
         shlq      $4, %r15
         movsd     136(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         movsd     %xmm0, 200(%rsp,%r15)
         movsd     136(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         movsd     %xmm0, 264(%rsp,%r15)
         jmp       .LBL_1_8
@@ -302,13 +302,67 @@ ENTRY (_ZGVbN2vvv_sincos_sse4)
         shlq      $4, %r15
         movsd     128(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         movsd     %xmm0, 192(%rsp,%r15)
         movsd     128(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         movsd     %xmm0, 256(%rsp,%r15)
         jmp       .LBL_1_7
+END (_ZGVbN2vl8l8_sincos_sse4)
+libmvec_hidden_def(_ZGVbN2vl8l8_sincos_sse4)
+
+/* vvv version: calls the vl8l8 variant on stack buffers, scatters results.  */
+ENTRY (_ZGVbN2vvv_sincos_sse4)
+#ifndef __ILP32__ /* LP64: the saved pointers are 64-bit.  */
+        subq      $72, %rsp                /* 72-byte scratch area.  */
+        .cfi_def_cfa_offset 80
+        movdqu    %xmm1, 32(%rsp)          /* Save xmm1 (2 pointers).  */
+        lea       (%rsp), %rdi             /* 1st result buffer (sin?).  */
+        movdqu    %xmm2, 48(%rdi)          /* Save xmm2; %rdi == %rsp.  */
+        lea       16(%rsp), %rsi           /* 2nd result buffer (cos?).  */
+        call      HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4)
+        movq      32(%rsp), %rdx           /* Saved xmm1, pointer 0.  */
+        movq      48(%rsp), %rsi           /* Saved xmm2, pointer 0.  */
+        movq      40(%rsp), %r8            /* Saved xmm1, pointer 1.  */
+        movq      56(%rsp), %r10           /* Saved xmm2, pointer 1.  */
+        movq      (%rsp), %rax             /* 1st buffer, element 0.  */
+        movq      16(%rsp), %rcx           /* 2nd buffer, element 0.  */
+        movq      8(%rsp), %rdi            /* 1st buffer, element 1.  */
+        movq      24(%rsp), %r9            /* 2nd buffer, element 1.  */
+        movq      %rax, (%rdx)             /* Scatter through pointers.  */
+        movq      %rcx, (%rsi)
+        movq      %rdi, (%r8)
+        movq      %r9, (%r10)
+        addq      $72, %rsp                /* Release scratch area.  */
+        .cfi_def_cfa_offset 8
+        ret
+#else /* __ILP32__: the saved pointers are 32-bit.  */
+        subl    $72, %esp                  /* 72-byte scratch area.  */
+        .cfi_def_cfa_offset 80
+        leal    48(%rsp), %esi             /* 2nd result buffer (cos?).  */
+        movaps  %xmm1, 16(%esp)            /* Save xmm1 pointer vector.  */
+        leal    32(%rsp), %edi             /* 1st result buffer (sin?).  */
+        movaps  %xmm2, (%esp)              /* Save xmm2 pointer vector.  */
+        call    HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4)
+        movdqa  16(%esp), %xmm1            /* Reload saved xmm1.  */
+        movsd   32(%esp), %xmm0            /* 1st buffer, element 0.  */
+        movq    %xmm1, %rax                /* %eax = xmm1 dword 0.  */
+        movdqa  (%esp), %xmm2              /* Reload saved xmm2.  */
+        movsd   %xmm0, (%eax)
+        movsd   40(%esp), %xmm0            /* 1st buffer, element 1.  */
+        pextrd  $1, %xmm1, %eax            /* %eax = xmm1 dword 1.  */
+        movsd   %xmm0, (%eax)
+        movsd   48(%esp), %xmm0            /* 2nd buffer, element 0.  */
+        movq    %xmm2, %rax                /* %eax = xmm2 dword 0.  */
+        movsd   %xmm0, (%eax)
+        movsd   56(%esp), %xmm0            /* 2nd buffer, element 1.  */
+        pextrd  $1, %xmm2, %eax            /* %eax = xmm2 dword 1.  */
+        movsd   %xmm0, (%eax)
+        addl    $72, %esp                  /* Release scratch area.  */
+        .cfi_def_cfa_offset 8
+        ret
+#endif
 END (_ZGVbN2vvv_sincos_sse4)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S
new file mode 100644
index 0000000000..10b4a2cf16
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized sincos, vector length is 4.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN4vvv_sincos _ZGVdN4vvv_sincos_sse_wrapper
+#include "../svml_d_sincos4_core.S" /* Included under the rename above.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
deleted file mode 100644
index 69a3f74650..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sincos.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN4vvv_sincos)
-        .type   _ZGVdN4vvv_sincos, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN4vvv_sincos_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN4vvv_sincos_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN4vvv_sincos)
-libmvec_hidden_def (_ZGVdN4vvv_sincos)
-
-#define _ZGVdN4vvv_sincos _ZGVdN4vvv_sincos_sse_wrapper
-#include "../svml_d_sincos4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c
new file mode 100644
index 0000000000..1fabd7b471
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincos, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN4vvv_sincos
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN4vvv_sincos, __GI__ZGVdN4vvv_sincos,
+	       __redirect__ZGVdN4vvv_sincos)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
index 60d03e9f8b..d56aa96ac9 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos4_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function sincos vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
 #include "svml_d_trig_data.h"
 
 	.text
-ENTRY (_ZGVdN4vvv_sincos_avx2)
+ENTRY (_ZGVdN4vl8l8_sincos_avx2)
 /*
    ALGORITHM DESCRIPTION:
 
@@ -248,12 +248,12 @@ ENTRY (_ZGVdN4vvv_sincos_avx2)
         vmovsd    264(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 328(%rsp,%r15)
         vmovsd    264(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 392(%rsp,%r15)
         jmp       .LBL_1_8
@@ -264,14 +264,110 @@ ENTRY (_ZGVdN4vvv_sincos_avx2)
         vmovsd    256(%rsp,%r15), %xmm0
         vzeroupper
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 320(%rsp,%r15)
         vmovsd    256(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 384(%rsp,%r15)
         jmp       .LBL_1_7
 
+END (_ZGVdN4vl8l8_sincos_avx2)
+libmvec_hidden_def(_ZGVdN4vl8l8_sincos_avx2)
+
+/* vvv version implemented with wrapper to vl8l8 variant.  */
+ENTRY (_ZGVdN4vvv_sincos_avx2)
+#ifndef __ILP32__  /* LP64: %ymm1/%ymm2 each carry four 8-byte pointers.  */
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-32, %rsp  /* Align the stack to 32 bytes.  */
+        subq      $128, %rsp  /* 2 result buffers + 2 saved pointer vectors.  */
+        vmovdqu   %ymm1, 64(%rsp)  /* Save %ymm1 pointers at 64..95.  */
+        lea       (%rsp), %rdi  /* 1st callee argument: buffer 1 at 0..31.  */
+        vmovdqu   %ymm2, 96(%rdi)  /* Save %ymm2 pointers at 96..127.  */
+        lea       32(%rsp), %rsi  /* 2nd callee argument: buffer 2 at 32..63.  */
+        call      HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2)
+        movq      64(%rsp), %rdx  /* %ymm1 pointer 0.  */
+        movq      96(%rsp), %rsi  /* %ymm2 pointer 0.  */
+        movq      72(%rsp), %r8  /* %ymm1 pointer 1.  */
+        movq      104(%rsp), %r10  /* %ymm2 pointer 1.  */
+        movq      (%rsp), %rax  /* Buffer 1, element 0.  */
+        movq      32(%rsp), %rcx  /* Buffer 2, element 0.  */
+        movq      8(%rsp), %rdi  /* Buffer 1, element 1.  */
+        movq      40(%rsp), %r9  /* Buffer 2, element 1.  */
+        movq      %rax, (%rdx)  /* Scatter element 0 of both buffers.  */
+        movq      %rcx, (%rsi)
+        movq      80(%rsp), %rax  /* %ymm1 pointer 2.  */
+        movq      112(%rsp), %rcx  /* %ymm2 pointer 2.  */
+        movq      %rdi, (%r8)  /* Scatter element 1 of both buffers.  */
+        movq      %r9, (%r10)
+        movq      88(%rsp), %rdi  /* %ymm1 pointer 3.  */
+        movq      120(%rsp), %r9  /* %ymm2 pointer 3.  */
+        movq      16(%rsp), %r11  /* Buffer 1, element 2.  */
+        movq      48(%rsp), %rdx  /* Buffer 2, element 2.  */
+        movq      24(%rsp), %rsi  /* Buffer 1, element 3.  */
+        movq      56(%rsp), %r8  /* Buffer 2, element 3.  */
+        movq      %r11, (%rax)  /* Scatter elements 2 and 3.  */
+        movq      %rdx, (%rcx)
+        movq      %rsi, (%rdi)
+        movq      %r8, (%r9)
+        movq      %rbp, %rsp  /* Drop the aligned frame.  */
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else  /* __ILP32__: %xmm1/%xmm2 each carry four 4-byte pointers.  */
+        leal    8(%rsp), %r10d  /* %r10d = address just above the return address.  */
+        .cfi_def_cfa 10, 0
+        andl    $-32, %esp  /* Align the stack to 32 bytes.  */
+        pushq   -8(%r10d)  /* Re-push the return address on the aligned stack.  */
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r10  /* Keep the original stack address for the epilogue.  */
+        .cfi_escape 0xf,0x3,0x76,0x78,0x6
+        leal    -48(%rbp), %esi  /* 2nd callee argument: buffer 2.  */
+        leal    -80(%rbp), %edi  /* 1st callee argument: buffer 1.  */
+        subl    $104, %esp
+        vmovaps %xmm1, -96(%ebp)  /* Save %xmm1 pointers at -96..-81.  */
+        vmovaps %xmm2, -112(%ebp)  /* Save %xmm2 pointers at -112..-97.  */
+        call    HIDDEN_JUMPTARGET(_ZGVdN4vl8l8_sincos_avx2)
+        movl    -96(%ebp), %eax  /* %xmm1 pointer 0.  */
+        vmovsd  -80(%ebp), %xmm0  /* Buffer 1, element 0.  */
+        vmovsd  %xmm0, (%eax)
+        movl    -92(%ebp), %eax  /* %xmm1 pointer 1.  */
+        vmovsd  -72(%ebp), %xmm0  /* Buffer 1, element 1.  */
+        vmovsd  %xmm0, (%eax)
+        movl    -88(%ebp), %eax  /* %xmm1 pointer 2.  */
+        vmovsd  -64(%ebp), %xmm0  /* Buffer 1, element 2.  */
+        vmovsd  %xmm0, (%eax)
+        movl    -84(%ebp), %eax  /* %xmm1 pointer 3.  */
+        vmovsd  -56(%ebp), %xmm0  /* Buffer 1, element 3.  */
+        vmovsd  %xmm0, (%eax)
+        movl    -112(%ebp), %eax  /* %xmm2 pointer 0.  */
+        vmovsd  -48(%ebp), %xmm0  /* Buffer 2, element 0.  */
+        vmovsd  %xmm0, (%eax)
+        movl    -108(%ebp), %eax  /* %xmm2 pointer 1.  */
+        vmovsd  -40(%ebp), %xmm0  /* Buffer 2, element 1.  */
+        vmovsd  %xmm0, (%eax)
+        movl    -104(%ebp), %eax  /* %xmm2 pointer 2.  */
+        vmovsd  -32(%ebp), %xmm0  /* Buffer 2, element 2.  */
+        vmovsd  %xmm0, (%eax)
+        movl    -100(%ebp), %eax  /* %xmm2 pointer 3.  */
+        vmovsd  -24(%ebp), %xmm0  /* Buffer 2, element 3.  */
+        vmovsd  %xmm0, (%eax)
+        addl    $104, %esp
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %rbp
+        leal    -8(%r10), %esp  /* Point %esp back at the original return address.  */
+        .cfi_def_cfa 7, 8
+        ret
+#endif
 END (_ZGVdN4vvv_sincos_avx2)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S
new file mode 100644
index 0000000000..8cf88f6461
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 wrapper version of vectorized sincos, vector length is 8.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN8vvv_sincos _ZGVeN8vvv_sincos_avx2_wrapper
+#include "../svml_d_sincos8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
deleted file mode 100644
index 64cb08c5d1..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized sincos.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN8vvv_sincos)
-        .type   _ZGVeN8vvv_sincos, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN8vvv_sincos_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN8vvv_sincos_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN8vvv_sincos_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN8vvv_sincos)
-
-#define _ZGVeN8vvv_sincos _ZGVeN8vvv_sincos_avx2_wrapper
-#include "../svml_d_sincos8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c
new file mode 100644
index 0000000000..1409872ed2
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincos, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN8vvv_sincos
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN8vvv_sincos, __GI__ZGVeN8vvv_sincos,
+	       __redirect__ZGVeN8vvv_sincos)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
index 44700f90b8..2df626c0c1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos8_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function sincos vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -36,9 +36,9 @@
      sin(R), sin(R') are approximated by corresponding polynomial.  */
 
 	.text
-ENTRY (_ZGVeN8vvv_sincos_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
+ENTRY (_ZGVeN8vl8l8_sincos_knl)
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
+WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
 #else
         pushq     %rbp
         cfi_adjust_cfa_offset (8)
@@ -278,12 +278,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
         shlq      $4, %r15
         vmovsd    1160(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 1224(%rsp,%r15)
         vmovsd    1160(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 1288(%rsp,%r15)
         jmp       .LBL_1_8
@@ -293,22 +293,23 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
         shlq      $4, %r15
         vmovsd    1152(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 1216(%rsp,%r15)
         vmovsd    1152(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 1280(%rsp,%r15)
         jmp       .LBL_1_7
 
 #endif
-END (_ZGVeN8vvv_sincos_knl)
+END (_ZGVeN8vl8l8_sincos_knl)
+libmvec_hidden_def(_ZGVeN8vl8l8_sincos_knl)
 
-ENTRY (_ZGVeN8vvv_sincos_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
+ENTRY (_ZGVeN8vl8l8_sincos_skx)
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
+WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
 #else
         pushq     %rbp
         cfi_adjust_cfa_offset (8)
@@ -557,12 +558,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
         vzeroupper
         vmovsd    1160(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 1224(%rsp,%r15)
         vmovsd    1160(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 1288(%rsp,%r15)
         jmp       .LBL_2_8
@@ -574,17 +575,171 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
         vzeroupper
         vmovsd    1152(%rsp,%r15), %xmm0
 
-        call      sin@PLT
+        call      JUMPTARGET(sin)
 
         vmovsd    %xmm0, 1216(%rsp,%r15)
         vmovsd    1152(%rsp,%r15), %xmm0
 
-        call      cos@PLT
+        call      JUMPTARGET(cos)
 
         vmovsd    %xmm0, 1280(%rsp,%r15)
         jmp       .LBL_2_7
 
 #endif
+END (_ZGVeN8vl8l8_sincos_skx)
+libmvec_hidden_def(_ZGVeN8vl8l8_sincos_skx)
+
+/* Wrapper between vvv and vl8l8 vector variants.  */
+.macro WRAPPER_AVX512_vvv_vl8l8 callee
+#ifndef __ILP32__  /* LP64: %zmm1/%zmm2 each carry eight 8-byte pointers.  */
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp  /* Align the stack to 64 bytes.  */
+        subq      $256, %rsp  /* 2 result buffers + 2 saved pointer vectors.  */
+        vmovups   %zmm1, 128(%rsp)  /* Save %zmm1 pointers at 128..191.  */
+        lea       (%rsp), %rdi  /* 1st callee argument: buffer 1 at 0..63.  */
+        vmovups   %zmm2, 192(%rdi)  /* Save %zmm2 pointers at 192..255.  */
+        lea       64(%rsp), %rsi  /* 2nd callee argument: buffer 2 at 64..127.  */
+        call      HIDDEN_JUMPTARGET(\callee)
+        movq      128(%rsp), %rdx  /* %zmm1 pointers 0-3.  */
+        movq      136(%rsp), %rsi
+        movq      144(%rsp), %r8
+        movq      152(%rsp), %r10
+        movq      (%rsp), %rax  /* Buffer 1, elements 0-3.  */
+        movq      8(%rsp), %rcx
+        movq      16(%rsp), %rdi
+        movq      24(%rsp), %r9
+        movq      %rax, (%rdx)  /* Scatter buffer 1 elements 0 and 1.  */
+        movq      %rcx, (%rsi)
+        movq      160(%rsp), %rax  /* %zmm1 pointers 4 and 5.  */
+        movq      168(%rsp), %rcx
+        movq      %rdi, (%r8)  /* Scatter buffer 1 elements 2 and 3.  */
+        movq      %r9, (%r10)
+        movq      176(%rsp), %rdi  /* %zmm1 pointers 6 and 7.  */
+        movq      184(%rsp), %r9
+        movq      32(%rsp), %r11  /* Buffer 1, elements 4-7.  */
+        movq      40(%rsp), %rdx
+        movq      48(%rsp), %rsi
+        movq      56(%rsp), %r8
+        movq      %r11, (%rax)  /* Scatter buffer 1 elements 4 and 5.  */
+        movq      %rdx, (%rcx)
+        movq      192(%rsp), %r11  /* %zmm2 pointers 0 and 1.  */
+        movq      200(%rsp), %rdx
+        movq      %rsi, (%rdi)  /* Scatter buffer 1 elements 6 and 7.  */
+        movq      %r8, (%r9)
+        movq      208(%rsp), %rsi  /* %zmm2 pointers 2 and 3.  */
+        movq      216(%rsp), %r8
+        movq      64(%rsp), %r10  /* Buffer 2, elements 0-3.  */
+        movq      72(%rsp), %rax
+        movq      80(%rsp), %rcx
+        movq      88(%rsp), %rdi
+        movq      %r10, (%r11)  /* Scatter buffer 2 elements 0 and 1.  */
+        movq      %rax, (%rdx)
+        movq      224(%rsp), %r10  /* %zmm2 pointers 4 and 5.  */
+        movq      232(%rsp), %rax
+        movq      %rcx, (%rsi)  /* Scatter buffer 2 elements 2 and 3.  */
+        movq      %rdi, (%r8)
+        movq      240(%rsp), %rcx  /* %zmm2 pointers 6 and 7.  */
+        movq      248(%rsp), %rdi
+        movq      96(%rsp), %r9  /* Buffer 2, elements 4-7.  */
+        movq      104(%rsp), %r11
+        movq      112(%rsp), %rdx
+        movq      120(%rsp), %rsi
+        movq      %r9, (%r10)  /* Scatter buffer 2 elements 4-7.  */
+        movq      %r11, (%rax)
+        movq      %rdx, (%rcx)
+        movq      %rsi, (%rdi)
+        movq      %rbp, %rsp  /* Drop the aligned frame.  */
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else  /* __ILP32__: %ymm1/%ymm2 each carry eight 4-byte pointers.  */
+        leal    8(%rsp), %r10d  /* %r10d = address just above the return address.  */
+        .cfi_def_cfa 10, 0
+        andl    $-64, %esp  /* Align the stack to 64 bytes.  */
+        pushq   -8(%r10d)  /* Re-push the return address on the aligned stack.  */
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r10  /* Keep the original stack address for the epilogue.  */
+        .cfi_escape 0xf,0x3,0x76,0x78,0x6
+        leal    -112(%rbp), %esi  /* 2nd callee argument: buffer 2.  */
+        leal    -176(%rbp), %edi  /* 1st callee argument: buffer 1.  */
+        subl    $232, %esp
+        vmovdqa %ymm1, -208(%ebp)  /* Save %ymm1 pointers at -208..-177.  */
+        vmovdqa %ymm2, -240(%ebp)  /* Save %ymm2 pointers at -240..-209.  */
+        call    HIDDEN_JUMPTARGET(\callee)
+        vmovdqa -208(%ebp), %xmm0
+        vmovq   %xmm0, %rax  /* %rax = %ymm1 pointers 0 (low half) and 1 (high half).  */
+        vmovsd  -176(%ebp), %xmm0  /* Buffer 1, element 0.  */
+        vmovsd  %xmm0, (%eax)
+        shrq    $32, %rax  /* Bring the odd-numbered pointer down into %eax.  */
+        vmovsd  -168(%ebp), %xmm0  /* Buffer 1, element 1.  */
+        vmovsd  %xmm0, (%eax)
+        movq    -200(%ebp), %rax  /* %ymm1 pointers 2 and 3.  */
+        vmovsd  -160(%ebp), %xmm0  /* Buffer 1, element 2.  */
+        vmovsd  %xmm0, (%eax)
+        shrq    $32, %rax
+        vmovsd  -152(%ebp), %xmm0  /* Buffer 1, element 3.  */
+        vmovsd  %xmm0, (%eax)
+        movq    -192(%ebp), %rax  /* %ymm1 pointers 4 and 5.  */
+        vmovsd  -144(%ebp), %xmm0  /* Buffer 1, element 4.  */
+        vmovsd  %xmm0, (%eax)
+        shrq    $32, %rax
+        vmovsd  -136(%ebp), %xmm0  /* Buffer 1, element 5.  */
+        vmovsd  %xmm0, (%eax)
+        movq    -184(%ebp), %rax  /* %ymm1 pointers 6 and 7.  */
+        vmovsd  -128(%ebp), %xmm0  /* Buffer 1, element 6.  */
+        vmovsd  %xmm0, (%eax)
+        shrq    $32, %rax
+        vmovsd  -120(%ebp), %xmm0  /* Buffer 1, element 7.  */
+        vmovsd  %xmm0, (%eax)
+        vmovdqa -240(%ebp), %xmm0
+        vmovq   %xmm0, %rax  /* %rax = %ymm2 pointers 0 (low half) and 1 (high half).  */
+        vmovsd  -112(%ebp), %xmm0  /* Buffer 2, element 0.  */
+        vmovsd  %xmm0, (%eax)
+        shrq    $32, %rax
+        vmovsd  -104(%ebp), %xmm0  /* Buffer 2, element 1.  */
+        vmovsd  %xmm0, (%eax)
+        movq    -232(%ebp), %rax  /* %ymm2 pointers 2 and 3.  */
+        vmovsd  -96(%ebp), %xmm0  /* Buffer 2, element 2.  */
+        vmovsd  %xmm0, (%eax)
+        shrq    $32, %rax
+        vmovsd  -88(%ebp), %xmm0  /* Buffer 2, element 3.  */
+        vmovsd  %xmm0, (%eax)
+        movq    -224(%ebp), %rax  /* %ymm2 pointers 4 and 5.  */
+        vmovsd  -80(%ebp), %xmm0  /* Buffer 2, element 4.  */
+        vmovsd  %xmm0, (%eax)
+        shrq    $32, %rax
+        vmovsd  -72(%ebp), %xmm0  /* Buffer 2, element 5.  */
+        vmovsd  %xmm0, (%eax)
+        movq    -216(%ebp), %rax  /* %ymm2 pointers 6 and 7.  */
+        vmovsd  -64(%ebp), %xmm0  /* Buffer 2, element 6.  */
+        vmovsd  %xmm0, (%eax)
+        shrq    $32, %rax
+        vmovsd  -56(%ebp), %xmm0  /* Buffer 2, element 7.  */
+        vmovsd  %xmm0, (%eax)
+        addl    $232, %esp
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %rbp
+        leal    -8(%r10), %esp  /* Point %esp back at the original return address.  */
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
+ENTRY (_ZGVeN8vvv_sincos_knl)
+WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_knl
+END (_ZGVeN8vvv_sincos_knl)
+
+ENTRY (_ZGVeN8vvv_sincos_skx)
+WRAPPER_AVX512_vvv_vl8l8 _ZGVeN8vl8l8_sincos_skx
 END (_ZGVeN8vvv_sincos_skx)
 
 	.section .rodata, "a"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S
new file mode 100644
index 0000000000..f01f89f294
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 wrapper version of vectorized cosf, vector length is 16.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper
+#include "../svml_s_cosf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
deleted file mode 100644
index 755254a280..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized cosf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN16v_cosf)
-        .type   _ZGVeN16v_cosf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN16v_cosf_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN16v_cosf_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN16v_cosf_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN16v_cosf)
-
-#define _ZGVeN16v_cosf _ZGVeN16v_cosf_avx2_wrapper
-#include "../svml_s_cosf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c
new file mode 100644
index 0000000000..5bd0441b16
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized cosf, vector length is 16.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN16v_cosf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_cosf, __GI__ZGVeN16v_cosf,
+	       __redirect__ZGVeN16v_cosf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
index 5004cd4758..6ea1137b42 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf16_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function cosf vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (_ZGVeN16v_cosf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
 #else
 /*
@@ -225,21 +225,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
         cfi_restore_state
         movzbl    %r12b, %r15d
         vmovss    1156(%rsp,%r15,8), %xmm0
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         jmp       .LBL_1_8
 
 .LBL_1_12:
         movzbl    %r12b, %r15d
         vmovss    1152(%rsp,%r15,8), %xmm0
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         jmp       .LBL_1_7
 #endif
 END (_ZGVeN16v_cosf_knl)
 
 ENTRY (_ZGVeN16v_cosf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
 #else
 /*
@@ -440,7 +440,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
         vmovss    1156(%rsp,%r15,8), %xmm0
         vzeroupper
         vmovss    1156(%rsp,%r15,8), %xmm0
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         jmp       .LBL_2_8
 .LBL_2_12:
@@ -448,7 +448,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_cosf
         vmovss    1152(%rsp,%r15,8), %xmm0
         vzeroupper
         vmovss    1152(%rsp,%r15,8), %xmm0
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         jmp       .LBL_2_7
 #endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S
new file mode 100644
index 0000000000..727189f8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized cosf, vector length is 4.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN4v_cosf _ZGVbN4v_cosf_sse2
+#include "../svml_s_cosf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
deleted file mode 100644
index ad7de18851..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized cosf, vector length is 4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN4v_cosf)
-        .type   _ZGVbN4v_cosf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN4v_cosf_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN4v_cosf_sse2(%rip), %rax
-        ret
-END (_ZGVbN4v_cosf)
-libmvec_hidden_def (_ZGVbN4v_cosf)
-
-#define _ZGVbN4v_cosf _ZGVbN4v_cosf_sse2
-#include "../svml_s_cosf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c
new file mode 100644
index 0000000000..dde470af5d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized cosf, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN4v_cosf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4v_cosf, __GI__ZGVbN4v_cosf,
+	       __redirect__ZGVbN4v_cosf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
index d23ff72a30..f4e0553bb3 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf4_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function cosf vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -211,7 +211,7 @@ ENTRY (_ZGVbN4v_cosf_sse4)
         movzbl    %r12b, %r15d
         movss     196(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         movss     %xmm0, 260(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -220,7 +220,7 @@ ENTRY (_ZGVbN4v_cosf_sse4)
         movzbl    %r12b, %r15d
         movss     192(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         movss     %xmm0, 256(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S
new file mode 100644
index 0000000000..1e1a5540c3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized cosf, vector length is 8.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN8v_cosf _ZGVdN8v_cosf_sse_wrapper
+#include "../svml_s_cosf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
deleted file mode 100644
index 602c70e324..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized cosf, vector length is 8.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN8v_cosf)
-        .type   _ZGVdN8v_cosf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN8v_cosf_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN8v_cosf_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN8v_cosf)
-libmvec_hidden_def (_ZGVdN8v_cosf)
-
-#define _ZGVdN8v_cosf _ZGVdN8v_cosf_sse_wrapper
-#include "../svml_s_cosf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c
new file mode 100644
index 0000000000..56531b215a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized cosf, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN8v_cosf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8v_cosf, __GI__ZGVdN8v_cosf,
+	       __redirect__ZGVdN8v_cosf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
index 513f3c0a29..dbff4a7b7e 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cosf8_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function cosf vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -197,7 +197,7 @@ ENTRY (_ZGVdN8v_cosf_avx2)
         vmovss    324(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         vmovss    %xmm0, 388(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -207,7 +207,7 @@ ENTRY (_ZGVdN8v_cosf_avx2)
         vmovss    320(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         vmovss    %xmm0, 384(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S
new file mode 100644
index 0000000000..e0b7fd787f
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core-avx2.S
@@ -0,0 +1,23 @@
+/* AVX2 version of vectorized expf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#define _ZGVeN16v_expf _ZGVeN16v_expf_avx2_wrapper
+#include "../svml_s_expf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
deleted file mode 100644
index f990d36483..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized expf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN16v_expf)
-        .type   _ZGVeN16v_expf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN16v_expf_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN16v_expf_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN16v_expf_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN16v_expf)
-
-#define _ZGVeN16v_expf _ZGVeN16v_expf_avx2_wrapper
-#include "../svml_s_expf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c
new file mode 100644
index 0000000000..d358d93546
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized expf, vector length is 16.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN16v_expf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_expf, __GI__ZGVeN16v_expf,
+	       __redirect__ZGVeN16v_expf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S
index 7eb7a1b775..89ba0df28f 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf16_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function expf vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (_ZGVeN16v_expf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
 #else
 /*
@@ -212,14 +212,14 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
         cfi_restore_state
         movzbl    %r12b, %r15d
         vmovss    1156(%rsp,%r15,8), %xmm0
-        call      expf@PLT
+        call      JUMPTARGET(__expf_finite)
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         jmp       .LBL_1_8
 
 .LBL_1_12:
         movzbl    %r12b, %r15d
         vmovss    1152(%rsp,%r15,8), %xmm0
-        call      expf@PLT
+        call      JUMPTARGET(__expf_finite)
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         jmp       .LBL_1_7
 
@@ -227,7 +227,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
 END (_ZGVeN16v_expf_knl)
 
 ENTRY (_ZGVeN16v_expf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
 #else
 /*
@@ -422,7 +422,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
         vzeroupper
         vmovss    1156(%rsp,%r15,8), %xmm0
 
-        call      expf@PLT
+        call      JUMPTARGET(__expf_finite)
 
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         jmp       .LBL_2_8
@@ -433,7 +433,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_expf
         vzeroupper
         vmovss    1152(%rsp,%r15,8), %xmm0
 
-        call      expf@PLT
+        call      JUMPTARGET(__expf_finite)
 
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         jmp       .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S
new file mode 100644
index 0000000000..8f57e4bbd9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized expf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN4v_expf _ZGVbN4v_expf_sse2
+#include "../svml_s_expf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
deleted file mode 100644
index 2fbe6d475e..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized expf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN4v_expf)
-        .type   _ZGVbN4v_expf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN4v_expf_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN4v_expf_sse2(%rip), %rax
-        ret
-END (_ZGVbN4v_expf)
-libmvec_hidden_def (_ZGVbN4v_expf)
-
-#define _ZGVbN4v_expf _ZGVbN4v_expf_sse2
-#include "../svml_s_expf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c
new file mode 100644
index 0000000000..82befe0b5d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized expf, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN4v_expf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4v_expf, __GI__ZGVbN4v_expf,
+	       __redirect__ZGVbN4v_expf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S
index c6f91e8dc1..254ec94096 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf4_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function expf vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -195,7 +195,7 @@ ENTRY (_ZGVbN4v_expf_sse4)
         movzbl    %r12b, %r15d
         movss     196(%rsp,%r15,8), %xmm0
 
-        call      expf@PLT
+        call      JUMPTARGET(__expf_finite)
 
         movss     %xmm0, 260(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -204,7 +204,7 @@ ENTRY (_ZGVbN4v_expf_sse4)
         movzbl    %r12b, %r15d
         movss     192(%rsp,%r15,8), %xmm0
 
-        call      expf@PLT
+        call      JUMPTARGET(__expf_finite)
 
         movss     %xmm0, 256(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S
new file mode 100644
index 0000000000..459699c80c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized expf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN8v_expf _ZGVdN8v_expf_sse_wrapper
+#include "../svml_s_expf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
deleted file mode 100644
index 7d19bb423d..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized expf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN8v_expf)
-        .type   _ZGVdN8v_expf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN8v_expf_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN8v_expf_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN8v_expf)
-libmvec_hidden_def (_ZGVdN8v_expf)
-
-#define _ZGVdN8v_expf _ZGVdN8v_expf_sse_wrapper
-#include "../svml_s_expf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c
new file mode 100644
index 0000000000..0b8a47ede0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized expf, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN8v_expf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8v_expf, __GI__ZGVdN8v_expf,
+	       __redirect__ZGVdN8v_expf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S
index c6be6954f7..ae1d5317e4 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_expf8_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function expf vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -184,7 +184,7 @@ ENTRY(_ZGVdN8v_expf_avx2)
         vmovss    324(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      expf@PLT
+        call      JUMPTARGET(__expf_finite)
 
         vmovss    %xmm0, 388(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -194,7 +194,7 @@ ENTRY(_ZGVdN8v_expf_avx2)
         vmovss    320(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      expf@PLT
+        call      JUMPTARGET(__expf_finite)
 
         vmovss    %xmm0, 384(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S
new file mode 100644
index 0000000000..b23bd12fa0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized logf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN16v_logf _ZGVeN16v_logf_avx2_wrapper
+#include "../svml_s_logf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
deleted file mode 100644
index 9efb2fb7df..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized logf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN16v_logf)
-        .type   _ZGVeN16v_logf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN16v_logf_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN16v_logf_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN16v_logf_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN16v_logf)
-
-#define _ZGVeN16v_logf _ZGVeN16v_logf_avx2_wrapper
-#include "../svml_s_logf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c
new file mode 100644
index 0000000000..fec61883b4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized logf, vector length is 16.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN16v_logf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16v_logf, __GI__ZGVeN16v_logf,
+	       __redirect__ZGVeN16v_logf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S
index 6209058381..4cf0a96fe4 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf16_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function logf vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (_ZGVeN16v_logf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
 #else
 /*
@@ -197,21 +197,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
         cfi_restore_state
         movzbl    %r12b, %r15d
         vmovss    1156(%rsp,%r15,8), %xmm0
-        call      logf@PLT
+        call      JUMPTARGET(__logf_finite)
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         jmp       .LBL_1_8
 
 .LBL_1_12:
         movzbl    %r12b, %r15d
         vmovss    1152(%rsp,%r15,8), %xmm0
-        call      logf@PLT
+        call      JUMPTARGET(__logf_finite)
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         jmp       .LBL_1_7
 #endif
 END (_ZGVeN16v_logf_knl)
 
 ENTRY (_ZGVeN16v_logf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
 #else
 /*
@@ -391,7 +391,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
         vzeroupper
         vmovss    1156(%rsp,%r15,8), %xmm0
 
-        call      logf@PLT
+        call      JUMPTARGET(__logf_finite)
 
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         jmp       .LBL_2_8
@@ -402,7 +402,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_logf
         vzeroupper
         vmovss    1152(%rsp,%r15,8), %xmm0
 
-        call      logf@PLT
+        call      JUMPTARGET(__logf_finite)
 
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         jmp       .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S
new file mode 100644
index 0000000000..2c2331e1d8
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized logf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN4v_logf _ZGVbN4v_logf_sse2
+#include "../svml_s_logf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
deleted file mode 100644
index c85615ac25..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized logf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN4v_logf)
-        .type   _ZGVbN4v_logf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN4v_logf_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN4v_logf_sse2(%rip), %rax
-        ret
-END (_ZGVbN4v_logf)
-libmvec_hidden_def (_ZGVbN4v_logf)
-
-#define _ZGVbN4v_logf _ZGVbN4v_logf_sse2
-#include "../svml_s_logf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c
new file mode 100644
index 0000000000..f249c351bd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized logf, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN4v_logf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4v_logf, __GI__ZGVbN4v_logf,
+	       __redirect__ZGVbN4v_logf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S
index 1ce9838513..651eb5eb1a 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf4_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function logf vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -177,7 +177,7 @@ ENTRY (_ZGVbN4v_logf_sse4)
         movzbl    %r12b, %r15d
         movss     196(%rsp,%r15,8), %xmm0
 
-        call      logf@PLT
+        call      JUMPTARGET(__logf_finite)
 
         movss     %xmm0, 260(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -186,7 +186,7 @@ ENTRY (_ZGVbN4v_logf_sse4)
         movzbl    %r12b, %r15d
         movss     192(%rsp,%r15,8), %xmm0
 
-        call      logf@PLT
+        call      JUMPTARGET(__logf_finite)
 
         movss     %xmm0, 256(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S
new file mode 100644
index 0000000000..862379277b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized logf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN8v_logf _ZGVdN8v_logf_sse_wrapper
+#include "../svml_s_logf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
deleted file mode 100644
index 8f6d83dd56..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized logf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN8v_logf)
-        .type   _ZGVdN8v_logf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN8v_logf_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN8v_logf_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN8v_logf)
-libmvec_hidden_def (_ZGVdN8v_logf)
-
-#define _ZGVdN8v_logf _ZGVdN8v_logf_sse_wrapper
-#include "../svml_s_logf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c
new file mode 100644
index 0000000000..dbd29657ca
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized logf, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN8v_logf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8v_logf, __GI__ZGVdN8v_logf,
+	       __redirect__ZGVdN8v_logf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S
index 91fb549ce6..c7f5448fcb 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_logf8_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function logf vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -166,7 +166,7 @@ ENTRY(_ZGVdN8v_logf_avx2)
         vmovss    324(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      logf@PLT
+        call      JUMPTARGET(__logf_finite)
 
         vmovss    %xmm0, 388(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -176,7 +176,7 @@ ENTRY(_ZGVdN8v_logf_avx2)
         vmovss    320(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      logf@PLT
+        call      JUMPTARGET(__logf_finite)
 
         vmovss    %xmm0, 384(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S
new file mode 100644
index 0000000000..de705c8632
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized powf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN16vv_powf _ZGVeN16vv_powf_avx2_wrapper
+#include "../svml_s_powf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
deleted file mode 100644
index 80048ce977..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized powf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN16vv_powf)
-        .type   _ZGVeN16vv_powf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN16vv_powf_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN16vv_powf_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN16vv_powf_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN16vv_powf)
-
-#define _ZGVeN16vv_powf _ZGVeN16vv_powf_avx2_wrapper
-#include "../svml_s_powf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c
new file mode 100644
index 0000000000..91ea810441
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized powf, vector length is 16.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN16vv_powf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16vv_powf, __GI__ZGVeN16vv_powf,
+	       __redirect__ZGVeN16vv_powf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S
index 45d48723af..bdcd50afe1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf16_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function powf vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -82,7 +82,7 @@
 
 	.text
 ENTRY (_ZGVeN16vv_powf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
 #else
         pushq     %rbp
@@ -344,7 +344,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
         movzbl    %r12b, %r15d
         vmovss    1156(%rsp,%r15,8), %xmm0
         vmovss    1220(%rsp,%r15,8), %xmm1
-        call      powf@PLT
+        call      JUMPTARGET(__powf_finite)
         vmovss    %xmm0, 1284(%rsp,%r15,8)
         jmp       .LBL_1_8
 
@@ -352,14 +352,14 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
         movzbl    %r12b, %r15d
         vmovss    1152(%rsp,%r15,8), %xmm0
         vmovss    1216(%rsp,%r15,8), %xmm1
-        call      powf@PLT
+        call      JUMPTARGET(__powf_finite)
         vmovss    %xmm0, 1280(%rsp,%r15,8)
         jmp       .LBL_1_7
 #endif
 END (_ZGVeN16vv_powf_knl)
 
 ENTRY (_ZGVeN16vv_powf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
 #else
         pushq     %rbp
@@ -629,7 +629,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
         vmovss    1156(%rsp,%r15,8), %xmm1
         vzeroupper
         vmovss    1092(%rsp,%r15,8), %xmm0
-        call      powf@PLT
+        call      JUMPTARGET(__powf_finite)
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         jmp       .LBL_2_8
 
@@ -638,7 +638,7 @@ WRAPPER_IMPL_AVX512_ff _ZGVdN8vv_powf
         vmovss    1152(%rsp,%r15,8), %xmm1
         vzeroupper
         vmovss    1088(%rsp,%r15,8), %xmm0
-        call      powf@PLT
+        call      JUMPTARGET(__powf_finite)
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         jmp       .LBL_2_7
 #endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S
new file mode 100644
index 0000000000..b6789a621d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized powf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN4vv_powf _ZGVbN4vv_powf_sse2
+#include "../svml_s_powf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
deleted file mode 100644
index b46821189b..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized powf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN4vv_powf)
-        .type   _ZGVbN4vv_powf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN4vv_powf_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN4vv_powf_sse2(%rip), %rax
-        ret
-END (_ZGVbN4vv_powf)
-libmvec_hidden_def (_ZGVbN4vv_powf)
-
-#define _ZGVbN4vv_powf _ZGVbN4vv_powf_sse2
-#include "../svml_s_powf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c
new file mode 100644
index 0000000000..8149d7c991
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized powf, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN4vv_powf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4vv_powf, __GI__ZGVbN4vv_powf,
+	       __redirect__ZGVbN4vv_powf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S
index 420f98c6a6..bc59545c98 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf4_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function powf vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -356,7 +356,7 @@ ENTRY (_ZGVbN4vv_powf_sse4)
         movss     68(%rsp,%r15,8), %xmm0
         movss     132(%rsp,%r15,8), %xmm1
 
-        call      powf@PLT
+        call      JUMPTARGET(__powf_finite)
 
         movss     %xmm0, 196(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -366,7 +366,7 @@ ENTRY (_ZGVbN4vv_powf_sse4)
         movss     64(%rsp,%r15,8), %xmm0
         movss     128(%rsp,%r15,8), %xmm1
 
-        call      powf@PLT
+        call      JUMPTARGET(__powf_finite)
 
         movss     %xmm0, 192(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S
new file mode 100644
index 0000000000..48da6d25c7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized powf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN8vv_powf _ZGVdN8vv_powf_sse_wrapper
+#include "../svml_s_powf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
deleted file mode 100644
index 945908a2ff..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized powf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN8vv_powf)
-        .type   _ZGVdN8vv_powf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN8vv_powf_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN8vv_powf_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN8vv_powf)
-libmvec_hidden_def (_ZGVdN8vv_powf)
-
-#define _ZGVdN8vv_powf _ZGVdN8vv_powf_sse_wrapper
-#include "../svml_s_powf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c
new file mode 100644
index 0000000000..0da188180e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized powf, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN8vv_powf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8vv_powf, __GI__ZGVdN8vv_powf,
+	       __redirect__ZGVdN8vv_powf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S
index 4446859130..53a4b4bc2b 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_powf8_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function powf vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -317,7 +317,7 @@ ENTRY(_ZGVdN8vv_powf_avx2)
         vmovss    132(%rsp,%r15,8), %xmm1
         vzeroupper
 
-        call      powf@PLT
+        call      JUMPTARGET(__powf_finite)
 
         vmovss    %xmm0, 196(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -328,7 +328,7 @@ ENTRY(_ZGVdN8vv_powf_avx2)
         vmovss    128(%rsp,%r15,8), %xmm1
         vzeroupper
 
-        call      powf@PLT
+        call      JUMPTARGET(__powf_finite)
 
         vmovss    %xmm0, 192(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S
new file mode 100644
index 0000000000..c677e3f1cf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized sincosf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN16vvv_sincosf _ZGVeN16vvv_sincosf_avx2_wrapper
+#include "../svml_s_sincosf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
deleted file mode 100644
index 16cee0c676..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized sincosf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN16vvv_sincosf)
-        .type   _ZGVeN16vvv_sincosf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN16vvv_sincosf_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN16vvv_sincosf_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN16vvv_sincosf_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN16vvv_sincosf)
-
-#define _ZGVeN16vvv_sincosf _ZGVeN16vvv_sincosf_avx2_wrapper
-#include "../svml_s_sincosf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c
new file mode 100644
index 0000000000..b753be6bbd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincosf, vector length is 16.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN16vvv_sincosf
+#include "ifunc-mathvec-avx512.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVeN16vvv_sincosf, __GI__ZGVeN16vvv_sincosf,
+	       __redirect__ZGVeN16vvv_sincosf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
index 758aeeaeed..5fa4bc412a 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf16_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function sincosf vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -41,7 +41,7 @@
         b) Calculate 2 polynomials for sin and cos:
            RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3))));
            RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4))));
-        c) Swap RS & RC if if first bit of obtained value after
+        c) Swap RS & RC if first bit of obtained value after
            Right Shifting is set to 1. Using And, Andnot & Or operations.
      3) Destination sign setting
         a) Set shifted destination sign using XOR operation:
@@ -49,9 +49,9 @@
            R2 = XOR( RC, SC ).  */
 
 	.text
-ENTRY (_ZGVeN16vvv_sincosf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
+ENTRY (_ZGVeN16vl4l4_sincosf_knl)
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
+WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf
 #else
         pushq     %rbp
         cfi_adjust_cfa_offset (8)
@@ -243,12 +243,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
         movzbl    %r12b, %r15d
         vmovss    1156(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         vmovss    1156(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         vmovss    %xmm0, 1284(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -257,20 +257,21 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
         movzbl    %r12b, %r15d
         vmovss    1152(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         vmovss    1152(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         vmovss    %xmm0, 1280(%rsp,%r15,8)
         jmp       .LBL_1_7
 #endif
-END (_ZGVeN16vvv_sincosf_knl)
+END (_ZGVeN16vl4l4_sincosf_knl)
+libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_knl)
 
-ENTRY (_ZGVeN16vvv_sincosf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+ENTRY (_ZGVeN16vl4l4_sincosf_skx)
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
-WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
+WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf
 #else
         pushq     %rbp
@@ -470,12 +471,12 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
         vzeroupper
         vmovss    1156(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         vmovss    1156(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         vmovss    %xmm0, 1284(%rsp,%r15,8)
         jmp       .LBL_2_8
@@ -486,16 +487,266 @@ WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
         vzeroupper
         vmovss    1152(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         vmovss    1152(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         vmovss    %xmm0, 1280(%rsp,%r15,8)
         jmp       .LBL_2_7
 #endif
+END (_ZGVeN16vl4l4_sincosf_skx)
+libmvec_hidden_def(_ZGVeN16vl4l4_sincosf_skx)
+
+/* Wrapper between vvv and vl4l4 vector variants.  */
+.macro WRAPPER_AVX512_vvv_vl4l4 callee
+#ifndef __ILP32__
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp
+        subq      $384, %rsp
+        vmovups   %zmm1, 128(%rsp)
+        lea       (%rsp), %rdi
+        vmovups   %zmm2, 192(%rdi)
+        vmovups   %zmm3, 256(%rdi)
+        vmovups   %zmm4, 320(%rdi)
+        lea       64(%rsp), %rsi
+        call      HIDDEN_JUMPTARGET(\callee)
+        movq      128(%rsp), %rdx
+        movq      136(%rsp), %rsi
+        movq      144(%rsp), %r8
+        movq      152(%rsp), %r10
+        movl      (%rsp), %eax
+        movl      4(%rsp), %ecx
+        movl      8(%rsp), %edi
+        movl      12(%rsp), %r9d
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      160(%rsp), %rax
+        movq      168(%rsp), %rcx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      176(%rsp), %rdi
+        movq      184(%rsp), %r9
+        movl      16(%rsp), %r11d
+        movl      20(%rsp), %edx
+        movl      24(%rsp), %esi
+        movl      28(%rsp), %r8d
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movq      192(%rsp), %r11
+        movq      200(%rsp), %rdx
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        movq      208(%rsp), %rsi
+        movq      216(%rsp), %r8
+        movl      32(%rsp), %r10d
+        movl      36(%rsp), %eax
+        movl      40(%rsp), %ecx
+        movl      44(%rsp), %edi
+        movl      %r10d, (%r11)
+        movl      %eax, (%rdx)
+        movq      224(%rsp), %r10
+        movq      232(%rsp), %rax
+        movl      %ecx, (%rsi)
+        movl      %edi, (%r8)
+        movq      240(%rsp), %rcx
+        movq      248(%rsp), %rdi
+        movl      48(%rsp), %r9d
+        movl      52(%rsp), %r11d
+        movl      56(%rsp), %edx
+        movl      60(%rsp), %esi
+        movl      %r9d, (%r10)
+        movl      %r11d, (%rax)
+        movq      256(%rsp), %r9
+        movq      264(%rsp), %r11
+        movl      %edx, (%rcx)
+        movl      %esi, (%rdi)
+        movq      272(%rsp), %rdx
+        movq      280(%rsp), %rsi
+        movl      64(%rsp), %r8d
+        movl      68(%rsp), %r10d
+        movl      72(%rsp), %eax
+        movl      76(%rsp), %ecx
+        movl      %r8d, (%r9)
+        movl      %r10d, (%r11)
+        movq      288(%rsp), %r8
+        movq      296(%rsp), %r10
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      304(%rsp), %rax
+        movq      312(%rsp), %rcx
+        movl      80(%rsp), %edi
+        movl      84(%rsp), %r9d
+        movl      88(%rsp), %r11d
+        movl      92(%rsp), %edx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      320(%rsp), %rdi
+        movq      328(%rsp), %r9
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movq      336(%rsp), %r11
+        movq      344(%rsp), %rdx
+        movl      96(%rsp), %esi
+        movl      100(%rsp), %r8d
+        movl      104(%rsp), %r10d
+        movl      108(%rsp), %eax
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        movq      352(%rsp), %rsi
+        movq      360(%rsp), %r8
+        movl      %r10d, (%r11)
+        movl      %eax, (%rdx)
+        movq      368(%rsp), %r10
+        movq      376(%rsp), %rax
+        movl      112(%rsp), %ecx
+        movl      116(%rsp), %edi
+        movl      120(%rsp), %r9d
+        movl      124(%rsp), %r11d
+        movl      %ecx, (%rsi)
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movl      %r11d, (%rax)
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else
+        leal    8(%rsp), %r10d
+        .cfi_def_cfa 10, 0
+        andl    $-64, %esp
+        pushq   -8(%r10d)
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x78,0x6
+        leal    -112(%rbp), %esi
+        leal    -176(%rbp), %edi
+        subl    $296, %esp
+        vmovdqa64 %zmm1, -240(%ebp)
+        vmovdqa64 %zmm2, -304(%ebp)
+        call    HIDDEN_JUMPTARGET(\callee)
+        movl    -240(%ebp), %eax
+        vmovss  -176(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -236(%ebp), %eax
+        vmovss  -172(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -232(%ebp), %eax
+        vmovss  -168(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -228(%ebp), %eax
+        vmovss  -164(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -224(%ebp), %eax
+        vmovss  -160(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -220(%ebp), %eax
+        vmovss  -156(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -216(%ebp), %eax
+        vmovss  -152(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -212(%ebp), %eax
+        vmovss  -148(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -208(%ebp), %eax
+        vmovss  -144(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -204(%ebp), %eax
+        vmovss  -140(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -200(%ebp), %eax
+        vmovss  -136(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -196(%ebp), %eax
+        vmovss  -132(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -192(%ebp), %eax
+        vmovss  -128(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -188(%ebp), %eax
+        vmovss  -124(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -184(%ebp), %eax
+        vmovss  -120(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -180(%ebp), %eax
+        vmovss  -116(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -304(%ebp), %eax
+        vmovss  -112(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -300(%ebp), %eax
+        vmovss  -108(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -296(%ebp), %eax
+        vmovss  -104(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -292(%ebp), %eax
+        vmovss  -100(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -288(%ebp), %eax
+        vmovss  -96(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -284(%ebp), %eax
+        vmovss  -92(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -280(%ebp), %eax
+        vmovss  -88(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -276(%ebp), %eax
+        vmovss  -84(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -272(%ebp), %eax
+        vmovss  -80(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -268(%ebp), %eax
+        vmovss  -76(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -264(%ebp), %eax
+        vmovss  -72(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -260(%ebp), %eax
+        vmovss  -68(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -256(%ebp), %eax
+        vmovss  -64(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -252(%ebp), %eax
+        vmovss  -60(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -248(%ebp), %eax
+        vmovss  -56(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -244(%ebp), %eax
+        vmovss  -52(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        addl    $296, %esp
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %rbp
+        leal    -8(%r10), %esp
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
+ENTRY (_ZGVeN16vvv_sincosf_knl)
+WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_knl
+END (_ZGVeN16vvv_sincosf_knl)
+
+ENTRY (_ZGVeN16vvv_sincosf_skx)
+WRAPPER_AVX512_vvv_vl4l4 _ZGVeN16vl4l4_sincosf_skx
 END (_ZGVeN16vvv_sincosf_skx)
 
 	.section .rodata, "a"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S
new file mode 100644
index 0000000000..cc718b3a2e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized sincosf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN4vvv_sincosf _ZGVbN4vvv_sincosf_sse2
+#include "../svml_s_sincosf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
deleted file mode 100644
index d72b4049e2..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sincosf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN4vvv_sincosf)
-        .type   _ZGVbN4vvv_sincosf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN4vvv_sincosf_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN4vvv_sincosf_sse2(%rip), %rax
-        ret
-END (_ZGVbN4vvv_sincosf)
-libmvec_hidden_def (_ZGVbN4vvv_sincosf)
-
-#define _ZGVbN4vvv_sincosf _ZGVbN4vvv_sincosf_sse2
-#include "../svml_s_sincosf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c
new file mode 100644
index 0000000000..705d96a8fb
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincosf, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN4vvv_sincosf
+#include "ifunc-mathvec-sse4_1.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVbN4vvv_sincosf, __GI__ZGVbN4vvv_sincosf,
+	       __redirect__ZGVbN4vvv_sincosf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
index 643fc0ca3b..d758ceeb30 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf4_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function sincosf vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
 #include "svml_s_trig_data.h"
 
 	.text
-ENTRY (_ZGVbN4vvv_sincosf_sse4)
+ENTRY (_ZGVbN4vl4l4_sincosf_sse4)
 /*
    ALGORITHM DESCRIPTION:
 
@@ -42,7 +42,7 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4)
         b) Calculate 2 polynomials for sin and cos:
            RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3))));
            RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4))));
-        c) Swap RS & RC if if first bit of obtained value after
+        c) Swap RS & RC if first bit of obtained value after
            Right Shifting is set to 1. Using And, Andnot & Or operations.
      3) Destination sign setting
         a) Set shifted destination sign using XOR operation:
@@ -241,12 +241,12 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4)
         movzbl    %r12b, %r15d
         movss     132(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         movss     %xmm0, 196(%rsp,%r15,8)
         movss     132(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         movss     %xmm0, 260(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -255,14 +255,92 @@ ENTRY (_ZGVbN4vvv_sincosf_sse4)
         movzbl    %r12b, %r15d
         movss     128(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         movss     %xmm0, 192(%rsp,%r15,8)
         movss     128(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         movss     %xmm0, 256(%rsp,%r15,8)
         jmp       .LBL_1_7
 
+END (_ZGVbN4vl4l4_sincosf_sse4)
+libmvec_hidden_def(_ZGVbN4vl4l4_sincosf_sse4)
+
+/* vvv version implemented with wrapper to vl4l4 variant.  */
+ENTRY (_ZGVbN4vvv_sincosf_sse4)
+#ifndef __ILP32__
+        subq      $104, %rsp
+        .cfi_def_cfa_offset 112
+        movdqu    %xmm1, 32(%rsp)
+        lea       (%rsp), %rdi
+        movdqu    %xmm2, 48(%rdi)
+        lea       16(%rsp), %rsi
+        movdqu    %xmm3, 48(%rsi)
+        movdqu    %xmm4, 64(%rsi)
+        call      HIDDEN_JUMPTARGET(_ZGVbN4vl4l4_sincosf_sse4)
+        movq      32(%rsp), %rdx
+        movq      40(%rsp), %rsi
+        movq      48(%rsp), %r8
+        movq      56(%rsp), %r10
+        movl      (%rsp), %eax
+        movl      4(%rsp), %ecx
+        movl      8(%rsp), %edi
+        movl      12(%rsp), %r9d
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      64(%rsp), %rax
+        movq      72(%rsp), %rcx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      80(%rsp), %rdi
+        movq      88(%rsp), %r9
+        movl      16(%rsp), %r11d
+        movl      20(%rsp), %edx
+        movl      24(%rsp), %esi
+        movl      28(%rsp), %r8d
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        addq      $104, %rsp
+        .cfi_def_cfa_offset 8
+        ret
+#else
+        subl    $72, %esp
+        .cfi_def_cfa_offset 80
+        leal    48(%rsp), %esi
+        movaps  %xmm1, 16(%esp)
+        leal    32(%rsp), %edi
+        movaps  %xmm2, (%esp)
+        call    HIDDEN_JUMPTARGET(_ZGVbN4vl4l4_sincosf_sse4)
+        movl    16(%esp), %eax
+        movss   32(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movl    20(%esp), %eax
+        movss   36(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movl    24(%esp), %eax
+        movss   40(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movl    28(%esp), %eax
+        movss   44(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movl    (%esp), %eax
+        movss   48(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movl    4(%esp), %eax
+        movss   52(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movl    8(%esp), %eax
+        movss   56(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movl    12(%esp), %eax
+        movss   60(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        addl    $72, %esp
+        .cfi_def_cfa_offset 8
+        ret
+#endif
 END (_ZGVbN4vvv_sincosf_sse4)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S
new file mode 100644
index 0000000000..348d1e6619
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized sincosf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN8vvv_sincosf _ZGVdN8vvv_sincosf_sse_wrapper
+#include "../svml_s_sincosf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
deleted file mode 100644
index 0123b8024e..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sincosf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN8vvv_sincosf)
-        .type   _ZGVdN8vvv_sincosf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN8vvv_sincosf_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN8vvv_sincosf_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN8vvv_sincosf)
-libmvec_hidden_def (_ZGVdN8vvv_sincosf)
-
-#define _ZGVdN8vvv_sincosf _ZGVdN8vvv_sincosf_sse_wrapper
-#include "../svml_s_sincosf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c
new file mode 100644
index 0000000000..74f3d3f041
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sincosf, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN8vvv_sincosf
+#include "ifunc-mathvec-avx2.h"
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED
+__hidden_ver1 (_ZGVdN8vvv_sincosf, __GI__ZGVdN8vvv_sincosf,
+	       __redirect__ZGVdN8vvv_sincosf)
+  __attribute__ ((visibility ("hidden")));
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
index f2a0ba7116..8b4b92dd94 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sincosf8_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function sincosf vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,7 +20,7 @@
 #include "svml_s_trig_data.h"
 
 	.text
-ENTRY(_ZGVdN8vvv_sincosf_avx2)
+ENTRY (_ZGVdN8vl4l4_sincosf_avx2)
 /*
    ALGORITHM DESCRIPTION:
 
@@ -42,7 +42,7 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2)
         b) Calculate 2 polynomials for sin and cos:
            RS = X * ( A0 + X^2 * (A1 + x^2 * (A2 + x^2 * (A3))));
            RC = B0 + X^2 * (B1 + x^2 * (B2 + x^2 * (B3 + x^2 * (B4))));
-        c) Swap RS & RC if if first bit of obtained value after
+        c) Swap RS & RC if first bit of obtained value after
            Right Shifting is set to 1. Using And, Andnot & Or operations.
      3) Destination sign setting
         a) Set shifted destination sign using XOR operation:
@@ -213,12 +213,12 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2)
         vmovss    260(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 324(%rsp,%r15,8)
         vmovss    260(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         vmovss    %xmm0, 388(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -228,14 +228,162 @@ ENTRY(_ZGVdN8vvv_sincosf_avx2)
         vmovss    256(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 320(%rsp,%r15,8)
         vmovss    256(%rsp,%r15,8), %xmm0
 
-        call      cosf@PLT
+        call      JUMPTARGET(cosf)
 
         vmovss    %xmm0, 384(%rsp,%r15,8)
         jmp       .LBL_1_7
 
-END(_ZGVdN8vvv_sincosf_avx2)
+END (_ZGVdN8vl4l4_sincosf_avx2)
+libmvec_hidden_def(_ZGVdN8vl4l4_sincosf_avx2)
+
+/* vvv version implemented with wrapper to vl4l4 variant.  */
+ENTRY (_ZGVdN8vvv_sincosf_avx2)
+#ifndef __ILP32__
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-32, %rsp
+        subq      $192, %rsp
+        vmovdqu   %ymm1, 64(%rsp)
+        lea       (%rsp), %rdi
+        vmovdqu   %ymm2, 96(%rdi)
+        vmovdqu   %ymm3, 128(%rdi)
+        vmovdqu   %ymm4, 160(%rdi)
+        lea       32(%rsp), %rsi
+        call      HIDDEN_JUMPTARGET(_ZGVdN8vl4l4_sincosf_avx2)
+        movq      64(%rsp), %rdx
+        movq      72(%rsp), %rsi
+        movq      80(%rsp), %r8
+        movq      88(%rsp), %r10
+        movl      (%rsp), %eax
+        movl      4(%rsp), %ecx
+        movl      8(%rsp), %edi
+        movl      12(%rsp), %r9d
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      96(%rsp), %rax
+        movq      104(%rsp), %rcx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      112(%rsp), %rdi
+        movq      120(%rsp), %r9
+        movl      16(%rsp), %r11d
+        movl      20(%rsp), %edx
+        movl      24(%rsp), %esi
+        movl      28(%rsp), %r8d
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movq      128(%rsp), %r11
+        movq      136(%rsp), %rdx
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        movq      144(%rsp), %rsi
+        movq      152(%rsp), %r8
+        movl      32(%rsp), %r10d
+        movl      36(%rsp), %eax
+        movl      40(%rsp), %ecx
+        movl      44(%rsp), %edi
+        movl      %r10d, (%r11)
+        movl      %eax, (%rdx)
+        movq      160(%rsp), %r10
+        movq      168(%rsp), %rax
+        movl      %ecx, (%rsi)
+        movl      %edi, (%r8)
+        movq      176(%rsp), %rcx
+        movq      184(%rsp), %rdi
+        movl      48(%rsp), %r9d
+        movl      52(%rsp), %r11d
+        movl      56(%rsp), %edx
+        movl      60(%rsp), %esi
+        movl      %r9d, (%r10)
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movl      %esi, (%rdi)
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else
+        leal    8(%rsp), %r10d
+        .cfi_def_cfa 10, 0
+        andl    $-32, %esp
+        pushq   -8(%r10d)
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x78,0x6
+        leal    -48(%rbp), %esi
+        leal    -80(%rbp), %edi
+        subl    $136, %esp
+        vmovdqa %ymm1, -112(%ebp)
+        vmovdqa %ymm2, -144(%ebp)
+        call    HIDDEN_JUMPTARGET(_ZGVdN8vl4l4_sincosf_avx2)
+        vmovdqa -112(%ebp), %xmm0
+        vmovq   %xmm0, %rax
+        vmovss  -80(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -76(%ebp), %xmm0
+        shrq    $32, %rax
+        vmovss  %xmm0, (%eax)
+        movq    -104(%ebp), %rax
+        vmovss  -72(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -68(%ebp), %xmm0
+        shrq    $32, %rax
+        vmovss  %xmm0, (%eax)
+        movq    -96(%ebp), %rax
+        vmovss  -64(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -60(%ebp), %xmm0
+        shrq    $32, %rax
+        vmovss  %xmm0, (%eax)
+        movq    -88(%ebp), %rax
+        vmovss  -56(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -52(%ebp), %xmm0
+        shrq    $32, %rax
+        vmovss  %xmm0, (%eax)
+        vmovdqa -144(%ebp), %xmm0
+        vmovq   %xmm0, %rax
+        vmovss  -48(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -44(%ebp), %xmm0
+        shrq    $32, %rax
+        vmovss  %xmm0, (%eax)
+        movq    -136(%ebp), %rax
+        vmovss  -40(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -36(%ebp), %xmm0
+        shrq    $32, %rax
+        vmovss  %xmm0, (%eax)
+        movq    -128(%ebp), %rax
+        vmovss  -32(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -28(%ebp), %xmm0
+        shrq    $32, %rax
+        vmovss  %xmm0, (%eax)
+        movq    -120(%ebp), %rax
+        vmovss  -24(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -20(%ebp), %xmm0
+        shrq    $32, %rax
+        vmovss  %xmm0, (%eax)
+        addl    $136, %esp
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %rbp
+        leal    -8(%r10), %esp
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+END (_ZGVdN8vvv_sincosf_avx2)
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S
new file mode 100644
index 0000000000..fa521b9dac
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core-avx2.S
@@ -0,0 +1,20 @@
+/* AVX2 version of vectorized sinf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVeN16v_sinf _ZGVeN16v_sinf_avx2_wrapper
+#include "../svml_s_sinf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
deleted file mode 100644
index 2212cdd94d..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/* Multiple versions of vectorized sinf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVeN16v_sinf)
-        .type   _ZGVeN16v_sinf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVeN16v_sinf_skx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512DQ_Usable)
-        jnz     2f
-        leaq    _ZGVeN16v_sinf_knl(%rip), %rax
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-        jnz     2f
-        leaq    _ZGVeN16v_sinf_avx2_wrapper(%rip), %rax
-2:      ret
-END (_ZGVeN16v_sinf)
-
-#define _ZGVeN16v_sinf _ZGVeN16v_sinf_avx2_wrapper
-#include "../svml_s_sinf16_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c
new file mode 100644
index 0000000000..97e5b58284
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sinf, vector length is 16.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVeN16v_sinf	/* IFUNC symbol defined below.  */
+#include "ifunc-mathvec-avx512.h"	/* Presumably defines IFUNC_SELECTOR.  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED /* Hidden-visibility declaration, shared builds only.  */
+__hidden_ver1 (_ZGVeN16v_sinf, __GI__ZGVeN16v_sinf,
+	       __redirect__ZGVeN16v_sinf)
+  __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
index 61d8d3793a..141f747eb5 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf16_core_avx512.S
@@ -1,5 +1,5 @@
 /* Function sinf vectorized with AVX-512. KNL and SKX versions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY(_ZGVeN16v_sinf_knl)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
 #else
 /*
@@ -229,21 +229,21 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
         cfi_restore_state
         movzbl    %r12b, %r15d
         vmovss    1156(%rsp,%r15,8), %xmm0
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         jmp       .LBL_1_8
 
 .LBL_1_12:
         movzbl    %r12b, %r15d
         vmovss    1152(%rsp,%r15,8), %xmm0
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         jmp       .LBL_1_7
 #endif
 END(_ZGVeN16v_sinf_knl)
 
 ENTRY (_ZGVeN16v_sinf_skx)
-#ifndef HAVE_AVX512_ASM_SUPPORT
+#ifndef HAVE_AVX512DQ_ASM_SUPPORT
 WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
 #else
 /*
@@ -455,7 +455,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
         vzeroupper
         vmovss    1156(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 1220(%rsp,%r15,8)
         jmp       .LBL_2_8
@@ -466,7 +466,7 @@ WRAPPER_IMPL_AVX512 _ZGVdN8v_sinf
         vzeroupper
         vmovss    1152(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 1216(%rsp,%r15,8)
         jmp       .LBL_2_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S
new file mode 100644
index 0000000000..1d2e65c39d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core-sse2.S
@@ -0,0 +1,20 @@
+/* SSE2 version of vectorized sinf.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVbN4v_sinf _ZGVbN4v_sinf_sse2
+#include "../svml_s_sinf4_core.S"	/* Code is emitted under that name.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
deleted file mode 100644
index b31554730d..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sinf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVbN4v_sinf)
-        .type   _ZGVbN4v_sinf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVbN4v_sinf_sse4(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-        jz      2f
-        ret
-2:      leaq    _ZGVbN4v_sinf_sse2(%rip), %rax
-        ret
-END (_ZGVbN4v_sinf)
-libmvec_hidden_def (_ZGVbN4v_sinf)
-
-#define _ZGVbN4v_sinf _ZGVbN4v_sinf_sse2
-#include "../svml_s_sinf4_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c
new file mode 100644
index 0000000000..93b8bfebbf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sinf, vector length is 4.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVbN4v_sinf	/* IFUNC symbol defined below.  */
+#include "ifunc-mathvec-sse4_1.h"	/* Presumably defines IFUNC_SELECTOR.  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED /* Hidden-visibility declaration, shared builds only.  */
+__hidden_ver1 (_ZGVbN4v_sinf, __GI__ZGVbN4v_sinf,
+	       __redirect__ZGVbN4v_sinf)
+  __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S
index 5268ab1f09..39a4c92235 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf4_core_sse4.S
@@ -1,5 +1,5 @@
 /* Function sinf vectorized with SSE4.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -207,7 +207,7 @@ ENTRY(_ZGVbN4v_sinf_sse4)
         movzbl    %r12b, %r15d
         movss     196(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         movss     %xmm0, 260(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -216,7 +216,7 @@ ENTRY(_ZGVbN4v_sinf_sse4)
         movzbl    %r12b, %r15d
         movss     192(%rsp,%r15,8), %xmm0
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         movss     %xmm0, 256(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S
new file mode 100644
index 0000000000..f2af3a0b4b
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core-sse.S
@@ -0,0 +1,20 @@
+/* SSE version of vectorized sinf, vector length is 8.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define _ZGVdN8v_sinf _ZGVdN8v_sinf_sse_wrapper
+#include "../svml_s_sinf8_core.S"	/* Code is emitted under that name.  */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
deleted file mode 100644
index 47fe0a4adc..0000000000
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Multiple versions of vectorized sinf, vector length is 8.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN8v_sinf)
-        .type   _ZGVdN8v_sinf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-1:      leaq    _ZGVdN8v_sinf_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN8v_sinf_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN8v_sinf)
-libmvec_hidden_def (_ZGVdN8v_sinf)
-
-#define _ZGVdN8v_sinf _ZGVdN8v_sinf_sse_wrapper
-#include "../svml_s_sinf8_core.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c
new file mode 100644
index 0000000000..cf13b6647c
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core.c
@@ -0,0 +1,28 @@
+/* Multiple versions of vectorized sinf, vector length is 8.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define SYMBOL_NAME _ZGVdN8v_sinf	/* IFUNC symbol defined below.  */
+#include "ifunc-mathvec-avx2.h"	/* Presumably defines IFUNC_SELECTOR.  */
+
+libc_ifunc_redirected (REDIRECT_NAME, SYMBOL_NAME, IFUNC_SELECTOR ());
+
+#ifdef SHARED /* Hidden-visibility declaration, shared builds only.  */
+__hidden_ver1 (_ZGVdN8v_sinf, __GI__ZGVdN8v_sinf,
+	       __redirect__ZGVdN8v_sinf)
+  __attribute__ ((visibility ("hidden")));
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S
index 9fdaadb2e8..5f7a95e9ad 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_sinf8_core_avx2.S
@@ -1,5 +1,5 @@
 /* Function sinf vectorized with AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -201,7 +201,7 @@ ENTRY(_ZGVdN8v_sinf_avx2)
         vmovss    324(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 388(%rsp,%r15,8)
         jmp       .LBL_1_8
@@ -211,7 +211,7 @@ ENTRY(_ZGVdN8v_sinf_avx2)
         vmovss    320(%rsp,%r15,8), %xmm0
         vzeroupper
 
-        call      sinf@PLT
+        call      JUMPTARGET(sinf)
 
         vmovss    %xmm0, 384(%rsp,%r15,8)
         jmp       .LBL_1_7
diff --git a/sysdeps/x86_64/fpu/printf_fphex.c b/sysdeps/x86_64/fpu/printf_fphex.c
index 0fbaa3748e..62efed10da 100644
--- a/sysdeps/x86_64/fpu/printf_fphex.c
+++ b/sysdeps/x86_64/fpu/printf_fphex.c
@@ -1,5 +1,5 @@
 /* Print floating point number in hexadecimal notation according to ISO C99.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+   Copyright (C) 1997-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/s_ceill.S b/sysdeps/x86_64/fpu/s_ceill.S
index 910c371d58..8f2bd351f6 100644
--- a/sysdeps/x86_64/fpu/s_ceill.S
+++ b/sysdeps/x86_64/fpu/s_ceill.S
@@ -5,27 +5,33 @@
  * Public domain.
  */
 
+#include <libm-alias-ldouble.h>
 #include <machine/asm.h>
 
 
 ENTRY(__ceill)
 	fldt	8(%rsp)
 
-	fstcw	-4(%rsp)		/* store fpu control word */
+	fnstenv	-28(%rsp)		/* store fpu environment */
 
 	/* We use here %edx although only the low 1 bits are defined.
 	   But none of the operations should care and they are faster
 	   than the 16 bit operations.  */
 	movl	$0x0800,%edx		/* round towards +oo */
-	orl	-4(%rsp),%edx
+	orl	-28(%rsp),%edx
 	andl	$0xfbff,%edx
-	movl	%edx,-8(%rsp)
-	fldcw	-8(%rsp)		/* load modified control word */
+	movl	%edx,-32(%rsp)
+	fldcw	-32(%rsp)		/* load modified control word */
 
 	frndint				/* round */
 
-	fldcw	-4(%rsp)		/* restore original control word */
+	/* Preserve "invalid" from sNaN input; fldenv would otherwise drop it.  */
+	fnstsw				/* status word -> %ax */
+	andl	$0x1, %eax		/* keep only the invalid flag */
+	orl	%eax, -24(%rsp)		/* merge into saved status word */
+
+	fldenv	-28(%rsp)		/* restore original environment */
 
 	ret
 END (__ceill)
-weak_alias (__ceill, ceill)
+libm_alias_ldouble (__ceil, ceil)
diff --git a/sysdeps/x86_64/fpu/s_copysign.S b/sysdeps/x86_64/fpu/s_copysign.S
index 18f568f46f..e2921ce770 100644
--- a/sysdeps/x86_64/fpu/s_copysign.S
+++ b/sysdeps/x86_64/fpu/s_copysign.S
@@ -1,5 +1,5 @@
 /* copy sign, double version.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2002.
 
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <machine/asm.h>
+#include <libm-alias-double.h>
 
 	.section .rodata.cst16,"aM",@progbits,16
 
@@ -47,4 +48,4 @@ ENTRY(__copysign)
 	ret
 END (__copysign)
 
-weak_alias (__copysign, copysign)
+libm_alias_double (__copysign, copysign)
diff --git a/sysdeps/x86_64/fpu/s_copysignf.S b/sysdeps/x86_64/fpu/s_copysignf.S
index 00a1fabaee..4093e781fe 100644
--- a/sysdeps/x86_64/fpu/s_copysignf.S
+++ b/sysdeps/x86_64/fpu/s_copysignf.S
@@ -1,5 +1,5 @@
 /* copy sign, double version.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2002.
 
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <machine/asm.h>
+#include <libm-alias-float.h>
 
 	.section .rodata
 
@@ -42,4 +43,4 @@ ENTRY(__copysignf)
 	retq
 END (__copysignf)
 
-weak_alias (__copysignf, copysignf)
+libm_alias_float (__copysign, copysign)
diff --git a/sysdeps/x86_64/fpu/s_copysignl.S b/sysdeps/x86_64/fpu/s_copysignl.S
index 2ffd612d65..8616205d38 100644
--- a/sysdeps/x86_64/fpu/s_copysignl.S
+++ b/sysdeps/x86_64/fpu/s_copysignl.S
@@ -5,6 +5,7 @@
  * Public domain.
  */
 
+#include <libm-alias-ldouble.h>
 #include <machine/asm.h>
 
 RCSID("$NetBSD: $")
@@ -19,4 +20,4 @@ ENTRY(__copysignl)
 	fldt	8(%rsp)
 	ret
 END (__copysignl)
-weak_alias (__copysignl, copysignl)
+libm_alias_ldouble (__copysign, copysign)
diff --git a/sysdeps/x86_64/fpu/s_cosf.S b/sysdeps/x86_64/fpu/s_cosf.S
deleted file mode 100644
index 31968e498f..0000000000
--- a/sysdeps/x86_64/fpu/s_cosf.S
+++ /dev/null
@@ -1,533 +0,0 @@
-/* Optimized cosf function.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- *  1) if |x| == 0: return 1.0-|x|.
- *  2) if |x| <  2^-27: return 1.0-|x|.
- *  3) if |x| <  2^-5 : return 1.0+x^2*DP_COS2_0+x^5*DP_COS2_1.
- *  4) if |x| <   Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
- *  5) if |x| < 9*Pi/4:
- *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
- *           t=|x|-j*Pi/4.
- *      5.2) Reconstruction:
- *          s = (-1.0)^((n>>2)&1)
- *          if(n&2 != 0) {
- *              using cos(t) polynomial for |t|<Pi/4, result is
- *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
- *          } else {
- *              using sin(t) polynomial for |t|<Pi/4, result is
- *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
- *          }
- *  6) if |x| < 2^23, large args:
- *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
- *           t=|x|-j*Pi/4.
- *      6.2) Reconstruction same as (5.2).
- *  7) if |x| >= 2^23, very large args:
- *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
- *           t=|x|-j*Pi/4.
- *      7.2) Reconstruction same as (5.2).
- *  8) if x is Inf, return x-x, and set errno=EDOM.
- *  9) if x is NaN, return x-x.
- *
- * Special cases:
- *  cos(+-0) = 1 not raising inexact,
- *  cos(subnormal) raises inexact,
- *  cos(min_normalized) raises inexact,
- *  cos(normalized) raises inexact,
- *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
- *  cos(NaN) = NaN.
- */
-
-	.text
-ENTRY(__cosf)
-	/* Input: single precision x in %xmm0 */
-
-	movd	%xmm0, %eax		/* Bits of x */
-	movaps	%xmm0, %xmm7		/* Copy of x */
-	cvtss2sd %xmm0, %xmm0		/* DP x */
-	movss	L(SP_ABS_MASK)(%rip), %xmm3
-	andl	$0x7fffffff, %eax	/* |x| */
-
-	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4?  */
-	jb	L(arg_less_pio4)
-
-	/* Here if |x|>=Pi/4 */
-	andps	%xmm7, %xmm3		/* SP |x| */
-	andpd	L(DP_ABS_MASK)(%rip), %xmm0	/* DP |x| */
-	movss	L(SP_INVPIO4)(%rip), %xmm2	/* SP 1/(Pi/4) */
-
-	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4?  */
-	jae	L(large_args)
-
-	/* Here if Pi/4<=|x|<9*Pi/4 */
-	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
-	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
-	lea	L(PIO4J)(%rip), %rsi
-	addl	$1, %eax		/* k+1 */
-	movl	$0x0e, %edx
-	andl	%eax, %edx		/* j = (k+1)&0x0e */
-	addl	$2, %eax		/* n */
-	subsd	(%rsi,%rdx,8), %xmm0	/* t = |x| - j * Pi/4 */
-
-L(reconstruction):
-	/* Input: %eax=n, %xmm0=t */
-	testl	$2, %eax		/* n&2 != 0?  */
-	jz	L(sin_poly)
-
-/*L(cos_poly):*/
-	/* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
-	 * y = t*t; z = y*y;
-	 * s = sign(x) * (-1.0)^((n>>2)&1)
-	 * result = s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
-	 */
-	shrl	$2, %eax		/* n>>2 */
-	mulsd	%xmm0, %xmm0		/* y=t^2 */
-	andl	$1, %eax		/* (n>>2)&1 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=t^4 */
-
-	movsd	L(DP_C4)(%rip), %xmm4	/* C4 */
-	mulsd	%xmm0, %xmm4		/* z*C4 */
-	movsd	L(DP_C3)(%rip), %xmm3	/* C3 */
-	mulsd	%xmm0, %xmm3		/* z*C3 */
-	lea	L(DP_ONES)(%rip), %rsi
-	addsd	L(DP_C2)(%rip), %xmm4	/* C2+z*C4 */
-	mulsd	%xmm0, %xmm4		/* z*(C2+z*C4) */
-	addsd	L(DP_C1)(%rip), %xmm3	/* C1+z*C3 */
-	mulsd	%xmm0, %xmm3		/* z*(C1+z*C3) */
-	addsd	L(DP_C0)(%rip), %xmm4	/* C0+z*(C2+z*C4) */
-	mulsd	%xmm1, %xmm4		/* y*(C0+z*(C2+z*C4)) */
-
-	addsd	%xmm4, %xmm3		/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	/* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	addsd	L(DP_ONES)(%rip), %xmm3
-
-	mulsd	(%rsi,%rax,8), %xmm3	/* DP result */
-	cvtsd2ss %xmm3, %xmm0		/* SP result */
-	ret
-
-	.p2align	4
-L(sin_poly):
-	/* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
-	 * y = t*t; z = y*y;
-	 * s = sign(x) * (-1.0)^((n>>2)&1)
-	 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
-	 */
-
-	movaps	%xmm0, %xmm4		/* t */
-	shrl	$2, %eax		/* n>>2 */
-	mulsd	%xmm0, %xmm0		/* y=t^2 */
-	andl	$1, %eax		/* (n>>2)&1 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=t^4 */
-
-	movsd	L(DP_S4)(%rip), %xmm2	/* S4 */
-	mulsd	%xmm0, %xmm2		/* z*S4 */
-	movsd	L(DP_S3)(%rip), %xmm3	/* S3 */
-	mulsd	%xmm0, %xmm3		/* z*S3 */
-	lea	L(DP_ONES)(%rip), %rsi
-	addsd	L(DP_S2)(%rip), %xmm2	/* S2+z*S4 */
-	mulsd	%xmm0, %xmm2		/* z*(S2+z*S4) */
-	addsd	L(DP_S1)(%rip), %xmm3	/* S1+z*S3 */
-	mulsd	%xmm0, %xmm3		/* z*(S1+z*S3) */
-	addsd	L(DP_S0)(%rip), %xmm2	/* S0+z*(S2+z*S4) */
-	mulsd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
-	/* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
-	mulsd	(%rsi,%rax,8), %xmm4
-	/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm2, %xmm3
-	/* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	mulsd	%xmm4, %xmm3
-	/* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm4, %xmm3
-	cvtsd2ss %xmm3, %xmm0		/* SP result */
-	ret
-
-	.p2align	4
-L(large_args):
-	/* Here if |x|>=9*Pi/4 */
-	cmpl	$0x7f800000, %eax	/* x is Inf or NaN?  */
-	jae	L(arg_inf_or_nan)
-
-	/* Here if finite |x|>=9*Pi/4 */
-	cmpl	$0x4b000000, %eax	/* |x|<2^23?  */
-	jae	L(very_large_args)
-
-	/* Here if 9*Pi/4<=|x|<2^23 */
-	movsd	L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
-	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
-	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
-	addl	$1, %eax		/* k+1 */
-	movl	%eax, %edx
-	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
-	cvtsi2sdl %edx, %xmm4		/* DP j */
-	movsd	L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
-	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
-	movsd	L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
-	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
-	addl	$2, %eax		/* n */
-	mulsd	%xmm3, %xmm4		/* j*PIO4LO */
-	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
-	jmp	L(reconstruction)
-
-	.p2align	4
-L(very_large_args):
-	/* Here if finite |x|>=2^23 */
-
-	/* bitpos = (ix>>23) - BIAS_32 + 59; */
-	shrl	$23, %eax		/* eb = biased exponent of x */
-	/* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
-	subl	$68, %eax
-	movl	$28, %ecx		/* %cl=28 */
-	movl	%eax, %edx		/* bitpos copy */
-
-	/* j = bitpos/28; */
-	div	%cl			/* j in register %al=%ax/%cl */
-	movapd	%xmm0, %xmm3		/* |x| */
-	/* clear unneeded remainder from %ah */
-	andl	$0xff, %eax
-
-	imull	$28, %eax, %ecx		/* j*28 */
-	lea	L(_FPI)(%rip), %rsi
-	movsd	L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
-	movapd	%xmm0, %xmm5		/* |x| */
-	mulsd	-16(%rsi,%rax,8), %xmm3	/* tmp3 = FPI[j-2]*|x| */
-	movapd	%xmm0, %xmm1		/* |x| */
-	mulsd	-8(%rsi,%rax,8), %xmm5	/* tmp2 = FPI[j-1]*|x| */
-	mulsd	(%rsi,%rax,8), %xmm0	/* tmp0 = FPI[j]*|x| */
-	addl	$19, %ecx		/* j*28+19 */
-	mulsd	8(%rsi,%rax,8), %xmm1	/* tmp1 = FPI[j+1]*|x| */
-	cmpl	%ecx, %edx		/* bitpos>=j*28+19?  */
-	jl	L(very_large_skip1)
-
-	/* Here if bitpos>=j*28+19 */
-	andpd	%xmm3, %xmm4		/* HI(tmp3) */
-	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
-	movsd	L(DP_2POW52)(%rip), %xmm6
-	movapd	%xmm5, %xmm2		/* tmp2 copy */
-	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
-	movl	$1, %edx
-	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
-	movsd	8+L(DP_2POW52)(%rip), %xmm4
-	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
-	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
-	comisd	%xmm5, %xmm4		/* tmp4 > tmp5?  */
-	jbe	L(very_large_skip2)
-
-	/* Here if tmp4 > tmp5 */
-	subl	$1, %eax		/* k-- */
-	addsd	8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
-L(very_large_skip2):
-
-	andl	%eax, %edx		/* k&1 */
-	lea	L(DP_ZERONE)(%rip), %rsi
-	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
-	addsd	(%rsi,%rdx,8), %xmm3	/* t  = DP_ZERONE[k&1] + tmp3 */
-	addsd	%xmm2, %xmm3		/* t += tmp2 */
-	addsd	%xmm3, %xmm0		/* t += tmp0 */
-	addl	$3, %eax		/* n=k+3 */
-	addsd	%xmm1, %xmm0		/* t += tmp1 */
-	mulsd	L(DP_PIO4)(%rip), %xmm0	/* t *= PI04 */
-
-	jmp	L(reconstruction)	/* end of very_large_args peth */
-
-	.p2align	4
-L(arg_less_pio4):
-	/* Here if |x|<Pi/4 */
-	cmpl	$0x3d000000, %eax	/* |x|<2^-5?  */
-	jl	L(arg_less_2pn5)
-
-	/* Here if 2^-5<=|x|<Pi/4 */
-	mulsd	%xmm0, %xmm0		/* y=x^2 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=x^4 */
-	movsd	L(DP_C4)(%rip), %xmm3	/* C4 */
-	mulsd	%xmm0, %xmm3		/* z*C4 */
-	movsd	L(DP_C3)(%rip), %xmm5	/* C3 */
-	mulsd	%xmm0, %xmm5		/* z*C3 */
-	addsd	L(DP_C2)(%rip), %xmm3	/* C2+z*C4 */
-	mulsd	%xmm0, %xmm3		/* z*(C2+z*C4) */
-	addsd	L(DP_C1)(%rip), %xmm5	/* C1+z*C3 */
-	mulsd	%xmm0, %xmm5		/* z*(C1+z*C3) */
-	addsd	L(DP_C0)(%rip), %xmm3	/* C0+z*(C2+z*C4) */
-	mulsd	%xmm1, %xmm3		/* y*(C0+z*(C2+z*C4)) */
-	/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	addsd	%xmm5, %xmm3
-	/* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	addsd	L(DP_ONES)(%rip), %xmm3
-	cvtsd2ss %xmm3, %xmm0		/* SP result */
-	ret
-
-	.p2align	4
-L(arg_less_2pn5):
-	/* Here if |x|<2^-5 */
-	cmpl	$0x32000000, %eax	/* |x|<2^-27?  */
-	jl	L(arg_less_2pn27)
-
-	/* Here if 2^-27<=|x|<2^-5 */
-	mulsd	%xmm0, %xmm0		/* DP x^2 */
-	movsd	L(DP_COS2_1)(%rip), %xmm3 /* DP DP_COS2_1 */
-	mulsd	%xmm0, %xmm3		/* DP x^2*DP_COS2_1 */
-	addsd	L(DP_COS2_0)(%rip), %xmm3 /* DP DP_COS2_0+x^2*DP_COS2_1 */
-	mulsd	%xmm0, %xmm3		/* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
-	/* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
-	addsd	L(DP_ONES)(%rip), %xmm3
-	cvtsd2ss %xmm3, %xmm0		/* SP result */
-	ret
-
-	.p2align	4
-L(arg_less_2pn27):
-	/* Here if |x|<2^-27 */
-	andps	L(SP_ABS_MASK)(%rip),%xmm7 /* |x| */
-	movss	L(SP_ONE)(%rip), %xmm0	/* 1.0 */
-	subss	%xmm7, %xmm0		/* result is 1.0-|x| */
-	ret
-
-	.p2align	4
-L(arg_inf_or_nan):
-	/* Here if |x| is Inf or NAN */
-	jne	L(skip_errno_setting)	/* in case of x is NaN */
-
-	/* Align stack to 16 bytes.  */
-	subq	$8, %rsp
-	cfi_adjust_cfa_offset (8)
-	/* Here if x is Inf. Set errno to EDOM.  */
-	call	JUMPTARGET(__errno_location)
-	addq	$8, %rsp
-	cfi_adjust_cfa_offset (-8)
-
-	movl	$EDOM, (%rax)
-
-	.p2align	4
-L(skip_errno_setting):
-	/* Here if |x| is Inf or NAN. Continued.  */
-	movaps	%xmm7, %xmm0		/* load x */
-	subss	%xmm0, %xmm0		/* Result is NaN */
-	ret
-END(__cosf)
-
-	.section .rodata, "a"
-	.p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
-	.long	0x00000000,0x00000000
-	.long	0x54442d18,0x3fe921fb
-	.long	0x54442d18,0x3ff921fb
-	.long	0x7f3321d2,0x4002d97c
-	.long	0x54442d18,0x400921fb
-	.long	0x2955385e,0x400f6a7a
-	.long	0x7f3321d2,0x4012d97c
-	.long	0xe9bba775,0x4015fdbb
-	.long	0x54442d18,0x401921fb
-	.long	0xbeccb2bb,0x401c463a
-	.long	0x2955385e,0x401f6a7a
-	.type L(PIO4J), @object
-	ASM_SIZE_DIRECTIVE(L(PIO4J))
-
-	.p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
-	.long	0x00000000,0x00000000
-	.long	0x6c000000,0x3ff45f30
-	.long	0x2a000000,0x3e3c9c88
-	.long	0xa8000000,0x3c54fe13
-	.long	0xd0000000,0x3aaf47d4
-	.long	0x6c000000,0x38fbb81b
-	.long	0xe0000000,0x3714acc9
-	.long	0x7c000000,0x3560e410
-	.long	0x56000000,0x33bca2c7
-	.long	0xac000000,0x31fbd778
-	.long	0xe0000000,0x300b7246
-	.long	0xe8000000,0x2e5d2126
-	.long	0x48000000,0x2c970032
-	.long	0xe8000000,0x2ad77504
-	.long	0xe0000000,0x290921cf
-	.long	0xb0000000,0x274deb1c
-	.long	0xe0000000,0x25829a73
-	.long	0xbe000000,0x23fd1046
-	.long	0x10000000,0x2224baed
-	.long	0x8e000000,0x20709d33
-	.long	0x80000000,0x1e535a2f
-	.long	0x64000000,0x1cef904e
-	.long	0x30000000,0x1b0d6398
-	.long	0x24000000,0x1964ce7d
-	.long	0x16000000,0x17b908bf
-	.type L(_FPI), @object
-	ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomial
-   for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5.  */
-	.p2align 3
-L(DP_COS2_0):
-	.long	0xff5cc6fd,0xbfdfffff
-	.type L(DP_COS2_0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
-
-	.p2align 3
-L(DP_COS2_1):
-	.long	0xb178dac5,0x3fa55514
-	.type L(DP_COS2_1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
-
-	.p2align 3
-L(DP_ZERONE):
-	.long	0x00000000,0x00000000	/* 0.0 */
-	.long	0x00000000,0xbff00000	/* 1.0 */
-	.type L(DP_ZERONE), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
-	.p2align 3
-L(DP_ONES):
-	.long	0x00000000,0x3ff00000	/* +1.0 */
-	.long	0x00000000,0xbff00000	/* -1.0 */
-	.type L(DP_ONES), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomial
-   for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
-	.p2align 3
-L(DP_S3):
-	.long	0x64e6b5b4,0x3ec71d72
-	.type L(DP_S3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S3))
-
-	.p2align 3
-L(DP_S1):
-	.long	0x10c2688b,0x3f811111
-	.type L(DP_S1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S1))
-
-	.p2align 3
-L(DP_S4):
-	.long	0x1674b58a,0xbe5a947e
-	.type L(DP_S4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S4))
-
-	.p2align 3
-L(DP_S2):
-	.long	0x8b4bd1f9,0xbf2a019f
-	.type L(DP_S2),@object
-	ASM_SIZE_DIRECTIVE(L(DP_S2))
-
-	.p2align 3
-L(DP_S0):
-	.long	0x55551cd9,0xbfc55555
-	.type L(DP_S0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S0))
-
-/* Coefficients of polynomial
-   for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
-	.p2align 3
-L(DP_C3):
-	.long	0x9ac43cc0,0x3efa00eb
-	.type L(DP_C3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C3))
-
-	.p2align 3
-L(DP_C1):
-	.long	0x545c50c7,0x3fa55555
-	.type L(DP_C1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C1))
-
-	.p2align 3
-L(DP_C4):
-	.long	0xdd8844d7,0xbe923c97
-	.type L(DP_C4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C4))
-
-	.p2align 3
-L(DP_C2):
-	.long	0x348b6874,0xbf56c16b
-	.type L(DP_C2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C2))
-
-	.p2align 3
-L(DP_C0):
-	.long	0xfffe98ae,0xbfdfffff
-	.type L(DP_C0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C0))
-
-	.p2align 3
-L(DP_PIO4):
-	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
-	.type L(DP_PIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
-	.p2align 3
-L(DP_2POW52):
-	.long	0x00000000,0x43300000	/* +2^52 */
-	.long	0x00000000,0xc3300000	/* -2^52 */
-	.type L(DP_2POW52), @object
-	ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
-	.p2align 3
-L(DP_INVPIO4):
-	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
-	.type L(DP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
-	.p2align 3
-L(DP_PIO4HI):
-	.long	0x54000000,0xbfe921fb	/* High part of Pi/4 */
-	.type L(DP_PIO4HI), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
-	.p2align 3
-L(DP_PIO4LO):
-	.long	0x11A62633,0xbe010b46	/* Low part of Pi/4 */
-	.type L(DP_PIO4LO), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
-	.p2align 2
-L(SP_INVPIO4):
-	.long	0x3fa2f983		/* 4/Pi */
-	.type L(SP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
-	.p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
-	.long	0xffffffff,0x7fffffff
-	.long	0xffffffff,0x7fffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
-	.p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
-	.long	0x00000000,0xffffffff
-	.type L(DP_HI_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
-	.p2align 4
-L(SP_ABS_MASK): /* Mask for getting SP absolute value */
-	.long	0x7fffffff,0x7fffffff
-	.long	0x7fffffff,0x7fffffff
-	.type L(SP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
-
-	.p2align 2
-L(SP_ONE):
-	.long	0x3f800000		/* 1.0 */
-	.type L(SP_ONE), @object
-	ASM_SIZE_DIRECTIVE(L(SP_ONE))
-
-weak_alias(__cosf, cosf)
diff --git a/sysdeps/x86_64/fpu/s_fabs.c b/sysdeps/x86_64/fpu/s_fabs.c
index d3a313fdf5..d1e17878d4 100644
--- a/sysdeps/x86_64/fpu/s_fabs.c
+++ b/sysdeps/x86_64/fpu/s_fabs.c
@@ -1,5 +1,5 @@
 /* Absolute value of floating point number.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,10 +17,11 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <math.h>
+#include <libm-alias-double.h>
 
 double
 __fabs (double x)
 {
   return __builtin_fabs (x);
 }
-weak_alias (__fabs, fabs)
+libm_alias_double (__fabs, fabs)
diff --git a/sysdeps/x86_64/fpu/s_fabsf.c b/sysdeps/x86_64/fpu/s_fabsf.c
index e6dcda9433..2f39228560 100644
--- a/sysdeps/x86_64/fpu/s_fabsf.c
+++ b/sysdeps/x86_64/fpu/s_fabsf.c
@@ -1,5 +1,5 @@
 /* Absolute value of floating point number.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,10 +17,11 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <math.h>
+#include <libm-alias-float.h>
 
 float
 __fabsf (float x)
 {
   return __builtin_fabsf (x);
 }
-weak_alias (__fabsf, fabsf)
+libm_alias_float (__fabs, fabs)
diff --git a/sysdeps/x86_64/fpu/s_fabsl.S b/sysdeps/x86_64/fpu/s_fabsl.S
index 6881ff11c7..7f03ecdccb 100644
--- a/sysdeps/x86_64/fpu/s_fabsl.S
+++ b/sysdeps/x86_64/fpu/s_fabsl.S
@@ -1,5 +1,5 @@
 /* Absolute value of floating point number.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-ldouble.h>
 
        .text
 ENTRY(__fabsl)
@@ -24,4 +25,4 @@ ENTRY(__fabsl)
        fabs
        ret
 END(__fabsl)
-weak_alias (__fabsl, fabsl)
+libm_alias_ldouble (__fabs, fabs)
diff --git a/sysdeps/x86_64/fpu/s_floorl.S b/sysdeps/x86_64/fpu/s_floorl.S
index f9ecc388df..75f8255648 100644
--- a/sysdeps/x86_64/fpu/s_floorl.S
+++ b/sysdeps/x86_64/fpu/s_floorl.S
@@ -5,26 +5,32 @@
  * Public domain.
  */
 
+#include <libm-alias-ldouble.h>
 #include <machine/asm.h>
 
 ENTRY(__floorl)
 	fldt	8(%rsp)
 
-	fstcw	-4(%rsp)		/* store fpu control word */
+	fnstenv	-28(%rsp)		/* store fpu environment */
 
 	/* We use here %edx although only the low 1 bits are defined.
 	   But none of the operations should care and they are faster
 	   than the 16 bit operations.  */
 	movl	$0x400,%edx		/* round towards -oo */
-	orl	-4(%rsp),%edx
+	orl	-28(%rsp),%edx
 	andl	$0xf7ff,%edx
-	movl	%edx,-8(%rsp)
-	fldcw	-8(%rsp)		/* load modified control word */
+	movl	%edx,-32(%rsp)
+	fldcw	-32(%rsp)		/* load modified control word */
 
 	frndint				/* round */
 
-	fldcw	-4(%rsp)		/* restore original control word */
+	/* Preserve "invalid" exceptions from sNaN input.  */
+	fnstsw
+	andl	$0x1, %eax
+	orl	%eax, -24(%rsp)
+
+	fldenv	-28(%rsp)		/* restore original environment */
 
 	ret
 END (__floorl)
-weak_alias (__floorl, floorl)
+libm_alias_ldouble (__floor, floor)
diff --git a/sysdeps/x86_64/fpu/s_fmax.S b/sysdeps/x86_64/fpu/s_fmax.S
index 02096c0aea..7cd8f1ed10 100644
--- a/sysdeps/x86_64/fpu/s_fmax.S
+++ b/sysdeps/x86_64/fpu/s_fmax.S
@@ -1,5 +1,5 @@
 /* Compute maximum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2002.
 
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-double.h>
 
 	.text
 ENTRY(__fmax)
@@ -27,9 +28,26 @@ ENTRY(__fmax)
 	jmp	2f
 
 1:	ucomisd	%xmm1, %xmm1	// Is xmm1 a NaN?
-	jp	2f		// then return xmm0
+	jp	3f
+	// xmm0 is a NaN; xmm1 is not.  Test if xmm0 is signaling.
+	movsd	%xmm0, -8(%rsp)
+	testb	$0x8, -2(%rsp)
+	jz	4f
 	movsd	%xmm1, %xmm0	// otherwise return xmm1
+	ret
+
+3:	// xmm1 is a NaN; xmm0 may or may not be.
+	ucomisd	%xmm0, %xmm0
+	jp	4f
+	// xmm1 is a NaN; xmm0 is not.  Test if xmm1 is signaling.
+	movsd	%xmm1, -8(%rsp)
+	testb	$0x8, -2(%rsp)
+	jz	4f
+	ret
+
+4:	// Both arguments are NaNs, or one is a signaling NaN.
+	addsd	%xmm1, %xmm0
 
 2:	ret
 END(__fmax)
-weak_alias (__fmax, fmax)
+libm_alias_double (__fmax, fmax)
diff --git a/sysdeps/x86_64/fpu/s_fmaxf.S b/sysdeps/x86_64/fpu/s_fmaxf.S
index 28e129701e..9b932fddc2 100644
--- a/sysdeps/x86_64/fpu/s_fmaxf.S
+++ b/sysdeps/x86_64/fpu/s_fmaxf.S
@@ -1,5 +1,5 @@
 /* Compute maximum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2002.
 
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-float.h>
 
 	.text
 ENTRY(__fmaxf)
@@ -27,9 +28,26 @@ ENTRY(__fmaxf)
 	jmp	2f
 
 1:	ucomiss	%xmm1, %xmm1	// Is xmm1 a NaN?
-	jp	2f		// then return xmm0
+	jp	3f
+	// xmm0 is a NaN; xmm1 is not.  Test if xmm0 is signaling.
+	movss	%xmm0, -4(%rsp)
+	testb	$0x40, -2(%rsp)
+	jz	4f
 	movss	%xmm1, %xmm0	// otherwise return xmm1
+	ret
+
+3:	// xmm1 is a NaN; xmm0 may or may not be.
+	ucomiss	%xmm0, %xmm0
+	jp	4f
+	// xmm1 is a NaN; xmm0 is not.  Test if xmm1 is signaling.
+	movss	%xmm1, -4(%rsp)
+	testb	$0x40, -2(%rsp)
+	jz	4f
+	ret
+
+4:	// Both arguments are NaNs, or one is a signaling NaN.
+	addss	%xmm1, %xmm0
 
 2:	ret
 END(__fmaxf)
-weak_alias (__fmaxf, fmaxf)
+libm_alias_float (__fmax, fmax)
diff --git a/sysdeps/x86_64/fpu/s_fmaxl.S b/sysdeps/x86_64/fpu/s_fmaxl.S
index f0c2bc0d56..3463a07083 100644
--- a/sysdeps/x86_64/fpu/s_fmaxl.S
+++ b/sysdeps/x86_64/fpu/s_fmaxl.S
@@ -1,5 +1,5 @@
 /* Compute maximum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+   Copyright (C) 1997-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -18,22 +18,42 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-ldouble.h>
 
 	.text
 ENTRY(__fmaxl)
 	fldt	8(%rsp)		// x
 	fldt	24(%rsp)	// x : y
 
-	fucomi	%st(0), %st
-	fcmovu	%st(1), %st	// now %st contains y if not NaN, x otherwise
-
-	fxch
-
 	fucomi	%st(1), %st
+	jp	2f
 	fcmovb	%st(1), %st
 
 	fstp	%st(1)
 
 	ret
+
+2:	// Unordered.
+	fucomi	%st(0), %st
+	jp	3f
+	// st(1) is a NaN; st(0) is not.  Test if st(1) is signaling.
+	testb	$0x40, 15(%rsp)
+	jz	4f
+	fstp	%st(1)
+	ret
+
+3:	// st(0) is a NaN; st(1) may or may not be.
+	fxch
+	fucomi	%st(0), %st
+	jp	4f
+	// st(1) is a NaN; st(0) is not.  Test if st(1) is signaling.
+	testb	$0x40, 31(%rsp)
+	jz	4f
+	fstp	%st(1)
+	ret
+
+4:	// Both arguments are NaNs, or one is a signaling NaN.
+	faddp
+	ret
 END(__fmaxl)
-weak_alias (__fmaxl, fmaxl)
+libm_alias_ldouble (__fmax, fmax)
diff --git a/sysdeps/x86_64/fpu/s_fmin.S b/sysdeps/x86_64/fpu/s_fmin.S
index fb14e2f3ed..15b6eaed90 100644
--- a/sysdeps/x86_64/fpu/s_fmin.S
+++ b/sysdeps/x86_64/fpu/s_fmin.S
@@ -1,5 +1,5 @@
 /* Compute minimum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2002.
 
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-double.h>
 
 	.text
 ENTRY(__fmin)
@@ -27,9 +28,26 @@ ENTRY(__fmin)
 	jmp	2f
 
 1:	ucomisd	%xmm1, %xmm1	// Is xmm1 a NaN?
-	jp	2f		// then return xmm0
+	jp	3f
+	// xmm0 is a NaN; xmm1 is not.  Test if xmm0 is signaling.
+	movsd	%xmm0, -8(%rsp)
+	testb	$0x8, -2(%rsp)
+	jz	4f
 	movsd	%xmm1, %xmm0	// otherwise return xmm1
+	ret
+
+3:	// xmm1 is a NaN; xmm0 may or may not be.
+	ucomisd	%xmm0, %xmm0
+	jp	4f
+	// xmm1 is a NaN; xmm0 is not.  Test if xmm1 is signaling.
+	movsd	%xmm1, -8(%rsp)
+	testb	$0x8, -2(%rsp)
+	jz	4f
+	ret
+
+4:	// Both arguments are NaNs, or one is a signaling NaN.
+	addsd	%xmm1, %xmm0
 
 2:	ret
 END(__fmin)
-weak_alias (__fmin, fmin)
+libm_alias_double (__fmin, fmin)
diff --git a/sysdeps/x86_64/fpu/s_fminf.S b/sysdeps/x86_64/fpu/s_fminf.S
index c8d6d0fd33..28e26aead5 100644
--- a/sysdeps/x86_64/fpu/s_fminf.S
+++ b/sysdeps/x86_64/fpu/s_fminf.S
@@ -1,5 +1,5 @@
 /* Compute minimum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2002.
 
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-float.h>
 
 	.text
 ENTRY(__fminf)
@@ -27,9 +28,26 @@ ENTRY(__fminf)
 	jmp	2f
 
 1:	ucomiss	%xmm1, %xmm1	// Is xmm1 a NaN?
-	jp	2f		// then return xmm0
+	jp	3f
+	// xmm0 is a NaN; xmm1 is not.  Test if xmm0 is signaling.
+	movss	%xmm0, -4(%rsp)
+	testb	$0x40, -2(%rsp)
+	jz	4f
 	movss	%xmm1, %xmm0	// otherwise return xmm1
+	ret
+
+3:	// xmm1 is a NaN; xmm0 may or may not be.
+	ucomiss	%xmm0, %xmm0
+	jp	4f
+	// xmm1 is a NaN; xmm0 is not.  Test if xmm1 is signaling.
+	movss	%xmm1, -4(%rsp)
+	testb	$0x40, -2(%rsp)
+	jz	4f
+	ret
+
+4:	// Both arguments are NaNs, or one is a signaling NaN.
+	addss	%xmm1, %xmm0
 
 2:	ret
 END(__fminf)
-weak_alias (__fminf, fminf)
+libm_alias_float (__fmin, fmin)
diff --git a/sysdeps/x86_64/fpu/s_fminl.S b/sysdeps/x86_64/fpu/s_fminl.S
index f1a06d29d7..df81762449 100644
--- a/sysdeps/x86_64/fpu/s_fminl.S
+++ b/sysdeps/x86_64/fpu/s_fminl.S
@@ -1,5 +1,5 @@
 /* Compute minimum of two numbers, regarding NaN as missing argument.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+   Copyright (C) 1997-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -18,20 +18,42 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-ldouble.h>
 
 	.text
 ENTRY(__fminl)
 	fldt	8(%rsp)		// x
 	fldt	24(%rsp)	// x : y
 
-	fucomi	%st(0), %st
-	fcmovu	%st(1), %st	// now %st contains y if not NaN, x otherwise
-
 	fucomi	%st(1), %st
+	jp	2f
 	fcmovnb	%st(1), %st
 
 	fstp	%st(1)
 
 	ret
+
+2:	// Unordered.
+	fucomi	%st(0), %st
+	jp	3f
+	// st(1) is a NaN; st(0) is not.  Test if st(1) is signaling.
+	testb	$0x40, 15(%rsp)
+	jz	4f
+	fstp	%st(1)
+	ret
+
+3:	// st(0) is a NaN; st(1) may or may not be.
+	fxch
+	fucomi	%st(0), %st
+	jp	4f
+	// st(1) is a NaN; st(0) is not.  Test if st(1) is signaling.
+	testb	$0x40, 31(%rsp)
+	jz	4f
+	fstp	%st(1)
+	ret
+
+4:	// Both arguments are NaNs, or one is a signaling NaN.
+	faddp
+	ret
 END(__fminl)
-weak_alias (__fminl, fminl)
+libm_alias_ldouble (__fmin, fmin)
diff --git a/sysdeps/x86_64/fpu/s_llrint.S b/sysdeps/x86_64/fpu/s_llrint.S
index 6634c653ea..7b93724e46 100644
--- a/sysdeps/x86_64/fpu/s_llrint.S
+++ b/sysdeps/x86_64/fpu/s_llrint.S
@@ -1,6 +1,6 @@
 /* Round argument to nearest integral value according to current rounding
    direction.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.d>, 2002.
 
@@ -19,14 +19,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-double.h>
 
 	.text
 ENTRY(__llrint)
 	cvtsd2si %xmm0,%rax
 	ret
 END(__llrint)
-weak_alias (__llrint, llrint)
+libm_alias_double (__llrint, llrint)
 #ifndef __ILP32__
 strong_alias (__llrint, __lrint)
-weak_alias (__llrint, lrint)
+libm_alias_double (__llrint, lrint)
 #endif
diff --git a/sysdeps/x86_64/fpu/s_llrintf.S b/sysdeps/x86_64/fpu/s_llrintf.S
index 5ac03dffd9..b6088de1ff 100644
--- a/sysdeps/x86_64/fpu/s_llrintf.S
+++ b/sysdeps/x86_64/fpu/s_llrintf.S
@@ -1,6 +1,6 @@
 /* Round argument to nearest integral value according to current rounding
    direction.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.d>, 2002.
 
@@ -19,14 +19,15 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-float.h>
 
 	.text
 ENTRY(__llrintf)
 	cvtss2si %xmm0,%rax
 	ret
 END(__llrintf)
-weak_alias (__llrintf, llrintf)
+libm_alias_float (__llrint, llrint)
 #ifndef __ILP32__
 strong_alias (__llrintf, __lrintf)
-weak_alias (__llrintf, lrintf)
+libm_alias_float (__llrint, lrint)
 #endif
diff --git a/sysdeps/x86_64/fpu/s_llrintl.S b/sysdeps/x86_64/fpu/s_llrintl.S
index 5f4d827dff..49f6ff1961 100644
--- a/sysdeps/x86_64/fpu/s_llrintl.S
+++ b/sysdeps/x86_64/fpu/s_llrintl.S
@@ -1,6 +1,6 @@
 /* Round argument to nearest integral value according to current rounding
    direction.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+   Copyright (C) 1997-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-ldouble.h>
 
 	.text
 ENTRY(__llrintl)
@@ -27,8 +28,8 @@ ENTRY(__llrintl)
 	movq	-8(%rsp),%rax
 	ret
 END(__llrintl)
-weak_alias (__llrintl, llrintl)
+libm_alias_ldouble (__llrint, llrint)
 #ifndef __ILP32__
 strong_alias (__llrintl, __lrintl)
-weak_alias (__llrintl, lrintl)
+libm_alias_ldouble (__llrint, lrint)
 #endif
diff --git a/sysdeps/x86_64/fpu/s_log1pl.S b/sysdeps/x86_64/fpu/s_log1pl.S
index e83f64d3c0..947e5e4552 100644
--- a/sysdeps/x86_64/fpu/s_log1pl.S
+++ b/sysdeps/x86_64/fpu/s_log1pl.S
@@ -68,6 +68,7 @@ ENTRY(__log1pl)
 	jnz	4b		// in case x is ±Inf
 	fstp	%st(1)
 	fstp	%st(1)
+	fadd	%st(0)
 	ret
 
 END (__log1pl)
diff --git a/sysdeps/x86_64/fpu/s_nearbyintl.S b/sysdeps/x86_64/fpu/s_nearbyintl.S
index 76d41bdd52..80508bdbee 100644
--- a/sysdeps/x86_64/fpu/s_nearbyintl.S
+++ b/sysdeps/x86_64/fpu/s_nearbyintl.S
@@ -4,15 +4,12 @@
  */
 /* Adapted for use as nearbyint by Ulrich Drepper <drepper@cygnus.com>.  */
 
+#include <libm-alias-ldouble.h>
 #include <machine/asm.h>
 
 ENTRY(__nearbyintl)
 	fldt	8(%rsp)
 	fnstenv	-28(%rsp)
-	movl	-28(%rsp), %eax
-	orl	$0x20, %eax
-	movl	%eax, -32(%rsp)
-	fldcw	-32(%rsp)
 	frndint
 	fnstsw
 	andl	$0x1, %eax
@@ -20,4 +17,4 @@ ENTRY(__nearbyintl)
 	fldenv	-28(%rsp)
 	ret
 END (__nearbyintl)
-weak_alias (__nearbyintl, nearbyintl)
+libm_alias_ldouble (__nearbyint, nearbyint)
diff --git a/sysdeps/x86_64/fpu/s_signbit.S b/sysdeps/x86_64/fpu/s_signbit.S
index 92a79d3123..becfc646cb 100644
--- a/sysdeps/x86_64/fpu/s_signbit.S
+++ b/sysdeps/x86_64/fpu/s_signbit.S
@@ -1,5 +1,5 @@
 /* Return nonzero value if number is negative.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redha.com>, 2009.
 
diff --git a/sysdeps/x86_64/fpu/s_signbitf.S b/sysdeps/x86_64/fpu/s_signbitf.S
index 885645372e..c7be6a6329 100644
--- a/sysdeps/x86_64/fpu/s_signbitf.S
+++ b/sysdeps/x86_64/fpu/s_signbitf.S
@@ -1,5 +1,5 @@
 /* Return nonzero value if number is negative.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redha.com>, 2009.
 
diff --git a/sysdeps/x86_64/fpu/s_sincosf.S b/sysdeps/x86_64/fpu/s_sincosf.S
index 5e7cbe57e3..2086e8ca5c 100644
--- a/sysdeps/x86_64/fpu/s_sincosf.S
+++ b/sysdeps/x86_64/fpu/s_sincosf.S
@@ -1,5 +1,5 @@
 /* Optimized sincosf function.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,8 +17,8 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
+#include <errno.h>
+#include <libm-alias-float.h>
 
 /* Short algorithm description:
  *
@@ -561,4 +561,6 @@ L(SP_ONE):
 	.type L(SP_ONE), @object
 	ASM_SIZE_DIRECTIVE(L(SP_ONE))
 
-weak_alias(__sincosf, sincosf)
+#ifndef __sincosf
+libm_alias_float (__sincos, sincos)
+#endif
diff --git a/sysdeps/x86_64/fpu/s_sinf.S b/sysdeps/x86_64/fpu/s_sinf.S
deleted file mode 100644
index c980c6e207..0000000000
--- a/sysdeps/x86_64/fpu/s_sinf.S
+++ /dev/null
@@ -1,559 +0,0 @@
-/* Optimized sinf function.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#define __need_Emath
-#include <bits/errno.h>
-
-/* Short algorithm description:
- *
- *  1) if |x| == 0: return x.
- *  2) if |x| <  2^-27: return x-x*DP_SMALL, raise underflow only when needed.
- *  3) if |x| <  2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
- *  4) if |x| <   Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
- *  5) if |x| < 9*Pi/4:
- *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
- *           t=|x|-j*Pi/4.
- *      5.2) Reconstruction:
- *          s = sign(x) * (-1.0)^((n>>2)&1)
- *          if(n&2 != 0) {
- *              using cos(t) polynomial for |t|<Pi/4, result is
- *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
- *          } else {
- *              using sin(t) polynomial for |t|<Pi/4, result is
- *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
- *          }
- *  6) if |x| < 2^23, large args:
- *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
- *           t=|x|-j*Pi/4.
- *      6.2) Reconstruction same as (5.2).
- *  7) if |x| >= 2^23, very large args:
- *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
- *           t=|x|-j*Pi/4.
- *      7.2) Reconstruction same as (5.2).
- *  8) if x is Inf, return x-x, and set errno=EDOM.
- *  9) if x is NaN, return x-x.
- *
- * Special cases:
- *  sin(+-0) = +-0 not raising inexact/underflow,
- *  sin(subnormal) raises inexact/underflow,
- *  sin(min_normalized) raises inexact/underflow,
- *  sin(normalized) raises inexact,
- *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
- *  sin(NaN) = NaN.
- */
-
-	.text
-ENTRY(__sinf)
-	/* Input: single precision x in %xmm0 */
-
-	movd	%xmm0, %eax		/* Bits of x */
-	movaps	%xmm0, %xmm7		/* Copy of x */
-	cvtss2sd %xmm0, %xmm0		/* DP x */
-	movss	L(SP_ABS_MASK)(%rip), %xmm3
-	movl	%eax, %edi		/* Copy of x bits */
-	andl	$0x7fffffff, %eax	/* |x| */
-
-	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4?  */
-	jb	L(arg_less_pio4)
-
-	/* Here if |x|>=Pi/4 */
-	andps	%xmm7, %xmm3		/* SP |x| */
-	andpd	L(DP_ABS_MASK)(%rip),%xmm0 /* DP |x| */
-	movss	L(SP_INVPIO4)(%rip), %xmm2 /* SP 1/(Pi/4) */
-
-	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4?  */
-	jae	L(large_args)
-
-	/* Here if Pi/4<=|x|<9*Pi/4 */
-	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
-	movl	%edi, %ecx		/* Load x */
-	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
-	lea	L(PIO4J)(%rip), %rsi
-	shrl	$31, %ecx		/* sign of x */
-	addl	$1, %eax		/* k+1 */
-	movl	$0x0e, %edx
-	andl	%eax, %edx		/* j = (k+1)&0x0e */
-	subsd	(%rsi,%rdx,8), %xmm0	/* t = |x| - j * Pi/4 */
-
-L(reconstruction):
-	/* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
-	testl	$2, %eax		/* n&2 != 0?  */
-	jz	L(sin_poly)
-
-/*L(cos_poly):*/
-	/* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
-	 * y = t*t; z = y*y;
-	 * s = sign(x) * (-1.0)^((n>>2)&1)
-	 * result = s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
-	 */
-	shrl	$2, %eax		/* n>>2 */
-	mulsd	%xmm0, %xmm0		/* y=t^2 */
-	andl	$1, %eax		/* (n>>2)&1 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=t^4 */
-
-	movsd	L(DP_C4)(%rip), %xmm4	/* C4 */
-	mulsd	%xmm0, %xmm4		/* z*C4 */
-	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
-	movsd	L(DP_C3)(%rip), %xmm3	/* C3 */
-	mulsd	%xmm0, %xmm3		/* z*C3 */
-	lea	L(DP_ONES)(%rip), %rsi
-	addsd	L(DP_C2)(%rip), %xmm4	/* C2+z*C4 */
-	mulsd	%xmm0, %xmm4		/* z*(C2+z*C4) */
-	addsd	L(DP_C1)(%rip), %xmm3	/* C1+z*C3 */
-	mulsd	%xmm0, %xmm3		/* z*(C1+z*C3) */
-	addsd	L(DP_C0)(%rip), %xmm4	/* C0+z*(C2+z*C4) */
-	mulsd	%xmm1, %xmm4		/* y*(C0+z*(C2+z*C4)) */
-
-	/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	addsd	%xmm4, %xmm3
-	/* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
-	addsd	L(DP_ONES)(%rip), %xmm3
-
-	mulsd	(%rsi,%rcx,8), %xmm3	/* DP result */
-	cvtsd2ss %xmm3, %xmm0 		/* SP result */
-	ret
-
-	.p2align	4
-L(sin_poly):
-	/* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
-	 * y = t*t; z = y*y;
-	 * s = sign(x) * (-1.0)^((n>>2)&1)
-	 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
-	 */
-
-	movaps	%xmm0, %xmm4		/* t */
-	shrl	$2, %eax		/* n>>2 */
-	mulsd	%xmm0, %xmm0		/* y=t^2 */
-	andl	$1, %eax		/* (n>>2)&1 */
-	movaps	%xmm0, %xmm1		/* y */
-	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
-	mulsd	%xmm0, %xmm0		/* z=t^4 */
-
-	movsd	L(DP_S4)(%rip), %xmm2	/* S4 */
-	mulsd	%xmm0, %xmm2		/* z*S4 */
-	movsd	L(DP_S3)(%rip), %xmm3	/* S3 */
-	mulsd	%xmm0, %xmm3		/* z*S3 */
-	lea	L(DP_ONES)(%rip), %rsi
-	addsd	L(DP_S2)(%rip), %xmm2	/* S2+z*S4 */
-	mulsd	%xmm0, %xmm2		/* z*(S2+z*S4) */
-	addsd	L(DP_S1)(%rip), %xmm3	/* S1+z*S3 */
-	mulsd	%xmm0, %xmm3		/* z*(S1+z*S3) */
-	addsd	L(DP_S0)(%rip), %xmm2	/* S0+z*(S2+z*S4) */
-	mulsd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
-	/* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
-	mulsd	(%rsi,%rcx,8), %xmm4
-	/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm2, %xmm3
-	/* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	mulsd	%xmm4, %xmm3
-	/* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm4, %xmm3
-	cvtsd2ss %xmm3, %xmm0 		/* SP result */
-	ret
-
-	.p2align	4
-L(large_args):
-	/* Here if |x|>=9*Pi/4 */
-	cmpl	$0x7f800000, %eax	/* x is Inf or NaN?  */
-	jae	L(arg_inf_or_nan)
-
-	/* Here if finite |x|>=9*Pi/4 */
-	cmpl	$0x4b000000, %eax	/* |x|<2^23?  */
-	jae	L(very_large_args)
-
-	/* Here if 9*Pi/4<=|x|<2^23 */
-	movsd	L(DP_INVPIO4)(%rip), %xmm1 /* 1/(Pi/4) */
-	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
-	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
-	addl	$1, %eax		/* k+1 */
-	movl	%eax, %edx
-	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
-	cvtsi2sdl %edx, %xmm4		/* DP j */
-	movl	%edi, %ecx		/* Load x */
-	movsd	L(DP_PIO4HI)(%rip), %xmm2 /* -PIO4HI = high part of -Pi/4 */
-	shrl	$31, %ecx		/* sign bit of x */
-	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
-	movsd	L(DP_PIO4LO)(%rip), %xmm3 /* -PIO4LO = low part of -Pi/4 */
-	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
-	mulsd	%xmm3, %xmm4		/* j*PIO4LO */
-	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
-	jmp	L(reconstruction)
-
-	.p2align	4
-L(very_large_args):
-	/* Here if finite |x|>=2^23 */
-
-	/* bitpos = (ix>>23) - BIAS_32 + 59; */
-	shrl	$23, %eax		/* eb = biased exponent of x */
-	/* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
-	subl	$68, %eax
-	movl	$28, %ecx		/* %cl=28 */
-	movl	%eax, %edx		/* bitpos copy */
-
-	/* j = bitpos/28; */
-	div	%cl			/* j in register %al=%ax/%cl */
-	movapd	%xmm0, %xmm3		/* |x| */
-	/* clear unneeded remainder from %ah */
-	andl	$0xff, %eax
-
-	imull	$28, %eax, %ecx		/* j*28 */
-	lea	L(_FPI)(%rip), %rsi
-	movsd	L(DP_HI_MASK)(%rip), %xmm4 /* DP_HI_MASK */
-	movapd	%xmm0, %xmm5		/* |x| */
-	mulsd	-16(%rsi,%rax,8), %xmm3	/* tmp3 = FPI[j-2]*|x| */
-	movapd	%xmm0, %xmm1		/* |x| */
-	mulsd	-8(%rsi,%rax,8), %xmm5	/* tmp2 = FPI[j-1]*|x| */
-	mulsd	(%rsi,%rax,8), %xmm0	/* tmp0 = FPI[j]*|x| */
-	addl	$19, %ecx		/* j*28+19 */
-	mulsd	8(%rsi,%rax,8), %xmm1	/* tmp1 = FPI[j+1]*|x| */
-	cmpl	%ecx, %edx		/* bitpos>=j*28+19?  */
-	jl	L(very_large_skip1)
-
-	/* Here if bitpos>=j*28+19 */
-	andpd	%xmm3, %xmm4		/* HI(tmp3) */
-	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
-L(very_large_skip1):
-
-	movsd	L(DP_2POW52)(%rip), %xmm6
-	movapd	%xmm5, %xmm2		/* tmp2 copy */
-	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
-	movl	$1, %edx
-	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
-	movsd	8+L(DP_2POW52)(%rip), %xmm4
-	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
-	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
-	movl	%edi, %ecx		/* Load x */
-	comisd	%xmm5, %xmm4		/* tmp4 > tmp5?  */
-	jbe	L(very_large_skip2)
-
-	/* Here if tmp4 > tmp5 */
-	subl	$1, %eax		/* k-- */
-	addsd	8+L(DP_ONES)(%rip), %xmm4 /* tmp4 -= 1.0 */
-L(very_large_skip2):
-
-	andl	%eax, %edx		/* k&1 */
-	lea	L(DP_ZERONE)(%rip), %rsi
-	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
-	addsd	(%rsi,%rdx,8), %xmm3	/* t  = DP_ZERONE[k&1] + tmp3 */
-	addsd	%xmm2, %xmm3		/* t += tmp2 */
-	shrl	$31, %ecx		/* sign of x */
-	addsd	%xmm3, %xmm0		/* t += tmp0 */
-	addl	$1, %eax		/* n=k+1 */
-	addsd	%xmm1, %xmm0		/* t += tmp1 */
-	mulsd	L(DP_PIO4)(%rip), %xmm0	/* t *= PI04 */
-
-	jmp	L(reconstruction)	/* end of very_large_args peth */
-
-	.p2align	4
-L(arg_less_pio4):
-	/* Here if |x|<Pi/4 */
-	cmpl	$0x3d000000, %eax	/* |x|<2^-5?  */
-	jl	L(arg_less_2pn5)
-
-	/* Here if 2^-5<=|x|<Pi/4 */
-	movaps	%xmm0, %xmm3		/* x */
-	mulsd	%xmm0, %xmm0		/* y=x^2 */
-	movaps	%xmm0, %xmm1		/* y */
-	mulsd	%xmm0, %xmm0		/* z=x^4 */
-	movsd	L(DP_S4)(%rip), %xmm4	/* S4 */
-	mulsd	%xmm0, %xmm4		/* z*S4 */
-	movsd	L(DP_S3)(%rip), %xmm5	/* S3 */
-	mulsd	%xmm0, %xmm5		/* z*S3 */
-	addsd	L(DP_S2)(%rip), %xmm4	/* S2+z*S4 */
-	mulsd	%xmm0, %xmm4		/* z*(S2+z*S4) */
-	addsd	L(DP_S1)(%rip), %xmm5	/* S1+z*S3 */
-	mulsd	%xmm0, %xmm5		/* z*(S1+z*S3) */
-	addsd	L(DP_S0)(%rip), %xmm4	/* S0+z*(S2+z*S4) */
-	mulsd	%xmm1, %xmm4		/* y*(S0+z*(S2+z*S4)) */
-	mulsd	%xmm3, %xmm5		/* x*z*(S1+z*S3) */
-	mulsd	%xmm3, %xmm4		/* x*y*(S0+z*(S2+z*S4)) */
-	/* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm5, %xmm4
-	/* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
-	addsd	%xmm4, %xmm3
-	cvtsd2ss %xmm3, %xmm0		/* SP result */
-	ret
-
-	.p2align	4
-L(arg_less_2pn5):
-	/* Here if |x|<2^-5 */
-	cmpl	$0x32000000, %eax	/* |x|<2^-27?  */
-	jl	L(arg_less_2pn27)
-
-	/* Here if 2^-27<=|x|<2^-5 */
-	movaps	%xmm0, %xmm1		/* DP x */
-	mulsd	%xmm0, %xmm0		/* DP x^2 */
-	movsd	L(DP_SIN2_1)(%rip), %xmm3 /* DP DP_SIN2_1 */
-	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_1 */
-	addsd	L(DP_SIN2_0)(%rip), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */
-	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
-	mulsd	%xmm1, %xmm3		/* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
-	addsd	%xmm1, %xmm3		/* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
-	cvtsd2ss %xmm3, %xmm0		/* SP result */
-	ret
-
-	.p2align	4
-L(arg_less_2pn27):
-	cmpl	$0, %eax		/* x=0?  */
-	je	L(arg_zero)		/* in case x=0 return sin(+-0)==+-0 */
-	/* Here if |x|<2^-27 */
-	/*
-	 * Special cases here:
-	 *  sin(subnormal) raises inexact/underflow
-	 *  sin(min_normalized) raises inexact/underflow
-	 *  sin(normalized) raises inexact
-	 */
-	movaps	%xmm0, %xmm3		/* Copy of DP x */
-	mulsd	L(DP_SMALL)(%rip), %xmm0 /* x*DP_SMALL */
-	subsd	%xmm0, %xmm3		/* Result is x-x*DP_SMALL */
-	cvtsd2ss %xmm3, %xmm0		/* Result converted to SP */
-	ret
-
-	.p2align	4
-L(arg_zero):
-	movaps	%xmm7, %xmm0		/* SP x */
-	ret
-
-	.p2align	4
-L(arg_inf_or_nan):
-	/* Here if |x| is Inf or NAN */
-	jne	L(skip_errno_setting)	/* in case of x is NaN */
-
-	/* Align stack to 16 bytes.  */
-	subq	$8, %rsp
-	cfi_adjust_cfa_offset (8)
-	/* Here if x is Inf. Set errno to EDOM.  */
-	call	JUMPTARGET(__errno_location)
-	addq	$8, %rsp
-	cfi_adjust_cfa_offset (-8)
-
-	movl	$EDOM, (%rax)
-
-	.p2align	4
-L(skip_errno_setting):
-	/* Here if |x| is Inf or NAN. Continued.  */
-	movaps	%xmm7, %xmm0		/* load x */
-	subss	%xmm0, %xmm0		/* Result is NaN */
-	ret
-END(__sinf)
-
-	.section .rodata, "a"
-	.p2align 3
-L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
-	.long	0x00000000,0x00000000
-	.long	0x54442d18,0x3fe921fb
-	.long	0x54442d18,0x3ff921fb
-	.long	0x7f3321d2,0x4002d97c
-	.long	0x54442d18,0x400921fb
-	.long	0x2955385e,0x400f6a7a
-	.long	0x7f3321d2,0x4012d97c
-	.long	0xe9bba775,0x4015fdbb
-	.long	0x54442d18,0x401921fb
-	.long	0xbeccb2bb,0x401c463a
-	.long	0x2955385e,0x401f6a7a
-	.type L(PIO4J), @object
-	ASM_SIZE_DIRECTIVE(L(PIO4J))
-
-	.p2align 3
-L(_FPI): /* 4/Pi broken into sum of positive DP values */
-	.long	0x00000000,0x00000000
-	.long	0x6c000000,0x3ff45f30
-	.long	0x2a000000,0x3e3c9c88
-	.long	0xa8000000,0x3c54fe13
-	.long	0xd0000000,0x3aaf47d4
-	.long	0x6c000000,0x38fbb81b
-	.long	0xe0000000,0x3714acc9
-	.long	0x7c000000,0x3560e410
-	.long	0x56000000,0x33bca2c7
-	.long	0xac000000,0x31fbd778
-	.long	0xe0000000,0x300b7246
-	.long	0xe8000000,0x2e5d2126
-	.long	0x48000000,0x2c970032
-	.long	0xe8000000,0x2ad77504
-	.long	0xe0000000,0x290921cf
-	.long	0xb0000000,0x274deb1c
-	.long	0xe0000000,0x25829a73
-	.long	0xbe000000,0x23fd1046
-	.long	0x10000000,0x2224baed
-	.long	0x8e000000,0x20709d33
-	.long	0x80000000,0x1e535a2f
-	.long	0x64000000,0x1cef904e
-	.long	0x30000000,0x1b0d6398
-	.long	0x24000000,0x1964ce7d
-	.long	0x16000000,0x17b908bf
-	.type L(_FPI), @object
-	ASM_SIZE_DIRECTIVE(L(_FPI))
-
-/* Coefficients of polynomial
-   for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5.  */
-	.p2align 3
-L(DP_SIN2_0):
-	.long	0x5543d49d,0xbfc55555
-	.type L(DP_SIN2_0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
-
-	.p2align 3
-L(DP_SIN2_1):
-	.long	0x75cec8c5,0x3f8110f4
-	.type L(DP_SIN2_1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
-
-	.p2align 3
-L(DP_ZERONE):
-	.long	0x00000000,0x00000000	/* 0.0 */
-	.long	0x00000000,0xbff00000	/* 1.0 */
-	.type L(DP_ZERONE), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
-
-	.p2align 3
-L(DP_ONES):
-	.long	0x00000000,0x3ff00000	/* +1.0 */
-	.long	0x00000000,0xbff00000	/* -1.0 */
-	.type L(DP_ONES), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ONES))
-
-/* Coefficients of polynomial
-   for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
-	.p2align 3
-L(DP_S3):
-	.long	0x64e6b5b4,0x3ec71d72
-	.type L(DP_S3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S3))
-
-	.p2align 3
-L(DP_S1):
-	.long	0x10c2688b,0x3f811111
-	.type L(DP_S1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S1))
-
-	.p2align 3
-L(DP_S4):
-	.long	0x1674b58a,0xbe5a947e
-	.type L(DP_S4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S4))
-
-	.p2align 3
-L(DP_S2):
-	.long	0x8b4bd1f9,0xbf2a019f
-	.type L(DP_S2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S2))
-
-	.p2align 3
-L(DP_S0):
-	.long	0x55551cd9,0xbfc55555
-	.type L(DP_S0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_S0))
-
-	.p2align 3
-L(DP_SMALL):
-	.long	0x00000000,0x3cd00000	/* 2^(-50) */
-	.type L(DP_SMALL), @object
-	ASM_SIZE_DIRECTIVE(L(DP_SMALL))
-
-/* Coefficients of polynomial
-   for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
-	.p2align 3
-L(DP_C3):
-	.long	0x9ac43cc0,0x3efa00eb
-	.type L(DP_C3), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C3))
-
-	.p2align 3
-L(DP_C1):
-	.long	0x545c50c7,0x3fa55555
-	.type L(DP_C1), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C1))
-
-	.p2align 3
-L(DP_C4):
-	.long	0xdd8844d7,0xbe923c97
-	.type L(DP_C4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C4))
-
-	.p2align 3
-L(DP_C2):
-	.long	0x348b6874,0xbf56c16b
-	.type L(DP_C2), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C2))
-
-	.p2align 3
-L(DP_C0):
-	.long	0xfffe98ae,0xbfdfffff
-	.type L(DP_C0), @object
-	ASM_SIZE_DIRECTIVE(L(DP_C0))
-
-	.p2align 3
-L(DP_PIO4):
-	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
-	.type L(DP_PIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4))
-
-	.p2align 3
-L(DP_2POW52):
-	.long	0x00000000,0x43300000	/* +2^52 */
-	.long	0x00000000,0xc3300000	/* -2^52 */
-	.type L(DP_2POW52), @object
-	ASM_SIZE_DIRECTIVE(L(DP_2POW52))
-
-	.p2align 3
-L(DP_INVPIO4):
-	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
-	.type L(DP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
-
-	.p2align 3
-L(DP_PIO4HI):
-	.long	0x54000000,0xbfe921fb	/* High part of Pi/4 */
-	.type L(DP_PIO4HI), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
-
-	.p2align 3
-L(DP_PIO4LO):
-	.long	0x11A62633,0xbe010b46	/* Low part of Pi/4 */
-	.type L(DP_PIO4LO), @object
-	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
-
-	.p2align 2
-L(SP_INVPIO4):
-	.long	0x3fa2f983		/* 4/Pi */
-	.type L(SP_INVPIO4), @object
-	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
-
-	.p2align 4
-L(DP_ABS_MASK): /* Mask for getting DP absolute value */
-	.long	0xffffffff,0x7fffffff
-	.long	0xffffffff,0x7fffffff
-	.type L(DP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
-
-	.p2align 3
-L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
-	.long	0x00000000,0xffffffff
-	.type L(DP_HI_MASK),@object
-	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
-
-	.p2align 4
-L(SP_ABS_MASK): /* Mask for getting SP absolute value */
-	.long	0x7fffffff,0x7fffffff
-	.long	0x7fffffff,0x7fffffff
-	.type L(SP_ABS_MASK), @object
-	ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
-
-weak_alias(__sinf, sinf)
diff --git a/sysdeps/x86_64/fpu/s_truncl.S b/sysdeps/x86_64/fpu/s_truncl.S
index c37cf00241..22427ece00 100644
--- a/sysdeps/x86_64/fpu/s_truncl.S
+++ b/sysdeps/x86_64/fpu/s_truncl.S
@@ -1,5 +1,5 @@
 /* Truncate long double value.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+   Copyright (C) 1997-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
@@ -17,17 +17,21 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <libm-alias-ldouble.h>
 #include <machine/asm.h>
 
 ENTRY(__truncl)
 	fldt	8(%rsp)
-	fstcw	-4(%rsp)
+	fnstenv	-28(%rsp)	/* Save the 28-byte x87 environment: control word at -28(%rsp), status word at -24(%rsp).  */
 	movl	$0xc00, %edx
-	orl	-4(%rsp), %edx
-	movl	%edx, -8(%rsp)
-	fldcw	-8(%rsp)
+	orl	-28(%rsp), %edx	/* Saved control word with both rounding-control bits (0xc00) set: round toward zero.  */
+	movl	%edx, -32(%rsp)	/* Scratch slot just below the saved environment.  */
+	fldcw	-32(%rsp)	/* Switch to truncation for the frndint that follows.  */
 	frndint
-	fldcw	-4(%rsp)
+	fnstsw			/* Status word after frndint -> %ax.  */
+	andl	$0x1, %eax	/* Keep only bit 0 (invalid operation).  */
+	orl	%eax, -24(%rsp)	/* Fold that flag into the saved status word.  */
+	fldenv	-28(%rsp)	/* Restore control and status words; any other flag frndint raised is discarded.  */
 	ret
 END(__truncl)
-weak_alias (__truncl, truncl)
+libm_alias_ldouble (__trunc, trunc)
diff --git a/sysdeps/x86_64/fpu/svml_d_cos2_core.S b/sysdeps/x86_64/fpu/svml_d_cos2_core.S
index 7f62d29917..111548367b 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos2_core.S
@@ -1,5 +1,5 @@
 /* Function cos vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core.S b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
index b92ff13b86..28b31d510c 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos4_core.S
@@ -1,5 +1,5 @@
 /* Function cos vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S
index a3da721e35..988d0650ca 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos4_core_avx.S
@@ -1,5 +1,5 @@
 /* Function cos vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_cos8_core.S b/sysdeps/x86_64/fpu/svml_d_cos8_core.S
index e5d986d11a..830776b5d2 100644
--- a/sysdeps/x86_64/fpu/svml_d_cos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_cos8_core.S
@@ -1,5 +1,5 @@
 /* Function cos vectorized with AVX-512, wrapper to AVX2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp2_core.S b/sysdeps/x86_64/fpu/svml_d_exp2_core.S
index 9e511037a1..e19ddb7f3b 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp2_core.S
@@ -1,5 +1,5 @@
 /* Function exp vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -21,7 +21,7 @@
 
 	.text
 ENTRY (_ZGVbN2v_exp)
-WRAPPER_IMPL_SSE2 exp
+WRAPPER_IMPL_SSE2 __exp_finite	/* Per-lane scalar callee is now __exp_finite instead of exp; NOTE(review): presumably to skip the error-handling wrapper -- confirm.  */
 END (_ZGVbN2v_exp)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core.S b/sysdeps/x86_64/fpu/svml_d_exp4_core.S
index 8cac8adbc7..341fea8f30 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp4_core.S
@@ -1,5 +1,5 @@
 /* Function exp vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
index 1a0fbf574a..39e6fcf228 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp4_core_avx.S
@@ -1,5 +1,5 @@
 /* Function exp vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp8_core.S b/sysdeps/x86_64/fpu/svml_d_exp8_core.S
index 2486e888a4..94edc01fcb 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp8_core.S
@@ -1,5 +1,5 @@
 /* Function exp vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.S b/sysdeps/x86_64/fpu/svml_d_exp_data.S
index 6d1acbdd21..5e229c9bcc 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_exp_data.S
@@ -1,5 +1,5 @@
 /* Data for vector function exp.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_exp_data.h b/sysdeps/x86_64/fpu/svml_d_exp_data.h
index f993403d47..a3721ce137 100644
--- a/sysdeps/x86_64/fpu/svml_d_exp_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_exp_data.h
@@ -1,5 +1,5 @@
 /* Offsets for data table for function exp.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log2_core.S b/sysdeps/x86_64/fpu/svml_d_log2_core.S
index 8ea40fee56..41522f2069 100644
--- a/sysdeps/x86_64/fpu/svml_d_log2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_log2_core.S
@@ -1,5 +1,5 @@
 /* Function log vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -21,7 +21,7 @@
 
 	.text
 ENTRY (_ZGVbN2v_log)
-WRAPPER_IMPL_SSE2 log
+WRAPPER_IMPL_SSE2 __log_finite	/* Per-lane scalar callee is now __log_finite instead of log; NOTE(review): presumably to skip the error-handling wrapper -- confirm.  */
 END (_ZGVbN2v_log)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core.S b/sysdeps/x86_64/fpu/svml_d_log4_core.S
index 72813d8921..5857b45aa0 100644
--- a/sysdeps/x86_64/fpu/svml_d_log4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_log4_core.S
@@ -1,5 +1,5 @@
 /* Function log vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
index 6ca1139931..bab3ba9877 100644
--- a/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_log4_core_avx.S
@@ -1,5 +1,5 @@
 /* Function log vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log8_core.S b/sysdeps/x86_64/fpu/svml_d_log8_core.S
index 6850fd9a44..bb3523ee0d 100644
--- a/sysdeps/x86_64/fpu/svml_d_log8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_log8_core.S
@@ -1,5 +1,5 @@
 /* Function log vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.S b/sysdeps/x86_64/fpu/svml_d_log_data.S
index 9ab541b23f..0514551ccf 100644
--- a/sysdeps/x86_64/fpu/svml_d_log_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_log_data.S
@@ -1,5 +1,5 @@
 /* Data for function log.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_log_data.h b/sysdeps/x86_64/fpu/svml_d_log_data.h
index 30c2b54a4b..a317c7b845 100644
--- a/sysdeps/x86_64/fpu/svml_d_log_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_log_data.h
@@ -1,5 +1,5 @@
 /* Offsets for data table for function log.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow2_core.S b/sysdeps/x86_64/fpu/svml_d_pow2_core.S
index b25515c825..b2451b2ed5 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow2_core.S
@@ -1,5 +1,5 @@
 /* Function pow vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -21,7 +21,7 @@
 
 	.text
 ENTRY (_ZGVbN2vv_pow)
-WRAPPER_IMPL_SSE2_ff pow
+WRAPPER_IMPL_SSE2_ff __pow_finite	/* Per-lane scalar callee is now __pow_finite instead of pow; NOTE(review): presumably to skip the error-handling wrapper -- confirm.  */
 END (_ZGVbN2vv_pow)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core.S b/sysdeps/x86_64/fpu/svml_d_pow4_core.S
index 547993799e..1520ba1d45 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow4_core.S
@@ -1,5 +1,5 @@
 /* Function pow vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S
index 4e4e9867b4..d4b265c91a 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow4_core_avx.S
@@ -1,5 +1,5 @@
 /* Function pow vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow8_core.S b/sysdeps/x86_64/fpu/svml_d_pow8_core.S
index 372e5a9c83..15292ccebd 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow8_core.S
@@ -1,5 +1,5 @@
 /* Function pow vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.S b/sysdeps/x86_64/fpu/svml_d_pow_data.S
index 8481f95455..9e5f99c25e 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_pow_data.S
@@ -1,5 +1,5 @@
 /* Data for function pow.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_pow_data.h b/sysdeps/x86_64/fpu/svml_d_pow_data.h
index 239ba96984..55b573b2a7 100644
--- a/sysdeps/x86_64/fpu/svml_d_pow_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_pow_data.h
@@ -1,5 +1,5 @@
 /* Offsets for data table for function pow.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin2_core.S b/sysdeps/x86_64/fpu/svml_d_sin2_core.S
index f6ec13104b..6485e0819f 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin2_core.S
@@ -1,5 +1,5 @@
 /* Function sin vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core.S b/sysdeps/x86_64/fpu/svml_d_sin4_core.S
index 95a1dec6f6..7c7c426451 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin4_core.S
@@ -1,5 +1,5 @@
 /* Function sin vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S
index 29d1526a12..a8200dfc58 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin4_core_avx.S
@@ -1,5 +1,5 @@
 /* Function sin vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sin8_core.S b/sysdeps/x86_64/fpu/svml_d_sin8_core.S
index abd86b3d98..7f07a41ba1 100644
--- a/sysdeps/x86_64/fpu/svml_d_sin8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sin8_core.S
@@ -1,5 +1,5 @@
 /* Function sin vectorized with AVX-512, wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
index 74afa0a677..ebf9e25aca 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos2_core.S
@@ -1,5 +1,5 @@
 /* Function sincos vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,8 +20,89 @@
 #include "svml_d_wrapper_impl.h"
 
 	.text
-ENTRY (_ZGVbN2vvv_sincos)
+ENTRY (_ZGVbN2vl8l8_sincos)	/* The pre-existing WRAPPER_IMPL_SSE2_fFF body is now exported under the vl8l8 name.  */
 WRAPPER_IMPL_SSE2_fFF sincos
+END (_ZGVbN2vl8l8_sincos)
+libmvec_hidden_def (_ZGVbN2vl8l8_sincos)	/* NOTE(review): presumably what lets the wider wrappers call it via HIDDEN_JUMPTARGET -- confirm.  */
+
+/* SSE2 ISA version as wrapper to scalar (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
+#ifndef __ILP32__
+        subq      $88, %rsp	/* Frame: 0-15 first-output pair, 16-31 second-output pair, 32-47 saved %xmm1, 48-63 saved %xmm2, 64-79 saved input.  */
+        cfi_adjust_cfa_offset(88)
+        movaps    %xmm0, 64(%rsp)	/* Spill both input lanes; %xmm0 itself still holds lane 0 in its low half.  */
+        lea       (%rsp), %rdi	/* Lane 0, first output -> slot 0.  */
+        movdqa    %xmm1, 32(%rdi)	/* %xmm1 = two 64-bit destination pointers for the first output, saved at 32(%rsp).  */
+        lea       16(%rsp), %rsi	/* Lane 0, second output -> slot 16.  */
+        movdqa    %xmm2, 32(%rsi)	/* %xmm2 = two destination pointers for the second output, saved at 48(%rsp).  */
+        call      JUMPTARGET(\callee)
+        movsd     72(%rsp), %xmm0	/* Reload input lane 1.  */
+        lea       8(%rsp), %rdi	/* Lane 1, first output -> slot 8.  */
+        lea       24(%rsp), %rsi	/* Lane 1, second output -> slot 24.  */
+        call      JUMPTARGET(\callee)
+        movq      32(%rsp), %rdx	/* Fetch the four saved destination pointers...  */
+        movq      48(%rsp), %rsi
+        movq      40(%rsp), %r8
+        movq      56(%rsp), %r10
+        movq      (%rsp), %rax	/* ...and the four results staged on the stack...  */
+        movq      16(%rsp), %rcx
+        movq      8(%rsp), %rdi
+        movq      24(%rsp), %r9
+        movq      %rax, (%rdx)	/* ...then scatter each result through its own pointer.  */
+        movq      %rcx, (%rsi)
+        movq      %rdi, (%r8)
+        movq      %r9, (%r10)
+        addq      $88, %rsp
+        cfi_adjust_cfa_offset(-88)
+        ret
+#else
+        pushq   %rbp	/* %rbp and %rbx carry the two buffer addresses across the calls.  */
+        .cfi_def_cfa_offset 16
+        .cfi_offset 6, -16
+        pushq   %rbx
+        .cfi_def_cfa_offset 24
+        .cfi_offset 3, -24
+        subl    $88, %esp	/* Frame: 0-15 input, 16-31 saved %xmm2, 32-47 saved %xmm1, 48-63 first-output pair, 64-79 second-output pair.  */
+        .cfi_def_cfa_offset 112
+        leal    64(%rsp), %esi	/* Second-output buffer.  */
+        movaps  %xmm1, 32(%esp)	/* Destination pointers are 32-bit here: two per XMM register, in the low two dwords.  */
+        leal    48(%rsp), %edi	/* First-output buffer.  */
+        movaps  %xmm2, 16(%esp)
+        movq    %rsi, %rbp
+        movq    %rdi, %rbx
+        movaps  %xmm0, (%esp)
+        call    JUMPTARGET(\callee)
+        movupd  8(%esp), %xmm0	/* Unaligned load that places input lane 1 in the low half of %xmm0.  */
+        leal    8(%rbp), %esi	/* Advance both buffers by one double for lane 1.  */
+        leal    8(%rbx), %edi
+        call    JUMPTARGET(\callee)
+        movdqa  32(%esp), %xmm1	/* Reload the first-output pointer pair.  */
+        movsd   48(%esp), %xmm0
+        movq    %xmm1, %rax	/* Only %eax (pointer for lane 0) is used as the address below.  */
+        movdqa  16(%esp), %xmm2	/* Reload the second-output pointer pair.  */
+        movsd   %xmm0, (%eax)
+        movsd   56(%esp), %xmm0
+        pextrd  $1, %xmm1, %eax	/* Pointer for lane 1.  */
+        movsd   %xmm0, (%eax)
+        movsd   64(%esp), %xmm0
+        movq    %xmm2, %rax
+        movsd   %xmm0, (%eax)
+        movsd   72(%esp), %xmm0
+        pextrd  $1, %xmm2, %eax
+        movsd   %xmm0, (%eax)
+        addl    $88, %esp
+        .cfi_def_cfa_offset 24
+        popq    %rbx
+        .cfi_def_cfa_offset 16
+        popq    %rbp
+        .cfi_def_cfa_offset 8
+        ret
+#endif
+.endm
+
+ENTRY (_ZGVbN2vvv_sincos)	/* The vvv name now gets the scatter-through-pointer-vectors wrapper defined above.  */
+WRAPPER_IMPL_SSE2_fFF_vvv sincos
 END (_ZGVbN2vvv_sincos)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
index 2c0b011fb3..626a2b3a7b 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core.S
@@ -1,5 +1,5 @@
 /* Function sincos vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,8 +20,131 @@
 #include "svml_d_wrapper_impl.h"
 
 	.text
+ENTRY (_ZGVdN4vl8l8_sincos)	/* 4-lane vl8l8 entry built from the 2-lane vl8l8 one.  */
+WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
+END (_ZGVdN4vl8l8_sincos)
+libmvec_hidden_def (_ZGVdN4vl8l8_sincos)
+
+/* AVX2 ISA version as wrapper to SSE ISA version (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_AVX2_fFF_vvv callee
+#ifndef __ILP32__
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp	/* Frame pointer lets the epilogue undo the realignment below.  */
+        cfi_def_cfa_register (%rbp)
+        andq      $-32, %rsp	/* 32-byte align the scratch area.  */
+        subq      $160, %rsp	/* Frame: 0-31 first-output x4, 32-63 second-output x4, 64-95 saved %ymm1, 96-127 saved %ymm2, 128-159 saved input.  */
+        vmovupd   %ymm0, 128(%rsp)
+        lea       (%rsp), %rdi	/* Lanes 0-1, first output -> slots 0 and 8.  */
+        vmovdqu   %ymm1, 64(%rdi)	/* %ymm1 = four 64-bit destination pointers for the first output.  */
+        vmovdqu   %ymm2, 96(%rdi)	/* %ymm2 = four destination pointers for the second output.  */
+        lea       32(%rsp), %rsi	/* Lanes 0-1, second output -> slots 32 and 40.  */
+        vzeroupper
+        call      HIDDEN_JUMPTARGET(\callee)	/* Low two lanes are still in %xmm0.  */
+        vmovupd   144(%rsp), %xmm0	/* Upper two input lanes.  */
+        lea       16(%rsp), %rdi
+        lea       48(%rsp), %rsi
+        call      HIDDEN_JUMPTARGET(\callee)
+        movq      64(%rsp), %rdx	/* Scatter phase: load a saved pointer and the matching staged result, then store.  */
+        movq      96(%rsp), %rsi
+        movq      72(%rsp), %r8
+        movq      104(%rsp), %r10
+        movq      (%rsp), %rax
+        movq      32(%rsp), %rcx
+        movq      8(%rsp), %rdi
+        movq      40(%rsp), %r9
+        movq      %rax, (%rdx)	/* Lane 0.  */
+        movq      %rcx, (%rsi)
+        movq      80(%rsp), %rax
+        movq      112(%rsp), %rcx
+        movq      %rdi, (%r8)	/* Lane 1.  */
+        movq      %r9, (%r10)
+        movq      88(%rsp), %rdi
+        movq      120(%rsp), %r9
+        movq      16(%rsp), %r11
+        movq      48(%rsp), %rdx
+        movq      24(%rsp), %rsi
+        movq      56(%rsp), %r8
+        movq      %r11, (%rax)	/* Lane 2.  */
+        movq      %rdx, (%rcx)
+        movq      %rsi, (%rdi)	/* Lane 3.  */
+        movq      %r8, (%r9)
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else
+        leal    8(%rsp), %r10d	/* %r10 remembers the incoming stack position across the realignment.  */
+        .cfi_def_cfa 10, 0
+        andl    $-32, %esp
+        pushq   -8(%r10d)	/* Copy of the return address on the realigned stack.  */
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r12
+        leal    -80(%rbp), %esi	/* Second-output buffer: -80 .. -49 from %ebp.  */
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x70,0x6
+        .cfi_escape 0x10,0xc,0x2,0x76,0x78
+        leal    -112(%rbp), %edi	/* First-output buffer: -112 .. -81 from %ebp.  */
+        movq    %rsi, %r12	/* Keep both buffer addresses in %r12/%rbx across the calls.  */
+        pushq   %rbx
+        .cfi_escape 0x10,0x3,0x2,0x76,0x68
+        movq    %rdi, %rbx
+        subl    $152, %esp
+        vmovaps %xmm1, -128(%ebp)	/* Four 32-bit destination pointers for the first output.  */
+        vmovaps %xmm2, -144(%ebp)	/* Four 32-bit destination pointers for the second output.  */
+        vmovapd %ymm0, -176(%ebp)
+        vzeroupper
+        call    HIDDEN_JUMPTARGET(\callee)
+        leal    16(%r12), %esi	/* Advance both buffers by two doubles for lanes 2-3.  */
+        vmovapd -160(%ebp), %xmm0	/* Upper two input lanes.  */
+        leal    16(%rbx), %edi
+        call    HIDDEN_JUMPTARGET(\callee)
+        movq    -128(%ebp), %rax	/* Only %eax (pointer 0) is used as the address below.  */
+        vmovsd  -112(%ebp), %xmm0
+        vmovdqa -128(%ebp), %xmm5	/* First-output pointers, for the odd-lane vpextrd picks.  */
+        vmovdqa -144(%ebp), %xmm1	/* Second-output pointers, likewise.  */
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -104(%ebp), %xmm0
+        vpextrd $1, %xmm5, %eax
+        vmovsd  %xmm0, (%eax)
+        movq    -120(%ebp), %rax
+        vmovsd  -96(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -88(%ebp), %xmm0
+        vpextrd $3, %xmm5, %eax
+        vmovsd  %xmm0, (%eax)
+        movq    -144(%ebp), %rax
+        vmovsd  -80(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -72(%ebp), %xmm0
+        vpextrd $1, %xmm1, %eax
+        vmovsd  %xmm0, (%eax)
+        movq    -136(%ebp), %rax
+        vmovsd  -64(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -56(%ebp), %xmm0
+        vpextrd $3, %xmm1, %eax
+        vmovsd  %xmm0, (%eax)
+        addl    $152, %esp
+        popq    %rbx
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %r12
+        popq    %rbp
+        leal    -8(%r10), %esp	/* Back to the original, pre-alignment stack pointer.  */
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
 ENTRY (_ZGVdN4vvv_sincos)
-WRAPPER_IMPL_AVX_fFF _ZGVbN2vvv_sincos
+WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN2vl8l8_sincos
 END (_ZGVdN4vvv_sincos)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
index e4320a97c7..4a5d4f637a 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos4_core_avx.S
@@ -1,5 +1,5 @@
 /* Function sincos vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,6 +20,130 @@
 #include "svml_d_wrapper_impl.h"
 
 	.text
+ENTRY (_ZGVcN4vl8l8_sincos)	/* 4-lane vl8l8 entry built from the 2-lane vl8l8 one.  */
+WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
+END (_ZGVcN4vl8l8_sincos)
+
+/* AVX ISA version as wrapper to SSE ISA version (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_AVX_fFF_vvv callee
+#ifndef __ILP32__
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp	/* Frame pointer lets the epilogue undo the realignment below.  */
+        cfi_def_cfa_register (%rbp)
+        andq      $-32, %rsp	/* 32-byte align the scratch area.  */
+        subq      $160, %rsp	/* Frame: 0-31 first-output x4, 32-63 second-output x4, 64-95 saved input, 96-159 saved %xmm1..%xmm4.  */
+        vmovupd   %ymm0, 64(%rsp)
+        lea       (%rsp), %rdi	/* Lanes 0-1, first output -> slots 0 and 8.  */
+        vmovdqu   %xmm1, 96(%rdi)	/* %xmm1, %xmm2: four 64-bit destination pointers for the first output.  */
+        vmovdqu   %xmm2, 112(%rdi)
+        vmovdqu   %xmm3, 128(%rdi)	/* %xmm3, %xmm4: four destination pointers for the second output.  */
+        vmovdqu   %xmm4, 144(%rdi)
+        lea       32(%rsp), %rsi	/* Lanes 0-1, second output -> slots 32 and 40.  */
+	vzeroupper
+        call      HIDDEN_JUMPTARGET(\callee)	/* Low two lanes are still in %xmm0.  */
+        vmovdqu   80(%rsp), %xmm0	/* Upper two input lanes.  */
+        lea       16(%rsp), %rdi
+        lea       48(%rsp), %rsi
+        call      HIDDEN_JUMPTARGET(\callee)
+        movq      96(%rsp), %rdx	/* Scatter phase: load a saved pointer and the matching staged result, then store.  */
+        movq      104(%rsp), %rsi
+        movq      112(%rsp), %r8
+        movq      120(%rsp), %r10
+        movq      (%rsp), %rax
+        movq      8(%rsp), %rcx
+        movq      16(%rsp), %rdi
+        movq      24(%rsp), %r9
+        movq      %rax, (%rdx)	/* First output, lanes 0 and 1.  */
+        movq      %rcx, (%rsi)
+        movq      128(%rsp), %rax
+        movq      136(%rsp), %rcx
+        movq      %rdi, (%r8)	/* First output, lanes 2 and 3.  */
+        movq      %r9, (%r10)
+        movq      144(%rsp), %rdi
+        movq      152(%rsp), %r9
+        movq      32(%rsp), %r11
+        movq      40(%rsp), %rdx
+        movq      48(%rsp), %rsi
+        movq      56(%rsp), %r8
+        movq      %r11, (%rax)	/* Second output, lanes 0 and 1.  */
+        movq      %rdx, (%rcx)
+        movq      %rsi, (%rdi)	/* Second output, lanes 2 and 3.  */
+        movq      %r8, (%r9)
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else
+        leal    8(%rsp), %r10d	/* %r10 remembers the incoming stack position across the realignment.  */
+        .cfi_def_cfa 10, 0
+        andl    $-32, %esp
+        pushq   -8(%r10d)	/* Copy of the return address on the realigned stack.  */
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r12
+        leal    -80(%rbp), %esi	/* Second-output buffer: -80 .. -49 from %ebp.  */
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x70,0x6
+        .cfi_escape 0x10,0xc,0x2,0x76,0x78
+        leal    -112(%rbp), %edi	/* First-output buffer: -112 .. -81 from %ebp.  */
+        movq    %rsi, %r12	/* Keep both buffer addresses in %r12/%rbx across the calls.  */
+        pushq   %rbx
+        .cfi_escape 0x10,0x3,0x2,0x76,0x68
+        movq    %rdi, %rbx
+        subl    $152, %esp
+        vmovaps %xmm1, -128(%ebp)	/* Four 32-bit destination pointers for the first output.  */
+        vmovaps %xmm2, -144(%ebp)	/* Four 32-bit destination pointers for the second output.  */
+        vmovapd %ymm0, -176(%ebp)
+        vzeroupper
+        call    HIDDEN_JUMPTARGET(\callee)
+        leal    16(%r12), %esi	/* Advance both buffers by two doubles for lanes 2-3.  */
+        vmovupd -160(%ebp), %xmm0	/* Upper two input lanes.  */
+        leal    16(%rbx), %edi
+        call    HIDDEN_JUMPTARGET(\callee)
+        movq    -128(%ebp), %rax	/* Only %eax (pointer 0) is used as the address below.  */
+        vmovsd  -112(%ebp), %xmm0
+        vmovdqa -128(%ebp), %xmm5	/* First-output pointers, for the odd-lane vpextrd picks.  */
+        vmovdqa -144(%ebp), %xmm1	/* Second-output pointers, likewise.  */
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -104(%ebp), %xmm0
+        vpextrd $1, %xmm5, %eax
+        vmovsd  %xmm0, (%eax)
+        movq    -120(%ebp), %rax
+        vmovsd  -96(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -88(%ebp), %xmm0
+        vpextrd $3, %xmm5, %eax
+        vmovsd  %xmm0, (%eax)
+        movq    -144(%ebp), %rax
+        vmovsd  -80(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -72(%ebp), %xmm0
+        vpextrd $1, %xmm1, %eax
+        vmovsd  %xmm0, (%eax)
+        movq    -136(%ebp), %rax
+        vmovsd  -64(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        vmovsd  -56(%ebp), %xmm0
+        vpextrd $3, %xmm1, %eax
+        vmovsd  %xmm0, (%eax)
+        addl    $152, %esp
+        popq    %rbx
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %r12
+        popq    %rbp
+        leal    -8(%r10), %esp	/* Back to the original, pre-alignment stack pointer.  */
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
 ENTRY (_ZGVcN4vvv_sincos)
-WRAPPER_IMPL_AVX_fFF _ZGVbN2vvv_sincos
+WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN2vl8l8_sincos
 END (_ZGVcN4vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
index 68d490e5bc..7cf453872b 100644
--- a/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
+++ b/sysdeps/x86_64/fpu/svml_d_sincos8_core.S
@@ -1,5 +1,5 @@
 /* Function sincos vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,6 +20,172 @@
 #include "svml_d_wrapper_impl.h"
 
 	.text
+ENTRY (_ZGVeN8vl8l8_sincos)	/* 8-lane vl8l8 entry built from the 4-lane vl8l8 one.  */
+WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
+END (_ZGVeN8vl8l8_sincos)
+
+/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
+#ifndef __ILP32__
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp	/* Frame pointer lets the epilogue undo the realignment below.  */
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp	/* 64-byte align the scratch area.  */
+        subq      $320, %rsp	/* Frame: 0-63 first-output x8, 64-127 second-output x8, 128-191 saved %zmm1, 192-255 saved %zmm2, 256-319 saved input.  */
+        vmovups    %zmm0, 256(%rsp)
+        lea       (%rsp), %rdi	/* Lanes 0-3, first output -> slots 0 .. 24.  */
+        vmovups   %zmm1, 128(%rdi)	/* %zmm1 = eight 64-bit destination pointers for the first output.  */
+        vmovups   %zmm2, 192(%rdi)	/* %zmm2 = eight destination pointers for the second output.  */
+        lea       64(%rsp), %rsi	/* Lanes 0-3, second output -> slots 64 .. 88.  */
+        call      HIDDEN_JUMPTARGET(\callee)	/* Low four lanes are still in %ymm0.  */
+        vmovdqu   288(%rsp), %ymm0	/* Upper four input lanes.  */
+        lea       32(%rsp), %rdi
+        lea       96(%rsp), %rsi
+        call      HIDDEN_JUMPTARGET(\callee)
+        movq      128(%rsp), %rdx	/* Scatter phase: load a saved pointer and the matching staged result, then store.  */
+        movq      192(%rsp), %rsi
+        movq      136(%rsp), %r8
+        movq      200(%rsp), %r10
+        movq      (%rsp), %rax
+        movq      64(%rsp), %rcx
+        movq      8(%rsp), %rdi
+        movq      72(%rsp), %r9
+        movq      %rax, (%rdx)	/* Lane 0.  */
+        movq      %rcx, (%rsi)
+        movq      144(%rsp), %rax
+        movq      208(%rsp), %rcx
+        movq      %rdi, (%r8)	/* Lane 1.  */
+        movq      %r9, (%r10)
+        movq      152(%rsp), %rdi
+        movq      216(%rsp), %r9
+        movq      16(%rsp), %r11
+        movq      80(%rsp), %rdx
+        movq      24(%rsp), %rsi
+        movq      88(%rsp), %r8
+        movq      %r11, (%rax)	/* Lane 2.  */
+        movq      %rdx, (%rcx)
+        movq      160(%rsp), %r11
+        movq      224(%rsp), %rdx
+        movq      %rsi, (%rdi)	/* Lane 3.  */
+        movq      %r8, (%r9)
+        movq      168(%rsp), %rsi
+        movq      232(%rsp), %r8
+        movq      32(%rsp), %r10
+        movq      96(%rsp), %rax
+        movq      40(%rsp), %rcx
+        movq      104(%rsp), %rdi
+        movq      %r10, (%r11)	/* Lane 4.  */
+        movq      %rax, (%rdx)
+        movq      176(%rsp), %r10
+        movq      240(%rsp), %rax
+        movq      %rcx, (%rsi)	/* Lane 5.  */
+        movq      %rdi, (%r8)
+        movq      184(%rsp), %rcx
+        movq      248(%rsp), %rdi
+        movq      48(%rsp), %r9
+        movq      112(%rsp), %r11
+        movq      56(%rsp), %rdx
+        movq      120(%rsp), %rsi
+        movq      %r9, (%r10)	/* Lane 6.  */
+        movq      %r11, (%rax)
+        movq      %rdx, (%rcx)	/* Lane 7.  */
+        movq      %rsi, (%rdi)
+        movq      %rbp, %rsp
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else
+        leal    8(%rsp), %r10d	/* %r10 remembers the incoming stack position across the realignment.  */
+        .cfi_def_cfa 10, 0
+        andl    $-64, %esp
+        pushq   -8(%r10d)	/* Copy of the return address on the realigned stack.  */
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r12
+        leal    -112(%rbp), %esi	/* Second-output buffer: -112 .. -49 from %ebp.  */
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x70,0x6
+        .cfi_escape 0x10,0xc,0x2,0x76,0x78
+        leal    -176(%rbp), %edi	/* First-output buffer: -176 .. -113 from %ebp.  */
+        movq    %rsi, %r12	/* Keep both buffer addresses in %r12/%rbx across the calls.  */
+        pushq   %rbx
+        .cfi_escape 0x10,0x3,0x2,0x76,0x68
+        movq    %rdi, %rbx
+        subl    $280, %esp
+        vmovdqa %ymm1, -208(%ebp)	/* Eight 32-bit destination pointers for the first output.  */
+        vmovdqa %ymm2, -240(%ebp)	/* Eight 32-bit destination pointers for the second output.  */
+        vmovapd %zmm0, -304(%ebp)
+        call    HIDDEN_JUMPTARGET(\callee)
+        leal    32(%r12), %esi	/* Advance both buffers by four doubles for lanes 4-7.  */
+        vmovupd -272(%ebp), %ymm0	/* Upper four input lanes.  */
+        leal    32(%rbx), %edi
+        call    HIDDEN_JUMPTARGET(\callee)
+        movl    -208(%ebp), %eax	/* First output: pointer i at -208+4*i, result i at -176+8*i.  */
+        vmovsd  -176(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -204(%ebp), %eax
+        vmovsd  -168(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -200(%ebp), %eax
+        vmovsd  -160(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -196(%ebp), %eax
+        vmovsd  -152(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -192(%ebp), %eax
+        vmovsd  -144(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -188(%ebp), %eax
+        vmovsd  -136(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -184(%ebp), %eax
+        vmovsd  -128(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -180(%ebp), %eax
+        vmovsd  -120(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -240(%ebp), %eax	/* Second output: pointer i at -240+4*i, result i at -112+8*i.  */
+        vmovsd  -112(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -236(%ebp), %eax
+        vmovsd  -104(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -232(%ebp), %eax
+        vmovsd  -96(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -228(%ebp), %eax
+        vmovsd  -88(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -224(%ebp), %eax
+        vmovsd  -80(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -220(%ebp), %eax
+        vmovsd  -72(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -216(%ebp), %eax
+        vmovsd  -64(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        movl    -212(%ebp), %eax
+        vmovsd  -56(%ebp), %xmm0
+        vmovsd  %xmm0, (%eax)
+        addl    $280, %esp
+        popq    %rbx
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %r12
+        popq    %rbp
+        leal    -8(%r10), %esp	/* Back to the original, pre-alignment stack pointer.  */
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
 ENTRY (_ZGVeN8vvv_sincos)
-WRAPPER_IMPL_AVX512_fFF _ZGVdN4vvv_sincos
+WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
 END (_ZGVeN8vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.S b/sysdeps/x86_64/fpu/svml_d_trig_data.S
index 887dacee91..2b148325fc 100644
--- a/sysdeps/x86_64/fpu/svml_d_trig_data.S
+++ b/sysdeps/x86_64/fpu/svml_d_trig_data.S
@@ -1,5 +1,5 @@
 /* Data for vectorized sin, cos, sincos.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_trig_data.h b/sysdeps/x86_64/fpu/svml_d_trig_data.h
index 4617b5e0c3..b9bb5dc6af 100644
--- a/sysdeps/x86_64/fpu/svml_d_trig_data.h
+++ b/sysdeps/x86_64/fpu/svml_d_trig_data.h
@@ -1,5 +1,5 @@
 /* Offsets for data table for vectorized sin, cos, sincos.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
index 54f4f58371..d8452e0c2b 100644
--- a/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_d_wrapper_impl.h
@@ -1,5 +1,5 @@
 /* Wrapper implementations of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -21,10 +21,10 @@
         subq      $40, %rsp
         cfi_adjust_cfa_offset(40)
         movaps    %xmm0, (%rsp)
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movsd     %xmm0, 16(%rsp)
         movsd     8(%rsp), %xmm0
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movsd     16(%rsp), %xmm1
         movsd     %xmm0, 24(%rsp)
         unpcklpd  %xmm0, %xmm1
@@ -40,11 +40,11 @@
         cfi_adjust_cfa_offset(56)
         movaps    %xmm0, (%rsp)
         movaps    %xmm1, 16(%rsp)
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movsd     %xmm0, 32(%rsp)
         movsd     8(%rsp), %xmm0
         movsd     24(%rsp), %xmm1
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movsd     32(%rsp), %xmm1
         movsd     %xmm0, 40(%rsp)
         unpcklpd  %xmm0, %xmm1
@@ -69,7 +69,7 @@
         leaq    16(%rsp), %rsi
         leaq    24(%rsp), %rdi
         movaps  %xmm0, (%rsp)
-        call    \callee@PLT
+        call    JUMPTARGET(\callee)
         leaq    16(%rsp), %rsi
         leaq    24(%rsp), %rdi
         movsd   24(%rsp), %xmm0
@@ -79,7 +79,7 @@
         movsd   16(%rsp), %xmm0
         movsd   %xmm0, (%rbx)
         movapd  %xmm1, %xmm0
-        call    \callee@PLT
+        call    JUMPTARGET(\callee)
         movsd   24(%rsp), %xmm0
         movsd   %xmm0, 8(%rbp)
         movsd   16(%rsp), %xmm0
@@ -201,29 +201,14 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $128, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
+        vmovups   %zmm0, (%rsp)
         vmovupd   (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 64(%rsp)
         vmovupd   32(%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 96(%rsp)
-/* Below is encoding for vmovups 64(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x01
+        vmovups   64(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -241,23 +226,8 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $192, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
-/* Below is encoding for vmovups %zmm1, 64(%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x4c
-        .byte   0x24
-        .byte   0x01
+        vmovups   %zmm0, (%rsp)
+        vmovups   %zmm1, 64(%rsp)
         vmovupd   (%rsp), %ymm0
         vmovupd   64(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
@@ -266,15 +236,7 @@
         vmovupd   96(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 160(%rsp)
-/* Below is encoding for vmovups 128(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x02
+        vmovups   128(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -299,14 +261,7 @@
         cfi_rel_offset (%r13, 0)
         subq      $176, %rsp
         movq      %rsi, %r13
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte	0x62
-        .byte	0xf1
-        .byte	0x7c
-        .byte	0x48
-        .byte	0x11
-        .byte	0x04
-        .byte	0x24
+        vmovups   %zmm0, (%rsp)
         movq    %rdi, %r12
         vmovupd (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
diff --git a/sysdeps/x86_64/fpu/svml_finite_alias.S b/sysdeps/x86_64/fpu/svml_finite_alias.S
index 2dcfc37590..21a9d6d2ee 100644
--- a/sysdeps/x86_64/fpu/svml_finite_alias.S
+++ b/sysdeps/x86_64/fpu/svml_finite_alias.S
@@ -2,7 +2,7 @@
    aliases in libmvec.so while compiler creates the vector names
    based on scalar asm name.  Corresponding discussion is at
    <https://gcc.gnu.org/ml/gcc/2015-06/msg00173.html>.
-   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
index 9ca4fbfaa8..d1a4647082 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf16_core.S
@@ -1,5 +1,5 @@
 /* Function cosf vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
index 363090c54a..d58ccecc09 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf4_core.S
@@ -1,5 +1,5 @@
 /* Function cosf vectorized with SSE2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
index 26a6a4e4d6..f9dc74fc49 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core.S
@@ -1,5 +1,5 @@
 /* Function cosf vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
index 6c210d98ce..45f14e23df 100644
--- a/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_cosf8_core_avx.S
@@ -1,5 +1,5 @@
 /* Function cosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf16_core.S b/sysdeps/x86_64/fpu/svml_s_expf16_core.S
index d8eecac674..4e18b6f544 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf16_core.S
@@ -1,5 +1,5 @@
 /* Function expf vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf4_core.S b/sysdeps/x86_64/fpu/svml_s_expf4_core.S
index 65b5d1a3ce..a2a6209621 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf4_core.S
@@ -1,5 +1,5 @@
 /* Function expf vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (_ZGVbN4v_expf)
-WRAPPER_IMPL_SSE2 expf
+WRAPPER_IMPL_SSE2 __expf_finite
 END (_ZGVbN4v_expf)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core.S b/sysdeps/x86_64/fpu/svml_s_expf8_core.S
index e3cf975bf6..46297208cd 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf8_core.S
@@ -1,5 +1,5 @@
 /* Function expf vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S
index 90469d7dcf..1210dcf885 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf8_core_avx.S
@@ -1,5 +1,5 @@
 /* Function expf vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.S b/sysdeps/x86_64/fpu/svml_s_expf_data.S
index 4b644082b6..a1cb6e7591 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_expf_data.S
@@ -1,5 +1,5 @@
 /* Data for function expf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_expf_data.h b/sysdeps/x86_64/fpu/svml_s_expf_data.h
index 3610633c96..56a1d8bdf6 100644
--- a/sysdeps/x86_64/fpu/svml_s_expf_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_expf_data.h
@@ -1,5 +1,5 @@
 /* Offsets for data table for vector function expf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf16_core.S b/sysdeps/x86_64/fpu/svml_s_logf16_core.S
index cc2e97df78..e1f4b0cf0c 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf16_core.S
@@ -1,5 +1,5 @@
 /* Function logf vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf4_core.S b/sysdeps/x86_64/fpu/svml_s_logf4_core.S
index 195f328d92..496b93ffa6 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf4_core.S
@@ -1,5 +1,5 @@
 /* Function logf vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (_ZGVbN4v_logf)
-WRAPPER_IMPL_SSE2 logf
+WRAPPER_IMPL_SSE2 __logf_finite
 END (_ZGVbN4v_logf)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core.S b/sysdeps/x86_64/fpu/svml_s_logf8_core.S
index 8bb6926667..f0ccee7205 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf8_core.S
@@ -1,5 +1,5 @@
 /* Function logf vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S
index c2efba23f2..1ddd0381cd 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf8_core_avx.S
@@ -1,5 +1,5 @@
 /* Function logf vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.S b/sysdeps/x86_64/fpu/svml_s_logf_data.S
index a5675f5c7a..154f98c2e0 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_logf_data.S
@@ -1,5 +1,5 @@
 /* Data for vector function logf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_logf_data.h b/sysdeps/x86_64/fpu/svml_s_logf_data.h
index 619d5c4bd1..82a9903b10 100644
--- a/sysdeps/x86_64/fpu/svml_s_logf_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_logf_data.h
@@ -1,5 +1,5 @@
 /* Offsets for data table for vectorized function logf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf16_core.S b/sysdeps/x86_64/fpu/svml_s_powf16_core.S
index cb52af0c6b..0859996d0a 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf16_core.S
@@ -1,5 +1,5 @@
 /* Function powf vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf4_core.S b/sysdeps/x86_64/fpu/svml_s_powf4_core.S
index 88fae60892..4276e6ea28 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf4_core.S
@@ -1,5 +1,5 @@
 /* Function powf vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -21,7 +21,7 @@
 
 	.text
 ENTRY (_ZGVbN4vv_powf)
-WRAPPER_IMPL_SSE2_ff powf
+WRAPPER_IMPL_SSE2_ff __powf_finite
 END (_ZGVbN4vv_powf)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core.S b/sysdeps/x86_64/fpu/svml_s_powf8_core.S
index 8ea44897c1..764dc99ee7 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf8_core.S
@@ -1,5 +1,5 @@
 /* Function powf vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S
index b5e4e5e6ef..8bb1ef22fd 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf8_core_avx.S
@@ -1,5 +1,5 @@
 /* Function powf vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.S b/sysdeps/x86_64/fpu/svml_s_powf_data.S
index fc1a3d9390..74a31abd1e 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_powf_data.S
@@ -1,5 +1,5 @@
 /* Data for function powf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_powf_data.h b/sysdeps/x86_64/fpu/svml_s_powf_data.h
index 514004238a..5d3270cf27 100644
--- a/sysdeps/x86_64/fpu/svml_s_powf_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_powf_data.h
@@ -1,5 +1,5 @@
 /* Offsets for data table for function powf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
index 5cbf10b8da..40eb974a74 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf16_core.S
@@ -1,5 +1,5 @@
 /* Function sincosf vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,6 +20,270 @@
 #include "svml_s_wrapper_impl.h"
 
 	.text
+ENTRY (_ZGVeN16vl4l4_sincosf)  /* 16-lane entry; the name encodes two linear (step 4) pointer arguments.  */
+WRAPPER_IMPL_AVX512_fFF _ZGVdN8vl4l4_sincosf
+END (_ZGVeN16vl4l4_sincosf)
+
+/* AVX512 ISA version as wrapper to AVX2 ISA version (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_AVX512_fFF_vvv callee  /* Calls callee once per 8-lane half, then scatters both result buffers through the per-lane pointers.  */
+#ifndef __ILP32__  /* LP64: each per-lane pointer is 8 bytes, so 16 pointers need two zmm registers.  */
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp
+        cfi_def_cfa_register (%rbp)
+        andq      $-64, %rsp  /* Align the frame to 64 bytes.  */
+        subq      $448, %rsp
+        vmovups   %zmm0, 384(%rsp)  /* Spill the 16 input lanes.  */
+        lea       (%rsp), %rdi  /* 1st result buffer: 0..63(%rsp).  */
+        vmovups   %zmm1, 128(%rdi)  /* 1st-result pointers, lanes 0-7.  */
+        vmovups   %zmm2, 192(%rdi)  /* 1st-result pointers, lanes 8-15.  */
+        vmovups   %zmm3, 256(%rdi)  /* 2nd-result pointers, lanes 0-7.  */
+        vmovups   %zmm4, 320(%rdi)  /* 2nd-result pointers, lanes 8-15.  */
+        lea       64(%rsp), %rsi  /* 2nd result buffer: 64..127(%rsp).  */
+        call      HIDDEN_JUMPTARGET(\callee)  /* Lanes 0-7: %ymm0 is still the low half of %zmm0.  */
+        vmovdqu   416(%rsp), %ymm0  /* Lanes 8-15 from the spilled input.  */
+        lea       32(%rsp), %rdi  /* Upper half of the 1st result buffer.  */
+        lea       96(%rsp), %rsi  /* Upper half of the 2nd result buffer.  */
+        call      HIDDEN_JUMPTARGET(\callee)
+        movq      128(%rsp), %rdx  /* Scatter: the pointer at 128+8*i(%rsp) receives the dword at 4*i(%rsp).  */
+        movq      136(%rsp), %rsi
+        movq      144(%rsp), %r8
+        movq      152(%rsp), %r10
+        movl      (%rsp), %eax
+        movl      4(%rsp), %ecx
+        movl      8(%rsp), %edi
+        movl      12(%rsp), %r9d
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      160(%rsp), %rax
+        movq      168(%rsp), %rcx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      176(%rsp), %rdi
+        movq      184(%rsp), %r9
+        movl      16(%rsp), %r11d
+        movl      20(%rsp), %edx
+        movl      24(%rsp), %esi
+        movl      28(%rsp), %r8d
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movq      192(%rsp), %r11
+        movq      200(%rsp), %rdx
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        movq      208(%rsp), %rsi
+        movq      216(%rsp), %r8
+        movl      32(%rsp), %r10d
+        movl      36(%rsp), %eax
+        movl      40(%rsp), %ecx
+        movl      44(%rsp), %edi
+        movl      %r10d, (%r11)
+        movl      %eax, (%rdx)
+        movq      224(%rsp), %r10
+        movq      232(%rsp), %rax
+        movl      %ecx, (%rsi)
+        movl      %edi, (%r8)
+        movq      240(%rsp), %rcx
+        movq      248(%rsp), %rdi
+        movl      48(%rsp), %r9d
+        movl      52(%rsp), %r11d
+        movl      56(%rsp), %edx
+        movl      60(%rsp), %esi
+        movl      %r9d, (%r10)
+        movl      %r11d, (%rax)
+        movq      256(%rsp), %r9  /* First 2nd-result pointer: 256+8*i(%rsp) pairs with the dword at 64+4*i(%rsp).  */
+        movq      264(%rsp), %r11
+        movl      %edx, (%rcx)
+        movl      %esi, (%rdi)
+        movq      272(%rsp), %rdx
+        movq      280(%rsp), %rsi
+        movl      64(%rsp), %r8d
+        movl      68(%rsp), %r10d
+        movl      72(%rsp), %eax
+        movl      76(%rsp), %ecx
+        movl      %r8d, (%r9)
+        movl      %r10d, (%r11)
+        movq      288(%rsp), %r8
+        movq      296(%rsp), %r10
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      304(%rsp), %rax
+        movq      312(%rsp), %rcx
+        movl      80(%rsp), %edi
+        movl      84(%rsp), %r9d
+        movl      88(%rsp), %r11d
+        movl      92(%rsp), %edx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      320(%rsp), %rdi
+        movq      328(%rsp), %r9
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movq      336(%rsp), %r11
+        movq      344(%rsp), %rdx
+        movl      96(%rsp), %esi
+        movl      100(%rsp), %r8d
+        movl      104(%rsp), %r10d
+        movl      108(%rsp), %eax
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        movq      352(%rsp), %rsi
+        movq      360(%rsp), %r8
+        movl      %r10d, (%r11)
+        movl      %eax, (%rdx)
+        movq      368(%rsp), %r10
+        movq      376(%rsp), %rax
+        movl      112(%rsp), %ecx
+        movl      116(%rsp), %edi
+        movl      120(%rsp), %r9d
+        movl      124(%rsp), %r11d
+        movl      %ecx, (%rsi)
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movl      %r11d, (%rax)
+        movq      %rbp, %rsp  /* Drop the aligned frame.  */
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else  /* x32: each per-lane pointer is 4 bytes, so 16 pointers fit in one zmm register.  */
+        leal    8(%rsp), %r10d  /* %r10 = CFA (see the cfi note below); restored in the epilogue.  */
+        .cfi_def_cfa 10, 0
+        andl    $-64, %esp  /* Align the stack to 64 bytes.  */
+        pushq   -8(%r10d)  /* Re-push the return address below the aligned boundary.  */
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r12
+        leal    -112(%rbp), %esi  /* 2nd result buffer: -112..-49(%ebp).  */
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x70,0x6
+        .cfi_escape 0x10,0xc,0x2,0x76,0x78
+        leal    -176(%rbp), %edi  /* 1st result buffer: -176..-113(%ebp).  */
+        movq    %rsi, %r12  /* Keep both buffer addresses for the second call.  */
+        pushq   %rbx
+        .cfi_escape 0x10,0x3,0x2,0x76,0x68
+        movq    %rdi, %rbx
+        subl    $344, %esp
+        vmovdqa64 %zmm1, -240(%ebp)  /* 16 1st-result pointers.  */
+        vmovdqa64 %zmm2, -304(%ebp)  /* 16 2nd-result pointers.  */
+        vmovaps   %zmm0, -368(%ebp)  /* Spill the 16 input lanes.  */
+        call    HIDDEN_JUMPTARGET(\callee)  /* Lanes 0-7: %ymm0 is still the low half of %zmm0.  */
+        leal    32(%r12), %esi  /* Upper half of the 2nd result buffer.  */
+        vmovups -336(%ebp), %ymm0  /* Lanes 8-15 from the spilled input.  */
+        leal    32(%rbx), %edi  /* Upper half of the 1st result buffer.  */
+        call    HIDDEN_JUMPTARGET(\callee)
+        movl    -240(%ebp), %eax  /* Scatter: the pointer at -240+4*i(%ebp) receives the dword at -176+4*i(%ebp).  */
+        vmovss  -176(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -236(%ebp), %eax
+        vmovss  -172(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -232(%ebp), %eax
+        vmovss  -168(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -228(%ebp), %eax
+        vmovss  -164(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -224(%ebp), %eax
+        vmovss  -160(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -220(%ebp), %eax
+        vmovss  -156(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -216(%ebp), %eax
+        vmovss  -152(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -212(%ebp), %eax
+        vmovss  -148(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -208(%ebp), %eax
+        vmovss  -144(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -204(%ebp), %eax
+        vmovss  -140(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -200(%ebp), %eax
+        vmovss  -136(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -196(%ebp), %eax
+        vmovss  -132(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -192(%ebp), %eax
+        vmovss  -128(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -188(%ebp), %eax
+        vmovss  -124(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -184(%ebp), %eax
+        vmovss  -120(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -180(%ebp), %eax
+        vmovss  -116(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -304(%ebp), %eax  /* 2nd results: the pointer at -304+4*i(%ebp) receives the dword at -112+4*i(%ebp).  */
+        vmovss  -112(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -300(%ebp), %eax
+        vmovss  -108(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -296(%ebp), %eax
+        vmovss  -104(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -292(%ebp), %eax
+        vmovss  -100(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -288(%ebp), %eax
+        vmovss  -96(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -284(%ebp), %eax
+        vmovss  -92(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -280(%ebp), %eax
+        vmovss  -88(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -276(%ebp), %eax
+        vmovss  -84(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -272(%ebp), %eax
+        vmovss  -80(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -268(%ebp), %eax
+        vmovss  -76(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -264(%ebp), %eax
+        vmovss  -72(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -260(%ebp), %eax
+        vmovss  -68(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -256(%ebp), %eax
+        vmovss  -64(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -252(%ebp), %eax
+        vmovss  -60(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -248(%ebp), %eax
+        vmovss  -56(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -244(%ebp), %eax
+        vmovss  -52(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        addl    $344, %esp  /* Release the locals; the saved registers are popped in reverse push order.  */
+        popq    %rbx
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %r12
+        popq    %rbp
+        leal    -8(%r10), %esp  /* Back to the original stack pointer (the entry value was %r10 - 8).  */
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
 ENTRY (_ZGVeN16vvv_sincosf)
-WRAPPER_IMPL_AVX512_fFF _ZGVdN8vvv_sincosf
+WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN8vl4l4_sincosf
 END (_ZGVeN16vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
index 1a7d2733af..5daa5118d6 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf4_core.S
@@ -1,5 +1,5 @@
 /* Function sincosf vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,135 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-
 #include <sysdep.h>
 #include "svml_s_wrapper_impl.h"
 
 	.text
-ENTRY (_ZGVbN4vvv_sincosf)
+ENTRY (_ZGVbN4vl4l4_sincosf)  /* 4-lane entry; the name encodes two linear (step 4) pointer arguments.  */
 WRAPPER_IMPL_SSE2_fFF sincosf
+END (_ZGVbN4vl4l4_sincosf)
+libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)  /* NOTE(review): presumably defines the hidden alias that HIDDEN_JUMPTARGET callers bind to -- confirm.  */
+
+/* SSE2 ISA version as wrapper to scalar (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_SSE2_fFF_vvv callee  /* Calls callee once per lane, then scatters both result buffers through the per-lane pointers.  Only SSE2 instructions may be used here.  */
+#ifndef __ILP32__  /* LP64: each per-lane pointer is 8 bytes, so 4 pointers need two xmm registers.  */
+        subq      $120, %rsp
+        cfi_adjust_cfa_offset(120)
+        movaps    %xmm0, 96(%rsp)  /* Spill the 4 input lanes.  */
+        lea       (%rsp), %rdi  /* 1st result buffer: 0..15(%rsp).  */
+        movdqa    %xmm1, 32(%rdi)  /* 1st-result pointers, lanes 0-1.  */
+        lea       16(%rsp), %rsi  /* 2nd result buffer: 16..31(%rsp).  */
+        movdqa    %xmm2, 32(%rsi)  /* 1st-result pointers, lanes 2-3 (48(%rsp)).  */
+        movdqa    %xmm3, 48(%rsi)  /* 2nd-result pointers, lanes 0-1 (64(%rsp)).  */
+        movdqa    %xmm4, 64(%rsi)  /* 2nd-result pointers, lanes 2-3 (80(%rsp)).  */
+        call      JUMPTARGET(\callee)  /* Lane 0: still in the low element of %xmm0.  */
+        movss     100(%rsp), %xmm0  /* Lane 1.  */
+        lea       4(%rsp), %rdi
+        lea       20(%rsp), %rsi
+        call      JUMPTARGET(\callee)
+        movss     104(%rsp), %xmm0  /* Lane 2.  */
+        lea       8(%rsp), %rdi
+        lea       24(%rsp), %rsi
+        call      JUMPTARGET(\callee)
+        movss     108(%rsp), %xmm0  /* Lane 3.  */
+        lea       12(%rsp), %rdi
+        lea       28(%rsp), %rsi
+        call      JUMPTARGET(\callee)
+        movq      32(%rsp), %rdx  /* Scatter: the pointer at 32+8*i(%rsp) receives the dword at 4*i(%rsp).  */
+        movq      40(%rsp), %rsi
+        movq      48(%rsp), %r8
+        movq      56(%rsp), %r10
+        movl      (%rsp), %eax
+        movl      4(%rsp), %ecx
+        movl      8(%rsp), %edi
+        movl      12(%rsp), %r9d
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      64(%rsp), %rax  /* 2nd results: the pointer at 64+8*i(%rsp) receives the dword at 16+4*i(%rsp).  */
+        movq      72(%rsp), %rcx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      80(%rsp), %rdi
+        movq      88(%rsp), %r9
+        movl      16(%rsp), %r11d
+        movl      20(%rsp), %edx
+        movl      24(%rsp), %esi
+        movl      28(%rsp), %r8d
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        addq      $120, %rsp
+        cfi_adjust_cfa_offset(-120)
+        ret
+#else  /* x32: each per-lane pointer is 4 bytes, so 4 pointers fit in one xmm register.  */
+        pushq   %rbp
+        .cfi_def_cfa_offset 16
+        .cfi_offset 6, -16
+        pushq   %rbx
+        .cfi_def_cfa_offset 24
+        .cfi_offset 3, -24
+        subl    $88, %esp
+        .cfi_def_cfa_offset 112
+        leal    64(%rsp), %esi  /* 2nd result buffer: 64..79(%esp).  */
+        movaps  %xmm1, (%esp)  /* 4 1st-result pointers.  */
+        leal    48(%rsp), %edi  /* 1st result buffer: 48..63(%esp).  */
+        movaps  %xmm2, 16(%esp)  /* 4 2nd-result pointers.  */
+        movq    %rsi, %rbp  /* Keep both buffer addresses across the calls.  */
+        movq    %rdi, %rbx
+        movaps  %xmm0, 32(%esp)  /* Spill the 4 input lanes.  */
+        call    JUMPTARGET(\callee)  /* Lane 0: still in the low element of %xmm0.  */
+        movups  36(%esp), %xmm0  /* Lane 1 lands in the low element.  */
+        leal    4(%rbp), %esi
+        leal    4(%rbx), %edi
+        call    JUMPTARGET(\callee)
+        movups  40(%esp), %xmm0  /* Lane 2.  */
+        leal    8(%rbp), %esi
+        leal    8(%rbx), %edi
+        call    JUMPTARGET(\callee)
+        movups  44(%esp), %xmm0  /* Lane 3.  */
+        leal    12(%rbp), %esi
+        leal    12(%rbx), %edi
+        call    JUMPTARGET(\callee)
+        movq    (%esp), %rax  /* Scatter: only the low 32 bits (pointer 0) are used below.  */
+        movss   48(%esp), %xmm0
+        /* Odd-numbered pointers are reloaded from their spill slots with
+           plain movl: pextrd is SSE4.1 and must not be used in SSE2 code.  */
+        movss   %xmm0, (%eax)
+        movss   52(%esp), %xmm0
+        movl    4(%esp), %eax
+        movss   %xmm0, (%eax)
+        movq    8(%esp), %rax
+        movss   56(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movss   60(%esp), %xmm0
+        movl    12(%esp), %eax
+        movss   %xmm0, (%eax)
+        movq    16(%esp), %rax  /* 2nd results: the pointer at 16+4*i(%esp) receives the dword at 64+4*i(%esp).  */
+        movss   64(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movss   68(%esp), %xmm0
+        movl    20(%esp), %eax
+        movss   %xmm0, (%eax)
+        movq    24(%esp), %rax
+        movss   72(%esp), %xmm0
+        movss   %xmm0, (%eax)
+        movss   76(%esp), %xmm0
+        movl    28(%esp), %eax
+        movss   %xmm0, (%eax)
+        addl    $88, %esp
+        .cfi_def_cfa_offset 24
+        popq    %rbx
+        .cfi_def_cfa_offset 16
+        popq    %rbp
+        .cfi_def_cfa_offset 8
+        ret
+#endif
+.endm
+
+ENTRY (_ZGVbN4vvv_sincosf)
+WRAPPER_IMPL_SSE2_fFF_vvv sincosf
 END (_ZGVbN4vvv_sincosf)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
index 74d1dfd1a8..d6d4600d10 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core.S
@@ -1,5 +1,5 @@
 /* Function sincosf vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,8 +20,179 @@
 #include "svml_s_wrapper_impl.h"
 
 	.text
+ENTRY (_ZGVdN8vl4l4_sincosf)  /* Body is the WRAPPER_IMPL_AVX_fFF expansion below, with _ZGVbN4vl4l4_sincosf as callee.  */
+WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf  /* NOTE(review): macro presumably comes from svml_s_wrapper_impl.h, included above -- confirm.  */
+END (_ZGVdN8vl4l4_sincosf)
+libmvec_hidden_def (_ZGVdN8vl4l4_sincosf)  /* NOTE(review): presumably defines the hidden alias that HIDDEN_JUMPTARGET callers bind to -- confirm.  */
+
+/* AVX2 ISA version as wrapper to SSE ISA version (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_AVX2_fFF_vvv callee  /* callee is invoked twice, once per 4-float half of %ymm0.  */
+#ifndef __ILP32__
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp  /* Keep the incoming %rsp so the realigned stack can be undone.  */
+        cfi_def_cfa_register (%rbp)
+        andq      $-32, %rsp  /* Align the scratch area to 32 bytes.  */
+        subq      $224, %rsp  /* 0-63: result buffers, 64-191: pointers, 192-223: input.  */
+        vmovups   %ymm0, 192(%rsp)  /* Spill the 8 input floats.  */
+        lea       (%rsp), %rdi  /* First result buffer, 0(%rsp).  */
+        vmovdqu   %ymm1, 64(%rdi)  /* %ymm1-%ymm2: destinations for the first buffer.  */
+        vmovdqu   %ymm2, 96(%rdi)
+        vmovdqu   %ymm3, 128(%rdi)  /* %ymm3-%ymm4: destinations for the second buffer.  */
+        vmovdqu   %ymm4, 160(%rdi)
+        lea       32(%rsp), %rsi  /* Second result buffer, 32(%rsp).  */
+	vzeroupper  /* Zero the upper YMM halves before entering the SSE callee.  */
+        call      HIDDEN_JUMPTARGET(\callee)  /* %xmm0 still holds input lanes 0-3.  */
+        vmovups   208(%rsp), %xmm0  /* Input lanes 4-7.  */
+        lea       16(%rsp), %rdi  /* Upper half of the first result buffer.  */
+        lea       48(%rsp), %rsi  /* Upper half of the second result buffer.  */
+        call      HIDDEN_JUMPTARGET(\callee)
+        movq      64(%rsp), %rdx  /* Scatter: 32-bit result k is stored through saved pointer k.  */
+        movq      72(%rsp), %rsi
+        movq      80(%rsp), %r8
+        movq      88(%rsp), %r10
+        movl      (%rsp), %eax  /* First buffer, lanes 0-3.  */
+        movl      4(%rsp), %ecx
+        movl      8(%rsp), %edi
+        movl      12(%rsp), %r9d
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      96(%rsp), %rax
+        movq      104(%rsp), %rcx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      112(%rsp), %rdi
+        movq      120(%rsp), %r9
+        movl      16(%rsp), %r11d  /* First buffer, lanes 4-7.  */
+        movl      20(%rsp), %edx
+        movl      24(%rsp), %esi
+        movl      28(%rsp), %r8d
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movq      128(%rsp), %r11  /* From here on: pointers saved from %ymm3-%ymm4.  */
+        movq      136(%rsp), %rdx
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        movq      144(%rsp), %rsi
+        movq      152(%rsp), %r8
+        movl      32(%rsp), %r10d  /* Second buffer, lanes 0-3.  */
+        movl      36(%rsp), %eax
+        movl      40(%rsp), %ecx
+        movl      44(%rsp), %edi
+        movl      %r10d, (%r11)
+        movl      %eax, (%rdx)
+        movq      160(%rsp), %r10
+        movq      168(%rsp), %rax
+        movl      %ecx, (%rsi)
+        movl      %edi, (%r8)
+        movq      176(%rsp), %rcx
+        movq      184(%rsp), %rdi
+        movl      48(%rsp), %r9d  /* Second buffer, lanes 4-7.  */
+        movl      52(%rsp), %r11d
+        movl      56(%rsp), %edx
+        movl      60(%rsp), %esi
+        movl      %r9d, (%r10)
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movl      %esi, (%rdi)
+        movq      %rbp, %rsp  /* Drop the aligned scratch area.  */
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else
+        leal    8(%rsp), %r10d  /* %r10d = entry %rsp + 8, used as the CFA below.  */
+        .cfi_def_cfa 10, 0
+        andl    $-32, %esp  /* Align the stack to 32 bytes.  */
+        pushq   -8(%r10d)  /* Copy the return address onto the realigned stack.  */
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r12
+        leal    -80(%rbp), %esi  /* Second result buffer.  */
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x70,0x6
+        .cfi_escape 0x10,0xc,0x2,0x76,0x78
+        leal    -112(%rbp), %edi  /* First result buffer.  */
+        movq    %rsi, %r12  /* Buffer addresses are kept in %r12/%rbx across the first call.  */
+        pushq   %rbx
+        .cfi_escape 0x10,0x3,0x2,0x76,0x68
+        movq    %rdi, %rbx
+        subl    $184, %esp
+        vmovdqa %ymm1, -144(%ebp)  /* Eight 32-bit destinations for the first buffer.  */
+        vmovdqa %ymm2, -176(%ebp)  /* Eight 32-bit destinations for the second buffer.  */
+        vmovaps %ymm0, -208(%ebp)  /* Spill the 8 input floats.  */
+	vzeroupper  /* Zero the upper YMM halves before entering the SSE callee.  */
+        call    HIDDEN_JUMPTARGET(\callee)  /* %xmm0 still holds input lanes 0-3.  */
+        leal    16(%r12), %esi
+        vmovups -192(%ebp), %xmm0  /* Input lanes 4-7.  */
+        leal    16(%rbx), %edi
+        call    HIDDEN_JUMPTARGET(\callee)
+        movl    -144(%ebp), %eax  /* Scatter the first buffer (-112..-84) through the pointers at -144..-116.  */
+        vmovss  -112(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -140(%ebp), %eax
+        vmovss  -108(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -136(%ebp), %eax
+        vmovss  -104(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -132(%ebp), %eax
+        vmovss  -100(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -128(%ebp), %eax
+        vmovss  -96(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -124(%ebp), %eax
+        vmovss  -92(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -120(%ebp), %eax
+        vmovss  -88(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -116(%ebp), %eax
+        vmovss  -84(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -176(%ebp), %eax  /* Scatter the second buffer (-80..-52) through the pointers at -176..-148.  */
+        vmovss  -80(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -172(%ebp), %eax
+        vmovss  -76(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -168(%ebp), %eax
+        vmovss  -72(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -164(%ebp), %eax
+        vmovss  -68(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -160(%ebp), %eax
+        vmovss  -64(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -156(%ebp), %eax
+        vmovss  -60(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -152(%ebp), %eax
+        vmovss  -56(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        movl    -148(%ebp), %eax
+        vmovss  -52(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        addl    $184, %esp
+        popq    %rbx
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %r12
+        popq    %rbp
+        leal    -8(%r10), %esp  /* Back to the entry stack pointer, which addresses the return address.  */
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
 ENTRY (_ZGVdN8vvv_sincosf)
-WRAPPER_IMPL_AVX_fFF _ZGVbN4vvv_sincosf
+WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN4vl4l4_sincosf  /* Calls _ZGVbN4vl4l4_sincosf on stack buffers, then scatters through the pointer vectors.  */
 END (_ZGVdN8vvv_sincosf)
 
 #ifndef USE_MULTIARCH
diff --git a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
index 55b8b2d768..585e6d87c4 100644
--- a/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_sincosf8_core_avx.S
@@ -1,5 +1,5 @@
 /* Function sincosf vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,6 +20,185 @@
 #include "svml_s_wrapper_impl.h"
 
         .text
-ENTRY(_ZGVcN8vvv_sincosf)
-WRAPPER_IMPL_AVX_fFF _ZGVbN4vvv_sincosf
-END(_ZGVcN8vvv_sincosf)
+ENTRY (_ZGVcN8vl4l4_sincosf)  /* Body is the WRAPPER_IMPL_AVX_fFF expansion below, with _ZGVbN4vl4l4_sincosf as callee.  */
+WRAPPER_IMPL_AVX_fFF _ZGVbN4vl4l4_sincosf
+END (_ZGVcN8vl4l4_sincosf)
+
+/* AVX ISA version as wrapper to SSE ISA version (for vector
+   function declared with #pragma omp declare simd notinbranch).  */
+.macro WRAPPER_IMPL_AVX_fFF_vvv callee  /* callee is invoked twice, once per 4-float half of %ymm0.  */
+#ifndef __ILP32__
+        pushq     %rbp
+        cfi_adjust_cfa_offset (8)
+        cfi_rel_offset (%rbp, 0)
+        movq      %rsp, %rbp  /* Keep the incoming %rsp: needed to undo the realignment and to reach stack arguments.  */
+        cfi_def_cfa_register (%rbp)
+        andq      $-32, %rsp  /* Align the scratch area to 32 bytes.  */
+        subq      $224, %rsp  /* 0-63: result buffers, 64-95: input, 96-207: pointers.  */
+        vmovups   %ymm0, 64(%rsp)  /* Spill the 8 input floats.  */
+        lea       (%rsp), %rdi  /* First result buffer, 0(%rsp).  */
+        vmovdqu   %xmm1, 96(%rdi)  /* %xmm1-%xmm4: destinations for the first buffer.  */
+        vmovdqu   %xmm2, 112(%rdi)
+        vmovdqu   %xmm3, 128(%rdi)
+        vmovdqu   %xmm4, 144(%rdi)
+        vmovdqu   %xmm5, 160(%rdi)  /* %xmm5-%xmm7: first six destinations for the second buffer.  */
+        lea       32(%rsp), %rsi  /* Second result buffer, 32(%rsp).  */
+        vmovdqu   %xmm6, 144(%rsi)  /* Same as 176(%rsp).  */
+        vmovdqu   %xmm7, 160(%rsi)  /* Same as 192(%rsp).  */
+        vzeroupper  /* Zero the upper YMM halves before entering the SSE callee.  */
+        call      HIDDEN_JUMPTARGET(\callee)  /* %xmm0 still holds input lanes 0-3.  */
+        vmovdqu   80(%rsp), %xmm0  /* Input lanes 4-7.  */
+        lea       16(%rsp), %rdi  /* Upper half of the first result buffer.  */
+        lea       48(%rsp), %rsi  /* Upper half of the second result buffer.  */
+        call      HIDDEN_JUMPTARGET(\callee)
+        movq      96(%rsp), %rdx  /* Scatter: 32-bit result k is stored through saved pointer k.  */
+        movq      104(%rsp), %rsi
+        movq      112(%rsp), %r8
+        movq      120(%rsp), %r10
+        movl      (%rsp), %eax  /* First buffer, lanes 0-3.  */
+        movl      4(%rsp), %ecx
+        movl      8(%rsp), %edi
+        movl      12(%rsp), %r9d
+        movl      %eax, (%rdx)
+        movl      %ecx, (%rsi)
+        movq      128(%rsp), %rax
+        movq      136(%rsp), %rcx
+        movl      %edi, (%r8)
+        movl      %r9d, (%r10)
+        movq      144(%rsp), %rdi
+        movq      152(%rsp), %r9
+        movl      16(%rsp), %r11d  /* First buffer, lanes 4-7.  */
+        movl      20(%rsp), %edx
+        movl      24(%rsp), %esi
+        movl      28(%rsp), %r8d
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movq      160(%rsp), %r11  /* From here on: pointers saved from %xmm5-%xmm7.  */
+        movq      168(%rsp), %rdx
+        movl      %esi, (%rdi)
+        movl      %r8d, (%r9)
+        movq      176(%rsp), %rsi
+        movq      184(%rsp), %r8
+        movl      32(%rsp), %r10d  /* Second buffer, lanes 0-3.  */
+        movl      36(%rsp), %eax
+        movl      40(%rsp), %ecx
+        movl      44(%rsp), %edi
+        movl      %r10d, (%r11)
+        movl      %eax, (%rdx)
+        movq      192(%rsp), %r10
+        movq      200(%rsp), %rax
+        movl      %ecx, (%rsi)
+        movl      %edi, (%r8)
+        movq      16(%rbp), %rcx  /* Last two destinations are read from above the return address, not from a register.  */
+        movq      24(%rbp), %rdi
+        movl      48(%rsp), %r9d  /* Second buffer, lanes 4-7.  */
+        movl      52(%rsp), %r11d
+        movl      56(%rsp), %edx
+        movl      60(%rsp), %esi
+        movl      %r9d, (%r10)
+        movl      %r11d, (%rax)
+        movl      %edx, (%rcx)
+        movl      %esi, (%rdi)
+        movq      %rbp, %rsp  /* Drop the aligned scratch area.  */
+        cfi_def_cfa_register (%rsp)
+        popq      %rbp
+        cfi_adjust_cfa_offset (-8)
+        cfi_restore (%rbp)
+        ret
+#else
+        leal    8(%rsp), %r10d  /* %r10d = entry %rsp + 8, used as the CFA below.  */
+        .cfi_def_cfa 10, 0
+        andl    $-32, %esp  /* Align the stack to 32 bytes.  */
+        pushq   -8(%r10d)  /* Copy the return address onto the realigned stack.  */
+        pushq   %rbp
+        .cfi_escape 0x10,0x6,0x2,0x76,0
+        movl    %esp, %ebp
+        pushq   %r12
+        leal    -80(%rbp), %esi  /* Second result buffer.  */
+        pushq   %r10
+        .cfi_escape 0xf,0x3,0x76,0x70,0x6
+        .cfi_escape 0x10,0xc,0x2,0x76,0x78
+        leal    -112(%rbp), %edi  /* First result buffer.  */
+        movq    %rsi, %r12  /* Buffer addresses are kept in %r12/%rbx across the first call.  */
+        pushq   %rbx
+        .cfi_escape 0x10,0x3,0x2,0x76,0x68
+        movq    %rdi, %rbx
+        subl    $184, %esp
+        vmovaps %xmm1, -128(%ebp)  /* %xmm1-%xmm2: eight 32-bit destinations for the first buffer.  */
+        vmovaps %xmm2, -144(%ebp)
+        vmovaps %xmm3, -160(%ebp)  /* %xmm3-%xmm4: eight 32-bit destinations for the second buffer.  */
+        vmovaps %xmm4, -176(%ebp)
+        vmovaps %ymm0, -208(%ebp)  /* Spill the 8 input floats.  */
+        vzeroupper  /* Zero the upper YMM halves before entering the SSE callee.  */
+        call    HIDDEN_JUMPTARGET(\callee)  /* %xmm0 still holds input lanes 0-3.  */
+        leal    16(%r12), %esi
+        vmovups -192(%ebp), %xmm0  /* Input lanes 4-7.  */
+        leal    16(%rbx), %edi
+        call    HIDDEN_JUMPTARGET(\callee)
+        movq    -128(%ebp), %rax  /* Only %eax is used as the address; the upper half is the next pointer.  */
+        vmovss  -112(%ebp), %xmm0  /* First buffer, lanes 0-3, through the pointers from %xmm1.  */
+        vmovdqa -128(%ebp), %xmm7
+        vmovdqa -144(%ebp), %xmm3
+        vmovss  %xmm0, (%eax)
+        vmovss  -108(%ebp), %xmm0
+        vpextrd $1, %xmm7, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -120(%ebp), %rax
+        vmovss  -104(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -100(%ebp), %xmm0
+        vpextrd $3, %xmm7, %eax
+        vmovdqa -160(%ebp), %xmm7
+        vmovss  %xmm0, (%eax)
+        movq    -144(%ebp), %rax
+        vmovss  -96(%ebp), %xmm0  /* First buffer, lanes 4-7, through the pointers from %xmm2.  */
+        vmovss  %xmm0, (%eax)
+        vmovss  -92(%ebp), %xmm0
+        vpextrd $1, %xmm3, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -136(%ebp), %rax
+        vmovss  -88(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -84(%ebp), %xmm0
+        vpextrd $3, %xmm3, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -160(%ebp), %rax
+        vmovss  -80(%ebp), %xmm0  /* Second buffer, lanes 0-3, through the pointers from %xmm3.  */
+        vmovss  %xmm0, (%eax)
+        vmovss  -76(%ebp), %xmm0
+        vpextrd $1, %xmm7, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -152(%ebp), %rax
+        vmovss  -72(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -68(%ebp), %xmm0
+        vpextrd $3, %xmm7, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -176(%ebp), %rax
+        vmovss  -64(%ebp), %xmm0  /* Second buffer, lanes 4-7, through the pointers from %xmm4.  */
+        vmovdqa -176(%ebp), %xmm3
+        vmovss  %xmm0, (%eax)
+        vmovss  -60(%ebp), %xmm0
+        vpextrd $1, %xmm3, %eax
+        vmovss  %xmm0, (%eax)
+        movq    -168(%ebp), %rax
+        vmovss  -56(%ebp), %xmm0
+        vmovss  %xmm0, (%eax)
+        vmovss  -52(%ebp), %xmm0
+        vpextrd $3, %xmm3, %eax
+        vmovss  %xmm0, (%eax)
+        addl    $184, %esp
+        popq    %rbx
+        popq    %r10
+        .cfi_def_cfa 10, 0
+        popq    %r12
+        popq    %rbp
+        leal    -8(%r10), %esp  /* Back to the entry stack pointer, which addresses the return address.  */
+        .cfi_def_cfa 7, 8
+        ret
+#endif
+.endm
+
+ENTRY (_ZGVcN8vvv_sincosf)  /* Pointer-vector entry point; body is the macro defined above.  */
+WRAPPER_IMPL_AVX_fFF_vvv _ZGVbN4vl4l4_sincosf
+END (_ZGVcN8vvv_sincosf)
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S
index d7a31e1ea6..8c5547e26f 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf16_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf16_core.S
@@ -1,5 +1,5 @@
 /* Function sinf vectorized with AVX-512. Wrapper to AVX2 version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S
index 6f10137134..d56137b32a 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf4_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf4_core.S
@@ -1,5 +1,5 @@
 /* Function sinf vectorized with SSE2.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S
index c459658688..e39392243e 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf8_core.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core.S
@@ -1,5 +1,5 @@
 /* Function sinf vectorized with AVX2, wrapper version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S
index 5e95aa2e02..9984e6f9f7 100644
--- a/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S
+++ b/sysdeps/x86_64/fpu/svml_s_sinf8_core_avx.S
@@ -1,5 +1,5 @@
 /* Function sinf vectorized in AVX ISA as wrapper to SSE4 ISA version.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.S b/sysdeps/x86_64/fpu/svml_s_trig_data.S
index b61aa6abb9..8f1e1f60b8 100644
--- a/sysdeps/x86_64/fpu/svml_s_trig_data.S
+++ b/sysdeps/x86_64/fpu/svml_s_trig_data.S
@@ -1,5 +1,5 @@
 /* Data for function cosf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_trig_data.h b/sysdeps/x86_64/fpu/svml_s_trig_data.h
index 2e469a918a..0faf161c08 100644
--- a/sysdeps/x86_64/fpu/svml_s_trig_data.h
+++ b/sysdeps/x86_64/fpu/svml_s_trig_data.h
@@ -1,5 +1,5 @@
 /* Offsets for data table for vectorized sinf, cosf, sincosf.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
index b1a03be3d9..937afb5cbc 100644
--- a/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
+++ b/sysdeps/x86_64/fpu/svml_s_wrapper_impl.h
@@ -1,5 +1,5 @@
 /* Wrapper implementations of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -21,16 +21,16 @@
         subq      $40, %rsp
         cfi_adjust_cfa_offset(40)
         movaps    %xmm0, (%rsp)
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movss     %xmm0, 16(%rsp)
         movss     4(%rsp), %xmm0
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movss     %xmm0, 20(%rsp)
         movss     8(%rsp), %xmm0
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movss     %xmm0, 24(%rsp)
         movss     12(%rsp), %xmm0
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movss     16(%rsp), %xmm3
         movss     20(%rsp), %xmm2
         movss     24(%rsp), %xmm1
@@ -50,19 +50,19 @@
         cfi_adjust_cfa_offset(56)
         movaps    %xmm0, (%rsp)
         movaps    %xmm1, 16(%rsp)
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movss     %xmm0, 32(%rsp)
         movss     4(%rsp), %xmm0
         movss     20(%rsp), %xmm1
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movss     %xmm0, 36(%rsp)
         movss     8(%rsp), %xmm0
         movss     24(%rsp), %xmm1
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movss     %xmm0, 40(%rsp)
         movss     12(%rsp), %xmm0
         movss     28(%rsp), %xmm1
-        call      \callee@PLT
+        call      JUMPTARGET(\callee)
         movss     32(%rsp), %xmm3
         movss     36(%rsp), %xmm2
         movss     40(%rsp), %xmm1
@@ -91,7 +91,7 @@
         leaq    24(%rsp), %rsi
         leaq    28(%rsp), %rdi
         movaps  %xmm0, (%rsp)
-        call    \callee@PLT
+        call    JUMPTARGET(\callee)
         leaq    24(%rsp), %rsi
         leaq    28(%rsp), %rdi
         movss   28(%rsp), %xmm0
@@ -101,7 +101,7 @@
         movss   %xmm0, (%rbx)
         movaps  %xmm1, %xmm0
         shufps  $85, %xmm1, %xmm0
-        call    \callee@PLT
+        call    JUMPTARGET(\callee)
         movss   28(%rsp), %xmm0
         leaq    24(%rsp), %rsi
         movss   %xmm0, 4(%rbp)
@@ -111,7 +111,7 @@
         movss   %xmm0, 4(%rbx)
         movaps  %xmm1, %xmm0
         unpckhps        %xmm1, %xmm0
-        call    \callee@PLT
+        call    JUMPTARGET(\callee)
         movaps  (%rsp), %xmm1
         leaq    24(%rsp), %rsi
         leaq    28(%rsp), %rdi
@@ -121,7 +121,7 @@
         movss   24(%rsp), %xmm0
         movss   %xmm0, 8(%rbx)
         movaps  %xmm1, %xmm0
-        call    \callee@PLT
+        call    JUMPTARGET(\callee)
         movss   28(%rsp), %xmm0
         movss   %xmm0, 12(%rbp)
         movss   24(%rsp), %xmm0
@@ -246,29 +246,14 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $128, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
+        vmovups   %zmm0, (%rsp)
         vmovupd   (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 64(%rsp)
         vmovupd   32(%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
         vmovupd   %ymm0, 96(%rsp)
-/* Below is encoding for vmovups 64(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x01
+        vmovups   64(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -286,23 +271,8 @@
         cfi_def_cfa_register (%rbp)
         andq      $-64, %rsp
         subq      $192, %rsp
-/* Below is encoding for vmovups %zmm0, (%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x04
-        .byte   0x24
-/* Below is encoding for vmovups %zmm1, 64(%rsp).  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x11
-        .byte   0x4c
-        .byte   0x24
-        .byte   0x01
+        vmovups   %zmm0, (%rsp)
+        vmovups   %zmm1, 64(%rsp)
         vmovups   (%rsp), %ymm0
         vmovups   64(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
@@ -311,15 +281,7 @@
         vmovups   96(%rsp), %ymm1
         call      HIDDEN_JUMPTARGET(\callee)
         vmovups   %ymm0, 160(%rsp)
-/* Below is encoding for vmovups 128(%rsp), %zmm0.  */
-        .byte   0x62
-        .byte   0xf1
-        .byte   0x7c
-        .byte   0x48
-        .byte   0x10
-        .byte   0x44
-        .byte   0x24
-        .byte   0x02
+        vmovups   128(%rsp), %zmm0
         movq      %rbp, %rsp
         cfi_def_cfa_register (%rsp)
         popq      %rbp
@@ -340,14 +302,7 @@
         pushq     %r13
         subq      $176, %rsp
         movq      %rsi, %r13
-/* Below is encoding for vmovaps %zmm0, (%rsp).  */
-        .byte	0x62
-        .byte	0xf1
-        .byte	0x7c
-        .byte	0x48
-        .byte	0x29
-        .byte	0x04
-        .byte	0x24
+        vmovaps   %zmm0, (%rsp)
         movq      %rdi, %r12
         vmovaps   (%rsp), %ymm0
         call      HIDDEN_JUMPTARGET(\callee)
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c
new file mode 100644
index 0000000000..514883dcf9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx-mod.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c
new file mode 100644
index 0000000000..514883dcf9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2-mod.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx2.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c
new file mode 100644
index 0000000000..514883dcf9
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512-mod.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-avx512.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c
new file mode 100644
index 0000000000..43914ef0e7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c
new file mode 100644
index 0000000000..6f2e588021
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias-mod.c
@@ -0,0 +1,25 @@
+/* Part of test to build shared library to ensure link against
+   *_finite aliases from libmvec.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include <stdlib.h>
+#include <math-tests-arch.h>
+
+#include "test-double.h"
+#include "test-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-alias.c b/sysdeps/x86_64/fpu/test-double-libmvec-alias.c
new file mode 100644
index 0000000000..d38b49d6c8
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-alias.c
@@ -0,0 +1,29 @@
+/* Part of test to ensure link against *_finite aliases from libmvec.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+extern int
+test_finite_alias (void);  /* Declared only; the definition is not in this file.  */
+
+static int
+do_test (void)
+{
+  return test_finite_alias ();  /* The callee status is returned unchanged.  */
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../../test-skeleton.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c
new file mode 100644
index 0000000000..fc2ffea314
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos-main.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c
new file mode 100644
index 0000000000..896f1bcbaf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c
new file mode 100644
index 0000000000..fc2ffea314
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos-main.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c
new file mode 100644
index 0000000000..896f1bcbaf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx2.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c
new file mode 100644
index 0000000000..fc2ffea314
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512-main.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos-main.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c
new file mode 100644
index 0000000000..896f1bcbaf
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-avx512.c
@@ -0,0 +1 @@
+#include "test-double-libmvec-sincos.c"
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c
new file mode 100644
index 0000000000..2e52fddf5d
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos-main.c
@@ -0,0 +1,43 @@
+/* Test for vector sincos ABI.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+
+#define N 1000  /* Number of elements processed by the simd loop.  */
+double x[N], s[N], c[N];  /* Inputs, sine results, cosine results.  */
+double* s_ptrs[N];  /* s_ptrs[i] is set to &s[i] below.  */
+double* c_ptrs[N];  /* c_ptrs[i] is set to &c[i] below.  */
+
+int
+test_sincos_abi (void)  /* Fills the tables, runs sincos in an omp simd loop, and always returns 0.  */
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      x[i] = i / 3.0;  /* Floating-point division; i / 3 truncated to whole numbers.  */
+      s_ptrs[i] = &s[i];
+      c_ptrs[i] = &c[i];
+    }
+
+#pragma omp simd
+  for (i = 0; i < N; i++)
+    sincos (x[i], s_ptrs[i], c_ptrs[i]);  /* Outputs go through the per-element pointer tables.  */
+
+  return 0;  /* The computed values are not checked here.  */
+}
diff --git a/sysdeps/x86_64/fpu/test-double-libmvec-sincos.c b/sysdeps/x86_64/fpu/test-double-libmvec-sincos.c
new file mode 100644
index 0000000000..cffaa73135
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-double-libmvec-sincos.c
@@ -0,0 +1,44 @@
+/* Test for vector sincos ABI.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math-tests-arch.h>
+
+extern int test_sincos_abi (void);
+
+int arch_check = 1;  /* Stays 1 unless check_arch runs to its last statement.  */
+
+static void
+check_arch (void)
+{
+  CHECK_ARCH_EXT;  /* NOTE(review): macro not defined in this file; presumably returns early when the required ISA extension is missing -- confirm in math-tests-arch.h.  */
+  arch_check = 0;  /* Reached only if CHECK_ARCH_EXT did not leave the function.  */
+}
+
+static int
+do_test (void)
+{
+  check_arch ();  /* Clears arch_check when it runs to completion.  */
+
+  if (arch_check)
+    return 77;  /* NOTE(review): 77 presumably reports the test as unsupported to the harness -- confirm.  */
+
+  return test_sincos_abi ();  /* Declared extern above; the definition is not in this file.  */
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../../test-skeleton.c"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
index a9d15979aa..4ff1439f9c 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen2-wrappers.c
@@ -1,5 +1,5 @@
 /* Wrapper part of tests for SSE ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,17 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "test-double-vlen2.h"
+#include "test-math-vector-sincos.h"
 #include <immintrin.h>
 
 #define VEC_TYPE __m128d
 
 VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVbN2v_cos)
 VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVbN2v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos)
 VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVbN2v_log)
 VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVbN2v_exp)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVbN2vv_pow)
+
+#define VEC_INT_TYPE __m128i
+
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVbN2vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
index eb6a531502..c7bdad517b 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2-wrappers.c
@@ -1,5 +1,5 @@
 /* Wrapper part of tests for AVX2 ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "test-double-vlen4.h"
+#include "test-math-vector-sincos.h"
 #include <immintrin.h>
 
 #undef VEC_SUFF
@@ -26,7 +27,14 @@
 
 VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVdN4v_cos)
 VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVdN4v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVdN4vvv_sincos)
 VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVdN4v_log)
 VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVdN4v_exp)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVdN4vv_pow)
+
+#ifndef __ILP32__
+# define VEC_INT_TYPE __m256i
+#else
+# define VEC_INT_TYPE __m128i
+#endif
+
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVdN4vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.h
index 0cadef03d6..4b196e66fc 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-avx2.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-avx2.h
@@ -1,5 +1,5 @@
 /* Tests for AVX2 ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,18 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include "test-double-vlen4.h"
+#include <test-double-vlen4.h>
 
 #undef VEC_SUFF
 #define VEC_SUFF _vlen4_avx2
 
-#define TEST_VECTOR_cos 1
-#define TEST_VECTOR_sin 1
-#define TEST_VECTOR_sincos 1
-#define TEST_VECTOR_log 1
-#define TEST_VECTOR_exp 1
-#define TEST_VECTOR_pow 1
-
+#undef REQUIRE_AVX
 #define REQUIRE_AVX2
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
index 52b81da3ee..2bb0085700 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4-wrappers.c
@@ -1,5 +1,5 @@
 /* Wrapper part of tests for AVX ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "test-double-vlen4.h"
+#include "test-math-vector-sincos.h"
 #include <immintrin.h>
 
 #define VEC_TYPE __m256d
 
 VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVcN4v_cos)
 VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVcN4v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos)
 VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVcN4v_log)
 VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVcN4v_exp)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVcN4vv_pow)
+
+#define VEC_INT_TYPE __m128i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos)
+#else
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVcN4vvv_sincos)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-double-vlen4.c b/sysdeps/x86_64/fpu/test-double-vlen4.h
index 9ae97f1388..316340cb59 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen4.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen4.h
@@ -1,5 +1,5 @@
 /* Tests for AVX ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,15 +16,6 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include "test-double-vlen4.h"
-
-#define TEST_VECTOR_cos 1
-#define TEST_VECTOR_sin 1
-#define TEST_VECTOR_sincos 1
-#define TEST_VECTOR_log 1
-#define TEST_VECTOR_exp 1
-#define TEST_VECTOR_pow 1
+#include_next <test-double-vlen4.h>
 
 #define REQUIRE_AVX
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
index c10bb9cb4a..ea179284ed 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8-wrappers.c
@@ -1,5 +1,5 @@
 /* Wrapper part of tests for AVX-512 versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "test-double-vlen8.h"
+#include "test-math-vector-sincos.h"
 #include <immintrin.h>
 
 #define VEC_TYPE __m512d
 
 VECTOR_WRAPPER (WRAPPER_NAME (cos), _ZGVeN8v_cos)
 VECTOR_WRAPPER (WRAPPER_NAME (sin), _ZGVeN8v_sin)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincos), _ZGVeN8vvv_sincos)
 VECTOR_WRAPPER (WRAPPER_NAME (log), _ZGVeN8v_log)
 VECTOR_WRAPPER (WRAPPER_NAME (exp), _ZGVeN8v_exp)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (pow), _ZGVeN8vv_pow)
+
+#ifndef __ILP32__
+# define VEC_INT_TYPE __m512i
+#else
+# define VEC_INT_TYPE __m256i
+#endif
+
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincos), _ZGVeN8vvv_sincos)
diff --git a/sysdeps/x86_64/fpu/test-double-vlen8.c b/sysdeps/x86_64/fpu/test-double-vlen8.h
index 4fb6c8d196..41d188081e 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen8.c
+++ b/sysdeps/x86_64/fpu/test-double-vlen8.h
@@ -1,5 +1,5 @@
 /* Tests for AVX-512 versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,15 +16,6 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include "test-double-vlen8.h"
-
-#define TEST_VECTOR_cos 1
-#define TEST_VECTOR_sin 1
-#define TEST_VECTOR_sincos 1
-#define TEST_VECTOR_log 1
-#define TEST_VECTOR_exp 1
-#define TEST_VECTOR_pow 1
+#include_next <test-double-vlen8.h>
 
 #define REQUIRE_AVX512F
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c
new file mode 100644
index 0000000000..7fc3d8aedd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx-mod.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c
new file mode 100644
index 0000000000..7fc3d8aedd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2-mod.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx2.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c
new file mode 100644
index 0000000000..7fc3d8aedd
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512-mod.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-avx512.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c
new file mode 100644
index 0000000000..f3691cc8e6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-alias.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c
new file mode 100644
index 0000000000..5e6a587a94
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias-mod.c
@@ -0,0 +1,25 @@
+/* Part of test to build shared library to ensure link against
+   *_finite aliases from libmvec.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+#include <stdlib.h>
+#include <math-tests-arch.h>
+
+#include "test-float.h"
+#include "test-libmvec-alias-mod.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-alias.c b/sysdeps/x86_64/fpu/test-float-libmvec-alias.c
new file mode 100644
index 0000000000..d38b49d6c8
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-alias.c
@@ -0,0 +1,29 @@
+/* Part of test to ensure link against *_finite aliases from libmvec.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+extern int
+test_finite_alias (void);
+
+static int
+do_test (void)
+{
+  return test_finite_alias ();  /* Pass the callee's status through.  */
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../../test-skeleton.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c
new file mode 100644
index 0000000000..558e2ac649
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf-main.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c
new file mode 100644
index 0000000000..5b45f0a055
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c
new file mode 100644
index 0000000000..558e2ac649
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf-main.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c
new file mode 100644
index 0000000000..5b45f0a055
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx2.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c
new file mode 100644
index 0000000000..558e2ac649
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512-main.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf-main.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c
new file mode 100644
index 0000000000..5b45f0a055
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-avx512.c
@@ -0,0 +1 @@
+#include "test-float-libmvec-sincosf.c"
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c
new file mode 100644
index 0000000000..ce1dd1a8a4
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf-main.c
@@ -0,0 +1,42 @@
+/* Test for vector sincosf ABI.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math.h>
+
+#define N 1000
+float x[N], s[N], c[N];
+float *s_ptrs[N];
+float *c_ptrs[N];
+
+/* Run sincosf over x[] via per-element result pointers; returns 0.  */
+int
+test_sincosf_abi (void)
+{
+  int idx;
+  for (idx = 0; idx < N; idx++)
+    {
+      x[idx] = idx / 3;
+      s_ptrs[idx] = s + idx;
+      c_ptrs[idx] = c + idx;
+    }
+
+#pragma omp simd
+  for (idx = 0; idx < N; idx++)
+    sincosf (x[idx], s_ptrs[idx], c_ptrs[idx]);
+  return 0;
+}
diff --git a/sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c
new file mode 100644
index 0000000000..a56d9680a0
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-float-libmvec-sincosf.c
@@ -0,0 +1,44 @@
+/* Test for vector sincosf ABI.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <math-tests-arch.h>
+
+extern int test_sincosf_abi (void);
+
+int arch_check = 1;
+
+static void
+check_arch (void)
+{
+  CHECK_ARCH_EXT;  /* NOTE(review): may return early -- confirm in header.  */
+  arch_check = 0;  /* Cleared only when execution gets this far.  */
+}
+
+/* Run the sincosf ABI check, or report status 77 while arch_check is
+   still set after check_arch.  */
+static int
+do_test (void)
+{
+  check_arch ();
+
+  /* check_arch leaves arch_check nonzero unless it ran to the end.  */
+  return arch_check != 0 ? 77 : test_sincosf_abi ();
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../../test-skeleton.c"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
index dc09e4a338..d2a81ecf53 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen16-wrappers.c
@@ -1,5 +1,5 @@
 /* Wrapper part of tests for AVX-512 ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "test-float-vlen16.h"
+#include "test-math-vector-sincos.h"
 #include <immintrin.h>
 
 #define VEC_TYPE __m512
 
 VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVeN16v_cosf)
 VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVeN16v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf)
 VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVeN16v_logf)
 VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVeN16v_expf)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVeN16vv_powf)
+
+#define VEC_INT_TYPE __m512i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf)
+#else
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincosf), _ZGVeN16vvv_sincosf)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-float-vlen16.c b/sysdeps/x86_64/fpu/test-float-vlen16.h
index 882bfc840d..ffe27866b5 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen16.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen16.h
@@ -1,5 +1,5 @@
 /* Tests for AVX-512 ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,15 +16,6 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include "test-float-vlen16.h"
-
-#define TEST_VECTOR_cosf 1
-#define TEST_VECTOR_sinf 1
-#define TEST_VECTOR_sincosf 1
-#define TEST_VECTOR_logf 1
-#define TEST_VECTOR_expf 1
-#define TEST_VECTOR_powf 1
+#include_next <test-float-vlen16.h>
 
 #define REQUIRE_AVX512F
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
index 0bb9818146..afa7da26f6 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen4-wrappers.c
@@ -1,5 +1,5 @@
 /* Wrapper part of tests for SSE ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "test-float-vlen4.h"
+#include "test-math-vector-sincos.h"
 #include <immintrin.h>
 
 #define VEC_TYPE __m128
 
 VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVbN4v_cosf)
 VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVbN4v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
 VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVbN4v_logf)
 VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVbN4v_expf)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVbN4vv_powf)
+
+#define VEC_INT_TYPE __m128i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
+#else
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincosf), _ZGVbN4vvv_sincosf)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
index 4985ac2379..d7e79a3f37 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2-wrappers.c
@@ -1,5 +1,5 @@
 /* Wrapper part of tests for AVX2 ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "test-float-vlen8.h"
+#include "test-math-vector-sincos.h"
 #include <immintrin.h>
 
 #undef VEC_SUFF
@@ -26,7 +27,17 @@
 
 VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVdN8v_cosf)
 VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVdN8v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf)
 VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVdN8v_logf)
 VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVdN8v_expf)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVdN8vv_powf)
+
+/* Drop the generic fFF wrapper; the _2/_3 variants are used below.  */
+#undef VECTOR_WRAPPER_fFF
+
+#define VEC_INT_TYPE __m256i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf)
+#else
+VECTOR_WRAPPER_fFF_2 (WRAPPER_NAME (sincosf), _ZGVdN8vvv_sincosf)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.h
index 7a416385b6..c468dd6e69 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-avx2.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-avx2.h
@@ -1,5 +1,5 @@
 /* Tests for AVX2 ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,18 +16,10 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include "test-float-vlen8.h"
+#include <test-float-vlen8.h>
 
 #undef VEC_SUFF
 #define VEC_SUFF _vlen8_avx2
 
-#define TEST_VECTOR_cosf 1
-#define TEST_VECTOR_sinf 1
-#define TEST_VECTOR_sincosf 1
-#define TEST_VECTOR_logf 1
-#define TEST_VECTOR_expf 1
-#define TEST_VECTOR_powf 1
-
+#undef REQUIRE_AVX
 #define REQUIRE_AVX2
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
index 9cc2883399..6f7869ba3d 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8-wrappers.c
@@ -1,5 +1,5 @@
 /* Wrapper part of tests for AVX ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,13 +17,21 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "test-float-vlen8.h"
+#include "test-math-vector-sincos.h"
 #include <immintrin.h>
 
 #define VEC_TYPE __m256
 
 VECTOR_WRAPPER (WRAPPER_NAME (cosf), _ZGVcN8v_cosf)
 VECTOR_WRAPPER (WRAPPER_NAME (sinf), _ZGVcN8v_sinf)
-VECTOR_WRAPPER_fFF (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf)
 VECTOR_WRAPPER (WRAPPER_NAME (logf), _ZGVcN8v_logf)
 VECTOR_WRAPPER (WRAPPER_NAME (expf), _ZGVcN8v_expf)
 VECTOR_WRAPPER_ff (WRAPPER_NAME (powf), _ZGVcN8vv_powf)
+
+#define VEC_INT_TYPE __m128i
+
+#ifndef __ILP32__
+VECTOR_WRAPPER_fFF_4 (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf)
+#else
+VECTOR_WRAPPER_fFF_3 (WRAPPER_NAME (sincosf), _ZGVcN8vvv_sincosf)
+#endif
diff --git a/sysdeps/x86_64/fpu/test-float-vlen8.c b/sysdeps/x86_64/fpu/test-float-vlen8.h
index c92a50ae7e..153820ecc2 100644
--- a/sysdeps/x86_64/fpu/test-float-vlen8.c
+++ b/sysdeps/x86_64/fpu/test-float-vlen8.h
@@ -1,5 +1,5 @@
 /* Tests for AVX ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,15 +16,6 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include "test-float-vlen8.h"
-
-#define TEST_VECTOR_cosf 1
-#define TEST_VECTOR_sinf 1
-#define TEST_VECTOR_sincosf 1
-#define TEST_VECTOR_logf 1
-#define TEST_VECTOR_expf 1
-#define TEST_VECTOR_powf 1
+#include_next <test-float-vlen8.h>
 
 #define REQUIRE_AVX
-
-#include "libm-test.c"
diff --git a/sysdeps/x86_64/fpu/test-libmvec-alias-mod.c b/sysdeps/x86_64/fpu/test-libmvec-alias-mod.c
new file mode 100644
index 0000000000..6d70844147
--- /dev/null
+++ b/sysdeps/x86_64/fpu/test-libmvec-alias-mod.c
@@ -0,0 +1,66 @@
+/* Part of test to build shared library to ensure link against
+   *_finite aliases from libmvec.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define N 4000
+FLOAT log_arg[N];
+FLOAT exp_arg[N];
+FLOAT log_res[N];
+FLOAT exp_res[N];
+FLOAT pow_res[N];
+int arch_check = 1;
+
+static void
+init_arg (void)
+{
+  int i;
+
+  CHECK_ARCH_EXT;  /* NOTE(review): may return early -- confirm in header.  */
+
+  arch_check = 0;  /* Cleared only when execution gets this far.  */
+
+  for (i = 0; i < N; i += 1)
+    {
+      log_arg[i] = 1.0;  /* Fed to log and as both pow operands.  */
+      exp_arg[i] = 0.0;  /* Fed to exp.  */
+    }
+}
+
+/* Return 77 if arch_check is still set, 1 on a wrong result, else 0.  */
+int
+test_finite_alias (void)
+{
+  int idx;
+
+  init_arg ();
+  if (arch_check != 0)
+    return 77;
+
+#pragma omp simd
+  for (idx = 0; idx < N; idx += 1)
+    {
+      log_res[idx] = FUNC (log) (log_arg[idx]);
+      exp_res[idx] = FUNC (exp) (exp_arg[idx]);
+      pow_res[idx] = FUNC (pow) (log_arg[idx], log_arg[idx]);
+    }
+
+  /* Only element 0 of each result array is inspected.  */
+  if (log_res[0] != 0.0 || exp_res[0] != 1.0 || pow_res[0] != 1.0)
+    return 1;
+  return 0;
+}
diff --git a/sysdeps/x86_64/fpu/x86_64-math-asm.h b/sysdeps/x86_64/fpu/x86_64-math-asm.h
index db3f9f78b0..597b967b7b 100644
--- a/sysdeps/x86_64/fpu/x86_64-math-asm.h
+++ b/sysdeps/x86_64/fpu/x86_64-math-asm.h
@@ -1,5 +1,5 @@
 /* Helper macros for x86_64 libm functions.
-   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/hp-timing.h b/sysdeps/x86_64/hp-timing.h
index 65381b314d..ec543bef03 100644
--- a/sysdeps/x86_64/hp-timing.h
+++ b/sysdeps/x86_64/hp-timing.h
@@ -1,5 +1,5 @@
 /* High precision, low overhead timing functions.  x86-64 version.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/htonl.S b/sysdeps/x86_64/htonl.S
index c92fae8791..23e2046caa 100644
--- a/sysdeps/x86_64/htonl.S
+++ b/sysdeps/x86_64/htonl.S
@@ -1,5 +1,5 @@
 /* Change byte order in word.  For AMD x86-64.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+   Copyright (C) 1997-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/ifuncmain8.c b/sysdeps/x86_64/ifuncmain8.c
index 448ab96bfa..449998df50 100644
--- a/sysdeps/x86_64/ifuncmain8.c
+++ b/sysdeps/x86_64/ifuncmain8.c
@@ -1,5 +1,5 @@
 /* Test IFUNC selector with floating-point parameters.
-   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/ifuncmod8.c b/sysdeps/x86_64/ifuncmod8.c
index c00436799c..8225c4da12 100644
--- a/sysdeps/x86_64/ifuncmod8.c
+++ b/sysdeps/x86_64/ifuncmod8.c
@@ -1,5 +1,5 @@
 /* Test IFUNC selector with floating-point parameters.
-   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -28,6 +28,7 @@ foo_impl (float x)
 }
 
 void *
+inhibit_stack_protector
 foo_ifunc (void)
 {
   __m128i xmm = _mm_set1_epi32 (-1);
diff --git a/sysdeps/x86_64/jmpbuf-offsets.h b/sysdeps/x86_64/jmpbuf-offsets.h
index da71e555f7..6d1ee5e812 100644
--- a/sysdeps/x86_64/jmpbuf-offsets.h
+++ b/sysdeps/x86_64/jmpbuf-offsets.h
@@ -1,5 +1,5 @@
 /* Private macros for accessing __jmp_buf contents.  x86-64 version.
-   Copyright (C) 2006-2016 Free Software Foundation, Inc.
+   Copyright (C) 2006-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/jmpbuf-unwind.h b/sysdeps/x86_64/jmpbuf-unwind.h
index aa0642b54a..49208bdd9e 100644
--- a/sysdeps/x86_64/jmpbuf-unwind.h
+++ b/sysdeps/x86_64/jmpbuf-unwind.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
 
diff --git a/sysdeps/x86_64/ldsodefs.h b/sysdeps/x86_64/ldsodefs.h
deleted file mode 100644
index 6a96c53721..0000000000
--- a/sysdeps/x86_64/ldsodefs.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Run-time dynamic linker data structures for loaded ELF shared objects.
-   Copyright (C) 1995-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef	_X86_64_LDSODEFS_H
-#define	_X86_64_LDSODEFS_H	1
-
-#include <elf.h>
-#include <cpu-features.h>
-
-struct La_x86_64_regs;
-struct La_x86_64_retval;
-struct La_x32_regs;
-struct La_x32_retval;
-
-#define ARCH_PLTENTER_MEMBERS						\
-    Elf64_Addr (*x86_64_gnu_pltenter) (Elf64_Sym *, unsigned int,	\
-				       uintptr_t *,			\
-				       uintptr_t *, struct La_x86_64_regs *, \
-				       unsigned int *, const char *name, \
-				       long int *framesizep);		\
-    Elf32_Addr (*x32_gnu_pltenter) (Elf32_Sym *, unsigned int, uintptr_t *, \
-				    uintptr_t *, struct La_x32_regs *,	\
-				    unsigned int *, const char *name,	\
-				    long int *framesizep)
-
-#define ARCH_PLTEXIT_MEMBERS						\
-    unsigned int (*x86_64_gnu_pltexit) (Elf64_Sym *, unsigned int,	\
-					uintptr_t *,			\
-					uintptr_t *,			\
-					const struct La_x86_64_regs *,	\
-					struct La_x86_64_retval *,	\
-					const char *);			\
-    unsigned int (*x32_gnu_pltexit) (Elf32_Sym *, unsigned int, uintptr_t *, \
-				     uintptr_t *,			\
-				     const struct La_x32_regs *,	\
-				     struct La_x86_64_retval *,		\
-				     const char *)
-
-#include_next <ldsodefs.h>
-
-#endif
diff --git a/sysdeps/x86_64/localplt.data b/sysdeps/x86_64/localplt.data
index f168b143ff..c27a02b66a 100644
--- a/sysdeps/x86_64/localplt.data
+++ b/sysdeps/x86_64/localplt.data
@@ -8,12 +8,15 @@ libc.so: free + RELA R_X86_64_GLOB_DAT
 libc.so: malloc + RELA R_X86_64_GLOB_DAT
 libc.so: memalign + RELA R_X86_64_GLOB_DAT
 libc.so: realloc + RELA R_X86_64_GLOB_DAT
-libm.so: matherr
-# The dynamic loader uses __libc_memalign internally to allocate aligned
-# TLS storage. The other malloc family of functions are expected to allow
-# user symbol interposition.
-ld.so: __libc_memalign + RELA R_X86_64_GLOB_DAT
+libm.so: matherr + RELA R_X86_64_GLOB_DAT
+# The main malloc is interposed into the dynamic linker, for
+# allocations after the initial link (when dlopen is used).
 ld.so: malloc + RELA R_X86_64_GLOB_DAT
 ld.so: calloc + RELA R_X86_64_GLOB_DAT
 ld.so: realloc + RELA R_X86_64_GLOB_DAT
 ld.so: free + RELA R_X86_64_GLOB_DAT
+# The TLS-enabled version of these functions is interposed from libc.so.
+ld.so: _dl_signal_error + RELA R_X86_64_GLOB_DAT
+ld.so: _dl_catch_error + RELA R_X86_64_GLOB_DAT
+ld.so: _dl_signal_exception + RELA R_X86_64_GLOB_DAT
+ld.so: _dl_catch_exception + RELA R_X86_64_GLOB_DAT
diff --git a/sysdeps/x86_64/lshift.S b/sysdeps/x86_64/lshift.S
index 49cbfbaf3d..af568768d0 100644
--- a/sysdeps/x86_64/lshift.S
+++ b/sysdeps/x86_64/lshift.S
@@ -1,5 +1,5 @@
 /* x86-64 __mpn_lshift --
-   Copyright (C) 2007-2016 Free Software Foundation, Inc.
+   Copyright (C) 2007-2018 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/machine-gmon.h b/sysdeps/x86_64/machine-gmon.h
index 3d9ce5c44e..8bc111612c 100644
--- a/sysdeps/x86_64/machine-gmon.h
+++ b/sysdeps/x86_64/machine-gmon.h
@@ -1,5 +1,5 @@
 /* x86-64-specific implementation of profiling support.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2002.
 
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 132eacba8f..feef5d4f24 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -18,26 +18,40 @@
 
 #include <sysdep.h>
 
+#ifdef USE_AS_WMEMCHR
+# define MEMCHR		wmemchr
+# define PCMPEQ		pcmpeqd
+#else
+# define MEMCHR		memchr
+# define PCMPEQ		pcmpeqb
+#endif
+
 /* fast SSE2 version with using pmaxub and 64 byte loop */
 
 	.text
-ENTRY(memchr)
-	movd	%rsi, %xmm1
-	mov	%rdi, %rcx
+ENTRY(MEMCHR)
+	movd	%esi, %xmm1
+	mov	%edi, %ecx
 
+#ifdef USE_AS_WMEMCHR
+	test	%rdx, %rdx
+	jz	L(return_null)
+	shl	$2, %rdx
+#else
 	punpcklbw %xmm1, %xmm1
 	test	%rdx, %rdx
 	jz	L(return_null)
 	punpcklbw %xmm1, %xmm1
+#endif
 
-	and	$63, %rcx
+	and	$63, %ecx
 	pshufd	$0, %xmm1, %xmm1
 
-	cmp	$48, %rcx
+	cmp	$48, %ecx
 	ja	L(crosscache)
 
 	movdqu	(%rdi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 
@@ -45,7 +59,7 @@ ENTRY(memchr)
 	sub	$16, %rdx
 	jbe	L(return_null)
 	add	$16, %rdi
-	and	$15, %rcx
+	and	$15, %ecx
 	and	$-16, %rdi
 	add	%rcx, %rdx
 	sub	$64, %rdx
@@ -54,11 +68,11 @@ ENTRY(memchr)
 
 	.p2align 4
 L(crosscache):
-	and	$15, %rcx
+	and	$15, %ecx
 	and	$-16, %rdi
 	movdqa	(%rdi), %xmm0
 
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 /* Check if there is a match.  */
 	pmovmskb %xmm0, %eax
 /* Remove the leading bytes.  */
@@ -76,8 +90,12 @@ L(crosscache):
 
 	.p2align 4
 L(unaligned_no_match):
-	add	%rcx, %rdx
-	sub	$16, %rdx
+        /* "rcx" is less than 16.  Calculate "rdx + rcx - 16" by using
+	   "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to avoid
+	   possible addition overflow.  */
+	neg	%rcx
+	add	$16, %rcx
+	sub	%rcx, %rdx
 	jbe	L(return_null)
 	add	$16, %rdi
 	sub	$64, %rdx
@@ -86,25 +104,25 @@ L(unaligned_no_match):
 	.p2align 4
 L(loop_prolog):
 	movdqa	(%rdi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
 	movdqa	16(%rdi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
+	PCMPEQ	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
 	movdqa	32(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
+	PCMPEQ	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32)
 
 	movdqa	48(%rdi), %xmm4
-	pcmpeqb	%xmm1, %xmm4
+	PCMPEQ	%xmm1, %xmm4
 	add	$64, %rdi
 	pmovmskb %xmm4, %eax
 	test	%eax, %eax
@@ -117,25 +135,25 @@ L(loop_prolog):
 	jbe	L(exit_loop)
 
 	movdqa	(%rdi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
 	movdqa	16(%rdi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
+	PCMPEQ	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
 	movdqa	32(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
+	PCMPEQ	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32)
 
 	movdqa	48(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
+	PCMPEQ	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 
 	add	$64, %rdi
@@ -144,7 +162,7 @@ L(loop_prolog):
 
 	mov	%rdi, %rcx
 	and	$-64, %rdi
-	and	$63, %rcx
+	and	$63, %ecx
 	add	%rcx, %rdx
 
 	.p2align 4
@@ -156,10 +174,10 @@ L(align64_loop):
 	movdqa	32(%rdi), %xmm3
 	movdqa	48(%rdi), %xmm4
 
-	pcmpeqb	%xmm1, %xmm0
-	pcmpeqb	%xmm1, %xmm2
-	pcmpeqb	%xmm1, %xmm3
-	pcmpeqb	%xmm1, %xmm4
+	PCMPEQ	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm2
+	PCMPEQ	%xmm1, %xmm3
+	PCMPEQ	%xmm1, %xmm4
 
 	pmaxub	%xmm0, %xmm3
 	pmaxub	%xmm2, %xmm4
@@ -182,9 +200,9 @@ L(align64_loop):
 	jnz	L(matches16)
 
 	movdqa	32(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
+	PCMPEQ	%xmm1, %xmm3
 
-	pcmpeqb	48(%rdi), %xmm1
+	PCMPEQ	48(%rdi), %xmm1
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32)
@@ -196,52 +214,52 @@ L(align64_loop):
 
 	.p2align 4
 L(exit_loop):
-	add	$32, %rdx
+	add	$32, %edx
 	jle	L(exit_loop_32)
 
 	movdqa	(%rdi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches)
 
 	movdqa	16(%rdi), %xmm2
-	pcmpeqb	%xmm1, %xmm2
+	PCMPEQ	%xmm1, %xmm2
 	pmovmskb %xmm2, %eax
 	test	%eax, %eax
 	jnz	L(matches16)
 
 	movdqa	32(%rdi), %xmm3
-	pcmpeqb	%xmm1, %xmm3
+	PCMPEQ	%xmm1, %xmm3
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32_1)
-	sub	$16, %rdx
+	sub	$16, %edx
 	jle	L(return_null)
 
-	pcmpeqb	48(%rdi), %xmm1
+	PCMPEQ	48(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
 	jnz	L(matches48_1)
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret
 
 	.p2align 4
 L(exit_loop_32):
-	add	$32, %rdx
+	add	$32, %edx
 	movdqa	(%rdi), %xmm0
-	pcmpeqb	%xmm1, %xmm0
+	PCMPEQ	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches_1)
-	sub	$16, %rdx
+	sub	$16, %edx
 	jbe	L(return_null)
 
-	pcmpeqb	16(%rdi), %xmm1
+	PCMPEQ	16(%rdi), %xmm1
 	pmovmskb %xmm1, %eax
 	test	%eax, %eax
 	jnz	L(matches16_1)
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret
 
 	.p2align 4
@@ -302,10 +320,11 @@ L(matches48_1):
 
 	.p2align 4
 L(return_null):
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret
-END(memchr)
+END(MEMCHR)
 
+#ifndef USE_AS_WMEMCHR
 strong_alias (memchr, __memchr)
-
 libc_hidden_builtin_def(memchr)
+#endif
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
index 3fb018a772..bcb4a2e88d 100644
--- a/sysdeps/x86_64/memcmp.S
+++ b/sysdeps/x86_64/memcmp.S
@@ -1,5 +1,5 @@
 /* memcmp with SSE2
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/memcopy.h b/sysdeps/x86_64/memcopy.h
new file mode 100644
index 0000000000..590b6cb16b
--- /dev/null
+++ b/sysdeps/x86_64/memcopy.h
@@ -0,0 +1 @@
+/* X86-64 doesn't use memory copy functions.  */
diff --git a/sysdeps/x86_64/memcpy.S b/sysdeps/x86_64/memcpy.S
index f6e3d9396c..d98500a78a 100644
--- a/sysdeps/x86_64/memcpy.S
+++ b/sysdeps/x86_64/memcpy.S
@@ -1,584 +1 @@
-/*
-   Optimized memcpy for x86-64.
-
-   Copyright (C) 2007-2016 Free Software Foundation, Inc.
-   Contributed by Evandro Menezes <evandro.menezes@amd.com>, 2007.
-
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.
-*/
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-/* Stack slots in the red-zone. */
-
-#ifdef USE_AS_MEMPCPY
-#  define RETVAL	(0)
-#else
-#  define RETVAL	(-8)
-#  if defined SHARED && !defined USE_MULTIARCH && IS_IN (libc)
-#    define memcpy	__memcpy
-#    undef libc_hidden_builtin_def
-#    define libc_hidden_builtin_def(name) \
-	.globl __GI_memcpy; __GI_memcpy = __memcpy
-#  endif
-#endif
-#define SAVE0	(RETVAL - 8)
-#define SAVE1	(SAVE0	- 8)
-#define SAVE2	(SAVE1	- 8)
-#define SAVE3	(SAVE2	- 8)
-
-        .text
-
-#if defined PIC && IS_IN (libc)
-ENTRY_CHK (__memcpy_chk)
-
-	cmpq	%rdx, %rcx
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-
-END_CHK (__memcpy_chk)
-#endif
-
-ENTRY(memcpy)				/* (void *, const void*, size_t) */
-
-/* Handle tiny blocks. */
-
-L(1try):				/* up to 32B */
-	cmpq	$32, %rdx
-#ifndef USE_AS_MEMPCPY
-	movq	%rdi, %rax		/* save return value */
-#endif
-	jae	L(1after)
-
-L(1):					/* 1-byte once */
-	testb	$1, %dl
-	jz	L(1a)
-
-	movzbl	(%rsi),	%ecx
-	movb	%cl, (%rdi)
-
-	incq	%rsi
-	incq	%rdi
-
-	.p2align 4,, 4
-
-L(1a):					/* 2-byte once */
-	testb	$2, %dl
-	jz	L(1b)
-
-	movzwl	(%rsi),	%ecx
-	movw	%cx, (%rdi)
-
-	addq	$2, %rsi
-	addq	$2, %rdi
-
-	.p2align 4,, 4
-
-L(1b):					/* 4-byte once */
-	testb	$4, %dl
-	jz	L(1c)
-
-	movl	(%rsi),	%ecx
-	movl	%ecx, (%rdi)
-
-	addq	$4, %rsi
-	addq	$4, %rdi
-
-	.p2align 4,, 4
-
-L(1c):					/* 8-byte once */
-	testb	$8, %dl
-	jz	L(1d)
-
-	movq	(%rsi), %rcx
-	movq	%rcx, (%rdi)
-
-	addq	$8, %rsi
-	addq	$8, %rdi
-
-	.p2align 4,, 4
-
-L(1d):					/* 16-byte loop */
-	andl	$0xf0, %edx
-	jz	L(exit)
-
-	.p2align 4
-
-L(1loop):
-	movq	 (%rsi), %rcx
-	movq	8(%rsi), %r8
-	movq	%rcx,  (%rdi)
-	movq	 %r8, 8(%rdi)
-
-	subl	$16, %edx
-
-	leaq	16(%rsi), %rsi
-	leaq	16(%rdi), %rdi
-
-	jnz	L(1loop)
-
-	.p2align 4,, 4
-
-L(exit):				/* exit */
-#ifdef USE_AS_MEMPCPY
-	movq	%rdi, %rax		/* return value */
-#else
-	rep
-#endif
-	retq
-
-	.p2align 4
-
-L(1after):
-#ifndef USE_AS_MEMPCPY
-	movq	%rax, RETVAL(%rsp)	/* save return value */
-#endif
-
-/* Align to the natural word size. */
-
-L(aligntry):
-	movl	%esi, %ecx      	/* align by source */
-
-	andl	$7, %ecx
-	jz	L(alignafter)  		/* already aligned */
-
-L(align):		      		/* align */
-	leaq	-8(%rcx, %rdx), %rdx	/* calculate remaining bytes */
-	subl	$8, %ecx
-
-	.p2align 4
-
-L(alignloop):				/* 1-byte alignment loop */
-	movzbl	(%rsi), %eax
-	movb	%al, (%rdi)
-
-	incl	%ecx
-
-	leaq	1(%rsi), %rsi
-	leaq	1(%rdi), %rdi
-
-	jnz	L(alignloop)
-
-	.p2align 4
-
-L(alignafter):
-
-/* Handle mid-sized blocks. */
-
-L(32try):				/* up to 1KB */
-	cmpq	$1024, %rdx
-	ja	L(32after)
-
-L(32):					/* 32-byte loop */
-	movl	%edx, %ecx
-	shrl	$5, %ecx
-	jz	L(32skip)
-
-	.p2align 4
-
-L(32loop):
-	decl	%ecx
-
-	movq	  (%rsi), %rax
-	movq	 8(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	24(%rsi), %r10
-
-	movq	%rax,   (%rdi)
-	movq	 %r8,  8(%rdi)
-	movq	 %r9, 16(%rdi)
-	movq	%r10, 24(%rdi)
-
-	leaq	32(%rsi), %rsi
-	leaq	32(%rdi), %rdi
-
-	jz	L(32skip)		/* help out smaller blocks */
-
-	decl	%ecx
-
-	movq	  (%rsi), %rax
-	movq	 8(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	24(%rsi), %r10
-
-	movq	%rax,   (%rdi)
-	movq	 %r8,  8(%rdi)
-	movq	 %r9, 16(%rdi)
-	movq	%r10, 24(%rdi)
-
-	leaq	32(%rsi), %rsi
-	leaq	32(%rdi), %rdi
-
-	jnz	L(32loop)
-
-	.p2align 4
-
-L(32skip):
-	andl	$31, %edx		/* check for left overs */
-#ifdef USE_AS_MEMPCPY
-	jnz	L(1)
-
-	movq	%rdi, %rax
-#else
-	movq	RETVAL(%rsp), %rax
-	jnz	L(1)
-
-	rep
-#endif
-	retq				/* exit */
-
-	.p2align 4
-
-L(32after):
-
-/*
-	In order to minimize code-size in RTLD, algorithms specific for
-	larger blocks are excluded when building for RTLD.
-*/
-
-/* Handle blocks smaller than 1/2 L1. */
-
-L(fasttry):				/* first 1/2 L1 */
-#if IS_IN (libc)			/* only up to this algorithm outside of libc.so */
-	mov	__x86_data_cache_size_half(%rip), %R11_LP
-	cmpq	%rdx, %r11		/* calculate the smaller of */
-	cmovaq	%rdx, %r11		/* remaining bytes and 1/2 L1 */
-#endif
-
-L(fast):				/* good ol' MOVS */
-#if IS_IN (libc)
-	movq	%r11, %rcx
-	andq	$-8, %r11
-#else
-	movq	%rdx, %rcx
-#endif
-	shrq	$3, %rcx
-	jz	L(fastskip)
-
-	rep
-	movsq
-
-	.p2align 4,, 4
-
-L(fastskip):
-#if IS_IN (libc)
-	subq	%r11, %rdx		/* check for more */
-	testq	$-8, %rdx
-	jnz	L(fastafter)
-#endif
-
-	andl	$7, %edx		/* check for left overs */
-#ifdef USE_AS_MEMPCPY
-	jnz	L(1)
-
-	movq	%rdi, %rax
-#else
-	movq	RETVAL(%rsp), %rax
-	jnz	L(1)
-
-	rep
-#endif
-	retq				/* exit */
-
-#if IS_IN (libc)			/* none of the algorithms below for RTLD */
-
-	.p2align 4
-
-L(fastafter):
-
-/* Handle large blocks smaller than 1/2 L2. */
-
-L(pretry):				/* first 1/2 L2 */
-	mov	__x86_shared_cache_size_half (%rip), %R8_LP
-	cmpq	%rdx, %r8		/* calculate the lesser of */
-	cmovaq	%rdx, %r8		/* remaining bytes and 1/2 L2 */
-
-L(pre):					/* 64-byte with prefetching */
-	movq	%r8, %rcx
-	andq	$-64, %r8
-	shrq	$6, %rcx
-	jz	L(preskip)
-
-	movq	%r14, SAVE0(%rsp)
-	cfi_rel_offset (%r14, SAVE0)
-	movq	%r13, SAVE1(%rsp)
-	cfi_rel_offset (%r13, SAVE1)
-	movq	%r12, SAVE2(%rsp)
-	cfi_rel_offset (%r12, SAVE2)
-	movq	%rbx, SAVE3(%rsp)
-	cfi_rel_offset (%rbx, SAVE3)
-
-	cmpl	$0, __x86_prefetchw(%rip)
-	jz	L(preloop)		/* check if PREFETCHW OK */
-
-	.p2align 4
-
-/* ... when PREFETCHW is available (less cache-probe traffic in MP systems). */
-
-L(prewloop):				/* cache-line in state M */
-	decq	%rcx
-
-	movq	   (%rsi), %rax
-	movq	 8 (%rsi), %rbx
-	movq	16 (%rsi), %r9
-	movq	24 (%rsi), %r10
-	movq	32 (%rsi), %r11
-	movq	40 (%rsi), %r12
-	movq	48 (%rsi), %r13
-	movq	56 (%rsi), %r14
-
-	prefetcht0	 0 + 896 (%rsi)
-	prefetcht0	64 + 896 (%rsi)
-
-	movq	%rax,   (%rdi)
-	movq	%rbx,  8(%rdi)
-	movq	 %r9, 16(%rdi)
-	movq	%r10, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r12, 40(%rdi)
-	movq	%r13, 48(%rdi)
-	movq	%r14, 56(%rdi)
-
-	leaq	64(%rsi), %rsi
-	leaq	64(%rdi), %rdi
-
-	jz	L(prebail)
-
-	decq	%rcx
-
-	movq	  (%rsi), %rax
-	movq	 8(%rsi), %rbx
-	movq	16(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	32(%rsi), %r11
-	movq	40(%rsi), %r12
-	movq	48(%rsi), %r13
-	movq	56(%rsi), %r14
-
-	movq	%rax,   (%rdi)
-	movq	%rbx,  8(%rdi)
-	movq	 %r9, 16(%rdi)
-	movq	%r10, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r12, 40(%rdi)
-	movq	%r13, 48(%rdi)
-	movq	%r14, 56(%rdi)
-
-	prefetchw	896 - 64(%rdi)
-	prefetchw	896 -  0(%rdi)
-
-	leaq	64(%rsi), %rsi
-	leaq	64(%rdi), %rdi
-
-	jnz	L(prewloop)
-	jmp	L(prebail)
-
-	.p2align 4
-
-/* ... when PREFETCHW is not available. */
-
-L(preloop):				/* cache-line in state E */
-	decq	%rcx
-
-	movq	  (%rsi), %rax
-	movq	 8(%rsi), %rbx
-	movq	16(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	32(%rsi), %r11
-	movq	40(%rsi), %r12
-	movq	48(%rsi), %r13
-	movq	56(%rsi), %r14
-
-	prefetcht0	896 +  0(%rsi)
-	prefetcht0	896 + 64(%rsi)
-
-	movq	%rax,   (%rdi)
-	movq	%rbx,  8(%rdi)
-	movq	 %r9, 16(%rdi)
-	movq	%r10, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r12, 40(%rdi)
-	movq	%r13, 48(%rdi)
-	movq	%r14, 56(%rdi)
-
-	leaq	64 (%rsi), %rsi
-	leaq	64 (%rdi), %rdi
-
-	jz	L(prebail)
-
-	decq	%rcx
-
-	movq	  (%rsi), %rax
-	movq	 8(%rsi), %rbx
-	movq	16(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	32(%rsi), %r11
-	movq	40(%rsi), %r12
-	movq	48(%rsi), %r13
-	movq	56(%rsi), %r14
-
-	prefetcht0	896 - 64(%rdi)
-	prefetcht0	896 -  0(%rdi)
-
-	movq	%rax,   (%rdi)
-	movq	%rbx,  8(%rdi)
-	movq	 %r9, 16(%rdi)
-	movq	%r10, 24(%rdi)
-	movq	%r11, 32(%rdi)
-	movq	%r12, 40(%rdi)
-	movq	%r13, 48(%rdi)
-	movq	%r14, 56(%rdi)
-
-	leaq	64(%rsi), %rsi
-	leaq	64(%rdi), %rdi
-
-	jnz	L(preloop)
-
-L(prebail):
-	movq	SAVE3(%rsp), %rbx
-	cfi_restore (%rbx)
-	movq	SAVE2(%rsp), %r12
-	cfi_restore (%r12)
-	movq	SAVE1(%rsp), %r13
-	cfi_restore (%r13)
-	movq	SAVE0(%rsp), %r14
-	cfi_restore (%r14)
-
-/*       .p2align 4 */
-
-L(preskip):
-	subq	%r8, %rdx		/* check for more */
-	testq	$-64, %rdx
-	jnz	L(preafter)
-
-	andl	$63, %edx		/* check for left overs */
-#ifdef USE_AS_MEMPCPY
-	jnz	L(1)
-
-	movq	%rdi, %rax
-#else
-	movq	RETVAL(%rsp), %rax
-	jnz	L(1)
-
-	rep
-#endif
-	retq				/* exit */
-
-	.p2align 4
-
-L(preafter):
-
-/* Handle huge blocks. */
-
-L(NTtry):
-
-L(NT):					/* non-temporal 128-byte */
-	movq	%rdx, %rcx
-	shrq	$7, %rcx
-	jz	L(NTskip)
-
-	movq	%r14, SAVE0(%rsp)
-	cfi_rel_offset (%r14, SAVE0)
-	movq	%r13, SAVE1(%rsp)
-	cfi_rel_offset (%r13, SAVE1)
-	movq	%r12, SAVE2(%rsp)
-	cfi_rel_offset (%r12, SAVE2)
-
-       .p2align 4
-
-L(NTloop):
-	prefetchnta	768(%rsi)
-	prefetchnta	832(%rsi)
-
-	decq	%rcx
-
-	movq	  (%rsi), %rax
-	movq	 8(%rsi), %r8
-	movq	16(%rsi), %r9
-	movq	24(%rsi), %r10
-	movq	32(%rsi), %r11
-	movq	40(%rsi), %r12
-	movq	48(%rsi), %r13
-	movq	56(%rsi), %r14
-
-	movntiq	%rax,   (%rdi)
-	movntiq	 %r8,  8(%rdi)
-	movntiq	 %r9, 16(%rdi)
-	movntiq	%r10, 24(%rdi)
-	movntiq	%r11, 32(%rdi)
-	movntiq	%r12, 40(%rdi)
-	movntiq	%r13, 48(%rdi)
-	movntiq	%r14, 56(%rdi)
-
-	movq	 64(%rsi), %rax
-	movq	 72(%rsi), %r8
-	movq	 80(%rsi), %r9
-	movq	 88(%rsi), %r10
-	movq	 96(%rsi), %r11
-	movq	104(%rsi), %r12
-	movq	112(%rsi), %r13
-	movq	120(%rsi), %r14
-
-	movntiq	%rax,  64(%rdi)
-	movntiq	 %r8,  72(%rdi)
-	movntiq	 %r9,  80(%rdi)
-	movntiq	%r10,  88(%rdi)
-	movntiq	%r11,  96(%rdi)
-	movntiq	%r12, 104(%rdi)
-	movntiq	%r13, 112(%rdi)
-	movntiq	%r14, 120(%rdi)
-
-	leaq	128(%rsi), %rsi
-	leaq	128(%rdi), %rdi
-
-	jnz	L(NTloop)
-
-	sfence				/* serialize memory stores */
-
-	movq	SAVE2(%rsp), %r12
-	cfi_restore (%r12)
-	movq	SAVE1(%rsp), %r13
-	cfi_restore (%r13)
-	movq	SAVE0(%rsp), %r14
-	cfi_restore (%r14)
-
-L(NTskip):
-	andl	$127, %edx		/* check for left overs */
-#ifdef USE_AS_MEMPCPY
-	jnz	L(1)
-
-	movq	%rdi, %rax
-#else
-	movq	RETVAL(%rsp), %rax
-	jnz	L(1)
-
-	rep
-#endif
-	retq				/* exit */
-
-#endif /* IS_IN (libc) */
-
-END(memcpy)
-
-#ifndef USE_AS_MEMPCPY
-libc_hidden_builtin_def (memcpy)
-# if defined SHARED && !defined USE_MULTIARCH && IS_IN (libc)
-#  undef memcpy
-#  include <shlib-compat.h>
-versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
-# endif
-#endif
+/* Implemented in memmove.S.  */
diff --git a/sysdeps/x86_64/memcpy_chk.S b/sysdeps/x86_64/memcpy_chk.S
index 2296b55119..aa33cd5fc1 100644
--- a/sysdeps/x86_64/memcpy_chk.S
+++ b/sysdeps/x86_64/memcpy_chk.S
@@ -1,5 +1,5 @@
 /* Checking memcpy for x86-64.
-   Copyright (C) 2004-2016 Free Software Foundation, Inc.
+   Copyright (C) 2004-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,7 +19,7 @@
 #include <sysdep.h>
 #include "asm-syntax.h"
 
-#ifndef PIC
+#ifndef SHARED
 	/* For libc.so this is defined in memcpy.S.
 	   For libc.a, this is a separate source to avoid
 	   memcpy bringing in __chk_fail and all routines
diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S
new file mode 100644
index 0000000000..9cc92ff9a9
--- /dev/null
+++ b/sysdeps/x86_64/memmove.S
@@ -0,0 +1,71 @@
+/* Optimized memmove for x86-64.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define VEC_SIZE	16
+#define VEC(i)		xmm##i
+#define PREFETCHNT	prefetchnta
+#define VMOVNT		movntdq
+/* Use movups and movaps for smaller code sizes.  */
+#define VMOVU		movups
+#define VMOVA		movaps
+
+#define SECTION(p)		p
+
+#ifdef USE_MULTIARCH
+# if !IS_IN (libc)
+#  define MEMCPY_SYMBOL(p,s)		memcpy
+# endif
+#else
+# if defined SHARED && IS_IN (libc)
+#  define MEMCPY_SYMBOL(p,s)		__memcpy
+# else
+#  define MEMCPY_SYMBOL(p,s)		memcpy
+# endif
+#endif
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+# define MEMPCPY_SYMBOL(p,s)		__mempcpy
+#endif
+#ifndef MEMMOVE_SYMBOL
+# define MEMMOVE_CHK_SYMBOL(p,s)	p
+# define MEMMOVE_SYMBOL(p,s)		memmove
+#endif
+
+#include "multiarch/memmove-vec-unaligned-erms.S"
+
+#ifndef USE_MULTIARCH
+libc_hidden_builtin_def (memmove)
+# if defined SHARED && IS_IN (libc)
+strong_alias (memmove, __memcpy)
+libc_hidden_ver (memmove, memcpy)
+# endif
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
+
+# if defined SHARED && IS_IN (libc)
+#  undef memcpy
+#  include <shlib-compat.h>
+versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
+
+#  if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+#  endif
+# endif
+#endif
diff --git a/sysdeps/x86_64/fpu/s_fdiml.S b/sysdeps/x86_64/memmove_chk.S
index f9f1e20259..39b56dde65 100644
--- a/sysdeps/x86_64/fpu/s_fdiml.S
+++ b/sysdeps/x86_64/memmove_chk.S
@@ -1,7 +1,6 @@
-/* Compute positive difference.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+/* Checking memmove for x86-64.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -18,26 +17,17 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
-
-	.text
-ENTRY(__fdiml)
-	fldt	8(%rsp)		// x
-	fldt	24(%rsp)	// x : y
-
-	fucomi	%st(1), %st
-	jp	1f
-
-	jc	3f
-	fstp	%st(1)
-	fldz
-	jmp	2f
-
-3:	fsubrp	%st, %st(1)
-	ret
-
-1:	fucomi	%st(0), %st
-	fcmovnu	%st(1), %st
-2:	fstp	%st(1)
-	ret
-END(__fdiml)
-weak_alias (__fdiml, fdiml)
+#include "asm-syntax.h"
+
+#ifndef SHARED
+	/* For libc.so this is defined in memmove.S.
+	   For libc.a, this is a separate source to avoid
+	   memmove bringing in __chk_fail and all routines
+	   it calls.  */
+        .text
+ENTRY (__memmove_chk)
+	cmpq	%rdx, %rcx
+	jb	__chk_fail
+	jmp	memmove
+END (__memmove_chk)
+#endif
diff --git a/sysdeps/x86_64/mempcpy.S b/sysdeps/x86_64/mempcpy.S
index acee5e56b1..d98500a78a 100644
--- a/sysdeps/x86_64/mempcpy.S
+++ b/sysdeps/x86_64/mempcpy.S
@@ -1,8 +1 @@
-#define USE_AS_MEMPCPY
-#define memcpy __mempcpy
-#define __memcpy_chk __mempcpy_chk
-#include <sysdeps/x86_64/memcpy.S>
-
-libc_hidden_def (__mempcpy)
-weak_alias (__mempcpy, mempcpy)
-libc_hidden_builtin_def (mempcpy)
+/* Implemented in memmove.S.  */
diff --git a/sysdeps/x86_64/mempcpy_chk.S b/sysdeps/x86_64/mempcpy_chk.S
index 390abc68dd..0e9e24db00 100644
--- a/sysdeps/x86_64/mempcpy_chk.S
+++ b/sysdeps/x86_64/mempcpy_chk.S
@@ -1,5 +1,5 @@
 /* Checking mempcpy for x86-64.
-   Copyright (C) 2004-2016 Free Software Foundation, Inc.
+   Copyright (C) 2004-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,7 +19,7 @@
 #include <sysdep.h>
 #include "asm-syntax.h"
 
-#ifndef PIC
+#ifndef SHARED
 	/* For libc.so this is defined in memcpy.S.
 	   For libc.a, this is a separate source to avoid
 	   mempcpy bringing in __chk_fail and all routines
diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S
index 840de30cd7..b8e3fa1d87 100644
--- a/sysdeps/x86_64/memrchr.S
+++ b/sysdeps/x86_64/memrchr.S
@@ -1,6 +1,6 @@
 /* fast SSE2 memrchr with 64 byte loop and pmaxub instruction using
 
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -22,7 +22,7 @@
 
 	.text
 ENTRY (__memrchr)
-	movd	%rsi, %xmm1
+	movd	%esi, %xmm1
 
 	sub	$16, %rdx
 	jbe	L(length_less16)
@@ -42,8 +42,8 @@ ENTRY (__memrchr)
 	jnz	L(matches0)
 
 	sub	$64, %rdi
-	mov	%rdi, %rcx
-	and	$15, %rcx
+	mov	%edi, %ecx
+	and	$15, %ecx
 	jz	L(loop_prolog)
 
 	add	$16, %rdi
@@ -108,8 +108,8 @@ L(loop_prolog):
 	test	%eax, %eax
 	jnz	L(matches0)
 
-	mov	%rdi, %rcx
-	and	$63, %rcx
+	mov	%edi, %ecx
+	and	$63, %ecx
 	jz	L(align64_loop)
 
 	add	$64, %rdi
@@ -166,8 +166,8 @@ L(align64_loop):
 
 	.p2align 4
 L(exit_loop):
-	add	$64, %rdx
-	cmp	$32, %rdx
+	add	$64, %edx
+	cmp	$32, %edx
 	jbe	L(exit_loop_32)
 
 	movdqa	48(%rdi), %xmm0
@@ -187,7 +187,7 @@ L(exit_loop):
 	pmovmskb	%xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches16_1)
-	cmp	$48, %rdx
+	cmp	$48, %edx
 	jbe	L(return_null)
 
 	pcmpeqb	(%rdi), %xmm1
@@ -204,7 +204,7 @@ L(exit_loop_32):
 	pmovmskb	%xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches48_1)
-	cmp	$16, %rdx
+	cmp	$16, %edx
 	jbe	L(return_null)
 
 	pcmpeqb	32(%rdi), %xmm1
@@ -276,7 +276,7 @@ L(matches48_1):
 
 	.p2align 4
 L(return_null):
-	xor	%rax, %rax
+	xor	%eax, %eax
 	ret
 
 	.p2align 4
@@ -306,18 +306,16 @@ L(length_less16):
 	punpcklbw	%xmm1, %xmm1
 	punpcklbw	%xmm1, %xmm1
 
-	add	$16, %rdx
+	add	$16, %edx
 
 	pshufd	$0, %xmm1, %xmm1
 
-	mov	%rdi, %rcx
-	and	$15, %rcx
+	mov	%edi, %ecx
+	and	$15, %ecx
 	jz	L(length_less16_offset0)
 
-	mov	%rdi, %rcx
-	and	$15, %rcx
 	mov	%cl, %dh
-	mov	%rcx, %r8
+	mov	%ecx, %esi
 	add	%dl, %dh
 	and	$-16, %rdi
 
@@ -340,7 +338,7 @@ L(length_less16):
 
 	bsr	%eax, %eax
 	add	%rdi, %rax
-	add	%r8, %rax
+	add	%rsi, %rax
 	ret
 
 	.p2align 4
@@ -362,14 +360,14 @@ L(length_less16_part2):
 	pcmpeqb	(%rdi), %xmm1
 	pmovmskb	%xmm1, %eax
 
-	mov	%r8, %rcx
+	mov	%esi, %ecx
 	sar	%cl, %eax
 	test	%eax, %eax
 	jz	L(return_null)
 
 	bsr	%eax, %eax
 	add	%rdi, %rax
-	add	%r8, %rax
+	add	%rsi, %rax
 	ret
 
 	.p2align 4
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 4cf0da0fb8..b342679576 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -1,6 +1,6 @@
 /* memset/bzero -- set memory area to CH/0
    Optimized version for x86-64.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,114 +19,43 @@
 
 #include <sysdep.h>
 
-	.text
-#if IS_IN (libc)
-ENTRY(__bzero)
-	movq	%rdi, %rax /* Set return value.  */
-	movq	%rsi, %rdx /* Set n.  */
-	pxor	%xmm0, %xmm0
-	jmp	L(entry_from_bzero)
-END(__bzero)
-weak_alias (__bzero, bzero)
-
-/* Like memset but takes additional parameter with return value.  */
-ENTRY(__memset_tail)
-	movq	%rcx, %rax /* Set return value.  */
-
-	movd	%esi, %xmm0
-	punpcklbw	%xmm0, %xmm0
-	punpcklwd	%xmm0, %xmm0
-	pshufd	$0, %xmm0, %xmm0
-
-	jmp	L(entry_from_bzero)
-END(__memset_tail)
+#define VEC_SIZE	16
+#define VEC(i)		xmm##i
+/* Don't use movups and movaps since they result in larger nop paddings
+   for alignment.  */
+#define VMOVU		movdqu
+#define VMOVA		movdqa
+
+#define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+  movd d, %xmm0; \
+  movq r, %rax; \
+  punpcklbw %xmm0, %xmm0; \
+  punpcklwd %xmm0, %xmm0; \
+  pshufd $0, %xmm0, %xmm0
+
+#define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+  movd d, %xmm0; \
+  movq r, %rax; \
+  pshufd $0, %xmm0, %xmm0
+
+#define SECTION(p)		p
+
+#ifndef MEMSET_SYMBOL
+# define MEMSET_CHK_SYMBOL(p,s)	p
+# define MEMSET_SYMBOL(p,s)	memset
 #endif
 
-#if defined PIC && IS_IN (libc)
-ENTRY_CHK (__memset_chk)
-	cmpq	%rdx, %rcx
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END_CHK (__memset_chk)
+#ifndef WMEMSET_SYMBOL
+# define WMEMSET_CHK_SYMBOL(p,s) p
+# define WMEMSET_SYMBOL(p,s)	__wmemset
 #endif
 
-ENTRY (memset)
-	movd	%esi, %xmm0
-	movq	%rdi, %rax
-	punpcklbw	%xmm0, %xmm0
-	punpcklwd	%xmm0, %xmm0
-	pshufd	$0, %xmm0, %xmm0
-L(entry_from_bzero):
-	cmpq	$64, %rdx
-	ja	L(loop_start)
-	cmpq	$16, %rdx
-	jbe	L(less_16_bytes)
-	cmpq	$32, %rdx
-	movdqu	%xmm0, (%rdi)
-	movdqu	%xmm0, -16(%rdi,%rdx)
-	ja	L(between_32_64_bytes)
-L(return):
-	rep
-	ret
-	.p2align 4
-L(between_32_64_bytes):
-	movdqu	%xmm0, 16(%rdi)
-	movdqu	%xmm0, -32(%rdi,%rdx)
-	ret
-	.p2align 4
-L(loop_start):
-	leaq	64(%rdi), %rcx
-	movdqu	%xmm0, (%rdi)
-	andq	$-64, %rcx
-	movdqu	%xmm0, -16(%rdi,%rdx)
-	movdqu	%xmm0, 16(%rdi)
-	movdqu	%xmm0, -32(%rdi,%rdx)
-	movdqu	%xmm0, 32(%rdi)
-	movdqu	%xmm0, -48(%rdi,%rdx)
-	movdqu	%xmm0, 48(%rdi)
-	movdqu	%xmm0, -64(%rdi,%rdx)
-	addq	%rdi, %rdx
-	andq	$-64, %rdx
-	cmpq	%rdx, %rcx
-	je	L(return)
-	.p2align 4
-L(loop):
-	movdqa	%xmm0, (%rcx)
-	movdqa	%xmm0, 16(%rcx)
-	movdqa	%xmm0, 32(%rcx)
-	movdqa	%xmm0, 48(%rcx)
-	addq	$64, %rcx
-	cmpq	%rcx, %rdx
-	jne	L(loop)
-	rep
-	ret
-L(less_16_bytes):
-	movq %xmm0, %rcx
-	testb	$24, %dl
-	jne	L(between8_16bytes)
-	testb	$4, %dl
-	jne	L(between4_7bytes)
-	testb	$1, %dl
-	je	L(odd_byte)
-	movb	%cl, (%rdi)
-L(odd_byte):
-	testb	$2, %dl
-	je	L(return)
-	movw	%cx, -2(%rax,%rdx)
-	ret
-L(between4_7bytes):
-	movl	%ecx, (%rdi)
-	movl	%ecx, -4(%rdi,%rdx)
-	ret
-L(between8_16bytes):
-	movq	%rcx, (%rdi)
-	movq	%rcx, -8(%rdi,%rdx)
-	ret
+#include "multiarch/memset-vec-unaligned-erms.S"
 
-END (memset)
 libc_hidden_builtin_def (memset)
 
-#if defined PIC && IS_IN (libc) && !defined USE_MULTIARCH
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
-	.section .gnu.warning.__memset_zero_constant_len_parameter
-	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
+#if IS_IN (libc)
+libc_hidden_def (__wmemset)
+weak_alias (__wmemset, wmemset)
+libc_hidden_weak (wmemset)
 #endif
diff --git a/sysdeps/x86_64/memset_chk.S b/sysdeps/x86_64/memset_chk.S
index 95bb5d0e94..4ecf914fbe 100644
--- a/sysdeps/x86_64/memset_chk.S
+++ b/sysdeps/x86_64/memset_chk.S
@@ -1,5 +1,5 @@
 /* Checking memset for x86-64.
-   Copyright (C) 2004-2016 Free Software Foundation, Inc.
+   Copyright (C) 2004-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/memusage.h b/sysdeps/x86_64/memusage.h
index fc102c4252..45fd920b52 100644
--- a/sysdeps/x86_64/memusage.h
+++ b/sysdeps/x86_64/memusage.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/mul_1.S b/sysdeps/x86_64/mul_1.S
index 88b8f920a1..c38927b5a0 100644
--- a/sysdeps/x86_64/mul_1.S
+++ b/sysdeps/x86_64/mul_1.S
@@ -1,6 +1,6 @@
 /* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
    the result in a second limb vector.
-   Copyright (C) 2003-2016 Free Software Foundation, Inc.
+   Copyright (C) 2003-2018 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index d234f4ab66..bb5e970735 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -1,26 +1,46 @@
 ifeq ($(subdir),csu)
 tests += test-multiarch
-gen-as-const-headers += ifunc-defines.sym
 endif
 
 ifeq ($(subdir),string)
 
-sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
-		   strcmp-sse2-unaligned strncmp-ssse3 \
-		   memcmp-sse4 memcpy-ssse3 memcpy-sse2-unaligned \
-		   memcpy-avx512-no-vzeroupper mempcpy-ssse3 memmove-ssse3 \
-		   memcpy-ssse3-back mempcpy-ssse3-back memmove-avx-unaligned \
-		   memcpy-avx-unaligned mempcpy-avx-unaligned \
-		   mempcpy-avx512-no-vzeroupper memmove-ssse3-back \
-		   memmove-avx512-no-vzeroupper strcasecmp_l-ssse3 \
-		   strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
+sysdep_routines += strncat-c stpncpy-c strncpy-c \
+		   strcmp-sse2 strcmp-sse2-unaligned strcmp-ssse3  \
+		   strcmp-sse4_2 strcmp-avx2 \
+		   strncmp-sse2 strncmp-ssse3 strncmp-sse4_2 strncmp-avx2 \
+		   memchr-sse2 rawmemchr-sse2 memchr-avx2 rawmemchr-avx2 \
+		   memrchr-sse2 memrchr-avx2 \
+		   memcmp-sse2 \
+		   memcmp-avx2-movbe \
+		   memcmp-sse4 memcpy-ssse3 \
+		   memmove-ssse3 \
+		   memcpy-ssse3-back \
+		   memmove-ssse3-back \
+		   memmove-avx512-no-vzeroupper \
+		   strcasecmp_l-sse2 strcasecmp_l-ssse3 \
+		   strcasecmp_l-sse4_2 strcasecmp_l-avx \
+		   strncase_l-sse2 strncase_l-ssse3 \
+		   strncase_l-sse4_2 strncase_l-avx \
+		   strchr-sse2 strchrnul-sse2 strchr-avx2 strchrnul-avx2 \
+		   strrchr-sse2 strrchr-avx2 \
+		   strlen-sse2 strnlen-sse2 strlen-avx2 strnlen-avx2 \
+		   strcat-ssse3 strncat-ssse3\
+		   strcpy-sse2 stpcpy-sse2 \
 		   strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
 		   strcpy-sse2-unaligned strncpy-sse2-unaligned \
 		   stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
+		   strcat-sse2 \
 		   strcat-sse2-unaligned strncat-sse2-unaligned \
 		   strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
-		   strcspn-c strpbrk-c strspn-c varshift memset-avx2 \
-		   memset-avx512-no-vzeroupper
+		   strcspn-sse2 strpbrk-sse2 strspn-sse2 \
+		   strcspn-c strpbrk-c strspn-c varshift \
+		   memset-avx512-no-vzeroupper \
+		   memmove-sse2-unaligned-erms \
+		   memmove-avx-unaligned-erms \
+		   memmove-avx512-unaligned-erms \
+		   memset-sse2-unaligned-erms \
+		   memset-avx2-unaligned-erms \
+		   memset-avx512-unaligned-erms
 CFLAGS-varshift.c += -msse4
 CFLAGS-strcspn-c.c += -msse4
 CFLAGS-strpbrk-c.c += -msse4
@@ -28,5 +48,20 @@ CFLAGS-strspn-c.c += -msse4
 endif
 
 ifeq ($(subdir),wcsmbs)
-sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c wcscpy-ssse3 wcscpy-c
+sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
+		   wmemcmp-avx2-movbe \
+		   wmemchr-sse2 wmemchr-avx2 \
+		   wcscmp-sse2 wcscmp-avx2 \
+		   wcsncmp-sse2 wcsncmp-avx2 \
+		   wcscpy-ssse3 wcscpy-c \
+		   wcschr-sse2 wcschr-avx2 \
+		   wcsrchr-sse2 wcsrchr-avx2 \
+		   wcsnlen-sse4_1 wcsnlen-c \
+		   wcslen-sse2 wcslen-avx2 wcsnlen-avx2
+endif # subdir is wcsmbs
+
+ifeq ($(subdir),debug) # Add the *_chk-nonshared routines for the debug subdir.
+sysdep_routines += memcpy_chk-nonshared mempcpy_chk-nonshared \
+		   memmove_chk-nonshared memset_chk-nonshared \
+		   wmemset_chk-nonshared
 endif
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
new file mode 100644
index 0000000000..9cab837642
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -0,0 +1,36 @@
+/* Common definition for ifunc selections optimized with SSE2 and AVX2.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+/* Return the avx2 variant if all three checks pass, else the sse2 one.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    return OPTIMIZE (avx2);
+
+  return OPTIMIZE (sse2);  /* Reached when any check above failed.  */
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
deleted file mode 100644
index 3df946f343..0000000000
--- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "init-arch.h"
-#include <stddef.h>
-
---
-
-CPU_FEATURES_SIZE	sizeof (struct cpu_features)
-CPUID_OFFSET		offsetof (struct cpu_features, cpuid)
-CPUID_SIZE		sizeof (struct cpuid_registers)
-CPUID_EAX_OFFSET	offsetof (struct cpuid_registers, eax)
-CPUID_EBX_OFFSET	offsetof (struct cpuid_registers, ebx)
-CPUID_ECX_OFFSET	offsetof (struct cpuid_registers, ecx)
-CPUID_EDX_OFFSET	offsetof (struct cpuid_registers, edx)
-FAMILY_OFFSET		offsetof (struct cpu_features, family)
-MODEL_OFFSET		offsetof (struct cpu_features, model)
-FEATURE_OFFSET		offsetof (struct cpu_features, feature)
-FEATURE_SIZE		sizeof (unsigned int)
-
-COMMON_CPUID_INDEX_1
-COMMON_CPUID_INDEX_7
-FEATURE_INDEX_1
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index 188b6d36c6..9aaaef7251 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -1,5 +1,5 @@
 /* Enumerate available IFUNC implementations of a function.  x86-64 version.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -38,77 +38,164 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 
   size_t i = 0;
 
-  /* Support sysdeps/x86_64/multiarch/memcmp.S.  */
+  /* Support sysdeps/x86_64/multiarch/memchr.c.  */
+  IFUNC_IMPL (i, name, memchr,
+	      IFUNC_IMPL_ADD (array, i, memchr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __memchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/memcmp.c.  */
   IFUNC_IMPL (i, name, memcmp,
+	      IFUNC_IMPL_ADD (array, i, memcmp,
+			      (HAS_ARCH_FEATURE (AVX2_Usable)
+			       && HAS_CPU_FEATURE (MOVBE)),
+			      __memcmp_avx2_movbe)
 	      IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSE4_1),
 			      __memcmp_sse4_1)
 	      IFUNC_IMPL_ADD (array, i, memcmp, HAS_CPU_FEATURE (SSSE3),
 			      __memcmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
 
+#ifdef SHARED
   /* Support sysdeps/x86_64/multiarch/memmove_chk.c.  */
   IFUNC_IMPL (i, name, __memmove_chk,
-#ifdef HAVE_AVX512_ASM_SUPPORT
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memmove_chk_avx512_no_vzeroupper)
-#endif
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memmove_chk_avx512_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memmove_chk_avx512_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
 			      HAS_ARCH_FEATURE (AVX_Usable),
 			      __memmove_chk_avx_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      HAS_ARCH_FEATURE (AVX_Usable),
+			      __memmove_chk_avx_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
 			      HAS_CPU_FEATURE (SSSE3),
 			      __memmove_chk_ssse3_back)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
 			      HAS_CPU_FEATURE (SSSE3),
 			      __memmove_chk_ssse3)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
-			      __memmove_chk_sse2))
+			      __memmove_chk_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+			      __memmove_chk_sse2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+			      __memmove_chk_erms))
+#endif /* SHARED */
 
-  /* Support sysdeps/x86_64/multiarch/memmove.S.  */
+  /* Support sysdeps/x86_64/multiarch/memmove.c.  */
   IFUNC_IMPL (i, name, memmove,
 	      IFUNC_IMPL_ADD (array, i, memmove,
 			      HAS_ARCH_FEATURE (AVX_Usable),
 			      __memmove_avx_unaligned)
-#ifdef HAVE_AVX512_ASM_SUPPORT
+	      IFUNC_IMPL_ADD (array, i, memmove,
+			      HAS_ARCH_FEATURE (AVX_Usable),
+			      __memmove_avx_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, memmove,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memmove_avx512_no_vzeroupper)
-#endif
+	      IFUNC_IMPL_ADD (array, i, memmove,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memmove_avx512_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memmove,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memmove_avx512_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
 			      __memmove_ssse3_back)
 	      IFUNC_IMPL_ADD (array, i, memmove, HAS_CPU_FEATURE (SSSE3),
 			      __memmove_ssse3)
-	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
+	      IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_erms)
+	      IFUNC_IMPL_ADD (array, i, memmove, 1,
+			      __memmove_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memmove, 1,
+			      __memmove_sse2_unaligned_erms))
+
+  /* Support sysdeps/x86_64/multiarch/memrchr.c.  */
+  IFUNC_IMPL (i, name, memrchr,
+	      IFUNC_IMPL_ADD (array, i, memrchr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __memrchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/memset_chk.S.  */
+#ifdef SHARED
+  /* Support sysdeps/x86_64/multiarch/memset_chk.c.  */
   IFUNC_IMPL (i, name, __memset_chk,
 	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
-			      __memset_chk_sse2)
+			      __memset_chk_erms)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
+			      __memset_chk_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk, 1,
+			      __memset_chk_sse2_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, __memset_chk,
 			      HAS_ARCH_FEATURE (AVX2_Usable),
-			      __memset_chk_avx2)
-#ifdef HAVE_AVX512_ASM_SUPPORT
+			      __memset_chk_avx2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __memset_chk_avx2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memset_chk_avx512_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memset_chk_avx512_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memset_chk,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memset_chk_avx512_no_vzeroupper)
-#endif
 	      )
+#endif /* SHARED */
 
-  /* Support sysdeps/x86_64/multiarch/memset.S.  */
+  /* Support sysdeps/x86_64/multiarch/memset.c.  */
   IFUNC_IMPL (i, name, memset,
-	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_sse2)
+	      IFUNC_IMPL_ADD (array, i, memset, 1,
+			      __memset_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memset, 1,
+			      __memset_sse2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_erms)
+	      IFUNC_IMPL_ADD (array, i, memset,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __memset_avx2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, memset,
 			      HAS_ARCH_FEATURE (AVX2_Usable),
-			      __memset_avx2)
-#ifdef HAVE_AVX512_ASM_SUPPORT
+			      __memset_avx2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memset,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memset_avx512_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memset,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memset_avx512_unaligned)
 	      IFUNC_IMPL_ADD (array, i, memset,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memset_avx512_no_vzeroupper)
-#endif
 	     )
 
-  /* Support sysdeps/x86_64/multiarch/stpncpy.S.  */
+  /* Support sysdeps/x86_64/multiarch/rawmemchr.c.  */
+  IFUNC_IMPL (i, name, rawmemchr,
+	      IFUNC_IMPL_ADD (array, i, rawmemchr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __rawmemchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/strlen.c.  */
+  IFUNC_IMPL (i, name, strlen,
+	      IFUNC_IMPL_ADD (array, i, strlen,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strlen_avx2)
+	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/strnlen.c.  */
+  IFUNC_IMPL (i, name, strnlen,
+	      IFUNC_IMPL_ADD (array, i, strnlen,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strnlen_avx2)
+	      IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/stpncpy.c.  */
   IFUNC_IMPL (i, name, stpncpy,
 	      IFUNC_IMPL_ADD (array, i, stpncpy, HAS_CPU_FEATURE (SSSE3),
 			      __stpncpy_ssse3)
@@ -116,14 +203,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __stpncpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/stpcpy.S.  */
+  /* Support sysdeps/x86_64/multiarch/stpcpy.c.  */
   IFUNC_IMPL (i, name, stpcpy,
 	      IFUNC_IMPL_ADD (array, i, stpcpy, HAS_CPU_FEATURE (SSSE3),
 			      __stpcpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strcasecmp_l.S.  */
+  /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c.  */
   IFUNC_IMPL (i, name, strcasecmp,
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
 			      HAS_ARCH_FEATURE (AVX_Usable),
@@ -136,7 +223,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strcasecmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strcasecmp_l.S.  */
+  /* Support sysdeps/x86_64/multiarch/strcasecmp_l.c.  */
   IFUNC_IMPL (i, name, strcasecmp_l,
 	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
 			      HAS_ARCH_FEATURE (AVX_Usable),
@@ -150,20 +237,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
 			      __strcasecmp_l_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strcat.S.  */
+  /* Support sysdeps/x86_64/multiarch/strcat.c.  */
   IFUNC_IMPL (i, name, strcat,
 	      IFUNC_IMPL_ADD (array, i, strcat, HAS_CPU_FEATURE (SSSE3),
 			      __strcat_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strchr.S.  */
+  /* Support sysdeps/x86_64/multiarch/strchr.c.  */
   IFUNC_IMPL (i, name, strchr,
+	      IFUNC_IMPL_ADD (array, i, strchr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strchr_avx2)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
 	      IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strcmp.S.  */
+  /* Support sysdeps/x86_64/multiarch/strchrnul.c.  */
+  IFUNC_IMPL (i, name, strchrnul,
+	      IFUNC_IMPL_ADD (array, i, strchrnul,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strchrnul_avx2)
+	      IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/strrchr.c.  */
+  IFUNC_IMPL (i, name, strrchr,
+	      IFUNC_IMPL_ADD (array, i, strrchr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strrchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/strcmp.c.  */
   IFUNC_IMPL (i, name, strcmp,
+	      IFUNC_IMPL_ADD (array, i, strcmp,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strcmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSE4_2),
 			      __strcmp_sse42)
 	      IFUNC_IMPL_ADD (array, i, strcmp, HAS_CPU_FEATURE (SSSE3),
@@ -171,20 +278,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strcpy.S.  */
+  /* Support sysdeps/x86_64/multiarch/strcpy.c.  */
   IFUNC_IMPL (i, name, strcpy,
 	      IFUNC_IMPL_ADD (array, i, strcpy, HAS_CPU_FEATURE (SSSE3),
 			      __strcpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strcspn.S.  */
+  /* Support sysdeps/x86_64/multiarch/strcspn.c.  */
   IFUNC_IMPL (i, name, strcspn,
 	      IFUNC_IMPL_ADD (array, i, strcspn, HAS_CPU_FEATURE (SSE4_2),
 			      __strcspn_sse42)
 	      IFUNC_IMPL_ADD (array, i, strcspn, 1, __strcspn_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strncase_l.S.  */
+  /* Support sysdeps/x86_64/multiarch/strncase_l.c.  */
   IFUNC_IMPL (i, name, strncasecmp,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
 			      HAS_ARCH_FEATURE (AVX_Usable),
@@ -198,7 +305,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp, 1,
 			      __strncasecmp_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strncase_l.S.  */
+  /* Support sysdeps/x86_64/multiarch/strncase_l.c.  */
   IFUNC_IMPL (i, name, strncasecmp_l,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
 			      HAS_ARCH_FEATURE (AVX_Usable),
@@ -212,7 +319,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strncasecmp_l, 1,
 			      __strncasecmp_l_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strncat.S.  */
+  /* Support sysdeps/x86_64/multiarch/strncat.c.  */
   IFUNC_IMPL (i, name, strncat,
 	      IFUNC_IMPL_ADD (array, i, strncat, HAS_CPU_FEATURE (SSSE3),
 			      __strncat_ssse3)
@@ -220,7 +327,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncat_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strncat, 1, __strncat_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strncpy.S.  */
+  /* Support sysdeps/x86_64/multiarch/strncpy.c.  */
   IFUNC_IMPL (i, name, strncpy,
 	      IFUNC_IMPL_ADD (array, i, strncpy, HAS_CPU_FEATURE (SSSE3),
 			      __strncpy_ssse3)
@@ -228,14 +335,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncpy_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strncpy, 1, __strncpy_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/strpbrk.S.  */
+  /* Support sysdeps/x86_64/multiarch/strpbrk.c.  */
   IFUNC_IMPL (i, name, strpbrk,
 	      IFUNC_IMPL_ADD (array, i, strpbrk, HAS_CPU_FEATURE (SSE4_2),
 			      __strpbrk_sse42)
 	      IFUNC_IMPL_ADD (array, i, strpbrk, 1, __strpbrk_sse2))
 
 
-  /* Support sysdeps/x86_64/multiarch/strspn.S.  */
+  /* Support sysdeps/x86_64/multiarch/strspn.c.  */
   IFUNC_IMPL (i, name, strspn,
 	      IFUNC_IMPL_ADD (array, i, strspn, HAS_CPU_FEATURE (SSE4_2),
 			      __strspn_sse42)
@@ -246,99 +353,226 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
 	      IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/wcscpy.S.  */
+  /* Support sysdeps/x86_64/multiarch/wcschr.c.  */
+  IFUNC_IMPL (i, name, wcschr,
+	      IFUNC_IMPL_ADD (array, i, wcschr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wcschr_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/wcsrchr.c.  */
+  IFUNC_IMPL (i, name, wcsrchr,
+	      IFUNC_IMPL_ADD (array, i, wcsrchr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wcsrchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/wcscmp.c.  */
+  IFUNC_IMPL (i, name, wcscmp,
+	      IFUNC_IMPL_ADD (array, i, wcscmp,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wcscmp_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/wcsncmp.c.  */
+  IFUNC_IMPL (i, name, wcsncmp,
+	      IFUNC_IMPL_ADD (array, i, wcsncmp,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wcsncmp_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/wcscpy.c.  */
   IFUNC_IMPL (i, name, wcscpy,
 	      IFUNC_IMPL_ADD (array, i, wcscpy, HAS_CPU_FEATURE (SSSE3),
 			      __wcscpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, wcscpy, 1, __wcscpy_sse2))
 
-  /* Support sysdeps/x86_64/multiarch/wmemcmp.S.  */
+  /* Support sysdeps/x86_64/multiarch/wcslen.c.  */
+  IFUNC_IMPL (i, name, wcslen,
+	      IFUNC_IMPL_ADD (array, i, wcslen,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wcslen_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/wcsnlen.c.  */
+  IFUNC_IMPL (i, name, wcsnlen,
+	      IFUNC_IMPL_ADD (array, i, wcsnlen,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wcsnlen_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcsnlen,
+			      HAS_CPU_FEATURE (SSE4_1),
+			      __wcsnlen_sse4_1)
+	      IFUNC_IMPL_ADD (array, i, wcsnlen, 1, __wcsnlen_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/wmemchr.c.  */
+  IFUNC_IMPL (i, name, wmemchr,
+	      IFUNC_IMPL_ADD (array, i, wmemchr,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wmemchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2))
+
+  /* Support sysdeps/x86_64/multiarch/wmemcmp.c.  */
   IFUNC_IMPL (i, name, wmemcmp,
+	      IFUNC_IMPL_ADD (array, i, wmemcmp,
+			      (HAS_ARCH_FEATURE (AVX2_Usable)
+			       && HAS_CPU_FEATURE (MOVBE)),
+			      __wmemcmp_avx2_movbe)
 	      IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSE4_1),
 			      __wmemcmp_sse4_1)
 	      IFUNC_IMPL_ADD (array, i, wmemcmp, HAS_CPU_FEATURE (SSSE3),
 			      __wmemcmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
 
+  /* Support sysdeps/x86_64/multiarch/wmemset.c.  */
+  IFUNC_IMPL (i, name, wmemset,
+	      IFUNC_IMPL_ADD (array, i, wmemset, 1,
+			      __wmemset_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, wmemset,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wmemset_avx2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, wmemset,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __wmemset_avx512_unaligned))
+
 #ifdef SHARED
-  /* Support sysdeps/x86_64/multiarch/memcpy_chk.S.  */
+  /* Support sysdeps/x86_64/multiarch/memcpy_chk.c.  */
   IFUNC_IMPL (i, name, __memcpy_chk,
-#ifdef HAVE_AVX512_ASM_SUPPORT
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memcpy_chk_avx512_no_vzeroupper)
-#endif
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memcpy_chk_avx512_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memcpy_chk_avx512_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
 			      HAS_ARCH_FEATURE (AVX_Usable),
 			      __memcpy_chk_avx_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      HAS_ARCH_FEATURE (AVX_Usable),
+			      __memcpy_chk_avx_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
 			      HAS_CPU_FEATURE (SSSE3),
 			      __memcpy_chk_ssse3_back)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
 			      HAS_CPU_FEATURE (SSSE3),
 			      __memcpy_chk_ssse3)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
-			      __memcpy_chk_sse2))
+			      __memcpy_chk_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+			      __memcpy_chk_sse2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+			      __memcpy_chk_erms))
+#endif /* SHARED */
 
-  /* Support sysdeps/x86_64/multiarch/memcpy.S.  */
+  /* Support sysdeps/x86_64/multiarch/memcpy.c.  */
   IFUNC_IMPL (i, name, memcpy,
 	      IFUNC_IMPL_ADD (array, i, memcpy,
 			      HAS_ARCH_FEATURE (AVX_Usable),
 			      __memcpy_avx_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memcpy,
+			      HAS_ARCH_FEATURE (AVX_Usable),
+			      __memcpy_avx_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
 			      __memcpy_ssse3_back)
 	      IFUNC_IMPL_ADD (array, i, memcpy, HAS_CPU_FEATURE (SSSE3),
 			      __memcpy_ssse3)
-#ifdef HAVE_AVX512_ASM_SUPPORT
 	      IFUNC_IMPL_ADD (array, i, memcpy,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __memcpy_avx512_no_vzeroupper)
-#endif
+	      IFUNC_IMPL_ADD (array, i, memcpy,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memcpy_avx512_unaligned)
+	      IFUNC_IMPL_ADD (array, i, memcpy,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __memcpy_avx512_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
-	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1,
+			      __memcpy_sse2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_erms))
 
-  /* Support sysdeps/x86_64/multiarch/mempcpy_chk.S.  */
+#ifdef SHARED
+  /* Support sysdeps/x86_64/multiarch/mempcpy_chk.c.  */
   IFUNC_IMPL (i, name, __mempcpy_chk,
-#ifdef HAVE_AVX512_ASM_SUPPORT
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __mempcpy_chk_avx512_no_vzeroupper)
-#endif
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __mempcpy_chk_avx512_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __mempcpy_chk_avx512_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
 			      HAS_ARCH_FEATURE (AVX_Usable),
 			      __mempcpy_chk_avx_unaligned)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      HAS_ARCH_FEATURE (AVX_Usable),
+			      __mempcpy_chk_avx_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
 			      HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_chk_ssse3_back)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
 			      HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_chk_ssse3)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
-			      __mempcpy_chk_sse2))
+			      __mempcpy_chk_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+			      __mempcpy_chk_sse2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+			      __mempcpy_chk_erms))
+#endif /* SHARED */
 
-  /* Support sysdeps/x86_64/multiarch/mempcpy.S.  */
+  /* Support sysdeps/x86_64/multiarch/mempcpy.c.  */
   IFUNC_IMPL (i, name, mempcpy,
-#ifdef HAVE_AVX512_ASM_SUPPORT
 	      IFUNC_IMPL_ADD (array, i, mempcpy,
 			      HAS_ARCH_FEATURE (AVX512F_Usable),
 			      __mempcpy_avx512_no_vzeroupper)
-#endif
+	      IFUNC_IMPL_ADD (array, i, mempcpy,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __mempcpy_avx512_unaligned)
+	      IFUNC_IMPL_ADD (array, i, mempcpy,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __mempcpy_avx512_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, mempcpy,
 			      HAS_ARCH_FEATURE (AVX_Usable),
 			      __mempcpy_avx_unaligned)
+	      IFUNC_IMPL_ADD (array, i, mempcpy,
+			      HAS_ARCH_FEATURE (AVX_Usable),
+			      __mempcpy_avx_unaligned_erms)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_ssse3_back)
 	      IFUNC_IMPL_ADD (array, i, mempcpy, HAS_CPU_FEATURE (SSSE3),
 			      __mempcpy_ssse3)
-	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
+	      IFUNC_IMPL_ADD (array, i, mempcpy, 1,
+			      __mempcpy_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, mempcpy, 1,
+			      __mempcpy_sse2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_erms))
 
-  /* Support sysdeps/x86_64/multiarch/strncmp.S.  */
+  /* Support sysdeps/x86_64/multiarch/strncmp.c.  */
   IFUNC_IMPL (i, name, strncmp,
+	      IFUNC_IMPL_ADD (array, i, strncmp,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __strncmp_avx2)
 	      IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSE4_2),
 			      __strncmp_sse42)
 	      IFUNC_IMPL_ADD (array, i, strncmp, HAS_CPU_FEATURE (SSSE3),
 			      __strncmp_ssse3)
 	      IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_sse2))
+
+#ifdef SHARED
+  /* Support sysdeps/x86_64/multiarch/wmemset_chk.c.  */
+  IFUNC_IMPL (i, name, __wmemset_chk,
+	      IFUNC_IMPL_ADD (array, i, __wmemset_chk, 1,
+			      __wmemset_chk_sse2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+			      HAS_ARCH_FEATURE (AVX2_Usable),
+			      __wmemset_chk_avx2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+			      HAS_ARCH_FEATURE (AVX512F_Usable),
+			      __wmemset_chk_avx512_unaligned))
 #endif
 
   return i;
diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
new file mode 100644
index 0000000000..bf5ab8eb7f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
@@ -0,0 +1,45 @@
+/* Common definition for memcmp/wmemcmp ifunc selections.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+  /* Return the first variant the CPU qualifies for; SSE2 is the default.  */
+  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+      && CPU_FEATURES_CPU_P (cpu_features, MOVBE)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    return OPTIMIZE (avx2_movbe);
+
+  if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+    return OPTIMIZE (sse4_1);
+
+  if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+    return OPTIMIZE (ssse3);
+
+  return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
new file mode 100644
index 0000000000..5b1eb1c92c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
@@ -0,0 +1,81 @@
+/* Common definition for memcpy, mempcpy and memmove implementation.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+  attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+  /* An ERMS or FSRM preference wins over every vector variant below.  */
+  if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS)
+      || CPU_FEATURES_ARCH_P (cpu_features, Prefer_FSRM))
+    return OPTIMIZE (erms);
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+      && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+    {
+      if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+	return OPTIMIZE (avx512_no_vzeroupper);
+
+      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	return OPTIMIZE (avx512_unaligned_erms);
+
+      return OPTIMIZE (avx512_unaligned);
+    }
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    {
+      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	return OPTIMIZE (avx_unaligned_erms);
+
+      return OPTIMIZE (avx_unaligned);
+    }
+
+  if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3)
+      || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
+    {
+      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	return OPTIMIZE (sse2_unaligned_erms);
+
+      return OPTIMIZE (sse2_unaligned);
+    }
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward))
+    return OPTIMIZE (ssse3_back);
+
+  return OPTIMIZE (ssse3);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
new file mode 100644
index 0000000000..19b5ae676c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -0,0 +1,69 @@
+/* Common definition for memset/memset_chk ifunc selections.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+  attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+  /* An ERMS preference wins over every vector variant below.  */
+  if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
+    return OPTIMIZE (erms);
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+      && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+    {
+      if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+	return OPTIMIZE (avx512_no_vzeroupper);
+
+      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	return OPTIMIZE (avx512_unaligned_erms);
+
+      return OPTIMIZE (avx512_unaligned);
+    }
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable))
+    {
+      if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+	return OPTIMIZE (avx2_unaligned_erms);
+      else
+	return OPTIMIZE (avx2_unaligned);
+    }
+
+  if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+    return OPTIMIZE (sse2_unaligned_erms);
+
+  return OPTIMIZE (sse2_unaligned);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-sse4_2.h b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
new file mode 100644
index 0000000000..f2b791cccf
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-sse4_2.h
@@ -0,0 +1,34 @@
+/* Common definition for ifunc selections optimized with SSE2 and SSE4.2.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+  /* Return the SSE4.2 variant if the CPU has SSE4.2, else the SSE2 one.  */
+  if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2))
+    return OPTIMIZE (sse42);
+
+  return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
new file mode 100644
index 0000000000..1ca170b663
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
@@ -0,0 +1,43 @@
+/* Common definition for strcasecmp family ifunc selections.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+  /* AVX wins when usable; SSE4.2 is skipped if flagged Slow_SSE4_2.  */
+  if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Usable))
+    return OPTIMIZE (avx);
+
+  if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2)
+      && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
+    return OPTIMIZE (sse42);
+
+  if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+    return OPTIMIZE (ssse3);
+
+  return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-unaligned-ssse3.h b/sysdeps/x86_64/multiarch/ifunc-unaligned-ssse3.h
new file mode 100644
index 0000000000..81805f9832
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-unaligned-ssse3.h
@@ -0,0 +1,40 @@
+/* Common definition for ifunc selections optimized with SSE2, unaligned
+   SSE2 and SSSE3.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+  /* Fast unaligned loads outrank SSSE3; plain SSE2 is the default.  */
+  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+    return OPTIMIZE (sse2_unaligned);
+
+  if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+    return OPTIMIZE (ssse3);
+
+  return OPTIMIZE (sse2);
+}
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
new file mode 100644
index 0000000000..2f1085f5fc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
@@ -0,0 +1,42 @@
+/* Common definition for wmemset/wmemset_chk ifunc selections.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+  /* AVX512 is only chosen when the AVX2 conditions hold as well.  */
+  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    {
+      if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+	  && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+	return OPTIMIZE (avx512_unaligned);
+      else
+	return OPTIMIZE (avx2_unaligned);
+    }
+
+  return OPTIMIZE (sse2_unaligned);
+}
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
new file mode 100644
index 0000000000..5f5e772554
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
@@ -0,0 +1,340 @@
+/* memchr/wmemchr optimized with AVX2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef MEMCHR
+#  define MEMCHR	__memchr_avx2
+# endif
+
+# ifdef USE_AS_WMEMCHR
+#  define VPCMPEQ	vpcmpeqd
+# else
+#  define VPCMPEQ	vpcmpeqb
+# endif
+
+# ifndef VZEROUPPER
+#  define VZEROUPPER	vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+	.section .text.avx,"ax",@progbits
+ENTRY (MEMCHR)
+# ifndef USE_AS_RAWMEMCHR
+	/* Check for zero length.  */
+	testq	%rdx, %rdx
+	jz	L(null)
+# endif
+	movl	%edi, %ecx
+	/* Broadcast CHAR to YMM0.  */
+	vmovd	%esi, %xmm0
+# ifdef USE_AS_WMEMCHR
+	shl	$2, %rdx
+	vpbroadcastd %xmm0, %ymm0
+# else
+	vpbroadcastb %xmm0, %ymm0
+# endif
+	/* Check if we may cross page boundary with one vector load.  */
+	andl	$(2 * VEC_SIZE - 1), %ecx
+	cmpl	$VEC_SIZE, %ecx
+	ja	L(cros_page_boundary)
+
+	/* Check the first VEC_SIZE bytes.  */
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+
+# ifndef USE_AS_RAWMEMCHR
+	jnz	L(first_vec_x0_check)
+	/* Adjust length and check the end of data.  */
+	subq	$VEC_SIZE, %rdx
+	jbe	L(zero)
+# else
+	jnz	L(first_vec_x0)
+# endif
+
+	/* Align data for aligned loads in the loop.  */
+	addq	$VEC_SIZE, %rdi
+	andl	$(VEC_SIZE - 1), %ecx
+	andq	$-VEC_SIZE, %rdi
+
+# ifndef USE_AS_RAWMEMCHR
+	/* Adjust length.  */
+	addq	%rcx, %rdx
+
+	subq	$(VEC_SIZE * 4), %rdx
+	jbe	L(last_4x_vec_or_less)
+# endif
+	jmp	L(more_4x_vec)
+
+	.p2align 4
+L(cros_page_boundary):
+	andl	$(VEC_SIZE - 1), %ecx
+	andq	$-VEC_SIZE, %rdi
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	/* Remove the leading bytes.  */
+	sarl	%cl, %eax
+	testl	%eax, %eax
+	jz	L(aligned_more)
+	tzcntl	%eax, %eax
+# ifndef USE_AS_RAWMEMCHR
+	/* Check the end of data.  */
+	cmpq	%rax, %rdx
+	jbe	L(zero)
+# endif
+	addq	%rdi, %rax
+	addq	%rcx, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(aligned_more):
+# ifndef USE_AS_RAWMEMCHR
+        /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)"
+	   instead of "(rdx + rcx) - VEC_SIZE" to avoid possible addition
+	   overflow.  */
+	negq	%rcx
+	addq	$VEC_SIZE, %rcx
+
+	/* Check the end of data.  */
+	subq	%rcx, %rdx
+	jbe	L(zero)
+# endif
+
+	addq	$VEC_SIZE, %rdi
+
+# ifndef USE_AS_RAWMEMCHR
+	subq	$(VEC_SIZE * 4), %rdx
+	jbe	L(last_4x_vec_or_less)
+# endif
+
+L(more_4x_vec):
+	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
+	   since data is only aligned to VEC_SIZE.  */
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+
+	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+
+	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x3)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+# ifndef USE_AS_RAWMEMCHR
+	subq	$(VEC_SIZE * 4), %rdx
+	jbe	L(last_4x_vec_or_less)
+# endif
+
+	/* Align data to 4 * VEC_SIZE.  */
+	movq	%rdi, %rcx
+	andl	$(4 * VEC_SIZE - 1), %ecx
+	andq	$-(4 * VEC_SIZE), %rdi
+
+# ifndef USE_AS_RAWMEMCHR
+	/* Adjust length.  */
+	addq	%rcx, %rdx
+# endif
+
+	.p2align 4
+L(loop_4x_vec):
+	/* Compare 4 * VEC at a time forward.  */
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2
+	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3
+	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4
+
+	vpor	%ymm1, %ymm2, %ymm5
+	vpor	%ymm3, %ymm4, %ymm6
+	vpor	%ymm5, %ymm6, %ymm5
+
+	vpmovmskb %ymm5, %eax
+	testl	%eax, %eax
+	jnz	L(4x_vec_end)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+# ifdef USE_AS_RAWMEMCHR
+	jmp	L(loop_4x_vec)
+# else
+	subq	$(VEC_SIZE * 4), %rdx
+	ja	L(loop_4x_vec)
+
+L(last_4x_vec_or_less):
+	/* Less than 4 * VEC and aligned to VEC_SIZE.  */
+	addl	$(VEC_SIZE * 2), %edx
+	jle	L(last_2x_vec)
+
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+
+	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+
+	jnz	L(first_vec_x2_check)
+	subl	$VEC_SIZE, %edx
+	jle	L(zero)
+
+	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+
+	jnz	L(first_vec_x3_check)
+	xorl	%eax, %eax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_2x_vec):
+	addl	$(VEC_SIZE * 2), %edx
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+
+	jnz	L(first_vec_x0_check)
+	subl	$VEC_SIZE, %edx
+	jle	L(zero)
+
+	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1_check)
+	xorl	%eax, %eax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x0_check):
+	tzcntl	%eax, %eax
+	/* Check the end of data.  */
+	cmpq	%rax, %rdx
+	jbe	L(zero)
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x1_check):
+	tzcntl	%eax, %eax
+	/* Check the end of data.  */
+	cmpq	%rax, %rdx
+	jbe	L(zero)
+	addq	$VEC_SIZE, %rax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x2_check):
+	tzcntl	%eax, %eax
+	/* Check the end of data.  */
+	cmpq	%rax, %rdx
+	jbe	L(zero)
+	addq	$(VEC_SIZE * 2), %rax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x3_check):
+	tzcntl	%eax, %eax
+	/* Check the end of data.  */
+	cmpq	%rax, %rdx
+	jbe	L(zero)
+	addq	$(VEC_SIZE * 3), %rax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(zero):
+	VZEROUPPER
+L(null):
+	xorl	%eax, %eax
+	ret
+# endif
+
+	.p2align 4
+L(first_vec_x0):
+	tzcntl	%eax, %eax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x1):
+	tzcntl	%eax, %eax
+	addq	$VEC_SIZE, %rax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x2):
+	tzcntl	%eax, %eax
+	addq	$(VEC_SIZE * 2), %rax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(4x_vec_end):
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+	vpmovmskb %ymm2, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+	vpmovmskb %ymm3, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+	vpmovmskb %ymm4, %eax
+	testl	%eax, %eax
+L(first_vec_x3):
+	tzcntl	%eax, %eax
+	addq	$(VEC_SIZE * 3), %rax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+END (MEMCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memchr-sse2.S b/sysdeps/x86_64/multiarch/memchr-sse2.S
new file mode 100644
index 0000000000..8a5e7fd1c5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memchr-sse2.S
@@ -0,0 +1,28 @@
+/* memchr optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define memchr __memchr_sse2
+/* The macros redefined below expand to nothing in the included file.  */
+# undef strong_alias
+# define strong_alias(memchr, __memchr)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(memchr)
+#endif
+
+#include "../memchr.S"
diff --git a/sysdeps/x86_64/multiarch/wcscpy.S b/sysdeps/x86_64/multiarch/memchr.c
index 8e7270b9c7..016f57846a 100644
--- a/sysdeps/x86_64/multiarch/wcscpy.S
+++ b/sysdeps/x86_64/multiarch/memchr.c
@@ -1,7 +1,6 @@
-/* Multiple versions of wcscpy
+/* Multiple versions of memchr
    All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,23 +17,19 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <init-arch.h>
-
 /* Define multiple versions only for the definition in libc. */
 #if IS_IN (libc)
-
-	.text
-ENTRY(wcscpy)
-	.type	wcscpy, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	HAS_CPU_FEATURE (SSSE3)
-	jnz	2f
-	leaq	__wcscpy_sse2(%rip), %rax
-	ret
-
-2:	leaq	__wcscpy_ssse3(%rip), %rax
-	ret
-
-END(wcscpy)
+# define memchr __redirect_memchr
+# include <string.h>
+# undef memchr
+
+# define SYMBOL_NAME memchr
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_memchr, memchr, IFUNC_SELECTOR ());
+strong_alias (memchr, __memchr)
+# ifdef SHARED
+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
+  __attribute__((visibility ("hidden")));
+# endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
new file mode 100644
index 0000000000..30f764c393
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
@@ -0,0 +1,429 @@
+/* memcmp/wmemcmp optimized with AVX2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+/* memcmp/wmemcmp is implemented as:
+   1. For size from 2 to 7 bytes, load as big endian with movbe and bswap
+      to avoid branches.
+   2. Use overlapping compare to avoid branch.
+   3. Use vector compare when size >= 8 bytes, for both memcmp and
+      wmemcmp.
+   4. If size is 8 * VEC_SIZE or less, unroll the loop.
+   5. Compare 4 * VEC_SIZE at a time with the aligned first memory
+      area.
+   6. Use 2 vector compares when size is 2 * VEC_SIZE or less.
+   7. Use 4 vector compares when size is 4 * VEC_SIZE or less.
+   8. Use 8 vector compares when size is 8 * VEC_SIZE or less.  */
+
+# include <sysdep.h>
+
+# ifndef MEMCMP
+#  define MEMCMP	__memcmp_avx2_movbe
+# endif
+
+# ifdef USE_AS_WMEMCMP
+#  define VPCMPEQ	vpcmpeqd
+# else
+#  define VPCMPEQ	vpcmpeqb
+# endif
+
+# ifndef VZEROUPPER
+#  define VZEROUPPER	vzeroupper
+# endif
+
+# define VEC_SIZE 32
+# define VEC_MASK ((1 << VEC_SIZE) - 1)
+
+/* Warning!
+           wmemcmp has to use SIGNED comparison for elements.
+           memcmp has to use UNSIGNED comparison for elements.
+*/
+
+	.section .text.avx,"ax",@progbits
+ENTRY (MEMCMP)
+# ifdef USE_AS_WMEMCMP
+	shl	$2, %rdx
+# endif
+	cmpq	$VEC_SIZE, %rdx
+	jb	L(less_vec)
+
+	/* From VEC to 2 * VEC.  No branch when size == VEC_SIZE.  */
+	vmovdqu	(%rsi), %ymm2
+	VPCMPEQ (%rdi), %ymm2, %ymm2
+	vpmovmskb %ymm2, %eax
+	subl    $VEC_MASK, %eax
+	jnz	L(first_vec)
+
+	cmpq	$(VEC_SIZE * 2), %rdx
+	jbe	L(last_vec)
+
+	VPCMPEQ	%ymm0, %ymm0, %ymm0
+	/* More than 2 * VEC.  */
+	cmpq	$(VEC_SIZE * 8), %rdx
+	ja	L(more_8x_vec)
+	cmpq	$(VEC_SIZE * 4), %rdx
+	jb	L(last_4x_vec)
+
+	/* From 4 * VEC to 8 * VEC, inclusively. */
+	vmovdqu	(%rsi), %ymm1
+	VPCMPEQ (%rdi), %ymm1, %ymm1
+
+	vmovdqu	VEC_SIZE(%rsi), %ymm2
+	VPCMPEQ VEC_SIZE(%rdi), %ymm2, %ymm2
+
+	vmovdqu	(VEC_SIZE * 2)(%rsi), %ymm3
+	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm3, %ymm3
+
+	vmovdqu	(VEC_SIZE * 3)(%rsi), %ymm4
+	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm4, %ymm4
+
+	vpand	%ymm1, %ymm2, %ymm5
+	vpand	%ymm3, %ymm4, %ymm6
+	vpand	%ymm5, %ymm6, %ymm5
+
+	vptest	%ymm0, %ymm5
+	jnc	L(4x_vec_end)
+
+	leaq	-(4 * VEC_SIZE)(%rdi, %rdx), %rdi
+	leaq	-(4 * VEC_SIZE)(%rsi, %rdx), %rsi
+	vmovdqu	(%rsi), %ymm1
+	VPCMPEQ (%rdi), %ymm1, %ymm1
+
+	vmovdqu	VEC_SIZE(%rsi), %ymm2
+	VPCMPEQ VEC_SIZE(%rdi), %ymm2, %ymm2
+	vpand	%ymm2, %ymm1, %ymm5
+
+	vmovdqu	(VEC_SIZE * 2)(%rsi), %ymm3
+	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm3, %ymm3
+	vpand	%ymm3, %ymm5, %ymm5
+
+	vmovdqu	(VEC_SIZE * 3)(%rsi), %ymm4
+	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm4, %ymm4
+	vpand	%ymm4, %ymm5, %ymm5
+
+	vptest	%ymm0, %ymm5
+	jnc	L(4x_vec_end)
+	xorl	%eax, %eax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_2x_vec):
+	/* From VEC to 2 * VEC.  No branch when size == VEC_SIZE.  */
+	vmovdqu	(%rsi), %ymm2
+	VPCMPEQ (%rdi), %ymm2, %ymm2
+	vpmovmskb %ymm2, %eax
+	subl    $VEC_MASK, %eax
+	jnz	L(first_vec)
+
+L(last_vec):
+	/* Use overlapping loads to avoid branches.  */
+	leaq	-VEC_SIZE(%rdi, %rdx), %rdi
+	leaq	-VEC_SIZE(%rsi, %rdx), %rsi
+	vmovdqu	(%rsi), %ymm2
+	VPCMPEQ (%rdi), %ymm2, %ymm2
+	vpmovmskb %ymm2, %eax
+	subl    $VEC_MASK, %eax
+	jnz	L(first_vec)
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec):
+	/* A byte or int32 is different within 16 or 32 bytes.  */
+	tzcntl	%eax, %ecx
+# ifdef USE_AS_WMEMCMP
+	xorl	%eax, %eax
+	movl	(%rdi, %rcx), %edx
+	cmpl	(%rsi, %rcx), %edx
+L(wmemcmp_return):
+	setl	%al
+	negl	%eax
+	orl	$1, %eax
+# else
+	movzbl	(%rdi, %rcx), %eax
+	movzbl	(%rsi, %rcx), %edx
+	sub	%edx, %eax
+# endif
+	VZEROUPPER
+	ret
+
+# ifdef USE_AS_WMEMCMP
+	.p2align 4
+L(4):
+	xorl	%eax, %eax
+	movl	(%rdi), %edx
+	cmpl	(%rsi), %edx
+	jne	L(wmemcmp_return)
+	ret
+# else
+	.p2align 4
+L(between_4_7):
+	/* Load as big endian with overlapping movbe to avoid branches.  */
+	movbe	(%rdi), %eax
+	movbe	(%rsi), %ecx
+	shlq	$32, %rax
+	shlq	$32, %rcx
+	movbe	-4(%rdi, %rdx), %edi
+	movbe	-4(%rsi, %rdx), %esi
+	orq	%rdi, %rax
+	orq	%rsi, %rcx
+	subq	%rcx, %rax
+	je	L(exit)
+	sbbl	%eax, %eax
+	orl	$1, %eax
+	ret
+
+	.p2align 4
+L(exit):
+	ret
+
+	.p2align 4
+L(between_2_3):
+	/* Load as big endian to avoid branches.  */
+	movzwl	(%rdi), %eax
+	movzwl	(%rsi), %ecx
+	shll	$8, %eax
+	shll	$8, %ecx
+	bswap	%eax
+	bswap	%ecx
+	movb	-1(%rdi, %rdx), %al
+	movb	-1(%rsi, %rdx), %cl
+	/* Subtraction is okay because the upper 8 bits are zero.  */
+	subl	%ecx, %eax
+	ret
+
+	.p2align 4
+L(1):
+	movzbl	(%rdi), %eax
+	movzbl	(%rsi), %ecx
+	subl	%ecx, %eax
+	ret
+# endif
+
+	.p2align 4
+L(zero):
+	xorl	%eax, %eax
+	ret
+
+	.p2align 4
+L(less_vec):
+# ifdef USE_AS_WMEMCMP
+	/* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes.  */
+	cmpb	$4, %dl
+	je	L(4)
+	jb	L(zero)
+# else
+	cmpb	$1, %dl
+	je	L(1)
+	jb	L(zero)
+	cmpb	$4, %dl
+	jb	L(between_2_3)
+	cmpb	$8, %dl
+	jb	L(between_4_7)
+# endif
+	cmpb	$16, %dl
+	jae	L(between_16_31)
+	/* It is between 8 and 15 bytes.  */
+	vmovq	(%rdi), %xmm1
+	vmovq	(%rsi), %xmm2
+	VPCMPEQ %xmm1, %xmm2, %xmm2
+	vpmovmskb %xmm2, %eax
+	subl    $0xffff, %eax
+	jnz	L(first_vec)
+	/* Use overlapping loads to avoid branches.  */
+	leaq	-8(%rdi, %rdx), %rdi
+	leaq	-8(%rsi, %rdx), %rsi
+	vmovq	(%rdi), %xmm1
+	vmovq	(%rsi), %xmm2
+	VPCMPEQ %xmm1, %xmm2, %xmm2
+	vpmovmskb %xmm2, %eax
+	subl    $0xffff, %eax
+	jnz	L(first_vec)
+	ret
+
+	.p2align 4
+L(between_16_31):
+	/* From 16 to 31 bytes.  No branch when size == 16.  */
+	vmovdqu	(%rsi), %xmm2
+	VPCMPEQ (%rdi), %xmm2, %xmm2
+	vpmovmskb %xmm2, %eax
+	subl    $0xffff, %eax
+	jnz	L(first_vec)
+
+	/* Use overlapping loads to avoid branches.  */
+	leaq	-16(%rdi, %rdx), %rdi
+	leaq	-16(%rsi, %rdx), %rsi
+	vmovdqu	(%rsi), %xmm2
+	VPCMPEQ (%rdi), %xmm2, %xmm2
+	vpmovmskb %xmm2, %eax
+	subl    $0xffff, %eax
+	jnz	L(first_vec)
+	ret
+
+	.p2align 4
+L(more_8x_vec):
+	/* More than 8 * VEC.  Check the first VEC.  */
+	vmovdqu	(%rsi), %ymm2
+	VPCMPEQ (%rdi), %ymm2, %ymm2
+	vpmovmskb %ymm2, %eax
+	subl    $VEC_MASK, %eax
+	jnz	L(first_vec)
+
+	/* Align the first memory area for aligned loads in the loop.
+	   Compute how much the first memory area is misaligned.  */
+	movq	%rdi, %rcx
+	andl	$(VEC_SIZE - 1), %ecx
+	/* Get the negative of offset for alignment.  */
+	subq	$VEC_SIZE, %rcx
+	/* Adjust the second memory area.  */
+	subq	%rcx, %rsi
+	/* Adjust the first memory area which should be aligned now.  */
+	subq	%rcx, %rdi
+	/* Adjust length.  */
+	addq	%rcx, %rdx
+
+L(loop_4x_vec):
+	/* Compare 4 * VEC at a time forward.  */
+	vmovdqu	(%rsi), %ymm1
+	VPCMPEQ (%rdi), %ymm1, %ymm1
+
+	vmovdqu	VEC_SIZE(%rsi), %ymm2
+	VPCMPEQ VEC_SIZE(%rdi), %ymm2, %ymm2
+	vpand	%ymm2, %ymm1, %ymm5
+
+	vmovdqu	(VEC_SIZE * 2)(%rsi), %ymm3
+	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm3, %ymm3
+	vpand	%ymm3, %ymm5, %ymm5
+
+	vmovdqu	(VEC_SIZE * 3)(%rsi), %ymm4
+	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm4, %ymm4
+	vpand	%ymm4, %ymm5, %ymm5
+
+	vptest	%ymm0, %ymm5
+	jnc	L(4x_vec_end)
+
+	addq	$(VEC_SIZE * 4), %rdi
+	addq	$(VEC_SIZE * 4), %rsi
+
+	subq	$(VEC_SIZE * 4), %rdx
+	cmpq	$(VEC_SIZE * 4), %rdx
+	jae	L(loop_4x_vec)
+
+	/* Less than 4 * VEC.  */
+	cmpq	$VEC_SIZE, %rdx
+	jbe	L(last_vec)
+	cmpq	$(VEC_SIZE * 2), %rdx
+	jbe	L(last_2x_vec)
+
+L(last_4x_vec):
+	/* From 2 * VEC to 4 * VEC. */
+	vmovdqu	(%rsi), %ymm2
+	VPCMPEQ (%rdi), %ymm2, %ymm2
+	vpmovmskb %ymm2, %eax
+	subl    $VEC_MASK, %eax
+	jnz	L(first_vec)
+
+	addq	$VEC_SIZE, %rdi
+	addq	$VEC_SIZE, %rsi
+	vmovdqu	(%rsi), %ymm2
+	VPCMPEQ (%rdi), %ymm2, %ymm2
+	vpmovmskb %ymm2, %eax
+	subl    $VEC_MASK, %eax
+	jnz	L(first_vec)
+
+	/* Use overlapping loads to avoid branches.  */
+	leaq	-(3 * VEC_SIZE)(%rdi, %rdx), %rdi
+	leaq	-(3 * VEC_SIZE)(%rsi, %rdx), %rsi
+	vmovdqu	(%rsi), %ymm2
+	VPCMPEQ (%rdi), %ymm2, %ymm2
+	vpmovmskb %ymm2, %eax
+	subl    $VEC_MASK, %eax
+	jnz	L(first_vec)
+
+	addq	$VEC_SIZE, %rdi
+	addq	$VEC_SIZE, %rsi
+	vmovdqu	(%rsi), %ymm2
+	VPCMPEQ (%rdi), %ymm2, %ymm2
+	vpmovmskb %ymm2, %eax
+	subl    $VEC_MASK, %eax
+	jnz	L(first_vec)
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(4x_vec_end):
+	vpmovmskb %ymm1, %eax
+	subl	$VEC_MASK, %eax
+	jnz	L(first_vec)
+	vpmovmskb %ymm2, %eax
+	subl	$VEC_MASK, %eax
+	jnz	L(first_vec_x1)
+	vpmovmskb %ymm3, %eax
+	subl	$VEC_MASK, %eax
+	jnz	L(first_vec_x2)
+	vpmovmskb %ymm4, %eax
+	subl	$VEC_MASK, %eax
+	tzcntl	%eax, %ecx
+# ifdef USE_AS_WMEMCMP
+	xorl	%eax, %eax
+	movl	(VEC_SIZE * 3)(%rdi, %rcx), %edx
+	cmpl	(VEC_SIZE * 3)(%rsi, %rcx), %edx
+	jmp	L(wmemcmp_return)
+# else
+	movzbl	(VEC_SIZE * 3)(%rdi, %rcx), %eax
+	movzbl	(VEC_SIZE * 3)(%rsi, %rcx), %edx
+	sub	%edx, %eax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x1):
+	tzcntl	%eax, %ecx
+# ifdef USE_AS_WMEMCMP
+	xorl	%eax, %eax
+	movl	VEC_SIZE(%rdi, %rcx), %edx
+	cmpl	VEC_SIZE(%rsi, %rcx), %edx
+	jmp	L(wmemcmp_return)
+# else
+	movzbl	VEC_SIZE(%rdi, %rcx), %eax
+	movzbl	VEC_SIZE(%rsi, %rcx), %edx
+	sub	%edx, %eax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x2):
+	tzcntl	%eax, %ecx
+# ifdef USE_AS_WMEMCMP
+	xorl	%eax, %eax
+	movl	(VEC_SIZE * 2)(%rdi, %rcx), %edx
+	cmpl	(VEC_SIZE * 2)(%rsi, %rcx), %edx
+	jmp	L(wmemcmp_return)
+# else
+	movzbl	(VEC_SIZE * 2)(%rdi, %rcx), %eax
+	movzbl	(VEC_SIZE * 2)(%rsi, %rcx), %edx
+	sub	%edx, %eax
+# endif
+	VZEROUPPER
+	ret
+END (MEMCMP)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse2.S b/sysdeps/x86_64/multiarch/memcmp-sse2.S
new file mode 100644
index 0000000000..6058aa751e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcmp-sse2.S
@@ -0,0 +1,31 @@
+/* memcmp with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define memcmp __memcmp_sse2	/* The file included below is assembled under this name.  */
+
+# ifdef SHARED
+#  undef libc_hidden_builtin_def
+#  define libc_hidden_builtin_def(name)	/* Expands to nothing for the included file.  */
+# endif
+
+# undef weak_alias
+# define weak_alias(ignored1, ignored2)	/* Expands to nothing: the included file creates no weak aliases.  */
+#endif	/* IS_IN (libc) */
+
+#include <sysdeps/x86_64/memcmp.S>	/* Implementation body, built with the overrides above in effect.  */
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 786f87282c..8e164f2cb6 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -1,5 +1,5 @@
 /* memcmp with SSE4.1, wmemcmp with SSE4.1
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
+   Copyright (C) 2010-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -31,7 +31,7 @@
   lea		TABLE(%rip), %r11;				\
   movslq	(%r11, INDEX, SCALE), %rcx;			\
   add		%r11, %rcx;					\
-  jmp		*%rcx;						\
+  _CET_NOTRACK jmp *%rcx;					\
   ud2
 
 /* Warning!
diff --git a/sysdeps/x86_64/multiarch/memcmp-ssse3.S b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
index a22f399e02..6f76c64123 100644
--- a/sysdeps/x86_64/multiarch/memcmp-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcmp-ssse3.S
@@ -1,5 +1,5 @@
 /* memcmp with SSSE3, wmemcmp with SSSE3
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp.S
deleted file mode 100644
index b5a1cc202e..0000000000
--- a/sysdeps/x86_64/multiarch/memcmp.S
+++ /dev/null
@@ -1,67 +0,0 @@
-/* Multiple versions of memcmp
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in libc. */
-#if IS_IN (libc)
-	.text
-ENTRY(memcmp)
-	.type	memcmp, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	HAS_CPU_FEATURE (SSSE3)
-	jnz	2f
-	leaq	__memcmp_sse2(%rip), %rax
-	ret
-
-2:	HAS_CPU_FEATURE (SSE4_1)
-	jz	3f
-	leaq	__memcmp_sse4_1(%rip), %rax
-	ret
-
-3:	leaq	__memcmp_ssse3(%rip), %rax
-	ret
-
-END(memcmp)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __memcmp_sse2, @function; \
-	.p2align 4; \
-	.globl __memcmp_sse2; \
-	.hidden __memcmp_sse2; \
-	__memcmp_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __memcmp_sse2, .-__memcmp_sse2
-
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_memcmp; __GI_memcmp = __memcmp_sse2
-# endif
-#endif
-
-#include "../memcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strspn.S b/sysdeps/x86_64/multiarch/memcmp.c
index 4942826b24..6f3ca43128 100644
--- a/sysdeps/x86_64/multiarch/strspn.S
+++ b/sysdeps/x86_64/multiarch/memcmp.c
@@ -1,7 +1,6 @@
-/* Multiple versions of strspn
+/* Multiple versions of memcmp.
    All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,33 +17,21 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <config.h>
-#include <sysdep.h>
-#include <init-arch.h>
-
 /* Define multiple versions only for the definition in libc.  */
 #if IS_IN (libc)
-	.text
-ENTRY(strspn)
-	.type	strspn, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__strspn_sse2(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_2)
-	jz	2f
-	leaq	__strspn_sse42(%rip), %rax
-2:	ret
-END(strspn)
+# define memcmp __redirect_memcmp	/* While set, <string.h> declares its prototype under this name.  */
+# include <string.h>
+# undef memcmp	/* Free the plain name for the definition later in this file.  */
 
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strspn_sse2, @function; \
-	.globl __strspn_sse2; \
-	.align 16; \
-	__strspn_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strspn_sse2, .-__strspn_sse2
-#endif
+# define SYMBOL_NAME memcmp	/* Set before the include; presumably names the variants it selects between.  */
+# include "ifunc-memcmp.h"	/* Not visible here; presumably provides the IFUNC_SELECTOR used in this file.  */
 
-#include "../strspn.S"
+libc_ifunc_redirected (__redirect_memcmp, memcmp, IFUNC_SELECTOR ());	/* Presumably defines memcmp as an IFUNC resolved by IFUNC_SELECTOR.  */
+# undef bcmp	/* Drop any macro definition of bcmp before it is used as an alias name.  */
+weak_alias (memcmp, bcmp)	/* bcmp is aliased to memcmp.  */
+
+# ifdef SHARED
+__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp)	/* Presumably declares the internal name __GI_memcmp for memcmp.  */
+  __attribute__ ((visibility ("hidden")));	/* The declaration above gets hidden visibility.  */
+# endif
+#endif	/* IS_IN (libc) */
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
deleted file mode 100644
index 74fed186e9..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy-avx-unaligned.S
+++ /dev/null
@@ -1,376 +0,0 @@
-/* memcpy with AVX
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#if IS_IN (libc) \
-    && (defined SHARED \
-        || defined USE_AS_MEMMOVE \
-	|| !defined USE_MULTIARCH)
-
-#include "asm-syntax.h"
-#ifndef MEMCPY
-# define MEMCPY	__memcpy_avx_unaligned
-# define MEMCPY_CHK	__memcpy_chk_avx_unaligned
-#endif
-
-	.section .text.avx,"ax",@progbits
-#if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK)
-	cmpq	%rdx, %rcx
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMCPY_CHK)
-#endif
-
-ENTRY (MEMCPY)
-	mov	%rdi, %rax
-#ifdef USE_AS_MEMPCPY
-	add	%rdx, %rax
-#endif
-	cmp	$256, %rdx
-	jae	L(256bytesormore)
-	cmp	$16, %dl
-	jb	L(less_16bytes)
-	cmp	$128, %dl
-	jb	L(less_128bytes)
-	vmovdqu (%rsi), %xmm0
-	lea	(%rsi, %rdx), %rcx
-	vmovdqu 0x10(%rsi), %xmm1
-	vmovdqu 0x20(%rsi), %xmm2
-	vmovdqu 0x30(%rsi), %xmm3
-	vmovdqu 0x40(%rsi), %xmm4
-	vmovdqu 0x50(%rsi), %xmm5
-	vmovdqu 0x60(%rsi), %xmm6
-	vmovdqu 0x70(%rsi), %xmm7
-	vmovdqu -0x80(%rcx), %xmm8
-	vmovdqu -0x70(%rcx), %xmm9
-	vmovdqu -0x60(%rcx), %xmm10
-	vmovdqu -0x50(%rcx), %xmm11
-	vmovdqu -0x40(%rcx), %xmm12
-	vmovdqu -0x30(%rcx), %xmm13
-	vmovdqu -0x20(%rcx), %xmm14
-	vmovdqu -0x10(%rcx), %xmm15
-	lea	(%rdi, %rdx), %rdx
-	vmovdqu %xmm0, (%rdi)
-	vmovdqu %xmm1, 0x10(%rdi)
-	vmovdqu %xmm2, 0x20(%rdi)
-	vmovdqu %xmm3, 0x30(%rdi)
-	vmovdqu %xmm4, 0x40(%rdi)
-	vmovdqu %xmm5, 0x50(%rdi)
-	vmovdqu %xmm6, 0x60(%rdi)
-	vmovdqu %xmm7, 0x70(%rdi)
-	vmovdqu %xmm8, -0x80(%rdx)
-	vmovdqu %xmm9, -0x70(%rdx)
-	vmovdqu %xmm10, -0x60(%rdx)
-	vmovdqu %xmm11, -0x50(%rdx)
-	vmovdqu %xmm12, -0x40(%rdx)
-	vmovdqu %xmm13, -0x30(%rdx)
-	vmovdqu %xmm14, -0x20(%rdx)
-	vmovdqu %xmm15, -0x10(%rdx)
-	ret
-	.p2align 4
-L(less_128bytes):
-	cmp	$64, %dl
-	jb	L(less_64bytes)
-	vmovdqu (%rsi), %xmm0
-	lea	(%rsi, %rdx), %rcx
-	vmovdqu 0x10(%rsi), %xmm1
-	vmovdqu 0x20(%rsi), %xmm2
-	lea	(%rdi, %rdx), %rdx
-	vmovdqu 0x30(%rsi), %xmm3
-	vmovdqu -0x40(%rcx), %xmm4
-	vmovdqu -0x30(%rcx), %xmm5
-	vmovdqu -0x20(%rcx), %xmm6
-	vmovdqu -0x10(%rcx), %xmm7
-	vmovdqu %xmm0, (%rdi)
-	vmovdqu %xmm1, 0x10(%rdi)
-	vmovdqu %xmm2, 0x20(%rdi)
-	vmovdqu %xmm3, 0x30(%rdi)
-	vmovdqu %xmm4, -0x40(%rdx)
-	vmovdqu %xmm5, -0x30(%rdx)
-	vmovdqu %xmm6, -0x20(%rdx)
-	vmovdqu %xmm7, -0x10(%rdx)
-	ret
-
-	.p2align 4
-L(less_64bytes):
-	cmp	$32, %dl
-	jb	L(less_32bytes)
-	vmovdqu (%rsi), %xmm0
-	vmovdqu 0x10(%rsi), %xmm1
-	vmovdqu -0x20(%rsi, %rdx), %xmm6
-	vmovdqu -0x10(%rsi, %rdx), %xmm7
-	vmovdqu %xmm0, (%rdi)
-	vmovdqu %xmm1, 0x10(%rdi)
-	vmovdqu %xmm6, -0x20(%rdi, %rdx)
-	vmovdqu %xmm7, -0x10(%rdi, %rdx)
-	ret
-
-	.p2align 4
-L(less_32bytes):
-	vmovdqu (%rsi), %xmm0
-	vmovdqu -0x10(%rsi, %rdx), %xmm7
-	vmovdqu %xmm0, (%rdi)
-	vmovdqu %xmm7, -0x10(%rdi, %rdx)
-	ret
-
-	.p2align 4
-L(less_16bytes):
-	cmp	$8, %dl
-	jb	L(less_8bytes)
-	movq -0x08(%rsi, %rdx),	%rcx
-	movq (%rsi),	%rsi
-	movq %rsi, (%rdi)
-	movq %rcx, -0x08(%rdi, %rdx)
-	ret
-
-	.p2align 4
-L(less_8bytes):
-	cmp	$4, %dl
-	jb	L(less_4bytes)
-	mov -0x04(%rsi, %rdx), %ecx
-	mov (%rsi),	%esi
-	mov %esi, (%rdi)
-	mov %ecx, -0x04(%rdi, %rdx)
-	ret
-
-L(less_4bytes):
-	cmp	$1, %dl
-	jbe	L(less_2bytes)
-	mov -0x02(%rsi, %rdx),	%cx
-	mov (%rsi),	%si
-	mov %si, (%rdi)
-	mov %cx, -0x02(%rdi, %rdx)
-	ret
-
-L(less_2bytes):
-	jb	L(less_0bytes)
-	mov	(%rsi), %cl
-	mov	%cl,	(%rdi)
-L(less_0bytes):
-	ret
-
-	.p2align 4
-L(256bytesormore):
-#ifdef USE_AS_MEMMOVE
-	mov	%rdi, %rcx
-	sub	%rsi, %rcx
-	cmp	%rdx, %rcx
-	jc	L(copy_backward)
-#endif
-	cmp	$2048, %rdx
-	jae	L(gobble_data_movsb)
-	mov	%rax, %r8
-	lea	(%rsi, %rdx), %rcx
-	mov	%rdi, %r10
-	vmovdqu -0x80(%rcx), %xmm5
-	vmovdqu -0x70(%rcx), %xmm6
-	mov	$0x80, %rax
-	and	$-32, %rdi
-	add	$32, %rdi
-	vmovdqu -0x60(%rcx), %xmm7
-	vmovdqu -0x50(%rcx), %xmm8
-	mov	%rdi, %r11
-	sub	%r10, %r11
-	vmovdqu -0x40(%rcx), %xmm9
-	vmovdqu -0x30(%rcx), %xmm10
-	sub	%r11, %rdx
-	vmovdqu -0x20(%rcx), %xmm11
-	vmovdqu -0x10(%rcx), %xmm12
-	vmovdqu	(%rsi), %ymm4
-	add	%r11, %rsi
-	sub	%eax, %edx
-L(goble_128_loop):
-	vmovdqu (%rsi), %ymm0
-	vmovdqu 0x20(%rsi), %ymm1
-	vmovdqu 0x40(%rsi), %ymm2
-	vmovdqu 0x60(%rsi), %ymm3
-	add	%rax, %rsi
-	vmovdqa %ymm0, (%rdi)
-	vmovdqa %ymm1, 0x20(%rdi)
-	vmovdqa %ymm2, 0x40(%rdi)
-	vmovdqa %ymm3, 0x60(%rdi)
-	add	%rax, %rdi
-	sub	%eax, %edx
-	jae	L(goble_128_loop)
-	add	%eax, %edx
-	add	%rdi, %rdx
-	vmovdqu	%ymm4, (%r10)
-	vzeroupper
-	vmovdqu %xmm5, -0x80(%rdx)
-	vmovdqu %xmm6, -0x70(%rdx)
-	vmovdqu %xmm7, -0x60(%rdx)
-	vmovdqu %xmm8, -0x50(%rdx)
-	vmovdqu %xmm9, -0x40(%rdx)
-	vmovdqu %xmm10, -0x30(%rdx)
-	vmovdqu %xmm11, -0x20(%rdx)
-	vmovdqu %xmm12, -0x10(%rdx)
-	mov	%r8, %rax
-	ret
-
-	.p2align 4
-L(gobble_data_movsb):
-#ifdef SHARED_CACHE_SIZE_HALF
-	mov	$SHARED_CACHE_SIZE_HALF, %rcx
-#else
-	mov	__x86_shared_cache_size_half(%rip), %rcx
-#endif
-	shl	$3, %rcx
-	cmp	%rcx, %rdx
-	jae	L(gobble_big_data_fwd)
-	mov	%rdx, %rcx
-	mov	%rdx, %rcx
-	rep	movsb
-	ret
-
-	.p2align 4
-L(gobble_big_data_fwd):
-	lea	(%rsi, %rdx), %rcx
-	vmovdqu	(%rsi), %ymm4
-	vmovdqu -0x80(%rsi,%rdx), %xmm5
-	vmovdqu -0x70(%rcx), %xmm6
-	vmovdqu -0x60(%rcx), %xmm7
-	vmovdqu -0x50(%rcx), %xmm8
-	vmovdqu -0x40(%rcx), %xmm9
-	vmovdqu -0x30(%rcx), %xmm10
-	vmovdqu -0x20(%rcx), %xmm11
-	vmovdqu -0x10(%rcx), %xmm12
-	mov	%rdi, %r8
-	and	$-32, %rdi
-	add	$32, %rdi
-	mov	%rdi, %r10
-	sub	%r8, %r10
-	sub	%r10, %rdx
-	add	%r10, %rsi
-	lea	(%rdi, %rdx), %rcx
-	add	$-0x80, %rdx
-L(gobble_mem_fwd_loop):
-	prefetchnta 0x1c0(%rsi)
-	prefetchnta 0x280(%rsi)
-	vmovdqu	(%rsi), %ymm0
-	vmovdqu	0x20(%rsi), %ymm1
-	vmovdqu	0x40(%rsi), %ymm2
-	vmovdqu	0x60(%rsi), %ymm3
-	sub	$-0x80, %rsi
-	vmovntdq	%ymm0, (%rdi)
-	vmovntdq	%ymm1, 0x20(%rdi)
-	vmovntdq	%ymm2, 0x40(%rdi)
-	vmovntdq	%ymm3, 0x60(%rdi)
-	sub	$-0x80, %rdi
-	add	$-0x80, %rdx
-	jb	L(gobble_mem_fwd_loop)
-	sfence
-	vmovdqu	%ymm4, (%r8)
-	vzeroupper
-	vmovdqu %xmm5, -0x80(%rcx)
-	vmovdqu %xmm6, -0x70(%rcx)
-	vmovdqu %xmm7, -0x60(%rcx)
-	vmovdqu %xmm8, -0x50(%rcx)
-	vmovdqu %xmm9, -0x40(%rcx)
-	vmovdqu %xmm10, -0x30(%rcx)
-	vmovdqu %xmm11, -0x20(%rcx)
-	vmovdqu %xmm12, -0x10(%rcx)
-	ret
-
-#ifdef USE_AS_MEMMOVE
-	.p2align 4
-L(copy_backward):
-#ifdef SHARED_CACHE_SIZE_HALF
-	mov	$SHARED_CACHE_SIZE_HALF, %rcx
-#else
-	mov	__x86_shared_cache_size_half(%rip), %rcx
-#endif
-	shl	$3, %rcx
-	vmovdqu (%rsi), %xmm5
-	vmovdqu 0x10(%rsi), %xmm6
-	add	%rdx, %rdi
-	vmovdqu 0x20(%rsi), %xmm7
-	vmovdqu 0x30(%rsi), %xmm8
-	lea	-0x20(%rdi), %r10
-	mov %rdi, %r11
-	vmovdqu 0x40(%rsi), %xmm9
-	vmovdqu 0x50(%rsi), %xmm10
-	and	$0x1f, %r11
-	vmovdqu 0x60(%rsi), %xmm11
-	vmovdqu 0x70(%rsi), %xmm12
-	xor	%r11, %rdi
-	add	%rdx, %rsi
-	vmovdqu	-0x20(%rsi), %ymm4
-	sub	%r11, %rsi
-	sub	%r11, %rdx
-	cmp	%rcx, %rdx
-	ja	L(gobble_big_data_bwd)
-	add	$-0x80, %rdx
-L(gobble_mem_bwd_llc):
-	vmovdqu	-0x20(%rsi), %ymm0
-	vmovdqu	-0x40(%rsi), %ymm1
-	vmovdqu	-0x60(%rsi), %ymm2
-	vmovdqu	-0x80(%rsi), %ymm3
-	lea	-0x80(%rsi), %rsi
-	vmovdqa	%ymm0, -0x20(%rdi)
-	vmovdqa	%ymm1, -0x40(%rdi)
-	vmovdqa	%ymm2, -0x60(%rdi)
-	vmovdqa	%ymm3, -0x80(%rdi)
-	lea	-0x80(%rdi), %rdi
-	add	$-0x80, %rdx
-	jb	L(gobble_mem_bwd_llc)
-	vmovdqu	%ymm4, (%r10)
-	vzeroupper
-	vmovdqu %xmm5, (%rax)
-	vmovdqu %xmm6, 0x10(%rax)
-	vmovdqu %xmm7, 0x20(%rax)
-	vmovdqu %xmm8, 0x30(%rax)
-	vmovdqu %xmm9, 0x40(%rax)
-	vmovdqu %xmm10, 0x50(%rax)
-	vmovdqu %xmm11, 0x60(%rax)
-	vmovdqu %xmm12, 0x70(%rax)
-	ret
-
-	.p2align 4
-L(gobble_big_data_bwd):
-	add	$-0x80, %rdx
-L(gobble_mem_bwd_loop):
-	prefetchnta -0x1c0(%rsi)
-	prefetchnta -0x280(%rsi)
-	vmovdqu	-0x20(%rsi), %ymm0
-	vmovdqu	-0x40(%rsi), %ymm1
-	vmovdqu	-0x60(%rsi), %ymm2
-	vmovdqu	-0x80(%rsi), %ymm3
-	lea	-0x80(%rsi), %rsi
-	vmovntdq	%ymm0, -0x20(%rdi)
-	vmovntdq	%ymm1, -0x40(%rdi)
-	vmovntdq	%ymm2, -0x60(%rdi)
-	vmovntdq	%ymm3, -0x80(%rdi)
-	lea	-0x80(%rdi), %rdi
-	add	$-0x80, %rdx
-	jb	L(gobble_mem_bwd_loop)
-	sfence
-	vmovdqu	%ymm4, (%r10)
-	vzeroupper
-	vmovdqu %xmm5, (%rax)
-	vmovdqu %xmm6, 0x10(%rax)
-	vmovdqu %xmm7, 0x20(%rax)
-	vmovdqu %xmm8, 0x30(%rax)
-	vmovdqu %xmm9, 0x40(%rax)
-	vmovdqu %xmm10, 0x50(%rax)
-	vmovdqu %xmm11, 0x60(%rax)
-	vmovdqu %xmm12, 0x70(%rax)
-	ret
-#endif
-END (MEMCPY)
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
deleted file mode 100644
index 1bb12e81b0..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy-avx512-no-vzeroupper.S
+++ /dev/null
@@ -1,408 +0,0 @@
-/* memcpy optimized with AVX512 for KNL hardware.
-   Copyright (C) 2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#if defined HAVE_AVX512_ASM_SUPPORT && IS_IN (libc) \
-    && (defined SHARED \
-	|| defined USE_AS_MEMMOVE \
-	|| !defined USE_MULTIARCH)
-
-#include "asm-syntax.h"
-#ifndef MEMCPY
-# define MEMCPY		__memcpy_avx512_no_vzeroupper
-# define MEMCPY_CHK	__memcpy_chk_avx512_no_vzeroupper
-#endif
-
-	.section .text,"ax",@progbits
-#if !defined USE_AS_BCOPY
-ENTRY (MEMCPY_CHK)
-	cmpq	%rdx, %rcx
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMCPY_CHK)
-#endif
-
-ENTRY (MEMCPY)
-	mov	%rdi, %rax
-#ifdef USE_AS_MEMPCPY
-	add	%rdx, %rax
-#endif
-	lea	(%rsi, %rdx), %rcx
-	lea	(%rdi, %rdx), %r9
-	cmp	$512, %rdx
-	ja	L(512bytesormore)
-
-L(check):
-	cmp	$16, %rdx
-	jbe	L(less_16bytes)
-	cmp	$256, %rdx
-	jb	L(less_256bytes)
-	vmovups	(%rsi), %zmm0
-	vmovups 0x40(%rsi), %zmm1
-	vmovups 0x80(%rsi), %zmm2
-	vmovups 0xC0(%rsi), %zmm3
-	vmovups	-0x100(%rcx), %zmm4
-	vmovups -0xC0(%rcx), %zmm5
-	vmovups -0x80(%rcx), %zmm6
-	vmovups -0x40(%rcx), %zmm7
-	vmovups %zmm0, (%rdi)
-	vmovups %zmm1, 0x40(%rdi)
-	vmovups %zmm2, 0x80(%rdi)
-	vmovups %zmm3, 0xC0(%rdi)
-	vmovups	%zmm4, -0x100(%r9)
-	vmovups %zmm5, -0xC0(%r9)
-	vmovups %zmm6, -0x80(%r9)
-	vmovups %zmm7, -0x40(%r9)
-	ret
-
-L(less_256bytes):
-	cmp	$128, %dl
-	jb	L(less_128bytes)
-	vmovups	(%rsi), %zmm0
-	vmovups 0x40(%rsi), %zmm1
-	vmovups -0x80(%rcx), %zmm2
-	vmovups -0x40(%rcx), %zmm3
-	vmovups	%zmm0, (%rdi)
-	vmovups %zmm1, 0x40(%rdi)
-	vmovups %zmm2, -0x80(%r9)
-	vmovups %zmm3, -0x40(%r9)
-	ret
-
-L(less_128bytes):
-	cmp	$64, %dl
-	jb	L(less_64bytes)
-	vmovdqu (%rsi), %ymm0
-	vmovdqu 0x20(%rsi), %ymm1
-	vmovdqu -0x40(%rcx), %ymm2
-	vmovdqu -0x20(%rcx), %ymm3
-	vmovdqu %ymm0, (%rdi)
-	vmovdqu %ymm1, 0x20(%rdi)
-	vmovdqu %ymm2, -0x40(%r9)
-	vmovdqu %ymm3, -0x20(%r9)
-	ret
-
-L(less_64bytes):
-	cmp	$32, %dl
-	jb	L(less_32bytes)
-	vmovdqu	(%rsi), %ymm0
-	vmovdqu -0x20(%rcx), %ymm1
-	vmovdqu	%ymm0, (%rdi)
-	vmovdqu	%ymm1, -0x20(%r9)
-	ret
-
-L(less_32bytes):
-	vmovdqu (%rsi), %xmm0
-	vmovdqu -0x10(%rcx), %xmm1
-	vmovdqu %xmm0, (%rdi)
-	vmovdqu %xmm1, -0x10(%r9)
-	ret
-
-L(less_16bytes):
-	cmp	$8, %dl
-	jb	L(less_8bytes)
-	movq	(%rsi), %rsi
-	movq	-0x8(%rcx), %rcx
-	movq	%rsi, (%rdi)
-	movq	%rcx, -0x8(%r9)
-	ret
-
-L(less_8bytes):
-	cmp	$4, %dl
-	jb	L(less_4bytes)
-	mov	(%rsi), %esi
-	mov	-0x4(%rcx), %ecx
-	mov	%esi, (%rdi)
-	mov	%ecx, -0x4(%r9)
-	ret
-
-L(less_4bytes):
-	cmp	$2, %dl
-	jb	L(less_2bytes)
-	mov	(%rsi), %si
-	mov	-0x2(%rcx), %cx
-	mov	%si, (%rdi)
-	mov	%cx, -0x2(%r9)
-	ret
-
-L(less_2bytes):
-	cmp	$1, %dl
-	jb	L(less_1bytes)
-	mov	(%rsi), %cl
-	mov	%cl, (%rdi)
-L(less_1bytes):
-	ret
-
-L(512bytesormore):
-#ifdef SHARED_CACHE_SIZE_HALF
-	mov	$SHARED_CACHE_SIZE_HALF, %r8
-#else
-	mov	__x86_shared_cache_size_half(%rip), %r8
-#endif
-	cmp	%r8, %rdx
-	jae	L(preloop_large)
-	cmp	$1024, %rdx
-	ja	L(1024bytesormore)
-	prefetcht1 (%rsi)
-	prefetcht1 0x40(%rsi)
-	prefetcht1 0x80(%rsi)
-	prefetcht1 0xC0(%rsi)
-	prefetcht1 0x100(%rsi)
-	prefetcht1 0x140(%rsi)
-	prefetcht1 0x180(%rsi)
-	prefetcht1 0x1C0(%rsi)
-	prefetcht1 -0x200(%rcx)
-	prefetcht1 -0x1C0(%rcx)
-	prefetcht1 -0x180(%rcx)
-	prefetcht1 -0x140(%rcx)
-	prefetcht1 -0x100(%rcx)
-	prefetcht1 -0xC0(%rcx)
-	prefetcht1 -0x80(%rcx)
-	prefetcht1 -0x40(%rcx)
-	vmovups	(%rsi), %zmm0
-	vmovups 0x40(%rsi), %zmm1
-	vmovups 0x80(%rsi), %zmm2
-	vmovups 0xC0(%rsi), %zmm3
-	vmovups	0x100(%rsi), %zmm4
-	vmovups 0x140(%rsi), %zmm5
-	vmovups 0x180(%rsi), %zmm6
-	vmovups 0x1C0(%rsi), %zmm7
-	vmovups	-0x200(%rcx), %zmm8
-	vmovups -0x1C0(%rcx), %zmm9
-	vmovups -0x180(%rcx), %zmm10
-	vmovups -0x140(%rcx), %zmm11
-	vmovups	-0x100(%rcx), %zmm12
-	vmovups -0xC0(%rcx), %zmm13
-	vmovups -0x80(%rcx), %zmm14
-	vmovups -0x40(%rcx), %zmm15
-	vmovups %zmm0, (%rdi)
-	vmovups %zmm1, 0x40(%rdi)
-	vmovups %zmm2, 0x80(%rdi)
-	vmovups %zmm3, 0xC0(%rdi)
-	vmovups %zmm4, 0x100(%rdi)
-	vmovups %zmm5, 0x140(%rdi)
-	vmovups %zmm6, 0x180(%rdi)
-	vmovups %zmm7, 0x1C0(%rdi)
-	vmovups	%zmm8, -0x200(%r9)
-	vmovups %zmm9, -0x1C0(%r9)
-	vmovups %zmm10, -0x180(%r9)
-	vmovups %zmm11, -0x140(%r9)
-	vmovups	%zmm12, -0x100(%r9)
-	vmovups %zmm13, -0xC0(%r9)
-	vmovups %zmm14, -0x80(%r9)
-	vmovups %zmm15, -0x40(%r9)
-	ret
-
-L(1024bytesormore):
-	cmp	%rsi, %rdi
-	ja	L(1024bytesormore_bkw)
-	sub	$512, %r9
-	vmovups -0x200(%rcx), %zmm8
-	vmovups -0x1C0(%rcx), %zmm9
-	vmovups -0x180(%rcx), %zmm10
-	vmovups -0x140(%rcx), %zmm11
-	vmovups	-0x100(%rcx), %zmm12
-	vmovups -0xC0(%rcx), %zmm13
-	vmovups -0x80(%rcx), %zmm14
-	vmovups -0x40(%rcx), %zmm15
-	prefetcht1 (%rsi)
-	prefetcht1 0x40(%rsi)
-	prefetcht1 0x80(%rsi)
-	prefetcht1 0xC0(%rsi)
-	prefetcht1 0x100(%rsi)
-	prefetcht1 0x140(%rsi)
-	prefetcht1 0x180(%rsi)
-	prefetcht1 0x1C0(%rsi)
-
-/* Loop with unaligned memory access.  */
-L(gobble_512bytes_loop):
-	vmovups	(%rsi), %zmm0
-	vmovups 0x40(%rsi), %zmm1
-	vmovups 0x80(%rsi), %zmm2
-	vmovups 0xC0(%rsi), %zmm3
-	vmovups	0x100(%rsi), %zmm4
-	vmovups 0x140(%rsi), %zmm5
-	vmovups 0x180(%rsi), %zmm6
-	vmovups 0x1C0(%rsi), %zmm7
-	add	$512, %rsi
-	prefetcht1 (%rsi)
-	prefetcht1 0x40(%rsi)
-	prefetcht1 0x80(%rsi)
-	prefetcht1 0xC0(%rsi)
-	prefetcht1 0x100(%rsi)
-	prefetcht1 0x140(%rsi)
-	prefetcht1 0x180(%rsi)
-	prefetcht1 0x1C0(%rsi)
-	vmovups	%zmm0, (%rdi)
-	vmovups %zmm1, 0x40(%rdi)
-	vmovups %zmm2, 0x80(%rdi)
-	vmovups %zmm3, 0xC0(%rdi)
-	vmovups	%zmm4, 0x100(%rdi)
-	vmovups %zmm5, 0x140(%rdi)
-	vmovups %zmm6, 0x180(%rdi)
-	vmovups %zmm7, 0x1C0(%rdi)
-	add	$512, %rdi
-	cmp	%r9, %rdi
-	jb	L(gobble_512bytes_loop)
-	vmovups %zmm8, (%r9)
-	vmovups %zmm9, 0x40(%r9)
-	vmovups %zmm10, 0x80(%r9)
-	vmovups %zmm11, 0xC0(%r9)
-	vmovups %zmm12, 0x100(%r9)
-	vmovups %zmm13, 0x140(%r9)
-	vmovups %zmm14, 0x180(%r9)
-	vmovups %zmm15, 0x1C0(%r9)
-	ret
-
-L(1024bytesormore_bkw):
-	add	$512, %rdi
-	vmovups	0x1C0(%rsi), %zmm8
-	vmovups 0x180(%rsi), %zmm9
-	vmovups 0x140(%rsi), %zmm10
-	vmovups 0x100(%rsi), %zmm11
-	vmovups	0xC0(%rsi), %zmm12
-	vmovups 0x80(%rsi), %zmm13
-	vmovups 0x40(%rsi), %zmm14
-	vmovups (%rsi), %zmm15
-	prefetcht1 -0x40(%rcx)
-	prefetcht1 -0x80(%rcx)
-	prefetcht1 -0xC0(%rcx)
-	prefetcht1 -0x100(%rcx)
-	prefetcht1 -0x140(%rcx)
-	prefetcht1 -0x180(%rcx)
-	prefetcht1 -0x1C0(%rcx)
-	prefetcht1 -0x200(%rcx)
-
-/* Backward loop with unaligned memory access.  */
-L(gobble_512bytes_loop_bkw):
-	vmovups -0x40(%rcx), %zmm0
-	vmovups -0x80(%rcx), %zmm1
-	vmovups -0xC0(%rcx), %zmm2
-	vmovups	-0x100(%rcx), %zmm3
-	vmovups -0x140(%rcx), %zmm4
-	vmovups -0x180(%rcx), %zmm5
-	vmovups -0x1C0(%rcx), %zmm6
-	vmovups	-0x200(%rcx), %zmm7
-	sub	$512, %rcx
-	prefetcht1 -0x40(%rcx)
-	prefetcht1 -0x80(%rcx)
-	prefetcht1 -0xC0(%rcx)
-	prefetcht1 -0x100(%rcx)
-	prefetcht1 -0x140(%rcx)
-	prefetcht1 -0x180(%rcx)
-	prefetcht1 -0x1C0(%rcx)
-	prefetcht1 -0x200(%rcx)
-	vmovups %zmm0, -0x40(%r9)
-	vmovups %zmm1, -0x80(%r9)
-	vmovups %zmm2, -0xC0(%r9)
-	vmovups	%zmm3, -0x100(%r9)
-	vmovups %zmm4, -0x140(%r9)
-	vmovups %zmm5, -0x180(%r9)
-	vmovups %zmm6, -0x1C0(%r9)
-	vmovups	%zmm7, -0x200(%r9)
-	sub	$512, %r9
-	cmp	%rdi, %r9
-	ja	L(gobble_512bytes_loop_bkw)
-	vmovups %zmm8, -0x40(%rdi)
-	vmovups %zmm9, -0x80(%rdi)
-	vmovups %zmm10, -0xC0(%rdi)
-	vmovups %zmm11, -0x100(%rdi)
-	vmovups %zmm12, -0x140(%rdi)
-	vmovups %zmm13, -0x180(%rdi)
-	vmovups %zmm14, -0x1C0(%rdi)
-	vmovups %zmm15, -0x200(%rdi)
-	ret
-
-L(preloop_large):
-	cmp	%rsi, %rdi
-	ja	L(preloop_large_bkw)
-	vmovups	(%rsi), %zmm4
-	vmovups	0x40(%rsi), %zmm5
-
-/* Align destination for access with non-temporal stores in the loop.  */
-	mov	%rdi, %r8
-	and	$-0x80, %rdi
-	add	$0x80, %rdi
-	sub	%rdi, %r8
-	sub	%r8, %rsi
-	add	%r8, %rdx
-L(gobble_256bytes_nt_loop):
-	prefetcht1 0x200(%rsi)
-	prefetcht1 0x240(%rsi)
-	prefetcht1 0x280(%rsi)
-	prefetcht1 0x2C0(%rsi)
-	prefetcht1 0x300(%rsi)
-	prefetcht1 0x340(%rsi)
-	prefetcht1 0x380(%rsi)
-	prefetcht1 0x3C0(%rsi)
-	vmovdqu64 (%rsi), %zmm0
-	vmovdqu64 0x40(%rsi), %zmm1
-	vmovdqu64 0x80(%rsi), %zmm2
-	vmovdqu64 0xC0(%rsi), %zmm3
-	vmovntdq %zmm0, (%rdi)
-	vmovntdq %zmm1, 0x40(%rdi)
-	vmovntdq %zmm2, 0x80(%rdi)
-	vmovntdq %zmm3, 0xC0(%rdi)
-	sub	$256, %rdx
-	add	$256, %rsi
-	add	$256, %rdi
-	cmp	$256, %rdx
-	ja	L(gobble_256bytes_nt_loop)
-	sfence
-	vmovups	%zmm4, (%rax)
-	vmovups	%zmm5, 0x40(%rax)
-	jmp	L(check)
-
-L(preloop_large_bkw):
-	vmovups -0x80(%rcx), %zmm4
-	vmovups -0x40(%rcx), %zmm5
-
-/* Align end of destination for access with non-temporal stores.  */
-	mov	%r9, %r8
-	and	$-0x80, %r9
-	sub	%r9, %r8
-	sub	%r8, %rcx
-	sub	%r8, %rdx
-	add	%r9, %r8
-L(gobble_256bytes_nt_loop_bkw):
-	prefetcht1 -0x400(%rcx)
-	prefetcht1 -0x3C0(%rcx)
-	prefetcht1 -0x380(%rcx)
-	prefetcht1 -0x340(%rcx)
-	prefetcht1 -0x300(%rcx)
-	prefetcht1 -0x2C0(%rcx)
-	prefetcht1 -0x280(%rcx)
-	prefetcht1 -0x240(%rcx)
-	vmovdqu64 -0x100(%rcx), %zmm0
-	vmovdqu64 -0xC0(%rcx), %zmm1
-	vmovdqu64 -0x80(%rcx), %zmm2
-	vmovdqu64 -0x40(%rcx), %zmm3
-	vmovntdq %zmm0,	-0x100(%r9)
-	vmovntdq %zmm1,	-0xC0(%r9)
-	vmovntdq %zmm2,	-0x80(%r9)
-	vmovntdq %zmm3,	-0x40(%r9)
-	sub	$256, %rdx
-	sub	$256, %rcx
-	sub	$256, %r9
-	cmp	$256, %rdx
-	ja	L(gobble_256bytes_nt_loop_bkw)
-	sfence
-	vmovups	%zmm4, -0x80(%r8)
-	vmovups	%zmm5, -0x40(%r8)
-	jmp	L(check)
-END (MEMCPY)
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
deleted file mode 100644
index c4509831fa..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy-sse2-unaligned.S
+++ /dev/null
@@ -1,175 +0,0 @@
-/* memcpy with unaliged loads
-   Copyright (C) 2013-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#if IS_IN (libc)
-
-#include <sysdep.h>
-
-#include "asm-syntax.h"
-
-
-ENTRY(__memcpy_sse2_unaligned)
-	movq	%rsi, %rax
-	leaq	(%rdx,%rdx), %rcx
-	subq	%rdi, %rax
-	subq	%rdx, %rax
-	cmpq	%rcx, %rax
-	jb	L(overlapping)
-	cmpq	$16, %rdx
-	jbe	L(less_16)
-	movdqu	(%rsi), %xmm8
-	cmpq	$32, %rdx
-	movdqu	%xmm8, (%rdi)
-	movdqu	-16(%rsi,%rdx), %xmm8
-	movdqu	%xmm8, -16(%rdi,%rdx)
-	ja	.L31
-L(return):
-	movq	%rdi, %rax
-	ret
-	.p2align 4,,10
-	.p2align 4
-.L31:
-	movdqu	16(%rsi), %xmm8
-	cmpq	$64, %rdx
-	movdqu	%xmm8, 16(%rdi)
-	movdqu	-32(%rsi,%rdx), %xmm8
-	movdqu	%xmm8, -32(%rdi,%rdx)
-	jbe	L(return)
-	movdqu	32(%rsi), %xmm8
-	cmpq	$128, %rdx
-	movdqu	%xmm8, 32(%rdi)
-	movdqu	-48(%rsi,%rdx), %xmm8
-	movdqu	%xmm8, -48(%rdi,%rdx)
-	movdqu	48(%rsi), %xmm8
-	movdqu	%xmm8, 48(%rdi)
-	movdqu	-64(%rsi,%rdx), %xmm8
-	movdqu	%xmm8, -64(%rdi,%rdx)
-	jbe	L(return)
-	leaq	64(%rdi), %rcx
-	addq	%rdi, %rdx
-	andq	$-64, %rdx
-	andq	$-64, %rcx
-	movq	%rcx, %rax
-	subq	%rdi, %rax
-	addq	%rax, %rsi
-	cmpq	%rdx, %rcx
-	je	L(return)
-	movq	%rsi, %r10
-	subq	%rcx, %r10
-	leaq	16(%r10), %r9
-	leaq	32(%r10), %r8
-	leaq	48(%r10), %rax
-	.p2align 4,,10
-	.p2align 4
-L(loop):
-	movdqu	(%rcx,%r10), %xmm8
-	movdqa	%xmm8, (%rcx)
-	movdqu	(%rcx,%r9), %xmm8
-	movdqa	%xmm8, 16(%rcx)
-	movdqu	(%rcx,%r8), %xmm8
-	movdqa	%xmm8, 32(%rcx)
-	movdqu	(%rcx,%rax), %xmm8
-	movdqa	%xmm8, 48(%rcx)
-	addq	$64, %rcx
-	cmpq	%rcx, %rdx
-	jne	L(loop)
-	jmp	L(return)
-L(overlapping):
-	cmpq	%rsi, %rdi
-	jae	.L3
-	testq	%rdx, %rdx
-	.p2align 4,,5
-	je	L(return)
-	movq	%rdx, %r9
-	leaq	16(%rsi), %rcx
-	leaq	16(%rdi), %r8
-	shrq	$4, %r9
-	movq	%r9, %rax
-	salq	$4, %rax
-	cmpq	%rcx, %rdi
-	setae	%cl
-	cmpq	%r8, %rsi
-	setae	%r8b
-	orl	%r8d, %ecx
-	cmpq	$15, %rdx
-	seta	%r8b
-	testb	%r8b, %cl
-	je	.L16
-	testq	%rax, %rax
-	je	.L16
-	xorl	%ecx, %ecx
-	xorl	%r8d, %r8d
-.L7:
-	movdqu	(%rsi,%rcx), %xmm8
-	addq	$1, %r8
-	movdqu	%xmm8, (%rdi,%rcx)
-	addq	$16, %rcx
-	cmpq	%r8, %r9
-	ja	.L7
-	cmpq	%rax, %rdx
-	je	L(return)
-.L21:
-	movzbl	(%rsi,%rax), %ecx
-	movb	%cl, (%rdi,%rax)
-	addq	$1, %rax
-	cmpq	%rax, %rdx
-	ja	.L21
-	jmp	L(return)
-L(less_16):
-	testb	$24, %dl
-	jne	L(between_9_16)
-	testb	$4, %dl
-	.p2align 4,,5
-	jne	L(between_5_8)
-	testq	%rdx, %rdx
-	.p2align 4,,2
-	je	L(return)
-	movzbl	(%rsi), %eax
-	testb	$2, %dl
-	movb	%al, (%rdi)
-	je	L(return)
-	movzwl	-2(%rsi,%rdx), %eax
-	movw	%ax, -2(%rdi,%rdx)
-	jmp	L(return)
-.L3:
-	leaq	-1(%rdx), %rax
-	.p2align 4,,10
-	.p2align 4
-.L11:
-	movzbl	(%rsi,%rax), %edx
-	movb	%dl, (%rdi,%rax)
-	subq	$1, %rax
-	jmp	.L11
-L(between_9_16):
-	movq	(%rsi), %rax
-	movq	%rax, (%rdi)
-	movq	-8(%rsi,%rdx), %rax
-	movq	%rax, -8(%rdi,%rdx)
-	jmp	L(return)
-.L16:
-	xorl	%eax, %eax
-	jmp	.L21
-L(between_5_8):
-	movl	(%rsi), %eax
-	movl	%eax, (%rdi)
-	movl	-4(%rsi,%rdx), %eax
-	movl	%eax, -4(%rdi,%rdx)
-	jmp	L(return)
-END(__memcpy_sse2_unaligned)
-
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 08b41e9e5a..3cd1123326 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -1,5 +1,5 @@
 /* memcpy with SSSE3 and REP string
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
+   Copyright (C) 2010-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -19,16 +19,15 @@
 
 #include <sysdep.h>
 
-#if IS_IN (libc) \
-    && (defined SHARED \
-        || defined USE_AS_MEMMOVE \
-	|| !defined USE_MULTIARCH)
+#if IS_IN (libc)
 
 #include "asm-syntax.h"
 
 #ifndef MEMCPY
 # define MEMCPY		__memcpy_ssse3_back
 # define MEMCPY_CHK	__memcpy_chk_ssse3_back
+# define MEMPCPY	__mempcpy_ssse3_back
+# define MEMPCPY_CHK	__mempcpy_chk_ssse3_back
 #endif
 
 #define JMPTBL(I, B)	I - B
@@ -40,10 +39,23 @@
   lea		TABLE(%rip), %r11;				\
   movslq	(%r11, INDEX, SCALE), INDEX;			\
   lea		(%r11, INDEX), INDEX;				\
-  jmp		*INDEX;						\
+  _CET_NOTRACK jmp *INDEX;					\
   ud2
 
 	.section .text.ssse3,"ax",@progbits
+#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+ENTRY (MEMPCPY_CHK)
+	cmpq	%rdx, %rcx	/* %rcx vs. the length in %rdx; %rcx is presumably the destination object size.  */
+	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* %rcx < %rdx (unsigned): branch to __chk_fail.  */
+END (MEMPCPY_CHK)	/* No ret or jmp: execution continues into MEMPCPY below.  */
+
+ENTRY (MEMPCPY)
+	movq	%rdi, %rax
+	addq	%rdx, %rax	/* %rax = %rdi + %rdx, set before joining the shared code.  */
+	jmp	L(start)	/* Continue at L(start) inside MEMCPY with %rax already set.  */
+END (MEMPCPY)
+#endif
+
 #if !defined USE_AS_BCOPY
 ENTRY (MEMCPY_CHK)
 	cmpq	%rdx, %rcx
@@ -66,6 +78,7 @@ ENTRY (MEMCPY)
 	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
 L(copy_forward):
 #endif
+L(start):
 	cmp	$144, %rdx
 	jae	L(144bytesormore)
 
@@ -112,7 +125,7 @@ L(144bytesormore):
 	sub	$0x80, %rdx
 	movslq	(%r11, %r9, 4), %r9
 	add	%r11, %r9
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 
 	.p2align 4
@@ -142,7 +155,7 @@ L(copy_backward):
 	sub	$0x80, %rdx
 	movslq	(%r11, %r9, 4), %r9
 	add	%r11, %r9
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 
 	.p2align 4
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 95de9695f9..0240bfa309 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -1,5 +1,5 @@
 /* memcpy with SSSE3
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
+   Copyright (C) 2010-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -19,16 +19,15 @@
 
 #include <sysdep.h>
 
-#if IS_IN (libc) \
-    && (defined SHARED \
-        || defined USE_AS_MEMMOVE \
-	|| !defined USE_MULTIARCH)
+#if IS_IN (libc)
 
 #include "asm-syntax.h"
 
 #ifndef MEMCPY
 # define MEMCPY		__memcpy_ssse3
 # define MEMCPY_CHK	__memcpy_chk_ssse3
+# define MEMPCPY	__mempcpy_ssse3
+# define MEMPCPY_CHK	__mempcpy_chk_ssse3
 #endif
 
 #define JMPTBL(I, B)	I - B
@@ -40,10 +39,23 @@
   lea		TABLE(%rip), %r11;				\
   movslq	(%r11, INDEX, SCALE), INDEX;			\
   lea		(%r11, INDEX), INDEX;				\
-  jmp		*INDEX;						\
+  _CET_NOTRACK jmp *INDEX;					\
   ud2
 
 	.section .text.ssse3,"ax",@progbits
+#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+ENTRY (MEMPCPY_CHK)
+	cmpq	%rdx, %rcx	/* %rcx (4th argument) against the length in %rdx.  */
+	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* Taken when %rcx < %rdx, unsigned.  */
+END (MEMPCPY_CHK)	/* No ret or jmp: execution continues into MEMPCPY.  */
+/* MEMPCPY: set %rax = %rdi + %rdx, then join the copy code at L(start).  */
+ENTRY (MEMPCPY)
+	movq	%rdi, %rax
+	addq	%rdx, %rax
+	jmp	L(start)
+END (MEMPCPY)
+#endif	/* !USE_AS_MEMPCPY && !USE_AS_MEMMOVE */
+
 #if !defined USE_AS_BCOPY
 ENTRY (MEMCPY_CHK)
 	cmpq	%rdx, %rcx
@@ -66,6 +78,7 @@ ENTRY (MEMCPY)
 	jmp	L(copy_backward)
 L(copy_forward):
 #endif
+L(start):
 	cmp	$79, %rdx
 	lea     L(table_less_80bytes)(%rip), %r11
 	ja	L(80bytesormore)
@@ -73,7 +86,7 @@ L(copy_forward):
 	add	%rdx, %rsi
 	add	%rdx, %rdi
 	add	%r11, %r9
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 
 	.p2align 4
@@ -428,7 +441,7 @@ L(shl_1):
 	lea	(L(shl_1_loop_L2)-L(shl_1_loop_L1))(%r9), %r9
 L(L1_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_1_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -451,7 +464,7 @@ L(shl_1_loop_L1):
 	jb	L(shl_1_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_1_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -471,7 +484,7 @@ L(shl_1_bwd):
 	lea	(L(shl_1_bwd_loop_L2)-L(shl_1_bwd_loop_L1))(%r9), %r9
 L(L1_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_1_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -496,7 +509,7 @@ L(shl_1_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_1_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_1_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -513,7 +526,7 @@ L(shl_2):
 	lea	(L(shl_2_loop_L2)-L(shl_2_loop_L1))(%r9), %r9
 L(L2_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_2_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -536,7 +549,7 @@ L(shl_2_loop_L1):
 	jb	L(shl_2_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_2_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -556,7 +569,7 @@ L(shl_2_bwd):
 	lea	(L(shl_2_bwd_loop_L2)-L(shl_2_bwd_loop_L1))(%r9), %r9
 L(L2_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_2_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -581,7 +594,7 @@ L(shl_2_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_2_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_2_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -598,7 +611,7 @@ L(shl_3):
 	lea	(L(shl_3_loop_L2)-L(shl_3_loop_L1))(%r9), %r9
 L(L3_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_3_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -621,7 +634,7 @@ L(shl_3_loop_L1):
 	jb	L(shl_3_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_3_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -641,7 +654,7 @@ L(shl_3_bwd):
 	lea	(L(shl_3_bwd_loop_L2)-L(shl_3_bwd_loop_L1))(%r9), %r9
 L(L3_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_3_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -666,7 +679,7 @@ L(shl_3_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_3_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_3_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -683,7 +696,7 @@ L(shl_4):
 	lea	(L(shl_4_loop_L2)-L(shl_4_loop_L1))(%r9), %r9
 L(L4_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_4_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -706,7 +719,7 @@ L(shl_4_loop_L1):
 	jb	L(shl_4_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_4_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -726,7 +739,7 @@ L(shl_4_bwd):
 	lea	(L(shl_4_bwd_loop_L2)-L(shl_4_bwd_loop_L1))(%r9), %r9
 L(L4_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_4_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -751,7 +764,7 @@ L(shl_4_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_4_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_4_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -768,7 +781,7 @@ L(shl_5):
 	lea	(L(shl_5_loop_L2)-L(shl_5_loop_L1))(%r9), %r9
 L(L5_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_5_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -791,7 +804,7 @@ L(shl_5_loop_L1):
 	jb	L(shl_5_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_5_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -811,7 +824,7 @@ L(shl_5_bwd):
 	lea	(L(shl_5_bwd_loop_L2)-L(shl_5_bwd_loop_L1))(%r9), %r9
 L(L5_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_5_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -836,7 +849,7 @@ L(shl_5_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_5_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_5_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -853,7 +866,7 @@ L(shl_6):
 	lea	(L(shl_6_loop_L2)-L(shl_6_loop_L1))(%r9), %r9
 L(L6_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_6_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -876,7 +889,7 @@ L(shl_6_loop_L1):
 	jb	L(shl_6_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_6_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -896,7 +909,7 @@ L(shl_6_bwd):
 	lea	(L(shl_6_bwd_loop_L2)-L(shl_6_bwd_loop_L1))(%r9), %r9
 L(L6_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_6_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -921,7 +934,7 @@ L(shl_6_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_6_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_6_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -938,7 +951,7 @@ L(shl_7):
 	lea	(L(shl_7_loop_L2)-L(shl_7_loop_L1))(%r9), %r9
 L(L7_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_7_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -961,7 +974,7 @@ L(shl_7_loop_L1):
 	jb	L(shl_7_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_7_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -981,7 +994,7 @@ L(shl_7_bwd):
 	lea	(L(shl_7_bwd_loop_L2)-L(shl_7_bwd_loop_L1))(%r9), %r9
 L(L7_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_7_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -1006,7 +1019,7 @@ L(shl_7_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_7_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_7_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -1023,7 +1036,7 @@ L(shl_8):
 	lea	(L(shl_8_loop_L2)-L(shl_8_loop_L1))(%r9), %r9
 L(L8_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 L(shl_8_loop_L2):
 	prefetchnta 0x1c0(%rsi)
 L(shl_8_loop_L1):
@@ -1045,7 +1058,7 @@ L(shl_8_loop_L1):
 	jb	L(shl_8_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 	.p2align 4
 L(shl_8_end):
@@ -1066,7 +1079,7 @@ L(shl_8_bwd):
 	lea	(L(shl_8_bwd_loop_L2)-L(shl_8_bwd_loop_L1))(%r9), %r9
 L(L8_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_8_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -1091,7 +1104,7 @@ L(shl_8_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_8_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_8_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -1108,7 +1121,7 @@ L(shl_9):
 	lea	(L(shl_9_loop_L2)-L(shl_9_loop_L1))(%r9), %r9
 L(L9_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_9_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -1131,7 +1144,7 @@ L(shl_9_loop_L1):
 	jb	L(shl_9_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_9_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -1151,7 +1164,7 @@ L(shl_9_bwd):
 	lea	(L(shl_9_bwd_loop_L2)-L(shl_9_bwd_loop_L1))(%r9), %r9
 L(L9_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_9_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -1176,7 +1189,7 @@ L(shl_9_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_9_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_9_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -1193,7 +1206,7 @@ L(shl_10):
 	lea	(L(shl_10_loop_L2)-L(shl_10_loop_L1))(%r9), %r9
 L(L10_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_10_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -1216,7 +1229,7 @@ L(shl_10_loop_L1):
 	jb	L(shl_10_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_10_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -1236,7 +1249,7 @@ L(shl_10_bwd):
 	lea	(L(shl_10_bwd_loop_L2)-L(shl_10_bwd_loop_L1))(%r9), %r9
 L(L10_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_10_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -1261,7 +1274,7 @@ L(shl_10_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_10_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_10_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -1278,7 +1291,7 @@ L(shl_11):
 	lea	(L(shl_11_loop_L2)-L(shl_11_loop_L1))(%r9), %r9
 L(L11_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_11_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -1301,7 +1314,7 @@ L(shl_11_loop_L1):
 	jb	L(shl_11_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_11_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -1321,7 +1334,7 @@ L(shl_11_bwd):
 	lea	(L(shl_11_bwd_loop_L2)-L(shl_11_bwd_loop_L1))(%r9), %r9
 L(L11_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_11_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -1346,7 +1359,7 @@ L(shl_11_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_11_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_11_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -1363,7 +1376,7 @@ L(shl_12):
 	lea	(L(shl_12_loop_L2)-L(shl_12_loop_L1))(%r9), %r9
 L(L12_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_12_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -1386,7 +1399,7 @@ L(shl_12_loop_L1):
 	jb	L(shl_12_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_12_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -1406,7 +1419,7 @@ L(shl_12_bwd):
 	lea	(L(shl_12_bwd_loop_L2)-L(shl_12_bwd_loop_L1))(%r9), %r9
 L(L12_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_12_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -1431,7 +1444,7 @@ L(shl_12_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_12_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_12_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -1448,7 +1461,7 @@ L(shl_13):
 	lea	(L(shl_13_loop_L2)-L(shl_13_loop_L1))(%r9), %r9
 L(L13_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_13_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -1471,7 +1484,7 @@ L(shl_13_loop_L1):
 	jb	L(shl_13_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_13_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -1491,7 +1504,7 @@ L(shl_13_bwd):
 	lea	(L(shl_13_bwd_loop_L2)-L(shl_13_bwd_loop_L1))(%r9), %r9
 L(L13_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_13_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -1516,7 +1529,7 @@ L(shl_13_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_13_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_13_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -1533,7 +1546,7 @@ L(shl_14):
 	lea	(L(shl_14_loop_L2)-L(shl_14_loop_L1))(%r9), %r9
 L(L14_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_14_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -1556,7 +1569,7 @@ L(shl_14_loop_L1):
 	jb	L(shl_14_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_14_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -1576,7 +1589,7 @@ L(shl_14_bwd):
 	lea	(L(shl_14_bwd_loop_L2)-L(shl_14_bwd_loop_L1))(%r9), %r9
 L(L14_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_14_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -1601,7 +1614,7 @@ L(shl_14_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_14_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_14_bwd_end):
 	movaps	%xmm4, (%rdi)
@@ -1618,7 +1631,7 @@ L(shl_15):
 	lea	(L(shl_15_loop_L2)-L(shl_15_loop_L1))(%r9), %r9
 L(L15_fwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_15_loop_L2):
 	prefetchnta 0x1c0(%rsi)
@@ -1641,7 +1654,7 @@ L(shl_15_loop_L1):
 	jb	L(shl_15_end)
 	movaps	%xmm4, -0x20(%rdi)
 	movaps	%xmm5, -0x10(%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_15_end):
 	movaps	%xmm4, -0x20(%rdi)
@@ -1661,7 +1674,7 @@ L(shl_15_bwd):
 	lea	(L(shl_15_bwd_loop_L2)-L(shl_15_bwd_loop_L1))(%r9), %r9
 L(L15_bwd):
 	lea	-64(%rdx), %rdx
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_15_bwd_loop_L2):
 	prefetchnta -0x1c0(%rsi)
@@ -1686,7 +1699,7 @@ L(shl_15_bwd_loop_L1):
 	movaps	%xmm3, 0x10(%rdi)
 	jb	L(shl_15_bwd_end)
 	movaps	%xmm4, (%rdi)
-	jmp	*%r9
+	_CET_NOTRACK jmp *%r9
 	ud2
 L(shl_15_bwd_end):
 	movaps	%xmm4, (%rdi)
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
deleted file mode 100644
index 64a1bcd137..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ /dev/null
@@ -1,89 +0,0 @@
-/* Multiple versions of memcpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  In static binaries we need memcpy before the initialization
-   happened.  */
-#if defined SHARED && IS_IN (libc)
-	.text
-ENTRY(__new_memcpy)
-	.type	__new_memcpy, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef HAVE_AVX512_ASM_SUPPORT
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz	1f
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jz	1f
-	leaq    __memcpy_avx512_no_vzeroupper(%rip), %rax
-	ret
-#endif
-1:	leaq	__memcpy_avx_unaligned(%rip), %rax
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz 2f
-	ret
-2:	leaq	__memcpy_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (Slow_BSF)
-	jnz	3f
-	leaq	__memcpy_sse2_unaligned(%rip), %rax
-	ret
-3:	HAS_CPU_FEATURE (SSSE3)
-	jz 4f
-	leaq    __memcpy_ssse3(%rip), %rax
-4:	ret
-END(__new_memcpy)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __memcpy_sse2, @function; \
-	.globl __memcpy_sse2; \
-	.hidden __memcpy_sse2; \
-	.p2align 4; \
-	__memcpy_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
-	.type __memcpy_chk_sse2, @function; \
-	.globl __memcpy_chk_sse2; \
-	.p2align 4; \
-	__memcpy_chk_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
-	cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2
-
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal memcpy calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_memcpy; __GI_memcpy = __memcpy_sse2
-
-versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
-
-#include "../memcpy.S"
diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
new file mode 100644
index 0000000000..419f76aefc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy.c
@@ -0,0 +1,39 @@
+/* Multiple versions of memcpy.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define memcpy __redirect_memcpy	/* <string.h> then declares __redirect_memcpy.  */
+# include <string.h>
+# undef memcpy
+/* NOTE(review): IFUNC_SELECTOR is presumably supplied by ifunc-memmove.h -- confirm.  */
+# define SYMBOL_NAME memcpy
+# include "ifunc-memmove.h"
+/* __new_memcpy is the ifunc symbol; IFUNC_SELECTOR () supplies its resolver result.  */
+libc_ifunc_redirected (__redirect_memcpy, __new_memcpy,
+		       IFUNC_SELECTOR ());
+/* Shared builds only: give __GI_memcpy hidden visibility.  */
+# ifdef SHARED
+__hidden_ver1 (__new_memcpy, __GI_memcpy, __redirect_memcpy)
+  __attribute__ ((visibility ("hidden")));
+# endif	/* SHARED */
+/* Bind __new_memcpy to the name memcpy at symbol version GLIBC_2_14.  */
+# include <shlib-compat.h>
+versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+#endif	/* IS_IN (libc) */
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk-nonshared.S b/sysdeps/x86_64/multiarch/memcpy_chk-nonshared.S
new file mode 100644
index 0000000000..84c8842ce7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of memcpy_chk for x86-64.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/memcpy_chk.S>	/* Reuse the non-multiarch source.  */
+#endif	/* IS_IN (libc) && !defined SHARED */
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
deleted file mode 100644
index 648217e971..0000000000
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of __memcpy_chk
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  There are no multiarch memcpy functions for static binaries.
- */
-#if IS_IN (libc)
-# ifdef SHARED
-	.text
-ENTRY(__memcpy_chk)
-	.type	__memcpy_chk, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef HAVE_AVX512_ASM_SUPPORT
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz      1f
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jz      1f
-	leaq    __memcpy_chk_avx512_no_vzeroupper(%rip), %rax
-	ret
-#endif
-1:	leaq	__memcpy_chk_sse2(%rip), %rax
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	leaq	__memcpy_chk_ssse3(%rip), %rax
-	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jz	2f
-	leaq	__memcpy_chk_ssse3_back(%rip), %rax
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz  2f
-	leaq    __memcpy_chk_avx_unaligned(%rip), %rax
-2:	ret
-END(__memcpy_chk)
-# else
-#  include "../memcpy_chk.S"
-# endif
-#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.c b/sysdeps/x86_64/multiarch/memcpy_chk.c
new file mode 100644
index 0000000000..c9b901a6dd
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __memcpy_chk
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.so.  */
+#if IS_IN (libc) && defined SHARED
+# define __memcpy_chk __redirect_memcpy_chk	/* <string.h> sees the renamed symbol.  */
+# include <string.h>
+# undef __memcpy_chk
+/* NOTE(review): IFUNC_SELECTOR is presumably supplied by ifunc-memmove.h -- confirm.  */
+# define SYMBOL_NAME memcpy_chk
+# include "ifunc-memmove.h"
+/* __memcpy_chk is the ifunc symbol; IFUNC_SELECTOR () supplies its resolver result.  */
+libc_ifunc_redirected (__redirect_memcpy_chk, __memcpy_chk,
+		       IFUNC_SELECTOR ());
+#endif	/* IS_IN (libc) && defined SHARED */
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
new file mode 100644
index 0000000000..e195e93f15
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
@@ -0,0 +1,12 @@
+#if IS_IN (libc)
+# define VEC_SIZE	32	/* Bytes in one ymm register.  */
+# define VEC(i)		ymm##i
+# define VMOVNT		vmovntdq	/* Non-temporal store mnemonic.  */
+# define VMOVU		vmovdqu	/* Unaligned move mnemonic.  */
+# define VMOVA		vmovdqa	/* Aligned move mnemonic.  */
+/* SECTION appends .avx and MEMMOVE_SYMBOL inserts _avx_ into the names passed in.  */
+# define SECTION(p)		p##.avx
+# define MEMMOVE_SYMBOL(p,s)	p##_avx_##s
+/* NOTE(review): the macros above presumably parameterize the template below -- confirm.  */
+# include "memmove-vec-unaligned-erms.S"
+#endif	/* IS_IN (libc) */
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
index 518d1fec35..effc3ac2de 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
@@ -1,5 +1,5 @@
-/* memmove optimized with AVX512 for KNL hardware.
-   Copyright (C) 2016 Free Software Foundation, Inc.
+/* memmove/memcpy/mempcpy optimized with AVX512 for KNL hardware.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,7 +16,400 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#define USE_AS_MEMMOVE
-#define MEMCPY		__memmove_avx512_no_vzeroupper
-#define MEMCPY_CHK	__memmove_chk_avx512_no_vzeroupper
-#include "memcpy-avx512-no-vzeroupper.S"
+#include <sysdep.h>
+
+#if IS_IN (libc)
+
+# include "asm-syntax.h"
+
+	.section .text.avx512,"ax",@progbits
+ENTRY (__mempcpy_chk_avx512_no_vzeroupper)
+	cmpq	%rdx, %rcx	/* %rcx (4th argument) against the length in %rdx.  */
+	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* Taken when %rcx < %rdx, unsigned.  */
+END (__mempcpy_chk_avx512_no_vzeroupper)	/* No ret: continues into the next entry.  */
+/* mempcpy: set %rax = %rdi + %rdx, then join the memmove body at L(start).  */
+ENTRY (__mempcpy_avx512_no_vzeroupper)
+	movq	%rdi, %rax
+	addq	%rdx, %rax
+	jmp	L(start)
+END (__mempcpy_avx512_no_vzeroupper)
+/* Same %rcx/%rdx check as above; no ret, so execution continues past END.  */
+ENTRY (__memmove_chk_avx512_no_vzeroupper)
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memmove_chk_avx512_no_vzeroupper)
+
+ENTRY (__memmove_avx512_no_vzeroupper)
+	mov	%rdi, %rax	/* Return value: the incoming %rdi.  */
+# ifdef USE_AS_MEMPCPY	/* NOTE(review): nothing visible here defines this; mempcpy has its own entry -- confirm dead.  */
+	add	%rdx, %rax
+# endif
+L(start):
+	lea	(%rsi, %rdx), %rcx	/* %rcx = %rsi + %rdx.  */
+	lea	(%rdi, %rdx), %r9	/* %r9 = %rdi + %rdx.  */
+	cmp	$512, %rdx
+	ja	L(512bytesormore)
+/* L(check) handles %rdx <= 512; the non-temporal loops also jump back here.  */
+L(check):
+	cmp	$16, %rdx
+	jbe	L(less_16bytes)
+	cmp	$256, %rdx
+	jb	L(less_256bytes)
+	vmovups	(%rsi), %zmm0	/* 256..512: load first and last 256 bytes ...  */
+	vmovups 0x40(%rsi), %zmm1
+	vmovups 0x80(%rsi), %zmm2
+	vmovups 0xC0(%rsi), %zmm3
+	vmovups	-0x100(%rcx), %zmm4
+	vmovups -0xC0(%rcx), %zmm5
+	vmovups -0x80(%rcx), %zmm6
+	vmovups -0x40(%rcx), %zmm7
+	vmovups %zmm0, (%rdi)	/* ... and store only after every load is done.  */
+	vmovups %zmm1, 0x40(%rdi)
+	vmovups %zmm2, 0x80(%rdi)
+	vmovups %zmm3, 0xC0(%rdi)
+	vmovups	%zmm4, -0x100(%r9)
+	vmovups %zmm5, -0xC0(%r9)
+	vmovups %zmm6, -0x80(%r9)
+	vmovups %zmm7, -0x40(%r9)
+	ret
+
+L(less_256bytes):	/* Here 16 < %rdx < 256.  */
+	cmp	$128, %dl	/* %rdx < 256, so %dl holds the whole length.  */
+	jb	L(less_128bytes)
+	vmovups	(%rsi), %zmm0	/* 128..255: first and last 128 bytes.  */
+	vmovups 0x40(%rsi), %zmm1
+	vmovups -0x80(%rcx), %zmm2
+	vmovups -0x40(%rcx), %zmm3
+	vmovups	%zmm0, (%rdi)
+	vmovups %zmm1, 0x40(%rdi)
+	vmovups %zmm2, -0x80(%r9)
+	vmovups %zmm3, -0x40(%r9)
+	ret
+/* Here 16 < %rdx < 128; 64..127 copies the first and last 64 bytes.  */
+L(less_128bytes):
+	cmp	$64, %dl
+	jb	L(less_64bytes)
+	vmovdqu (%rsi), %ymm0
+	vmovdqu 0x20(%rsi), %ymm1
+	vmovdqu -0x40(%rcx), %ymm2
+	vmovdqu -0x20(%rcx), %ymm3
+	vmovdqu %ymm0, (%rdi)
+	vmovdqu %ymm1, 0x20(%rdi)
+	vmovdqu %ymm2, -0x40(%r9)
+	vmovdqu %ymm3, -0x20(%r9)
+	ret
+/* Here 16 < %rdx < 64; 32..63 copies the first and last 32 bytes.  */
+L(less_64bytes):
+	cmp	$32, %dl
+	jb	L(less_32bytes)
+	vmovdqu	(%rsi), %ymm0
+	vmovdqu -0x20(%rcx), %ymm1
+	vmovdqu	%ymm0, (%rdi)
+	vmovdqu	%ymm1, -0x20(%r9)
+	ret
+/* Here 16 < %rdx < 32: copy the first and last 16 bytes.  */
+L(less_32bytes):
+	vmovdqu (%rsi), %xmm0
+	vmovdqu -0x10(%rcx), %xmm1
+	vmovdqu %xmm0, (%rdi)
+	vmovdqu %xmm1, -0x10(%r9)
+	ret
+/* Here %rdx <= 16; 8..16 copies the first and last 8 bytes.  */
+L(less_16bytes):
+	cmp	$8, %dl
+	jb	L(less_8bytes)
+	movq	(%rsi), %rsi	/* Both loads overwrite their own pointer register.  */
+	movq	-0x8(%rcx), %rcx
+	movq	%rsi, (%rdi)
+	movq	%rcx, -0x8(%r9)
+	ret
+/* Here %rdx < 8; 4..7 copies the first and last 4 bytes.  */
+L(less_8bytes):
+	cmp	$4, %dl
+	jb	L(less_4bytes)
+	mov	(%rsi), %esi
+	mov	-0x4(%rcx), %ecx
+	mov	%esi, (%rdi)
+	mov	%ecx, -0x4(%r9)
+	ret
+/* Here %rdx < 4; 2..3 copies the first and last 2 bytes.  */
+L(less_4bytes):
+	cmp	$2, %dl
+	jb	L(less_2bytes)
+	mov	(%rsi), %si
+	mov	-0x2(%rcx), %cx
+	mov	%si, (%rdi)
+	mov	%cx, -0x2(%r9)
+	ret
+/* Here %rdx < 2: copy one byte, or nothing when the length is zero.  */
+L(less_2bytes):
+	cmp	$1, %dl
+	jb	L(less_1bytes)
+	mov	(%rsi), %cl
+	mov	%cl, (%rdi)
+L(less_1bytes):
+	ret
+
+L(512bytesormore):	/* Reached from L(start) with %rdx > 512.  */
+# ifdef SHARED_CACHE_SIZE_HALF
+	mov	$SHARED_CACHE_SIZE_HALF, %r8
+# else
+	mov	__x86_shared_cache_size_half(%rip), %r8
+# endif
+	cmp	%r8, %rdx
+	jae	L(preloop_large)	/* Length >= that threshold: non-temporal path.  */
+	cmp	$1024, %rdx
+	ja	L(1024bytesormore)
+	prefetcht1 (%rsi)	/* 513..1024: prefetch the first and last 512 bytes.  */
+	prefetcht1 0x40(%rsi)
+	prefetcht1 0x80(%rsi)
+	prefetcht1 0xC0(%rsi)
+	prefetcht1 0x100(%rsi)
+	prefetcht1 0x140(%rsi)
+	prefetcht1 0x180(%rsi)
+	prefetcht1 0x1C0(%rsi)
+	prefetcht1 -0x200(%rcx)
+	prefetcht1 -0x1C0(%rcx)
+	prefetcht1 -0x180(%rcx)
+	prefetcht1 -0x140(%rcx)
+	prefetcht1 -0x100(%rcx)
+	prefetcht1 -0xC0(%rcx)
+	prefetcht1 -0x80(%rcx)
+	prefetcht1 -0x40(%rcx)
+	vmovups	(%rsi), %zmm0	/* Load all 16 vectors before any store.  */
+	vmovups 0x40(%rsi), %zmm1
+	vmovups 0x80(%rsi), %zmm2
+	vmovups 0xC0(%rsi), %zmm3
+	vmovups	0x100(%rsi), %zmm4
+	vmovups 0x140(%rsi), %zmm5
+	vmovups 0x180(%rsi), %zmm6
+	vmovups 0x1C0(%rsi), %zmm7
+	vmovups	-0x200(%rcx), %zmm8
+	vmovups -0x1C0(%rcx), %zmm9
+	vmovups -0x180(%rcx), %zmm10
+	vmovups -0x140(%rcx), %zmm11
+	vmovups	-0x100(%rcx), %zmm12
+	vmovups -0xC0(%rcx), %zmm13
+	vmovups -0x80(%rcx), %zmm14
+	vmovups -0x40(%rcx), %zmm15
+	vmovups %zmm0, (%rdi)
+	vmovups %zmm1, 0x40(%rdi)
+	vmovups %zmm2, 0x80(%rdi)
+	vmovups %zmm3, 0xC0(%rdi)
+	vmovups %zmm4, 0x100(%rdi)
+	vmovups %zmm5, 0x140(%rdi)
+	vmovups %zmm6, 0x180(%rdi)
+	vmovups %zmm7, 0x1C0(%rdi)
+	vmovups	%zmm8, -0x200(%r9)
+	vmovups %zmm9, -0x1C0(%r9)
+	vmovups %zmm10, -0x180(%r9)
+	vmovups %zmm11, -0x140(%r9)
+	vmovups	%zmm12, -0x100(%r9)
+	vmovups %zmm13, -0xC0(%r9)
+	vmovups %zmm14, -0x80(%r9)
+	vmovups %zmm15, -0x40(%r9)
+	ret
+
+L(1024bytesormore):
+	cmp	%rsi, %rdi
+	ja	L(1024bytesormore_bkw)	/* %rdi above %rsi: copy from the end down.  */
+	sub	$512, %r9	/* %r9 = start of the last 512 destination bytes.  */
+	vmovups -0x200(%rcx), %zmm8	/* Save the last 512 source bytes before the loop stores.  */
+	vmovups -0x1C0(%rcx), %zmm9
+	vmovups -0x180(%rcx), %zmm10
+	vmovups -0x140(%rcx), %zmm11
+	vmovups	-0x100(%rcx), %zmm12
+	vmovups -0xC0(%rcx), %zmm13
+	vmovups -0x80(%rcx), %zmm14
+	vmovups -0x40(%rcx), %zmm15
+	prefetcht1 (%rsi)
+	prefetcht1 0x40(%rsi)
+	prefetcht1 0x80(%rsi)
+	prefetcht1 0xC0(%rsi)
+	prefetcht1 0x100(%rsi)
+	prefetcht1 0x140(%rsi)
+	prefetcht1 0x180(%rsi)
+	prefetcht1 0x1C0(%rsi)
+
+/* Loop with unaligned memory access.  */
+L(gobble_512bytes_loop):
+	vmovups	(%rsi), %zmm0
+	vmovups 0x40(%rsi), %zmm1
+	vmovups 0x80(%rsi), %zmm2
+	vmovups 0xC0(%rsi), %zmm3
+	vmovups	0x100(%rsi), %zmm4
+	vmovups 0x140(%rsi), %zmm5
+	vmovups 0x180(%rsi), %zmm6
+	vmovups 0x1C0(%rsi), %zmm7
+	add	$512, %rsi
+	prefetcht1 (%rsi)	/* Prefetch the 512 bytes the next iteration reads.  */
+	prefetcht1 0x40(%rsi)
+	prefetcht1 0x80(%rsi)
+	prefetcht1 0xC0(%rsi)
+	prefetcht1 0x100(%rsi)
+	prefetcht1 0x140(%rsi)
+	prefetcht1 0x180(%rsi)
+	prefetcht1 0x1C0(%rsi)
+	vmovups	%zmm0, (%rdi)
+	vmovups %zmm1, 0x40(%rdi)
+	vmovups %zmm2, 0x80(%rdi)
+	vmovups %zmm3, 0xC0(%rdi)
+	vmovups	%zmm4, 0x100(%rdi)
+	vmovups %zmm5, 0x140(%rdi)
+	vmovups %zmm6, 0x180(%rdi)
+	vmovups %zmm7, 0x1C0(%rdi)
+	add	$512, %rdi
+	cmp	%r9, %rdi
+	jb	L(gobble_512bytes_loop)	/* Repeat while %rdi is below %r9.  */
+	vmovups %zmm8, (%r9)	/* Finish with the saved last 512 bytes.  */
+	vmovups %zmm9, 0x40(%r9)
+	vmovups %zmm10, 0x80(%r9)
+	vmovups %zmm11, 0xC0(%r9)
+	vmovups %zmm12, 0x100(%r9)
+	vmovups %zmm13, 0x140(%r9)
+	vmovups %zmm14, 0x180(%r9)
+	vmovups %zmm15, 0x1C0(%r9)
+	ret
+
+L(1024bytesormore_bkw):
+	add	$512, %rdi	/* %rdi = end of the first 512 destination bytes.  */
+	vmovups	0x1C0(%rsi), %zmm8	/* Save the first 512 source bytes before the loop stores.  */
+	vmovups 0x180(%rsi), %zmm9
+	vmovups 0x140(%rsi), %zmm10
+	vmovups 0x100(%rsi), %zmm11
+	vmovups	0xC0(%rsi), %zmm12
+	vmovups 0x80(%rsi), %zmm13
+	vmovups 0x40(%rsi), %zmm14
+	vmovups (%rsi), %zmm15
+	prefetcht1 -0x40(%rcx)
+	prefetcht1 -0x80(%rcx)
+	prefetcht1 -0xC0(%rcx)
+	prefetcht1 -0x100(%rcx)
+	prefetcht1 -0x140(%rcx)
+	prefetcht1 -0x180(%rcx)
+	prefetcht1 -0x1C0(%rcx)
+	prefetcht1 -0x200(%rcx)
+
+/* Backward loop with unaligned memory access.  */
+L(gobble_512bytes_loop_bkw):
+	vmovups -0x40(%rcx), %zmm0
+	vmovups -0x80(%rcx), %zmm1
+	vmovups -0xC0(%rcx), %zmm2
+	vmovups	-0x100(%rcx), %zmm3
+	vmovups -0x140(%rcx), %zmm4
+	vmovups -0x180(%rcx), %zmm5
+	vmovups -0x1C0(%rcx), %zmm6
+	vmovups	-0x200(%rcx), %zmm7
+	sub	$512, %rcx
+	prefetcht1 -0x40(%rcx)	/* Prefetch the 512 bytes the next iteration reads.  */
+	prefetcht1 -0x80(%rcx)
+	prefetcht1 -0xC0(%rcx)
+	prefetcht1 -0x100(%rcx)
+	prefetcht1 -0x140(%rcx)
+	prefetcht1 -0x180(%rcx)
+	prefetcht1 -0x1C0(%rcx)
+	prefetcht1 -0x200(%rcx)
+	vmovups %zmm0, -0x40(%r9)
+	vmovups %zmm1, -0x80(%r9)
+	vmovups %zmm2, -0xC0(%r9)
+	vmovups	%zmm3, -0x100(%r9)
+	vmovups %zmm4, -0x140(%r9)
+	vmovups %zmm5, -0x180(%r9)
+	vmovups %zmm6, -0x1C0(%r9)
+	vmovups	%zmm7, -0x200(%r9)
+	sub	$512, %r9
+	cmp	%rdi, %r9
+	ja	L(gobble_512bytes_loop_bkw)	/* Repeat while %r9 is above %rdi.  */
+	vmovups %zmm8, -0x40(%rdi)	/* Finish with the saved first 512 bytes.  */
+	vmovups %zmm9, -0x80(%rdi)
+	vmovups %zmm10, -0xC0(%rdi)
+	vmovups %zmm11, -0x100(%rdi)
+	vmovups %zmm12, -0x140(%rdi)
+	vmovups %zmm13, -0x180(%rdi)
+	vmovups %zmm14, -0x1C0(%rdi)
+	vmovups %zmm15, -0x200(%rdi)
+	ret
+
+L(preloop_large):
+	cmp	%rsi, %rdi
+	ja	L(preloop_large_bkw)	/* %rdi above %rsi: copy from the end down.  */
+	vmovups	(%rsi), %zmm4	/* Save the first 128 source bytes.  */
+	vmovups	0x40(%rsi), %zmm5
+/* %r11 keeps the unaligned %rdi for the deferred store of zmm4/zmm5.  */
+	mov	%rdi, %r11
+/* Align destination for access with non-temporal stores in the loop.  */
+	mov	%rdi, %r8
+	and	$-0x80, %rdi
+	add	$0x80, %rdi	/* %rdi = next 128-byte boundary above the old value.  */
+	sub	%rdi, %r8	/* %r8 = old %rdi - new %rdi (negative).  */
+	sub	%r8, %rsi	/* Advance %rsi by the same distance.  */
+	add	%r8, %rdx	/* Shrink the length by that distance.  */
+L(gobble_256bytes_nt_loop):
+	prefetcht1 0x200(%rsi)
+	prefetcht1 0x240(%rsi)
+	prefetcht1 0x280(%rsi)
+	prefetcht1 0x2C0(%rsi)
+	prefetcht1 0x300(%rsi)
+	prefetcht1 0x340(%rsi)
+	prefetcht1 0x380(%rsi)
+	prefetcht1 0x3C0(%rsi)
+	vmovdqu64 (%rsi), %zmm0
+	vmovdqu64 0x40(%rsi), %zmm1
+	vmovdqu64 0x80(%rsi), %zmm2
+	vmovdqu64 0xC0(%rsi), %zmm3
+	vmovntdq %zmm0, (%rdi)
+	vmovntdq %zmm1, 0x40(%rdi)
+	vmovntdq %zmm2, 0x80(%rdi)
+	vmovntdq %zmm3, 0xC0(%rdi)
+	sub	$256, %rdx
+	add	$256, %rsi
+	add	$256, %rdi
+	cmp	$256, %rdx
+	ja	L(gobble_256bytes_nt_loop)	/* Repeat while more than 256 bytes remain.  */
+	sfence
+	vmovups	%zmm4, (%r11)	/* Store the saved head at the original %rdi.  */
+	vmovups	%zmm5, 0x40(%r11)
+	jmp	L(check)	/* L(check) copies the remaining %rdx bytes.  */
+
+L(preloop_large_bkw):
+	vmovups -0x80(%rcx), %zmm4	/* Save the last 128 source bytes.  */
+	vmovups -0x40(%rcx), %zmm5
+/* %r8 ends up holding the unaligned end address for the deferred store.  */
+/* Align end of destination for access with non-temporal stores.  */
+	mov	%r9, %r8
+	and	$-0x80, %r9	/* Round %r9 down to a 128-byte boundary.  */
+	sub	%r9, %r8	/* %r8 = bytes trimmed off the end.  */
+	sub	%r8, %rcx	/* Pull %rcx back by the same amount.  */
+	sub	%r8, %rdx	/* Shrink the length by that amount.  */
+	add	%r9, %r8	/* %r8 = the original, unaligned %r9.  */
+L(gobble_256bytes_nt_loop_bkw):
+	prefetcht1 -0x400(%rcx)
+	prefetcht1 -0x3C0(%rcx)
+	prefetcht1 -0x380(%rcx)
+	prefetcht1 -0x340(%rcx)
+	prefetcht1 -0x300(%rcx)
+	prefetcht1 -0x2C0(%rcx)
+	prefetcht1 -0x280(%rcx)
+	prefetcht1 -0x240(%rcx)
+	vmovdqu64 -0x100(%rcx), %zmm0
+	vmovdqu64 -0xC0(%rcx), %zmm1
+	vmovdqu64 -0x80(%rcx), %zmm2
+	vmovdqu64 -0x40(%rcx), %zmm3
+	vmovntdq %zmm0,	-0x100(%r9)
+	vmovntdq %zmm1,	-0xC0(%r9)
+	vmovntdq %zmm2,	-0x80(%r9)
+	vmovntdq %zmm3,	-0x40(%r9)
+	sub	$256, %rdx
+	sub	$256, %rcx
+	sub	$256, %r9
+	cmp	$256, %rdx
+	ja	L(gobble_256bytes_nt_loop_bkw)	/* Repeat while more than 256 bytes remain.  */
+	sfence
+	vmovups	%zmm4, -0x80(%r8)	/* Store the saved tail below the original end.  */
+	vmovups	%zmm5, -0x40(%r8)
+	jmp	L(check)	/* L(check) copies the remaining %rdx bytes.  */
+END (__memmove_avx512_no_vzeroupper)
+/* The memcpy and memcpy_chk names are aliases of the memmove entry points.  */
+strong_alias (__memmove_avx512_no_vzeroupper, __memcpy_avx512_no_vzeroupper)
+strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
new file mode 100644
index 0000000000..aac1515cf6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
@@ -0,0 +1,12 @@
+#if IS_IN (libc)
+# define VEC_SIZE	64
+# define VEC(i)		zmm##i
+# define VMOVNT		vmovntdq
+# define VMOVU		vmovdqu64
+# define VMOVA		vmovdqa64
+
+# define SECTION(p)		p##.avx512
+# define MEMMOVE_SYMBOL(p,s)	p##_avx512_##s
+
+# include "memmove-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.S b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
index 74a149a950..7c6163ddcb 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floorf.S
+++ b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
@@ -1,6 +1,6 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* memmove with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -16,23 +16,18 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <machine/asm.h>
-#include <init-arch.h>
-
-
-ENTRY(__floorf)
-	.type	__floorf, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__floorf_sse41(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-	jnz	2f
-	leaq	__floorf_c(%rip), %rax
-2:	ret
-END(__floorf)
-weak_alias (__floorf, floorf)
-
-
-ENTRY(__floorf_sse41)
-	roundss	$1, %xmm0, %xmm0
-	ret
-END(__floorf_sse41)
+#if IS_IN (libc)
+# define MEMMOVE_SYMBOL(p,s)	p##_sse2_##s
+#else
+weak_alias (__mempcpy, mempcpy)
+#endif
+
+#include <sysdeps/x86_64/memmove.S>
+
+#if defined SHARED && IS_IN (libc)
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+/* Use __memmove_sse2_unaligned to support overlapping addresses.  */
+compat_symbol (libc, __memmove_sse2_unaligned, memcpy, GLIBC_2_2_5);
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
new file mode 100644
index 0000000000..e2ede45e9f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -0,0 +1,565 @@
+/* memmove/memcpy/mempcpy with unaligned load/store and rep movsb
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* memmove/memcpy/mempcpy is implemented as:
+   1. Use overlapping load and store to avoid branch.
+   2. Load all sources into registers and store them together to avoid
+      possible address overlap between source and destination.
+   3. If size is 8 * VEC_SIZE or less, load all sources into registers
+      and store them together.
+   4. If address of destination > address of source, backward copy
+      4 * VEC_SIZE at a time with unaligned load and aligned store.
+      Load the first 4 * VEC and last VEC before the loop and store
+      them after the loop to support overlapping addresses.
+   5. Otherwise, forward copy 4 * VEC_SIZE at a time with unaligned
+      load and aligned store.  Load the last 4 * VEC and first VEC
+      before the loop and store them after the loop to support
+      overlapping addresses.
+   6. If size >= __x86_shared_non_temporal_threshold and there is no
+      overlap between destination and source, use non-temporal store
+      instead of aligned store.  */
+
+#include <sysdep.h>
+
+#ifndef MEMCPY_SYMBOL
+# define MEMCPY_SYMBOL(p,s)		MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef MEMPCPY_SYMBOL
+# define MEMPCPY_SYMBOL(p,s)		MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef MEMMOVE_CHK_SYMBOL
+# define MEMMOVE_CHK_SYMBOL(p,s)	MEMMOVE_SYMBOL(p, s)
+#endif
+
+#ifndef VZEROUPPER
+# if VEC_SIZE > 16
+#  define VZEROUPPER vzeroupper
+# else
+#  define VZEROUPPER
+# endif
+#endif
+
+/* Threshold to use Enhanced REP MOVSB.  Since there is overhead to set
+   up REP MOVSB operation, REP MOVSB isn't faster on short data.  The
+   memcpy micro benchmark in glibc shows that 2KB is the approximate
+   value above which REP MOVSB becomes faster than SSE2 optimization
+   on processors with Enhanced REP MOVSB.  Since larger register size
+   can move more data with a single load and store, the threshold is
+   higher with larger register size.  */
+#ifndef REP_MOVSB_THRESHOLD
+# define REP_MOVSB_THRESHOLD	(2048 * (VEC_SIZE / 16))
+#endif
+
+#ifndef PREFETCH
+# define PREFETCH(addr) prefetcht0 addr
+#endif
+
+/* Assume 64-byte prefetch size.  */
+#ifndef PREFETCH_SIZE
+# define PREFETCH_SIZE 64
+#endif
+
+#define PREFETCHED_LOAD_SIZE (VEC_SIZE * 4)
+
+#if PREFETCH_SIZE == 64
+# if PREFETCHED_LOAD_SIZE == PREFETCH_SIZE
+#  define PREFETCH_ONE_SET(dir, base, offset) \
+	PREFETCH ((offset)base)
+# elif PREFETCHED_LOAD_SIZE == 2 * PREFETCH_SIZE
+#  define PREFETCH_ONE_SET(dir, base, offset) \
+	PREFETCH ((offset)base); \
+	PREFETCH ((offset + dir * PREFETCH_SIZE)base)
+# elif PREFETCHED_LOAD_SIZE == 4 * PREFETCH_SIZE
+#  define PREFETCH_ONE_SET(dir, base, offset) \
+	PREFETCH ((offset)base); \
+	PREFETCH ((offset + dir * PREFETCH_SIZE)base); \
+	PREFETCH ((offset + dir * PREFETCH_SIZE * 2)base); \
+	PREFETCH ((offset + dir * PREFETCH_SIZE * 3)base)
+# else
+#   error Unsupported PREFETCHED_LOAD_SIZE!
+# endif
+#else
+# error Unsupported PREFETCH_SIZE!
+#endif
+
+#ifndef SECTION
+# error SECTION is not defined!
+#endif
+
+	.section SECTION(.text),"ax",@progbits
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
+#endif
+
+ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned))
+	movq	%rdi, %rax
+	addq	%rdx, %rax
+	jmp	L(start)
+END (MEMPCPY_SYMBOL (__mempcpy, unaligned))
+
+#if defined SHARED && IS_IN (libc)
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
+#endif
+
+ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned))
+	movq	%rdi, %rax
+L(start):
+	cmpq	$VEC_SIZE, %rdx
+	jb	L(less_vec)
+	cmpq	$(VEC_SIZE * 2), %rdx
+	ja	L(more_2x_vec)
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(last_2x_vec):
+#endif
+	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
+	VMOVU	(%rsi), %VEC(0)
+	VMOVU	-VEC_SIZE(%rsi,%rdx), %VEC(1)
+	VMOVU	%VEC(0), (%rdi)
+	VMOVU	%VEC(1), -VEC_SIZE(%rdi,%rdx)
+	VZEROUPPER
+#if !defined USE_MULTIARCH || !IS_IN (libc)
+L(nop):
+#endif
+	ret
+#if defined USE_MULTIARCH && IS_IN (libc)
+END (MEMMOVE_SYMBOL (__memmove, unaligned))
+
+# if VEC_SIZE == 16
+ENTRY (__mempcpy_chk_erms)
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* If %rcx < %rdx; else continue below.  */
+END (__mempcpy_chk_erms)
+
+/* Only used to measure performance of REP MOVSB.  */
+ENTRY (__mempcpy_erms)
+	movq	%rdi, %rax
+	/* Skip zero length; %rax already holds the destination.  */
+	testq	%rdx, %rdx
+	jz	2f
+	addq	%rdx, %rax		/* Return destination + length.  */
+	jmp	L(start_movsb)
+END (__mempcpy_erms)
+
+ENTRY (__memmove_chk_erms)
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* If %rcx < %rdx; else continue below.  */
+END (__memmove_chk_erms)
+
+ENTRY (__memmove_erms)
+	movq	%rdi, %rax		/* Return the destination.  */
+	/* Skip zero length.  */
+	testq	%rdx, %rdx
+	jz	2f
+L(start_movsb):
+	movq	%rdx, %rcx		/* Byte count for REP MOVSB.  */
+	cmpq	%rsi, %rdi
+	jb	1f			/* Destination below source: copy forward.  */
+	/* Source == destination is less common.  */
+	je	2f
+	leaq	(%rsi,%rcx), %rdx	/* %rdx = source + length.  */
+	cmpq	%rdx, %rdi
+	jb	L(movsb_backward)	/* Destination starts inside the source.  */
+1:
+	rep movsb
+2:
+	ret
+L(movsb_backward):
+	leaq	-1(%rdi,%rcx), %rdi	/* Last byte of the destination.  */
+	leaq	-1(%rsi,%rcx), %rsi	/* Last byte of the source.  */
+	std				/* Set DF: copy from high to low addresses.  */
+	rep movsb
+	cld				/* Clear DF again before returning.  */
+	ret
+END (__memmove_erms)
+strong_alias (__memmove_erms, __memcpy_erms)
+strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
+# endif
+
+# ifdef SHARED
+ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
+# endif
+
+ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+	movq	%rdi, %rax
+	addq	%rdx, %rax
+	jmp	L(start_erms)
+END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
+
+# ifdef SHARED
+ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
+# endif
+
+ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
+	movq	%rdi, %rax
+L(start_erms):
+	cmpq	$VEC_SIZE, %rdx
+	jb	L(less_vec)
+	cmpq	$(VEC_SIZE * 2), %rdx
+	ja	L(movsb_more_2x_vec)
+L(last_2x_vec):
+	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE. */
+	VMOVU	(%rsi), %VEC(0)
+	VMOVU	-VEC_SIZE(%rsi,%rdx), %VEC(1)
+	VMOVU	%VEC(0), (%rdi)
+	VMOVU	%VEC(1), -VEC_SIZE(%rdi,%rdx)
+L(return):
+	VZEROUPPER
+	ret
+
+L(movsb):
+	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
+	jae	L(more_8x_vec)
+	cmpq	%rsi, %rdi
+	jb	1f
+	/* Source == destination is less common.  */
+	je	L(nop)
+	leaq	(%rsi,%rdx), %r9
+	cmpq	%r9, %rdi
+	/* Avoid slow backward REP MOVSB.  */
+# if REP_MOVSB_THRESHOLD <= (VEC_SIZE * 8)
+#  error Unsupported REP_MOVSB_THRESHOLD and VEC_SIZE!
+# endif
+	jb	L(more_8x_vec_backward)
+1:
+	movq	%rdx, %rcx
+	rep movsb
+L(nop):
+	ret
+#endif
+
+L(less_vec):
+	/* Less than 1 VEC, so the length (< 64) fits in %dl.  */
+#if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
+# error Unsupported VEC_SIZE!
+#endif
+#if VEC_SIZE > 32
+	cmpb	$32, %dl
+	jae	L(between_32_63)
+#endif
+#if VEC_SIZE > 16
+	cmpb	$16, %dl
+	jae	L(between_16_31)
+#endif
+	cmpb	$8, %dl
+	jae	L(between_8_15)
+	cmpb	$4, %dl
+	jae	L(between_4_7)
+	cmpb	$1, %dl
+	ja	L(between_2_3)
+	jb	1f			/* Size 0: nothing to copy.  */
+	movzbl	(%rsi), %ecx		/* Size 1: copy the single byte.  */
+	movb	%cl, (%rdi)
+1:
+	ret
+#if VEC_SIZE > 32
+L(between_32_63):
+	/* From 32 to 63.  No branch when size == 32.  */
+	vmovdqu	(%rsi), %ymm0
+	vmovdqu	-32(%rsi,%rdx), %ymm1
+	vmovdqu	%ymm0, (%rdi)
+	vmovdqu	%ymm1, -32(%rdi,%rdx)
+	VZEROUPPER
+	ret
+#endif
+#if VEC_SIZE > 16
+	/* From 16 to 31.  No branch when size == 16.  */
+L(between_16_31):
+	vmovdqu	(%rsi), %xmm0
+	vmovdqu	-16(%rsi,%rdx), %xmm1
+	vmovdqu	%xmm0, (%rdi)
+	vmovdqu	%xmm1, -16(%rdi,%rdx)
+	ret				/* No VZEROUPPER: only %xmm0/%xmm1 written.  */
+#endif
+L(between_8_15):
+	/* From 8 to 15.  No branch when size == 8.  */
+	movq	-8(%rsi,%rdx), %rcx
+	movq	(%rsi), %rsi		/* Source pointer is not needed afterwards.  */
+	movq	%rcx, -8(%rdi,%rdx)	/* Both loads precede both stores.  */
+	movq	%rsi, (%rdi)
+	ret
+L(between_4_7):
+	/* From 4 to 7.  No branch when size == 4.  */
+	movl	-4(%rsi,%rdx), %ecx
+	movl	(%rsi), %esi		/* Source pointer is not needed afterwards.  */
+	movl	%ecx, -4(%rdi,%rdx)	/* Both loads precede both stores.  */
+	movl	%esi, (%rdi)
+	ret
+L(between_2_3):
+	/* From 2 to 3.  No branch when size == 2.  */
+	movzwl	-2(%rsi,%rdx), %ecx
+	movzwl	(%rsi), %esi		/* Source pointer is not needed afterwards.  */
+	movw	%cx, -2(%rdi,%rdx)	/* Both loads precede both stores.  */
+	movw	%si, (%rdi)
+	ret
+
+#if defined USE_MULTIARCH && IS_IN (libc)
+L(movsb_more_2x_vec):
+	cmpq	$REP_MOVSB_THRESHOLD, %rdx
+	ja	L(movsb)
+#endif
+L(more_2x_vec):
+	/* More than 2 * VEC and there may be overlap between destination
+	   and source.  */
+	cmpq	$(VEC_SIZE * 8), %rdx
+	ja	L(more_8x_vec)
+	cmpq	$(VEC_SIZE * 4), %rdx
+	jb	L(last_4x_vec)
+	/* Copy from 4 * VEC to 8 * VEC, inclusively. */
+	VMOVU	(%rsi), %VEC(0)
+	VMOVU	VEC_SIZE(%rsi), %VEC(1)
+	VMOVU	(VEC_SIZE * 2)(%rsi), %VEC(2)
+	VMOVU	(VEC_SIZE * 3)(%rsi), %VEC(3)
+	VMOVU	-VEC_SIZE(%rsi,%rdx), %VEC(4)
+	VMOVU	-(VEC_SIZE * 2)(%rsi,%rdx), %VEC(5)
+	VMOVU	-(VEC_SIZE * 3)(%rsi,%rdx), %VEC(6)
+	VMOVU	-(VEC_SIZE * 4)(%rsi,%rdx), %VEC(7)
+	VMOVU	%VEC(0), (%rdi)
+	VMOVU	%VEC(1), VEC_SIZE(%rdi)
+	VMOVU	%VEC(2), (VEC_SIZE * 2)(%rdi)
+	VMOVU	%VEC(3), (VEC_SIZE * 3)(%rdi)
+	VMOVU	%VEC(4), -VEC_SIZE(%rdi,%rdx)
+	VMOVU	%VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx)
+	VMOVU	%VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx)
+	VMOVU	%VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx)
+	VZEROUPPER
+	ret
+L(last_4x_vec):
+	/* Copy from 2 * VEC to 4 * VEC. */
+	VMOVU	(%rsi), %VEC(0)
+	VMOVU	VEC_SIZE(%rsi), %VEC(1)
+	VMOVU	-VEC_SIZE(%rsi,%rdx), %VEC(2)
+	VMOVU	-(VEC_SIZE * 2)(%rsi,%rdx), %VEC(3)
+	VMOVU	%VEC(0), (%rdi)
+	VMOVU	%VEC(1), VEC_SIZE(%rdi)
+	VMOVU	%VEC(2), -VEC_SIZE(%rdi,%rdx)
+	VMOVU	%VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
+	VZEROUPPER
+	ret
+
+L(more_8x_vec):
+	cmpq	%rsi, %rdi
+	ja	L(more_8x_vec_backward)	/* Destination above source.  */
+	/* Source == destination is less common.  */
+	je	L(nop)
+	/* Load the first VEC and last 4 * VEC to support overlapping
+	   addresses.  */
+	VMOVU	(%rsi), %VEC(4)
+	VMOVU	-VEC_SIZE(%rsi, %rdx), %VEC(5)
+	VMOVU	-(VEC_SIZE * 2)(%rsi, %rdx), %VEC(6)
+	VMOVU	-(VEC_SIZE * 3)(%rsi, %rdx), %VEC(7)
+	VMOVU	-(VEC_SIZE * 4)(%rsi, %rdx), %VEC(8)
+	/* Save the start (%r11) and the last VEC (%rcx) of the destination.  */
+	movq	%rdi, %r11
+	leaq	-VEC_SIZE(%rdi, %rdx), %rcx
+	/* Align destination for aligned stores in the loop.  Compute
+	   how much destination is misaligned.  */
+	movq	%rdi, %r8
+	andq	$(VEC_SIZE - 1), %r8
+	/* Get the negative of offset for alignment (-VEC_SIZE to -1).  */
+	subq	$VEC_SIZE, %r8
+	/* Adjust source.  */
+	subq	%r8, %rsi
+	/* Adjust destination which should be aligned now.  */
+	subq	%r8, %rdi
+	/* Adjust length.  */
+	addq	%r8, %rdx
+#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+	/* Check non-temporal store threshold.  */
+	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
+	ja	L(large_forward)
+#endif
+L(loop_4x_vec_forward):
+	/* Copy 4 * VEC at a time forward.  */
+	VMOVU	(%rsi), %VEC(0)
+	VMOVU	VEC_SIZE(%rsi), %VEC(1)
+	VMOVU	(VEC_SIZE * 2)(%rsi), %VEC(2)
+	VMOVU	(VEC_SIZE * 3)(%rsi), %VEC(3)
+	addq	$(VEC_SIZE * 4), %rsi
+	subq	$(VEC_SIZE * 4), %rdx
+	VMOVA	%VEC(0), (%rdi)
+	VMOVA	%VEC(1), VEC_SIZE(%rdi)
+	VMOVA	%VEC(2), (VEC_SIZE * 2)(%rdi)
+	VMOVA	%VEC(3), (VEC_SIZE * 3)(%rdi)
+	addq	$(VEC_SIZE * 4), %rdi
+	cmpq	$(VEC_SIZE * 4), %rdx
+	ja	L(loop_4x_vec_forward)	/* More than 4 * VEC still to copy.  */
+	/* Store the last 4 * VEC loaded before the loop.  */
+	VMOVU	%VEC(5), (%rcx)
+	VMOVU	%VEC(6), -VEC_SIZE(%rcx)
+	VMOVU	%VEC(7), -(VEC_SIZE * 2)(%rcx)
+	VMOVU	%VEC(8), -(VEC_SIZE * 3)(%rcx)
+	/* Store the first VEC; it covers the bytes skipped for alignment.  */
+	VMOVU	%VEC(4), (%r11)
+	VZEROUPPER
+	ret
+
+L(more_8x_vec_backward):
+	/* Load the first 4 * VEC and last VEC to support overlapping
+	   addresses.  */
+	VMOVU	(%rsi), %VEC(4)
+	VMOVU	VEC_SIZE(%rsi), %VEC(5)
+	VMOVU	(VEC_SIZE * 2)(%rsi), %VEC(6)
+	VMOVU	(VEC_SIZE * 3)(%rsi), %VEC(7)
+	VMOVU	-VEC_SIZE(%rsi,%rdx), %VEC(8)
+	/* Save the address of the last VEC of the destination.  */
+	leaq	-VEC_SIZE(%rdi, %rdx), %r11
+	/* Align destination end for aligned stores in the loop.  Compute
+	   how much destination end is misaligned.  */
+	leaq	-VEC_SIZE(%rsi, %rdx), %rcx	/* Last VEC of the source.  */
+	movq	%r11, %r9
+	movq	%r11, %r8
+	andq	$(VEC_SIZE - 1), %r8
+	/* Adjust source.  */
+	subq	%r8, %rcx
+	/* Adjust the end of destination which should be aligned now.  */
+	subq	%r8, %r9
+	/* Adjust length.  */
+	subq	%r8, %rdx
+#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+	/* Check non-temporal store threshold.  */
+	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
+	ja	L(large_backward)
+#endif
+L(loop_4x_vec_backward):
+	/* Copy 4 * VEC at a time backward.  */
+	VMOVU	(%rcx), %VEC(0)
+	VMOVU	-VEC_SIZE(%rcx), %VEC(1)
+	VMOVU	-(VEC_SIZE * 2)(%rcx), %VEC(2)
+	VMOVU	-(VEC_SIZE * 3)(%rcx), %VEC(3)
+	subq	$(VEC_SIZE * 4), %rcx
+	subq	$(VEC_SIZE * 4), %rdx
+	VMOVA	%VEC(0), (%r9)
+	VMOVA	%VEC(1), -VEC_SIZE(%r9)
+	VMOVA	%VEC(2), -(VEC_SIZE * 2)(%r9)
+	VMOVA	%VEC(3), -(VEC_SIZE * 3)(%r9)
+	subq	$(VEC_SIZE * 4), %r9
+	cmpq	$(VEC_SIZE * 4), %rdx
+	ja	L(loop_4x_vec_backward)	/* More than 4 * VEC still to copy.  */
+	/* Store the first 4 * VEC loaded before the loop.  */
+	VMOVU	%VEC(4), (%rdi)
+	VMOVU	%VEC(5), VEC_SIZE(%rdi)
+	VMOVU	%VEC(6), (VEC_SIZE * 2)(%rdi)
+	VMOVU	%VEC(7), (VEC_SIZE * 3)(%rdi)
+	/* Store the last VEC; it covers the unaligned tail of the destination.  */
+	VMOVU	%VEC(8), (%r11)
+	VZEROUPPER
+	ret
+
+#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+L(large_forward):
+	/* Don't use non-temporal store if there is overlap between
+	   destination and source since destination may be in cache
+	   when source is loaded.  */
+	leaq    (%rdi, %rdx), %r10	/* End of the remaining destination.  */
+	cmpq    %r10, %rsi
+	jb	L(loop_4x_vec_forward)	/* Source starts below it: overlap.  */
+L(loop_large_forward):
+	/* Copy 4 * VEC at a time forward with non-temporal stores.  */
+	PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE * 2)
+	PREFETCH_ONE_SET (1, (%rsi), PREFETCHED_LOAD_SIZE * 3)
+	VMOVU	(%rsi), %VEC(0)
+	VMOVU	VEC_SIZE(%rsi), %VEC(1)
+	VMOVU	(VEC_SIZE * 2)(%rsi), %VEC(2)
+	VMOVU	(VEC_SIZE * 3)(%rsi), %VEC(3)
+	addq	$PREFETCHED_LOAD_SIZE, %rsi
+	subq	$PREFETCHED_LOAD_SIZE, %rdx
+	VMOVNT	%VEC(0), (%rdi)
+	VMOVNT	%VEC(1), VEC_SIZE(%rdi)
+	VMOVNT	%VEC(2), (VEC_SIZE * 2)(%rdi)
+	VMOVNT	%VEC(3), (VEC_SIZE * 3)(%rdi)
+	addq	$PREFETCHED_LOAD_SIZE, %rdi
+	cmpq	$PREFETCHED_LOAD_SIZE, %rdx
+	ja	L(loop_large_forward)
+	sfence				/* Fence the non-temporal stores.  */
+	/* Store the last 4 * VEC loaded before the loop.  */
+	VMOVU	%VEC(5), (%rcx)
+	VMOVU	%VEC(6), -VEC_SIZE(%rcx)
+	VMOVU	%VEC(7), -(VEC_SIZE * 2)(%rcx)
+	VMOVU	%VEC(8), -(VEC_SIZE * 3)(%rcx)
+	/* Store the first VEC.  */
+	VMOVU	%VEC(4), (%r11)
+	VZEROUPPER
+	ret
+
+L(large_backward):
+	/* Don't use non-temporal store if there is overlap between
+	   destination and source since destination may be in cache
+	   when source is loaded.  */
+	leaq    (%rcx, %rdx), %r10	/* Adjusted source end + remaining length.  */
+	cmpq    %r10, %r9
+	jb	L(loop_4x_vec_backward)	/* Destination end below it: overlap.  */
+L(loop_large_backward):
+	/* Copy 4 * VEC at a time backward with non-temporal stores.  */
+	PREFETCH_ONE_SET (-1, (%rcx), -PREFETCHED_LOAD_SIZE * 2)
+	PREFETCH_ONE_SET (-1, (%rcx), -PREFETCHED_LOAD_SIZE * 3)
+	VMOVU	(%rcx), %VEC(0)
+	VMOVU	-VEC_SIZE(%rcx), %VEC(1)
+	VMOVU	-(VEC_SIZE * 2)(%rcx), %VEC(2)
+	VMOVU	-(VEC_SIZE * 3)(%rcx), %VEC(3)
+	subq	$PREFETCHED_LOAD_SIZE, %rcx
+	subq	$PREFETCHED_LOAD_SIZE, %rdx
+	VMOVNT	%VEC(0), (%r9)
+	VMOVNT	%VEC(1), -VEC_SIZE(%r9)
+	VMOVNT	%VEC(2), -(VEC_SIZE * 2)(%r9)
+	VMOVNT	%VEC(3), -(VEC_SIZE * 3)(%r9)
+	subq	$PREFETCHED_LOAD_SIZE, %r9
+	cmpq	$PREFETCHED_LOAD_SIZE, %rdx
+	ja	L(loop_large_backward)
+	sfence				/* Fence the non-temporal stores.  */
+	/* Store the first 4 * VEC loaded before the loop.  */
+	VMOVU	%VEC(4), (%rdi)
+	VMOVU	%VEC(5), VEC_SIZE(%rdi)
+	VMOVU	%VEC(6), (VEC_SIZE * 2)(%rdi)
+	VMOVU	%VEC(7), (VEC_SIZE * 3)(%rdi)
+	/* Store the last VEC.  */
+	VMOVU	%VEC(8), (%r11)
+	VZEROUPPER
+	ret
+#endif
+END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
+
+#if IS_IN (libc)
+# ifdef USE_MULTIARCH
+strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
+	      MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
+#  ifdef SHARED
+strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
+	      MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
+#  endif
+# endif
+# ifdef SHARED
+strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned),
+	      MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned))
+# endif
+#endif
+strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned),
+	      MEMCPY_SYMBOL (__memcpy, unaligned))
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 8da5640bb0..d512228eae 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -1,6 +1,6 @@
-/* Multiple versions of memmove.
+/* Multiple versions of memmove.
    All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,57 +17,21 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+/* Define multiple versions only for the definition in libc.  */
 #if IS_IN (libc)
-# define MEMMOVE __memmove_sse2
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-#  define libc_hidden_builtin_def(name) \
-  __hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2);
-# endif
-
-/* Redefine memmove so that the compiler won't complain about the type
-   mismatch with the IFUNC selector in strong_alias, below.  */
-# undef memmove
 # define memmove __redirect_memmove
 # include <string.h>
 # undef memmove
 
-extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden;
-extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden;
-extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden;
-extern __typeof (__redirect_memmove) __memmove_avx_unaligned attribute_hidden;
-# ifdef HAVE_AVX512_ASM_SUPPORT
-  extern __typeof (__redirect_memmove) __memmove_avx512_no_vzeroupper attribute_hidden;
-# endif
-
-#endif
+# define SYMBOL_NAME memmove
+# include "ifunc-memmove.h"
 
-#include "string/memmove.c"
+libc_ifunc_redirected (__redirect_memmove, __libc_memmove,
+		       IFUNC_SELECTOR ());
 
-#if IS_IN (libc)
-# include <shlib-compat.h>
-# include "init-arch.h"
-
-/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
-   ifunc symbol properly.  */
-extern __typeof (__redirect_memmove) __libc_memmove;
-libc_ifunc (__libc_memmove,
-#ifdef HAVE_AVX512_ASM_SUPPORT
-	    HAS_ARCH_FEATURE (AVX512F_Usable)
-	      && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	    ? __memmove_avx512_no_vzeroupper
-	    :
-#endif
-	    (HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	    ? __memmove_avx_unaligned
-	    : (HAS_CPU_FEATURE (SSSE3)
-	       ? (HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	          ? __memmove_ssse3_back : __memmove_ssse3)
-	       : __memmove_sse2)));
-
-strong_alias (__libc_memmove, memmove)
-
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
-compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+strong_alias (__libc_memmove, memmove);
+# ifdef SHARED
+__hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove)
+  __attribute__ ((visibility ("hidden")));
 # endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk-nonshared.S b/sysdeps/x86_64/multiarch/memmove_chk-nonshared.S
new file mode 100644
index 0000000000..c362a3324d
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of memmove_chk for x86-64.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/memmove_chk.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index f64da63180..0e9dc7e07f 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -1,6 +1,6 @@
-/* Multiple versions of __memmove_chk.
+/* Multiple versions of __memmove_chk.
    All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,30 +17,15 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <string.h>
-#include "init-arch.h"
+/* Define multiple versions only for the definition in libc.so. */
+#if IS_IN (libc) && defined SHARED
+# define __memmove_chk __redirect_memmove_chk
+# include <string.h>
+# undef __memmove_chk
 
-#define MEMMOVE_CHK __memmove_chk_sse2
+# define SYMBOL_NAME memmove_chk
+# include "ifunc-memmove.h"
 
-extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
-extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
-extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
-extern __typeof (__memmove_chk) __memmove_chk_avx_unaligned attribute_hidden;
-# ifdef HAVE_AVX512_ASM_SUPPORT
-  extern __typeof (__memmove_chk) __memmove_chk_avx512_no_vzeroupper attribute_hidden;
-# endif
-
-#include "debug/memmove_chk.c"
-
-libc_ifunc (__memmove_chk,
-#ifdef HAVE_AVX512_ASM_SUPPORT
-	    HAS_ARCH_FEATURE (AVX512F_Usable)
-	      && HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	    ? __memmove_chk_avx512_no_vzeroupper
-	    :
+libc_ifunc_redirected (__redirect_memmove_chk, __memmove_chk,
+		       IFUNC_SELECTOR ());
 #endif
-	    HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) ? __memmove_chk_avx_unaligned :
-	    (HAS_CPU_FEATURE (SSSE3)
-	    ? (HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	       ? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
-	    : __memmove_chk_sse2));
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
deleted file mode 100644
index 82ffacb8fb..0000000000
--- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY
-#define MEMCPY		__mempcpy_ssse3_back
-#define MEMCPY_CHK	__mempcpy_chk_ssse3_back
-#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3.S
deleted file mode 100644
index 822d98e954..0000000000
--- a/sysdeps/x86_64/multiarch/mempcpy-ssse3.S
+++ /dev/null
@@ -1,4 +0,0 @@
-#define USE_AS_MEMPCPY
-#define MEMCPY		__mempcpy_ssse3
-#define MEMCPY_CHK	__mempcpy_chk_ssse3
-#include "memcpy-ssse3.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
deleted file mode 100644
index ed78623565..0000000000
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ /dev/null
@@ -1,86 +0,0 @@
-/* Multiple versions of mempcpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  In static binaries we need mempcpy before the initialization
-   happened.  */
-#if defined SHARED && IS_IN (libc)
-ENTRY(__mempcpy)
-	.type	__mempcpy, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef HAVE_AVX512_ASM_SUPPORT
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz	1f
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jz	1f
-	leaq    __mempcpy_avx512_no_vzeroupper(%rip), %rax
-	ret
-#endif
-1:	leaq	__mempcpy_sse2(%rip), %rax
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	leaq	__mempcpy_ssse3(%rip), %rax
-	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jz	2f
-	leaq	__mempcpy_ssse3_back(%rip), %rax
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	leaq	__mempcpy_avx_unaligned(%rip), %rax
-2:	ret
-END(__mempcpy)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __mempcpy_sse2, @function; \
-	.p2align 4; \
-	.globl __mempcpy_sse2; \
-	.hidden __mempcpy_sse2; \
-	__mempcpy_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2
-
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
-	.type __mempcpy_chk_sse2, @function; \
-	.globl __mempcpy_chk_sse2; \
-	.p2align 4; \
-	__mempcpy_chk_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
-	cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
-
-# undef libc_hidden_def
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal mempcpy calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_def(name) \
-	.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2
-# define libc_hidden_builtin_def(name) \
-	.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2
-#endif
-
-#include "../mempcpy.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
new file mode 100644
index 0000000000..9fe41dda82
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy.c
@@ -0,0 +1,42 @@
+/* Multiple versions of mempcpy.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define mempcpy __redirect_mempcpy
+# define __mempcpy __redirect___mempcpy
+# define NO_MEMPCPY_STPCPY_REDIRECT
+# define __NO_STRING_INLINES
+# include <string.h>
+# undef mempcpy
+# undef __mempcpy
+
+# define SYMBOL_NAME mempcpy
+# include "ifunc-memmove.h"
+
+libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR ());
+
+weak_alias (__mempcpy, mempcpy)
+# ifdef SHARED
+__hidden_ver1 (__mempcpy, __GI___mempcpy, __redirect___mempcpy)
+  __attribute__ ((visibility ("hidden")));
+__hidden_ver1 (mempcpy, __GI_mempcpy, __redirect_mempcpy)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk-nonshared.S b/sysdeps/x86_64/multiarch/mempcpy_chk-nonshared.S
new file mode 100644
index 0000000000..7133246a1d
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of mempcpy_chk for x86-64.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/mempcpy_chk.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
deleted file mode 100644
index 6e8a89d38c..0000000000
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Multiple versions of __mempcpy_chk
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
-   DSO.  There are no multiarch mempcpy functions for static binaries.
- */
-#if IS_IN (libc)
-# ifdef SHARED
-	.text
-ENTRY(__mempcpy_chk)
-	.type	__mempcpy_chk, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef HAVE_AVX512_ASM_SUPPORT
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz	1f
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jz	1f
-	leaq    __mempcpy_chk_avx512_no_vzeroupper(%rip), %rax
-	ret
-#endif
-1:	leaq	__mempcpy_chk_sse2(%rip), %rax
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	leaq	__mempcpy_chk_ssse3(%rip), %rax
-	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jz	2f
-	leaq	__mempcpy_chk_ssse3_back(%rip), %rax
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz	2f
-	leaq	__mempcpy_chk_avx_unaligned(%rip), %rax
-2:	ret
-END(__mempcpy_chk)
-# else
-#  include "../mempcpy_chk.S"
-# endif
-#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.c b/sysdeps/x86_64/multiarch/mempcpy_chk.c
new file mode 100644
index 0000000000..956918b3a1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __mempcpy_chk
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.so. */
+#if IS_IN (libc) && defined SHARED
+# define __mempcpy_chk __redirect_mempcpy_chk
+# include <string.h>
+# undef __mempcpy_chk
+
+# define SYMBOL_NAME mempcpy_chk
+# include "ifunc-memmove.h"
+
+libc_ifunc_redirected (__redirect_mempcpy_chk, __mempcpy_chk,
+		       IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
new file mode 100644
index 0000000000..b41a58bcba
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
@@ -0,0 +1,359 @@
+/* memrchr optimized with AVX2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef VZEROUPPER
+#  define VZEROUPPER	vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+	.section .text.avx,"ax",@progbits
+ENTRY (__memrchr_avx2)
+	/* Broadcast CHAR to YMM0.  */
+	vmovd	%esi, %xmm0
+	vpbroadcastb %xmm0, %ymm0
+
+	subq	$VEC_SIZE, %rdx		/* RDX = length - VEC_SIZE.  */
+	jbe	L(last_vec_or_less)	/* Length is at most VEC_SIZE.  */
+
+	addq	%rdx, %rdi		/* RDI = start of the last VEC_SIZE bytes.  */
+
+	/* Check the last VEC_SIZE bytes.  */
+	vpcmpeqb (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x0)
+
+	subq	$(VEC_SIZE * 4), %rdi	/* Step back over the next 4 VECs.  */
+	movl	%edi, %ecx
+	andl	$(VEC_SIZE - 1), %ecx	/* ECX = misalignment of RDI.  */
+	jz	L(aligned_more)
+
+	/* Align data for aligned loads in the loop.  */
+	addq	$VEC_SIZE, %rdi
+	addq	$VEC_SIZE, %rdx
+	andq	$-VEC_SIZE, %rdi	/* Round RDI up to a VEC_SIZE boundary...  */
+	subq	%rcx, %rdx		/* ...and apply the same net change to RDX.  */
+
+	.p2align 4
+L(aligned_more):
+	subq	$(VEC_SIZE * 4), %rdx
+	jbe	L(last_4x_vec_or_less)
+
+	/* Check the last 4 * VEC_SIZE.  Only one VEC_SIZE at a time
+	   since data is only aligned to VEC_SIZE.  */
+	vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x3)
+
+	vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm2
+	vpmovmskb %ymm2, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x2)
+
+	vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm3
+	vpmovmskb %ymm3, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x1)
+
+	vpcmpeqb (%rdi), %ymm0, %ymm4
+	vpmovmskb %ymm4, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x0)
+
+	/* Align data to 4 * VEC_SIZE for loop with fewer branches.
+	   There are some overlaps with above if data isn't aligned
+	   to 4 * VEC_SIZE.  */
+	movl	%edi, %ecx
+	andl	$(VEC_SIZE * 4 - 1), %ecx
+	jz	L(loop_4x_vec)
+
+	addq	$(VEC_SIZE * 4), %rdi
+	addq	$(VEC_SIZE * 4), %rdx
+	andq	$-(VEC_SIZE * 4), %rdi
+	subq	%rcx, %rdx
+
+	.p2align 4
+L(loop_4x_vec):
+	/* Compare 4 * VEC at a time, moving backward toward the start.  */
+	subq	$(VEC_SIZE * 4), %rdi
+	subq	$(VEC_SIZE * 4), %rdx
+	jbe	L(last_4x_vec_or_less)	/* At most 4 * VEC_SIZE bytes remain.  */
+
+	vmovdqa	(%rdi), %ymm1
+	vmovdqa	VEC_SIZE(%rdi), %ymm2
+	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm3
+	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm4
+
+	vpcmpeqb %ymm1, %ymm0, %ymm1
+	vpcmpeqb %ymm2, %ymm0, %ymm2
+	vpcmpeqb %ymm3, %ymm0, %ymm3
+	vpcmpeqb %ymm4, %ymm0, %ymm4
+
+	vpor	%ymm1, %ymm2, %ymm5	/* OR the four compare results so a  */
+	vpor	%ymm3, %ymm4, %ymm6	/* single test covers all 4 VECs.  */
+	vpor	%ymm5, %ymm6, %ymm5
+
+	vpmovmskb %ymm5, %eax
+	testl	%eax, %eax
+	jz	L(loop_4x_vec)		/* No match in this 4 * VEC chunk.  */
+
+	/* There is a match.  */
+	vpmovmskb %ymm4, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x3)
+
+	vpmovmskb %ymm3, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x2)
+
+	vpmovmskb %ymm2, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x1)
+
+	vpmovmskb %ymm1, %eax
+	bsrl	%eax, %eax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_4x_vec_or_less):
+	addl	$(VEC_SIZE * 4), %edx
+	cmpl	$(VEC_SIZE * 2), %edx
+	jbe	L(last_2x_vec)
+
+	vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x3)
+
+	vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm2
+	vpmovmskb %ymm2, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x2)
+
+	vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm3
+	vpmovmskb %ymm3, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x1_check)
+	cmpl	$(VEC_SIZE * 3), %edx
+	jbe	L(zero)
+
+	vpcmpeqb (%rdi), %ymm0, %ymm4
+	vpmovmskb %ymm4, %eax
+	testl	%eax, %eax
+	jz	L(zero)
+	bsrl	%eax, %eax
+	subq	$(VEC_SIZE * 4), %rdx
+	addq	%rax, %rdx
+	jl	L(zero)
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_2x_vec):
+	vpcmpeqb (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(last_vec_x3_check)
+	cmpl	$VEC_SIZE, %edx
+	jbe	L(zero)
+
+	vpcmpeqb (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jz	L(zero)
+	bsrl	%eax, %eax
+	subq	$(VEC_SIZE * 2), %rdx
+	addq	%rax, %rdx
+	jl	L(zero)
+	addl	$(VEC_SIZE * 2), %eax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_vec_x0):
+	bsrl	%eax, %eax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_vec_x1):
+	bsrl	%eax, %eax
+	addl	$VEC_SIZE, %eax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_vec_x2):
+	bsrl	%eax, %eax
+	addl	$(VEC_SIZE * 2), %eax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_vec_x3):	/* Match in the VEC at RDI + 3 * VEC_SIZE; EAX is its mask.  */
+	bsrl	%eax, %eax	/* Index of the last matching byte.  */
+	leaq	(VEC_SIZE * 3)(%rdi, %rax), %rax /* RAX = address of that byte.  */
+	VZEROUPPER		/* YMM was used; clear upper state like the other exits.  */
+	ret
+
+	.p2align 4
+L(last_vec_x1_check):
+	bsrl	%eax, %eax
+	subq	$(VEC_SIZE * 3), %rdx
+	addq	%rax, %rdx
+	jl	L(zero)
+	addl	$VEC_SIZE, %eax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_vec_x3_check):
+	bsrl	%eax, %eax
+	subq	$VEC_SIZE, %rdx
+	addq	%rax, %rdx
+	jl	L(zero)
+	addl	$(VEC_SIZE * 3), %eax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(zero):	/* No match found: return NULL.  */
+L(null):	/* Zero length: YMM0 was already written by the broadcast.  */
+	VZEROUPPER
+	xorl	%eax, %eax
+	ret
+
+	.p2align 4
+L(last_vec_or_less_aligned):
+	movl	%edx, %ecx
+
+	vpcmpeqb (%rdi), %ymm0, %ymm1
+
+	movl	$1, %edx
+	/* Support rdx << 32.  */
+	salq	%cl, %rdx
+	subq	$1, %rdx
+
+	vpmovmskb %ymm1, %eax
+
+	/* Remove the trailing bytes.  */
+	andl	%edx, %eax
+	testl	%eax, %eax
+	jz	L(zero)
+
+	bsrl	%eax, %eax
+	addq	%rdi, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_vec_or_less):
+	addl	$VEC_SIZE, %edx
+
+	/* Check for zero length.  */
+	testl	%edx, %edx
+	jz	L(null)
+
+	movl	%edi, %ecx
+	andl	$(VEC_SIZE - 1), %ecx
+	jz	L(last_vec_or_less_aligned)
+
+	movl	%ecx, %esi
+	movl	%ecx, %r8d
+	addl	%edx, %esi
+	andq	$-VEC_SIZE, %rdi
+
+	subl	$VEC_SIZE, %esi
+	ja	L(last_vec_2x_aligned)
+
+	/* Check the last VEC.  */
+	vpcmpeqb (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+
+	/* Remove the leading and trailing bytes.  */
+	sarl	%cl, %eax
+	movl	%edx, %ecx
+
+	movl	$1, %edx
+	sall	%cl, %edx
+	subl	$1, %edx
+
+	andl	%edx, %eax
+	testl	%eax, %eax
+	jz	L(zero)
+
+	bsrl	%eax, %eax
+	addq	%rdi, %rax
+	addq	%r8, %rax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_vec_2x_aligned):
+	movl	%esi, %ecx
+
+	/* Check the last VEC.  */
+	vpcmpeqb VEC_SIZE(%rdi), %ymm0, %ymm1
+
+	movl	$1, %edx
+	sall	%cl, %edx
+	subl	$1, %edx
+
+	vpmovmskb %ymm1, %eax
+
+	/* Remove the trailing bytes.  */
+	andl	%edx, %eax
+
+	testl	%eax, %eax
+	jnz	L(last_vec_x1)
+
+	/* Check the second last VEC.  */
+	vpcmpeqb (%rdi), %ymm0, %ymm1
+
+	movl	%r8d, %ecx
+
+	vpmovmskb %ymm1, %eax
+
+	/* Remove the leading bytes.  Must use unsigned right shift for
+	   bsrl below.  */
+	shrl	%cl, %eax
+	testl	%eax, %eax
+	jz	L(zero)
+
+	bsrl	%eax, %eax
+	addq	%rdi, %rax
+	addq	%r8, %rax
+	VZEROUPPER
+	ret
+END (__memrchr_avx2)
+#endif
diff --git a/sysdeps/x86_64/memmove.c b/sysdeps/x86_64/multiarch/memrchr-sse2.S
index 07f81852d6..12281663ec 100644
--- a/sysdeps/x86_64/memmove.c
+++ b/sysdeps/x86_64/multiarch/memrchr-sse2.S
@@ -1,4 +1,5 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* memrchr optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,12 +16,11 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include "string/memmove.c"
+#if IS_IN (libc)
+# define __memrchr __memrchr_sse2
 
-#if !defined memmove && IS_IN (libc)
-#include <shlib-compat.h>
-
-#if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
-compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
-#endif
+# undef weak_alias
+# define weak_alias(__memrchr, memrchr)
 #endif
+
+#include "../memrchr.S"
diff --git a/sysdeps/x86_64/multiarch/memrchr.c b/sysdeps/x86_64/multiarch/memrchr.c
new file mode 100644
index 0000000000..d227fe7819
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memrchr.c
@@ -0,0 +1,31 @@
+/* Multiple versions of memrchr
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define memrchr __redirect_memrchr
+# include <string.h>
+# undef memrchr
+
+# define SYMBOL_NAME memrchr
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ());
+weak_alias (__memrchr, memrchr)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
new file mode 100644
index 0000000000..7ab3d89849
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
@@ -0,0 +1,22 @@
+#if IS_IN (libc)
+# define VEC_SIZE	32
+# define VEC(i)		ymm##i
+# define VMOVU		vmovdqu
+# define VMOVA		vmovdqa
+
+# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+  vmovd d, %xmm0; \
+  movq r, %rax; \
+  vpbroadcastb %xmm0, %ymm0
+
+# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+  vmovd d, %xmm0; \
+  movq r, %rax; \
+  vpbroadcastd %xmm0, %ymm0
+
+# define SECTION(p)		p##.avx
+# define MEMSET_SYMBOL(p,s)	p##_avx2_##s
+# define WMEMSET_SYMBOL(p,s)	p##_avx2_##s
+
+# include "memset-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset-avx2.S b/sysdeps/x86_64/multiarch/memset-avx2.S
deleted file mode 100644
index df634728d4..0000000000
--- a/sysdeps/x86_64/multiarch/memset-avx2.S
+++ /dev/null
@@ -1,168 +0,0 @@
-/* memset with AVX2
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#if IS_IN (libc)
-
-#include "asm-syntax.h"
-#ifndef MEMSET
-# define MEMSET	__memset_avx2
-# define MEMSET_CHK	__memset_chk_avx2
-#endif
-
-	.section .text.avx2,"ax",@progbits
-#if defined PIC
-ENTRY (MEMSET_CHK)
-	cmpq	%rdx, %rcx
-	jb	HIDDEN_JUMPTARGET (__chk_fail)
-END (MEMSET_CHK)
-#endif
-
-ENTRY (MEMSET)
-	vpxor	%xmm0, %xmm0, %xmm0
-	vmovd	%esi, %xmm1
-	lea	(%rdi, %rdx), %rsi
-	mov	%rdi, %rax
-	vpshufb	%xmm0, %xmm1, %xmm0
-	cmp	$16, %rdx
-	jb	L(less_16bytes)
-	cmp	$256, %rdx
-	jae	L(256bytesormore)
-	cmp	$128, %dl
-	jb	L(less_128bytes)
-	vmovdqu	%xmm0, (%rdi)
-	vmovdqu %xmm0, 0x10(%rdi)
-	vmovdqu %xmm0, 0x20(%rdi)
-	vmovdqu %xmm0, 0x30(%rdi)
-	vmovdqu %xmm0, 0x40(%rdi)
-	vmovdqu %xmm0, 0x50(%rdi)
-	vmovdqu %xmm0, 0x60(%rdi)
-	vmovdqu %xmm0, 0x70(%rdi)
-	vmovdqu %xmm0, -0x80(%rsi)
-	vmovdqu %xmm0, -0x70(%rsi)
-	vmovdqu %xmm0, -0x60(%rsi)
-	vmovdqu %xmm0, -0x50(%rsi)
-	vmovdqu %xmm0, -0x40(%rsi)
-	vmovdqu %xmm0, -0x30(%rsi)
-	vmovdqu %xmm0, -0x20(%rsi)
-	vmovdqu %xmm0, -0x10(%rsi)
-	ret
-
-	.p2align 4
-L(less_128bytes):
-	cmp	$64, %dl
-	jb	L(less_64bytes)
-	vmovdqu %xmm0, (%rdi)
-	vmovdqu %xmm0, 0x10(%rdi)
-	vmovdqu %xmm0, 0x20(%rdi)
-	vmovdqu %xmm0, 0x30(%rdi)
-	vmovdqu %xmm0, -0x40(%rsi)
-	vmovdqu %xmm0, -0x30(%rsi)
-	vmovdqu %xmm0, -0x20(%rsi)
-	vmovdqu %xmm0, -0x10(%rsi)
-	ret
-
-	.p2align 4
-L(less_64bytes):
-	cmp	$32, %dl
-	jb	L(less_32bytes)
-	vmovdqu %xmm0, (%rdi)
-	vmovdqu %xmm0, 0x10(%rdi)
-	vmovdqu %xmm0, -0x20(%rsi)
-	vmovdqu %xmm0, -0x10(%rsi)
-	ret
-
-	.p2align 4
-L(less_32bytes):
-	vmovdqu %xmm0, (%rdi)
-	vmovdqu %xmm0, -0x10(%rsi)
-	ret
-
-	.p2align 4
-L(less_16bytes):
-	cmp	$8, %dl
-	jb	L(less_8bytes)
-	vmovq %xmm0, (%rdi)
-	vmovq %xmm0, -0x08(%rsi)
-	ret
-
-	.p2align 4
-L(less_8bytes):
-	vmovd	%xmm0, %ecx
-	cmp	$4, %dl
-	jb	L(less_4bytes)
-	mov	%ecx, (%rdi)
-	mov	%ecx, -0x04(%rsi)
-	ret
-
-	.p2align 4
-L(less_4bytes):
-	cmp	$2, %dl
-	jb	L(less_2bytes)
-	mov	%cx, (%rdi)
-	mov	%cx, -0x02(%rsi)
-	ret
-
-	.p2align 4
-L(less_2bytes):
-	cmp	$1, %dl
-	jb	L(less_1bytes)
-	mov	%cl, (%rdi)
-L(less_1bytes):
-	ret
-
-	.p2align 4
-L(256bytesormore):
-	vinserti128 $1, %xmm0, %ymm0, %ymm0
-	and	$-0x20, %rdi
-	add	$0x20, %rdi
-	vmovdqu	%ymm0, (%rax)
-	sub	%rdi, %rax
-	lea	-0x80(%rax, %rdx), %rcx
-	cmp	$4096, %rcx
-	ja	L(gobble_data)
-L(gobble_128_loop):
-	vmovdqa	%ymm0, (%rdi)
-	vmovdqa	%ymm0, 0x20(%rdi)
-	vmovdqa	%ymm0, 0x40(%rdi)
-	vmovdqa	%ymm0, 0x60(%rdi)
-	sub	$-0x80, %rdi
-	add	$-0x80, %ecx
-	jb	L(gobble_128_loop)
-	mov	%rsi, %rax
-	vmovdqu	%ymm0, -0x80(%rsi)
-	vmovdqu	%ymm0, -0x60(%rsi)
-	vmovdqu	%ymm0, -0x40(%rsi)
-	vmovdqu	%ymm0, -0x20(%rsi)
-	sub	%rdx, %rax
-	vzeroupper
-	ret
-
-	.p2align 4
-L(gobble_data):
-	sub	$-0x80, %rcx
-	vmovd	%xmm0, %eax
-	rep	stosb
-	mov	%rsi, %rax
-	sub	%rdx, %rax
-	vzeroupper
-	ret
-
-END (MEMSET)
-#endif
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
index 1e638d7ac2..689cc1199c 100644
--- a/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memset-avx512-no-vzeroupper.S
@@ -1,5 +1,5 @@
 /* memset optimized with AVX512 for KNL hardware.
-   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,7 +18,7 @@
 
 #include <sysdep.h>
 
-#if defined HAVE_AVX512_ASM_SUPPORT && IS_IN (libc)
+#if IS_IN (libc)
 
 #include "asm-syntax.h"
 #ifndef MEMSET
@@ -26,7 +26,7 @@
 # define MEMSET_CHK __memset_chk_avx512_no_vzeroupper
 #endif
 
-	.section .text,"ax",@progbits
+	.section .text.avx512,"ax",@progbits
 #if defined PIC
 ENTRY (MEMSET_CHK)
 	cmpq	%rdx, %rcx
diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
new file mode 100644
index 0000000000..0783979ca5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
@@ -0,0 +1,24 @@
+#if IS_IN (libc)
+# define VEC_SIZE	64
+# define VEC(i)		zmm##i
+# define VMOVU		vmovdqu64
+# define VMOVA		vmovdqa64
+
+# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+  vmovd d, %xmm0; \
+  movq r, %rax; \
+  vpbroadcastb %xmm0, %xmm0; \
+  vpbroadcastq %xmm0, %zmm0
+
+# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+  vmovd d, %xmm0; \
+  movq r, %rax; \
+  vpbroadcastd %xmm0, %xmm0; \
+  vpbroadcastq %xmm0, %zmm0
+
+# define SECTION(p)		p##.avx512
+# define MEMSET_SYMBOL(p,s)	p##_avx512_##s
+# define WMEMSET_SYMBOL(p,s)	p##_avx512_##s
+
+# include "memset-vec-unaligned-erms.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemcmp.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
index b510f756e2..be6671759b 100644
--- a/sysdeps/x86_64/multiarch/wmemcmp.S
+++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
@@ -1,7 +1,6 @@
-/* Multiple versions of wmemcmp
+/* memset with SSE2.
    All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
+   Copyright (C) 2014-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,26 +18,24 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <shlib-compat.h>
 #include <init-arch.h>
 
-/* Define multiple versions only for the definition in libc. */
 #if IS_IN (libc)
-	.text
-ENTRY(wmemcmp)
-	.type	wmemcmp, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	HAS_CPU_FEATURE (SSSE3)
-	jnz	2f
-	leaq	__wmemcmp_sse2(%rip), %rax
-	ret
-
-2:	HAS_CPU_FEATURE (SSE4_1)
-	jz	3f
-	leaq	__wmemcmp_sse4_1(%rip), %rax
-	ret
-
-3:	leaq	__wmemcmp_ssse3(%rip), %rax
-	ret
-
-END(wmemcmp)
+# define MEMSET_SYMBOL(p,s)	p##_sse2_##s
+# define WMEMSET_SYMBOL(p,s)	p##_sse2_##s
+
+# ifdef SHARED
+#  undef libc_hidden_builtin_def
+#  define libc_hidden_builtin_def(name)
+# endif
+
+# undef weak_alias
+# define weak_alias(original, alias) \
+	.weak bzero; bzero = __bzero
+
+# undef strong_alias
+# define strong_alias(ignored1, ignored2)
 #endif
+
+#include <sysdeps/x86_64/memset.S>
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
new file mode 100644
index 0000000000..dc9cb88b37
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -0,0 +1,274 @@
+/* memset/bzero with unaligned store and rep stosb
+   Copyright (C) 2016-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* memset is implemented as:
+   1. Use overlapping store to avoid branch.
+   2. If size is less than VEC, use integer register stores.
+   3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores.
+   4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores.
+   5. If size is more than 4 * VEC_SIZE, align to 4 * VEC_SIZE with
+      4 VEC stores and store 4 * VEC at a time until done.  */
+
+#include <sysdep.h>
+
+#ifndef MEMSET_CHK_SYMBOL
+# define MEMSET_CHK_SYMBOL(p,s)		MEMSET_SYMBOL(p, s)
+#endif
+
+#ifndef WMEMSET_CHK_SYMBOL
+# define WMEMSET_CHK_SYMBOL(p,s)	WMEMSET_SYMBOL(p, s)
+#endif
+
+#ifndef VZEROUPPER
+# if VEC_SIZE > 16
+#  define VZEROUPPER			vzeroupper
+# else
+#  define VZEROUPPER
+# endif
+#endif
+
+#ifndef VZEROUPPER_SHORT_RETURN
+# if VEC_SIZE > 16
+#  define VZEROUPPER_SHORT_RETURN	vzeroupper
+# else
+#  define VZEROUPPER_SHORT_RETURN	rep
+# endif
+#endif
+
+#ifndef MOVQ
+# if VEC_SIZE > 16
+#  define MOVQ				vmovq
+# else
+#  define MOVQ				movq
+# endif
+#endif
+
+/* Threshold to use Enhanced REP STOSB.  Since there is overhead to set
+   up REP STOSB operation, REP STOSB isn't faster on short data.  The
+   memset micro benchmark in glibc shows that 2KB is the approximate
+   value above which REP STOSB becomes faster on processors with
+   Enhanced REP STOSB.  Since the stored value is fixed, larger register
+   size has minimal impact on threshold.  */
+#ifndef REP_STOSB_THRESHOLD
+# define REP_STOSB_THRESHOLD		2048
+#endif
+
+#ifndef SECTION
+# error SECTION is not defined!
+#endif
+
+	.section SECTION(.text),"ax",@progbits
+#if VEC_SIZE == 16 && IS_IN (libc)
+ENTRY (__bzero)
+	movq	%rdi, %rax /* Set return value.  */
+	movq	%rsi, %rdx /* Set n.  */
+	pxor	%xmm0, %xmm0
+	jmp	L(entry_from_bzero)
+END (__bzero)
+weak_alias (__bzero, bzero)
+#endif
+
+#if IS_IN (libc)
+# if defined SHARED
+ENTRY_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (WMEMSET_CHK_SYMBOL (__wmemset_chk, unaligned))
+# endif
+
+ENTRY (WMEMSET_SYMBOL (__wmemset, unaligned))
+	shlq	$2, %rdx
+	WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
+	jmp	L(entry_from_bzero)
+END (WMEMSET_SYMBOL (__wmemset, unaligned))
+#endif
+
+#if defined SHARED && IS_IN (libc)
+ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned))
+#endif
+
+ENTRY (MEMSET_SYMBOL (__memset, unaligned))
+	MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
+L(entry_from_bzero):
+	cmpq	$VEC_SIZE, %rdx
+	jb	L(less_vec)
+	cmpq	$(VEC_SIZE * 2), %rdx
+	ja	L(more_2x_vec)
+	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
+	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
+	VMOVU	%VEC(0), (%rdi)
+	VZEROUPPER
+	ret
+#if defined USE_MULTIARCH && IS_IN (libc)
+END (MEMSET_SYMBOL (__memset, unaligned))
+
+# if VEC_SIZE == 16
+ENTRY (__memset_chk_erms)
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END (__memset_chk_erms)
+
+/* Only used to measure performance of REP STOSB.  */
+ENTRY (__memset_erms)
+	/* Skip zero length.  */
+	testq	%rdx, %rdx
+	jnz	 L(stosb)
+	movq	%rdi, %rax
+	ret
+# else
+/* Provide a hidden symbol to debugger.  */
+	.hidden	MEMSET_SYMBOL (__memset, erms)
+ENTRY (MEMSET_SYMBOL (__memset, erms))
+# endif
+L(stosb):
+	/* Issue vzeroupper before rep stosb.  */
+	VZEROUPPER
+	movq	%rdx, %rcx
+	movzbl	%sil, %eax
+	movq	%rdi, %rdx
+	rep stosb
+	movq	%rdx, %rax
+	ret
+# if VEC_SIZE == 16
+END (__memset_erms)
+# else
+END (MEMSET_SYMBOL (__memset, erms))
+# endif
+
+# if defined SHARED && IS_IN (libc)
+ENTRY_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
+	cmpq	%rdx, %rcx
+	jb	HIDDEN_JUMPTARGET (__chk_fail)
+END_CHK (MEMSET_CHK_SYMBOL (__memset_chk, unaligned_erms))
+# endif
+
+ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
+	MEMSET_VDUP_TO_VEC0_AND_SET_RETURN (%esi, %rdi)
+	cmpq	$VEC_SIZE, %rdx
+	jb	L(less_vec)
+	cmpq	$(VEC_SIZE * 2), %rdx
+	ja	L(stosb_more_2x_vec)
+	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
+	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
+	VMOVU	%VEC(0), (%rdi)
+	VZEROUPPER
+	ret
+
+L(stosb_more_2x_vec):
+	cmpq	$REP_STOSB_THRESHOLD, %rdx
+	ja	L(stosb)
+#endif
+L(more_2x_vec):
+	cmpq  $(VEC_SIZE * 4), %rdx
+	ja	L(loop_start)
+	VMOVU	%VEC(0), (%rdi)
+	VMOVU	%VEC(0), VEC_SIZE(%rdi)
+	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
+	VMOVU	%VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
+L(return):
+	VZEROUPPER
+	ret
+
+L(loop_start):
+	leaq	(VEC_SIZE * 4)(%rdi), %rcx
+	VMOVU	%VEC(0), (%rdi)
+	andq	$-(VEC_SIZE * 4), %rcx
+	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
+	VMOVU	%VEC(0), VEC_SIZE(%rdi)
+	VMOVU	%VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
+	VMOVU	%VEC(0), (VEC_SIZE * 2)(%rdi)
+	VMOVU	%VEC(0), -(VEC_SIZE * 3)(%rdi,%rdx)
+	VMOVU	%VEC(0), (VEC_SIZE * 3)(%rdi)
+	VMOVU	%VEC(0), -(VEC_SIZE * 4)(%rdi,%rdx)
+	addq	%rdi, %rdx
+	andq	$-(VEC_SIZE * 4), %rdx
+	cmpq	%rdx, %rcx
+	je	L(return)
+L(loop):
+	VMOVA	%VEC(0), (%rcx)
+	VMOVA	%VEC(0), VEC_SIZE(%rcx)
+	VMOVA	%VEC(0), (VEC_SIZE * 2)(%rcx)
+	VMOVA	%VEC(0), (VEC_SIZE * 3)(%rcx)
+	addq	$(VEC_SIZE * 4), %rcx
+	cmpq	%rcx, %rdx
+	jne	L(loop)
+	VZEROUPPER_SHORT_RETURN
+	ret
+L(less_vec):
+	/* Less than 1 VEC, so the size fits in DL and byte compares suffice.  */
+# if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
+#  error Unsupported VEC_SIZE!
+# endif
+# if VEC_SIZE > 32
+	cmpb	$32, %dl
+	jae	L(between_32_63)
+# endif
+# if VEC_SIZE > 16
+	cmpb	$16, %dl
+	jae	L(between_16_31)
+# endif
+	MOVQ	%xmm0, %rcx	/* Low 8 bytes of VEC(0) for the integer stores.  */
+	cmpb	$8, %dl
+	jae	L(between_8_15)
+	cmpb	$4, %dl
+	jae	L(between_4_7)
+	cmpb	$1, %dl
+	ja	L(between_2_3)
+	jb	1f		/* Size 0: store nothing.  */
+	movb	%cl, (%rdi)	/* Size 1: store a single byte.  */
+1:
+	VZEROUPPER
+	ret
+# if VEC_SIZE > 32
+	/* From 32 to 63.  No branch when size == 32.  */
+L(between_32_63):
+	vmovdqu	%ymm0, -32(%rdi,%rdx)
+	vmovdqu	%ymm0, (%rdi)
+	VZEROUPPER
+	ret
+# endif
+# if VEC_SIZE > 16
+	/* From 16 to 31.  No branch when size == 16.  */
+L(between_16_31):
+	vmovdqu	%xmm0, -16(%rdi,%rdx)
+	vmovdqu	%xmm0, (%rdi)
+	VZEROUPPER
+	ret
+# endif
+	/* From 8 to 15.  No branch when size == 8.  */
+L(between_8_15):
+	movq	%rcx, -8(%rdi,%rdx)
+	movq	%rcx, (%rdi)
+	VZEROUPPER
+	ret
+L(between_4_7):
+	/* From 4 to 7.  No branch when size == 4.  */
+	movl	%ecx, -4(%rdi,%rdx)
+	movl	%ecx, (%rdi)
+	VZEROUPPER
+	ret
+L(between_2_3):
+	/* From 2 to 3.  No branch when size == 2.  */
+	movw	%cx, -2(%rdi,%rdx)
+	movw	%cx, (%rdi)
+	VZEROUPPER
+	ret
+END (MEMSET_SYMBOL (__memset, unaligned_erms))
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
deleted file mode 100644
index 8e3b9b9764..0000000000
--- a/sysdeps/x86_64/multiarch/memset.S
+++ /dev/null
@@ -1,64 +0,0 @@
-/* Multiple versions of memset
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#if IS_IN (libc)
-ENTRY(memset)
-	.type	memset, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__memset_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-	jz	2f
-	leaq	__memset_avx2(%rip), %rax
-#ifdef HAVE_AVX512_ASM_SUPPORT
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz	2f
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jz	2f
-	leaq	__memset_avx512_no_vzeroupper(%rip), %rax
-#endif
-2:	ret
-END(memset)
-#endif
-
-#if IS_IN (libc)
-# undef memset
-# define memset __memset_sse2
-
-# undef __memset_chk
-# define __memset_chk __memset_chk_sse2
-
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal memset calls through a PLT.
-   The speedup we get from using GPR instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_memset; __GI_memset = __memset_sse2
-# endif
-
-# undef strong_alias
-# define strong_alias(original, alias)
-#endif
-
-#include "../memset.S"
diff --git a/sysdeps/x86_64/multiarch/memset.c b/sysdeps/x86_64/multiarch/memset.c
new file mode 100644
index 0000000000..064841d5fc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset.c
@@ -0,0 +1,35 @@
+/* Multiple versions of memset.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define memset __redirect_memset
+# include <string.h>
+# undef memset
+
+# define SYMBOL_NAME memset
+# include "ifunc-memset.h"
+
+libc_ifunc_redirected (__redirect_memset, memset, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (memset, __GI_memset, __redirect_memset)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset_chk-nonshared.S b/sysdeps/x86_64/multiarch/memset_chk-nonshared.S
new file mode 100644
index 0000000000..dcc2384a27
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of memset_chk for x86-64.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/memset_chk.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/memset_chk.S b/sysdeps/x86_64/multiarch/memset_chk.S
deleted file mode 100644
index 9a7b270274..0000000000
--- a/sysdeps/x86_64/multiarch/memset_chk.S
+++ /dev/null
@@ -1,49 +0,0 @@
-/* Multiple versions of memset_chk
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib.  */
-#if IS_IN (libc)
-# ifdef SHARED
-ENTRY(__memset_chk)
-	.type	__memset_chk, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__memset_chk_sse2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-	jz	2f
-	leaq	__memset_chk_avx2(%rip), %rax
-#ifdef HAVE_AVX512_ASM_SUPPORT
-	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz	2f
-	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jz	2f
-	leaq	__memset_chk_avx512_no_vzeroupper(%rip), %rax
-#endif
-2:	ret
-END(__memset_chk)
-
-strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
-	.section .gnu.warning.__memset_zero_constant_len_parameter
-	.string "memset used with constant zero length parameter; this could be due to transposed parameters"
-# else
-#  include "../memset_chk.S"
-# endif
-#endif
diff --git a/sysdeps/x86_64/multiarch/memset_chk.c b/sysdeps/x86_64/multiarch/memset_chk.c
new file mode 100644
index 0000000000..f9c05b364e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memset_chk.c
@@ -0,0 +1,31 @@
+/* Multiple versions of __memset_chk
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.so. */
+#if IS_IN (libc) && defined SHARED
+# define __memset_chk __redirect_memset_chk
+# include <string.h>
+# undef __memset_chk
+
+# define SYMBOL_NAME memset_chk
+# include "ifunc-memset.h"
+
+libc_ifunc_redirected (__redirect_memset_chk, __memset_chk,
+		       IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2.S
new file mode 100644
index 0000000000..128f9ea637
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2.S
@@ -0,0 +1,4 @@
+#define MEMCHR __rawmemchr_avx2
+#define USE_AS_RAWMEMCHR 1
+
+#include "memchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-sse2.S b/sysdeps/x86_64/multiarch/rawmemchr-sse2.S
new file mode 100644
index 0000000000..c681d84037
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rawmemchr-sse2.S
@@ -0,0 +1,29 @@
+/* rawmemchr optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define __rawmemchr __rawmemchr_sse2
+
+# undef weak_alias
+# define weak_alias(__rawmemchr, rawmemchr)
+# undef libc_hidden_def
+# define libc_hidden_def(__rawmemchr)
+#endif
+
+#include "../rawmemchr.S"
diff --git a/sysdeps/x86_64/multiarch/rawmemchr.c b/sysdeps/x86_64/multiarch/rawmemchr.c
new file mode 100644
index 0000000000..8a0bc3137e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/rawmemchr.c
@@ -0,0 +1,38 @@
+/* Multiple versions of rawmemchr
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define rawmemchr __redirect_rawmemchr
+# define __rawmemchr __redirect___rawmemchr
+# include <string.h>
+# undef rawmemchr
+# undef __rawmemchr
+
+# define SYMBOL_NAME rawmemchr
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr,
+		       IFUNC_SELECTOR ());
+weak_alias (__rawmemchr, rawmemchr)
+# ifdef SHARED
+__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr)
+  __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
index b75aeb79b2..d10d74ae21 100644
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ b/sysdeps/x86_64/multiarch/sched_cpucount.c
@@ -1,6 +1,6 @@
 /* Count bits in CPU set.  x86-64 multi-arch version.
    This file is part of the GNU C Library.
-   Copyright (C) 2008-2016 Free Software Foundation, Inc.
+   Copyright (C) 2008-2018 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper <drepper@redhat.com>.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/stpcpy-sse2.S b/sysdeps/x86_64/multiarch/stpcpy-sse2.S
new file mode 100644
index 0000000000..b91a988399
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/stpcpy-sse2.S
@@ -0,0 +1,33 @@
+/* stpcpy optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define __stpcpy __stpcpy_sse2
+
+# undef weak_alias
+# define weak_alias(ignored1, ignored2)
+# undef libc_hidden_def
+# define libc_hidden_def(__stpcpy)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(stpcpy)
+#endif
+
+#define USE_AS_STPCPY
+#include <sysdeps/x86_64/stpcpy.S>
diff --git a/sysdeps/x86_64/multiarch/stpcpy.S b/sysdeps/x86_64/multiarch/stpcpy.S
deleted file mode 100644
index ee81ab6ae3..0000000000
--- a/sysdeps/x86_64/multiarch/stpcpy.S
+++ /dev/null
@@ -1,9 +0,0 @@
-/* Multiple versions of stpcpy
-   All versions must be listed in ifunc-impl-list.c.  */
-#define USE_AS_STPCPY
-#define STRCPY __stpcpy
-#include "strcpy.S"
-
-weak_alias (__stpcpy, stpcpy)
-libc_hidden_def (__stpcpy)
-libc_hidden_builtin_def (stpcpy)
diff --git a/sysdeps/x86_64/multiarch/stpcpy.c b/sysdeps/x86_64/multiarch/stpcpy.c
new file mode 100644
index 0000000000..1e340fca99
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/stpcpy.c
@@ -0,0 +1,42 @@
+/* Multiple versions of stpcpy.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define stpcpy __redirect_stpcpy
+# define __stpcpy __redirect___stpcpy
+# define NO_MEMPCPY_STPCPY_REDIRECT
+# define __NO_STRING_INLINES
+# include <string.h>
+# undef stpcpy
+# undef __stpcpy
+
+# define SYMBOL_NAME stpcpy
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ());
+
+weak_alias (__stpcpy, stpcpy)
+# ifdef SHARED
+__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy)
+  __attribute__ ((visibility ("hidden")));
+__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
index 2fde77dcab..b016e487e1 100644
--- a/sysdeps/x86_64/multiarch/stpncpy-c.c
+++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
@@ -1,8 +1,7 @@
 #define STPNCPY __stpncpy_sse2
-#ifdef SHARED
+#undef weak_alias
+#define weak_alias(ignored1, ignored2)
 #undef libc_hidden_def
-#define libc_hidden_def(name) \
-  __hidden_ver1 (__stpncpy_sse2, __GI___stpncpy, __stpncpy_sse2);
-#endif
+#define libc_hidden_def(stpncpy)
 
-#include "stpncpy.c"
+#include <string/stpncpy.c>
diff --git a/sysdeps/x86_64/multiarch/stpncpy.S b/sysdeps/x86_64/multiarch/stpncpy.S
deleted file mode 100644
index 2698ca6a8c..0000000000
--- a/sysdeps/x86_64/multiarch/stpncpy.S
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Multiple versions of stpncpy
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCPY __stpncpy
-#define USE_AS_STPCPY
-#define USE_AS_STRNCPY
-#include "strcpy.S"
-
-weak_alias (__stpncpy, stpncpy)
diff --git a/sysdeps/x86_64/multiarch/stpncpy.c b/sysdeps/x86_64/multiarch/stpncpy.c
new file mode 100644
index 0000000000..28842ece2b
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/stpncpy.c
@@ -0,0 +1,38 @@
+/* Multiple versions of stpncpy.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define stpncpy __redirect_stpncpy
+# define __stpncpy __redirect___stpncpy
+# include <string.h>
+# undef stpncpy
+# undef __stpncpy
+
+# define SYMBOL_NAME stpncpy
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_stpncpy, __stpncpy, IFUNC_SELECTOR ());
+
+weak_alias (__stpncpy, stpncpy)
+# ifdef SHARED
+__hidden_ver1 (__stpncpy, __GI___stpncpy, __redirect___stpncpy)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcasecmp.c b/sysdeps/x86_64/multiarch/strcasecmp.c
new file mode 100644
index 0000000000..8676a621c6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp.c
@@ -0,0 +1,39 @@
+/* Multiple versions of strcasecmp.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strcasecmp __redirect_strcasecmp
+# define __strcasecmp __redirect___strcasecmp
+# include <string.h>
+# undef strcasecmp
+# undef __strcasecmp
+
+# define SYMBOL_NAME strcasecmp
+# include "ifunc-strcasecmp.h"
+
+libc_ifunc_redirected (__redirect_strcasecmp, __strcasecmp,
+		       IFUNC_SELECTOR ());
+
+weak_alias (__strcasecmp, strcasecmp)
+# ifdef SHARED
+__hidden_ver1 (__strcasecmp, __GI___strcasecmp, __redirect___strcasecmp)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
new file mode 100644
index 0000000000..56a03547eb
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
@@ -0,0 +1,22 @@
+/* strcasecmp_l optimized with AVX.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define STRCMP_SSE42 __strcasecmp_l_avx
+#define USE_AVX 1
+#define USE_AS_STRCASECMP_L
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S
new file mode 100644
index 0000000000..2984640405
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp_l-sse2.S
@@ -0,0 +1,23 @@
+/* strcasecmp_l optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define STRCMP __strcasecmp_l_sse2
+#define USE_AS_STRCASECMP_L
+#define NO_NOLOCALE_ALIAS
+#define __strcasecmp __strcasecmp_sse2
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-sse4_2.S b/sysdeps/x86_64/multiarch/strcasecmp_l-sse4_2.S
new file mode 100644
index 0000000000..31e2f9075d
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp_l-sse4_2.S
@@ -0,0 +1,21 @@
+/* strcasecmp_l optimized with SSE4.2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define STRCMP_SSE42 __strcasecmp_l_sse42
+#define USE_AS_STRCASECMP_L
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l.S b/sysdeps/x86_64/multiarch/strcasecmp_l.S
deleted file mode 100644
index 49f5b9fd95..0000000000
--- a/sysdeps/x86_64/multiarch/strcasecmp_l.S
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Multiple versions of strcasecmp and strcasecmp_l
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCMP __strcasecmp_l
-#define USE_AS_STRCASECMP_L
-#include "strcmp.S"
-
-weak_alias (__strcasecmp_l, strcasecmp_l)
-libc_hidden_def (strcasecmp_l)
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l.c b/sysdeps/x86_64/multiarch/strcasecmp_l.c
new file mode 100644
index 0000000000..dc674510df
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcasecmp_l.c
@@ -0,0 +1,40 @@
+/* Multiple versions of strcasecmp_l.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strcasecmp_l __redirect_strcasecmp_l
+# define __strcasecmp_l __redirect___strcasecmp_l
+# include <string.h>
+# undef strcasecmp_l
+# undef __strcasecmp_l
+
+# define SYMBOL_NAME strcasecmp_l
+# include "ifunc-strcasecmp.h"
+
+libc_ifunc_redirected (__redirect_strcasecmp_l, __strcasecmp_l,
+		       IFUNC_SELECTOR ());
+
+weak_alias (__strcasecmp_l, strcasecmp_l)
+# ifdef SHARED
+__hidden_ver1 (__strcasecmp_l, __GI___strcasecmp_l,
+	       __redirect___strcasecmp_l)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 3a694d45c2..852f179bf4 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -1,5 +1,5 @@
 /* strcat with SSE2
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2.S b/sysdeps/x86_64/multiarch/strcat-sse2.S
new file mode 100644
index 0000000000..8eb64e104c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcat-sse2.S
@@ -0,0 +1,28 @@
+/* strcat optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define strcat __strcat_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcat)
+#endif
+
+#include <sysdeps/x86_64/strcat.S>
diff --git a/sysdeps/x86_64/multiarch/strcat-ssse3.S b/sysdeps/x86_64/multiarch/strcat-ssse3.S
index 96184d0f0f..2d4fd78f99 100644
--- a/sysdeps/x86_64/multiarch/strcat-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcat-ssse3.S
@@ -1,5 +1,5 @@
 /* strcat with SSSE3
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S
deleted file mode 100644
index 7bb38e68ad..0000000000
--- a/sysdeps/x86_64/multiarch/strcat.S
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Multiple versions of strcat
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifndef USE_AS_STRNCAT
-# ifndef STRCAT
-#  define STRCAT strcat
-# endif
-#endif
-
-#ifdef USE_AS_STRNCAT
-# define STRCAT_SSSE3	         	__strncat_ssse3
-# define STRCAT_SSE2	            	__strncat_sse2
-# define STRCAT_SSE2_UNALIGNED    	__strncat_sse2_unaligned
-# define __GI_STRCAT	            	__GI_strncat
-# define __GI___STRCAT              __GI___strncat
-#else
-# define STRCAT_SSSE3	         	__strcat_ssse3
-# define STRCAT_SSE2	            	__strcat_sse2
-# define STRCAT_SSE2_UNALIGNED    	__strcat_sse2_unaligned
-# define __GI_STRCAT	            	__GI_strcat
-# define __GI___STRCAT              __GI___strcat
-#endif
-
-
-/* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
-	.text
-ENTRY(STRCAT)
-	.type	STRCAT, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	STRCAT_SSE2_UNALIGNED(%rip), %rax
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	leaq	STRCAT_SSE2(%rip), %rax
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	leaq	STRCAT_SSSE3(%rip), %rax
-2:	ret
-END(STRCAT)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCAT_SSE2, @function; \
-	.align 16; \
-	.globl STRCAT_SSE2; \
-	.hidden STRCAT_SSE2; \
-	STRCAT_SSE2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCAT_SSE2, .-STRCAT_SSE2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCAT; __GI_STRCAT = STRCAT_SSE2
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
-	.globl __GI___STRCAT; __GI___STRCAT = STRCAT_SSE2
-#endif
-
-#ifndef USE_AS_STRNCAT
-# include "../strcat.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcat.c b/sysdeps/x86_64/multiarch/strcat.c
new file mode 100644
index 0000000000..1f7f6263f3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcat.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strcat.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strcat __redirect_strcat
+# include <string.h>
+# undef strcat
+
+# define SYMBOL_NAME strcat
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_strcat, strcat, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcat, __GI_strcat, __redirect_strcat)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S
new file mode 100644
index 0000000000..47bc3c9949
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-avx2.S
@@ -0,0 +1,254 @@
+/* strchr/strchrnul optimized with AVX2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRCHR
+#  define STRCHR	__strchr_avx2
+# endif
+
+# ifdef USE_AS_WCSCHR
+#  define VPBROADCAST	vpbroadcastd
+#  define VPCMPEQ	vpcmpeqd
+#  define CHAR_REG	esi
+# else
+#  define VPBROADCAST	vpbroadcastb
+#  define VPCMPEQ	vpcmpeqb
+#  define CHAR_REG	sil
+# endif
+
+# ifndef VZEROUPPER
+#  define VZEROUPPER	vzeroupper
+# endif
+
+# define VEC_SIZE 32
+
+	.section .text.avx,"ax",@progbits
+ENTRY (STRCHR)
+	movl	%edi, %ecx
+	/* Broadcast CHAR to YMM0 and zero YMM9 for the null-byte test.  */
+	vmovd	%esi, %xmm0
+	vpxor	%xmm9, %xmm9, %xmm9
+	VPBROADCAST %xmm0, %ymm0
+	/* Check if we may cross page boundary with one vector load.  */
+	andl	$(2 * VEC_SIZE - 1), %ecx
+	cmpl	$VEC_SIZE, %ecx
+	ja	L(cros_page_boundary)
+
+	/* Check the first VEC_SIZE bytes.  Search for both CHAR and the
+	   null byte.  */
+	vmovdqu	(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	/* Advance RDI to the next VEC_SIZE boundary for aligned loads.  */
+	addq	$VEC_SIZE, %rdi
+	andl	$(VEC_SIZE - 1), %ecx
+	andq	$-VEC_SIZE, %rdi
+
+	jmp	L(more_4x_vec)
+
+	.p2align 4
+L(cros_page_boundary):
+	andl	$(VEC_SIZE - 1), %ecx
+	andq	$-VEC_SIZE, %rdi
+	vmovdqu	(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	/* Drop the mask bits of the %ecx bytes before the string start.  */
+	sarl	%cl, %eax
+	testl	%eax, %eax
+	jz	L(aligned_more)
+	/* Found CHAR or the null byte; add back the skipped %rcx bytes.  */
+	tzcntl	%eax, %eax
+	addq	%rcx, %rax
+# ifdef USE_AS_STRCHRNUL
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax	/* NULL unless CHAR matched.  */
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(aligned_more):
+	addq	$VEC_SIZE, %rdi
+
+L(more_4x_vec):
+	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
+	   since data is only aligned to VEC_SIZE.  */
+	vmovdqa	(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	vmovdqa	VEC_SIZE(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+
+	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+
+	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm8
+	VPCMPEQ %ymm8, %ymm0, %ymm1
+	VPCMPEQ %ymm8, %ymm9, %ymm2
+	vpor	%ymm1, %ymm2, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x3)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+	/* Align data to 4 * VEC_SIZE.  */
+	movq	%rdi, %rcx
+	andl	$(4 * VEC_SIZE - 1), %ecx
+	andq	$-(4 * VEC_SIZE), %rdi
+
+	.p2align 4
+L(loop_4x_vec):
+	/* Compare 4 * VEC at a time forward.  */
+	vmovdqa	(%rdi), %ymm5
+	vmovdqa	VEC_SIZE(%rdi), %ymm6
+	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm7
+	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm8
+
+	VPCMPEQ %ymm5, %ymm0, %ymm1
+	VPCMPEQ %ymm6, %ymm0, %ymm2
+	VPCMPEQ %ymm7, %ymm0, %ymm3
+	VPCMPEQ %ymm8, %ymm0, %ymm4
+
+	VPCMPEQ %ymm5, %ymm9, %ymm5
+	VPCMPEQ %ymm6, %ymm9, %ymm6
+	VPCMPEQ %ymm7, %ymm9, %ymm7
+	VPCMPEQ %ymm8, %ymm9, %ymm8
+
+	vpor	%ymm1, %ymm5, %ymm1
+	vpor	%ymm2, %ymm6, %ymm2
+	vpor	%ymm3, %ymm7, %ymm3
+	vpor	%ymm4, %ymm8, %ymm4
+
+	vpor	%ymm1, %ymm2, %ymm5
+	vpor	%ymm3, %ymm4, %ymm6
+
+	vpor	%ymm5, %ymm6, %ymm5
+
+	vpmovmskb %ymm5, %eax
+	testl	%eax, %eax
+	jnz	L(4x_vec_end)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+	jmp	L(loop_4x_vec)
+
+	.p2align 4
+L(first_vec_x0):
+	/* Found CHAR or the null byte.  */
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x1):
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	$VEC_SIZE, %rax
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	VEC_SIZE(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x2):
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	$(VEC_SIZE * 2), %rax
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(VEC_SIZE * 2)(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(4x_vec_end):
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+	vpmovmskb %ymm2, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+	vpmovmskb %ymm3, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+	vpmovmskb %ymm4, %eax	/* Non-zero: vectors 1-3 had no hit.  */
+	testl	%eax, %eax
+L(first_vec_x3):
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRCHRNUL
+	addq	$(VEC_SIZE * 3), %rax
+	addq	%rdi, %rax
+# else
+	xorl	%edx, %edx
+	leaq	(VEC_SIZE * 3)(%rdi, %rax), %rax
+	cmp	(%rax), %CHAR_REG
+	cmovne	%rdx, %rax	/* NULL unless CHAR matched.  */
+# endif
+	VZEROUPPER
+	ret
+
+END (STRCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S
index 979d112b28..93fb661da2 100644
--- a/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S
+++ b/sysdeps/x86_64/multiarch/strchr-sse2-no-bsf.S
@@ -1,5 +1,5 @@
 /* strchr with SSE2 without bsf
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/multiarch/strchr-sse2.S b/sysdeps/x86_64/multiarch/strchr-sse2.S
new file mode 100644
index 0000000000..8a6e77195c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr-sse2.S
@@ -0,0 +1,28 @@
+/* strchr optimized with SSE2.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define strchr __strchr_sse2
+
+# undef weak_alias
+# define weak_alias(strchr, index)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strchr)
+#endif
+
+#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
deleted file mode 100644
index 40683ad32b..0000000000
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ /dev/null
@@ -1,57 +0,0 @@
-/* Multiple versions of strchr
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-
-/* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
-	.text
-ENTRY(strchr)
-	.type	strchr, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__strchr_sse2(%rip), %rax
-2:	HAS_ARCH_FEATURE (Slow_BSF)
-	jz	3f
-	leaq    __strchr_sse2_no_bsf(%rip), %rax
-3:	ret
-END(strchr)
-
-
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type __strchr_sse2, @function; \
-	.align 16; \
-	.globl __strchr_sse2; \
-	.hidden __strchr_sse2; \
-	__strchr_sse2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size __strchr_sse2, .-__strchr_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strchr calls through a PLT.
-   The speedup we get from using SSE4.2 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strchr; __GI_strchr = __strchr_sse2
-#endif
-
-#include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c
new file mode 100644
index 0000000000..76d64fb378
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchr.c
@@ -0,0 +1,55 @@
+/* Multiple versions of strchr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strchr __redirect_strchr
+# include <string.h>
+# undef strchr
+
+# define SYMBOL_NAME strchr
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features *features = __get_cpu_features ();
+
+  /* Use AVX2 only if it is usable and fast and VZEROUPPER is not avoided.  */
+  if (CPU_FEATURES_ARCH_P (features, AVX2_Usable)
+      && CPU_FEATURES_ARCH_P (features, AVX_Fast_Unaligned_Load)
+      && !CPU_FEATURES_ARCH_P (features, Prefer_No_VZEROUPPER))
+    return OPTIMIZE (avx2);
+
+  /* Otherwise choose between the two SSE2 variants by BSF speed.  */
+  return (CPU_FEATURES_ARCH_P (features, Slow_BSF)
+	  ? OPTIMIZE (sse2_no_bsf) : OPTIMIZE (sse2));
+}
+
+libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ());
+weak_alias (strchr, index)
+# ifdef SHARED
+__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr)
+  __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2.S b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
new file mode 100644
index 0000000000..fa0cc09760
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __strchrnul_avx2
+#define USE_AS_STRCHRNUL 1
+#include "strchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul-sse2.S b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
new file mode 100644
index 0000000000..d4a2be118e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul-sse2.S
@@ -0,0 +1,26 @@
+/* strchrnul optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define __strchrnul __strchrnul_sse2
+
+# undef weak_alias
+# define weak_alias(__strchrnul, strchrnul)
+#endif
+
+#include "../strchrnul.S"
diff --git a/sysdeps/x86_64/multiarch/strchrnul.c b/sysdeps/x86_64/multiarch/strchrnul.c
new file mode 100644
index 0000000000..7514999341
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strchrnul.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strchrnul.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strchrnul __redirect_strchrnul
+# define __strchrnul __redirect___strchrnul
+# include <string.h>
+# undef __strchrnul
+# undef strchrnul
+
+# define SYMBOL_NAME strchrnul
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_strchrnul, __strchrnul,
+		       IFUNC_SELECTOR ());
+weak_alias (__strchrnul, strchrnul)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
new file mode 100644
index 0000000000..e8397f3b05
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
@@ -0,0 +1,847 @@
+/* strcmp/wcscmp/strncmp/wcsncmp optimized with AVX2.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRCMP
+#  define STRCMP	__strcmp_avx2
+# endif
+
+# define PAGE_SIZE	4096
+
+/* VEC_SIZE = Number of bytes in a ymm register */
+# define VEC_SIZE	32
+
+/* Shift for dividing by (VEC_SIZE * 4).  */
+# define DIVIDE_BY_VEC_4_SHIFT	7
+# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
+#  error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
+# endif
+
+# ifdef USE_AS_WCSCMP
+/* Compare packed dwords.  */
+#  define VPCMPEQ	vpcmpeqd
+/* Compare packed dwords and store minimum.  */
+#  define VPMINU	vpminud
+/* 1 dword char == 4 bytes.  */
+#  define SIZE_OF_CHAR	4
+# else
+/* Compare packed bytes.  */
+#  define VPCMPEQ	vpcmpeqb
+/* Compare packed bytes and store minimum.  */
+#  define VPMINU	vpminub
+/* 1 byte char == 1 byte.  */
+#  define SIZE_OF_CHAR	1
+# endif
+
+# ifndef VZEROUPPER
+#  define VZEROUPPER	vzeroupper
+# endif
+
+/* Warning!
+           wcscmp/wcsncmp have to use SIGNED comparison for elements.
+           strcmp/strncmp have to use UNSIGNED comparison for elements.
+*/
+
+/* The main idea of the string comparison (byte or dword) using AVX2
+   consists of comparing (VPCMPEQ) two ymm vectors. The latter can be on
+   either packed bytes or dwords depending on USE_AS_WCSCMP. In order
+   to check the null char, algorithm keeps the matched bytes/dwords,
+   requiring two more AVX2 instructions (VPMINU and VPCMPEQ). In general,
+   the costs of comparing VEC_SIZE bytes (32-bytes) are two VPCMPEQ and
+   one VPMINU instructions, together with movdqu and testl instructions.
+   Main loop (away from page boundary) compares 4 vectors at a time,
+   effectively comparing 4 x VEC_SIZE bytes (128 bytes) on each loop.
+
+   The routine strncmp/wcsncmp (enabled by defining USE_AS_STRNCMP) logic
+   is the same as strcmp, except that a maximum offset is tracked.  If
+   the maximum offset is reached before a difference is found, zero is
+   returned.  */
+
+	.section .text.avx,"ax",@progbits
+ENTRY (STRCMP)
+# ifdef USE_AS_STRNCMP
+	/* Check for simple cases (0 or 1) in offset.  */
+	cmp	$1, %RDX_LP
+	je	L(char0)
+	jb	L(zero)
+#  ifdef USE_AS_WCSCMP
+	/* Convert units: from wide to byte char.  */
+	shl	$2, %RDX_LP
+#  endif
+	/* %r11 tracks the maximum offset.  NB: size_t is 32-bit on x32.  */
+	mov	%RDX_LP, %R11_LP
+# endif
+	movl	%edi, %eax
+	xorl	%edx, %edx
+	/* Make %ymm7 all zeros in this function.  */
+	vpxor	%ymm7, %ymm7, %ymm7
+	orl	%esi, %eax
+	andl	$(PAGE_SIZE - 1), %eax
+	cmpl	$(PAGE_SIZE - (VEC_SIZE * 4)), %eax
+	jg	L(cross_page)
+	/* Start comparing 4 vectors.  */
+	vmovdqu	(%rdi), %ymm1
+	VPCMPEQ	(%rsi), %ymm1, %ymm0
+	VPMINU	%ymm1, %ymm0, %ymm0
+	VPCMPEQ	%ymm7, %ymm0, %ymm0
+	vpmovmskb %ymm0, %ecx
+	testl	%ecx, %ecx
+	je	L(next_3_vectors)
+	tzcntl	%ecx, %edx
+# ifdef USE_AS_STRNCMP
+	/* Return 0 if the mismatched index (%rdx) is after the maximum
+	   offset (%r11).   */
+	cmpq	%r11, %rdx
+	jae	L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+	xorl	%eax, %eax
+	movl	(%rdi, %rdx), %ecx
+	cmpl	(%rsi, %rdx), %ecx
+	je	L(return)
+L(wcscmp_return):
+	setl	%al
+	negl	%eax
+	orl	$1, %eax
+L(return):
+# else
+	movzbl	(%rdi, %rdx), %eax
+	movzbl	(%rsi, %rdx), %edx
+	subl	%edx, %eax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(return_vec_size):
+	tzcntl	%ecx, %edx
+# ifdef USE_AS_STRNCMP
+	/* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after
+	   the maximum offset (%r11).  */
+	addq	$VEC_SIZE, %rdx
+	cmpq	%r11, %rdx
+	jae	L(zero)
+#  ifdef USE_AS_WCSCMP
+	xorl	%eax, %eax
+	movl	(%rdi, %rdx), %ecx
+	cmpl	(%rsi, %rdx), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rdi, %rdx), %eax
+	movzbl	(%rsi, %rdx), %edx
+	subl	%edx, %eax
+#  endif
+# else
+#  ifdef USE_AS_WCSCMP
+	xorl	%eax, %eax
+	movl	VEC_SIZE(%rdi, %rdx), %ecx
+	cmpl	VEC_SIZE(%rsi, %rdx), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	VEC_SIZE(%rdi, %rdx), %eax
+	movzbl	VEC_SIZE(%rsi, %rdx), %edx
+	subl	%edx, %eax
+#  endif
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(return_2_vec_size):
+	tzcntl	%ecx, %edx
+# ifdef USE_AS_STRNCMP
+	/* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is
+	   after the maximum offset (%r11).  */
+	addq	$(VEC_SIZE * 2), %rdx
+	cmpq	%r11, %rdx
+	jae	L(zero)
+#  ifdef USE_AS_WCSCMP
+	xorl	%eax, %eax
+	movl	(%rdi, %rdx), %ecx
+	cmpl	(%rsi, %rdx), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rdi, %rdx), %eax
+	movzbl	(%rsi, %rdx), %edx
+	subl	%edx, %eax
+#  endif
+# else
+#  ifdef USE_AS_WCSCMP
+	xorl	%eax, %eax
+	movl	(VEC_SIZE * 2)(%rdi, %rdx), %ecx
+	cmpl	(VEC_SIZE * 2)(%rsi, %rdx), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(VEC_SIZE * 2)(%rdi, %rdx), %eax
+	movzbl	(VEC_SIZE * 2)(%rsi, %rdx), %edx
+	subl	%edx, %eax
+#  endif
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(return_3_vec_size):
+	tzcntl	%ecx, %edx
+# ifdef USE_AS_STRNCMP
+	/* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is
+	   after the maximum offset (%r11).  */
+	addq	$(VEC_SIZE * 3), %rdx
+	cmpq	%r11, %rdx
+	jae	L(zero)
+#  ifdef USE_AS_WCSCMP
+	xorl	%eax, %eax
+	movl	(%rdi, %rdx), %ecx
+	cmpl	(%rsi, %rdx), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rdi, %rdx), %eax
+	movzbl	(%rsi, %rdx), %edx
+	subl	%edx, %eax
+#  endif
+# else
+#  ifdef USE_AS_WCSCMP
+	xorl	%eax, %eax
+	movl	(VEC_SIZE * 3)(%rdi, %rdx), %ecx
+	cmpl	(VEC_SIZE * 3)(%rsi, %rdx), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(VEC_SIZE * 3)(%rdi, %rdx), %eax
+	movzbl	(VEC_SIZE * 3)(%rsi, %rdx), %edx
+	subl	%edx, %eax
+#  endif
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(next_3_vectors):
+	vmovdqu	VEC_SIZE(%rdi), %ymm6
+	VPCMPEQ	VEC_SIZE(%rsi), %ymm6, %ymm3
+	VPMINU	%ymm6, %ymm3, %ymm3
+	VPCMPEQ	%ymm7, %ymm3, %ymm3
+	vpmovmskb %ymm3, %ecx
+	testl	%ecx, %ecx
+	jne	L(return_vec_size)
+	vmovdqu	(VEC_SIZE * 2)(%rdi), %ymm5
+	vmovdqu	(VEC_SIZE * 3)(%rdi), %ymm4
+	vmovdqu	(VEC_SIZE * 3)(%rsi), %ymm0
+	VPCMPEQ	(VEC_SIZE * 2)(%rsi), %ymm5, %ymm2
+	VPMINU	%ymm5, %ymm2, %ymm2
+	VPCMPEQ	%ymm4, %ymm0, %ymm0
+	VPCMPEQ	%ymm7, %ymm2, %ymm2
+	vpmovmskb %ymm2, %ecx
+	testl	%ecx, %ecx
+	jne	L(return_2_vec_size)
+	VPMINU	%ymm4, %ymm0, %ymm0
+	VPCMPEQ	%ymm7, %ymm0, %ymm0
+	vpmovmskb %ymm0, %ecx
+	testl	%ecx, %ecx
+	jne	L(return_3_vec_size)
+L(main_loop_header):
+	leaq	(VEC_SIZE * 4)(%rdi), %rdx
+	movl	$PAGE_SIZE, %ecx
+	/* RAX: the next VEC_SIZE * 4 aligned address after RDI.  */
+	andq	$-(VEC_SIZE * 4), %rdx
+	subq	%rdi, %rdx
+	leaq	(%rdi, %rdx), %rax
+# ifdef USE_AS_STRNCMP
+	/* Starting from this point, the maximum offset, or simply the
+	   'offset', DECREASES by the same amount when base pointers are
+	   moved forward.  Return 0 when:
+	     1) On match: offset <= the matched vector index.
+	     2) On mismatch: offset is before the mismatched index.
+	 */
+	subq	%rdx, %r11
+	jbe	L(zero)
+# endif
+	addq	%rsi, %rdx
+	movq	%rdx, %rsi
+	andl	$(PAGE_SIZE - 1), %esi
+	/* Number of bytes before page crossing.  */
+	subq	%rsi, %rcx
+	/* Number of VEC_SIZE * 4 blocks before page crossing.  */
+	shrq	$DIVIDE_BY_VEC_4_SHIFT, %rcx
+	/* ESI: Number of VEC_SIZE * 4 blocks before page crossing.   */
+	movl	%ecx, %esi
+	jmp	L(loop_start)
+
+	.p2align 4
+L(loop):
+# ifdef USE_AS_STRNCMP
+	/* Base pointers are moved forward by 4 * VEC_SIZE.  Decrease
+	   the maximum offset (%r11) by the same amount.  */
+	subq	$(VEC_SIZE * 4), %r11
+	jbe	L(zero)
+# endif
+	addq	$(VEC_SIZE * 4), %rax
+	addq	$(VEC_SIZE * 4), %rdx
+L(loop_start):
+	testl	%esi, %esi
+	leal	-1(%esi), %esi	/* LEA preserves ZF from TESTL.  */
+	je	L(loop_cross_page)
+L(back_to_loop):
+	/* Main loop, comparing 4 vectors at a time.  */
+	vmovdqa	(%rax), %ymm0
+	vmovdqa	VEC_SIZE(%rax), %ymm3
+	VPCMPEQ	(%rdx), %ymm0, %ymm4
+	VPCMPEQ	VEC_SIZE(%rdx), %ymm3, %ymm1
+	VPMINU	%ymm0, %ymm4, %ymm4
+	VPMINU	%ymm3, %ymm1, %ymm1
+	vmovdqa	(VEC_SIZE * 2)(%rax), %ymm2
+	VPMINU	%ymm1, %ymm4, %ymm0
+	vmovdqa	(VEC_SIZE * 3)(%rax), %ymm3
+	VPCMPEQ	(VEC_SIZE * 2)(%rdx), %ymm2, %ymm5
+	VPCMPEQ	(VEC_SIZE * 3)(%rdx), %ymm3, %ymm6
+	VPMINU	%ymm2, %ymm5, %ymm5
+	VPMINU	%ymm3, %ymm6, %ymm6
+	VPMINU	%ymm5, %ymm0, %ymm0
+	VPMINU	%ymm6, %ymm0, %ymm0
+	VPCMPEQ	%ymm7, %ymm0, %ymm0
+
+	/* Test each mask (32 bits) individually because for VEC_SIZE
+	   == 32 it is not possible to OR the four masks and keep all
+	   bits in a 64-bit integer register, differing from SSE2 strcmp
+	   where ORing is possible.  */
+	vpmovmskb %ymm0, %ecx
+	testl	%ecx, %ecx
+	je	L(loop)
+	VPCMPEQ	%ymm7, %ymm4, %ymm0
+	vpmovmskb %ymm0, %edi
+	testl	%edi, %edi
+	je	L(test_vec)
+	tzcntl	%edi, %ecx
+# ifdef USE_AS_STRNCMP
+	cmpq	%rcx, %r11
+	jbe	L(zero)
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(%rsi, %rcx), %edi
+	cmpl	(%rdx, %rcx), %edi
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rax, %rcx), %eax
+	movzbl	(%rdx, %rcx), %edx
+	subl	%edx, %eax
+#  endif
+# else
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(%rsi, %rcx), %edi
+	cmpl	(%rdx, %rcx), %edi
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rax, %rcx), %eax
+	movzbl	(%rdx, %rcx), %edx
+	subl	%edx, %eax
+#  endif
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(test_vec):
+# ifdef USE_AS_STRNCMP
+	/* The first vector matched.  Return 0 if the maximum offset
+	   (%r11) <= VEC_SIZE.  */
+	cmpq	$VEC_SIZE, %r11
+	jbe	L(zero)
+# endif
+	VPCMPEQ	%ymm7, %ymm1, %ymm1
+	vpmovmskb %ymm1, %ecx
+	testl	%ecx, %ecx
+	je	L(test_2_vec)
+	tzcntl	%ecx, %edi
+# ifdef USE_AS_STRNCMP
+	addq	$VEC_SIZE, %rdi
+	cmpq	%rdi, %r11
+	jbe	L(zero)
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(%rsi, %rdi), %ecx
+	cmpl	(%rdx, %rdi), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rax, %rdi), %eax
+	movzbl	(%rdx, %rdi), %edx
+	subl	%edx, %eax
+#  endif
+# else
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	VEC_SIZE(%rsi, %rdi), %ecx
+	cmpl	VEC_SIZE(%rdx, %rdi), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	VEC_SIZE(%rax, %rdi), %eax
+	movzbl	VEC_SIZE(%rdx, %rdi), %edx
+	subl	%edx, %eax
+#  endif
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(test_2_vec):
+# ifdef USE_AS_STRNCMP
+	/* The first 2 vectors matched.  Return 0 if the maximum offset
+	   (%r11) <= 2 * VEC_SIZE.  */
+	cmpq	$(VEC_SIZE * 2), %r11
+	jbe	L(zero)
+# endif
+	VPCMPEQ	%ymm7, %ymm5, %ymm5
+	vpmovmskb %ymm5, %ecx
+	testl	%ecx, %ecx
+	je	L(test_3_vec)
+	tzcntl	%ecx, %edi
+# ifdef USE_AS_STRNCMP
+	addq	$(VEC_SIZE * 2), %rdi
+	cmpq	%rdi, %r11
+	jbe	L(zero)
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(%rsi, %rdi), %ecx
+	cmpl	(%rdx, %rdi), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rax, %rdi), %eax
+	movzbl	(%rdx, %rdi), %edx
+	subl	%edx, %eax
+#  endif
+# else
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(VEC_SIZE * 2)(%rsi, %rdi), %ecx
+	cmpl	(VEC_SIZE * 2)(%rdx, %rdi), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(VEC_SIZE * 2)(%rax, %rdi), %eax
+	movzbl	(VEC_SIZE * 2)(%rdx, %rdi), %edx
+	subl	%edx, %eax
+#  endif
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(test_3_vec):
+# ifdef USE_AS_STRNCMP
+	/* The first 3 vectors matched.  Return 0 if the maximum offset
+	   (%r11) <= 3 * VEC_SIZE.  */
+	cmpq	$(VEC_SIZE * 3), %r11
+	jbe	L(zero)
+# endif
+	VPCMPEQ	%ymm7, %ymm6, %ymm6
+	vpmovmskb %ymm6, %esi
+	tzcntl	%esi, %ecx
+# ifdef USE_AS_STRNCMP
+	addq	$(VEC_SIZE * 3), %rcx
+	cmpq	%rcx, %r11
+	jbe	L(zero)
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(%rsi, %rcx), %esi
+	cmpl	(%rdx, %rcx), %esi
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rax, %rcx), %eax
+	movzbl	(%rdx, %rcx), %edx
+	subl	%edx, %eax
+#  endif
+# else
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(VEC_SIZE * 3)(%rsi, %rcx), %esi
+	cmpl	(VEC_SIZE * 3)(%rdx, %rcx), %esi
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(VEC_SIZE * 3)(%rax, %rcx), %eax
+	movzbl	(VEC_SIZE * 3)(%rdx, %rcx), %edx
+	subl	%edx, %eax
+#  endif
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(loop_cross_page):
+	xorl	%r10d, %r10d
+	movq	%rdx, %rcx
+	/* Align load via RDX.  We load the extra ECX bytes which should
+	   be ignored.  */
+	andl	$((VEC_SIZE * 4) - 1), %ecx
+	/* R10 is -RCX.  */
+	subq	%rcx, %r10
+
+	/* This works only if VEC_SIZE * 2 == 64. */
+# if (VEC_SIZE * 2) != 64
+#  error (VEC_SIZE * 2) != 64
+# endif
+
+	/* Check if the first VEC_SIZE * 2 bytes should be ignored.  */
+	cmpl	$(VEC_SIZE * 2), %ecx
+	jge	L(loop_cross_page_2_vec)
+
+	vmovdqu	(%rax, %r10), %ymm2
+	vmovdqu	VEC_SIZE(%rax, %r10), %ymm3
+	VPCMPEQ	(%rdx, %r10), %ymm2, %ymm0
+	VPCMPEQ	VEC_SIZE(%rdx, %r10), %ymm3, %ymm1
+	VPMINU	%ymm2, %ymm0, %ymm0
+	VPMINU	%ymm3, %ymm1, %ymm1
+	VPCMPEQ	%ymm7, %ymm0, %ymm0
+	VPCMPEQ	%ymm7, %ymm1, %ymm1
+
+	vpmovmskb %ymm0, %edi
+	vpmovmskb %ymm1, %esi
+
+	salq	$32, %rsi
+	xorq	%rsi, %rdi
+
+	/* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes.  */
+	shrq	%cl, %rdi
+
+	testq	%rdi, %rdi
+	je	L(loop_cross_page_2_vec)
+	tzcntq	%rdi, %rcx
+# ifdef USE_AS_STRNCMP
+	cmpq	%rcx, %r11
+	jbe	L(zero)
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(%rsi, %rcx), %edi
+	cmpl	(%rdx, %rcx), %edi
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rax, %rcx), %eax
+	movzbl	(%rdx, %rcx), %edx
+	subl	%edx, %eax
+#  endif
+# else
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(%rsi, %rcx), %edi
+	cmpl	(%rdx, %rcx), %edi
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rax, %rcx), %eax
+	movzbl	(%rdx, %rcx), %edx
+	subl	%edx, %eax
+#  endif
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(loop_cross_page_2_vec):
+	/* The first VEC_SIZE * 2 bytes match or are ignored.  */
+	vmovdqu	(VEC_SIZE * 2)(%rax, %r10), %ymm2
+	vmovdqu	(VEC_SIZE * 3)(%rax, %r10), %ymm3
+	VPCMPEQ	(VEC_SIZE * 2)(%rdx, %r10), %ymm2, %ymm5
+	VPMINU	%ymm2, %ymm5, %ymm5
+	VPCMPEQ	(VEC_SIZE * 3)(%rdx, %r10), %ymm3, %ymm6
+	VPCMPEQ	%ymm7, %ymm5, %ymm5
+	VPMINU	%ymm3, %ymm6, %ymm6
+	VPCMPEQ	%ymm7, %ymm6, %ymm6
+
+	vpmovmskb %ymm5, %edi
+	vpmovmskb %ymm6, %esi
+
+	salq	$32, %rsi
+	xorq	%rsi, %rdi
+
+	xorl	%r8d, %r8d
+	/* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes.  */
+	subl	$(VEC_SIZE * 2), %ecx
+	jle	1f
+	/* Skip ECX bytes.  */
+	shrq	%cl, %rdi
+	/* R8 has number of bytes skipped.  */
+	movl	%ecx, %r8d
+1:
+	/* Before jumping back to the loop, set ESI to the number of
+	   VEC_SIZE * 4 blocks before page crossing.  */
+	movl	$(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi
+
+	testq	%rdi, %rdi
+	je	L(back_to_loop)
+	tzcntq	%rdi, %rcx
+	addq	%r10, %rcx
+	/* Adjust for number of bytes skipped.  */
+	addq	%r8, %rcx
+# ifdef USE_AS_STRNCMP
+	addq	$(VEC_SIZE * 2), %rcx
+	subq	%rcx, %r11
+	jbe	L(zero)
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(%rsi, %rcx), %edi
+	cmpl	(%rdx, %rcx), %edi
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rax, %rcx), %eax
+	movzbl	(%rdx, %rcx), %edx
+	subl	%edx, %eax
+#  endif
+# else
+#  ifdef USE_AS_WCSCMP
+	movq	%rax, %rsi
+	xorl	%eax, %eax
+	movl	(VEC_SIZE * 2)(%rsi, %rcx), %edi
+	cmpl	(VEC_SIZE * 2)(%rdx, %rcx), %edi
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(VEC_SIZE * 2)(%rax, %rcx), %eax
+	movzbl	(VEC_SIZE * 2)(%rdx, %rcx), %edx
+	subl	%edx, %eax
+#  endif
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(cross_page_loop):
+	/* Check one byte/dword at a time.  */
+# ifdef USE_AS_WCSCMP
+	cmpl	%ecx, %eax
+# else
+	subl	%ecx, %eax
+# endif
+	jne	L(different)
+	addl	$SIZE_OF_CHAR, %edx
+	cmpl	$(VEC_SIZE * 4), %edx
+	je	L(main_loop_header)
+# ifdef USE_AS_STRNCMP
+	cmpq	%r11, %rdx
+	jae	L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+	movl	(%rdi, %rdx), %eax
+	movl	(%rsi, %rdx), %ecx
+# else
+	movzbl	(%rdi, %rdx), %eax
+	movzbl	(%rsi, %rdx), %ecx
+# endif
+	/* Check null char.  */
+	testl	%eax, %eax
+	jne	L(cross_page_loop)
+	/* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED
+	   comparisons.  */
+	subl	%ecx, %eax
+# ifndef USE_AS_WCSCMP
+L(different):
+# endif
+	VZEROUPPER
+	ret
+
+# ifdef USE_AS_WCSCMP
+	.p2align 4
+L(different):
+	/* Use movl to avoid modifying EFLAGS.  */
+	movl	$0, %eax
+	setl	%al
+	negl	%eax
+	orl	$1, %eax
+	VZEROUPPER
+	ret
+# endif
+
+# ifdef USE_AS_STRNCMP
+	.p2align 4
+L(zero):
+	xorl	%eax, %eax
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(char0):
+#  ifdef USE_AS_WCSCMP
+	xorl	%eax, %eax
+	movl	(%rdi), %ecx
+	cmpl	(%rsi), %ecx
+	jne	L(wcscmp_return)
+#  else
+	movzbl	(%rsi), %ecx
+	movzbl	(%rdi), %eax
+	subl	%ecx, %eax
+#  endif
+	VZEROUPPER
+	ret
+# endif
+
+	.p2align 4
+L(last_vector):
+	addq	%rdx, %rdi
+	addq	%rdx, %rsi
+# ifdef USE_AS_STRNCMP
+	subq	%rdx, %r11
+# endif
+	tzcntl	%ecx, %edx
+# ifdef USE_AS_STRNCMP
+	cmpq	%r11, %rdx
+	jae	L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+	xorl	%eax, %eax
+	movl	(%rdi, %rdx), %ecx
+	cmpl	(%rsi, %rdx), %ecx
+	jne	L(wcscmp_return)
+# else
+	movzbl	(%rdi, %rdx), %eax
+	movzbl	(%rsi, %rdx), %edx
+	subl	%edx, %eax
+# endif
+	VZEROUPPER
+	ret
+
+	/* Comparing on page boundary region requires special treatment:
+	   It must be done one vector at a time, starting with the wider
+	   ymm vector if possible, if not, with xmm.  If fetching 16 bytes
+	   (xmm) still passes the boundary, byte comparison must be done.
+	 */
+	.p2align 4
+L(cross_page):
+	/* Try one ymm vector at a time.  */
+	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
+	jg	L(cross_page_1_vector)
+L(loop_1_vector):
+	vmovdqu	(%rdi, %rdx), %ymm1
+	VPCMPEQ	(%rsi, %rdx), %ymm1, %ymm0
+	VPMINU	%ymm1, %ymm0, %ymm0
+	VPCMPEQ	%ymm7, %ymm0, %ymm0
+	vpmovmskb %ymm0, %ecx
+	testl	%ecx, %ecx
+	jne	L(last_vector)
+
+	addl	$VEC_SIZE, %edx
+
+	addl	$VEC_SIZE, %eax
+# ifdef USE_AS_STRNCMP
+	/* Return 0 if the current offset (%rdx) >= the maximum offset
+	   (%r11).  */
+	cmpq	%r11, %rdx
+	jae	L(zero)
+# endif
+	cmpl	$(PAGE_SIZE - VEC_SIZE), %eax
+	jle	L(loop_1_vector)
+L(cross_page_1_vector):
+	/* Less than 32 bytes to check, try one xmm vector.  */
+	cmpl	$(PAGE_SIZE - 16), %eax
+	jg	L(cross_page_1_xmm)
+	vmovdqu	(%rdi, %rdx), %xmm1
+	VPCMPEQ	(%rsi, %rdx), %xmm1, %xmm0
+	VPMINU	%xmm1, %xmm0, %xmm0
+	VPCMPEQ	%xmm7, %xmm0, %xmm0
+	vpmovmskb %xmm0, %ecx
+	testl	%ecx, %ecx
+	jne	L(last_vector)
+
+	addl	$16, %edx
+# ifndef USE_AS_WCSCMP
+	addl	$16, %eax
+# endif
+# ifdef USE_AS_STRNCMP
+	/* Return 0 if the current offset (%rdx) >= the maximum offset
+	   (%r11).  */
+	cmpq	%r11, %rdx
+	jae	L(zero)
+# endif
+
+L(cross_page_1_xmm):
+# ifndef USE_AS_WCSCMP
+	/* Less than 16 bytes to check, try 8 byte vector.  NB: No need
+	   for wcscmp nor wcsncmp since wide char is 4 bytes.   */
+	cmpl	$(PAGE_SIZE - 8), %eax
+	jg	L(cross_page_8bytes)
+	vmovq	(%rdi, %rdx), %xmm1
+	vmovq	(%rsi, %rdx), %xmm0
+	VPCMPEQ	%xmm0, %xmm1, %xmm0
+	VPMINU	%xmm1, %xmm0, %xmm0
+	VPCMPEQ	%xmm7, %xmm0, %xmm0
+	vpmovmskb %xmm0, %ecx
+	/* Only last 8 bits are valid.  */
+	andl	$0xff, %ecx
+	testl	%ecx, %ecx
+	jne	L(last_vector)
+
+	addl	$8, %edx
+	addl	$8, %eax
+#  ifdef USE_AS_STRNCMP
+	/* Return 0 if the current offset (%rdx) >= the maximum offset
+	   (%r11).  */
+	cmpq	%r11, %rdx
+	jae	L(zero)
+#  endif
+
+L(cross_page_8bytes):
+	/* Less than 8 bytes to check, try 4 byte vector.  */
+	cmpl	$(PAGE_SIZE - 4), %eax
+	jg	L(cross_page_4bytes)
+	vmovd	(%rdi, %rdx), %xmm1
+	vmovd	(%rsi, %rdx), %xmm0
+	VPCMPEQ	%xmm0, %xmm1, %xmm0
+	VPMINU	%xmm1, %xmm0, %xmm0
+	VPCMPEQ	%xmm7, %xmm0, %xmm0
+	vpmovmskb %xmm0, %ecx
+	/* Only last 4 bits are valid.  */
+	andl	$0xf, %ecx
+	testl	%ecx, %ecx
+	jne	L(last_vector)
+
+	addl	$4, %edx
+#  ifdef USE_AS_STRNCMP
+	/* Return 0 if the current offset (%rdx) >= the maximum offset
+	   (%r11).  */
+	cmpq	%r11, %rdx
+	jae	L(zero)
+#  endif
+
+L(cross_page_4bytes):
+# endif
+	/* Less than 4 bytes to check, try one byte/dword at a time.  */
+# ifdef USE_AS_STRNCMP
+	cmpq	%r11, %rdx
+	jae	L(zero)
+# endif
+# ifdef USE_AS_WCSCMP
+	movl	(%rdi, %rdx), %eax
+	movl	(%rsi, %rdx), %ecx
+# else
+	movzbl	(%rdi, %rdx), %eax
+	movzbl	(%rsi, %rdx), %ecx
+# endif
+	testl	%eax, %eax
+	jne	L(cross_page_loop)
+	subl	%ecx, %eax
+	VZEROUPPER
+	ret
+END (STRCMP)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
index bf555b4066..a9b6267d15 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse2-unaligned.S
@@ -1,5 +1,5 @@
 /* strcmp with unaligned loads
-   Copyright (C) 2013-2016 Free Software Foundation, Inc.
+   Copyright (C) 2013-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse2.S b/sysdeps/x86_64/multiarch/strcmp-sse2.S
new file mode 100644
index 0000000000..d173ded8c0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcmp-sse2.S
@@ -0,0 +1,28 @@
+/* strcmp optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# include <sysdep.h>
+
+# define STRCMP __strcmp_sse2	/* Presumably the entry name used by the file included below.  */
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcmp)	/* Expands to nothing.  */
+#endif
+
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index 70df84ae32..d3c07bd292 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -1,5 +1,5 @@
 /* strcmp with SSE4.2
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -17,6 +17,40 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <sysdep.h>
+
+#ifndef STRCMP_SSE42
+# define STRCMP_SSE42	__strcmp_sse42
+#endif
+
+#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
+# include "locale-defines.h"
+#endif
+
+#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
+/* The counter, %r11, is unsigned, so a wrapped update shows up as new > old.
+   Branch to strcmp_exitz in that case, or when the new counter is 0.  */
+# define UPDATE_STRNCMP_COUNTER				\
+	/* %r9 = %r11 + %rcx - 16, the number left to compare.  */	\
+	lea	-16(%rcx, %r11), %r9;			\
+	cmp	%r9, %r11;				\
+	jb	LABEL(strcmp_exitz);			\
+	test	%r9, %r9;				\
+	je	LABEL(strcmp_exitz);			\
+	mov	%r9, %r11
+#else
+# define UPDATE_STRNCMP_COUNTER	/* No length limit: expands to nothing.  */
+#endif
+
+#ifdef USE_AVX
+# define SECTION	avx
+# define GLABEL(l)	l##_avx	/* Append the "_avx" suffix to L.  */
+#else
+# define SECTION	sse4.2
+# define GLABEL(l)	l##_sse42	/* Append the "_sse42" suffix to L.  */
+#endif
+
+#define LABEL(l)	.L##l	/* Prepend ".L" to L.  */
 
 /* We use 0x1a:
 	_SIDD_SBYTE_OPS
@@ -92,6 +126,7 @@ END (GLABEL(__strncasecmp))
 
 STRCMP_SSE42:
 	cfi_startproc
+	_CET_ENDBR
 	CALL_MCOUNT
 
 /*
@@ -240,7 +275,7 @@ LABEL(bigger):
 	movslq	(%r10, %r9,4), %r9
 	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
 	lea	(%r10, %r9), %r10
-	jmp	*%r10			/* jump to corresponding case */
+	_CET_NOTRACK jmp *%r10		/* jump to corresponding case */
 
 /*
  * The following cases will be handled by ashr_0
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse4_2.S b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
new file mode 100644
index 0000000000..776e5e060f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcmp-sse4_2.S
@@ -0,0 +1,21 @@
+/* strcmp optimized with SSE4.2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# include "strcmp-sse42.S"	/* STRCMP_SSE42 defaults to __strcmp_sse42 there.  */
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
deleted file mode 100644
index 0e4a113f61..0000000000
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ /dev/null
@@ -1,209 +0,0 @@
-/* Multiple versions of strcmp
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifdef USE_AS_STRNCMP
-/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
-   if the new counter > the old one or is 0.  */
-# define UPDATE_STRNCMP_COUNTER				\
-	/* calculate left number to compare */		\
-	lea	-16(%rcx, %r11), %r9;			\
-	cmp	%r9, %r11;				\
-	jb	LABEL(strcmp_exitz);			\
-	test	%r9, %r9;				\
-	je	LABEL(strcmp_exitz);			\
-	mov	%r9, %r11
-
-# define STRCMP_SSE42	__strncmp_sse42
-# define STRCMP_SSSE3	__strncmp_ssse3
-# define STRCMP_SSE2	__strncmp_sse2
-# define __GI_STRCMP	__GI_strncmp
-#elif defined USE_AS_STRCASECMP_L
-# include "locale-defines.h"
-
-# define UPDATE_STRNCMP_COUNTER
-
-# define STRCMP_AVX	__strcasecmp_l_avx
-# define STRCMP_SSE42	__strcasecmp_l_sse42
-# define STRCMP_SSSE3	__strcasecmp_l_ssse3
-# define STRCMP_SSE2	__strcasecmp_l_sse2
-# define __GI_STRCMP	__GI___strcasecmp_l
-#elif defined USE_AS_STRNCASECMP_L
-# include "locale-defines.h"
-
-/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
-   if the new counter > the old one or is 0.  */
-# define UPDATE_STRNCMP_COUNTER				\
-	/* calculate left number to compare */		\
-	lea	-16(%rcx, %r11), %r9;			\
-	cmp	%r9, %r11;				\
-	jb	LABEL(strcmp_exitz);			\
-	test	%r9, %r9;				\
-	je	LABEL(strcmp_exitz);			\
-	mov	%r9, %r11
-
-# define STRCMP_AVX	__strncasecmp_l_avx
-# define STRCMP_SSE42	__strncasecmp_l_sse42
-# define STRCMP_SSSE3	__strncasecmp_l_ssse3
-# define STRCMP_SSE2	__strncasecmp_l_sse2
-# define __GI_STRCMP	__GI___strncasecmp_l
-#else
-# define USE_AS_STRCMP
-# define UPDATE_STRNCMP_COUNTER
-# ifndef STRCMP
-#  define STRCMP	strcmp
-#  define STRCMP_SSE42	__strcmp_sse42
-#  define STRCMP_SSSE3	__strcmp_ssse3
-#  define STRCMP_SSE2	__strcmp_sse2
-#  define __GI_STRCMP	__GI_strcmp
-# endif
-#endif
-
-/* Define multiple versions only for the definition in libc.  Don't
-   define multiple versions for strncmp in static library since we
-   need strncmp before the initialization happened.  */
-#if (defined SHARED || !defined USE_AS_STRNCMP) && IS_IN (libc)
-	.text
-ENTRY(STRCMP)
-	.type	STRCMP, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-#ifdef USE_AS_STRCMP
-	leaq	__strcmp_sse2_unaligned(%rip), %rax
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz     3f
-#else
-	HAS_ARCH_FEATURE (Slow_SSE4_2)
-	jnz	2f
-	leaq	STRCMP_SSE42(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_2)
-	jnz	3f
-#endif
-2:	leaq	STRCMP_SSSE3(%rip), %rax
-	HAS_CPU_FEATURE (SSSE3)
-	jnz	3f
-	leaq	STRCMP_SSE2(%rip), %rax
-3:	ret
-END(STRCMP)
-
-# ifdef USE_AS_STRCASECMP_L
-ENTRY(__strcasecmp)
-	.type	__strcasecmp, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__strcasecmp_avx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX_Usable)
-	jnz	3f
-	HAS_ARCH_FEATURE (Slow_SSE4_2)
-	jnz	2f
-	leaq	__strcasecmp_sse42(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_2)
-	jnz	3f
-2:	leaq	__strcasecmp_ssse3(%rip), %rax
-	HAS_CPU_FEATURE (SSSE3)
-	jnz	3f
-	leaq	__strcasecmp_sse2(%rip), %rax
-3:	ret
-END(__strcasecmp)
-weak_alias (__strcasecmp, strcasecmp)
-# endif
-# ifdef USE_AS_STRNCASECMP_L
-ENTRY(__strncasecmp)
-	.type	__strncasecmp, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__strncasecmp_avx(%rip), %rax
-	HAS_ARCH_FEATURE (AVX_Usable)
-	jnz	3f
-	HAS_ARCH_FEATURE (Slow_SSE4_2)
-	jnz	2f
-	leaq	__strncasecmp_sse42(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_2)
-	jnz	3f
-2:	leaq	__strncasecmp_ssse3(%rip), %rax
-	HAS_CPU_FEATURE (SSSE3)
-	jnz	3f
-	leaq	__strncasecmp_sse2(%rip), %rax
-3:	ret
-END(__strncasecmp)
-weak_alias (__strncasecmp, strncasecmp)
-# endif
-
-# undef LABEL
-# define LABEL(l) .L##l##_sse42
-# define GLABEL(l) l##_sse42
-# define SECTION sse4.2
-# include "strcmp-sse42.S"
-
-
-# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
-#  define LABEL(l) .L##l##_avx
-#  define GLABEL(l) l##_avx
-#  define USE_AVX 1
-#  undef STRCMP_SSE42
-#  define STRCMP_SSE42 STRCMP_AVX
-#  define SECTION avx
-#  include "strcmp-sse42.S"
-# endif
-
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCMP_SSE2, @function; \
-	.align 16; \
-	.globl STRCMP_SSE2; \
-	.hidden STRCMP_SSE2; \
-	STRCMP_SSE2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCMP_SSE2, .-STRCMP_SSE2
-
-# ifdef USE_AS_STRCASECMP_L
-#  define ENTRY2(name) \
-	.type __strcasecmp_sse2, @function; \
-	.align 16; \
-	.globl __strcasecmp_sse2; \
-	.hidden __strcasecmp_sse2; \
-	__strcasecmp_sse2: cfi_startproc; \
-	CALL_MCOUNT
-#  define END2(name) \
-	cfi_endproc; .size __strcasecmp_sse2, .-__strcasecmp_sse2
-# endif
-
-# ifdef USE_AS_STRNCASECMP_L
-#  define ENTRY2(name) \
-	.type __strncasecmp_sse2, @function; \
-	.align 16; \
-	.globl __strncasecmp_sse2; \
-	.hidden __strncasecmp_sse2; \
-	__strncasecmp_sse2: cfi_startproc; \
-	CALL_MCOUNT
-#  define END2(name) \
-	cfi_endproc; .size __strncasecmp_sse2, .-__strncasecmp_sse2
-# endif
-
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcmp calls through a PLT.
-   The speedup we get from using SSE4.2 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCMP; __GI_STRCMP = STRCMP_SSE2
-#endif
-
-#include "../strcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
new file mode 100644
index 0000000000..b903e418df
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcmp.c
@@ -0,0 +1,59 @@
+/* Multiple versions of strcmp.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strcmp __redirect_strcmp	/* <string.h> then declares this name.  */
+# include <string.h>
+# undef strcmp
+
+# define SYMBOL_NAME strcmp
+# include <init-arch.h>	/* Presumably provides REDIRECT_NAME and OPTIMIZE.  */
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)	/* Checks are ordered; the first match wins.  */
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    return OPTIMIZE (avx2);	/* All three conditions must hold.  */
+
+  if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+    return OPTIMIZE (sse2_unaligned);
+
+  if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+    return OPTIMIZE (ssse3);
+
+  return OPTIMIZE (sse2);	/* Fallback when nothing above matched.  */
+}
+
+libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index caa74be2c2..72bf7e8586 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -1,5 +1,5 @@
 /* strcpy with SSE2 and unaligned load
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -33,7 +33,7 @@
 	lea	TABLE(%rip), %r11;                              \
 	movslq	(%r11, INDEX, SCALE), %rcx;                     \
 	lea	(%r11, %rcx), %rcx;                             \
-	jmp	*%rcx
+	_CET_NOTRACK jmp *%rcx
 
 # ifndef USE_AS_STRCAT
 
@@ -99,6 +99,8 @@ L(Unalign16Both):
 	sub	%rcx, %rdi
 # ifdef USE_AS_STRNCPY
 	add	%rcx, %r8
+	sbb	%rcx, %rcx	/* %rcx = -1 if the add above carried, else 0.  */
+	or	%rcx, %r8	/* On carry, saturate %r8 to all ones.  */
 # endif
 	mov	$16, %rcx
 	movdqa	(%rsi, %rcx), %xmm1
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2.S b/sysdeps/x86_64/multiarch/strcpy-sse2.S
new file mode 100644
index 0000000000..70136017fa
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2.S
@@ -0,0 +1,28 @@
+/* strcpy optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define strcpy __strcpy_sse2	/* Renames strcpy in the file included below.  */
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcpy)	/* Expands to nothing.  */
+#endif
+
+#include <sysdeps/x86_64/strcpy.S>
diff --git a/sysdeps/x86_64/multiarch/strcpy-ssse3.S b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
index 5bdb7671cf..9858d0c4d5 100644
--- a/sysdeps/x86_64/multiarch/strcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strcpy-ssse3.S
@@ -1,5 +1,5 @@
 /* strcpy with SSSE3
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S
deleted file mode 100644
index 024f6ef899..0000000000
--- a/sysdeps/x86_64/multiarch/strcpy.S
+++ /dev/null
@@ -1,99 +0,0 @@
-/* Multiple versions of strcpy
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-#if !defined (USE_AS_STPCPY) && !defined (USE_AS_STRNCPY)
-# ifndef STRCPY
-#  define STRCPY strcpy
-# endif
-#endif
-
-#ifdef USE_AS_STPCPY
-# ifdef USE_AS_STRNCPY
-#  define STRCPY_SSSE3		__stpncpy_ssse3
-#  define STRCPY_SSE2		__stpncpy_sse2
-#  define STRCPY_SSE2_UNALIGNED __stpncpy_sse2_unaligned
-#  define __GI_STRCPY		__GI_stpncpy
-#  define __GI___STRCPY		__GI___stpncpy
-# else
-#  define STRCPY_SSSE3		__stpcpy_ssse3
-#  define STRCPY_SSE2		__stpcpy_sse2
-#  define STRCPY_SSE2_UNALIGNED	__stpcpy_sse2_unaligned
-#  define __GI_STRCPY		__GI_stpcpy
-#  define __GI___STRCPY		__GI___stpcpy
-# endif
-#else
-# ifdef USE_AS_STRNCPY
-#  define STRCPY_SSSE3		__strncpy_ssse3
-#  define STRCPY_SSE2		__strncpy_sse2
-#  define STRCPY_SSE2_UNALIGNED	__strncpy_sse2_unaligned
-#  define __GI_STRCPY		__GI_strncpy
-# else
-#  define STRCPY_SSSE3		__strcpy_ssse3
-#  define STRCPY_SSE2		__strcpy_sse2
-#  define STRCPY_SSE2_UNALIGNED	__strcpy_sse2_unaligned
-#  define __GI_STRCPY		__GI_strcpy
-# endif
-#endif
-
-
-/* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
-	.text
-ENTRY(STRCPY)
-	.type	STRCPY, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	STRCPY_SSE2_UNALIGNED(%rip), %rax
-	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
-	jnz	2f
-	leaq	STRCPY_SSE2(%rip), %rax
-	HAS_CPU_FEATURE (SSSE3)
-	jz	2f
-	leaq	STRCPY_SSSE3(%rip), %rax
-2:	ret
-END(STRCPY)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCPY_SSE2, @function; \
-	.align 16; \
-	.globl STRCPY_SSE2; \
-	.hidden STRCPY_SSE2; \
-	STRCPY_SSE2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCPY_SSE2, .-STRCPY_SSE2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcpy calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCPY; __GI_STRCPY = STRCPY_SSE2
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
-	.globl __GI___STRCPY; __GI___STRCPY = STRCPY_SSE2
-#endif
-
-#ifndef USE_AS_STRNCPY
-#include "../strcpy.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcpy.c b/sysdeps/x86_64/multiarch/strcpy.c
new file mode 100644
index 0000000000..12e0e3ffe2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcpy.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strcpy.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strcpy __redirect_strcpy	/* <string.h> then declares this name.  */
+# include <string.h>
+# undef strcpy
+
+# define SYMBOL_NAME strcpy
+# include "ifunc-unaligned-ssse3.h"	/* Presumably defines IFUNC_SELECTOR, used below.  */
+
+libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index 91b804ddd6..857af10486 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -1,5 +1,5 @@
 /* strcspn with SSE4.2 intrinsics
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -70,7 +70,7 @@ char *
 #else
 size_t
 #endif
-STRCSPN_SSE2 (const char *, const char *);
+STRCSPN_SSE2 (const char *, const char *) attribute_hidden;
 
 
 #ifdef USE_AS_STRPBRK
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.S b/sysdeps/x86_64/multiarch/strcspn-sse2.S
new file mode 100644
index 0000000000..8a0c69d7f5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcspn-sse2.S
@@ -0,0 +1,28 @@
+/* strcspn optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define strcspn __strcspn_sse2	/* Renames strcspn in the file included below.  */
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcspn)	/* Expands to nothing.  */
+#endif
+
+#include <sysdeps/x86_64/strcspn.S>
diff --git a/sysdeps/x86_64/multiarch/strcspn.S b/sysdeps/x86_64/multiarch/strcspn.S
deleted file mode 100644
index 8e7ff1c663..0000000000
--- a/sysdeps/x86_64/multiarch/strcspn.S
+++ /dev/null
@@ -1,69 +0,0 @@
-/* Multiple versions of strcspn
-   All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
-   Contributed by Intel Corporation.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <config.h>
-#include <sysdep.h>
-#include <init-arch.h>
-
-#ifdef USE_AS_STRPBRK
-#define STRCSPN_SSE42	__strpbrk_sse42
-#define STRCSPN_SSE2	__strpbrk_sse2
-#define __GI_STRCSPN	__GI_strpbrk
-#else
-#ifndef STRCSPN
-#define STRCSPN		strcspn
-#define STRCSPN_SSE42	__strcspn_sse42
-#define STRCSPN_SSE2	__strcspn_sse2
-#define __GI_STRCSPN	__GI_strcspn
-#endif
-#endif
-
-/* Define multiple versions only for the definition in libc.  Don't
-   define multiple versions for strpbrk in static library since we
-   need strpbrk before the initialization happened.  */
-#if (defined SHARED || !defined USE_AS_STRPBRK) && IS_IN (libc)
-	.text
-ENTRY(STRCSPN)
-	.type	STRCSPN, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	STRCSPN_SSE2(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_2)
-	jz	2f
-	leaq	STRCSPN_SSE42(%rip), %rax
-2:	ret
-END(STRCSPN)
-
-# undef ENTRY
-# define ENTRY(name) \
-	.type STRCSPN_SSE2, @function; \
-	.globl STRCSPN_SSE2; \
-	.align 16; \
-	STRCSPN_SSE2: cfi_startproc; \
-	CALL_MCOUNT
-# undef END
-# define END(name) \
-	cfi_endproc; .size STRCSPN_SSE2, .-STRCSPN_SSE2
-#endif
-
-#ifdef USE_AS_STRPBRK
-#include "../strpbrk.S"
-#else
-#include "../strcspn.S"
-#endif
diff --git a/sysdeps/x86_64/multiarch/strcspn.c b/sysdeps/x86_64/multiarch/strcspn.c
new file mode 100644
index 0000000000..9712e8410c
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strcspn.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strcspn.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strcspn __redirect_strcspn	/* <string.h> then declares this name.  */
+# include <string.h>
+# undef strcspn
+
+# define SYMBOL_NAME strcspn
+# include "ifunc-sse4_2.h"	/* Presumably defines IFUNC_SELECTOR, used below.  */
+
+libc_ifunc_redirected (__redirect_strcspn, strcspn, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strcspn, __GI_strcspn, __redirect_strcspn)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
new file mode 100644
index 0000000000..fb2418cddc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
@@ -0,0 +1,393 @@
+/* strlen/strnlen/wcslen/wcsnlen optimized with AVX2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRLEN
+#  define STRLEN	__strlen_avx2	/* Default entry name.  */
+# endif
+
+# ifdef USE_AS_WCSLEN
+#  define VPCMPEQ	vpcmpeqd	/* Compare 4-byte elements.  */
+#  define VPMINU	vpminud	/* Unsigned minimum of 4-byte elements.  */
+# else
+#  define VPCMPEQ	vpcmpeqb	/* Compare bytes.  */
+#  define VPMINU	vpminub	/* Unsigned minimum of bytes.  */
+# endif
+
+# ifndef VZEROUPPER
+#  define VZEROUPPER	vzeroupper
+# endif
+
+# define VEC_SIZE 32	/* Bytes in one ymm register.  */
+
+	.section .text.avx,"ax",@progbits
+ENTRY (STRLEN)
+# ifdef USE_AS_STRNLEN
+	/* Check for zero length.  */
+	testq	%rsi, %rsi
+	jz	L(zero)
+#  ifdef USE_AS_WCSLEN
+	shl	$2, %rsi		/* Scale the length by 4: wide chars to bytes.  */
+#  endif
+	movq	%rsi, %r8		/* Save the maximum; L(max) returns it.  */
+# endif
+	movl	%edi, %ecx
+	movq	%rdi, %rdx		/* Keep the start for the final subtraction.  */
+	vpxor	%xmm0, %xmm0, %xmm0
+
+	/* Check if we may cross page boundary with one vector load.  */
+	andl	$(2 * VEC_SIZE - 1), %ecx
+	cmpl	$VEC_SIZE, %ecx
+	ja	L(cros_page_boundary)
+
+	/* Check the first VEC_SIZE bytes.  */
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+
+# ifdef USE_AS_STRNLEN
+	jnz	L(first_vec_x0_check)
+	/* Adjust length and check the end of data.  */
+	subq	$VEC_SIZE, %rsi
+	jbe	L(max)
+# else
+	jnz	L(first_vec_x0)
+# endif
+
+	/* Align data for aligned loads in the loop.  */
+	addq	$VEC_SIZE, %rdi
+	andl	$(VEC_SIZE - 1), %ecx
+	andq	$-VEC_SIZE, %rdi
+
+# ifdef USE_AS_STRNLEN
+	/* Adjust length for the %rcx bytes that are scanned again.  */
+	addq	%rcx, %rsi
+
+	subq	$(VEC_SIZE * 4), %rsi
+	jbe	L(last_4x_vec_or_less)
+# endif
+	jmp	L(more_4x_vec)
+
+	.p2align 4
+L(cros_page_boundary):
+	andl	$(VEC_SIZE - 1), %ecx	/* %ecx = offset within the vector.  */
+	andq	$-VEC_SIZE, %rdi	/* Round %rdi down to VEC_SIZE.  */
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	/* Shift out the mask bits for bytes before the start.  */
+	sarl	%cl, %eax
+	testl	%eax, %eax
+	jz	L(aligned_more)
+	tzcntl	%eax, %eax
+# ifdef USE_AS_STRNLEN
+	/* Check the end of data.  */
+	cmpq	%rax, %rsi
+	jbe	L(max)
+# endif
+	addq	%rdi, %rax
+	addq	%rcx, %rax
+	subq	%rdx, %rax
+# ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(aligned_more):
+# ifdef USE_AS_STRNLEN
+	/* "rcx" is less than VEC_SIZE.  Calculate "rsi + rcx - VEC_SIZE"
+	   with "rsi - (VEC_SIZE - rcx)" instead of "(rsi + rcx) - VEC_SIZE"
+	   to avoid possible addition overflow.  */
+	negq	%rcx
+	addq	$VEC_SIZE, %rcx
+
+	/* Check the end of data.  */
+	subq	%rcx, %rsi
+	jbe	L(max)
+# endif
+
+	addq	$VEC_SIZE, %rdi
+
+# ifdef USE_AS_STRNLEN
+	subq	$(VEC_SIZE * 4), %rsi
+	jbe	L(last_4x_vec_or_less)
+# endif
+
+L(more_4x_vec):
+	/* Check the first 4 * VEC_SIZE.  Only one VEC_SIZE at a time
+	   since data is only aligned to VEC_SIZE.  */
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+
+	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+
+	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x3)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+# ifdef USE_AS_STRNLEN
+	subq	$(VEC_SIZE * 4), %rsi
+	jbe	L(last_4x_vec_or_less)
+# endif
+
+	/* Align data to 4 * VEC_SIZE.  */
+	movq	%rdi, %rcx
+	andl	$(4 * VEC_SIZE - 1), %ecx	/* %ecx = bytes %rdi moves back.  */
+	andq	$-(4 * VEC_SIZE), %rdi
+
+# ifdef USE_AS_STRNLEN
+	/* Adjust length for the %rcx bytes that are scanned again.  */
+	addq	%rcx, %rsi
+# endif
+
+	.p2align 4
+L(loop_4x_vec):
+	/* Compare 4 * VEC at a time forward.  */
+	vmovdqa (%rdi), %ymm1
+	vmovdqa	VEC_SIZE(%rdi), %ymm2
+	vmovdqa	(VEC_SIZE * 2)(%rdi), %ymm3
+	vmovdqa	(VEC_SIZE * 3)(%rdi), %ymm4
+	VPMINU	%ymm1, %ymm2, %ymm5
+	VPMINU	%ymm3, %ymm4, %ymm6
+	VPMINU	%ymm5, %ymm6, %ymm5	/* Zero wherever any of the four is zero.  */
+
+	VPCMPEQ	%ymm5, %ymm0, %ymm5
+	vpmovmskb %ymm5, %eax
+	testl	%eax, %eax
+	jnz	L(4x_vec_end)
+
+	addq	$(VEC_SIZE * 4), %rdi
+
+# ifndef USE_AS_STRNLEN
+	jmp	L(loop_4x_vec)
+# else
+	subq	$(VEC_SIZE * 4), %rsi
+	ja	L(loop_4x_vec)
+
+L(last_4x_vec_or_less):
+	/* At most 4 * VEC remain and %rdi is aligned to VEC_SIZE.  */
+	addl	$(VEC_SIZE * 2), %esi	/* %esi = remaining - 2 * VEC_SIZE.  */
+	jle	L(last_2x_vec)
+
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+
+	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+
+	VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+
+	jnz	L(first_vec_x2_check)
+	subl	$VEC_SIZE, %esi		/* %esi = remaining - 3 * VEC_SIZE.  */
+	jle	L(max)
+
+	VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+
+	jnz	L(first_vec_x3_check)
+	movq	%r8, %rax		/* No null found: return the maximum.  */
+#  ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+#  endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(last_2x_vec):
+	addl	$(VEC_SIZE * 2), %esi	/* Restore %esi to the remaining length.  */
+	VPCMPEQ (%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+
+	jnz	L(first_vec_x0_check)
+	subl	$VEC_SIZE, %esi
+	jle	L(max)
+
+	VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1_check)
+	movq	%r8, %rax		/* No null found: return the maximum.  */
+#  ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+#  endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x0_check):
+	tzcntl	%eax, %eax		/* Index of the first null in this vector.  */
+	/* Check the end of data.  */
+	cmpq	%rax, %rsi
+	jbe	L(max)
+	addq	%rdi, %rax
+	subq	%rdx, %rax		/* Byte length = null address - start.  */
+#  ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+#  endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x1_check):
+	tzcntl	%eax, %eax
+	/* Check the end of data.  */
+	cmpq	%rax, %rsi
+	jbe	L(max)
+	addq	$VEC_SIZE, %rax		/* The null is in the second vector.  */
+	addq	%rdi, %rax
+	subq	%rdx, %rax
+#  ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+#  endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x2_check):
+	tzcntl	%eax, %eax
+	/* Check the end of data.  */
+	cmpq	%rax, %rsi
+	jbe	L(max)
+	addq	$(VEC_SIZE * 2), %rax	/* The null is in the third vector.  */
+	addq	%rdi, %rax
+	subq	%rdx, %rax
+#  ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+#  endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x3_check):
+	tzcntl	%eax, %eax
+	/* Check the end of data.  */
+	cmpq	%rax, %rsi
+	jbe	L(max)
+	addq	$(VEC_SIZE * 3), %rax	/* The null is in the fourth vector.  */
+	addq	%rdi, %rax
+	subq	%rdx, %rax
+#  ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+#  endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(max):
+	movq	%r8, %rax		/* Return the maximum saved at entry.  */
+#  ifdef USE_AS_WCSLEN
+	shrq	$2, %rax		/* Bytes back to wide chars.  */
+#  endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(zero):
+	xorl	%eax, %eax		/* Zero maximum length: return 0.  */
+	ret
+# endif
+
+	.p2align 4
+L(first_vec_x0):
+	tzcntl	%eax, %eax		/* Index of the first null in this vector.  */
+	addq	%rdi, %rax
+	subq	%rdx, %rax		/* Byte length = null address - start.  */
+# ifdef USE_AS_WCSLEN
+	shrq	$2, %rax		/* Bytes to wide chars.  */
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x1):
+	tzcntl	%eax, %eax
+	addq	$VEC_SIZE, %rax		/* The null is in the second vector.  */
+	addq	%rdi, %rax
+	subq	%rdx, %rax
+# ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(first_vec_x2):
+	tzcntl	%eax, %eax
+	addq	$(VEC_SIZE * 2), %rax	/* The null is in the third vector.  */
+	addq	%rdi, %rax
+	subq	%rdx, %rax
+# ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+# endif
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(4x_vec_end):
+	VPCMPEQ	%ymm1, %ymm0, %ymm1	/* Recheck each vector for the null.  */
+	vpmovmskb %ymm1, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x0)
+	VPCMPEQ %ymm2, %ymm0, %ymm2
+	vpmovmskb %ymm2, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x1)
+	VPCMPEQ %ymm3, %ymm0, %ymm3
+	vpmovmskb %ymm3, %eax
+	testl	%eax, %eax
+	jnz	L(first_vec_x2)
+	VPCMPEQ %ymm4, %ymm0, %ymm4
+	vpmovmskb %ymm4, %eax		/* No test: falls through to x3.  */
+L(first_vec_x3):
+	tzcntl	%eax, %eax
+	addq	$(VEC_SIZE * 3), %rax	/* The null is in the fourth vector.  */
+	addq	%rdi, %rax
+	subq	%rdx, %rax
+# ifdef USE_AS_WCSLEN
+	shrq	$2, %rax
+# endif
+	VZEROUPPER
+	ret
+
+END (STRLEN)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S
new file mode 100644
index 0000000000..7bc57b8d0f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen-sse2.S
@@ -0,0 +1,23 @@
+/* strlen optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define strlen __strlen_sse2
+#endif
+
+#include "../strlen.S"
diff --git a/sysdeps/x86_64/multiarch/strlen.c b/sysdeps/x86_64/multiarch/strlen.c
new file mode 100644
index 0000000000..1758d22b8f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strlen.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strlen.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strlen __redirect_strlen
+# include <string.h>
+# undef strlen
+
+# define SYMBOL_NAME strlen
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ());
+# ifdef SHARED
+__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen)
+  __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncase.c b/sysdeps/x86_64/multiarch/strncase.c
new file mode 100644
index 0000000000..798966cf3e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strncasecmp.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strncasecmp __redirect_strncasecmp
+# define __strncasecmp __redirect___strncasecmp
+# include <string.h>
+# undef strncasecmp
+# undef __strncasecmp
+
+# define SYMBOL_NAME strncasecmp
+# include "ifunc-strcasecmp.h"
+
+libc_ifunc_redirected (__redirect_strncasecmp, __strncasecmp,
+		       IFUNC_SELECTOR ());
+
+weak_alias (__strncasecmp, strncasecmp)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx.S b/sysdeps/x86_64/multiarch/strncase_l-avx.S
new file mode 100644
index 0000000000..0c4e525bd4
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l-avx.S
@@ -0,0 +1,22 @@
+/* strncasecmp_l optimized with AVX.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define STRCMP_SSE42 __strncasecmp_l_avx
+#define USE_AVX 1
+#define USE_AS_STRNCASECMP_L
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strncase_l-sse2.S b/sysdeps/x86_64/multiarch/strncase_l-sse2.S
new file mode 100644
index 0000000000..e7841334b7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l-sse2.S
@@ -0,0 +1,23 @@
+/* strncasecmp_l optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define STRCMP __strncasecmp_l_sse2
+#define NO_NOLOCALE_ALIAS
+#define USE_AS_STRNCASECMP_L
+#define __strncasecmp __strncasecmp_sse2
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strncase_l-sse4_2.S b/sysdeps/x86_64/multiarch/strncase_l-sse4_2.S
new file mode 100644
index 0000000000..d2ea88c4ce
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l-sse4_2.S
@@ -0,0 +1,21 @@
+/* strncasecmp_l optimized with SSE4.2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define STRCMP_SSE42 __strncasecmp_l_sse42
+#define USE_AS_STRNCASECMP_L
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strncase_l.S b/sysdeps/x86_64/multiarch/strncase_l.S
deleted file mode 100644
index 9c0149788e..0000000000
--- a/sysdeps/x86_64/multiarch/strncase_l.S
+++ /dev/null
@@ -1,8 +0,0 @@
-/* Multiple versions of strncasecmp and strncasecmp_l
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCMP __strncasecmp_l
-#define USE_AS_STRNCASECMP_L
-#include "strcmp.S"
-
-weak_alias (__strncasecmp_l, strncasecmp_l)
-libc_hidden_def (strncasecmp_l)
diff --git a/sysdeps/x86_64/multiarch/strncase_l.c b/sysdeps/x86_64/multiarch/strncase_l.c
new file mode 100644
index 0000000000..97631cf401
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncase_l.c
@@ -0,0 +1,40 @@
+/* Multiple versions of strncasecmp_l.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strncasecmp_l __redirect_strncasecmp_l
+# define __strncasecmp_l __redirect___strncasecmp_l
+# include <string.h>
+# undef strncasecmp_l
+# undef __strncasecmp_l
+
+# define SYMBOL_NAME strncasecmp_l
+# include "ifunc-strcasecmp.h"
+
+libc_ifunc_redirected (__redirect_strncasecmp_l, __strncasecmp_l,
+		       IFUNC_SELECTOR ());
+
+weak_alias (__strncasecmp_l, strncasecmp_l)
+# ifdef SHARED
+__hidden_ver1 (__strncasecmp_l, __GI___strncasecmp_l,
+	       __redirect___strncasecmp_l)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
index a3cdbff689..93a7fab7ea 100644
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ b/sysdeps/x86_64/multiarch/strncat-c.c
@@ -1,8 +1,2 @@
 #define STRNCAT __strncat_sse2
-#ifdef SHARED
-#undef libc_hidden_def
-#define libc_hidden_def(name) \
-  __hidden_ver1 (__strncat_sse2, __GI___strncat, __strncat_sse2);
-#endif
-
-#include "string/strncat.c"
+#include <string/strncat.c>
diff --git a/sysdeps/x86_64/multiarch/strncat.S b/sysdeps/x86_64/multiarch/strncat.S
deleted file mode 100644
index 5c1bf41453..0000000000
--- a/sysdeps/x86_64/multiarch/strncat.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncat
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCAT strncat
-#define USE_AS_STRNCAT
-#include "strcat.S"
diff --git a/sysdeps/x86_64/multiarch/strncat.c b/sysdeps/x86_64/multiarch/strncat.c
new file mode 100644
index 0000000000..841c165565
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncat.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strncat.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strncat __redirect_strncat
+# include <string.h>
+# undef strncat
+
+# define SYMBOL_NAME strncat
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_strncat, strncat, IFUNC_SELECTOR ());
+strong_alias (strncat, __strncat);
+# ifdef SHARED
+__hidden_ver1 (strncat, __GI___strncat, __redirect_strncat)
+  __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2.S b/sysdeps/x86_64/multiarch/strncmp-avx2.S
new file mode 100644
index 0000000000..1678bcc235
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncmp-avx2.S
@@ -0,0 +1,3 @@
+#define STRCMP	__strncmp_avx2
+#define USE_AS_STRNCMP 1
+#include "strcmp-avx2.S"
diff --git a/sysdeps/x86_64/fpu/test-double-vlen2.c b/sysdeps/x86_64/multiarch/strncmp-sse2.S
index c7a3dff747..a5ecb82b13 100644
--- a/sysdeps/x86_64/fpu/test-double-vlen2.c
+++ b/sysdeps/x86_64/multiarch/strncmp-sse2.S
@@ -1,5 +1,5 @@
-/* Tests for SSE ISA versions of vector math functions.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* strncmp optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,13 +16,15 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include "test-double-vlen2.h"
+#include <sysdep.h>
 
-#define TEST_VECTOR_cos 1
-#define TEST_VECTOR_sin 1
-#define TEST_VECTOR_sincos 1
-#define TEST_VECTOR_log 1
-#define TEST_VECTOR_exp 1
-#define TEST_VECTOR_pow 1
+#if IS_IN (libc)
+# define STRCMP __strncmp_sse2
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strcmp)
+#else
+# define STRCMP strncmp
+#endif
 
-#include "libm-test.c"
+#define USE_AS_STRNCMP
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strncmp-sse4_2.S b/sysdeps/x86_64/multiarch/strncmp-sse4_2.S
new file mode 100644
index 0000000000..b859c1eb74
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncmp-sse4_2.S
@@ -0,0 +1,21 @@
+/* strncmp optimized with SSE4.2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define STRCMP_SSE42 __strncmp_sse42
+#define USE_AS_STRNCMP
+#include "strcmp-sse42.S"
diff --git a/sysdeps/x86_64/multiarch/strncmp-ssse3.S b/sysdeps/x86_64/multiarch/strncmp-ssse3.S
index 96380a46be..fa43484b54 100644
--- a/sysdeps/x86_64/multiarch/strncmp-ssse3.S
+++ b/sysdeps/x86_64/multiarch/strncmp-ssse3.S
@@ -1,6 +1,28 @@
-#ifdef SHARED
-# define USE_SSSE3 1
-# define STRCMP __strncmp_ssse3
-# define USE_AS_STRNCMP
-# include "../strcmp.S"
-#endif
+/* strncmp optimized with SSSE3.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define STRCMP __strncmp_ssse3
+
+#undef libc_hidden_builtin_def
+#define libc_hidden_builtin_def(strcmp)
+
+#define USE_SSSE3 1
+#define USE_AS_STRNCMP
+#include <sysdeps/x86_64/strcmp.S>
diff --git a/sysdeps/x86_64/multiarch/strncmp.S b/sysdeps/x86_64/multiarch/strncmp.S
deleted file mode 100644
index fd5eb1397c..0000000000
--- a/sysdeps/x86_64/multiarch/strncmp.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncmp
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCMP strncmp
-#define USE_AS_STRNCMP
-#include "strcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
new file mode 100644
index 0000000000..02b6d0b6f5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncmp.c
@@ -0,0 +1,60 @@
+/* Multiple versions of strncmp.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strncmp __redirect_strncmp
+# include <string.h>
+# undef strncmp
+
+# define SYMBOL_NAME strncmp
+# include <init-arch.h>
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    return OPTIMIZE (avx2);
+
+  if (CPU_FEATURES_CPU_P (cpu_features, SSE4_2)
+      && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
+    return OPTIMIZE (sse42);
+
+  if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+    return OPTIMIZE (ssse3);
+
+  return OPTIMIZE (sse2);
+}
+
+libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
index 296c32cb5d..57c45ac7ab 100644
--- a/sysdeps/x86_64/multiarch/strncpy-c.c
+++ b/sysdeps/x86_64/multiarch/strncpy-c.c
@@ -1,8 +1,5 @@
 #define STRNCPY __strncpy_sse2
-#ifdef SHARED
 #undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name) \
-  __hidden_ver1 (__strncpy_sse2, __GI_strncpy, __strncpy_sse2);
-#endif
+#define libc_hidden_builtin_def(strncpy)
 
-#include "strncpy.c"
+#include <string/strncpy.c>
diff --git a/sysdeps/x86_64/multiarch/strncpy.S b/sysdeps/x86_64/multiarch/strncpy.S
deleted file mode 100644
index 6d87a0ba35..0000000000
--- a/sysdeps/x86_64/multiarch/strncpy.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strncpy
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCPY strncpy
-#define USE_AS_STRNCPY
-#include "strcpy.S"
diff --git a/sysdeps/x86_64/multiarch/strncpy.c b/sysdeps/x86_64/multiarch/strncpy.c
new file mode 100644
index 0000000000..3c3de8b18e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strncpy.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strncpy.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strncpy __redirect_strncpy
+# include <string.h>
+# undef strncpy
+
+# define SYMBOL_NAME strncpy
+# include "ifunc-unaligned-ssse3.h"
+
+libc_ifunc_redirected (__redirect_strncpy, strncpy, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strncpy, __GI_strncpy, __redirect_strncpy)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2.S b/sysdeps/x86_64/multiarch/strnlen-avx2.S
new file mode 100644
index 0000000000..c4062b22f7
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strnlen-avx2.S
@@ -0,0 +1,4 @@
+#define STRLEN __strnlen_avx2
+#define USE_AS_STRNLEN 1
+
+#include "strlen-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/strnlen-sse2.S b/sysdeps/x86_64/multiarch/strnlen-sse2.S
new file mode 100644
index 0000000000..41f33f6f6f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strnlen-sse2.S
@@ -0,0 +1,28 @@
+/* strnlen optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define __strnlen __strnlen_sse2
+
+# undef weak_alias
+# define weak_alias(__strnlen, strnlen)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strnlen)
+#endif
+
+#include "../strnlen.S"
diff --git a/sysdeps/x86_64/multiarch/strnlen.c b/sysdeps/x86_64/multiarch/strnlen.c
new file mode 100644
index 0000000000..3ab94ce230
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strnlen.c
@@ -0,0 +1,39 @@
+/* Multiple versions of strnlen.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strnlen __redirect_strnlen
+# define __strnlen __redirect___strnlen
+# include <string.h>
+# undef __strnlen
+# undef strnlen
+
+# define SYMBOL_NAME strnlen
+# include "ifunc-avx2.h"
+
+libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ());
+weak_alias (__strnlen, strnlen);
+# ifdef SHARED
+__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen)
+  __attribute__((visibility ("hidden")));
+__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen)
+  __attribute__((weak, visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strpbrk-c.c b/sysdeps/x86_64/multiarch/strpbrk-c.c
index bbf5c49d89..c58dcb5605 100644
--- a/sysdeps/x86_64/multiarch/strpbrk-c.c
+++ b/sysdeps/x86_64/multiarch/strpbrk-c.c
@@ -1,8 +1,4 @@
-/* Don't define multiple versions for strpbrk in static library since we
-   need strpbrk before the initialization happened.  */
-#ifdef SHARED
-# define USE_AS_STRPBRK
-# define STRCSPN_SSE2 __strpbrk_sse2
-# define STRCSPN_SSE42 __strpbrk_sse42
-# include "strcspn-c.c"
-#endif
+#define USE_AS_STRPBRK
+#define STRCSPN_SSE2 __strpbrk_sse2
+#define STRCSPN_SSE42 __strpbrk_sse42
+#include "strcspn-c.c"
diff --git a/sysdeps/x86_64/multiarch/strpbrk-sse2.S b/sysdeps/x86_64/multiarch/strpbrk-sse2.S
new file mode 100644
index 0000000000..3c6a74db29
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strpbrk-sse2.S
@@ -0,0 +1,29 @@
+/* strpbrk optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+# define strcspn __strpbrk_sse2
+
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strpbrk)
+#endif
+
+#define USE_AS_STRPBRK
+#include <sysdeps/x86_64/strcspn.S>
diff --git a/sysdeps/x86_64/multiarch/strpbrk.S b/sysdeps/x86_64/multiarch/strpbrk.S
deleted file mode 100644
index 7201d6376f..0000000000
--- a/sysdeps/x86_64/multiarch/strpbrk.S
+++ /dev/null
@@ -1,5 +0,0 @@
-/* Multiple versions of strpbrk
-   All versions must be listed in ifunc-impl-list.c.  */
-#define STRCSPN strpbrk
-#define USE_AS_STRPBRK
-#include "strcspn.S"
diff --git a/sysdeps/x86_64/multiarch/strpbrk.c b/sysdeps/x86_64/multiarch/strpbrk.c
new file mode 100644
index 0000000000..a0d435a504
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strpbrk.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strpbrk.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strpbrk __redirect_strpbrk
+# include <string.h>
+# undef strpbrk
+
+# define SYMBOL_NAME strpbrk
+# include "ifunc-sse4_2.h"
+
+libc_ifunc_redirected (__redirect_strpbrk, strpbrk, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strpbrk, __GI_strpbrk, __redirect_strpbrk)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S
new file mode 100644
index 0000000000..4381e6ab3e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S
@@ -0,0 +1,235 @@
+/* strrchr/wcsrchr optimized with AVX2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+
+# include <sysdep.h>
+
+# ifndef STRRCHR
+#  define STRRCHR	__strrchr_avx2
+# endif
+
+# ifdef USE_AS_WCSRCHR
+#  define VPBROADCAST	vpbroadcastd
+#  define VPCMPEQ	vpcmpeqd
+# else
+#  define VPBROADCAST	vpbroadcastb
+#  define VPCMPEQ	vpcmpeqb
+# endif
+
+# ifndef VZEROUPPER
+#  define VZEROUPPER	vzeroupper
+# endif
+
+# define VEC_SIZE	32
+
+	.section .text.avx,"ax",@progbits
+ENTRY (STRRCHR)
+	movd	%esi, %xmm4
+	movl	%edi, %ecx
+	/* Broadcast CHAR to YMM4.  */
+	VPBROADCAST %xmm4, %ymm4
+	vpxor	%ymm0, %ymm0, %ymm0	/* YMM0 = 0, used to match the nul CHAR.  */
+
+	/* Check if we may cross page boundary with one vector load.  */
+	andl	$(2 * VEC_SIZE - 1), %ecx	/* ECX = RDI % (2 * VEC_SIZE).  */
+	cmpl	$VEC_SIZE, %ecx
+	ja	L(cros_page_boundary)
+
+	vmovdqu	(%rdi), %ymm1
+	VPCMPEQ	%ymm1, %ymm0, %ymm2
+	VPCMPEQ	%ymm1, %ymm4, %ymm3
+	vpmovmskb %ymm2, %ecx	/* ECX = nul CHAR mask.  */
+	vpmovmskb %ymm3, %eax	/* EAX = CHAR match mask.  */
+	addq	$VEC_SIZE, %rdi
+
+	testl	%eax, %eax
+	jnz	L(first_vec)
+
+	testl	%ecx, %ecx
+	jnz	L(return_null)	/* Nul CHAR but no CHAR in the first vector.  */
+
+	andq	$-VEC_SIZE, %rdi	/* Align RDI for the vmovdqa loop.  */
+	xorl	%edx, %edx	/* EDX = 0: no match remembered yet.  */
+	jmp	L(aligned_loop)
+
+	.p2align 4
+L(first_vec):
+	/* A CHAR is in the first vector.  Check if there is a nul CHAR.  */
+	testl	%ecx, %ecx
+	jnz	L(char_and_nul_in_first_vec)
+
+	/* Remember the match in EDX/RSI and keep searching.  */
+	movl	%eax, %edx
+	movq	%rdi, %rsi
+	andq	$-VEC_SIZE, %rdi	/* Align RDI for the vmovdqa loop.  */
+	jmp	L(aligned_loop)
+
+	.p2align 4
+L(cros_page_boundary):
+	andl	$(VEC_SIZE - 1), %ecx	/* ECX = RDI % VEC_SIZE.  */
+	andq	$-VEC_SIZE, %rdi	/* Round RDI down for an aligned load.  */
+	vmovdqa	(%rdi), %ymm1
+	VPCMPEQ	%ymm1, %ymm0, %ymm2
+	VPCMPEQ	%ymm1, %ymm4, %ymm3
+	vpmovmskb %ymm2, %edx	/* EDX = nul CHAR mask.  */
+	vpmovmskb %ymm3, %eax	/* EAX = CHAR match mask.  */
+	shrl	%cl, %edx	/* Drop bits for bytes before the string.  */
+	shrl	%cl, %eax
+	addq	$VEC_SIZE, %rdi
+
+	/* Check if there is a CHAR.  */
+	testl	%eax, %eax
+	jnz	L(found_char)
+
+	testl	%edx, %edx
+	jnz	L(return_null)
+
+	jmp	L(aligned_loop)	/* EDX is 0 here: no match remembered.  */
+
+	.p2align 4
+L(found_char):
+	testl	%edx, %edx	/* EDX still holds the nul CHAR mask.  */
+	jnz	L(char_and_nul)
+
+	/* Remember the match and fall through to the aligned loop.  */
+	movl	%eax, %edx
+	leaq	(%rdi, %rcx), %rsi	/* EAX was shifted by CL: bias RSI by RCX.  */
+
+	.p2align 4
+L(aligned_loop):
+	vmovdqa	(%rdi), %ymm1	/* Aligned load of the next vector.  */
+	VPCMPEQ	%ymm1, %ymm0, %ymm2	/* YMM2: nul CHAR matches.  */
+	addq	$VEC_SIZE, %rdi	/* RDI now points past this vector.  */
+	VPCMPEQ	%ymm1, %ymm4, %ymm3	/* YMM3: CHAR matches.  */
+	vpmovmskb %ymm2, %ecx
+	vpmovmskb %ymm3, %eax
+	orl	%eax, %ecx	/* Non-zero if a CHAR or a nul CHAR was seen.  */
+	jnz	L(char_nor_null)
+
+	vmovdqa	(%rdi), %ymm1	/* Same step again; 4 copies per iteration.  */
+	VPCMPEQ	%ymm1, %ymm0, %ymm2
+	add	$VEC_SIZE, %rdi
+	VPCMPEQ	%ymm1, %ymm4, %ymm3
+	vpmovmskb %ymm2, %ecx
+	vpmovmskb %ymm3, %eax
+	orl	%eax, %ecx
+	jnz	L(char_nor_null)
+
+	vmovdqa	(%rdi), %ymm1
+	VPCMPEQ	%ymm1, %ymm0, %ymm2
+	addq	$VEC_SIZE, %rdi
+	VPCMPEQ	%ymm1, %ymm4, %ymm3
+	vpmovmskb %ymm2, %ecx
+	vpmovmskb %ymm3, %eax
+	orl	%eax, %ecx
+	jnz	L(char_nor_null)
+
+	vmovdqa	(%rdi), %ymm1
+	VPCMPEQ	%ymm1, %ymm0, %ymm2
+	addq	$VEC_SIZE, %rdi
+	VPCMPEQ	%ymm1, %ymm4, %ymm3
+	vpmovmskb %ymm2, %ecx
+	vpmovmskb %ymm3, %eax
+	orl	%eax, %ecx
+	jz	L(aligned_loop)	/* Otherwise fall through.  */
+
+	.p2align 4
+L(char_nor_null):
+	/* The loop found a CHAR or a nul CHAR.  EAX is the CHAR mask.  */
+	testl	%eax, %eax
+	jnz	L(match)
+L(return_value):
+	testl	%edx, %edx	/* EDX = remembered match mask, 0 if none.  */
+	jz	L(return_null)
+	movl	%edx, %eax
+	movq	%rsi, %rdi	/* RSI = pointer saved with that match.  */
+
+# ifdef USE_AS_WCSRCHR
+	/* Keep only the lowest mask bit of each 4-byte CHAR for bsr.  */
+	andl	$0x11111111, %eax
+# endif
+	bsrl	%eax, %eax	/* Highest set bit = last matching CHAR.  */
+	leaq	-VEC_SIZE(%rdi, %rax), %rax	/* RDI is VEC_SIZE past the vector.  */
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(match):
+	/* Found a CHAR.  Reload the nul mask (the loop OR'ed EAX into ECX).  */
+	vpmovmskb %ymm2, %ecx
+	testl	%ecx, %ecx
+	jnz	L(find_nul)
+
+	/* Remember the match in EDX/RSI and keep searching.  */
+	movl	%eax, %edx
+	movq	%rdi, %rsi
+	jmp	L(aligned_loop)
+
+	.p2align 4
+L(find_nul):
+# ifdef USE_AS_WCSRCHR
+	/* Keep only the lowest mask bit of each 4-byte CHAR for bsr.  */
+	andl	$0x11111111, %ecx
+	andl	$0x11111111, %eax
+# endif
+	/* Mask out any matching bits after the nul CHAR.  */
+	movl	%ecx, %r8d
+	subl	$1, %r8d
+	xorl	%ecx, %r8d	/* R8D = bits up to and including the first nul.  */
+	andl	%r8d, %eax
+	testl	%eax, %eax
+	/* If there is no CHAR here, return the remembered one.  */
+	jz	L(return_value)
+	bsrl	%eax, %eax	/* Highest set bit = last CHAR before the nul.  */
+	leaq	-VEC_SIZE(%rdi, %rax), %rax	/* RDI is VEC_SIZE past the vector.  */
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(char_and_nul):
+	/* Found both a CHAR and a nul CHAR.  */
+	addq	%rcx, %rdi	/* The masks were shifted by CL: bias RDI by RCX.  */
+	movl	%edx, %ecx	/* ECX = nul mask, as the shared code below expects.  */
+L(char_and_nul_in_first_vec):
+# ifdef USE_AS_WCSRCHR
+	/* Keep only the lowest mask bit of each 4-byte CHAR for bsr.  */
+	andl	$0x11111111, %ecx
+	andl	$0x11111111, %eax
+# endif
+	/* Mask out any matching bits after the nul CHAR.  */
+	movl	%ecx, %r8d
+	subl	$1, %r8d
+	xorl	%ecx, %r8d	/* R8D = bits up to and including the first nul.  */
+	andl	%r8d, %eax
+	testl	%eax, %eax
+	/* Return null pointer if the nul CHAR comes first.  */
+	jz	L(return_null)
+	bsrl	%eax, %eax	/* Highest set bit = last CHAR before the nul.  */
+	leaq	-VEC_SIZE(%rdi, %rax), %rax	/* RDI is VEC_SIZE past the vector.  */
+	VZEROUPPER
+	ret
+
+	.p2align 4
+L(return_null):
+	xorl	%eax, %eax	/* Return a null pointer: no CHAR found.  */
+	VZEROUPPER
+	ret
+
+END (STRRCHR)
+#endif
diff --git a/sysdeps/x86_64/multiarch/strrchr-sse2.S b/sysdeps/x86_64/multiarch/strrchr-sse2.S
new file mode 100644
index 0000000000..0ec76fe9cc
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strrchr-sse2.S
@@ -0,0 +1,28 @@
+/* strrchr optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define strrchr __strrchr_sse2
+/* Make the alias and hidden-def macros expand to nothing from here on.  */
+# undef weak_alias
+# define weak_alias(strrchr, rindex)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strrchr)
+#endif
+/* Include ../strrchr.S with the libc-only overrides above in effect.  */
+#include "../strrchr.S"
diff --git a/sysdeps/x86_64/multiarch/strrchr.c b/sysdeps/x86_64/multiarch/strrchr.c
new file mode 100644
index 0000000000..a719edde10
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strrchr.c
@@ -0,0 +1,34 @@
+/* Multiple versions of strrchr.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strrchr __redirect_strrchr
+# include <string.h>
+# undef strrchr
+/* SYMBOL_NAME is presumably read by ifunc-avx2.h -- TODO confirm.  */
+# define SYMBOL_NAME strrchr
+# include "ifunc-avx2.h"
+/* Pick strrchr via IFUNC_SELECTOR; rindex is a weak alias of it.  */
+libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ());
+weak_alias (strrchr, rindex);
+# ifdef SHARED
+__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr)
+  __attribute__((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index 9675f9360e..4554cff0c2 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -1,5 +1,5 @@
 /* strspn with SSE4.2 intrinsics
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -52,7 +52,7 @@
 
    We exit from the loop for case 1.  */
 
-extern size_t __strspn_sse2 (const char *, const char *);
+extern size_t __strspn_sse2 (const char *, const char *) attribute_hidden;
 
 
 size_t
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.S b/sysdeps/x86_64/multiarch/strspn-sse2.S
new file mode 100644
index 0000000000..4686cdd55d
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strspn-sse2.S
@@ -0,0 +1,28 @@
+/* strspn optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+/* sysdep.h comes first, presumably so the #undef below sticks -- verify.  */
+# include <sysdep.h>
+# define strspn __strspn_sse2
+/* Make libc_hidden_builtin_def expand to nothing from here on.  */
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(strspn)
+#endif
+/* Include strspn.S with the libc-only overrides above in effect.  */
+#include <sysdeps/x86_64/strspn.S>
diff --git a/sysdeps/x86_64/multiarch/strspn.c b/sysdeps/x86_64/multiarch/strspn.c
new file mode 100644
index 0000000000..56ab4d9558
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/strspn.c
@@ -0,0 +1,35 @@
+/* Multiple versions of strspn.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define strspn __redirect_strspn
+# include <string.h>
+# undef strspn
+/* SYMBOL_NAME is presumably read by ifunc-sse4_2.h -- TODO confirm.  */
+# define SYMBOL_NAME strspn
+# include "ifunc-sse4_2.h"
+/* Pick the strspn implementation through IFUNC_SELECTOR.  */
+libc_ifunc_redirected (__redirect_strspn, strspn, IFUNC_SELECTOR ());
+
+# ifdef SHARED
+__hidden_ver1 (strspn, __GI_strspn, __redirect_strspn)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
index 4ead1dfaf5..8188b8f643 100644
--- a/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
@@ -1,5 +1,5 @@
 /* strstr with unaligned loads
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index eecba2243e..30ce597a16 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -1,6 +1,6 @@
 /* Multiple versions of strstr.
    All versions must be listed in ifunc-impl-list.c.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/test-multiarch.c b/sysdeps/x86_64/multiarch/test-multiarch.c
index 4eb0c16cd8..aa872f27db 100644
--- a/sysdeps/x86_64/multiarch/test-multiarch.c
+++ b/sysdeps/x86_64/multiarch/test-multiarch.c
@@ -1,6 +1,6 @@
 /* Test CPU feature data.
    This file is part of the GNU C Library.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -16,7 +16,7 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <init-arch.h>
+#include <cpu-features.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/sysdeps/x86_64/multiarch/varshift.c b/sysdeps/x86_64/multiarch/varshift.c
index 7921be5b57..2838736544 100644
--- a/sysdeps/x86_64/multiarch/varshift.c
+++ b/sysdeps/x86_64/multiarch/varshift.c
@@ -1,5 +1,5 @@
 /* Helper for variable shifts of SSE registers.
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
+   Copyright (C) 2010-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/varshift.h b/sysdeps/x86_64/multiarch/varshift.h
index 7b27d0e9dd..76f2759874 100644
--- a/sysdeps/x86_64/multiarch/varshift.h
+++ b/sysdeps/x86_64/multiarch/varshift.h
@@ -1,5 +1,5 @@
 /* Helper for variable shifts of SSE registers.
-   Copyright (C) 2010-2016 Free Software Foundation, Inc.
+   Copyright (C) 2010-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2.S b/sysdeps/x86_64/multiarch/wcschr-avx2.S
new file mode 100644
index 0000000000..67726b6837
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-avx2.S
@@ -0,0 +1,3 @@
+#define STRCHR __wcschr_avx2	/* Presumably the entry-point name.  */
+#define USE_AS_WCSCHR 1		/* Presumably selects the wchar_t code.  */
+#include "strchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcschr-sse2.S b/sysdeps/x86_64/multiarch/wcschr-sse2.S
new file mode 100644
index 0000000000..67e4742ef1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr-sse2.S
@@ -0,0 +1,30 @@
+/* wcschr optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define __wcschr __wcschr_sse2
+/* Make the alias and hidden-def macros expand to nothing from here on.  */
+# undef weak_alias
+# define weak_alias(__wcschr, wcschr)
+# undef libc_hidden_def
+# define libc_hidden_def(__wcschr)
+# undef libc_hidden_weak
+# define libc_hidden_weak(wcschr)
+#endif
+/* Include ../wcschr.S with the libc-only overrides above in effect.  */
+#include "../wcschr.S"
diff --git a/sysdeps/x86_64/multiarch/wcschr.c b/sysdeps/x86_64/multiarch/wcschr.c
new file mode 100644
index 0000000000..20a03833b9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcschr.c
@@ -0,0 +1,39 @@
+/* Multiple versions of wcschr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define wcschr __redirect_wcschr
+# define __wcschr __redirect___wcschr
+# include <wchar.h>
+# undef wcschr
+# undef __wcschr
+/* SYMBOL_NAME is presumably read by ifunc-avx2.h -- TODO confirm.  */
+# define SYMBOL_NAME wcschr
+# include "ifunc-avx2.h"
+/* Pick __wcschr via IFUNC_SELECTOR; wcschr is a weak alias of it.  */
+libc_ifunc_redirected (__redirect_wcschr, __wcschr, IFUNC_SELECTOR ());
+weak_alias (__wcschr, wcschr);
+# ifdef SHARED
+__hidden_ver1 (__wcschr, __GI___wcschr, __redirect___wcschr)
+  __attribute__((visibility ("hidden")));
+__hidden_ver1 (wcschr, __GI_wcschr, __redirect_wcschr)
+  __attribute__((weak, visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2.S b/sysdeps/x86_64/multiarch/wcscmp-avx2.S
new file mode 100644
index 0000000000..e5da4da689
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscmp-avx2.S
@@ -0,0 +1,4 @@
+#define STRCMP __wcscmp_avx2
+#define USE_AS_WCSCMP 1
+/* Both macros are presumably read by strcmp-avx2.S -- TODO confirm.  */
+#include "strcmp-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcscmp-sse2.S b/sysdeps/x86_64/multiarch/wcscmp-sse2.S
new file mode 100644
index 0000000000..b129d1c073
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscmp-sse2.S
@@ -0,0 +1,23 @@
+/* wcscmp optimized with SSE2.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define __wcscmp __wcscmp_sse2
+#endif
+/* Include ../wcscmp.S; inside libc, __wcscmp now means __wcscmp_sse2.  */
+#include "../wcscmp.S"
diff --git a/sysdeps/x86_64/multiarch/wcscmp.c b/sysdeps/x86_64/multiarch/wcscmp.c
new file mode 100644
index 0000000000..74d92cf0f9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscmp.c
@@ -0,0 +1,37 @@
+/* Multiple versions of wcscmp.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define wcscmp __redirect_wcscmp
+# define __wcscmp __redirect___wcscmp
+# include <wchar.h>
+# undef wcscmp
+# undef __wcscmp
+/* SYMBOL_NAME is presumably read by ifunc-avx2.h -- TODO confirm.  */
+# define SYMBOL_NAME wcscmp
+# include "ifunc-avx2.h"
+/* Pick __wcscmp via IFUNC_SELECTOR; wcscmp is a weak alias of it.  */
+libc_ifunc_redirected (__redirect_wcscmp, __wcscmp, IFUNC_SELECTOR ());
+weak_alias (__wcscmp, wcscmp)
+/* NOTE(review): uses __redirect_wcscmp, not __redirect___wcscmp; verify.  */
+# ifdef SHARED
+__hidden_ver1 (__wcscmp, __GI___wcscmp, __redirect_wcscmp)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
index 341e57a5ca..ea1589052b 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
@@ -1,5 +1,5 @@
 /* wcscpy with SSSE3
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/multiarch/wcscpy.c b/sysdeps/x86_64/multiarch/wcscpy.c
new file mode 100644
index 0000000000..f23b1fd853
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcscpy.c
@@ -0,0 +1,44 @@
+/* Multiple versions of wcscpy.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define wcscpy __redirect_wcscpy
+# include <wchar.h>
+# undef wcscpy
+
+# define SYMBOL_NAME wcscpy
+# include <init-arch.h>
+/* The two candidate implementations, both declared attribute_hidden.  */
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+/* Return the ssse3 variant when the SSSE3 CPU bit is set, else sse2.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+
+  if (CPU_FEATURES_CPU_P (cpu_features, SSSE3))
+    return OPTIMIZE (ssse3);
+  /* No SSSE3: use the sse2 variant.  */
+  return OPTIMIZE (sse2);
+}
+/* Pick the wcscpy implementation through the selector above.  */
+libc_ifunc_redirected (__redirect_wcscpy, wcscpy, IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2.S b/sysdeps/x86_64/multiarch/wcslen-avx2.S
new file mode 100644
index 0000000000..c9224f1bc5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcslen-avx2.S
@@ -0,0 +1,4 @@
+#define STRLEN __wcslen_avx2
+#define USE_AS_WCSLEN 1
+/* Both macros are presumably read by strlen-avx2.S -- TODO confirm.  */
+#include "strlen-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcslen-sse2.S b/sysdeps/x86_64/multiarch/wcslen-sse2.S
new file mode 100644
index 0000000000..6031978363
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcslen-sse2.S
@@ -0,0 +1,26 @@
+/* wcslen optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define __wcslen __wcslen_sse2
+/* Make weak_alias expand to nothing from here on.  */
+# undef weak_alias
+# define weak_alias(__wcslen, wcslen)
+#endif
+/* Include ../wcslen.S with the libc-only overrides above in effect.  */
+#include "../wcslen.S"
diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.S b/sysdeps/x86_64/multiarch/wcslen.c
index 57a0eee5ba..6d06e47cbd 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_floor.S
+++ b/sysdeps/x86_64/multiarch/wcslen.c
@@ -1,6 +1,7 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
+/* Multiple versions of wcslen.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@gmail.come>, 2011.
 
    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
@@ -16,23 +17,15 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <machine/asm.h>
-#include <init-arch.h>
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define __wcslen __redirect_wcslen	/* In effect only for <wchar.h>.  */
+# include <wchar.h>
+# undef __wcslen
 
+# define SYMBOL_NAME wcslen
+# include "ifunc-avx2.h"
 
-ENTRY(__floor)
-	.type	__floor, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-	leaq	__floor_sse41(%rip), %rax
-	HAS_CPU_FEATURE (SSE4_1)
-	jnz	2f
-	leaq	__floor_c(%rip), %rax
-2:	ret
-END(__floor)
-weak_alias (__floor, floor)
-
-
-ENTRY(__floor_sse41)
-	roundsd	$1, %xmm0, %xmm0
-	ret
-END(__floor_sse41)
+libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ());
+weak_alias (__wcslen, wcslen);	/* wcslen is a weak alias of __wcslen.  */
+#endif /* IS_IN (libc) */
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
new file mode 100644
index 0000000000..4fa1de4d3f
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2.S
@@ -0,0 +1,5 @@
+#define STRCMP __wcsncmp_avx2
+#define USE_AS_STRNCMP 1
+#define USE_AS_WCSCMP 1
+/* All three macros are presumably read by strcmp-avx2.S -- TODO confirm.  */
+#include "strcmp-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-sse2.c b/sysdeps/x86_64/multiarch/wcsncmp-sse2.c
new file mode 100644
index 0000000000..2bc7b4f693
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsncmp-sse2.c
@@ -0,0 +1,20 @@
+/* wcsncmp optimized with SSE2.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define WCSNCMP __wcsncmp_sse2	/* Presumably the name wcsncmp.c defines.  */
+#include <wcsmbs/wcsncmp.c>
diff --git a/sysdeps/x86_64/multiarch/wcsncmp.c b/sysdeps/x86_64/multiarch/wcsncmp.c
new file mode 100644
index 0000000000..90e9a352d9
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsncmp.c
@@ -0,0 +1,31 @@
+/* Multiple versions of wcsncmp.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define wcsncmp __redirect_wcsncmp
+# define __wcsncmp __redirect___wcsncmp
+# include <wchar.h>
+# undef wcsncmp
+# undef __wcsncmp
+/* SYMBOL_NAME is presumably read by ifunc-avx2.h -- TODO confirm.  */
+# define SYMBOL_NAME wcsncmp
+# include "ifunc-avx2.h"
+/* Pick the wcsncmp implementation through IFUNC_SELECTOR.  */
+libc_ifunc_redirected (__redirect_wcsncmp, wcsncmp, IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S
new file mode 100644
index 0000000000..fac83546b5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2.S
@@ -0,0 +1,5 @@
+#define STRLEN __wcsnlen_avx2
+#define USE_AS_WCSLEN 1
+#define USE_AS_STRNLEN 1
+/* All three macros are presumably read by strlen-avx2.S -- TODO confirm.  */
+#include "strlen-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-c.c b/sysdeps/x86_64/multiarch/wcsnlen-c.c
new file mode 100644
index 0000000000..e1ec7cfbb5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen-c.c
@@ -0,0 +1,9 @@
+#if IS_IN (libc)
+# include <wchar.h>
+
+# define WCSNLEN __wcsnlen_sse2
+/* Declare the renamed function with the same type as wcsnlen.  */
+extern __typeof (wcsnlen) __wcsnlen_sse2;
+#endif
+/* WCSNLEN is presumably the name wcsmbs/wcsnlen.c defines -- verify.  */
+#include "wcsmbs/wcsnlen.c"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
new file mode 100644
index 0000000000..a8cab0cb00
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
@@ -0,0 +1,5 @@
+#define AS_WCSLEN
+#define AS_STRNLEN
+#define strlen	__wcsnlen_sse4_1
+/* Build ../strlen.S with strlen renamed; AS_* presumably pick wcsnlen.  */
+#include "../strlen.S"
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
new file mode 100644
index 0000000000..bd376057e3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
@@ -0,0 +1,51 @@
+/* Multiple versions of wcsnlen.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define __wcsnlen __redirect_wcsnlen
+# include <wchar.h>
+# undef __wcsnlen
+
+# define SYMBOL_NAME wcsnlen
+# include <init-arch.h>
+/* The three candidate implementations, all declared attribute_hidden.  */
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+/* Return the implementation to use, tried in order avx2, sse4_1, sse2.  */
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  const struct cpu_features* cpu_features = __get_cpu_features ();
+  /* avx2: Prefer_No_VZEROUPPER clear, AVX2 usable, fast unaligned load.  */
+  if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX2_Usable)
+      && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+    return OPTIMIZE (avx2);
+  /* Next choice: the sse4_1 variant, when the SSE4_1 CPU bit is set.  */
+  if (CPU_FEATURES_CPU_P (cpu_features, SSE4_1))
+    return OPTIMIZE (sse4_1);
+  /* Otherwise fall back to the sse2 variant.  */
+  return OPTIMIZE (sse2);
+}
+/* Pick __wcsnlen via the selector; wcsnlen is a weak alias of it.  */
+libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
+weak_alias (__wcsnlen, wcsnlen);
+#endif
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S
new file mode 100644
index 0000000000..cf8a239ab2
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2.S
@@ -0,0 +1,3 @@
+#define STRRCHR __wcsrchr_avx2	/* Name used by END (STRRCHR) there.  */
+#define USE_AS_WCSRCHR 1	/* Enables its USE_AS_WCSRCHR paths.  */
+#include "strrchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-sse2.S b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
new file mode 100644
index 0000000000..d015e95317
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr-sse2.S
@@ -0,0 +1,23 @@
+/* wcsrchr optimized with SSE2.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+# define wcsrchr __wcsrchr_sse2
+#endif
+/* Include ../wcsrchr.S; inside libc, wcsrchr now means __wcsrchr_sse2.  */
+#include "../wcsrchr.S"
diff --git a/sysdeps/x86_64/multiarch/wcsrchr.c b/sysdeps/x86_64/multiarch/wcsrchr.c
new file mode 100644
index 0000000000..219fc828a6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wcsrchr.c
@@ -0,0 +1,29 @@
+/* Multiple versions of wcsrchr.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define wcsrchr __redirect_wcsrchr
+# include <wchar.h>
+# undef wcsrchr
+/* SYMBOL_NAME is presumably read by ifunc-avx2.h -- TODO confirm.  */
+# define SYMBOL_NAME wcsrchr
+# include "ifunc-avx2.h"
+/* Pick the wcsrchr implementation through IFUNC_SELECTOR.  */
+libc_ifunc_redirected (__redirect_wcsrchr, wcsrchr, IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemchr-avx2.S b/sysdeps/x86_64/multiarch/wmemchr-avx2.S
new file mode 100644
index 0000000000..282854f1a1
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemchr-avx2.S
@@ -0,0 +1,4 @@
+#define MEMCHR __wmemchr_avx2
+#define USE_AS_WMEMCHR 1
+/* Both macros are presumably read by memchr-avx2.S -- TODO confirm.  */
+#include "memchr-avx2.S"
diff --git a/sysdeps/x86_64/multiarch/wmemchr-sse2.S b/sysdeps/x86_64/multiarch/wmemchr-sse2.S
new file mode 100644
index 0000000000..70a965d552
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemchr-sse2.S
@@ -0,0 +1,4 @@
+#define USE_AS_WMEMCHR 1
+#define wmemchr __wmemchr_sse2
+/* ../memchr.S presumably builds wmemchr under USE_AS_WMEMCHR -- verify.  */
+#include "../memchr.S"
diff --git a/sysdeps/x86_64/multiarch/wmemchr.c b/sysdeps/x86_64/multiarch/wmemchr.c
new file mode 100644
index 0000000000..6d833702c6
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemchr.c
@@ -0,0 +1,39 @@
+/* Multiple versions of wmemchr
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define wmemchr __redirect_wmemchr
+# define __wmemchr __redirect___wmemchr
+# include <wchar.h>
+# undef wmemchr
+# undef __wmemchr
+/* SYMBOL_NAME is presumably read by ifunc-avx2.h -- TODO confirm.  */
+# define SYMBOL_NAME wmemchr
+# include "ifunc-avx2.h"
+/* Pick __wmemchr via IFUNC_SELECTOR; wmemchr is a weak alias of it.  */
+libc_ifunc_redirected (__redirect_wmemchr, __wmemchr, IFUNC_SELECTOR ());
+weak_alias (__wmemchr, wmemchr)
+# ifdef SHARED
+__hidden_ver1 (__wmemchr, __GI___wmemchr, __redirect___wmemchr)
+  __attribute__((visibility ("hidden")));
+__hidden_ver1 (wmemchr, __GI_wmemchr, __redirect_wmemchr)
+  __attribute__((weak, visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe.S
new file mode 100644
index 0000000000..bfa1a16a35
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe.S
@@ -0,0 +1,4 @@
+#define MEMCMP __wmemcmp_avx2_movbe
+#define USE_AS_WMEMCMP 1
+/* Both macros are presumably read by memcmp-avx2-movbe.S -- verify.  */
+#include "memcmp-avx2-movbe.S"
diff --git a/sysdeps/x86_64/multiarch/wmemcmp.c b/sysdeps/x86_64/multiarch/wmemcmp.c
new file mode 100644
index 0000000000..3f4a7422f3
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemcmp.c
@@ -0,0 +1,30 @@
+/* Multiple versions of wmemcmp.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define wmemcmp __redirect_wmemcmp
+# include <wchar.h>
+# undef wmemcmp
+
+# define SYMBOL_NAME wmemcmp
+# include "ifunc-memcmp.h"
+
+libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemset.c b/sysdeps/x86_64/multiarch/wmemset.c
new file mode 100644
index 0000000000..9fee77ea81
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemset.c
@@ -0,0 +1,40 @@
+/* Multiple versions of wmemset.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
+# define wmemset __redirect_wmemset
+# define __wmemset __redirect___wmemset
+# include <wchar.h>
+# undef wmemset
+# undef __wmemset
+
+# define SYMBOL_NAME wmemset
+# include "ifunc-wmemset.h"
+
+libc_ifunc_redirected (__redirect_wmemset, __wmemset, IFUNC_SELECTOR ());
+weak_alias (__wmemset, wmemset)
+
+# ifdef SHARED
+__hidden_ver1 (__wmemset, __GI___wmemset, __redirect___wmemset)
+  __attribute__ ((visibility ("hidden")));
+__hidden_ver1 (wmemset, __GI_wmemset, __redirect_wmemset)
+  __attribute__ ((visibility ("hidden")));
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S b/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S
new file mode 100644
index 0000000000..140c93d6f0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemset_chk-nonshared.S
@@ -0,0 +1,21 @@
+/* Non-shared version of wmemset_chk for x86-64.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc) && !defined SHARED
+# include <sysdeps/x86_64/wmemset_chk.S>
+#endif
diff --git a/sysdeps/x86_64/multiarch/wmemset_chk.c b/sysdeps/x86_64/multiarch/wmemset_chk.c
new file mode 100644
index 0000000000..88280192c5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/wmemset_chk.c
@@ -0,0 +1,31 @@
+/* Multiple versions of wmemset_chk.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.so.  */
+#if IS_IN (libc) && defined SHARED
+# define __wmemset_chk __redirect_wmemset_chk
+# include <wchar.h>
+# undef __wmemset_chk
+
+# define SYMBOL_NAME wmemset_chk
+# include "ifunc-wmemset.h"
+
+libc_ifunc_redirected (__redirect_wmemset_chk, __wmemset_chk,
+		       IFUNC_SELECTOR ());
+#endif
diff --git a/sysdeps/x86_64/nptl/Makefile b/sysdeps/x86_64/nptl/Makefile
index 9b64b533ee..73024033ee 100644
--- a/sysdeps/x86_64/nptl/Makefile
+++ b/sysdeps/x86_64/nptl/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 2002-2016 Free Software Foundation, Inc.
+# Copyright (C) 2002-2018 Free Software Foundation, Inc.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/nptl/pthread-offsets.h b/sysdeps/x86_64/nptl/pthread-offsets.h
new file mode 100644
index 0000000000..16c6b0d9fd
--- /dev/null
+++ b/sysdeps/x86_64/nptl/pthread-offsets.h
@@ -0,0 +1,5 @@
+#define __PTHREAD_MUTEX_NUSERS_OFFSET   12
+#define __PTHREAD_MUTEX_KIND_OFFSET     16
+#define __PTHREAD_MUTEX_SPINS_OFFSET    20
+#define __PTHREAD_MUTEX_ELISION_OFFSET  22
+#define __PTHREAD_MUTEX_LIST_OFFSET     24
diff --git a/sysdeps/x86_64/nptl/pthread_spin_lock.S b/sysdeps/x86_64/nptl/pthread_spin_lock.S
index b871241617..730fd65034 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_lock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_lock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/nptl/pthread_spin_trylock.S b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
index c9c53171fe..a8f1b72d60 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_trylock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_trylock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
diff --git a/sysdeps/x86_64/nptl/pthread_spin_unlock.S b/sysdeps/x86_64/nptl/pthread_spin_unlock.S
index 188de2e8cb..afd114e855 100644
--- a/sysdeps/x86_64/nptl/pthread_spin_unlock.S
+++ b/sysdeps/x86_64/nptl/pthread_spin_unlock.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
diff --git a/sysdeps/x86_64/nptl/pthreaddef.h b/sysdeps/x86_64/nptl/pthreaddef.h
index 9397efc631..036deb5772 100644
--- a/sysdeps/x86_64/nptl/pthreaddef.h
+++ b/sysdeps/x86_64/nptl/pthreaddef.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
diff --git a/sysdeps/x86_64/nptl/tcb-offsets.sym b/sysdeps/x86_64/nptl/tcb-offsets.sym
index aeb752673a..ae8034743b 100644
--- a/sysdeps/x86_64/nptl/tcb-offsets.sym
+++ b/sysdeps/x86_64/nptl/tcb-offsets.sym
@@ -4,7 +4,6 @@
 
 RESULT			offsetof (struct pthread, result)
 TID			offsetof (struct pthread, tid)
-PID			offsetof (struct pthread, pid)
 CANCELHANDLING		offsetof (struct pthread, cancelhandling)
 CLEANUP_JMP_BUF		offsetof (struct pthread, cleanup_jmp_buf)
 CLEANUP			offsetof (struct pthread, cleanup)
@@ -13,9 +12,8 @@ MUTEX_FUTEX		offsetof (pthread_mutex_t, __data.__lock)
 MULTIPLE_THREADS_OFFSET	offsetof (tcbhead_t, multiple_threads)
 POINTER_GUARD		offsetof (tcbhead_t, pointer_guard)
 VGETCPU_CACHE_OFFSET	offsetof (tcbhead_t, vgetcpu_cache)
-#ifndef __ASSUME_PRIVATE_FUTEX
-PRIVATE_FUTEX		offsetof (tcbhead_t, private_futex)
-#endif
+FEATURE_1_OFFSET	offsetof (tcbhead_t, feature_1)
+SSP_BASE_OFFSET		offsetof (tcbhead_t, ssp_base)
 
 -- Not strictly offsets, but these values are also used in the TCB.
 TCB_CANCELSTATE_BITMASK	 CANCELSTATE_BITMASK
diff --git a/sysdeps/x86_64/nptl/tls.h b/sysdeps/x86_64/nptl/tls.h
index 2b061a07c6..e88561c934 100644
--- a/sysdeps/x86_64/nptl/tls.h
+++ b/sysdeps/x86_64/nptl/tls.h
@@ -1,5 +1,5 @@
 /* Definition for thread-local data handling.  nptl/x86_64 version.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -26,8 +26,9 @@
 # include <stdint.h>
 # include <stdlib.h>
 # include <sysdep.h>
-# include <libc-internal.h>
+# include <libc-pointer-arith.h> /* For cast_to_integer.  */
 # include <kernel-features.h>
+# include <dl-dtv.h>
 
 /* Replacement type for __m128 since this file is included by ld.so,
    which is compiled with -mno-sse.  It must not change the alignment
@@ -38,18 +39,6 @@ typedef struct
 } __128bits;
 
 
-/* Type for the dtv.  */
-typedef union dtv
-{
-  size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
-} dtv_t;
-
-
 typedef struct
 {
   void *tcb;		/* Pointer to the TCB.  Not necessarily the
@@ -62,17 +51,17 @@ typedef struct
   uintptr_t stack_guard;
   uintptr_t pointer_guard;
   unsigned long int vgetcpu_cache[2];
-# ifndef __ASSUME_PRIVATE_FUTEX
-  int private_futex;
-# else
-  int __glibc_reserved1;
-# endif
+  /* Bit 0: X86_FEATURE_1_IBT.
+     Bit 1: X86_FEATURE_1_SHSTK.
+   */
+  unsigned int feature_1;
   int __glibc_unused1;
   /* Reservation of some values for the TM ABI.  */
   void *__private_tm[4];
   /* GCC split stack support.  */
   void *__private_ss;
-  long int __glibc_reserved2;
+  /* The lowest address of the shadow stack.  */
+  unsigned long long int ssp_base;
   /* Must be kept even if it is no longer used by glibc since programs,
      like AddressSanitizer, depend on the size of tcbhead_t.  */
   __128bits __glibc_unused2[8][4] __attribute__ ((aligned (32)));
@@ -80,6 +69,23 @@ typedef struct
   void *__padding[8];
 } tcbhead_t;
 
+# ifdef __ILP32__
+/* morestack.S in libgcc uses offset 0x40 to access __private_ss.  */
+_Static_assert (offsetof (tcbhead_t, __private_ss) == 0x40,
+		"offset of __private_ss != 0x40");
+/* NB: ssp_base used to be "long int __glibc_reserved2", which was
+   changed from 32 bits to 64 bits.  Make sure that the offset of the
+   next field, __glibc_unused2, is unchanged.  */
+_Static_assert (offsetof (tcbhead_t, __glibc_unused2) == 0x60,
+		"offset of __glibc_unused2 != 0x60");
+# else
+/* morestack.S in libgcc uses offset 0x70 to access __private_ss.  */
+_Static_assert (offsetof (tcbhead_t, __private_ss) == 0x70,
+		"offset of __private_ss != 0x70");
+_Static_assert (offsetof (tcbhead_t, __glibc_unused2) == 0x80,
+		"offset of __glibc_unused2 != 0x80");
+# endif
+
 #else /* __ASSEMBLER__ */
 # include <tcb-offsets.h>
 #endif
@@ -337,18 +343,6 @@ typedef struct
 	      abort (); })
 
 
-# define CALL_THREAD_FCT(descr) \
-  ({ void *__res;							      \
-     asm volatile ("movq %%fs:%P2, %%rdi\n\t"				      \
-		   "callq *%%fs:%P1"					      \
-		   : "=a" (__res)					      \
-		   : "i" (offsetof (struct pthread, start_routine)),	      \
-		     "i" (offsetof (struct pthread, arg))		      \
-		   : "di", "si", "cx", "dx", "r8", "r9", "r10", "r11",	      \
-		     "memory", "cc");					      \
-     __res; })
-
-
 /* Set the stack guard field in TCB head.  */
 # define THREAD_SET_STACK_GUARD(value) \
     THREAD_SETMEM (THREAD_SELF, header.stack_guard, value)
@@ -366,6 +360,7 @@ typedef struct
 
 
 /* Get and set the global scope generation counter in the TCB head.  */
+# define THREAD_GSCOPE_IN_TCB      1
 # define THREAD_GSCOPE_FLAG_UNUSED 0
 # define THREAD_GSCOPE_FLAG_USED   1
 # define THREAD_GSCOPE_FLAG_WAIT   2
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index f90b7921a1..cf972768ec 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -1,6 +1,6 @@
 /* fast SSE2 memchr with 64 byte loop and pmaxub instruction using
 
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -196,11 +196,6 @@ L(matches32):
 	lea	32(%rax, %rdi), %rax
 	ret
 
-	.p2align 4
-L(return_null):
-	xor	%rax, %rax
-	ret
-
 END (__rawmemchr)
 
 weak_alias (__rawmemchr, rawmemchr)
diff --git a/sysdeps/x86_64/rshift.S b/sysdeps/x86_64/rshift.S
index c88c6d82bb..1d018b857b 100644
--- a/sysdeps/x86_64/rshift.S
+++ b/sysdeps/x86_64/rshift.S
@@ -1,5 +1,5 @@
 /* x86-64 __mpn_rshift --
-   Copyright (C) 2007-2016 Free Software Foundation, Inc.
+   Copyright (C) 2007-2018 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/rtld-offsets.sym b/sysdeps/x86_64/rtld-offsets.sym
new file mode 100644
index 0000000000..fd41b51521
--- /dev/null
+++ b/sysdeps/x86_64/rtld-offsets.sym
@@ -0,0 +1,6 @@
+#define SHARED
+#include <ldsodefs.h>
+
+--
+
+GL_TLS_GENERATION_OFFSET        offsetof (struct rtld_global, _dl_tls_generation)
diff --git a/sysdeps/x86_64/sched_cpucount.c b/sysdeps/x86_64/sched_cpucount.c
index 0834e711b3..af5bbcc044 100644
--- a/sysdeps/x86_64/sched_cpucount.c
+++ b/sysdeps/x86_64/sched_cpucount.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2007-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/setjmp.S b/sysdeps/x86_64/setjmp.S
index 3e93967c2f..78a8bf4644 100644
--- a/sysdeps/x86_64/setjmp.S
+++ b/sysdeps/x86_64/setjmp.S
@@ -1,5 +1,5 @@
 /* setjmp for x86-64.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,9 +18,15 @@
 
 #include <sysdep.h>
 #include <jmpbuf-offsets.h>
+#include <jmp_buf-ssp.h>
 #include <asm-syntax.h>
 #include <stap-probe.h>
 
+/* Don't save shadow stack register if shadow stack isn't enabled.  */
+#if !SHSTK_ENABLED
+# undef SHADOW_STACK_POINTER_OFFSET
+#endif
+
 ENTRY (__sigsetjmp)
 	/* Save registers.  */
 	movq %rbx, (JB_RBX*8)(%rdi)
@@ -54,17 +60,28 @@ ENTRY (__sigsetjmp)
 #endif
 	movq %rax, (JB_PC*8)(%rdi)
 
+#ifdef SHADOW_STACK_POINTER_OFFSET
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+	/* Check if Shadow Stack is enabled.  */
+	testl $X86_FEATURE_1_SHSTK, %fs:FEATURE_1_OFFSET
+	jz L(skip_ssp)
+# else
+	xorl %eax, %eax
+# endif
+	/* Get the current Shadow-Stack-Pointer and save it.  */
+	rdsspq %rax
+	movq %rax, SHADOW_STACK_POINTER_OFFSET(%rdi)
+# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
+L(skip_ssp):
+# endif
+#endif
 #if IS_IN (rtld)
 	/* In ld.so we never save the signal mask.  */
 	xorl %eax, %eax
 	retq
 #else
 	/* Make a tail call to __sigjmp_save; it takes the same args.  */
-# ifdef	PIC
-	jmp C_SYMBOL_NAME (__sigjmp_save)@PLT
-# else
 	jmp __sigjmp_save
-# endif
 #endif
 END (__sigsetjmp)
 hidden_def (__sigsetjmp)
diff --git a/sysdeps/x86_64/stackinfo.h b/sysdeps/x86_64/stackinfo.h
index 848aa7754c..f7a5672f27 100644
--- a/sysdeps/x86_64/stackinfo.h
+++ b/sysdeps/x86_64/stackinfo.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2001-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/start.S b/sysdeps/x86_64/start.S
index 1374974307..354d2e6ec7 100644
--- a/sysdeps/x86_64/start.S
+++ b/sysdeps/x86_64/start.S
@@ -1,5 +1,5 @@
 /* Startup code compliant to the ELF x86-64 ABI.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2001.
 
@@ -96,27 +96,28 @@ ENTRY (_start)
 	   which grow downwards).  */
 	pushq %rsp
 
-#ifdef SHARED
+#ifdef PIC
 	/* Pass address of our own entry points to .fini and .init.  */
 	mov __libc_csu_fini@GOTPCREL(%rip), %R8_LP
 	mov __libc_csu_init@GOTPCREL(%rip), %RCX_LP
 
 	mov main@GOTPCREL(%rip), %RDI_LP
-
-	/* Call the user's main function, and exit with its value.
-	   But let the libc call main.	  */
-	call __libc_start_main@PLT
 #else
 	/* Pass address of our own entry points to .fini and .init.  */
 	mov $__libc_csu_fini, %R8_LP
 	mov $__libc_csu_init, %RCX_LP
 
 	mov $main, %RDI_LP
+#endif
 
 	/* Call the user's main function, and exit with its value.
-	   But let the libc call main.	  */
-	call __libc_start_main
-#endif
+	   But let the libc call main.  Since __libc_start_main in
+	   libc.so is called very early, lazy binding isn't relevant
+	   here.  Use an indirect branch via the GOT to avoid an extra
+	   branch to the PLT slot.  For a static executable, ld in
+	   binutils 2.26 or above can convert the indirect branch into
+	   a direct branch.  */
+	call *__libc_start_main@GOTPCREL(%rip)
 
 	hlt			/* Crash if somehow `exit' does return.	 */
 END (_start)
diff --git a/sysdeps/x86_64/strcasecmp_l-nonascii.c b/sysdeps/x86_64/strcasecmp_l-nonascii.c
index 30e8969603..9ba9bc808c 100644
--- a/sysdeps/x86_64/strcasecmp_l-nonascii.c
+++ b/sysdeps/x86_64/strcasecmp_l-nonascii.c
@@ -1,7 +1,7 @@
 #include <string.h>
 
 extern int __strcasecmp_l_nonascii (const char *__s1, const char *__s2,
-				    __locale_t __loc);
+				    locale_t __loc);
 
 #define __strcasecmp_l __strcasecmp_l_nonascii
 #define USE_IN_EXTENDED_LOCALE_MODEL    1
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
index dadf4c76b2..9a4a4e6feb 100644
--- a/sysdeps/x86_64/strcat.S
+++ b/sysdeps/x86_64/strcat.S
@@ -1,6 +1,6 @@
 /* strcat(dest, src) -- Append SRC on the end of DEST.
    Optimized for x86-64.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2002.
 
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
index 4431fee648..1d5112746f 100644
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -1,6 +1,6 @@
 /* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
    For AMD x86-64.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S
index 7b52d699ee..149f3a9ced 100644
--- a/sysdeps/x86_64/strchrnul.S
+++ b/sysdeps/x86_64/strchrnul.S
@@ -1,7 +1,7 @@
 /* strchrnul (str, ch) -- Return pointer to first occurrence of CH in STR
 	or terminating NUL byte.
    For AMD x86-64.
-   Copyright (C) 2009-2016 Free Software Foundation, Inc.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index c5c44d4e27..e16945b961 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -1,5 +1,5 @@
 /* Highly optimized version for x86-64.
-   Copyright (C) 1999-2016 Free Software Foundation, Inc.
+   Copyright (C) 1999-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Based on i686 version contributed by Ulrich Drepper
    <drepper@cygnus.com>, 1999.
@@ -233,7 +233,7 @@ LABEL(bigger):
 	lea	LABEL(unaligned_table)(%rip), %r10
 	movslq	(%r10, %r9,4), %r9
 	lea	(%r10, %r9), %r10
-	jmp	*%r10				/* jump to corresponding case */
+	_CET_NOTRACK jmp *%r10			/* jump to corresponding case */
 
 /*
  * The following cases will be handled by ashr_0
diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S
index 3f90c0020a..66128a7cb5 100644
--- a/sysdeps/x86_64/strcpy.S
+++ b/sysdeps/x86_64/strcpy.S
@@ -1,5 +1,5 @@
 /* strcpy/stpcpy implementation for x86-64.
-   Copyright (C) 2002-2016 Free Software Foundation, Inc.
+   Copyright (C) 2002-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Andreas Jaeger <aj@suse.de>, 2002.
 
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
index de526c8fdd..7f9202d656 100644
--- a/sysdeps/x86_64/strcspn.S
+++ b/sysdeps/x86_64/strcspn.S
@@ -1,7 +1,7 @@
 /* strcspn (str, ss) -- Return the length of the initial segment of STR
 			which contains no characters from SS.
    For AMD x86-64.
-   Copyright (C) 1994-2016 Free Software Foundation, Inc.
+   Copyright (C) 1994-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
    Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
@@ -24,9 +24,6 @@
 #include <sysdep.h>
 #include "asm-syntax.h"
 
-/* BEWARE: `#ifdef strcspn' means that strcspn is redefined as `strpbrk' */
-#define STRPBRK_P (defined strcspn)
-
 	.text
 ENTRY (strcspn)
 
@@ -111,7 +108,7 @@ L(5):	incq %rax
 
 L(4):	addq $256, %rsp		/* remove skipset */
 	cfi_adjust_cfa_offset(-256)
-#if STRPBRK_P
+#ifdef USE_AS_STRPBRK
 	xorl %edx,%edx
 	orb %cl, %cl		/* was last character NUL? */
 	cmovzq %rdx, %rax	/* Yes:	return NULL */
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index 12f63ad1bb..01cb5fa846 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -1,5 +1,5 @@
-/* SSE2 version of strlen.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* SSE2 version of strlen/wcslen.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,6 +18,16 @@
 
 #include <sysdep.h>
 
+#ifdef AS_WCSLEN
+# define PMINU		pminud
+# define PCMPEQ		pcmpeqd
+# define SHIFT_RETURN	shrq $2, %rax
+#else
+# define PMINU		pminub
+# define PCMPEQ		pcmpeqb
+# define SHIFT_RETURN
+#endif
+
 /* Long lived register in strlen(s), strnlen(s, n) are:
 
 	%xmm3 - zero
@@ -32,10 +42,10 @@ ENTRY(strlen)
 
 /* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx.  */
 #define FIND_ZERO	\
-	pcmpeqb	(%rax), %xmm0;	\
-	pcmpeqb	16(%rax), %xmm1;	\
-	pcmpeqb	32(%rax), %xmm2;	\
-	pcmpeqb	48(%rax), %xmm3;	\
+	PCMPEQ	(%rax), %xmm0;	\
+	PCMPEQ	16(%rax), %xmm1;	\
+	PCMPEQ	32(%rax), %xmm2;	\
+	PCMPEQ	48(%rax), %xmm3;	\
 	pmovmskb	%xmm0, %esi;	\
 	pmovmskb	%xmm1, %edx;	\
 	pmovmskb	%xmm2, %r8d;	\
@@ -54,6 +64,9 @@ ENTRY(strlen)
 	xor	%rax, %rax
 	ret
 L(n_nonzero):
+# ifdef AS_WCSLEN
+	shlq	$2, %rsi
+# endif
 
 /* Initialize long lived registers.  */
 
@@ -96,6 +109,7 @@ L(n_nonzero):
 	test	%rdx, %rdx;	\
 	je	L(lab);	\
 	bsfq	%rdx, %rax;	\
+	SHIFT_RETURN;		\
 	ret
 
 #ifdef AS_STRNLEN
@@ -104,19 +118,20 @@ L(n_nonzero):
 #else
 	/* Test first 16 bytes unaligned.  */
 	movdqu	(%rax), %xmm4
-	pcmpeqb	%xmm0, %xmm4
+	PCMPEQ	%xmm0, %xmm4
 	pmovmskb	%xmm4, %edx
 	test	%edx, %edx
 	je 	L(next48_bytes)
 	bsf	%edx, %eax /* If eax is zeroed 16bit bsf can be used.  */
+	SHIFT_RETURN
 	ret
 
 L(next48_bytes):
 /* Same as FIND_ZERO except we do not check first 16 bytes.  */
 	andq	$-16, %rax
-	pcmpeqb 16(%rax), %xmm1
-	pcmpeqb 32(%rax), %xmm2
-	pcmpeqb 48(%rax), %xmm3
+	PCMPEQ 16(%rax), %xmm1
+	PCMPEQ 32(%rax), %xmm2
+	PCMPEQ 48(%rax), %xmm3
 	pmovmskb	%xmm1, %edx
 	pmovmskb	%xmm2, %r8d
 	pmovmskb	%xmm3, %ecx
@@ -145,6 +160,7 @@ L(strnlen_ret):
 	test	%rdx, %rdx
 	je	L(loop_init)
 	bsfq	%rdx, %rax
+	SHIFT_RETURN
 	ret
 #endif
 	.p2align 4
@@ -161,10 +177,10 @@ L(loop):
 	je	L(exit_end)
 
 	movdqa	(%rax), %xmm0
-	pminub	16(%rax), %xmm0
-	pminub	32(%rax), %xmm0
-	pminub	48(%rax), %xmm0
-	pcmpeqb	%xmm3, %xmm0
+	PMINU	16(%rax), %xmm0
+	PMINU	32(%rax), %xmm0
+	PMINU	48(%rax), %xmm0
+	PCMPEQ	%xmm3, %xmm0
 	pmovmskb	%xmm0, %edx
 	testl	%edx, %edx
 	jne	L(exit)
@@ -182,6 +198,7 @@ L(first):
 	bsfq	%rdx, %rdx
 	addq	%rdx, %rax
 	subq	%rdi, %rax
+	SHIFT_RETURN
 	ret
 
 	.p2align 4
@@ -192,6 +209,7 @@ L(exit):
 	bsfq	%rdx, %rdx
 	addq	%rdx, %rax
 	subq	%rdi, %rax
+	SHIFT_RETURN
 	ret
 
 #else
@@ -201,10 +219,10 @@ L(exit):
 L(loop):
 
 	movdqa	64(%rax), %xmm0
-	pminub	80(%rax), %xmm0
-	pminub	96(%rax), %xmm0
-	pminub	112(%rax), %xmm0
-	pcmpeqb	%xmm3, %xmm0
+	PMINU	80(%rax), %xmm0
+	PMINU	96(%rax), %xmm0
+	PMINU	112(%rax), %xmm0
+	PCMPEQ	%xmm3, %xmm0
 	pmovmskb	%xmm0, %edx
 	testl	%edx, %edx
 	jne	L(exit64)
@@ -212,10 +230,10 @@ L(loop):
 	subq	$-128, %rax
 
 	movdqa	(%rax), %xmm0
-	pminub	16(%rax), %xmm0
-	pminub	32(%rax), %xmm0
-	pminub	48(%rax), %xmm0
-	pcmpeqb	%xmm3, %xmm0
+	PMINU	16(%rax), %xmm0
+	PMINU	32(%rax), %xmm0
+	PMINU	48(%rax), %xmm0
+	PCMPEQ	%xmm3, %xmm0
 	pmovmskb	%xmm0, %edx
 	testl	%edx, %edx
 	jne	L(exit0)
@@ -231,6 +249,7 @@ L(exit0):
 	bsfq	%rdx, %rdx
 	addq	%rdx, %rax
 	subq	%rdi, %rax
+	SHIFT_RETURN
 	ret
 
 #endif
diff --git a/sysdeps/x86_64/strncase_l-nonascii.c b/sysdeps/x86_64/strncase_l-nonascii.c
index 8664863778..e3d83a06cd 100644
--- a/sysdeps/x86_64/strncase_l-nonascii.c
+++ b/sysdeps/x86_64/strncase_l-nonascii.c
@@ -1,7 +1,7 @@
 #include <string.h>
 
 extern int __strncasecmp_l_nonascii (const char *__s1, const char *__s2,
-				     size_t __n, __locale_t __loc);
+				     size_t __n, locale_t __loc);
 
 #define __strncasecmp_l __strncasecmp_l_nonascii
 #define USE_IN_EXTENDED_LOCALE_MODEL    1
diff --git a/sysdeps/x86_64/strpbrk.S b/sysdeps/x86_64/strpbrk.S
index 9b97ada84e..21888a5b92 100644
--- a/sysdeps/x86_64/strpbrk.S
+++ b/sysdeps/x86_64/strpbrk.S
@@ -1,2 +1,3 @@
 #define strcspn strpbrk
+#define USE_AS_STRPBRK
 #include <sysdeps/x86_64/strcspn.S>
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index de0be762ed..aca98e7eaa 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -1,5 +1,5 @@
 /* strrchr (str, ch) -- Return pointer to last occurrence of CH in STR.
-   Copyright (C) 2013-2016 Free Software Foundation, Inc.
+   Copyright (C) 2013-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
index 49dd4ba9f5..635f1bc6ce 100644
--- a/sysdeps/x86_64/strspn.S
+++ b/sysdeps/x86_64/strspn.S
@@ -1,7 +1,7 @@
 /* strspn (str, ss) -- Return the length of the initial segment of STR
 			which contains only characters from SS.
    For AMD x86-64.
-   Copyright (C) 1994-2016 Free Software Foundation, Inc.
+   Copyright (C) 1994-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
    Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S
deleted file mode 100644
index bd5b103d50..0000000000
--- a/sysdeps/x86_64/strtok.S
+++ /dev/null
@@ -1,208 +0,0 @@
-/* strtok (str, delim) -- Return next DELIM separated token from STR.
-   For AMD x86-64.
-   Copyright (C) 1998-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Based on i686 version contributed by Ulrich Drepper
-   <drepper@cygnus.com>, 1998.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include "asm-syntax.h"
-
-/* This file can be used for the strtok and strtok_r functions:
-
-   strtok:
-	INPUT PARAMETER:
-	str		%rdi
-	delim		%rsi
-
-   strtok_r:
-	INPUT PARAMETER:
-	str		%rdi
-	delim		%rsi
-	save_ptr	%rdx
-
-   We do a common implementation here.  */
-
-#ifdef USE_AS_STRTOK_R
-# define SAVE_PTR (%r9)
-#else
-	.bss
-	.local save_ptr
-	.type save_ptr, @object
-	.size save_ptr, LP_SIZE
-save_ptr:
-	.space LP_SIZE
-
-# ifdef PIC
-#  define SAVE_PTR save_ptr(%rip)
-# else
-#  define SAVE_PTR save_ptr
-# endif
-
-# define FUNCTION strtok
-#endif
-
-	.text
-ENTRY (FUNCTION)
-	/* First we create a table with flags for all possible characters.
-	   For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
-	   supported by the C string functions we have 256 characters.
-	   Before inserting marks for the stop characters we clear the whole
-	   table.  */
-	movq %rdi, %r8			/* Save value.  */
-	subq $256, %rsp			/* Make space for 256 bytes.  */
-	cfi_adjust_cfa_offset(256)
-	movl $32,  %ecx			/* 32*8 bytes = 256 bytes.  */
-	movq %rsp, %rdi
-	xorl %eax, %eax			/* We store 0s.  */
-	cld
-	rep
-	stosq
-
-	/* Note: %rcx = 0 !!! */
-
-#ifdef USE_AS_STRTOK_R
-	/* The value is stored in the third argument.  */
-	mov %RDX_LP, %R9_LP	/* Save value - see def. of SAVE_PTR.  */
-	mov (%rdx), %RAX_LP
-#else
-	/* The value is in the local variable defined above.  But
-	   we have to take care for PIC code.  */
-	mov SAVE_PTR, %RAX_LP
-#endif
-	movq %r8, %rdx		/* Get start of string.  */
-
-	/* If the pointer is NULL we have to use the stored value of
-	   the last run.  */
-	cmpq $0, %rdx
-	cmove %rax, %rdx
-	testq %rdx, %rdx
-	jz L(returnNULL)
-	movq %rsi, %rax		/* Get start of delimiter set.  */
-
-/* For understanding the following code remember that %rcx == 0 now.
-   Although all the following instruction only modify %cl we always
-   have a correct zero-extended 64-bit value in %rcx.  */
-
-L(2):	movb (%rax), %cl	/* get byte from stopset */
-	testb %cl, %cl		/* is NUL char? */
-	jz L(1)			/* yes => start compare loop */
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
-
-	movb 1(%rax), %cl	/* get byte from stopset */
-	testb $0xff, %cl	/* is NUL char? */
-	jz L(1)			/* yes => start compare loop */
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
-
-	movb 2(%rax), %cl	/* get byte from stopset */
-	testb $0xff, %cl	/* is NUL char? */
-	jz L(1)			/* yes => start compare loop */
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
-
-	movb 3(%rax), %cl	/* get byte from stopset */
-	addq $4, %rax		/* increment stopset pointer */
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in stopset table */
-	testb $0xff, %cl	/* is NUL char? */
-	jnz L(2)		/* no => process next dword from stopset */
-
-L(1):
-
-	leaq -4(%rdx), %rax	/* prepare loop */
-
-	/* We use a neat trick for the following loop.  Normally we would
-	   have to test for two termination conditions
-	   1. a character in the stopset was found
-	   and
-	   2. the end of the string was found
-	   As a sign that the character is in the stopset we store its
-	   value in the table.  The value of NUL is NUL so the loop
-	   terminates for NUL in every case.  */
-
-L(3):	addq $4, %rax		/* adjust pointer for full loop round */
-
-	movb (%rax), %cl	/* get byte from string */
-	testb %cl, (%rsp,%rcx)	/* is it contained in stopset? */
-	jz L(4)			/* no => start of token */
-
-	movb 1(%rax), %cl	/* get byte from string */
-	testb %cl, (%rsp,%rcx)	/* is it contained in stopset? */
-	jz L(5)			/* no => start of token */
-
-	movb 2(%rax), %cl	/* get byte from string */
-	testb %cl, (%rsp,%rcx)	/* is it contained in stopset? */
-	jz L(6)			/* no => start of token */
-
-	movb 3(%rax), %cl	/* get byte from string */
-	testb %cl, (%rsp,%rcx)	/* is it contained in stopset? */
-	jnz L(3)		/* yes => start of loop */
-
-	incq %rax		/* adjust pointer */
-L(6):	incq %rax
-L(5):	incq %rax
-
-	/* Now we have to terminate the string.  */
-
-L(4):	leaq -4(%rax), %rdx	/* We use %rDX for the next run.  */
-
-L(7):	addq $4, %rdx		/* adjust pointer for full loop round */
-
-	movb (%rdx), %cl	/* get byte from string */
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
-	je L(8)			/* yes => return */
-
-	movb 1(%rdx), %cl	/* get byte from string */
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
-	je L(9)			/* yes => return */
-
-	movb 2(%rdx), %cl	/* get byte from string */
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
-	je L(10)		/* yes => return */
-
-	movb 3(%rdx), %cl	/* get byte from string */
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
-	jne L(7)		/* no => start loop again */
-
-	incq %rdx		/* adjust pointer */
-L(10):	incq %rdx
-L(9):	incq %rdx
-
-L(8):	cmpq %rax, %rdx
-	je L(returnNULL)	/* There was no token anymore.  */
-
-	movb $0, (%rdx)		/* Terminate string.  */
-
-	/* Are we at end of string?  */
-	cmpb $0, %cl
-	leaq 1(%rdx), %rcx
-	cmovne %rcx, %rdx
-
-	/* Store the pointer to the next character.  */
-	mov %RDX_LP, SAVE_PTR
-
-L(epilogue):
-	/* Remove the stopset table.  */
-	addq $256, %rsp
-	cfi_adjust_cfa_offset(-256)
-	retq
-
-L(returnNULL):
-	xorl %eax, %eax
-	/* Store the pointer to the next character.  */
-	mov %RDX_LP, SAVE_PTR
-	jmp L(epilogue)
-
-END (FUNCTION)
diff --git a/sysdeps/x86_64/strtok_r.S b/sysdeps/x86_64/strtok_r.S
deleted file mode 100644
index f0db78c67a..0000000000
--- a/sysdeps/x86_64/strtok_r.S
+++ /dev/null
@@ -1,5 +0,0 @@
-#define FUNCTION __strtok_r
-#define USE_AS_STRTOK_R	1
-#include <sysdeps/x86_64/strtok.S>
-weak_alias (__strtok_r, strtok_r)
-strong_alias (__strtok_r, __GI___strtok_r)
diff --git a/sysdeps/x86_64/sub_n.S b/sysdeps/x86_64/sub_n.S
index cc9bc48b01..e70d48ba47 100644
--- a/sysdeps/x86_64/sub_n.S
+++ b/sysdeps/x86_64/sub_n.S
@@ -1,6 +1,6 @@
 /* x86-64 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store
    sum in a third limb vector.
-   Copyright (C) 2006-2016 Free Software Foundation, Inc.
+   Copyright (C) 2006-2018 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/submul_1.S b/sysdeps/x86_64/submul_1.S
index 3037cb9c45..ba1bf92bc5 100644
--- a/sysdeps/x86_64/submul_1.S
+++ b/sysdeps/x86_64/submul_1.S
@@ -1,6 +1,6 @@
 /* x86-64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
    the result from a second limb vector.
-   Copyright (C) 2003-2016 Free Software Foundation, Inc.
+   Copyright (C) 2003-2018 Free Software Foundation, Inc.
    This file is part of the GNU MP Library.
 
    The GNU MP Library is free software; you can redistribute it and/or modify
diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
index fbe3560588..1738d7f955 100644
--- a/sysdeps/x86_64/sysdep.h
+++ b/sysdeps/x86_64/sysdep.h
@@ -1,5 +1,5 @@
 /* Assembler macros for x86-64.
-   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,7 +19,7 @@
 #ifndef _X86_64_SYSDEP_H
 #define _X86_64_SYSDEP_H 1
 
-#include <sysdeps/generic/sysdep.h>
+#include <sysdeps/x86/sysdep.h>
 
 #ifdef	__ASSEMBLER__
 
@@ -32,28 +32,6 @@
 #define cfi_offset_rel_rsp(regn, off)	.cfi_escape 0x10, regn, 0x4, 0x13, \
 					0x77, off & 0x7F | 0x80, off >> 7
 
-/* ELF uses byte-counts for .align, most others use log2 of count of bytes.  */
-#define ALIGNARG(log2) 1<<log2
-#define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
-
-
-/* Define an entry point visible from C.  */
-#define	ENTRY(name)							      \
-  .globl C_SYMBOL_NAME(name);						      \
-  .type C_SYMBOL_NAME(name),@function;					      \
-  .align ALIGNARG(4);							      \
-  C_LABEL(name)								      \
-  cfi_startproc;							      \
-  CALL_MCOUNT
-
-#undef	END
-#define END(name)							      \
-  cfi_endproc;								      \
-  ASM_SIZE_DIRECTIVE(name)
-
-#define ENTRY_CHK(name) ENTRY (name)
-#define END_CHK(name) END (name)
-
 /* If compiled for profiling, call `mcount' at the start of each function.  */
 #ifdef	PROF
 /* The mcount code relies on a normal frame pointer being on the stack
@@ -70,12 +48,6 @@
 #define CALL_MCOUNT		/* Do nothing.  */
 #endif
 
-/* Since C identifiers are not normally prefixed with an underscore
-   on this system, the asm identifier `syscall_error' intrudes on the
-   C name space.  Make sure we use an innocuous name.  */
-#define	syscall_error	__syscall_error
-#define mcount		_mcount
-
 #define	PSEUDO(name, syscall_name, args)				      \
 lose:									      \
   jmp JUMPTARGET(syscall_error)						      \
@@ -84,25 +56,18 @@ lose:									      \
   DO_CALL (syscall_name, args);						      \
   jb lose
 
-#undef	PSEUDO_END
-#define	PSEUDO_END(name)						      \
-  END (name)
-
 #undef JUMPTARGET
-#ifdef PIC
-#define JUMPTARGET(name)	name##@PLT
+#ifdef SHARED
+# ifdef BIND_NOW
+#  define JUMPTARGET(name)	*name##@GOTPCREL(%rip)
+# else
+#  define JUMPTARGET(name)	name##@PLT
+# endif
 #else
-#define JUMPTARGET(name)	name
+/* For static archives, branch to target directly.  */
+# define JUMPTARGET(name)	name
 #endif
 
-/* Local label name for asm code. */
-#ifndef L
-/* ELF-like local names start with `.L'.  */
-# define L(name)	.L##name
-#endif
-
-#define atom_text_section .section ".text.atom", "ax"
-
 /* Long and pointer size in bytes.  */
 #define LP_SIZE	8
 
diff --git a/sysdeps/x86_64/tls_get_addr.S b/sysdeps/x86_64/tls_get_addr.S
new file mode 100644
index 0000000000..cf8c6d101b
--- /dev/null
+++ b/sysdeps/x86_64/tls_get_addr.S
@@ -0,0 +1,61 @@
+/* Stack-aligning implementation of __tls_get_addr.  x86-64 version.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifdef SHARED
+
+# include <sysdep.h>
+# include "tlsdesc.h"
+# include "rtld-offsets.h"
+
+/* See __tls_get_addr and __tls_get_addr_slow in dl-tls.c.  This function
+   calls __tls_get_addr_slow on both slow paths.  It realigns the stack
+   before the call to work around GCC PR58066.  */
+
+ENTRY (__tls_get_addr)
+	mov 	%fs:DTV_OFFSET, %RDX_LP
+	mov	GL_TLS_GENERATION_OFFSET+_rtld_local(%rip), %RAX_LP
+	/* GL(dl_tls_generation) == dtv[0].counter */
+	cmp	%RAX_LP, (%rdx)
+	jne	1f
+	mov	TI_MODULE_OFFSET(%rdi), %RAX_LP
+	/* dtv[ti->ti_module] */
+# ifdef __LP64__
+	salq	$4, %rax
+	movq	(%rdx,%rax), %rax
+# else
+	movl	(%rdx,%rax, 8), %eax
+# endif
+	cmp	$-1, %RAX_LP
+	je	1f
+	add	TI_OFFSET_OFFSET(%rdi), %RAX_LP
+	ret
+1:
+	/* On the slow path, align the stack.  */
+	pushq	%rbp
+	cfi_def_cfa_offset (16)
+	cfi_offset (%rbp, -16)
+	mov	%RSP_LP, %RBP_LP
+	cfi_def_cfa_register (%rbp)
+	and	$-16, %RSP_LP
+	call	__tls_get_addr_slow
+	mov	%RBP_LP, %RSP_LP
+	popq	%rbp
+	cfi_def_cfa (%rsp, 8)
+	ret
+END (__tls_get_addr)
+#endif /* SHARED */
diff --git a/sysdeps/x86_64/tlsdesc.c b/sysdeps/x86_64/tlsdesc.c
index aff8b67941..302d097dbb 100644
--- a/sysdeps/x86_64/tlsdesc.c
+++ b/sysdeps/x86_64/tlsdesc.c
@@ -1,5 +1,5 @@
 /* Manage TLS descriptors.  x86_64 version.
-   Copyright (C) 2005-2016 Free Software Foundation, Inc.
+   Copyright (C) 2005-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -134,7 +134,6 @@ _dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
    if there is one.  */
 
 void
-internal_function
 _dl_unmap (struct link_map *map)
 {
   _dl_unmap_segments (map);
diff --git a/sysdeps/x86_64/tlsdesc.sym b/sysdeps/x86_64/tlsdesc.sym
index 33854975d0..fc897ab4b5 100644
--- a/sysdeps/x86_64/tlsdesc.sym
+++ b/sysdeps/x86_64/tlsdesc.sym
@@ -15,3 +15,6 @@ TLSDESC_ARG			offsetof(struct tlsdesc, arg)
 TLSDESC_GEN_COUNT		offsetof(struct tlsdesc_dynamic_arg, gen_count)
 TLSDESC_MODID			offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
 TLSDESC_MODOFF			offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+
+TI_MODULE_OFFSET 		offsetof(tls_index, ti_module)
+TI_OFFSET_OFFSET 		offsetof(tls_index, ti_offset)
diff --git a/sysdeps/x86_64/tst-audit.h b/sysdeps/x86_64/tst-audit.h
index 94e9dd5282..623ef8920c 100644
--- a/sysdeps/x86_64/tst-audit.h
+++ b/sysdeps/x86_64/tst-audit.h
@@ -1,6 +1,6 @@
 /* Definitions for testing PLT entry/exit auditing.  x86_64 version.
 
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
 
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S b/sysdeps/x86_64/tst-audit10-aux.c
index fb9f989adc..e1b2d92f75 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_d_pow4_core.S
+++ b/sysdeps/x86_64/tst-audit10-aux.c
@@ -1,5 +1,5 @@
-/* Multiple versions of vectorized pow.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
+/* Test case for preserved AVX512 registers in dynamic linker, -mavx512f part.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -16,21 +16,26 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <init-arch.h>
-
-	.text
-ENTRY (_ZGVdN4vv_pow)
-        .type   _ZGVdN4vv_pow, @gnu_indirect_function
-	LOAD_RTLD_GLOBAL_RO_RDX
-        leaq    _ZGVdN4vv_pow_avx2(%rip), %rax
-	HAS_ARCH_FEATURE (AVX2_Usable)
-        jz      2f
-        ret
-2:      leaq    _ZGVdN4vv_pow_sse_wrapper(%rip), %rax
-        ret
-END (_ZGVdN4vv_pow)
-libmvec_hidden_def (_ZGVdN4vv_pow)
-
-#define _ZGVdN4vv_pow _ZGVdN4vv_pow_sse_wrapper
-#include "../svml_d_pow4_core.S"
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+tst_audit10_aux (void)
+{
+#ifdef __AVX512F__
+  extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i,
+                             __m512i, __m512i, __m512i, __m512i);
+
+  __m512i zmm = _mm512_setzero_si512 ();
+  __m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm);
+
+  zmm = _mm512_set1_epi64 (0x12349876);
+
+  if (memcmp (&zmm, &ret, sizeof (ret)))
+    abort ();
+  return 0;
+#else /* __AVX512F__ */
+  return 77;
+#endif /* __AVX512F__ */
+}
diff --git a/sysdeps/x86_64/tst-audit10.c b/sysdeps/x86_64/tst-audit10.c
index d104341be8..568011cb96 100644
--- a/sysdeps/x86_64/tst-audit10.c
+++ b/sysdeps/x86_64/tst-audit10.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Test case for preserved AVX512 registers in dynamic linker.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -15,17 +16,14 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-/* Test case for x86-64 preserved registers in dynamic linker.  */
-
-#ifdef __AVX512F__
-#include <stdlib.h>
-#include <string.h>
 #include <cpuid.h>
-#include <immintrin.h>
+
+int tst_audit10_aux (void);
 
 static int
 avx512_enabled (void)
 {
+#ifdef bit_AVX512F
   unsigned int eax, ebx, ecx, edx;
 
   if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
@@ -40,34 +38,20 @@ avx512_enabled (void)
 
   /* Verify that ZMM, YMM and XMM states are enabled.  */
   return (eax & 0xe6) == 0xe6;
+#else
+  return 0;
+#endif
 }
 
-
-extern __m512i audit_test (__m512i, __m512i, __m512i, __m512i,
-			   __m512i, __m512i, __m512i, __m512i);
 static int
 do_test (void)
 {
   /* Run AVX512 test only if AVX512 is supported.  */
   if (avx512_enabled ())
-    {
-      __m512i zmm = _mm512_setzero_si512 ();
-      __m512i ret = audit_test (zmm, zmm, zmm, zmm, zmm, zmm, zmm, zmm);
-
-      zmm = _mm512_set1_epi64 (0x12349876);
-
-      if (memcmp (&zmm, &ret, sizeof (ret)))
-	abort ();
-    }
-  return 0;
-}
-#else
-static int
-do_test (void)
-{
-  return 0;
+    return tst_audit10_aux ();
+  else
+    return 77;
 }
-#endif
 
 #define TEST_FUNCTION do_test ()
 #include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-audit4-aux.c b/sysdeps/x86_64/tst-audit4-aux.c
new file mode 100644
index 0000000000..2770be5b5e
--- /dev/null
+++ b/sysdeps/x86_64/tst-audit4-aux.c
@@ -0,0 +1,39 @@
+/* Test case for preserved AVX registers in dynamic linker, -mavx part.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
+			   __m256i, __m256i, __m256i, __m256i);
+
+int
+tst_audit4_aux (void)
+{
+#ifdef __AVX__
+  __m256i ymm = _mm256_setzero_si256 ();
+  __m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm);
+  ymm =	 _mm256_set1_epi32 (0x12349876);
+  if (memcmp (&ymm, &ret, sizeof (ret)))
+    abort ();
+  return 0;
+#else  /* __AVX__ */
+  return 77;
+#endif  /* __AVX__ */
+}
diff --git a/sysdeps/x86_64/tst-audit4.c b/sysdeps/x86_64/tst-audit4.c
index 44d51231e3..d7ca24ac2d 100644
--- a/sysdeps/x86_64/tst-audit4.c
+++ b/sysdeps/x86_64/tst-audit4.c
@@ -1,11 +1,24 @@
-/* Test case for x86-64 preserved registers in dynamic linker.  */
+/* Test case for preserved AVX registers in dynamic linker.
+   Copyright (C) 2009-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
 
-#ifdef __AVX__
-#include <stdlib.h>
-#include <string.h>
 #include <cpuid.h>
-#include <immintrin.h>
 
+int tst_audit4_aux (void);
 
 static int
 avx_enabled (void)
@@ -22,31 +35,15 @@ avx_enabled (void)
   return (eax & 6) == 6;
 }
 
-
-extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
-			   __m256i, __m256i, __m256i, __m256i);
 static int
 do_test (void)
 {
   /* Run AVX test only if AVX is supported.  */
   if (avx_enabled ())
-    {
-      __m256i ymm = _mm256_setzero_si256 ();
-      __m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm);
-
-      ymm =  _mm256_set1_epi32 (0x12349876);
-      if (memcmp (&ymm, &ret, sizeof (ret)))
-	abort ();
-    }
-  return 0;
-}
-#else
-static int
-do_test (void)
-{
-  return 0;
+    return tst_audit4_aux ();
+  else
+    return 77;
 }
-#endif
 
 #define TEST_FUNCTION do_test ()
 #include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-auditmod10a.c b/sysdeps/x86_64/tst-auditmod10a.c
index e94dbaf7fe..ff6021a79a 100644
--- a/sysdeps/x86_64/tst-auditmod10a.c
+++ b/sysdeps/x86_64/tst-auditmod10a.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-auditmod10b.c b/sysdeps/x86_64/tst-auditmod10b.c
index ad6fcafdda..de1bbbb7fb 100644
--- a/sysdeps/x86_64/tst-auditmod10b.c
+++ b/sysdeps/x86_64/tst-auditmod10b.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -19,6 +19,8 @@
    function parameter passing/return.  */
 
 #include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod3b.c b/sysdeps/x86_64/tst-auditmod3b.c
index 1a41ca80c0..7aad92382e 100644
--- a/sysdeps/x86_64/tst-auditmod3b.c
+++ b/sysdeps/x86_64/tst-auditmod3b.c
@@ -2,6 +2,8 @@
    function parameter passing/return.  */
 
 #include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod4b.c b/sysdeps/x86_64/tst-auditmod4b.c
index 2b0d827e88..1153ea442c 100644
--- a/sysdeps/x86_64/tst-auditmod4b.c
+++ b/sysdeps/x86_64/tst-auditmod4b.c
@@ -2,6 +2,8 @@
    function parameter passing/return.  */
 
 #include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod5b.c b/sysdeps/x86_64/tst-auditmod5b.c
index a74d261f03..6a280fd61b 100644
--- a/sysdeps/x86_64/tst-auditmod5b.c
+++ b/sysdeps/x86_64/tst-auditmod5b.c
@@ -2,6 +2,8 @@
    function parameter passing/return.  */
 
 #include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod6b.c b/sysdeps/x86_64/tst-auditmod6b.c
index 886fc33e9b..3533602c07 100644
--- a/sysdeps/x86_64/tst-auditmod6b.c
+++ b/sysdeps/x86_64/tst-auditmod6b.c
@@ -2,6 +2,8 @@
    function parameter passing/return.  */
 
 #include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod6c.c b/sysdeps/x86_64/tst-auditmod6c.c
index b2ee24d8bf..8000e89224 100644
--- a/sysdeps/x86_64/tst-auditmod6c.c
+++ b/sysdeps/x86_64/tst-auditmod6c.c
@@ -2,6 +2,8 @@
    function parameter passing/return.  */
 
 #include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-auditmod7b.c b/sysdeps/x86_64/tst-auditmod7b.c
index f27076d3bb..5abe6d1bc9 100644
--- a/sysdeps/x86_64/tst-auditmod7b.c
+++ b/sysdeps/x86_64/tst-auditmod7b.c
@@ -2,6 +2,8 @@
    function parameter passing/return.  */
 
 #include <dlfcn.h>
+#include <link.h>
+#include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/sysdeps/x86_64/tst-avx-aux.c b/sysdeps/x86_64/tst-avx-aux.c
new file mode 100644
index 0000000000..e6ae368fd8
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx-aux.c
@@ -0,0 +1,47 @@
+/* Test case for preserved AVX registers in dynamic linker, -mavx part.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+tst_avx_aux (void)
+{
+#ifdef __AVX__
+  extern __m256i avx_test (__m256i, __m256i, __m256i, __m256i,
+			   __m256i, __m256i, __m256i, __m256i);
+
+  __m256i ymm0 = _mm256_set1_epi32 (0);
+  __m256i ymm1 = _mm256_set1_epi32 (1);
+  __m256i ymm2 = _mm256_set1_epi32 (2);
+  __m256i ymm3 = _mm256_set1_epi32 (3);
+  __m256i ymm4 = _mm256_set1_epi32 (4);
+  __m256i ymm5 = _mm256_set1_epi32 (5);
+  __m256i ymm6 = _mm256_set1_epi32 (6);
+  __m256i ymm7 = _mm256_set1_epi32 (7);
+  __m256i ret = avx_test (ymm0, ymm1, ymm2, ymm3,
+			  ymm4, ymm5, ymm6, ymm7);
+  ymm0 =  _mm256_set1_epi32 (0x12349876);
+  if (memcmp (&ymm0, &ret, sizeof (ret)))
+    abort ();
+  return 0;
+#else  /* __AVX__ */
+  return 77;
+#endif  /* __AVX__ */
+}
diff --git a/sysdeps/x86_64/tst-avx.c b/sysdeps/x86_64/tst-avx.c
new file mode 100644
index 0000000000..9c52fc264a
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx.c
@@ -0,0 +1,49 @@
+/* Test case for preserved AVX registers in dynamic linker.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <cpuid.h>
+
+int tst_avx_aux (void);
+
+static int
+avx_enabled (void)
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+      || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+    return 0;
+
+  /* Check the OS has AVX and SSE saving enabled.  */
+  asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+  return (eax & 6) == 6;
+}
+
+static int
+do_test (void)
+{
+  /* Run AVX test only if AVX is supported.  */
+  if (avx_enabled ())
+    return tst_avx_aux ();
+  else
+    return 77;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-avx512-aux.c b/sysdeps/x86_64/tst-avx512-aux.c
new file mode 100644
index 0000000000..87c4124398
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx512-aux.c
@@ -0,0 +1,48 @@
+/* Test case for preserved AVX512 registers in dynamic linker,
+   -mavx512f part.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+tst_avx512_aux (void)
+{
+#ifdef __AVX512F__
+  extern __m512i avx512_test (__m512i, __m512i, __m512i, __m512i,
+			      __m512i, __m512i, __m512i, __m512i);
+
+  __m512i zmm0 = _mm512_set1_epi32 (0);
+  __m512i zmm1 = _mm512_set1_epi32 (1);
+  __m512i zmm2 = _mm512_set1_epi32 (2);
+  __m512i zmm3 = _mm512_set1_epi32 (3);
+  __m512i zmm4 = _mm512_set1_epi32 (4);
+  __m512i zmm5 = _mm512_set1_epi32 (5);
+  __m512i zmm6 = _mm512_set1_epi32 (6);
+  __m512i zmm7 = _mm512_set1_epi32 (7);
+  __m512i ret = avx512_test (zmm0, zmm1, zmm2, zmm3,
+			     zmm4, zmm5, zmm6, zmm7);
+  zmm0 =  _mm512_set1_epi32 (0x12349876);
+  if (memcmp (&zmm0, &ret, sizeof (ret)))
+    abort ();
+  return 0;
+#else  /* __AVX512F__ */
+  return 77;
+#endif  /* __AVX512F__ */
+}
diff --git a/sysdeps/x86_64/tst-avx512.c b/sysdeps/x86_64/tst-avx512.c
new file mode 100644
index 0000000000..63d8bc9c27
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx512.c
@@ -0,0 +1,57 @@
+/* Test case for preserved AVX512 registers in dynamic linker.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <cpuid.h>
+
+int tst_avx512_aux (void);
+
+static int
+avx512_enabled (void)
+{
+#ifdef bit_AVX512F
+  unsigned int eax, ebx, ecx, edx;
+
+  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+      || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+    return 0;
+
+  __cpuid_count (7, 0, eax, ebx, ecx, edx);
+  if (!(ebx & bit_AVX512F))
+    return 0;
+
+  asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+  /* Verify that ZMM, YMM and XMM states are enabled.  */
+  return (eax & 0xe6) == 0xe6;
+#else
+  return 0;
+#endif
+}
+
+static int
+do_test (void)
+{
+  /* Run AVX512 test only if AVX512 is supported.  */
+  if (avx512_enabled ())
+    return tst_avx512_aux ();
+  else
+    return 77;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-avx512mod.c b/sysdeps/x86_64/tst-avx512mod.c
new file mode 100644
index 0000000000..4cfb3a2c3d
--- /dev/null
+++ b/sysdeps/x86_64/tst-avx512mod.c
@@ -0,0 +1,48 @@
+/* Test case for x86-64 preserved AVX512 registers in dynamic linker.  */
+
+#ifdef __AVX512F__
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m512i
+avx512_test (__m512i x0, __m512i x1, __m512i x2, __m512i x3,
+	     __m512i x4, __m512i x5, __m512i x6, __m512i x7)
+{
+  __m512i zmm;
+
+  zmm = _mm512_set1_epi32 (0);
+  if (memcmp (&zmm, &x0, sizeof (zmm)))
+    abort ();
+
+  zmm = _mm512_set1_epi32 (1);
+  if (memcmp (&zmm, &x1, sizeof (zmm)))
+    abort ();
+
+  zmm = _mm512_set1_epi32 (2);
+  if (memcmp (&zmm, &x2, sizeof (zmm)))
+    abort ();
+
+  zmm = _mm512_set1_epi32 (3);
+  if (memcmp (&zmm, &x3, sizeof (zmm)))
+    abort ();
+
+  zmm = _mm512_set1_epi32 (4);
+  if (memcmp (&zmm, &x4, sizeof (zmm)))
+    abort ();
+
+  zmm = _mm512_set1_epi32 (5);
+  if (memcmp (&zmm, &x5, sizeof (zmm)))
+    abort ();
+
+  zmm = _mm512_set1_epi32 (6);
+  if (memcmp (&zmm, &x6, sizeof (zmm)))
+    abort ();
+
+  zmm = _mm512_set1_epi32 (7);
+  if (memcmp (&zmm, &x7, sizeof (zmm)))
+    abort ();
+
+  return _mm512_set1_epi32 (0x12349876);
+}
+#endif
diff --git a/sysdeps/x86_64/tst-avxmod.c b/sysdeps/x86_64/tst-avxmod.c
new file mode 100644
index 0000000000..6e5b154997
--- /dev/null
+++ b/sysdeps/x86_64/tst-avxmod.c
@@ -0,0 +1,48 @@
+/* Test case for x86-64 preserved AVX registers in dynamic linker.  */
+
+#ifdef __AVX__
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m256i
+avx_test (__m256i x0, __m256i x1, __m256i x2, __m256i x3,
+	  __m256i x4, __m256i x5, __m256i x6, __m256i x7)
+{
+  __m256i ymm;
+
+  ymm = _mm256_set1_epi32 (0);
+  if (memcmp (&ymm, &x0, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (1);
+  if (memcmp (&ymm, &x1, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (2);
+  if (memcmp (&ymm, &x2, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (3);
+  if (memcmp (&ymm, &x3, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (4);
+  if (memcmp (&ymm, &x4, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (5);
+  if (memcmp (&ymm, &x5, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (6);
+  if (memcmp (&ymm, &x6, sizeof (ymm)))
+    abort ();
+
+  ymm = _mm256_set1_epi32 (7);
+  if (memcmp (&ymm, &x7, sizeof (ymm)))
+    abort ();
+
+  return _mm256_set1_epi32 (0x12349876);
+}
+#endif
diff --git a/sysdeps/x86_64/tst-mallocalign1.c b/sysdeps/x86_64/tst-mallocalign1.c
index 3897af86c1..0f2e725e3b 100644
--- a/sysdeps/x86_64/tst-mallocalign1.c
+++ b/sysdeps/x86_64/tst-mallocalign1.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-platform-1.c b/sysdeps/x86_64/tst-platform-1.c
new file mode 100644
index 0000000000..91dbbb93db
--- /dev/null
+++ b/sysdeps/x86_64/tst-platform-1.c
@@ -0,0 +1,29 @@
+/* Test PRELOAD with $PLATFORM.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+
+extern int preload (void);
+
+static int
+do_test (void)
+{
+  return preload () == 0x1234 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/x86_64/tst-platformmod-1.c b/sysdeps/x86_64/tst-platformmod-1.c
new file mode 100644
index 0000000000..be0e786e76
--- /dev/null
+++ b/sysdeps/x86_64/tst-platformmod-1.c
@@ -0,0 +1,23 @@
+/* Test PRELOAD with $PLATFORM.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+int
+preload (void)
+{
+  return 0;
+}
diff --git a/sysdeps/x86_64/tst-platformmod-2.c b/sysdeps/x86_64/tst-platformmod-2.c
new file mode 100644
index 0000000000..413d0bd94b
--- /dev/null
+++ b/sysdeps/x86_64/tst-platformmod-2.c
@@ -0,0 +1,23 @@
+/* Test PRELOAD with $PLATFORM.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+int
+preload (void)
+{
+  return 0x1234;
+}
diff --git a/sysdeps/x86_64/tst-quad1.c b/sysdeps/x86_64/tst-quad1.c
index 1cb63a748f..089b25d2df 100644
--- a/sysdeps/x86_64/tst-quad1.c
+++ b/sysdeps/x86_64/tst-quad1.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-quadmod1.S b/sysdeps/x86_64/tst-quadmod1.S
index 588c5016b6..c60f9dc89d 100644
--- a/sysdeps/x86_64/tst-quadmod1.S
+++ b/sysdeps/x86_64/tst-quadmod1.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -28,6 +28,9 @@
 	.type	func, @function
 func:
 	.cfi_startproc
+#if defined __CET__ && (__CET__ & 1) != 0
+	endbr64		/* Assembled only when bit 0 of __CET__ is set.  NOTE(review): presumably the indirect-branch-tracking landing pad -- confirm.  */
+#endif /* __CET__ bit 0 set */
 	xorl	%edi, %edi
 	jmp	exit@PLT
 	.cfi_endproc
@@ -37,6 +40,9 @@ func:
 foo:
 	.cfi_startproc
 	.cfi_def_cfa_register 6
+#if defined __CET__ && (__CET__ & 1) != 0
+	endbr64
+#endif
 	movq	.Ljmp(%rip), %rax
 	subq	$BIAS, %rax
 	jmp	*%rax
diff --git a/sysdeps/x86_64/tst-quadmod2.S b/sysdeps/x86_64/tst-quadmod2.S
index 7409a9eaa3..af03444d4f 100644
--- a/sysdeps/x86_64/tst-quadmod2.S
+++ b/sysdeps/x86_64/tst-quadmod2.S
@@ -1,4 +1,4 @@
-/* Copyright (C) 2012-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -27,6 +27,9 @@
 	.type	func, @function
 func:
 	.cfi_startproc
+#if defined __CET__ && (__CET__ & 1) != 0
+	endbr64		/* Assembled only when bit 0 of __CET__ is set.  NOTE(review): presumably the indirect-branch-tracking landing pad -- confirm.  */
+#endif /* __CET__ bit 0 set */
 	xorl	%edi, %edi
 	jmp	exit@PLT
 	.cfi_endproc
@@ -36,6 +39,9 @@ func:
 foo:
 	.cfi_startproc
 	.cfi_def_cfa_register 6
+#if defined __CET__ && (__CET__ & 1) != 0
+	endbr64
+#endif
 	movq	.Ljmp(%rip), %rax
 	subq	$BIAS, %rax
 	jmp	*%rax
diff --git a/sysdeps/x86_64/tst-sse.c b/sysdeps/x86_64/tst-sse.c
new file mode 100644
index 0000000000..d219889d1f
--- /dev/null
+++ b/sysdeps/x86_64/tst-sse.c
@@ -0,0 +1,46 @@
+/* Test case for preserved SSE registers in dynamic linker.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <immintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern __m128i sse_test (__m128i, __m128i, __m128i, __m128i,
+			 __m128i, __m128i, __m128i, __m128i);
+
+static int
+do_test (void)
+{
+  /* Pass a distinct value in each of the eight vector arguments;
+     sse_test (tst-ssemod.c) aborts if any of them arrives changed.  */
+  const __m128i ret
+    = sse_test (_mm_set1_epi32 (0), _mm_set1_epi32 (1),
+		_mm_set1_epi32 (2), _mm_set1_epi32 (3),
+		_mm_set1_epi32 (4), _mm_set1_epi32 (5),
+		_mm_set1_epi32 (6), _mm_set1_epi32 (7));
+
+  /* The callee must hand back exactly this value.  */
+  const __m128i expected = _mm_set1_epi32 (0x12349876);
+  if (memcmp (&expected, &ret, sizeof (ret)) != 0)
+    abort ();
+
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../../test-skeleton.c"
diff --git a/sysdeps/x86_64/tst-ssemod.c b/sysdeps/x86_64/tst-ssemod.c
new file mode 100644
index 0000000000..907a64c69e
--- /dev/null
+++ b/sysdeps/x86_64/tst-ssemod.c
@@ -0,0 +1,46 @@
+/* Test case for x86-64 preserved SSE registers in dynamic linker.  */
+
+#include <stdlib.h>
+#include <string.h>
+#include <immintrin.h>
+
+__m128i
+sse_test (__m128i x0, __m128i x1, __m128i x2, __m128i x3,
+	  __m128i x4, __m128i x5, __m128i x6, __m128i x7)
+{
+  /* Argument xN must equal _mm_set1_epi32 (N); abort on the first
+     argument that does not.  */
+  const __m128i want0 = _mm_set1_epi32 (0);
+  if (memcmp (&want0, &x0, sizeof (want0)) != 0)
+    abort ();
+
+  const __m128i want1 = _mm_set1_epi32 (1);
+  if (memcmp (&want1, &x1, sizeof (want1)) != 0)
+    abort ();
+
+  const __m128i want2 = _mm_set1_epi32 (2);
+  if (memcmp (&want2, &x2, sizeof (want2)) != 0)
+    abort ();
+
+  const __m128i want3 = _mm_set1_epi32 (3);
+  if (memcmp (&want3, &x3, sizeof (want3)) != 0)
+    abort ();
+
+  const __m128i want4 = _mm_set1_epi32 (4);
+  if (memcmp (&want4, &x4, sizeof (want4)) != 0)
+    abort ();
+
+  const __m128i want5 = _mm_set1_epi32 (5);
+  if (memcmp (&want5, &x5, sizeof (want5)) != 0)
+    abort ();
+
+  const __m128i want6 = _mm_set1_epi32 (6);
+  if (memcmp (&want6, &x6, sizeof (want6)) != 0)
+    abort ();
+
+  const __m128i want7 = _mm_set1_epi32 (7);
+  if (memcmp (&want7, &x7, sizeof (want7)) != 0)
+    abort ();
+
+  return _mm_set1_epi32 (0x12349876);
+}
diff --git a/sysdeps/x86_64/tst-stack-align.h b/sysdeps/x86_64/tst-stack-align.h
index 24e8e61c35..b2ef77f65d 100644
--- a/sysdeps/x86_64/tst-stack-align.h
+++ b/sysdeps/x86_64/tst-stack-align.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2016 Free Software Foundation, Inc.
+/* Copyright (C) 2003-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/tst-x86_64-1.c b/sysdeps/x86_64/tst-x86_64-1.c
new file mode 100644
index 0000000000..801c866bdd
--- /dev/null
+++ b/sysdeps/x86_64/tst-x86_64-1.c
@@ -0,0 +1,26 @@
+/* Test searching the "x86_64" directory for shared libraries.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+extern void foo (void);
+
+int
+main (void)
+{
+  foo ();	/* Defined in tst-x86_64mod-1.c; declared extern above.  */
+  return 0;	/* Reached only after the call to foo has returned.  */
+}
diff --git a/sysdeps/x86_64/tst-x86_64mod-1.c b/sysdeps/x86_64/tst-x86_64mod-1.c
new file mode 100644
index 0000000000..57e955d5d9
--- /dev/null
+++ b/sysdeps/x86_64/tst-x86_64mod-1.c
@@ -0,0 +1,22 @@
+/* Test searching the "x86_64" directory for shared libraries.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+void
+foo (void)
+{
+}	/* No-op body; main in tst-x86_64-1.c simply calls this function.  */
diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S
index 8604289e46..29284662a1 100644
--- a/sysdeps/x86_64/wcschr.S
+++ b/sysdeps/x86_64/wcschr.S
@@ -1,5 +1,5 @@
 /* wcschr with SSSE3
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S
index 705a73b10e..0d506c8b5c 100644
--- a/sysdeps/x86_64/wcscmp.S
+++ b/sysdeps/x86_64/wcscmp.S
@@ -1,5 +1,5 @@
 /* Optimized wcscmp for x86-64 with SSE2.
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -946,5 +946,7 @@ L(equal):
 	ret
 
 END (__wcscmp)
+#ifndef __wcscmp
 libc_hidden_def (__wcscmp)
 weak_alias (__wcscmp, wcscmp)
+#endif
diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S
index 7a9175eefe..9f5f723227 100644
--- a/sysdeps/x86_64/wcslen.S
+++ b/sysdeps/x86_64/wcslen.S
@@ -1,5 +1,5 @@
 /* Optimized wcslen for x86-64 with SSE2.
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index fb192f3ecf..2f38853727 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -1,5 +1,5 @@
 /* wcsrchr with SSSE3
-   Copyright (C) 2011-2016 Free Software Foundation, Inc.
+   Copyright (C) 2011-2018 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
diff --git a/sysdeps/x86_64/wmemset.S b/sysdeps/x86_64/wmemset.S
new file mode 100644
index 0000000000..f96d567fd8
--- /dev/null
+++ b/sysdeps/x86_64/wmemset.S
@@ -0,0 +1 @@
+/* Implemented in memset.S.  */
diff --git a/sysdeps/x86_64/wmemset_chk.S b/sysdeps/x86_64/wmemset_chk.S
new file mode 100644
index 0000000000..9275ebb40d
--- /dev/null
+++ b/sysdeps/x86_64/wmemset_chk.S
@@ -0,0 +1,33 @@
+/* Checking wmemset for x86-64.
+   Copyright (C) 2004-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#ifndef SHARED
+	/* For libc.so this is defined in wmemset.S (which in turn says
+	   wmemset is implemented in memset.S).  For libc.a, this is a
+	   separate source to avoid wmemset bringing in __chk_fail and
+	   all routines it calls.  */
+        .text
+ENTRY (__wmemset_chk)
+	cmpq	%rdx, %rcx	/* Set flags from %rcx - %rdx.  NOTE(review): presumably %rdx = n and %rcx = destination size -- confirm.  */
+	jb	__chk_fail	/* %rcx below %rdx (unsigned): go to __chk_fail.  */
+	jmp	wmemset		/* Otherwise jump (not call) to wmemset.  */
+END (__wmemset_chk)
+#endif /* !SHARED */
diff --git a/sysdeps/x86_64/wordcopy.c b/sysdeps/x86_64/wordcopy.c
new file mode 100644
index 0000000000..590b6cb16b
--- /dev/null
+++ b/sysdeps/x86_64/wordcopy.c
@@ -0,0 +1 @@
+/* X86-64 doesn't use memory copy functions.  */
diff --git a/sysdeps/x86_64/x32/dl-machine.h b/sysdeps/x86_64/x32/dl-machine.h
index 47132fcd96..2a612913ff 100644
--- a/sysdeps/x86_64/x32/dl-machine.h
+++ b/sysdeps/x86_64/x32/dl-machine.h
@@ -1,5 +1,5 @@
 /* Machine-dependent ELF dynamic relocation inline functions.  x32 version.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
diff --git a/sysdeps/x86_64/x32/fpu/s_lrint.S b/sysdeps/x86_64/x32/fpu/s_lrint.S
index aa68863553..381684583e 100644
--- a/sysdeps/x86_64/x32/fpu/s_lrint.S
+++ b/sysdeps/x86_64/x32/fpu/s_lrint.S
@@ -1,6 +1,6 @@
 /* Round argument to nearest integral value according to current rounding
    direction.
-   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,10 +18,11 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-double.h>
 
 	.text
 ENTRY(__lrint)
 	cvtsd2si %xmm0,%eax
 	ret
 END(__lrint)
-weak_alias (__lrint, lrint)
+libm_alias_double (__lrint, lrint) /* Replaces the plain weak_alias.  NOTE(review): presumably also emits the other double-format aliases -- confirm in libm-alias-double.h.  */
diff --git a/sysdeps/x86_64/x32/fpu/s_lrintf.S b/sysdeps/x86_64/x32/fpu/s_lrintf.S
index bb5b1665bd..361d34a989 100644
--- a/sysdeps/x86_64/x32/fpu/s_lrintf.S
+++ b/sysdeps/x86_64/x32/fpu/s_lrintf.S
@@ -1,6 +1,6 @@
 /* Round argument to nearest integral value according to current rounding
    direction.
-   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Copyright (C) 2015-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,10 +18,11 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-float.h>
 
 	.text
 ENTRY(__lrintf)
 	cvtss2si %xmm0,%eax
 	ret
 END(__lrintf)
-weak_alias (__lrintf, lrintf)
+libm_alias_float (__lrint, lrint) /* Base names without the f suffix.  NOTE(review): the macro presumably appends it, so __lrintf still gets aliased to lrintf -- confirm in libm-alias-float.h.  */
diff --git a/sysdeps/x86_64/x32/fpu/s_lrintl.S b/sysdeps/x86_64/x32/fpu/s_lrintl.S
index 6bc8f6fdb9..b68313e916 100644
--- a/sysdeps/x86_64/x32/fpu/s_lrintl.S
+++ b/sysdeps/x86_64/x32/fpu/s_lrintl.S
@@ -1,6 +1,6 @@
 /* Round argument to nearest integral value according to current rounding
    direction.
-   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+   Copyright (C) 1997-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,6 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <sysdep.h>
+#include <libm-alias-ldouble.h>
 
 	.text
 ENTRY(__lrintl)
@@ -27,4 +28,4 @@ ENTRY(__lrintl)
 	movl	-4(%rsp),%eax
 	ret
 END(__lrintl)
-weak_alias (__lrintl, lrintl)
+libm_alias_ldouble (__lrint, lrint)
diff --git a/sysdeps/x86_64/x32/gmp-mparam.h b/sysdeps/x86_64/x32/gmp-mparam.h
index df37442bfb..331c26d587 100644
--- a/sysdeps/x86_64/x32/gmp-mparam.h
+++ b/sysdeps/x86_64/x32/gmp-mparam.h
@@ -1,6 +1,6 @@
 /* gmp-mparam.h -- Compiler/machine parameter header file.
 
-Copyright (C) 2012-2016 Free Software Foundation, Inc.
+Copyright (C) 2012-2018 Free Software Foundation, Inc.
 
 This file is part of the GNU MP Library.
 
diff --git a/sysdeps/x86_64/x32/nptl/tls.h b/sysdeps/x86_64/x32/nptl/tls.h
deleted file mode 100644
index 245623494b..0000000000
--- a/sysdeps/x86_64/x32/nptl/tls.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Definition for thread-local data handling.  nptl/x32 version.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#ifndef _X32_TLS_H
-#define _X32_TLS_H	1
-
-#include_next <tls.h>
-
-#ifndef __ASSEMBLER__
-
-/* X32 doesn't support 32-bit indirect calls via memory.  Instead, we
-   load the 32-bit address from memory into the lower 32 bits of the
-   return-value register, which will automatically zero-extend the upper
-   32 bits of the return-value register.  We then do the indirect call
-   via the 64-bit return-value register.  */
-# undef CALL_THREAD_FCT
-# define CALL_THREAD_FCT(descr) \
-  ({ void *__res;							      \
-     asm volatile ("movl %%fs:%P2, %%edi\n\t"				      \
-		   "movl %%fs:%P1, %k0\n\t"				      \
-		   "callq *%q0"						      \
-		   : "=a" (__res)					      \
-		   : "i" (offsetof (struct pthread, start_routine)),	      \
-		     "i" (offsetof (struct pthread, arg))		      \
-		   : "di", "si", "cx", "dx", "r8", "r9", "r10", "r11",	      \
-		     "memory", "cc");					      \
-     __res; })
-
-#endif /* __ASSEMBLER__ */
-
-#endif	/* x32/tls.h */
diff --git a/sysdeps/x86_64/x32/sysdep.h b/sysdeps/x86_64/x32/sysdep.h
index 17a1446796..b40a438771 100644
--- a/sysdeps/x86_64/x32/sysdep.h
+++ b/sysdeps/x86_64/x32/sysdep.h
@@ -1,5 +1,5 @@
 /* Assembler macros for x32.
-   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Copyright (C) 2012-2018 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or