diff options
Diffstat (limited to 'sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S')
-rw-r--r-- | sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S | 66 |
1 files changed, 60 insertions, 6 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S index 65ad540122..b4dfa37898 100644 --- a/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S +++ b/sysdeps/x86_64/fpu/multiarch/svml_d_sincos2_core_sse4.S @@ -1,5 +1,5 @@ /* Function sincos vectorized with SSE4. - Copyright (C) 2014-2016 Free Software Foundation, Inc. + Copyright (C) 2014-2018 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -20,7 +20,7 @@ #include "svml_d_trig_data.h" .text -ENTRY (_ZGVbN2vvv_sincos_sse4) +ENTRY (_ZGVbN2vl8l8_sincos_sse4) /* ALGORITHM DESCRIPTION: @@ -287,12 +287,12 @@ ENTRY (_ZGVbN2vvv_sincos_sse4) shlq $4, %r15 movsd 136(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 200(%rsp,%r15) movsd 136(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 264(%rsp,%r15) jmp .LBL_1_8 @@ -302,13 +302,67 @@ ENTRY (_ZGVbN2vvv_sincos_sse4) shlq $4, %r15 movsd 128(%rsp,%r15), %xmm0 - call sin@PLT + call JUMPTARGET(sin) movsd %xmm0, 192(%rsp,%r15) movsd 128(%rsp,%r15), %xmm0 - call cos@PLT + call JUMPTARGET(cos) movsd %xmm0, 256(%rsp,%r15) jmp .LBL_1_7 +END (_ZGVbN2vl8l8_sincos_sse4) +libmvec_hidden_def(_ZGVbN2vl8l8_sincos_sse4) + +/* vvv version implemented with wrapper to vl8l8 variant. 
*/
+ENTRY (_ZGVbN2vvv_sincos_sse4)	/* Wrapper: spills xmm1/xmm2, calls the vl8l8 variant with two stack buffers, then stores each buffer element through the spilled lanes.  */
+#ifndef __ILP32__	/* LP64 path: each spilled lane is used as a 64-bit address.  */
+ subq $72, %rsp	/* 72-byte frame: two 16-byte buffers + two 16-byte spill slots + 8 bytes padding.  */
+ .cfi_def_cfa_offset 80	/* 72 bytes of frame + 8-byte return address.  */
+ movdqu %xmm1, 32(%rsp)	/* Spill xmm1 to 32..47(%rsp); NOTE(review): presumably the two sin destination pointers - confirm against the vector ABI.  */
+ lea (%rsp), %rdi	/* rdi = address of the buffer at 0..15(%rsp), first pointer argument of the callee.  */
+ movdqu %xmm2, 48(%rdi)	/* Spill xmm2 to 48..63(%rsp) (rdi == rsp here); NOTE(review): presumably the two cos destination pointers - confirm.  */
+ lea 16(%rsp), %rsi	/* rsi = address of the buffer at 16..31(%rsp), second pointer argument of the callee.  */
+ call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4)	/* Presumably fills both stack buffers; callee body is not shown in this hunk.  */
+ movq 32(%rsp), %rdx	/* rdx = low lane of spilled xmm1.  */
+ movq 48(%rsp), %rsi	/* rsi = low lane of spilled xmm2.  */
+ movq 40(%rsp), %r8	/* r8 = high lane of spilled xmm1.  */
+ movq 56(%rsp), %r10	/* r10 = high lane of spilled xmm2.  */
+ movq (%rsp), %rax	/* rax = element 0 of the first buffer.  */
+ movq 16(%rsp), %rcx	/* rcx = element 0 of the second buffer.  */
+ movq 8(%rsp), %rdi	/* rdi = element 1 of the first buffer.  */
+ movq 24(%rsp), %r9	/* r9 = element 1 of the second buffer.  */
+ movq %rax, (%rdx)	/* First buffer element 0 -> address from xmm1 low lane.  */
+ movq %rcx, (%rsi)	/* Second buffer element 0 -> address from xmm2 low lane.  */
+ movq %rdi, (%r8)	/* First buffer element 1 -> address from xmm1 high lane.  */
+ movq %r9, (%r10)	/* Second buffer element 1 -> address from xmm2 high lane.  */
+ addq $72, %rsp	/* Release the frame.  */
+ .cfi_def_cfa_offset 8	/* Only the return address remains above rsp.  */
+ ret
+#else	/* __ILP32__ path: each spilled 32-bit lane is used as an address.  */
+ subl $72, %esp	/* Same 72-byte frame; laid out as spill slots at 0 and 16, buffers at 32 and 48.  */
+ .cfi_def_cfa_offset 80	/* 72 bytes of frame + 8-byte return address.  */
+ leal 48(%rsp), %esi	/* esi = address of the buffer at 48..63, second pointer argument of the callee.  */
+ movaps %xmm1, 16(%esp)	/* Spill xmm1 to 16..31; movaps needs this slot 16-byte aligned.  */
+ leal 32(%rsp), %edi	/* edi = address of the buffer at 32..47, first pointer argument of the callee.  */
+ movaps %xmm2, (%esp)	/* Spill xmm2 to 0..15; movaps needs this slot 16-byte aligned.  */
+ call HIDDEN_JUMPTARGET(_ZGVbN2vl8l8_sincos_sse4)	/* Presumably fills both stack buffers; callee body is not shown in this hunk.  */
+ movdqa 16(%esp), %xmm1	/* Reload the copy of xmm1 spilled before the call.  */
+ movsd 32(%esp), %xmm0	/* xmm0 = element 0 of the first buffer.  */
+ movq %xmm1, %rax	/* rax = low 64 bits of xmm1; only eax (32-bit lane 0) is used as the address.  */
+ movdqa (%esp), %xmm2	/* Reload the copy of xmm2 spilled before the call.  */
+ movsd %xmm0, (%eax)	/* First buffer element 0 -> address from xmm1 lane 0.  */
+ movsd 40(%esp), %xmm0	/* xmm0 = element 1 of the first buffer.  */
+ pextrd $1, %xmm1, %eax	/* eax = 32-bit lane 1 of xmm1.  */
+ movsd %xmm0, (%eax)	/* First buffer element 1 -> address from xmm1 lane 1.  */
+ movsd 48(%esp), %xmm0	/* xmm0 = element 0 of the second buffer.  */
+ movq %xmm2, %rax	/* rax = low 64 bits of xmm2; only eax (32-bit lane 0) is used as the address.  */
+ movsd %xmm0, (%eax)	/* Second buffer element 0 -> address from xmm2 lane 0.  */
+ movsd 56(%esp), %xmm0	/* xmm0 = element 1 of the second buffer.  */
+ pextrd $1, %xmm2, %eax	/* eax = 32-bit lane 1 of xmm2.  */
+ movsd %xmm0, (%eax)	/* Second buffer element 1 -> address from xmm2 lane 1.  */
+ addl $72, %esp	/* Release the frame.  */
+ .cfi_def_cfa_offset 8	/* Only the return address remains above the stack pointer.  */
+ ret
+#endif
 END (_ZGVbN2vvv_sincos_sse4) |