/* strcpy with SSSE3
   Copyright (C) 2011-2016 Free Software Foundation, Inc.
   Contributed by Intel Corporation.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# ifndef USE_AS_STRCAT
/* NOTE(review): the header name had been lost after "# include"; it is
   restored as <sysdep.h>, assumed to provide the ENTRY and L macros
   used below -- confirm against the upstream file.  */
# include <sysdep.h>

# ifndef STRCPY
# define STRCPY __strcpy_ssse3
# endif

/* STRCPY: copy the string at the second argument to the first argument.
   Syntax: AT&T.  Arguments arrive in %rdi, %rsi, %rdx (System V AMD64).

   Register roles established by the code below:
     %rcx  source pointer            (copied from %rsi)
     %rdx  destination pointer       (copied from %rdi)
     %r8   remaining byte budget     (USE_AS_STRNCPY only, from %rdx)
     %rsi  scratch / byte offset handed to the exit paths
     %rax  scratch / NUL bit mask produced by pmovmskb
     %r9   scratch for 8-byte moves
     %xmm0 all-zero vector used as the NUL comparand; it is overwritten
	   by each pcmpeqb but is zero again whenever the resulting mask
	   is zero, which is the only case in which execution continues.

   When USE_AS_STRCAT is defined this whole entry sequence is skipped;
   presumably the including file supplies its own entry code and sets up
   %rcx/%rdx itself -- TODO confirm against the including file.

   The L(ExitN), L(Strncpy*) and L(CopyFrom1To16Bytes*) targets are not
   defined in this part of the file.  */

	.section .text.ssse3,"ax",@progbits
ENTRY (STRCPY)

	mov	%rsi, %rcx		/* rcx = source */
# ifdef USE_AS_STRNCPY
	mov	%rdx, %r8		/* r8 = length limit */
# endif
	mov	%rdi, %rdx		/* rdx = destination */
# ifdef USE_AS_STRNCPY
	test	%r8, %r8
	jz	L(Exit0)		/* limit == 0 */
	cmp	$8, %r8
	jbe	L(StrncpyExit8Bytes)	/* limit <= 8 (unsigned) */
# endif

	/* Look for the terminating NUL in source bytes 0..7, one byte at
	   a time; L(ExitK) is taken when byte K-1 is NUL.  */
	cmpb	$0, (%rcx)
	jz	L(Exit1)
	cmpb	$0, 1(%rcx)
	jz	L(Exit2)
	cmpb	$0, 2(%rcx)
	jz	L(Exit3)
	cmpb	$0, 3(%rcx)
	jz	L(Exit4)
	cmpb	$0, 4(%rcx)
	jz	L(Exit5)
	cmpb	$0, 5(%rcx)
	jz	L(Exit6)
	cmpb	$0, 6(%rcx)
	jz	L(Exit7)
	cmpb	$0, 7(%rcx)
	jz	L(Exit8)
# ifdef USE_AS_STRNCPY
	cmp	$16, %r8
	jb	L(StrncpyExit15Bytes)	/* limit < 16 (unsigned) */
# endif

	/* Same test for source bytes 8..14.  */
	cmpb	$0, 8(%rcx)
	jz	L(Exit9)
	cmpb	$0, 9(%rcx)
	jz	L(Exit10)
	cmpb	$0, 10(%rcx)
	jz	L(Exit11)
	cmpb	$0, 11(%rcx)
	jz	L(Exit12)
	cmpb	$0, 12(%rcx)
	jz	L(Exit13)
	cmpb	$0, 13(%rcx)
	jz	L(Exit14)
	cmpb	$0, 14(%rcx)
	jz	L(Exit15)
# ifdef USE_AS_STRNCPY
	cmp	$16, %r8
	je	L(Exit16)		/* limit is exactly 16 */
# endif
	cmpb	$0, 15(%rcx)
	jz	L(Exit16)
# endif

# ifdef USE_AS_STRNCPY
	mov	%rcx, %rsi
	sub	$16, %r8
	and	$0xf, %rsi		/* rsi = source offset within 16 bytes */

/* add 16 bytes rcx_offset to r8 */

	add	%rsi, %r8
# endif

	/* rsi = first 16-byte-aligned address strictly above rcx.  Copy
	   source bytes 0..15 with two 8-byte moves while testing the
	   aligned 16 bytes at rsi for a NUL.  */
	lea	16(%rcx), %rsi
	and	$-16, %rsi
	pxor	%xmm0, %xmm0		/* xmm0 = 0 */
	mov	(%rcx), %r9
	mov	%r9, (%rdx)
	pcmpeqb	(%rsi), %xmm0		/* 0xff in every byte lane holding NUL */
	mov	8(%rcx), %r9
	mov	%r9, 8(%rdx)

/* convert byte mask in xmm0 to bit mask */

	pmovmskb %xmm0, %rax
	sub	%rcx, %rsi		/* rsi = distance from rcx to that block */

# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)	/* NUL found in the aligned block */

	/* Advance rdx to the next 16-byte boundary;
	   rax = old rdx - new rdx (a value in -16..-1).  */
	mov	%rdx, %rax
	lea	16(%rdx), %rdx
	and	$-16, %rdx
	sub	%rdx, %rax

# ifdef USE_AS_STRNCPY
	/* rsi = (source advance) - (destination advance) - 1; bit 31 is
	   its sign.  When that difference is non-negative, 16 is added
	   back to the remaining count.  */
	add	%rax, %rsi
	lea	-1(%rsi), %rsi
	and	$1<<31, %esi
	test	%rsi, %rsi
	jnz	L(ContinueCopy)
	lea	16(%r8), %r8

L(ContinueCopy):
# endif
	sub	%rax, %rcx		/* advance rcx by the same amount as rdx */
	mov	%rcx, %rax
	and	$0xf, %rax		/* rax = rcx & 15; ZF set when aligned */
	mov	$0, %rsi		/* mov keeps the flags of the "and" above
					   alive for the jz below */

/* case: rcx_offset == rdx_offset */

	jz	L(Align16Both)

	/* Dispatch on the source misalignment (1..15) to L(ShlN).  */
	cmp	$8, %rax
	jae	L(ShlHigh8)
	cmp	$1, %rax
	je	L(Shl1)
	cmp	$2, %rax
	je	L(Shl2)
	cmp	$3, %rax
	je	L(Shl3)
	cmp	$4, %rax
	je	L(Shl4)
	cmp	$5, %rax
	je	L(Shl5)
	cmp	$6, %rax
	je	L(Shl6)
	jmp	L(Shl7)

L(ShlHigh8):
	je	L(Shl8)			/* still the flags of "cmp $8, %rax" */
	cmp	$9, %rax
	je	L(Shl9)
	cmp	$10, %rax
	je	L(Shl10)
	cmp	$11, %rax
	je	L(Shl11)
	cmp	$12, %rax
	je	L(Shl12)
	cmp	$13, %rax
	je	L(Shl13)
	cmp	$14, %rax
	je	L(Shl14)
	jmp	L(Shl15)

/* Source and destination are both 16-byte aligned.  rsi (0 on entry) is
   the running byte offset.  Each step below loads the next aligned
   chunk, stores the previous one (already known to be NUL-free), and
   tests the new chunk.  The chunk at (%rcx) is the block the entry code
   already tested.  */
L(Align16Both):
	movaps	(%rcx), %xmm1
	movaps	16(%rcx), %xmm2
	movaps	%xmm1, (%rdx)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	lea	16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%rcx, %rsi), %xmm3
	movaps	%xmm2, (%rdx, %rsi)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rax
	lea	16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%rcx, %rsi), %xmm4
	movaps	%xmm3, (%rdx, %rsi)
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rax
	lea	16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%rcx, %rsi), %xmm1
	movaps	%xmm4, (%rdx, %rsi)
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %rax
	lea	16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%rcx, %rsi), %xmm2
	movaps	%xmm1, (%rdx, %rsi)
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	lea	16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	movaps	16(%rcx, %rsi), %xmm3
	movaps	%xmm2, (%rdx, %rsi)
	pcmpeqb	%xmm3, %xmm0
	pmovmskb %xmm0, %rax
	lea	16(%rsi), %rsi
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	/* Store the last tested chunk, then round rcx down to a 64-byte
	   boundary.  rax = old rcx - new rcx; rdx is moved by the same
	   delta so both pointers keep addressing the same string byte.  */
	movaps	%xmm3, (%rdx, %rsi)
	mov	%rcx, %rax
	lea	16(%rcx, %rsi), %rcx
	and	$-0x40, %rcx
	sub	%rcx, %rax
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	lea	112(%r8, %rax), %r8	/* re-bias the remaining count to match */
# endif
	mov	$-0x40, %rsi		/* rsi = -64: offset of the current 64-byte
					   group once rcx/rdx are advanced past it */

/* Aligned 64-byte loop: load four chunks, fold them with pminub so that
   a single compare against zero detects a NUL anywhere in the 64 bytes,
   and store the four chunks only if none was found.  */
	.p2align 4
L(Aligned64Loop):
	movaps	(%rcx), %xmm2
	movaps	%xmm2, %xmm4		/* xmm4..xmm7 keep the unmodified chunks */
	movaps	16(%rcx), %xmm5
	movaps	32(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	48(%rcx), %xmm7
	pminub	%xmm5, %xmm2
	pminub	%xmm7, %xmm3
	pminub	%xmm2, %xmm3		/* xmm3 = bytewise min of all four chunks */
	pcmpeqb	%xmm0, %xmm3
	pmovmskb %xmm3, %rax
	lea	64(%rdx), %rdx
	lea	64(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeaveCase2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Aligned64Leave)
	movaps	%xmm4, -64(%rdx)
	movaps	%xmm5, -48(%rdx)
	movaps	%xmm6, -32(%rdx)
	movaps	%xmm7, -16(%rdx)
	jmp	L(Aligned64Loop)

/* A NUL lies somewhere in xmm4..xmm7.  Test the chunks in order, storing
   each NUL-free chunk and stepping rsi by 16, then leave through
   L(CopyFrom1To16Bytes) with rax = mask of the chunk holding the NUL.  */
L(Aligned64Leave):
# ifdef USE_AS_STRNCPY
	lea	48(%r8), %r8
# endif
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %rax
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm5, %xmm0
# ifdef USE_AS_STRNCPY
	lea	-16(%r8), %r8
# endif
	pmovmskb %xmm0, %rax
	movaps	%xmm4, -64(%rdx)
	test	%rax, %rax
	lea	16(%rsi), %rsi		/* lea leaves the flags of "test" intact */
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm6, %xmm0
# ifdef USE_AS_STRNCPY
	lea	-16(%r8), %r8
# endif
	pmovmskb %xmm0, %rax
	movaps	%xmm5, -48(%rdx)
	test	%rax, %rax
	lea	16(%rsi), %rsi
	jnz	L(CopyFrom1To16Bytes)

	/* The first three chunks are clean, so the NUL is in xmm7.  */
	movaps	%xmm6, -32(%rdx)
	pcmpeqb	%xmm7, %xmm0
# ifdef USE_AS_STRNCPY
	lea	-16(%r8), %r8
# endif
	pmovmskb %xmm0, %rax
	lea	16(%rsi), %rsi
	jmp	L(CopyFrom1To16Bytes)

/* L(ShlN) family (N = 1..15): rcx is N bytes past a 16-byte boundary and
   rdx is 16-byte aligned.  Source data is read with aligned loads from
   rcx - N, rcx + 16 - N, ...; "palignr $N, prev, cur" then yields the 16
   string bytes that straddle the two chunks, which are stored aligned.
   Four 16-byte steps are unrolled (xmm1 and xmm3 alternate as the
   "previous chunk" register), followed by a 64-byte loop.  When a NUL is
   seen, L(ShlNLoopExit) copies the 16 - N bytes that precede the chunk
   holding the NUL and leaves through L(CopyFrom1To16Bytes) with
   rsi = 16 - N and rax = NUL mask of that chunk.  */

	.p2align 4
L(Shl1):
	movaps	-1(%rcx), %xmm1
	movaps	15(%rcx), %xmm2
L(Shl1Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit1Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl1LoopExit)

	palignr	$1, %xmm1, %xmm2	/* xmm2 = bytes rcx .. rcx+15 */
	movaps	%xmm2, (%rdx)
	movaps	31(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit1Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl1LoopExit)

	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	31(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit1Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl1LoopExit)

	palignr	$1, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	31(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit1Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl1LoopExit)

	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	31(%rcx), %rcx		/* rcx = next aligned source chunk */
	lea	16(%rdx), %rdx

	/* Back both pointers up by rax (0, 16, 32 or 48) bytes so that the
	   aligned chunk at 15(%rcx) sits on a 64-byte boundary; the bytes
	   backed over are simply copied again by the loop.  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-15(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8		/* give the backed-up bytes back to the count */
# endif
	movaps	-1(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl1LoopStart):
	movaps	15(%rcx), %xmm2
	movaps	31(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	47(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	63(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7		/* bytewise min of the four chunks */
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7		/* keep the raw last chunk for the next pass */
	palignr	$1, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$1, %xmm3, %xmm4	/* palignr does not modify the flags */
	jnz	L(Shl1Start)		/* NUL in these 64 bytes: redo in 16-byte
					   steps from raw xmm1/xmm2 */
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave1)
# endif
	palignr	$1, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$1, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl1LoopStart)

L(Shl1LoopExit):
	/* Copy the 15 bytes before the NUL chunk with one unaligned 16-byte
	   move that starts 1 byte early.  */
	movdqu	-1(%rcx), %xmm1
	mov	$15, %rsi
	movdqu	%xmm1, -1(%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 2 bytes past a 16-byte boundary; same scheme as above with
   palignr $2.  */
	.p2align 4
L(Shl2):
	movaps	-2(%rcx), %xmm1
	movaps	14(%rcx), %xmm2
L(Shl2Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit2Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl2LoopExit)

	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	30(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit2Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl2LoopExit)

	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	30(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit2Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl2LoopExit)

	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	30(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit2Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl2LoopExit)

	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	30(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 14(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-14(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-2(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl2LoopStart):
	movaps	14(%rcx), %xmm2
	movaps	30(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	46(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	62(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$2, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$2, %xmm3, %xmm4
	jnz	L(Shl2Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave2)
# endif
	palignr	$2, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$2, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl2LoopStart)

L(Shl2LoopExit):
	/* 14 bytes precede the NUL chunk; copy 16 starting 2 bytes early.  */
	movdqu	-2(%rcx), %xmm1
	mov	$14, %rsi
	movdqu	%xmm1, -2(%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 3 bytes past a 16-byte boundary; same scheme with
   palignr $3.  */
	.p2align 4
L(Shl3):
	movaps	-3(%rcx), %xmm1
	movaps	13(%rcx), %xmm2
L(Shl3Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit3Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl3LoopExit)

	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	29(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit3Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl3LoopExit)

	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	29(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit3Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl3LoopExit)

	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	29(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit3Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl3LoopExit)

	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	29(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 13(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-13(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-3(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl3LoopStart):
	movaps	13(%rcx), %xmm2
	movaps	29(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	45(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	61(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$3, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$3, %xmm3, %xmm4
	jnz	L(Shl3Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave3)
# endif
	palignr	$3, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$3, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl3LoopStart)

L(Shl3LoopExit):
	/* 13 bytes precede the NUL chunk; copy 16 starting 3 bytes early.  */
	movdqu	-3(%rcx), %xmm1
	mov	$13, %rsi
	movdqu	%xmm1, -3(%rdx)
	jmp	L(CopyFrom1To16Bytes)
/* L(ShlN) copy paths for N = 4..15.  Each is entered with the source
   pointer rcx N bytes past a 16-byte boundary; the destination rdx is
   stored to with movaps and is therefore 16-byte aligned.  xmm0 is the
   all-zero NUL comparand: pcmpeqb overwrites it with the match mask, and
   it is zero again whenever that mask is zero, which is the only case in
   which execution falls through.  r8 (USE_AS_STRNCPY only) is the
   remaining byte budget.

   Scheme: source data is read with aligned loads from rcx - N,
   rcx + 16 - N, ...; "palignr $N, prev, cur" yields the 16 string bytes
   straddling two such chunks, which are stored aligned at (%rdx).  Four
   16-byte steps are unrolled, with xmm1 and xmm3 alternating as the
   "previous chunk" register.  The pointers are then backed up so that
   the aligned source chunks fall on a 64-byte boundary and a 64-byte
   loop takes over; when it sees a NUL it re-enters L(ShlNStart) to
   locate it in 16-byte steps.  L(ShlNLoopExit) copies the 16 - N bytes
   preceding the chunk that holds the NUL and jumps to
   L(CopyFrom1To16Bytes) with rsi = 16 - N and rax = NUL mask.  That
   label and the L(Strncpy*) targets are defined outside this part of
   the file.  */

	.p2align 4
L(Shl4):
	movaps	-4(%rcx), %xmm1
	movaps	12(%rcx), %xmm2
L(Shl4Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit4Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl4LoopExit)

	palignr	$4, %xmm1, %xmm2	/* xmm2 = bytes rcx .. rcx+15 */
	movaps	%xmm2, (%rdx)
	movaps	28(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit4Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl4LoopExit)

	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	28(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit4Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl4LoopExit)

	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	28(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit4Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl4LoopExit)

	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	28(%rcx), %rcx		/* rcx = next aligned source chunk */
	lea	16(%rdx), %rdx

	/* Back both pointers up by rax (0, 16, 32 or 48) bytes so that the
	   aligned chunk at 12(%rcx) sits on a 64-byte boundary; the bytes
	   backed over are simply copied again by the loop.  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-12(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8		/* give the backed-up bytes back to the count */
# endif
	movaps	-4(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl4LoopStart):
	movaps	12(%rcx), %xmm2
	movaps	28(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	44(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	60(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7		/* bytewise min of the four chunks */
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7		/* keep the raw last chunk for the next pass */
	palignr	$4, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$4, %xmm3, %xmm4	/* palignr does not modify the flags */
	jnz	L(Shl4Start)		/* NUL in these 64 bytes: redo in 16-byte
					   steps from raw xmm1/xmm2 */
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave4)
# endif
	palignr	$4, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl4LoopStart)

L(Shl4LoopExit):
	/* 12 bytes precede the NUL chunk; copy 16 starting 4 bytes early.  */
	movdqu	-4(%rcx), %xmm1
	mov	$12, %rsi
	movdqu	%xmm1, -4(%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 5 bytes past a 16-byte boundary (palignr $5).  */
	.p2align 4
L(Shl5):
	movaps	-5(%rcx), %xmm1
	movaps	11(%rcx), %xmm2
L(Shl5Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit5Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl5LoopExit)

	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	27(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit5Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl5LoopExit)

	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	27(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit5Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl5LoopExit)

	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	27(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit5Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl5LoopExit)

	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	27(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 11(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-11(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-5(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl5LoopStart):
	movaps	11(%rcx), %xmm2
	movaps	27(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	43(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	59(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$5, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$5, %xmm3, %xmm4
	jnz	L(Shl5Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave5)
# endif
	palignr	$5, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$5, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl5LoopStart)

L(Shl5LoopExit):
	/* 11 bytes precede the NUL chunk; copy 16 starting 5 bytes early.  */
	movdqu	-5(%rcx), %xmm1
	mov	$11, %rsi
	movdqu	%xmm1, -5(%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 6 bytes past a 16-byte boundary (palignr $6).  */
	.p2align 4
L(Shl6):
	movaps	-6(%rcx), %xmm1
	movaps	10(%rcx), %xmm2
L(Shl6Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit6Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl6LoopExit)

	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	26(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit6Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl6LoopExit)

	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	26(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit6Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl6LoopExit)

	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	26(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit6Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl6LoopExit)

	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	26(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 10(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-10(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-6(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl6LoopStart):
	movaps	10(%rcx), %xmm2
	movaps	26(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	42(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	58(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$6, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$6, %xmm3, %xmm4
	jnz	L(Shl6Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave6)
# endif
	palignr	$6, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl6LoopStart)

L(Shl6LoopExit):
	/* 10 bytes precede the NUL chunk: an 8-byte move at offset 0 plus an
	   overlapping 4-byte move at offset 6 cover bytes 0..9.  */
	mov	(%rcx), %r9
	mov	6(%rcx), %esi
	mov	%r9, (%rdx)
	mov	%esi, 6(%rdx)
	mov	$10, %rsi
	jmp	L(CopyFrom1To16Bytes)

/* Source is 7 bytes past a 16-byte boundary (palignr $7).  */
	.p2align 4
L(Shl7):
	movaps	-7(%rcx), %xmm1
	movaps	9(%rcx), %xmm2
L(Shl7Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit7Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl7LoopExit)

	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	25(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit7Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl7LoopExit)

	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	25(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit7Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl7LoopExit)

	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	25(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit7Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl7LoopExit)

	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	25(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 9(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-9(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-7(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl7LoopStart):
	movaps	9(%rcx), %xmm2
	movaps	25(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	41(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	57(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$7, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$7, %xmm3, %xmm4
	jnz	L(Shl7Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave7)
# endif
	palignr	$7, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl7LoopStart)

L(Shl7LoopExit):
	/* 9 bytes precede the NUL chunk: an 8-byte move at offset 0 plus an
	   overlapping 4-byte move at offset 5 cover bytes 0..8.  */
	mov	(%rcx), %r9
	mov	5(%rcx), %esi
	mov	%r9, (%rdx)
	mov	%esi, 5(%rdx)
	mov	$9, %rsi
	jmp	L(CopyFrom1To16Bytes)

/* Source is 8 bytes past a 16-byte boundary (palignr $8).  */
	.p2align 4
L(Shl8):
	movaps	-8(%rcx), %xmm1
	movaps	8(%rcx), %xmm2
L(Shl8Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit8Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl8LoopExit)

	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	24(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit8Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl8LoopExit)

	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	24(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit8Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl8LoopExit)

	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	24(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit8Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl8LoopExit)

	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	24(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 8(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-8(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-8(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl8LoopStart):
	movaps	8(%rcx), %xmm2
	movaps	24(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	40(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	56(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$8, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$8, %xmm3, %xmm4
	jnz	L(Shl8Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave8)
# endif
	palignr	$8, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl8LoopStart)

L(Shl8LoopExit):
	/* Exactly 8 bytes precede the NUL chunk: one 8-byte move.  */
	mov	(%rcx), %r9
	mov	$8, %rsi
	mov	%r9, (%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 9 bytes past a 16-byte boundary (palignr $9).  */
	.p2align 4
L(Shl9):
	movaps	-9(%rcx), %xmm1
	movaps	7(%rcx), %xmm2
L(Shl9Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit9Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl9LoopExit)

	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	23(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit9Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl9LoopExit)

	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	23(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit9Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl9LoopExit)

	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	23(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit9Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl9LoopExit)

	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	23(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 7(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-7(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-9(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl9LoopStart):
	movaps	7(%rcx), %xmm2
	movaps	23(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	39(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	55(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$9, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$9, %xmm3, %xmm4
	jnz	L(Shl9Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave9)
# endif
	palignr	$9, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl9LoopStart)

L(Shl9LoopExit):
	/* 7 bytes precede the NUL chunk; copy 8 starting 1 byte early.  */
	mov	-1(%rcx), %r9
	mov	$7, %rsi
	mov	%r9, -1(%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 10 bytes past a 16-byte boundary (palignr $10).  */
	.p2align 4
L(Shl10):
	movaps	-10(%rcx), %xmm1
	movaps	6(%rcx), %xmm2
L(Shl10Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit10Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl10LoopExit)

	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	22(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit10Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl10LoopExit)

	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	22(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit10Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl10LoopExit)

	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	22(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit10Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl10LoopExit)

	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	22(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 6(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-6(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-10(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl10LoopStart):
	movaps	6(%rcx), %xmm2
	movaps	22(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	38(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	54(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$10, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$10, %xmm3, %xmm4
	jnz	L(Shl10Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave10)
# endif
	palignr	$10, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl10LoopStart)

L(Shl10LoopExit):
	/* 6 bytes precede the NUL chunk; copy 8 starting 2 bytes early.  */
	mov	-2(%rcx), %r9
	mov	$6, %rsi
	mov	%r9, -2(%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 11 bytes past a 16-byte boundary (palignr $11).  */
	.p2align 4
L(Shl11):
	movaps	-11(%rcx), %xmm1
	movaps	5(%rcx), %xmm2
L(Shl11Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit11Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl11LoopExit)

	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	21(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit11Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl11LoopExit)

	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	21(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit11Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl11LoopExit)

	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	21(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit11Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl11LoopExit)

	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	21(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 5(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-5(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-11(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl11LoopStart):
	movaps	5(%rcx), %xmm2
	movaps	21(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	37(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	53(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$11, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$11, %xmm3, %xmm4
	jnz	L(Shl11Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave11)
# endif
	palignr	$11, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl11LoopStart)

L(Shl11LoopExit):
	/* 5 bytes precede the NUL chunk; copy 8 starting 3 bytes early.  */
	mov	-3(%rcx), %r9
	mov	$5, %rsi
	mov	%r9, -3(%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 12 bytes past a 16-byte boundary (palignr $12).  */
	.p2align 4
L(Shl12):
	movaps	-12(%rcx), %xmm1
	movaps	4(%rcx), %xmm2
L(Shl12Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit12Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	20(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit12Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	20(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit12Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	20(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit12Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	20(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 4(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-4(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-12(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl12LoopStart):
	movaps	4(%rcx), %xmm2
	movaps	20(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	36(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	52(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$12, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$12, %xmm3, %xmm4
	jnz	L(Shl12Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave12)
# endif
	palignr	$12, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl12LoopStart)

L(Shl12LoopExit):
	/* Exactly 4 bytes precede the NUL chunk: one 4-byte move.  */
	mov	(%rcx), %r9d
	mov	$4, %rsi
	mov	%r9d, (%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 13 bytes past a 16-byte boundary (palignr $13).  */
	.p2align 4
L(Shl13):
	movaps	-13(%rcx), %xmm1
	movaps	3(%rcx), %xmm2
L(Shl13Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit13Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl13LoopExit)

	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	19(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit13Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl13LoopExit)

	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	19(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit13Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl13LoopExit)

	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	19(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit13Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl13LoopExit)

	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	19(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 3(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-3(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-13(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl13LoopStart):
	movaps	3(%rcx), %xmm2
	movaps	19(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	35(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	51(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$13, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$13, %xmm3, %xmm4
	jnz	L(Shl13Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave13)
# endif
	palignr	$13, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl13LoopStart)

L(Shl13LoopExit):
	/* 3 bytes precede the NUL chunk; copy 4 starting 1 byte early.  */
	mov	-1(%rcx), %r9d
	mov	$3, %rsi
	mov	%r9d, -1(%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 14 bytes past a 16-byte boundary (palignr $14).  */
	.p2align 4
L(Shl14):
	movaps	-14(%rcx), %xmm1
	movaps	2(%rcx), %xmm2
L(Shl14Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit14Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl14LoopExit)

	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	18(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit14Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl14LoopExit)

	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	18(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit14Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl14LoopExit)

	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	18(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit14Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl14LoopExit)

	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	18(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Back up so that 2(%rcx) is 64-byte aligned (rax = bytes backed
	   up, applied to rdx as well).  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-2(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-14(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl14LoopStart):
	movaps	2(%rcx), %xmm2
	movaps	18(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	34(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	50(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb %xmm7, %rax
	movaps	%xmm5, %xmm7
	palignr	$14, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$14, %xmm3, %xmm4
	jnz	L(Shl14Start)
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave14)
# endif
	palignr	$14, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm7, %xmm1
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl14LoopStart)

L(Shl14LoopExit):
	/* 2 bytes precede the NUL chunk; copy 4 starting 2 bytes early.  */
	mov	-2(%rcx), %r9d
	mov	$2, %rsi
	mov	%r9d, -2(%rdx)
	jmp	L(CopyFrom1To16Bytes)

/* Source is 15 bytes past a 16-byte boundary (palignr $15).  Only the
   first three unrolled steps are visible here; the section continues
   beyond this part of the file.  */
	.p2align 4
L(Shl15):
	movaps	-15(%rcx), %xmm1
	movaps	1(%rcx), %xmm2
L(Shl15Start):
	pcmpeqb	%xmm2, %xmm0
	pmovmskb %xmm0, %rax
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit15Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl15LoopExit)

	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	17(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit15Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl15LoopExit)

	palignr	$15, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	17(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit15Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl15LoopExit)

	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	17(%rcx), %xmm2

	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb %xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY sub $16, %r8 jbe L(StrncpyExit15Case2OrCase3) # endif test %rax, %rax jnz L(Shl15LoopExit) palignr $15, %xmm3, %xmm2 movaps %xmm2, (%rdx) lea 17(%rcx), %rcx lea 16(%rdx), %rdx mov %rcx, %rax and $-0x40, %rcx sub %rcx, %rax lea -1(%rcx), %rcx sub %rax, %rdx # ifdef USE_AS_STRNCPY add %rax, %r8 # endif movaps -15(%rcx), %xmm1 /* 64 bytes loop */ .p2align 4 L(Shl15LoopStart): movaps 1(%rcx), %xmm2 movaps 17(%rcx), %xmm3 movaps %xmm3, %xmm6 movaps 33(%rcx), %xmm4 movaps %xmm4, %xmm7 movaps 49(%rcx), %xmm5 pminub %xmm2, %xmm6 pminub %xmm5, %xmm7 pminub %xmm6, %xmm7 pcmpeqb %xmm0, %xmm7 pmovmskb %xmm7, %rax movaps %xmm5, %xmm7 palignr $15, %xmm4, %xmm5 test %rax, %rax palignr $15, %xmm3, %xmm4 jnz L(Shl15Start) # ifdef USE_AS_STRNCPY sub $64, %r8 jbe L(StrncpyLeave15) # endif palignr $15, %xmm2, %xmm3 lea 64(%rcx), %rcx palignr $15, %xmm1, %xmm2 movaps %xmm7, %xmm1 movaps %xmm5, 48(%rdx) movaps %xmm4, 32(%rdx) movaps %xmm3, 16(%rdx) movaps %xmm2, (%rdx) lea 64(%rdx), %rdx jmp L(Shl15LoopStart) L(Shl15LoopExit): mov -3(%rcx), %r9d mov $1, %rsi mov %r9d, -3(%rdx) # ifdef USE_AS_STRCAT jmp L(CopyFrom1To16Bytes) # endif # ifndef USE_AS_STRCAT .p2align 4 L(CopyFrom1To16Bytes): # ifdef USE_AS_STRNCPY add $16, %r8 # endif add %rsi, %rdx add %rsi, %rcx test %al, %al jz L(ExitHigh) test $0x01, %al jnz L(Exit1) test $0x02, %al jnz L(Exit2) test $0x04, %al jnz L(Exit3) test $0x08, %al jnz L(Exit4) test $0x10, %al jnz L(Exit5) test $0x20, %al jnz L(Exit6) test $0x40, %al jnz L(Exit7) .p2align 4 L(Exit8): mov (%rcx), %rax mov %rax, (%rdx) # ifdef USE_AS_STPCPY lea 7(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $8, %r8 lea 8(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(ExitHigh): test $0x01, %ah jnz L(Exit9) test $0x02, %ah jnz L(Exit10) test $0x04, %ah jnz L(Exit11) test $0x08, %ah jnz L(Exit12) test $0x10, %ah jnz L(Exit13) test $0x20, %ah jnz L(Exit14) 
test $0x40, %ah jnz L(Exit15) .p2align 4 L(Exit16): mov (%rcx), %rax mov %rax, (%rdx) mov 8(%rcx), %rax mov %rax, 8(%rdx) # ifdef USE_AS_STPCPY lea 15(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $16, %r8 lea 16(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret # ifdef USE_AS_STRNCPY .p2align 4 L(CopyFrom1To16BytesCase2): add $16, %r8 add %rsi, %rcx lea (%rsi, %rdx), %rsi lea -9(%r8), %rdx and $1<<7, %dh or %al, %dh test %dh, %dh lea (%rsi), %rdx jz L(ExitHighCase2) cmp $1, %r8 je L(Exit1) test $0x01, %al jnz L(Exit1) cmp $2, %r8 je L(Exit2) test $0x02, %al jnz L(Exit2) cmp $3, %r8 je L(Exit3) test $0x04, %al jnz L(Exit3) cmp $4, %r8 je L(Exit4) test $0x08, %al jnz L(Exit4) cmp $5, %r8 je L(Exit5) test $0x10, %al jnz L(Exit5) cmp $6, %r8 je L(Exit6) test $0x20, %al jnz L(Exit6) cmp $7, %r8 je L(Exit7) test $0x40, %al jnz L(Exit7) jmp L(Exit8) .p2align 4 L(ExitHighCase2): cmp $9, %r8 je L(Exit9) test $0x01, %ah jnz L(Exit9) cmp $10, %r8 je L(Exit10) test $0x02, %ah jnz L(Exit10) cmp $11, %r8 je L(Exit11) test $0x04, %ah jnz L(Exit11) cmp $12, %r8 je L(Exit12) test $0x8, %ah jnz L(Exit12) cmp $13, %r8 je L(Exit13) test $0x10, %ah jnz L(Exit13) cmp $14, %r8 je L(Exit14) test $0x20, %ah jnz L(Exit14) cmp $15, %r8 je L(Exit15) test $0x40, %ah jnz L(Exit15) jmp L(Exit16) L(CopyFrom1To16BytesCase2OrCase3): test %rax, %rax jnz L(CopyFrom1To16BytesCase2) .p2align 4 L(CopyFrom1To16BytesCase3): add $16, %r8 add %rsi, %rdx add %rsi, %rcx cmp $16, %r8 je L(Exit16) cmp $8, %r8 je L(Exit8) jg L(More8Case3) cmp $4, %r8 je L(Exit4) jg L(More4Case3) cmp $2, %r8 jl L(Exit1) je L(Exit2) jg L(Exit3) L(More8Case3): /* but less than 16 */ cmp $12, %r8 je L(Exit12) jl L(Less12Case3) cmp $14, %r8 jl L(Exit13) je L(Exit14) jg L(Exit15) L(More4Case3): /* but less than 8 */ cmp $6, %r8 jl L(Exit5) je L(Exit6) jg L(Exit7) L(Less12Case3): /* but more than 8 */ cmp $10, %r8 jl L(Exit9) je L(Exit10) jg 
L(Exit11) # endif .p2align 4 L(Exit1): movb (%rcx), %al movb %al, (%rdx) # ifdef USE_AS_STPCPY lea (%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $1, %r8 lea 1(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit2): movw (%rcx), %ax movw %ax, (%rdx) # ifdef USE_AS_STPCPY lea 1(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $2, %r8 lea 2(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit3): movw (%rcx), %ax movw %ax, (%rdx) movb 2(%rcx), %al movb %al, 2(%rdx) # ifdef USE_AS_STPCPY lea 2(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $3, %r8 lea 3(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit4): movl (%rcx), %eax movl %eax, (%rdx) # ifdef USE_AS_STPCPY lea 3(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $4, %r8 lea 4(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit5): movl (%rcx), %eax movl %eax, (%rdx) movb 4(%rcx), %al movb %al, 4(%rdx) # ifdef USE_AS_STPCPY lea 4(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $5, %r8 lea 5(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit6): movl (%rcx), %eax movl %eax, (%rdx) movw 4(%rcx), %ax movw %ax, 4(%rdx) # ifdef USE_AS_STPCPY lea 5(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $6, %r8 lea 6(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit7): movl (%rcx), %eax movl %eax, (%rdx) movl 3(%rcx), %eax movl %eax, 3(%rdx) # ifdef USE_AS_STPCPY lea 6(%rdx), %rax # else mov %rdi, %rax # endif # ifdef 
USE_AS_STRNCPY sub $7, %r8 lea 7(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit9): mov (%rcx), %rax mov %rax, (%rdx) mov 5(%rcx), %eax mov %eax, 5(%rdx) # ifdef USE_AS_STPCPY lea 8(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $9, %r8 lea 9(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit10): mov (%rcx), %rax mov %rax, (%rdx) mov 6(%rcx), %eax mov %eax, 6(%rdx) # ifdef USE_AS_STPCPY lea 9(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $10, %r8 lea 10(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit11): mov (%rcx), %rax mov %rax, (%rdx) mov 7(%rcx), %eax mov %eax, 7(%rdx) # ifdef USE_AS_STPCPY lea 10(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $11, %r8 lea 11(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit12): mov (%rcx), %rax mov %rax, (%rdx) mov 8(%rcx), %eax mov %eax, 8(%rdx) # ifdef USE_AS_STPCPY lea 11(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $12, %r8 lea 12(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit13): mov (%rcx), %rax mov %rax, (%rdx) mov 5(%rcx), %rax mov %rax, 5(%rdx) # ifdef USE_AS_STPCPY lea 12(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $13, %r8 lea 13(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit14): mov (%rcx), %rax mov %rax, (%rdx) mov 6(%rcx), %rax mov %rax, 6(%rdx) # ifdef USE_AS_STPCPY lea 13(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $14, %r8 lea 14(%rdx), %rcx jnz 
L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret .p2align 4 L(Exit15): mov (%rcx), %rax mov %rax, (%rdx) mov 7(%rcx), %rax mov %rax, 7(%rdx) # ifdef USE_AS_STPCPY lea 14(%rdx), %rax # else mov %rdi, %rax # endif # ifdef USE_AS_STRNCPY sub $15, %r8 lea 15(%rdx), %rcx jnz L(StrncpyFillTailWithZero1) # ifdef USE_AS_STPCPY cmpb $1, (%rax) sbb $-1, %rax # endif # endif ret # ifdef USE_AS_STRNCPY .p2align 4 L(Fill0): ret .p2align 4 L(Fill1): movb %dl, (%rcx) ret .p2align 4 L(Fill2): movw %dx, (%rcx) ret .p2align 4 L(Fill3): movw %dx, (%rcx) movb %dl, 2(%rcx) ret .p2align 4 L(Fill4): movl %edx, (%rcx) ret .p2align 4 L(Fill5): movl %edx, (%rcx) movb %dl, 4(%rcx) ret .p2align 4 L(Fill6): movl %edx, (%rcx) movw %dx, 4(%rcx) ret .p2align 4 L(Fill7): movl %edx, (%rcx) movl %edx, 3(%rcx) ret .p2align 4 L(Fill8): mov %rdx, (%rcx) ret .p2align 4 L(Fill9): mov %rdx, (%rcx) movb %dl, 8(%rcx) ret .p2align 4 L(Fill10): mov %rdx, (%rcx) movw %dx, 8(%rcx) ret .p2align 4 L(Fill11): mov %rdx, (%rcx) movl %edx, 7(%rcx) ret .p2align 4 L(Fill12): mov %rdx, (%rcx) movl %edx, 8(%rcx) ret .p2align 4 L(Fill13): mov %rdx, (%rcx) mov %rdx, 5(%rcx) ret .p2align 4 L(Fill14): mov %rdx, (%rcx) mov %rdx, 6(%rcx) ret .p2align 4 L(Fill15): mov %rdx, (%rcx) mov %rdx, 7(%rcx) ret .p2align 4 L(Fill16): mov %rdx, (%rcx) mov %rdx, 8(%rcx) ret .p2align 4 L(StrncpyFillExit1): lea 16(%r8), %r8 L(FillFrom1To16Bytes): test %r8, %r8 jz L(Fill0) cmp $16, %r8 je L(Fill16) cmp $8, %r8 je L(Fill8) jg L(FillMore8) cmp $4, %r8 je L(Fill4) jg L(FillMore4) cmp $2, %r8 jl L(Fill1) je L(Fill2) jg L(Fill3) L(FillMore8): /* but less than 16 */ cmp $12, %r8 je L(Fill12) jl L(FillLess12) cmp $14, %r8 jl L(Fill13) je L(Fill14) jg L(Fill15) L(FillMore4): /* but less than 8 */ cmp $6, %r8 jl L(Fill5) je L(Fill6) jg L(Fill7) L(FillLess12): /* but more than 8 */ cmp $10, %r8 jl L(Fill9) je L(Fill10) jmp L(Fill11) .p2align 4 L(StrncpyFillTailWithZero1): xor %rdx, %rdx sub $16, %r8 
jbe L(StrncpyFillExit1) pxor %xmm0, %xmm0 mov %rdx, (%rcx) mov %rdx, 8(%rcx) lea 16(%rcx), %rcx mov %rcx, %rdx and $0xf, %rdx sub %rdx, %rcx add %rdx, %r8 xor %rdx, %rdx sub $64, %r8 jb L(StrncpyFillLess64) L(StrncpyFillLoopMovdqa): movdqa %xmm0, (%rcx) movdqa %xmm0, 16(%rcx) movdqa %xmm0, 32(%rcx) movdqa %xmm0, 48(%rcx) lea 64(%rcx), %rcx sub $64, %r8 jae L(StrncpyFillLoopMovdqa) L(StrncpyFillLess64): add $32, %r8 jl L(StrncpyFillLess32) movdqa %xmm0, (%rcx) movdqa %xmm0, 16(%rcx) lea 32(%rcx), %rcx sub $16, %r8 jl L(StrncpyFillExit1) movdqa %xmm0, (%rcx) lea 16(%rcx), %rcx jmp L(FillFrom1To16Bytes) L(StrncpyFillLess32): add $16, %r8 jl L(StrncpyFillExit1) movdqa %xmm0, (%rcx) lea 16(%rcx), %rcx jmp L(FillFrom1To16Bytes) .p2align 4 L(Exit0): mov %rdx, %rax ret .p2align 4 L(StrncpyExit15Bytes): cmp $9, %r8 je L(Exit9) cmpb $0, 8(%rcx) jz L(Exit9) cmp $10, %r8 je L(Exit10) cmpb $0, 9(%rcx) jz L(Exit10) cmp $11, %r8 je L(Exit11) cmpb $0, 10(%rcx) jz L(Exit11) cmp $12, %r8 je L(Exit12) cmpb $0, 11(%rcx) jz L(Exit12) cmp $13, %r8 je L(Exit13) cmpb $0, 12(%rcx) jz L(Exit13) cmp $14, %r8 je L(Exit14) cmpb $0, 13(%rcx) jz L(Exit14) mov (%rcx), %rax mov %rax, (%rdx) mov 7(%rcx), %rax mov %rax, 7(%rdx) # ifdef USE_AS_STPCPY lea 14(%rdx), %rax cmpb $1, (%rax) sbb $-1, %rax # else mov %rdi, %rax # endif ret .p2align 4 L(StrncpyExit8Bytes): cmp $1, %r8 je L(Exit1) cmpb $0, (%rcx) jz L(Exit1) cmp $2, %r8 je L(Exit2) cmpb $0, 1(%rcx) jz L(Exit2) cmp $3, %r8 je L(Exit3) cmpb $0, 2(%rcx) jz L(Exit3) cmp $4, %r8 je L(Exit4) cmpb $0, 3(%rcx) jz L(Exit4) cmp $5, %r8 je L(Exit5) cmpb $0, 4(%rcx) jz L(Exit5) cmp $6, %r8 je L(Exit6) cmpb $0, 5(%rcx) jz L(Exit6) cmp $7, %r8 je L(Exit7) cmpb $0, 6(%rcx) jz L(Exit7) mov (%rcx), %rax mov %rax, (%rdx) # ifdef USE_AS_STPCPY lea 7(%rdx), %rax cmpb $1, (%rax) sbb $-1, %rax # else mov %rdi, %rax # endif ret # endif # endif # ifdef USE_AS_STRNCPY .p2align 4 L(StrncpyLeaveCase2OrCase3): test %rax, %rax jnz L(Aligned64LeaveCase2) 
/* Everything from here to the "# endif" before END (STRCPY) sits inside a
   "# ifdef USE_AS_STRNCPY" opened just above this point.

   Budget ran out inside a 64-byte group and no NUL is involved: store the
   whole 16-byte blocks that still fit, bumping rsi by 16 for each, then
   let L(CopyFrom1To16BytesCase3) copy the final partial block.
   NOTE(review): xmm4..xmm7 and the negative rdx offsets presumably come
   from the aligned 64-byte loop defined before this chunk (rdx already
   advanced by 64) — confirm there.  */
L(Aligned64LeaveCase3):
	lea	64(%r8), %r8		/* undo the loop's "sub $64" */
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm4, -64(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm5, -48(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm6, -32(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8
	jmp	L(CopyFrom1To16BytesCase3)

/* Same walk, but a NUL is somewhere in the group: test each block for a
   NUL before storing the previous one, leaving through
   L(CopyFrom1To16Bytes) (NUL first) or the Case2/Case3 routines (budget
   ends in this block).  */
L(Aligned64LeaveCase2):
	pcmpeqb	%xmm4, %xmm0
	pmovmskb	%xmm0, %rax
	add	$48, %r8		/* r8 + 64 - 16 */
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm5, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm4, -64(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm5, -48(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm7, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm6, -32(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8
	jmp	L(CopyFrom1To16BytesCase2)
/*--------------------------------------------------*/
/* L(StrncpyExit<N>Case2OrCase3), N = 1..15: reached from the 16-byte
   steps of the L(Shl<N>) paths when the budget ends in the block just
   examined.  Each stub copies the 16-N source bytes that lie in front of
   that block (using a move that may start early and overlap earlier
   bytes), sets rsi = 16-N so the Case2/Case3 routines advance onto the
   block, and selects Case2 when rax shows a NUL, Case3 otherwise.  */
	.p2align 4
L(StrncpyExit1Case2OrCase3):
	movdqu	-1(%rcx), %xmm0
	movdqu	%xmm0, -1(%rdx)
	mov	$15, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit2Case2OrCase3):
	movdqu	-2(%rcx), %xmm0
	movdqu	%xmm0, -2(%rdx)
	mov	$14, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit3Case2OrCase3):
	movdqu	-3(%rcx), %xmm0
	movdqu	%xmm0, -3(%rdx)
	mov	$13, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit4Case2OrCase3):
	movdqu	-4(%rcx), %xmm0
	movdqu	%xmm0, -4(%rdx)
	mov	$12, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit5Case2OrCase3):
	movdqu	-5(%rcx), %xmm0
	movdqu	%xmm0, -5(%rdx)
	mov	$11, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit6Case2OrCase3):
	mov	(%rcx), %rsi		/* rsi is scratch until the "mov $10" below */
	mov	6(%rcx), %r9d
	mov	%r9d, 6(%rdx)
	mov	%rsi, (%rdx)
	test	%rax, %rax
	mov	$10, %rsi		/* mov does not change the flags set by test */
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit7Case2OrCase3):
	mov	(%rcx), %rsi
	mov	5(%rcx), %r9d
	mov	%r9d, 5(%rdx)
	mov	%rsi, (%rdx)
	test	%rax, %rax
	mov	$9, %rsi
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit8Case2OrCase3):
	mov	(%rcx), %r9
	mov	$8, %rsi
	mov	%r9, (%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit9Case2OrCase3):
	mov	-1(%rcx), %r9
	mov	$7, %rsi
	mov	%r9, -1(%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit10Case2OrCase3):
	mov	-2(%rcx), %r9
	mov	$6, %rsi
	mov	%r9, -2(%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit11Case2OrCase3):
	mov	-3(%rcx), %r9
	mov	$5, %rsi
	mov	%r9, -3(%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit12Case2OrCase3):
	mov	(%rcx), %r9d
	mov	$4, %rsi
	mov	%r9d, (%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit13Case2OrCase3):
	mov	-1(%rcx), %r9d
	mov	$3, %rsi
	mov	%r9d, -1(%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit14Case2OrCase3):
	mov	-2(%rcx), %r9d
	mov	$2, %rsi
	mov	%r9d, -2(%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyExit15Case2OrCase3):
	mov	-3(%rcx), %r9d
	mov	$1, %rsi
	mov	%r9d, -3(%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)

/* L(StrncpyLeave<N>), N = 1..15: reached from L(Shl<N>LoopStart) when
   "sub $64, %r8" exhausts the budget and no NUL is in the 64 bytes.  On
   entry (as established by the Shl loops): xmm1 = raw block before the
   group, xmm2 = raw first block, xmm4/xmm5 = third/fourth output blocks
   already merged by palignr.  Whole 16-byte output blocks that still fit
   are stored, rsi growing by 16 per block, then control drops into
   L(StrncpyExit<N>).
   NOTE(review): this relies on the value rsi has on entry; the
   StrncpyLeave entry paths do not set it inside this chunk (presumably 0)
   — confirm in the earlier code.

   L(StrncpyExit<N>): advance both cursors by rsi + (16-N), copy the 16-N
   bytes in front of the new cursor position, clear rsi and let
   L(CopyFrom1To16BytesCase3) copy the last r8 bytes.  */
	.p2align 4
L(StrncpyLeave1):
	movaps	%xmm2, %xmm3		/* keep raw first block for the second merge */
	add	$48, %r8		/* r8 + 64 - 16 */
	jle	L(StrncpyExit1)
	palignr	$1, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	31(%rcx), %xmm2		/* reload raw second block */
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit1)
	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit1)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit1)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit1):
	lea	15(%rdx, %rsi), %rdx
	lea	15(%rcx, %rsi), %rcx
	mov	-15(%rcx), %rsi		/* 15 bytes via two overlapping 8-byte moves */
	mov	-8(%rcx), %rax
	mov	%rsi, -15(%rdx)
	mov	%rax, -8(%rdx)
	xor	%rsi, %rsi
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave2):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit2)
	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	30(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit2)
	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit2)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit2)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit2):
	lea	14(%rdx, %rsi), %rdx
	lea	14(%rcx, %rsi), %rcx
	mov	-14(%rcx), %rsi		/* 14 bytes via two overlapping 8-byte moves */
	mov	-8(%rcx), %rax
	mov	%rsi, -14(%rdx)
	mov	%rax, -8(%rdx)
	xor	%rsi, %rsi
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave3):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit3)
	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	29(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit3)
	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit3)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit3)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit3):
	lea	13(%rdx, %rsi), %rdx
	lea	13(%rcx, %rsi), %rcx
	mov	-13(%rcx), %rsi		/* 13 bytes via two overlapping 8-byte moves */
	mov	-8(%rcx), %rax
	mov	%rsi, -13(%rdx)
	mov	%rax, -8(%rdx)
	xor	%rsi, %rsi
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave4):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit4)
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	28(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit4)
	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit4)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit4)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit4):
	lea	12(%rdx, %rsi), %rdx
	lea	12(%rcx, %rsi), %rcx
	mov	-12(%rcx), %rsi		/* 12 bytes: 8 + 4 */
	mov	-4(%rcx), %eax
	mov	%rsi, -12(%rdx)
	mov	%eax, -4(%rdx)
	xor	%rsi, %rsi
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave5):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit5)
	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	27(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit5)
	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit5)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit5)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit5):
	lea	11(%rdx, %rsi), %rdx
	lea	11(%rcx, %rsi), %rcx
	mov	-11(%rcx), %rsi		/* 11 bytes: 8 + 4, overlapping by one */
	mov	-4(%rcx), %eax
	mov	%rsi, -11(%rdx)
	mov	%eax, -4(%rdx)
	xor	%rsi, %rsi
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave6):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit6)
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	26(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit6)
	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit6)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit6)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit6):
	lea	10(%rdx, %rsi), %rdx
	lea	10(%rcx, %rsi), %rcx
	mov	-10(%rcx), %rsi		/* 10 bytes: 8 + 2 */
	movw	-2(%rcx), %ax
	mov	%rsi, -10(%rdx)
	movw	%ax, -2(%rdx)
	xor	%rsi, %rsi
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave7):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit7)
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	25(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit7)
	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit7)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit7)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit7):
	lea	9(%rdx, %rsi), %rdx
	lea	9(%rcx, %rsi), %rcx
	mov	-9(%rcx), %rsi		/* 9 bytes: 8 + 1 */
	movb	-1(%rcx), %ah
	mov	%rsi, -9(%rdx)
	movb	%ah, -1(%rdx)
	xor	%rsi, %rsi
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave8):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit8)
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	24(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit8)
	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit8)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit8)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit8):
	lea	8(%rdx, %rsi), %rdx
	lea	8(%rcx, %rsi), %rcx
	mov	-8(%rcx), %rax		/* 8 bytes */
	xor	%rsi, %rsi
	mov	%rax, -8(%rdx)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave9):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit9)
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	23(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit9)
	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit9)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit9)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit9):
	lea	7(%rdx, %rsi), %rdx
	lea	7(%rcx, %rsi), %rcx
	mov	-8(%rcx), %rax		/* 8-byte move starting one byte early covers the 7 bytes */
	xor	%rsi, %rsi
	mov	%rax, -8(%rdx)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave10):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit10)
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	22(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit10)
	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit10)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit10)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit10):
	lea	6(%rdx, %rsi), %rdx
	lea	6(%rcx, %rsi), %rcx
	mov	-8(%rcx), %rax		/* 8-byte move starting two bytes early covers the 6 bytes */
	xor	%rsi, %rsi
	mov	%rax, -8(%rdx)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave11):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit11)
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	21(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit11)
	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit11)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit11)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit11):
	lea	5(%rdx, %rsi), %rdx
	lea	5(%rcx, %rsi), %rcx
	mov	-8(%rcx), %rax		/* 8-byte move starting three bytes early covers the 5 bytes */
	xor	%rsi, %rsi
	mov	%rax, -8(%rdx)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave12):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit12)
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	20(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit12)
	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit12)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit12)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit12):
	lea	4(%rdx, %rsi), %rdx
	lea	4(%rcx, %rsi), %rcx
	mov	-4(%rcx), %eax		/* 4 bytes */
	xor	%rsi, %rsi
	mov	%eax, -4(%rdx)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave13):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit13)
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	19(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit13)
	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit13)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit13)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit13):
	lea	3(%rdx, %rsi), %rdx
	lea	3(%rcx, %rsi), %rcx
	mov	-4(%rcx), %eax		/* 4-byte move starting one byte early covers the 3 bytes */
	xor	%rsi, %rsi
	mov	%eax, -4(%rdx)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave14):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit14)
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	18(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit14)
	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit14)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit14)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit14):
	lea	2(%rdx, %rsi), %rdx
	lea	2(%rcx, %rsi), %rcx
	movw	-2(%rcx), %ax		/* 2 bytes */
	xor	%rsi, %rsi
	movw	%ax, -2(%rdx)
	jmp	L(CopyFrom1To16BytesCase3)

	.p2align 4
L(StrncpyLeave15):
	movaps	%xmm2, %xmm3
	add	$48, %r8
	jle	L(StrncpyExit15)
	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	17(%rcx), %xmm2
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit15)
	palignr	$15, %xmm3, %xmm2
	movaps	%xmm2, 16(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit15)
	movaps	%xmm4, 32(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(StrncpyExit15)
	movaps	%xmm5, 48(%rdx)
	lea	16(%rsi), %rsi
	lea	-16(%r8), %r8

L(StrncpyExit15):
	lea	1(%rdx, %rsi), %rdx
	lea	1(%rcx, %rsi), %rcx
	movb	-1(%rcx), %ah		/* 1 byte */
	xor	%rsi, %rsi
	movb	%ah, -1(%rdx)
	jmp	L(CopyFrom1To16BytesCase3)
# endif

# ifndef USE_AS_STRCAT
END (STRCPY)
# endif
#endif