diff options
Diffstat (limited to 'sysdeps/x86_64/multiarch/memcpy-ssse3.S')
-rw-r--r-- | sysdeps/x86_64/multiarch/memcpy-ssse3.S | 147 |
1 files changed, 80 insertions, 67 deletions
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S index 95de9695f9..0240bfa309 100644 --- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S @@ -1,5 +1,5 @@ /* memcpy with SSSE3 - Copyright (C) 2010-2016 Free Software Foundation, Inc. + Copyright (C) 2010-2018 Free Software Foundation, Inc. Contributed by Intel Corporation. This file is part of the GNU C Library. @@ -19,16 +19,15 @@ #include <sysdep.h> -#if IS_IN (libc) \ - && (defined SHARED \ - || defined USE_AS_MEMMOVE \ - || !defined USE_MULTIARCH) +#if IS_IN (libc) #include "asm-syntax.h" #ifndef MEMCPY # define MEMCPY __memcpy_ssse3 # define MEMCPY_CHK __memcpy_chk_ssse3 +# define MEMPCPY __mempcpy_ssse3 +# define MEMPCPY_CHK __mempcpy_chk_ssse3 #endif #define JMPTBL(I, B) I - B @@ -40,10 +39,23 @@ lea TABLE(%rip), %r11; \ movslq (%r11, INDEX, SCALE), INDEX; \ lea (%r11, INDEX), INDEX; \ - jmp *INDEX; \ + _CET_NOTRACK jmp *INDEX; \ ud2 .section .text.ssse3,"ax",@progbits +#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE +ENTRY (MEMPCPY_CHK) + cmpq %rdx, %rcx + jb HIDDEN_JUMPTARGET (__chk_fail) +END (MEMPCPY_CHK) + +ENTRY (MEMPCPY) + movq %rdi, %rax + addq %rdx, %rax + jmp L(start) +END (MEMPCPY) +#endif + #if !defined USE_AS_BCOPY ENTRY (MEMCPY_CHK) cmpq %rdx, %rcx @@ -66,6 +78,7 @@ ENTRY (MEMCPY) jmp L(copy_backward) L(copy_forward): #endif +L(start): cmp $79, %rdx lea L(table_less_80bytes)(%rip), %r11 ja L(80bytesormore) @@ -73,7 +86,7 @@ L(copy_forward): add %rdx, %rsi add %rdx, %rdi add %r11, %r9 - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 .p2align 4 @@ -428,7 +441,7 @@ L(shl_1): lea (L(shl_1_loop_L2)-L(shl_1_loop_L1))(%r9), %r9 L(L1_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_1_loop_L2): prefetchnta 0x1c0(%rsi) @@ -451,7 +464,7 @@ L(shl_1_loop_L1): jb L(shl_1_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_1_end): movaps %xmm4, -0x20(%rdi) @@ -471,7 +484,7 @@ L(shl_1_bwd): lea (L(shl_1_bwd_loop_L2)-L(shl_1_bwd_loop_L1))(%r9), %r9 L(L1_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_1_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -496,7 +509,7 @@ L(shl_1_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_1_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_1_bwd_end): movaps %xmm4, (%rdi) @@ -513,7 +526,7 @@ L(shl_2): lea (L(shl_2_loop_L2)-L(shl_2_loop_L1))(%r9), %r9 L(L2_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_2_loop_L2): prefetchnta 0x1c0(%rsi) @@ -536,7 +549,7 @@ L(shl_2_loop_L1): jb L(shl_2_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_2_end): movaps %xmm4, -0x20(%rdi) @@ -556,7 +569,7 @@ L(shl_2_bwd): lea (L(shl_2_bwd_loop_L2)-L(shl_2_bwd_loop_L1))(%r9), %r9 L(L2_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_2_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -581,7 +594,7 @@ L(shl_2_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_2_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_2_bwd_end): movaps %xmm4, (%rdi) @@ -598,7 +611,7 @@ L(shl_3): lea (L(shl_3_loop_L2)-L(shl_3_loop_L1))(%r9), %r9 L(L3_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_3_loop_L2): prefetchnta 0x1c0(%rsi) @@ -621,7 +634,7 @@ L(shl_3_loop_L1): jb L(shl_3_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_3_end): movaps %xmm4, -0x20(%rdi) @@ -641,7 +654,7 @@ L(shl_3_bwd): lea (L(shl_3_bwd_loop_L2)-L(shl_3_bwd_loop_L1))(%r9), %r9 L(L3_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_3_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -666,7 +679,7 @@ L(shl_3_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_3_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_3_bwd_end): movaps %xmm4, (%rdi) @@ -683,7 +696,7 @@ L(shl_4): lea (L(shl_4_loop_L2)-L(shl_4_loop_L1))(%r9), %r9 L(L4_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_4_loop_L2): prefetchnta 0x1c0(%rsi) @@ -706,7 +719,7 @@ L(shl_4_loop_L1): jb L(shl_4_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_4_end): movaps %xmm4, -0x20(%rdi) @@ -726,7 +739,7 @@ L(shl_4_bwd): lea (L(shl_4_bwd_loop_L2)-L(shl_4_bwd_loop_L1))(%r9), %r9 L(L4_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_4_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -751,7 +764,7 @@ L(shl_4_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_4_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_4_bwd_end): movaps %xmm4, (%rdi) @@ -768,7 +781,7 @@ L(shl_5): lea (L(shl_5_loop_L2)-L(shl_5_loop_L1))(%r9), %r9 L(L5_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_5_loop_L2): prefetchnta 0x1c0(%rsi) @@ -791,7 +804,7 @@ L(shl_5_loop_L1): jb L(shl_5_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_5_end): movaps %xmm4, -0x20(%rdi) @@ -811,7 +824,7 @@ L(shl_5_bwd): lea (L(shl_5_bwd_loop_L2)-L(shl_5_bwd_loop_L1))(%r9), %r9 L(L5_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_5_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -836,7 +849,7 @@ L(shl_5_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_5_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_5_bwd_end): movaps %xmm4, (%rdi) @@ -853,7 +866,7 @@ L(shl_6): lea (L(shl_6_loop_L2)-L(shl_6_loop_L1))(%r9), %r9 L(L6_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_6_loop_L2): prefetchnta 0x1c0(%rsi) @@ -876,7 +889,7 @@ L(shl_6_loop_L1): jb L(shl_6_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_6_end): movaps %xmm4, -0x20(%rdi) @@ -896,7 +909,7 @@ L(shl_6_bwd): lea (L(shl_6_bwd_loop_L2)-L(shl_6_bwd_loop_L1))(%r9), %r9 L(L6_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_6_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -921,7 +934,7 @@ L(shl_6_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_6_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_6_bwd_end): movaps %xmm4, (%rdi) @@ -938,7 +951,7 @@ L(shl_7): lea (L(shl_7_loop_L2)-L(shl_7_loop_L1))(%r9), %r9 L(L7_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_7_loop_L2): prefetchnta 0x1c0(%rsi) @@ -961,7 +974,7 @@ L(shl_7_loop_L1): jb L(shl_7_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_7_end): movaps %xmm4, -0x20(%rdi) @@ -981,7 +994,7 @@ L(shl_7_bwd): lea (L(shl_7_bwd_loop_L2)-L(shl_7_bwd_loop_L1))(%r9), %r9 L(L7_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_7_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -1006,7 +1019,7 @@ L(shl_7_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_7_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_7_bwd_end): movaps %xmm4, (%rdi) @@ -1023,7 +1036,7 @@ L(shl_8): lea (L(shl_8_loop_L2)-L(shl_8_loop_L1))(%r9), %r9 L(L8_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 L(shl_8_loop_L2): prefetchnta 0x1c0(%rsi) L(shl_8_loop_L1): @@ -1045,7 +1058,7 @@ L(shl_8_loop_L1): jb L(shl_8_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 .p2align 4 L(shl_8_end): @@ -1066,7 +1079,7 @@ L(shl_8_bwd): lea (L(shl_8_bwd_loop_L2)-L(shl_8_bwd_loop_L1))(%r9), %r9 L(L8_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_8_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -1091,7 +1104,7 @@ L(shl_8_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_8_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_8_bwd_end): movaps %xmm4, (%rdi) @@ -1108,7 +1121,7 @@ L(shl_9): lea (L(shl_9_loop_L2)-L(shl_9_loop_L1))(%r9), %r9 L(L9_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_9_loop_L2): prefetchnta 0x1c0(%rsi) @@ -1131,7 +1144,7 @@ L(shl_9_loop_L1): jb L(shl_9_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_9_end): movaps %xmm4, -0x20(%rdi) @@ -1151,7 +1164,7 @@ L(shl_9_bwd): lea (L(shl_9_bwd_loop_L2)-L(shl_9_bwd_loop_L1))(%r9), %r9 L(L9_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_9_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -1176,7 +1189,7 @@ L(shl_9_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_9_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_9_bwd_end): movaps %xmm4, (%rdi) @@ -1193,7 +1206,7 @@ L(shl_10): lea (L(shl_10_loop_L2)-L(shl_10_loop_L1))(%r9), %r9 L(L10_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_10_loop_L2): prefetchnta 0x1c0(%rsi) @@ -1216,7 +1229,7 @@ L(shl_10_loop_L1): jb L(shl_10_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_10_end): movaps %xmm4, -0x20(%rdi) @@ -1236,7 +1249,7 @@ L(shl_10_bwd): lea (L(shl_10_bwd_loop_L2)-L(shl_10_bwd_loop_L1))(%r9), %r9 L(L10_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_10_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -1261,7 +1274,7 @@ L(shl_10_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_10_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_10_bwd_end): movaps %xmm4, (%rdi) @@ -1278,7 +1291,7 @@ L(shl_11): lea (L(shl_11_loop_L2)-L(shl_11_loop_L1))(%r9), %r9 L(L11_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_11_loop_L2): prefetchnta 0x1c0(%rsi) @@ -1301,7 +1314,7 @@ L(shl_11_loop_L1): jb L(shl_11_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_11_end): movaps %xmm4, -0x20(%rdi) @@ -1321,7 +1334,7 @@ L(shl_11_bwd): lea (L(shl_11_bwd_loop_L2)-L(shl_11_bwd_loop_L1))(%r9), %r9 L(L11_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_11_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -1346,7 +1359,7 @@ L(shl_11_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_11_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_11_bwd_end): movaps %xmm4, (%rdi) @@ -1363,7 +1376,7 @@ L(shl_12): lea (L(shl_12_loop_L2)-L(shl_12_loop_L1))(%r9), %r9 L(L12_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_12_loop_L2): prefetchnta 0x1c0(%rsi) @@ -1386,7 +1399,7 @@ L(shl_12_loop_L1): jb L(shl_12_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_12_end): movaps %xmm4, -0x20(%rdi) @@ -1406,7 +1419,7 @@ L(shl_12_bwd): lea (L(shl_12_bwd_loop_L2)-L(shl_12_bwd_loop_L1))(%r9), %r9 L(L12_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_12_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -1431,7 +1444,7 @@ L(shl_12_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_12_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_12_bwd_end): movaps %xmm4, (%rdi) @@ -1448,7 +1461,7 @@ L(shl_13): lea (L(shl_13_loop_L2)-L(shl_13_loop_L1))(%r9), %r9 L(L13_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_13_loop_L2): prefetchnta 0x1c0(%rsi) @@ -1471,7 +1484,7 @@ L(shl_13_loop_L1): jb L(shl_13_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_13_end): movaps %xmm4, -0x20(%rdi) @@ -1491,7 +1504,7 @@ L(shl_13_bwd): lea (L(shl_13_bwd_loop_L2)-L(shl_13_bwd_loop_L1))(%r9), %r9 L(L13_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_13_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -1516,7 +1529,7 @@ L(shl_13_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_13_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_13_bwd_end): movaps %xmm4, (%rdi) @@ -1533,7 +1546,7 @@ L(shl_14): lea (L(shl_14_loop_L2)-L(shl_14_loop_L1))(%r9), %r9 L(L14_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_14_loop_L2): prefetchnta 0x1c0(%rsi) @@ -1556,7 +1569,7 @@ L(shl_14_loop_L1): jb L(shl_14_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_14_end): movaps %xmm4, -0x20(%rdi) @@ -1576,7 +1589,7 @@ L(shl_14_bwd): lea (L(shl_14_bwd_loop_L2)-L(shl_14_bwd_loop_L1))(%r9), %r9 L(L14_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_14_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -1601,7 +1614,7 @@ L(shl_14_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_14_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_14_bwd_end): movaps %xmm4, (%rdi) @@ -1618,7 +1631,7 @@ L(shl_15): lea (L(shl_15_loop_L2)-L(shl_15_loop_L1))(%r9), %r9 L(L15_fwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_15_loop_L2): prefetchnta 0x1c0(%rsi) @@ -1641,7 +1654,7 @@ L(shl_15_loop_L1): jb L(shl_15_end) movaps %xmm4, -0x20(%rdi) movaps %xmm5, -0x10(%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_15_end): movaps %xmm4, -0x20(%rdi) @@ -1661,7 +1674,7 @@ L(shl_15_bwd): lea (L(shl_15_bwd_loop_L2)-L(shl_15_bwd_loop_L1))(%r9), %r9 L(L15_bwd): lea -64(%rdx), %rdx - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_15_bwd_loop_L2): prefetchnta -0x1c0(%rsi) @@ -1686,7 +1699,7 @@ L(shl_15_bwd_loop_L1): movaps %xmm3, 0x10(%rdi) jb L(shl_15_bwd_end) movaps %xmm4, (%rdi) - jmp *%r9 + _CET_NOTRACK jmp *%r9 ud2 L(shl_15_bwd_end): movaps %xmm4, (%rdi) |