summaryrefslogtreecommitdiff
path: root/sysdeps/x86_64/multiarch/memcpy-ssse3.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64/multiarch/memcpy-ssse3.S')
-rw-r--r--sysdeps/x86_64/multiarch/memcpy-ssse3.S147
1 files changed, 80 insertions, 67 deletions
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 95de9695f9..0240bfa309 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -1,5 +1,5 @@
/* memcpy with SSSE3
- Copyright (C) 2010-2016 Free Software Foundation, Inc.
+ Copyright (C) 2010-2018 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
@@ -19,16 +19,15 @@
#include <sysdep.h>
-#if IS_IN (libc) \
- && (defined SHARED \
- || defined USE_AS_MEMMOVE \
- || !defined USE_MULTIARCH)
+#if IS_IN (libc)
#include "asm-syntax.h"
#ifndef MEMCPY
# define MEMCPY __memcpy_ssse3
# define MEMCPY_CHK __memcpy_chk_ssse3
+# define MEMPCPY __mempcpy_ssse3
+# define MEMPCPY_CHK __mempcpy_chk_ssse3
#endif
#define JMPTBL(I, B) I - B
@@ -40,10 +39,23 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), INDEX; \
lea (%r11, INDEX), INDEX; \
- jmp *INDEX; \
+ _CET_NOTRACK jmp *INDEX; \
ud2
.section .text.ssse3,"ax",@progbits
+#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+ENTRY (MEMPCPY_CHK)
+ cmpq %rdx, %rcx
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (MEMPCPY_CHK)
+
+ENTRY (MEMPCPY)
+ movq %rdi, %rax
+ addq %rdx, %rax
+ jmp L(start)
+END (MEMPCPY)
+#endif
+
#if !defined USE_AS_BCOPY
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
@@ -66,6 +78,7 @@ ENTRY (MEMCPY)
jmp L(copy_backward)
L(copy_forward):
#endif
+L(start):
cmp $79, %rdx
lea L(table_less_80bytes)(%rip), %r11
ja L(80bytesormore)
@@ -73,7 +86,7 @@ L(copy_forward):
add %rdx, %rsi
add %rdx, %rdi
add %r11, %r9
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
.p2align 4
@@ -428,7 +441,7 @@ L(shl_1):
lea (L(shl_1_loop_L2)-L(shl_1_loop_L1))(%r9), %r9
L(L1_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -451,7 +464,7 @@ L(shl_1_loop_L1):
jb L(shl_1_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_end):
movaps %xmm4, -0x20(%rdi)
@@ -471,7 +484,7 @@ L(shl_1_bwd):
lea (L(shl_1_bwd_loop_L2)-L(shl_1_bwd_loop_L1))(%r9), %r9
L(L1_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -496,7 +509,7 @@ L(shl_1_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_1_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_1_bwd_end):
movaps %xmm4, (%rdi)
@@ -513,7 +526,7 @@ L(shl_2):
lea (L(shl_2_loop_L2)-L(shl_2_loop_L1))(%r9), %r9
L(L2_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -536,7 +549,7 @@ L(shl_2_loop_L1):
jb L(shl_2_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_end):
movaps %xmm4, -0x20(%rdi)
@@ -556,7 +569,7 @@ L(shl_2_bwd):
lea (L(shl_2_bwd_loop_L2)-L(shl_2_bwd_loop_L1))(%r9), %r9
L(L2_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -581,7 +594,7 @@ L(shl_2_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_2_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_2_bwd_end):
movaps %xmm4, (%rdi)
@@ -598,7 +611,7 @@ L(shl_3):
lea (L(shl_3_loop_L2)-L(shl_3_loop_L1))(%r9), %r9
L(L3_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -621,7 +634,7 @@ L(shl_3_loop_L1):
jb L(shl_3_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_end):
movaps %xmm4, -0x20(%rdi)
@@ -641,7 +654,7 @@ L(shl_3_bwd):
lea (L(shl_3_bwd_loop_L2)-L(shl_3_bwd_loop_L1))(%r9), %r9
L(L3_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -666,7 +679,7 @@ L(shl_3_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_3_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_3_bwd_end):
movaps %xmm4, (%rdi)
@@ -683,7 +696,7 @@ L(shl_4):
lea (L(shl_4_loop_L2)-L(shl_4_loop_L1))(%r9), %r9
L(L4_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -706,7 +719,7 @@ L(shl_4_loop_L1):
jb L(shl_4_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_end):
movaps %xmm4, -0x20(%rdi)
@@ -726,7 +739,7 @@ L(shl_4_bwd):
lea (L(shl_4_bwd_loop_L2)-L(shl_4_bwd_loop_L1))(%r9), %r9
L(L4_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -751,7 +764,7 @@ L(shl_4_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_4_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_4_bwd_end):
movaps %xmm4, (%rdi)
@@ -768,7 +781,7 @@ L(shl_5):
lea (L(shl_5_loop_L2)-L(shl_5_loop_L1))(%r9), %r9
L(L5_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -791,7 +804,7 @@ L(shl_5_loop_L1):
jb L(shl_5_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_end):
movaps %xmm4, -0x20(%rdi)
@@ -811,7 +824,7 @@ L(shl_5_bwd):
lea (L(shl_5_bwd_loop_L2)-L(shl_5_bwd_loop_L1))(%r9), %r9
L(L5_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -836,7 +849,7 @@ L(shl_5_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_5_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_5_bwd_end):
movaps %xmm4, (%rdi)
@@ -853,7 +866,7 @@ L(shl_6):
lea (L(shl_6_loop_L2)-L(shl_6_loop_L1))(%r9), %r9
L(L6_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -876,7 +889,7 @@ L(shl_6_loop_L1):
jb L(shl_6_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_end):
movaps %xmm4, -0x20(%rdi)
@@ -896,7 +909,7 @@ L(shl_6_bwd):
lea (L(shl_6_bwd_loop_L2)-L(shl_6_bwd_loop_L1))(%r9), %r9
L(L6_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -921,7 +934,7 @@ L(shl_6_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_6_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_6_bwd_end):
movaps %xmm4, (%rdi)
@@ -938,7 +951,7 @@ L(shl_7):
lea (L(shl_7_loop_L2)-L(shl_7_loop_L1))(%r9), %r9
L(L7_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -961,7 +974,7 @@ L(shl_7_loop_L1):
jb L(shl_7_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_end):
movaps %xmm4, -0x20(%rdi)
@@ -981,7 +994,7 @@ L(shl_7_bwd):
lea (L(shl_7_bwd_loop_L2)-L(shl_7_bwd_loop_L1))(%r9), %r9
L(L7_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1006,7 +1019,7 @@ L(shl_7_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_7_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_7_bwd_end):
movaps %xmm4, (%rdi)
@@ -1023,7 +1036,7 @@ L(shl_8):
lea (L(shl_8_loop_L2)-L(shl_8_loop_L1))(%r9), %r9
L(L8_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
L(shl_8_loop_L2):
prefetchnta 0x1c0(%rsi)
L(shl_8_loop_L1):
@@ -1045,7 +1058,7 @@ L(shl_8_loop_L1):
jb L(shl_8_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
.p2align 4
L(shl_8_end):
@@ -1066,7 +1079,7 @@ L(shl_8_bwd):
lea (L(shl_8_bwd_loop_L2)-L(shl_8_bwd_loop_L1))(%r9), %r9
L(L8_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_8_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1091,7 +1104,7 @@ L(shl_8_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_8_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_8_bwd_end):
movaps %xmm4, (%rdi)
@@ -1108,7 +1121,7 @@ L(shl_9):
lea (L(shl_9_loop_L2)-L(shl_9_loop_L1))(%r9), %r9
L(L9_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1131,7 +1144,7 @@ L(shl_9_loop_L1):
jb L(shl_9_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_end):
movaps %xmm4, -0x20(%rdi)
@@ -1151,7 +1164,7 @@ L(shl_9_bwd):
lea (L(shl_9_bwd_loop_L2)-L(shl_9_bwd_loop_L1))(%r9), %r9
L(L9_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1176,7 +1189,7 @@ L(shl_9_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_9_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_9_bwd_end):
movaps %xmm4, (%rdi)
@@ -1193,7 +1206,7 @@ L(shl_10):
lea (L(shl_10_loop_L2)-L(shl_10_loop_L1))(%r9), %r9
L(L10_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1216,7 +1229,7 @@ L(shl_10_loop_L1):
jb L(shl_10_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_end):
movaps %xmm4, -0x20(%rdi)
@@ -1236,7 +1249,7 @@ L(shl_10_bwd):
lea (L(shl_10_bwd_loop_L2)-L(shl_10_bwd_loop_L1))(%r9), %r9
L(L10_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1261,7 +1274,7 @@ L(shl_10_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_10_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_10_bwd_end):
movaps %xmm4, (%rdi)
@@ -1278,7 +1291,7 @@ L(shl_11):
lea (L(shl_11_loop_L2)-L(shl_11_loop_L1))(%r9), %r9
L(L11_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1301,7 +1314,7 @@ L(shl_11_loop_L1):
jb L(shl_11_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_end):
movaps %xmm4, -0x20(%rdi)
@@ -1321,7 +1334,7 @@ L(shl_11_bwd):
lea (L(shl_11_bwd_loop_L2)-L(shl_11_bwd_loop_L1))(%r9), %r9
L(L11_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1346,7 +1359,7 @@ L(shl_11_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_11_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_11_bwd_end):
movaps %xmm4, (%rdi)
@@ -1363,7 +1376,7 @@ L(shl_12):
lea (L(shl_12_loop_L2)-L(shl_12_loop_L1))(%r9), %r9
L(L12_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1386,7 +1399,7 @@ L(shl_12_loop_L1):
jb L(shl_12_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_end):
movaps %xmm4, -0x20(%rdi)
@@ -1406,7 +1419,7 @@ L(shl_12_bwd):
lea (L(shl_12_bwd_loop_L2)-L(shl_12_bwd_loop_L1))(%r9), %r9
L(L12_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1431,7 +1444,7 @@ L(shl_12_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_12_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_12_bwd_end):
movaps %xmm4, (%rdi)
@@ -1448,7 +1461,7 @@ L(shl_13):
lea (L(shl_13_loop_L2)-L(shl_13_loop_L1))(%r9), %r9
L(L13_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1471,7 +1484,7 @@ L(shl_13_loop_L1):
jb L(shl_13_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_end):
movaps %xmm4, -0x20(%rdi)
@@ -1491,7 +1504,7 @@ L(shl_13_bwd):
lea (L(shl_13_bwd_loop_L2)-L(shl_13_bwd_loop_L1))(%r9), %r9
L(L13_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1516,7 +1529,7 @@ L(shl_13_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_13_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_13_bwd_end):
movaps %xmm4, (%rdi)
@@ -1533,7 +1546,7 @@ L(shl_14):
lea (L(shl_14_loop_L2)-L(shl_14_loop_L1))(%r9), %r9
L(L14_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1556,7 +1569,7 @@ L(shl_14_loop_L1):
jb L(shl_14_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_end):
movaps %xmm4, -0x20(%rdi)
@@ -1576,7 +1589,7 @@ L(shl_14_bwd):
lea (L(shl_14_bwd_loop_L2)-L(shl_14_bwd_loop_L1))(%r9), %r9
L(L14_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1601,7 +1614,7 @@ L(shl_14_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_14_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_14_bwd_end):
movaps %xmm4, (%rdi)
@@ -1618,7 +1631,7 @@ L(shl_15):
lea (L(shl_15_loop_L2)-L(shl_15_loop_L1))(%r9), %r9
L(L15_fwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_loop_L2):
prefetchnta 0x1c0(%rsi)
@@ -1641,7 +1654,7 @@ L(shl_15_loop_L1):
jb L(shl_15_end)
movaps %xmm4, -0x20(%rdi)
movaps %xmm5, -0x10(%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_end):
movaps %xmm4, -0x20(%rdi)
@@ -1661,7 +1674,7 @@ L(shl_15_bwd):
lea (L(shl_15_bwd_loop_L2)-L(shl_15_bwd_loop_L1))(%r9), %r9
L(L15_bwd):
lea -64(%rdx), %rdx
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_bwd_loop_L2):
prefetchnta -0x1c0(%rsi)
@@ -1686,7 +1699,7 @@ L(shl_15_bwd_loop_L1):
movaps %xmm3, 0x10(%rdi)
jb L(shl_15_bwd_end)
movaps %xmm4, (%rdi)
- jmp *%r9
+ _CET_NOTRACK jmp *%r9
ud2
L(shl_15_bwd_end):
movaps %xmm4, (%rdi)