summary refs log tree commit diff
path: root/sysdeps/i386/i686
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/i386/i686')
-rw-r--r-- sysdeps/i386/i686/Makefile 16
-rw-r--r-- sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S 178
-rw-r--r-- sysdeps/i386/i686/multiarch/memcpy-ssse3.S 42
-rw-r--r-- sysdeps/i386/i686/multiarch/memset-sse2-rep.S 8
-rw-r--r-- sysdeps/i386/i686/multiarch/memset-sse2.S 7
-rw-r--r-- sysdeps/i386/i686/multiarch/strcmp-sse4.S 4
6 files changed, 172 insertions, 83 deletions
diff --git a/sysdeps/i386/i686/Makefile b/sysdeps/i386/i686/Makefile
index dbcf1c33d3..e6b2924584 100644
--- a/sysdeps/i386/i686/Makefile
+++ b/sysdeps/i386/i686/Makefile
@@ -9,3 +9,19 @@ stack-align-test-flags += -msse
ifeq ($(subdir),string)
sysdep_routines += cacheinfo
endif
+
+ifeq (yes,$(config-asflags-i686))
+CFLAGS-.o += -Wa,-mtune=i686
+CFLAGS-.os += -Wa,-mtune=i686
+CFLAGS-.op += -Wa,-mtune=i686
+CFLAGS-.og += -Wa,-mtune=i686
+CFLAGS-.ob += -Wa,-mtune=i686
+CFLAGS-.oS += -Wa,-mtune=i686
+
+ASFLAGS-.o += -Wa,-mtune=i686
+ASFLAGS-.os += -Wa,-mtune=i686
+ASFLAGS-.op += -Wa,-mtune=i686
+ASFLAGS-.og += -Wa,-mtune=i686
+ASFLAGS-.ob += -Wa,-mtune=i686
+ASFLAGS-.oS += -Wa,-mtune=i686
+endif
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
index 0547b56d7c..48a109ccd6 100644
--- a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
+++ b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S
@@ -127,10 +127,8 @@ ENTRY (MEMCPY)
cmp %eax, %edx
jb L(copy_forward)
je L(fwd_write_0bytes)
- cmp $32, %ecx
- jae L(memmove_bwd)
- jmp L(bk_write_less32bytes_2)
-L(memmove_bwd):
+ cmp $48, %ecx
+ jb L(bk_write_less48bytes)
add %ecx, %eax
cmp %eax, %edx
movl SRC(%esp), %eax
@@ -162,6 +160,7 @@ L(48bytesormore):
movl %edx, %edi
and $-16, %edx
PUSH (%esi)
+ cfi_remember_state
add $16, %edx
movl %edi, %esi
sub %edx, %edi
@@ -234,6 +233,8 @@ L(shl_0_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
L(shl_0_gobble):
#ifdef DATA_CACHE_SIZE_HALF
@@ -252,7 +253,7 @@ L(shl_0_gobble):
sub %esi, %edi
cmp %edi, %ecx
jae L(shl_0_gobble_mem_start)
- lea -128(%ecx), %ecx
+ sub $128, %ecx
ALIGN (4)
L(shl_0_gobble_cache_loop):
movdqa (%eax), %xmm0
@@ -276,9 +277,9 @@ L(shl_0_gobble_cache_loop):
lea 0x80(%edx), %edx
jae L(shl_0_gobble_cache_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(shl_0_cache_less_64bytes)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_cache_less_64bytes)
movdqa (%eax), %xmm0
sub $0x40, %ecx
@@ -319,12 +320,13 @@ L(shl_0_cache_less_16bytes):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_0_gobble_mem_start):
cmp %al, %dl
je L(copy_page_by_rep)
- lea -128(%ecx), %ecx
+ sub $128, %ecx
L(shl_0_gobble_mem_loop):
prefetchnta 0x1c0(%eax)
prefetchnta 0x280(%eax)
@@ -352,9 +354,9 @@ L(shl_0_gobble_mem_loop):
lea 0x80(%edx), %edx
jae L(shl_0_gobble_mem_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(shl_0_mem_less_64bytes)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_mem_less_64bytes)
movdqa (%eax), %xmm0
sub $0x40, %ecx
@@ -395,14 +397,15 @@ L(shl_0_mem_less_16bytes):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_1):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -1(%eax), %eax
+ sub $1, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_1_loop):
@@ -432,20 +435,22 @@ L(shl_1_loop):
jae L(shl_1_loop)
L(shl_1_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 1(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_2):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -2(%eax), %eax
+ sub $2, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_2_loop):
@@ -475,20 +480,22 @@ L(shl_2_loop):
jae L(shl_2_loop)
L(shl_2_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 2(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_3):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -3(%eax), %eax
+ sub $3, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_3_loop):
@@ -518,20 +525,22 @@ L(shl_3_loop):
jae L(shl_3_loop)
L(shl_3_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 3(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_4):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -4(%eax), %eax
+ sub $4, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_4_loop):
@@ -561,20 +570,22 @@ L(shl_4_loop):
jae L(shl_4_loop)
L(shl_4_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 4(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_5):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -5(%eax), %eax
+ sub $5, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_5_loop):
@@ -604,21 +615,22 @@ L(shl_5_loop):
jae L(shl_5_loop)
L(shl_5_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 5(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_6):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -6(%eax), %eax
+ sub $6, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_6_loop):
@@ -648,20 +660,22 @@ L(shl_6_loop):
jae L(shl_6_loop)
L(shl_6_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 6(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_7):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -7(%eax), %eax
+ sub $7, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_7_loop):
@@ -691,20 +705,22 @@ L(shl_7_loop):
jae L(shl_7_loop)
L(shl_7_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 7(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_8):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -8(%eax), %eax
+ sub $8, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_8_loop):
@@ -734,20 +750,22 @@ L(shl_8_loop):
jae L(shl_8_loop)
L(shl_8_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 8(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_9):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -9(%eax), %eax
+ sub $9, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_9_loop):
@@ -777,20 +795,22 @@ L(shl_9_loop):
jae L(shl_9_loop)
L(shl_9_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 9(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_10):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -10(%eax), %eax
+ sub $10, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_10_loop):
@@ -820,20 +840,22 @@ L(shl_10_loop):
jae L(shl_10_loop)
L(shl_10_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 10(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_11):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -11(%eax), %eax
+ sub $11, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_11_loop):
@@ -863,20 +885,22 @@ L(shl_11_loop):
jae L(shl_11_loop)
L(shl_11_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 11(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_12):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -12(%eax), %eax
+ sub $12, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_12_loop):
@@ -906,20 +930,22 @@ L(shl_12_loop):
jae L(shl_12_loop)
L(shl_12_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 12(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_13):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -13(%eax), %eax
+ sub $13, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_13_loop):
@@ -949,20 +975,22 @@ L(shl_13_loop):
jae L(shl_13_loop)
L(shl_13_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 13(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_14):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -14(%eax), %eax
+ sub $14, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_14_loop):
@@ -992,21 +1020,22 @@ L(shl_14_loop):
jae L(shl_14_loop)
L(shl_14_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 14(%edi, %eax), %eax
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_15):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
- lea -15(%eax), %eax
+ sub $15, %eax
movaps (%eax), %xmm1
xor %edi, %edi
- lea -32(%ecx), %ecx
+ sub $32, %ecx
movdqu %xmm0, (%esi)
POP (%esi)
L(shl_15_loop):
@@ -1036,7 +1065,7 @@ L(shl_15_loop):
jae L(shl_15_loop)
L(shl_15_end):
- lea 32(%ecx), %ecx
+ add $32, %ecx
add %ecx, %edi
add %edi, %edx
lea 15(%edi, %eax), %eax
@@ -1240,21 +1269,23 @@ L(fwd_write_3bytes):
movl DEST(%esp), %eax
# endif
#endif
- RETURN
+ RETURN_END
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(large_page):
movdqu (%eax), %xmm1
- lea 16(%eax), %eax
movdqu %xmm0, (%esi)
movntdq %xmm1, (%edx)
- lea 16(%edx), %edx
- lea -16(%ecx), %ecx
+ add $0x10, %eax
+ add $0x10, %edx
+ sub $0x10, %ecx
cmp %al, %dl
je L(copy_page_by_rep)
L(large_page_loop_init):
POP (%esi)
- lea -0x80(%ecx), %ecx
+ sub $0x80, %ecx
POP (%edi)
L(large_page_loop):
prefetchnta 0x1c0(%eax)
@@ -1280,9 +1311,9 @@ L(large_page_loop):
movntdq %xmm7, 0x70(%edx)
lea 0x80(%edx), %edx
jae L(large_page_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(large_page_less_64bytes)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(large_page_less_64bytes)
movdqu (%eax), %xmm0
movdqu 0x10(%eax), %xmm1
@@ -1312,6 +1343,8 @@ L(large_page_less_32bytes):
sfence
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(copy_page_by_rep):
mov %eax, %esi
@@ -1658,8 +1691,8 @@ L(table_48_bytes_bwd):
L(copy_backward):
PUSH (%esi)
movl %eax, %esi
- lea (%ecx,%edx,1),%edx
- lea (%ecx,%esi,1),%esi
+ add %ecx, %edx
+ add %ecx, %esi
testl $0x3, %edx
jnz L(bk_align)
@@ -1698,9 +1731,10 @@ L(bk_write_less32bytes):
sub %ecx, %edx
sub %ecx, %eax
POP (%esi)
-L(bk_write_less32bytes_2):
+L(bk_write_less48bytes):
BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+ CFI_PUSH (%esi)
ALIGN (4)
L(bk_align):
cmp $8, %ecx
diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
index c512b0e812..ec9eeb95e4 100644
--- a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S
@@ -162,6 +162,7 @@ L(48bytesormore):
movl %edx, %edi
and $-16, %edx
PUSH (%esi)
+ cfi_remember_state
add $16, %edx
movl %edi, %esi
sub %edx, %edi
@@ -187,6 +188,8 @@ L(48bytesormore):
BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_0):
movdqu %xmm0, (%esi)
@@ -234,6 +237,7 @@ L(shl_0_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+ CFI_PUSH (%edi)
L(shl_0_gobble):
#ifdef DATA_CACHE_SIZE_HALF
@@ -383,7 +387,8 @@ L(shl_0_mem_less_16bytes):
add %ecx, %eax
BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_1):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -427,6 +432,8 @@ L(shl_1_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_2):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -470,6 +477,8 @@ L(shl_2_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_3):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -513,6 +522,8 @@ L(shl_3_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_4):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -556,6 +567,8 @@ L(shl_4_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_5):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -599,7 +612,8 @@ L(shl_5_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_6):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -643,6 +657,8 @@ L(shl_6_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_7):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -686,6 +702,8 @@ L(shl_7_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_8):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -729,6 +747,8 @@ L(shl_8_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_9):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -772,6 +792,8 @@ L(shl_9_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_10):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -815,6 +837,8 @@ L(shl_10_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_11):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -858,6 +882,8 @@ L(shl_11_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_12):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -901,6 +927,8 @@ L(shl_12_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_13):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -944,6 +972,8 @@ L(shl_13_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_14):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -987,7 +1017,8 @@ L(shl_14_end):
POP (%edi)
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
-
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(shl_15):
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
@@ -1228,8 +1259,10 @@ L(fwd_write_3bytes):
movl DEST(%esp), %eax
# endif
#endif
- RETURN
+ RETURN_END
+ cfi_restore_state
+ cfi_remember_state
ALIGN (4)
L(large_page):
movdqu (%eax), %xmm1
@@ -1652,6 +1685,7 @@ L(bk_write_less32bytes):
L(bk_write_less32bytes_2):
BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+ CFI_PUSH (%esi)
ALIGN (4)
L(bk_align):
cmp $8, %ecx
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
index d4bf9b7d3e..f9a0b13d0c 100644
--- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
+++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
@@ -243,7 +243,6 @@ L(32bytesormore):
pxor %xmm0, %xmm0
#else
movd %eax, %xmm0
- punpcklbw %xmm0, %xmm0
pshufd $0, %xmm0, %xmm0
#endif
testl $0xf, %edx
@@ -293,7 +292,7 @@ L(128bytesormore):
* fast string will prefetch and combine data efficiently.
*/
cmp %edi, %ecx
- jae L(128bytesormore_nt)
+ jae L(128bytesormore_endof_L1)
subl $128, %ecx
L(128bytesormore_normal):
sub $128, %ecx
@@ -323,11 +322,12 @@ L(128bytesormore_normal):
L(128bytesless_normal):
POP (%edi)
- lea 128(%ecx), %ecx
+ add $128, %ecx
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+ CFI_PUSH (%edi)
ALIGN (4)
-L(128bytesormore_nt):
+L(128bytesormore_endof_L1):
mov %edx, %edi
mov %ecx, %edx
shr $2, %ecx
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S
index 00e552e44e..92ad601bf2 100644
--- a/sysdeps/i386/i686/multiarch/memset-sse2.S
+++ b/sysdeps/i386/i686/multiarch/memset-sse2.S
@@ -243,7 +243,6 @@ L(32bytesormore):
pxor %xmm0, %xmm0
#else
movd %eax, %xmm0
- punpcklbw %xmm0, %xmm0
pshufd $0, %xmm0, %xmm0
#endif
testl $0xf, %edx
@@ -287,14 +286,17 @@ L(128bytesormore):
#ifdef DATA_CACHE_SIZE
POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
cmp $DATA_CACHE_SIZE, %ecx
#else
# ifdef SHARED
+# define RESTORE_EBX_STATE
call __i686.get_pc_thunk.bx
add $_GLOBAL_OFFSET_TABLE_, %ebx
cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
POP (%ebx)
+# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
cmp __x86_data_cache_size, %ecx
# endif
#endif
@@ -328,7 +330,7 @@ L(128bytesormore_normal):
jae L(128bytesormore_normal)
L(128bytesless_normal):
- lea 128(%ecx), %ecx
+ add $128, %ecx
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
ALIGN (4)
@@ -351,6 +353,7 @@ L(128bytes_L2_normal):
L(128bytesless_L2_normal):
BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
+ RESTORE_EBX_STATE
L(128bytesormore_nt_start):
sub %ebx, %ecx
ALIGN (4)
diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
index d5fd23e15c..81d6ec66f7 100644
--- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
@@ -178,7 +178,9 @@ L(first4bytes):
PUSH (%ebx)
PUSH (%edi)
PUSH (%esi)
+#ifdef USE_AS_STRNCMP
cfi_remember_state
+#endif
mov %edx, %edi
mov %eax, %esi
xorl %eax, %eax
@@ -246,8 +248,8 @@ L(ret):
ret
.p2align 4
- cfi_restore_state
#ifdef USE_AS_STRNCMP
+ cfi_restore_state
L(more16byteseq):
POP (%esi)
POP (%edi)