diff options
author | Andreas Schwab <schwab@redhat.com> | 2010-03-09 15:47:50 +0100 |
---|---|---|
committer | Andreas Schwab <schwab@redhat.com> | 2010-03-09 15:47:50 +0100 |
commit | 35be409292e2aaab331d21d2bc42ed4c44477281 (patch) | |
tree | df56d1f09cc5c8aca42b2be2af61f775f25a5b23 /sysdeps | |
parent | 7bea85290bebde69cfb5aa057afb70e019154b4d (diff) | |
parent | 462a5227b0d3220ab68f65272bd5b9d6d4f49b1f (diff) | |
Merge remote branch 'origin/master' into fedora/master
Diffstat (limited to 'sysdeps')
58 files changed, 1233 insertions, 2609 deletions
diff --git a/sysdeps/i386/configure b/sysdeps/i386/configure index f0c2758a5e..7814b3b313 100644 --- a/sysdeps/i386/configure +++ b/sysdeps/i386/configure @@ -637,3 +637,22 @@ if test $libc_cv_cc_sse4 = yes; then _ACEOF fi + +{ $as_echo "$as_me:$LINENO: checking for assembler -mtune=i686 support" >&5 +$as_echo_n "checking for assembler -mtune=i686 support... " >&6; } +if test "${libc_cv_as_i686+set}" = set; then + $as_echo_n "(cached) " >&6 +else + if { ac_try='${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + libc_cv_as_i686=yes +else + libc_cv_as_i686=no +fi +fi +{ $as_echo "$as_me:$LINENO: result: $libc_cv_as_i686" >&5 +$as_echo "$libc_cv_as_i686" >&6; } diff --git a/sysdeps/i386/configure.in b/sysdeps/i386/configure.in index 12dceaf844..9fc7fa59fe 100644 --- a/sysdeps/i386/configure.in +++ b/sysdeps/i386/configure.in @@ -47,3 +47,11 @@ fi]) if test $libc_cv_cc_sse4 = yes; then AC_DEFINE(HAVE_SSE4_SUPPORT) fi + +dnl Check if -Wa,-mtune=i686 works. 
+AC_CACHE_CHECK(for assembler -mtune=i686 support, libc_cv_as_i686, [dnl +if AC_TRY_COMMAND([${CC-cc} -Wa,-mtune=i686 -xc /dev/null -S -o /dev/null]); then + libc_cv_as_i686=yes +else + libc_cv_as_i686=no +fi]) diff --git a/sysdeps/i386/i686/Makefile b/sysdeps/i386/i686/Makefile index dbcf1c33d3..e6b2924584 100644 --- a/sysdeps/i386/i686/Makefile +++ b/sysdeps/i386/i686/Makefile @@ -9,3 +9,19 @@ stack-align-test-flags += -msse ifeq ($(subdir),string) sysdep_routines += cacheinfo endif + +ifeq (yes,$(config-asflags-i686)) +CFLAGS-.o += -Wa,-mtune=i686 +CFLAGS-.os += -Wa,-mtune=i686 +CFLAGS-.op += -Wa,-mtune=i686 +CFLAGS-.og += -Wa,-mtune=i686 +CFLAGS-.ob += -Wa,-mtune=i686 +CFLAGS-.oS += -Wa,-mtune=i686 + +ASFLAGS-.o += -Wa,-mtune=i686 +ASFLAGS-.os += -Wa,-mtune=i686 +ASFLAGS-.op += -Wa,-mtune=i686 +ASFLAGS-.og += -Wa,-mtune=i686 +ASFLAGS-.ob += -Wa,-mtune=i686 +ASFLAGS-.oS += -Wa,-mtune=i686 +endif diff --git a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S index 0547b56d7c..48a109ccd6 100644 --- a/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S +++ b/sysdeps/i386/i686/multiarch/memcpy-ssse3-rep.S @@ -127,10 +127,8 @@ ENTRY (MEMCPY) cmp %eax, %edx jb L(copy_forward) je L(fwd_write_0bytes) - cmp $32, %ecx - jae L(memmove_bwd) - jmp L(bk_write_less32bytes_2) -L(memmove_bwd): + cmp $48, %ecx + jb L(bk_write_less48bytes) add %ecx, %eax cmp %eax, %edx movl SRC(%esp), %eax @@ -162,6 +160,7 @@ L(48bytesormore): movl %edx, %edi and $-16, %edx PUSH (%esi) + cfi_remember_state add $16, %edx movl %edi, %esi sub %edx, %edi @@ -234,6 +233,8 @@ L(shl_0_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state L(shl_0_gobble): #ifdef DATA_CACHE_SIZE_HALF @@ -252,7 +253,7 @@ L(shl_0_gobble): sub %esi, %edi cmp %edi, %ecx jae L(shl_0_gobble_mem_start) - lea -128(%ecx), %ecx + sub $128, %ecx ALIGN (4) L(shl_0_gobble_cache_loop): movdqa (%eax), %xmm0 @@ -276,9 +277,9 @@ 
L(shl_0_gobble_cache_loop): lea 0x80(%edx), %edx jae L(shl_0_gobble_cache_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(shl_0_cache_less_64bytes) + add $0x80, %ecx + cmp $0x40, %ecx + jb L(shl_0_cache_less_64bytes) movdqa (%eax), %xmm0 sub $0x40, %ecx @@ -319,12 +320,13 @@ L(shl_0_cache_less_16bytes): POP (%edi) BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_0_gobble_mem_start): cmp %al, %dl je L(copy_page_by_rep) - lea -128(%ecx), %ecx + sub $128, %ecx L(shl_0_gobble_mem_loop): prefetchnta 0x1c0(%eax) prefetchnta 0x280(%eax) @@ -352,9 +354,9 @@ L(shl_0_gobble_mem_loop): lea 0x80(%edx), %edx jae L(shl_0_gobble_mem_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(shl_0_mem_less_64bytes) + add $0x80, %ecx + cmp $0x40, %ecx + jb L(shl_0_mem_less_64bytes) movdqa (%eax), %xmm0 sub $0x40, %ecx @@ -395,14 +397,15 @@ L(shl_0_mem_less_16bytes): POP (%edi) BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_1): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -1(%eax), %eax + sub $1, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_1_loop): @@ -432,20 +435,22 @@ L(shl_1_loop): jae L(shl_1_loop) L(shl_1_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 1(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_2): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -2(%eax), %eax + sub $2, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_2_loop): @@ -475,20 +480,22 @@ L(shl_2_loop): jae L(shl_2_loop) L(shl_2_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 2(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + 
cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_3): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -3(%eax), %eax + sub $3, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_3_loop): @@ -518,20 +525,22 @@ L(shl_3_loop): jae L(shl_3_loop) L(shl_3_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 3(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_4): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -4(%eax), %eax + sub $4, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_4_loop): @@ -561,20 +570,22 @@ L(shl_4_loop): jae L(shl_4_loop) L(shl_4_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 4(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_5): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -5(%eax), %eax + sub $5, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_5_loop): @@ -604,21 +615,22 @@ L(shl_5_loop): jae L(shl_5_loop) L(shl_5_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 5(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_6): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -6(%eax), %eax + sub $6, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_6_loop): @@ -648,20 +660,22 @@ L(shl_6_loop): jae L(shl_6_loop) L(shl_6_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 6(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + 
cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_7): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -7(%eax), %eax + sub $7, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_7_loop): @@ -691,20 +705,22 @@ L(shl_7_loop): jae L(shl_7_loop) L(shl_7_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 7(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_8): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -8(%eax), %eax + sub $8, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_8_loop): @@ -734,20 +750,22 @@ L(shl_8_loop): jae L(shl_8_loop) L(shl_8_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 8(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_9): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -9(%eax), %eax + sub $9, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_9_loop): @@ -777,20 +795,22 @@ L(shl_9_loop): jae L(shl_9_loop) L(shl_9_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 9(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_10): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -10(%eax), %eax + sub $10, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_10_loop): @@ -820,20 +840,22 @@ L(shl_10_loop): jae L(shl_10_loop) L(shl_10_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 10(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 
4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_11): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -11(%eax), %eax + sub $11, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_11_loop): @@ -863,20 +885,22 @@ L(shl_11_loop): jae L(shl_11_loop) L(shl_11_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 11(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_12): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -12(%eax), %eax + sub $12, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_12_loop): @@ -906,20 +930,22 @@ L(shl_12_loop): jae L(shl_12_loop) L(shl_12_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 12(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_13): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -13(%eax), %eax + sub $13, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_13_loop): @@ -949,20 +975,22 @@ L(shl_13_loop): jae L(shl_13_loop) L(shl_13_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 13(%edi, %eax), %eax POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_14): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -14(%eax), %eax + sub $14, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_14_loop): @@ -992,21 +1020,22 @@ L(shl_14_loop): jae L(shl_14_loop) L(shl_14_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 14(%edi, %eax), %eax POP (%edi) 
BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_15): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) - lea -15(%eax), %eax + sub $15, %eax movaps (%eax), %xmm1 xor %edi, %edi - lea -32(%ecx), %ecx + sub $32, %ecx movdqu %xmm0, (%esi) POP (%esi) L(shl_15_loop): @@ -1036,7 +1065,7 @@ L(shl_15_loop): jae L(shl_15_loop) L(shl_15_end): - lea 32(%ecx), %ecx + add $32, %ecx add %ecx, %edi add %edi, %edx lea 15(%edi, %eax), %eax @@ -1240,21 +1269,23 @@ L(fwd_write_3bytes): movl DEST(%esp), %eax # endif #endif - RETURN + RETURN_END + cfi_restore_state + cfi_remember_state ALIGN (4) L(large_page): movdqu (%eax), %xmm1 - lea 16(%eax), %eax movdqu %xmm0, (%esi) movntdq %xmm1, (%edx) - lea 16(%edx), %edx - lea -16(%ecx), %ecx + add $0x10, %eax + add $0x10, %edx + sub $0x10, %ecx cmp %al, %dl je L(copy_page_by_rep) L(large_page_loop_init): POP (%esi) - lea -0x80(%ecx), %ecx + sub $0x80, %ecx POP (%edi) L(large_page_loop): prefetchnta 0x1c0(%eax) @@ -1280,9 +1311,9 @@ L(large_page_loop): movntdq %xmm7, 0x70(%edx) lea 0x80(%edx), %edx jae L(large_page_loop) - cmp $-0x40, %ecx - lea 0x80(%ecx), %ecx - jl L(large_page_less_64bytes) + add $0x80, %ecx + cmp $0x40, %ecx + jb L(large_page_less_64bytes) movdqu (%eax), %xmm0 movdqu 0x10(%eax), %xmm1 @@ -1312,6 +1343,8 @@ L(large_page_less_32bytes): sfence BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(copy_page_by_rep): mov %eax, %esi @@ -1658,8 +1691,8 @@ L(table_48_bytes_bwd): L(copy_backward): PUSH (%esi) movl %eax, %esi - lea (%ecx,%edx,1),%edx - lea (%ecx,%esi,1),%esi + add %ecx, %edx + add %ecx, %esi testl $0x3, %edx jnz L(bk_align) @@ -1698,9 +1731,10 @@ L(bk_write_less32bytes): sub %ecx, %edx sub %ecx, %eax POP (%esi) -L(bk_write_less32bytes_2): +L(bk_write_less48bytes): BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) + CFI_PUSH (%esi) ALIGN (4) L(bk_align): cmp $8, %ecx diff --git 
a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S index c512b0e812..ec9eeb95e4 100644 --- a/sysdeps/i386/i686/multiarch/memcpy-ssse3.S +++ b/sysdeps/i386/i686/multiarch/memcpy-ssse3.S @@ -162,6 +162,7 @@ L(48bytesormore): movl %edx, %edi and $-16, %edx PUSH (%esi) + cfi_remember_state add $16, %edx movl %edi, %esi sub %edx, %edi @@ -187,6 +188,8 @@ L(48bytesormore): BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_0): movdqu %xmm0, (%esi) @@ -234,6 +237,7 @@ L(shl_0_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) + CFI_PUSH (%edi) L(shl_0_gobble): #ifdef DATA_CACHE_SIZE_HALF @@ -383,7 +387,8 @@ L(shl_0_mem_less_16bytes): add %ecx, %eax BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_1): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -427,6 +432,8 @@ L(shl_1_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_2): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -470,6 +477,8 @@ L(shl_2_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_3): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -513,6 +522,8 @@ L(shl_3_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_4): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -556,6 +567,8 @@ L(shl_4_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_5): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -599,7 +612,8 @@ L(shl_5_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_6): 
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -643,6 +657,8 @@ L(shl_6_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_7): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -686,6 +702,8 @@ L(shl_7_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_8): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -729,6 +747,8 @@ L(shl_8_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_9): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -772,6 +792,8 @@ L(shl_9_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_10): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -815,6 +837,8 @@ L(shl_10_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_11): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -858,6 +882,8 @@ L(shl_11_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_12): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -901,6 +927,8 @@ L(shl_12_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_13): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -944,6 +972,8 @@ L(shl_13_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_14): BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -987,7 +1017,8 @@ L(shl_14_end): POP (%edi) BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4) - + cfi_restore_state + cfi_remember_state ALIGN (4) L(shl_15): 
BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd)) @@ -1228,8 +1259,10 @@ L(fwd_write_3bytes): movl DEST(%esp), %eax # endif #endif - RETURN + RETURN_END + cfi_restore_state + cfi_remember_state ALIGN (4) L(large_page): movdqu (%eax), %xmm1 @@ -1652,6 +1685,7 @@ L(bk_write_less32bytes): L(bk_write_less32bytes_2): BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4) + CFI_PUSH (%esi) ALIGN (4) L(bk_align): cmp $8, %ecx diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S index d4bf9b7d3e..f9a0b13d0c 100644 --- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S +++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S @@ -243,7 +243,6 @@ L(32bytesormore): pxor %xmm0, %xmm0 #else movd %eax, %xmm0 - punpcklbw %xmm0, %xmm0 pshufd $0, %xmm0, %xmm0 #endif testl $0xf, %edx @@ -293,7 +292,7 @@ L(128bytesormore): * fast string will prefetch and combine data efficiently. */ cmp %edi, %ecx - jae L(128bytesormore_nt) + jae L(128bytesormore_endof_L1) subl $128, %ecx L(128bytesormore_normal): sub $128, %ecx @@ -323,11 +322,12 @@ L(128bytesormore_normal): L(128bytesless_normal): POP (%edi) - lea 128(%ecx), %ecx + add $128, %ecx BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) + CFI_PUSH (%edi) ALIGN (4) -L(128bytesormore_nt): +L(128bytesormore_endof_L1): mov %edx, %edi mov %ecx, %edx shr $2, %ecx diff --git a/sysdeps/i386/i686/multiarch/memset-sse2.S b/sysdeps/i386/i686/multiarch/memset-sse2.S index 00e552e44e..92ad601bf2 100644 --- a/sysdeps/i386/i686/multiarch/memset-sse2.S +++ b/sysdeps/i386/i686/multiarch/memset-sse2.S @@ -243,7 +243,6 @@ L(32bytesormore): pxor %xmm0, %xmm0 #else movd %eax, %xmm0 - punpcklbw %xmm0, %xmm0 pshufd $0, %xmm0, %xmm0 #endif testl $0xf, %edx @@ -287,14 +286,17 @@ L(128bytesormore): #ifdef DATA_CACHE_SIZE POP (%ebx) +# define RESTORE_EBX_STATE CFI_PUSH (%ebx) cmp $DATA_CACHE_SIZE, %ecx #else # ifdef SHARED +# define RESTORE_EBX_STATE call __i686.get_pc_thunk.bx add $_GLOBAL_OFFSET_TABLE_, %ebx cmp 
__x86_data_cache_size@GOTOFF(%ebx), %ecx # else POP (%ebx) +# define RESTORE_EBX_STATE CFI_PUSH (%ebx) cmp __x86_data_cache_size, %ecx # endif #endif @@ -328,7 +330,7 @@ L(128bytesormore_normal): jae L(128bytesormore_normal) L(128bytesless_normal): - lea 128(%ecx), %ecx + add $128, %ecx BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) ALIGN (4) @@ -351,6 +353,7 @@ L(128bytes_L2_normal): L(128bytesless_L2_normal): BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes)) + RESTORE_EBX_STATE L(128bytesormore_nt_start): sub %ebx, %ecx ALIGN (4) diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S index d5fd23e15c..81d6ec66f7 100644 --- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S +++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S @@ -178,7 +178,9 @@ L(first4bytes): PUSH (%ebx) PUSH (%edi) PUSH (%esi) +#ifdef USE_AS_STRNCMP cfi_remember_state +#endif mov %edx, %edi mov %eax, %esi xorl %eax, %eax @@ -246,8 +248,8 @@ L(ret): ret .p2align 4 - cfi_restore_state #ifdef USE_AS_STRNCMP + cfi_restore_state L(more16byteseq): POP (%esi) POP (%edi) diff --git a/sysdeps/sparc/Makefile b/sysdeps/sparc/Makefile index 73b926554e..735e4a40db 100644 --- a/sysdeps/sparc/Makefile +++ b/sysdeps/sparc/Makefile @@ -10,3 +10,8 @@ endif ifeq ($(subdir),db2) CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_SPARC_GCC=1 endif + +ifeq ($(subdir),csu) +# get offset to rtld_global._dl_hwcap +gen-as-const-headers += rtld-global-offsets.sym +endif diff --git a/sysdeps/sparc/elf/rtld-global-offsets.sym b/sysdeps/sparc/elf/rtld-global-offsets.sym new file mode 100644 index 0000000000..ff4e97f2a6 --- /dev/null +++ b/sysdeps/sparc/elf/rtld-global-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_HWCAP_OFFSET rtld_global_ro_offsetof (_dl_hwcap) diff --git a/sysdeps/sparc/sparc32/bcopy.c b/sysdeps/sparc/sparc32/bcopy.c deleted file mode 100644 index 
9a455f33c4..0000000000 --- a/sysdeps/sparc/sparc32/bcopy.c +++ /dev/null @@ -1 +0,0 @@ -/* bcopy is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h index 53257104a6..9631db32e1 100644 --- a/sysdeps/sparc/sparc32/dl-machine.h +++ b/sysdeps/sparc/sparc32/dl-machine.h @@ -563,7 +563,7 @@ elf_machine_lazy_rel (struct link_map *map, { Elf32_Addr value = map->l_addr + reloc->r_addend; value = ((Elf32_Addr (*) (void)) value) (); - sparc_fixup_plt (reloc, reloc_addr, value, 0, 1); + sparc_fixup_plt (reloc, reloc_addr, value, 1, 1); } else if (r_type == R_SPARC_NONE) ; diff --git a/sysdeps/sparc/sparc32/dl-plt.h b/sysdeps/sparc/sparc32/dl-plt.h index edcc5c1374..bfb891fe69 100644 --- a/sysdeps/sparc/sparc32/dl-plt.h +++ b/sysdeps/sparc/sparc32/dl-plt.h @@ -25,19 +25,55 @@ #define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */ #define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */ #define OPCODE_BA 0x30800000 /* b,a ?; add PC-rel word address */ +#define OPCODE_BA_PT 0x30480000 /* ba,a,pt %icc, ?; add PC-rel word address */ static inline __attribute__ ((always_inline)) Elf32_Addr sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr, Elf32_Addr value, int t, int do_flush) { - Elf32_Sword disp = value - (Elf32_Addr) reloc_addr; + Elf32_Sword disp; - if (0 && disp >= -0x800000 && disp < 0x800000) + /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap, + in which case we'll be resolving all PLT entries and thus can + optimize by overwriting instructions starting at the first PLT entry + instruction and we need not be mindful of thread safety. + + Otherwise, 't' is '1'. */ + reloc_addr += t; + disp = value - (Elf32_Addr) reloc_addr; + + if (disp >= -0x800000 && disp < 0x800000) { - /* Don't need to worry about thread safety. We're writing just one - instruction. 
*/ + unsigned int insn = OPCODE_BA | ((disp >> 2) & 0x3fffff); + +#ifdef __sparc_v9__ + /* On V9 we can do even better by using a branch with + prediction if we fit into the even smaller 19-bit + displacement field. */ + if (disp >= -0x100000 && disp < 0x100000) + insn = OPCODE_BA_PT | ((disp >> 2) & 0x07ffff); +#endif + + /* Even if we are writing just a single branch, we must not + ignore the 't' offset. Consider a case where we have some + PLT slots which can be optimized into a single branch and + some which cannot. Then we can end up with a PLT which looks + like: + + PLT4.0: sethi %(PLT_4_INDEX), %g1 + sethi %(fully_resolved_sym_4), %g1 + jmp %g1 + %lo(fully_resolved_sym_4) + PLT5.0: ba,a fully_resolved_sym_5 + ba,a PLT0.0 + ... + + The delay slot of that jmp must always be either a sethi to + %g1 or a nop. But if we try to place this displacement + branch there, PLT4.0 will jump to fully_resolved_sym_4 for 1 + instruction and then go immediately to + fully_resolved_sym_5. */ - reloc_addr[0] = OPCODE_BA | ((disp >> 2) & 0x3fffff); + reloc_addr[0] = insn; if (do_flush) __asm __volatile ("flush %0" : : "r"(reloc_addr)); } @@ -48,7 +84,6 @@ sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr, need not be done during bootstrapping, since there are no threads. But we also can't tell if we _can_ use flush, so don't. 
*/ - reloc_addr += t; reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff); if (do_flush) __asm __volatile ("flush %0+4" : : "r"(reloc_addr)); diff --git a/sysdeps/sparc/sparc32/memcpy.S b/sysdeps/sparc/sparc32/memcpy.S index 6bd55c06a1..748a0862fe 100644 --- a/sysdeps/sparc/sparc32/memcpy.S +++ b/sysdeps/sparc/sparc32/memcpy.S @@ -68,45 +68,6 @@ stb %t0, [%dst - offset - 0x02]; \ stb %t1, [%dst - offset - 0x01]; -/* Both these macros have to start with exactly the same insn */ -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src - offset - 0x20], %t0; \ - ldd [%src - offset - 0x18], %t2; \ - ldd [%src - offset - 0x10], %t4; \ - ldd [%src - offset - 0x08], %t6; \ - st %t0, [%dst - offset - 0x20]; \ - st %t1, [%dst - offset - 0x1c]; \ - st %t2, [%dst - offset - 0x18]; \ - st %t3, [%dst - offset - 0x14]; \ - st %t4, [%dst - offset - 0x10]; \ - st %t5, [%dst - offset - 0x0c]; \ - st %t6, [%dst - offset - 0x08]; \ - st %t7, [%dst - offset - 0x04]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src - offset - 0x20], %t0; \ - ldd [%src - offset - 0x18], %t2; \ - ldd [%src - offset - 0x10], %t4; \ - ldd [%src - offset - 0x08], %t6; \ - std %t0, [%dst - offset - 0x20]; \ - std %t2, [%dst - offset - 0x18]; \ - std %t4, [%dst - offset - 0x10]; \ - std %t6, [%dst - offset - 0x08]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldd [%src + offset + 0x00], %t0; \ - ldd [%src + offset + 0x08], %t2; \ - st %t0, [%dst + offset + 0x00]; \ - st %t1, [%dst + offset + 0x04]; \ - st %t2, [%dst + offset + 0x08]; \ - st %t3, [%dst + offset + 0x0c]; - -#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ - ldub [%src + offset + 0x00], %t0; \ - ldub [%src + offset + 0x01], %t1; \ - stb %t0, [%dst + offset + 0x00]; \ - stb %t1, [%dst + offset + 0x01]; - #define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \ ldd [%src + offset + 0x00], %t0; \ ldd [%src + offset 
+ 0x08], %t2; \ @@ -146,295 +107,20 @@ .text .align 4 -ENTRY(bcopy) - mov %o0, %o3 - mov %o1, %o0 - mov %o3, %o1 -END(bcopy) - -ENTRY(memmove) - cmp %o0, %o1 +ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ + sub %o0, %o1, %o4 st %o0, [%sp + 64] - bleu 9f - sub %o0, %o1, %o4 - - add %o1, %o2, %o3 - cmp %o3, %o0 - bleu 0f - andcc %o4, 3, %o5 - - add %o1, %o2, %o1 - add %o0, %o2, %o0 - bne 77f +9: andcc %o4, 3, %o5 +0: bne 86f cmp %o2, 15 - bleu 91f - andcc %o1, 3, %g0 - be 3f - nop - - andcc %o1, 1, %g0 - be 4f - andcc %o1, 2, %g0 - - ldub [%o1 - 1], %g2 - sub %o1, 1, %o1 - stb %g2, [%o0 - 1] - sub %o2, 1, %o2 - be 3f - sub %o0, 1, %o0 -4: lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o2, 2, %o2 - sub %o0, 2, %o0 - -3: andcc %o1, 4, %g0 - - be 2f - mov %o2, %g1 - - ld [%o1 - 4], %o4 - sub %g1, 4, %g1 - st %o4, [%o0 - 4] - sub %o1, 4, %o1 - sub %o0, 4, %o0 -2: andcc %g1, 0xffffff80, %g6 - be 3f - andcc %o0, 4, %g0 - - be 74f + 4 -5: RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne 5b - sub %o0, 128, %o0 - -3: andcc %g1, 0x70, %g6 - be 72f - andcc %g1, 8, %g0 - - srl %g6, 1, %o4 - mov %o7, %g2 - add %g6, %o4, %o4 -101: call 100f - sub %o1, %g6, %o1 - mov %g2, %o7 - jmpl %o5 + (72f - 101b), %g0 - sub %o0, %g6, %o0 - -71: RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) -72: be 73f - andcc %g1, 4, %g0 - - ldd [%o1 - 0x08], %g2 - sub %o0, 8, %o0 - sub %o1, 8, %o1 - st %g2, [%o0] - st %g3, [%o0 + 0x04] -73: be 1f - andcc %g1, 2, %g0 
- - ld [%o1 - 4], %g2 - sub %o1, 4, %o1 - st %g2, [%o0 - 4] - sub %o0, 4, %o0 -1: be 1f - andcc %g1, 1, %g0 - - lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o0, 2, %o0 -1: be 1f - nop - - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - ld [%sp + 64], %o0 - -74: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne 74b - sub %o0, 128, %o0 - - andcc %g1, 0x70, %g6 - be 72b - andcc %g1, 8, %g0 - - srl %g6, 1, %o4 - mov %o7, %g2 - add %g6, %o4, %o4 -102: call 100f - sub %o1, %g6, %o1 - mov %g2, %o7 - jmpl %o5 + (72b - 102b), %g0 - sub %o0, %g6, %o0 - -75: and %o2, 0xe, %o3 - mov %o7, %g2 - sll %o3, 3, %o4 - sub %o0, %o3, %o0 -103: call 100f - sub %o1, %o3, %o1 - mov %g2, %o7 - jmpl %o5 + (76f - 103b), %g0 - andcc %o2, 1, %g0 - - RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) - -76: be 1f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - ld [%sp + 64], %o0 -91: bne 75b - andcc %o2, 8, %g0 - - be 1f - andcc %o2, 4, %g0 - - ld [%o1 - 0x08], %g2 - ld [%o1 - 0x04], %g3 - sub %o1, 8, %o1 - st %g2, [%o0 - 0x08] - st %g3, [%o0 - 0x04] - sub %o0, 8, %o0 -1: b 73b - mov %o2, %g1 - -77: cmp %o2, 15 - bleu 75b - andcc %o0, 3, %g0 - be 64f - andcc %o0, 1, %g0 - be 63f - andcc %o0, 2, %g0 - ldub [%o1 - 1], %g5 - sub %o1, 1, %o1 - stb %g5, [%o0 - 1] - sub %o0, 1, %o0 - be 64f - sub %o2, 1, %o2 - -63: ldub [%o1 - 1], %g5 - sub %o1, 2, %o1 - stb %g5, [%o0 - 1] - sub %o0, 2, %o0 - ldub [%o1], %g5 - sub %o2, 2, %o2 - stb %g5, [%o0] -64: and %o1, 3, %g2 - and 
%o1, -4, %o1 - and %o2, 0xc, %g3 - add %o1, 4, %o1 - cmp %g3, 4 - sll %g2, 3, %g4 - mov 32, %g2 - be 4f - sub %g2, %g4, %g6 - - blu 3f - cmp %g3, 8 - - be 2f - srl %o2, 2, %g3 - - ld [%o1 - 4], %o3 - add %o0, -8, %o0 - ld [%o1 - 8], %o4 - add %o1, -16, %o1 - b 7f - add %g3, 1, %g3 -2: ld [%o1 - 4], %o4 - add %o0, -4, %o0 - ld [%o1 - 8], %g1 - add %o1, -12, %o1 - b 8f - add %g3, 2, %g3 -3: ld [%o1 - 4], %o5 - add %o0, -12, %o0 - ld [%o1 - 8], %o3 - add %o1, -20, %o1 - b 6f - srl %o2, 2, %g3 -4: ld [%o1 - 4], %g1 - srl %o2, 2, %g3 - ld [%o1 - 8], %o5 - add %o1, -24, %o1 - add %o0, -16, %o0 - add %g3, -1, %g3 + bleu 90f + andcc %o1, 3, %g0 - ld [%o1 + 12], %o3 -5: sll %o5, %g4, %g2 - srl %g1, %g6, %g5 - or %g2, %g5, %g2 - st %g2, [%o0 + 12] -6: ld [%o1 + 8], %o4 - sll %o3, %g4, %g2 - srl %o5, %g6, %g5 - or %g2, %g5, %g2 - st %g2, [%o0 + 8] -7: ld [%o1 + 4], %g1 - sll %o4, %g4, %g2 - srl %o3, %g6, %g5 - or %g2, %g5, %g2 - st %g2, [%o0 + 4] -8: ld [%o1], %o5 - sll %g1, %g4, %g2 - srl %o4, %g6, %g5 - addcc %g3, -4, %g3 - or %g2, %g5, %g2 - add %o1, -16, %o1 - st %g2, [%o0] - add %o0, -16, %o0 - bne,a 5b - ld [%o1 + 12], %o3 - sll %o5, %g4, %g2 - srl %g1, %g6, %g5 - srl %g4, 3, %g3 - or %g2, %g5, %g2 - add %o1, %g3, %o1 - andcc %o2, 2, %g0 - st %g2, [%o0 + 12] - be 1f - andcc %o2, 1, %g0 - - ldub [%o1 + 15], %g5 - add %o1, -2, %o1 - stb %g5, [%o0 + 11] - add %o0, -2, %o0 - ldub [%o1 + 16], %g5 - stb %g5, [%o0 + 12] -1: be 1f - nop - ldub [%o1 + 15], %g5 - stb %g5, [%o0 + 11] -1: retl - ld [%sp + 64], %o0 + be 78f + andcc %o1, 4, %g0 -78: andcc %o1, 1, %g0 + andcc %o1, 1, %g0 be 4f andcc %o1, 2, %g0 @@ -442,30 +128,16 @@ ENTRY(memmove) add %o1, 1, %o1 stb %g2, [%o0] sub %o2, 1, %o2 - bne 3f + bne 77f add %o0, 1, %o0 4: lduh [%o1], %g2 add %o1, 2, %o1 sth %g2, [%o0] sub %o2, 2, %o2 - b 3f - add %o0, 2, %o0 -END(memmove) - -ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ - sub %o0, %o1, %o4 - st %o0, [%sp + 64] -9: andcc %o4, 3, %o5 -0: bne 86f - cmp %o2, 15 - - bleu 90f - andcc 
%o1, 3, %g0 - - bne 78b -3: andcc %o1, 4, %g0 + add %o0, 2, %o0 - be 2f +77: andcc %o1, 4, %g0 +78: be 2f mov %o2, %g1 ld [%o1], %o4 @@ -968,5 +640,5 @@ ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ 110: retl sub %o7, %g6, %o5 END(memcpy) + libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc32/memmove.c b/sysdeps/sparc/sparc32/memmove.c deleted file mode 100644 index a8d2d49948..0000000000 --- a/sysdeps/sparc/sparc32/memmove.c +++ /dev/null @@ -1 +0,0 @@ -/* memmove is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S b/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S deleted file mode 100644 index c3f097118f..0000000000 --- a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S +++ /dev/null @@ -1,215 +0,0 @@ -! SPARC __udiv_qrnnd division support, used from longlong.h. - -! Copyright (C) 1993, 1994 Free Software Foundation, Inc. - -! This file is part of the GNU MP Library. - -! The GNU MP Library is free software; you can redistribute it and/or modify -! it under the terms of the GNU Lesser General Public License as published by -! the Free Software Foundation; either version 2.1 of the License, or (at your -! option) any later version. - -! The GNU MP Library is distributed in the hope that it will be useful, but -! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -! License for more details. - -! You should have received a copy of the GNU Lesser General Public License -! along with the GNU MP Library; see the file COPYING.LIB. If not, write to -! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - -! INPUT PARAMETERS -! rem_ptr o0 -! n1 o1 -! n0 o2 -! 
d o3 - -#include "sysdep.h" - -ENTRY(__udiv_qrnnd) - tst %o3 - bneg LOC(largedivisor) - mov 8,%g1 - - b LOC(p1) - addxcc %o2,%o2,%o2 - -LOC(plop): - bcc LOC(n1) - addxcc %o2,%o2,%o2 -LOC(p1): - addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc LOC(n2) - addxcc %o2,%o2,%o2 -LOC(p2): - addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc LOC(n3) - addxcc %o2,%o2,%o2 -LOC(p3): - addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc LOC(n4) - addxcc %o2,%o2,%o2 -LOC(p4): - addx %o1,%o1,%o1 - addcc %g1,-1,%g1 - bne LOC(plop) - subcc %o1,%o3,%o4 - bcc LOC(n5) - addxcc %o2,%o2,%o2 -LOC(p5): - st %o1,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(nlop): - bcc LOC(p1) - addxcc %o2,%o2,%o2 -LOC(n1): - addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc LOC(p2) - addxcc %o2,%o2,%o2 -LOC(n2): - addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc LOC(p3) - addxcc %o2,%o2,%o2 -LOC(n3): - addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc LOC(p4) - addxcc %o2,%o2,%o2 -LOC(n4): - addx %o4,%o4,%o4 - addcc %g1,-1,%g1 - bne LOC(nlop) - subcc %o4,%o3,%o1 - bcc LOC(p5) - addxcc %o2,%o2,%o2 -LOC(n5): - st %o4,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(largedivisor): - and %o2,1,%o5 ! %o5 = n0 & 1 - - srl %o2,1,%o2 - sll %o1,31,%g2 - or %g2,%o2,%o2 ! %o2 = lo(n1n0 >> 1) - srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1) - - and %o3,1,%g2 - srl %o3,1,%g3 ! %g3 = floor(d / 2) - add %g3,%g2,%g3 ! %g3 = ceil(d / 2) - - b LOC(Lp1) - addxcc %o2,%o2,%o2 - -LOC(Lplop): - bcc LOC(Ln1) - addxcc %o2,%o2,%o2 -LOC(Lp1): - addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc LOC(Ln2) - addxcc %o2,%o2,%o2 -LOC(Lp2): - addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc LOC(Ln3) - addxcc %o2,%o2,%o2 -LOC(Lp3): - addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc LOC(Ln4) - addxcc %o2,%o2,%o2 -LOC(Lp4): - addx %o1,%o1,%o1 - addcc %g1,-1,%g1 - bne LOC(Lplop) - subcc %o1,%g3,%o4 - bcc LOC(Ln5) - addxcc %o2,%o2,%o2 -LOC(Lp5): - add %o1,%o1,%o1 ! 
<< 1 - tst %g2 - bne LOC(Oddp) - add %o5,%o1,%o1 - st %o1,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(Lnlop): - bcc LOC(Lp1) - addxcc %o2,%o2,%o2 -LOC(Ln1): - addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc LOC(Lp2) - addxcc %o2,%o2,%o2 -LOC(Ln2): - addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc LOC(Lp3) - addxcc %o2,%o2,%o2 -LOC(Ln3): - addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc LOC(Lp4) - addxcc %o2,%o2,%o2 -LOC(Ln4): - addx %o4,%o4,%o4 - addcc %g1,-1,%g1 - bne LOC(Lnlop) - subcc %o4,%g3,%o1 - bcc LOC(Lp5) - addxcc %o2,%o2,%o2 -LOC(Ln5): - add %o4,%o4,%o4 ! << 1 - tst %g2 - bne LOC(Oddn) - add %o5,%o4,%o4 - st %o4,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(Oddp): - xnor %g0,%o2,%o2 - ! q' in %o2. r' in %o1 - addcc %o1,%o2,%o1 - bcc LOC(Lp6) - addx %o2,0,%o2 - sub %o1,%o3,%o1 -LOC(Lp6): - subcc %o1,%o3,%g0 - bcs LOC(Lp7) - subx %o2,-1,%o2 - sub %o1,%o3,%o1 -LOC(Lp7): - st %o1,[%o0] - retl - mov %o2,%o0 - -LOC(Oddn): - xnor %g0,%o2,%o2 - ! q' in %o2. r' in %o4 - addcc %o4,%o2,%o4 - bcc LOC(Ln6) - addx %o2,0,%o2 - sub %o4,%o3,%o4 -LOC(Ln6): - subcc %o4,%o3,%g0 - bcs LOC(Ln7) - subx %o2,-1,%o2 - sub %o4,%o3,%o4 -LOC(Ln7): - st %o4,[%o0] - retl - mov %o2,%o0 - -END(__udiv_qrnnd) diff --git a/sysdeps/sparc/sparc32/sparcv9/bcopy.c b/sysdeps/sparc/sparc32/sparcv9/bcopy.c deleted file mode 100644 index 9a455f33c4..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/bcopy.c +++ /dev/null @@ -1 +0,0 @@ -/* bcopy is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/sparcv9/memmove.c b/sysdeps/sparc/sparc32/sparcv9/memmove.c deleted file mode 100644 index a8d2d49948..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/memmove.c +++ /dev/null @@ -1 +0,0 @@ -/* memmove is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile new file mode 100644 index 0000000000..4d45042a95 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-ultra3 memcpy-niagara1 
memcpy-niagara2 \ + memset-niagara1 +endif diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S new file mode 100644 index 0000000000..10aef85fe1 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S @@ -0,0 +1,2 @@ +#define XCC icc +#include <sparc64/multiarch/memcpy-niagara1.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S new file mode 100644 index 0000000000..6b1bf6ea70 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S @@ -0,0 +1,2 @@ +#define XCC icc +#include <sparc64/multiarch/memcpy-niagara2.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S new file mode 100644 index 0000000000..77adf151aa --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S @@ -0,0 +1,2 @@ +#define XCC icc +#include <sparc64/multiarch/memcpy-ultra3.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S new file mode 100644 index 0000000000..14df91e005 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S @@ -0,0 +1,4 @@ +#define ASI_PNF 0x82 +#define ASI_BLK_P 0xf0 +#define XCC icc +#include <sparc64/multiarch/memcpy.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S new file mode 100644 index 0000000000..b432420876 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S @@ -0,0 +1,2 @@ +#define XCC icc +#include <sparc64/multiarch/memset-niagara1.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S new file mode 100644 index 0000000000..8f8264337d --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S @@ -0,0 +1,4 @@ 
+#define ASI_PNF 0x82 +#define ASI_BLK_P 0xf0 +#define XCC icc +#include <sparc64/multiarch/memset.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S deleted file mode 100644 index 61960dce61..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9b/memcpy.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S deleted file mode 100644 index 4c05f57bc2..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9v/memcpy.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S deleted file mode 100644 index 5e46c7489f..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9v/memset.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S deleted file mode 100644 index 7f4606037c..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9v2/memcpy.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S deleted file mode 100644 index 72de7bb0cf..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9v2/memset.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/strlen.S b/sysdeps/sparc/sparc32/sparcv9/strlen.S index b8f4dba4f4..28a216c076 100644 --- a/sysdeps/sparc/sparc32/sparcv9/strlen.S +++ b/sysdeps/sparc/sparc32/sparcv9/strlen.S @@ -1,4 +1 @@ -#define ASI_PNF 0x82 -#define ASI_BLK_P 0xf0 -#define XCC icc #include <sparc64/strlen.S> diff --git a/sysdeps/sparc/sparc32/strlen.S 
b/sysdeps/sparc/sparc32/strlen.S index ed92f20e28..2945bb5484 100644 --- a/sysdeps/sparc/sparc32/strlen.S +++ b/sysdeps/sparc/sparc32/strlen.S @@ -1,8 +1,9 @@ /* Determine the length of a string. For SPARC v7. - Copyright (C) 1996, 1999, 2003 Free Software Foundation, Inc. + Copyright (C) 1996, 1999, 2003, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Jakub Jelinek <jj@ultra.linux.cz>. + Contributed by Jakub Jelinek <jj@ultra.linux.cz> and + David S. Miller <davem@davemloft.net>. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -21,86 +22,55 @@ #include <sysdep.h> - /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x01010101) & (~xword) & 0x80808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. 
- */ - .text .align 4 ENTRY(strlen) - mov %o0, %o1 - andcc %o0, 3, %g0 - be 20f - sethi %hi(0x80808080), %o4 - - ldub [%o0], %o5 - cmp %o5, 0 - be 21f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 4f - or %o4, %lo(0x80808080), %o3 - ldub [%o0], %o5 - cmp %o5, 0 - be 22f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 5f - sethi %hi(0x01010101), %o4 - ldub [%o0], %o5 - cmp %o5, 0 - be 23f - add %o0, 1, %o0 - b 11f - or %o4, %lo(0x01010101), %o2 -21: retl - mov 0, %o0 -22: retl - mov 1, %o0 -23: retl - mov 2, %o0 - -20: or %o4, %lo(0x80808080), %o3 -4: sethi %hi(0x01010101), %o4 -5: or %o4, %lo(0x01010101), %o2 -11: ld [%o0], %o5 -12: sub %o5, %o2, %o4 -#ifdef EIGHTBIT_NOT_RARE - andn %o4, %o5, %o4 -#endif - andcc %o4, %o3, %g0 - be 11b - add %o0, 4, %o0 - - srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o0, -4, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o4, 1, %o4 - andcc %o5, 0xff, %g0 - bne,a 12b - ld [%o0], %o5 - add %o4, 1, %o4 -13: retl - sub %o4, %o1, %o0 + mov %o0, %o1 + andn %o0, 0x3, %o0 + + ld [%o0], %o5 + and %o1, 0x3, %g1 + mov -1, %g5 + + sethi %hi(0x01010101), %o2 + sll %g1, 3, %g1 + + or %o2, %lo(0x01010101), %o2 + srl %g5, %g1, %g2 + + orn %o5, %g2, %o5 + sll %o2, 7, %o3 +10: add %o0, 4, %o0 + + andn %o3, %o5, %g1 + sub %o5, %o2, %g2 + + andcc %g1, %g2, %g0 + be,a 10b + ld [%o0], %o5 + + srl %o5, 24, %g1 + + andcc %g1, 0xff, %g0 + be 90f + sub %o0, 4, %o0 + + srl %o5, 16, %g2 + + andcc %g2, 0xff, %g0 + be 90f + add %o0, 1, %o0 + + srl %o5, 8, %g1 + + andcc %g1, 0xff, %g0 + be 90f + add %o0, 1, %o0 + + add %o0, 1, %o0 + +90: retl + sub %o0, %o1, %o0 END(strlen) libc_hidden_builtin_def (strlen) diff --git a/sysdeps/sparc/sparc32/udiv_qrnnd.S b/sysdeps/sparc/sparc32/udiv_qrnnd.S deleted file mode 100644 index 4955318a66..0000000000 --- a/sysdeps/sparc/sparc32/udiv_qrnnd.S +++ /dev/null @@ -1,168 +0,0 @@ -! SPARC __udiv_qrnnd division support, used from longlong.h. -! 
-! Copyright (C) 1993, 1994, 1997 Free Software Foundation, Inc. -! -! This file is part of the GNU MP Library. -! -! The GNU MP Library is free software; you can redistribute it and/or modify -! it under the terms of the GNU Lesser General Public License as published by -! the Free Software Foundation; either version 2.1 of the License, or (at your -! option) any later version. -! -! The GNU MP Library is distributed in the hope that it will be useful, but -! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -! License for more details. -! -! You should have received a copy of the GNU Lesser General Public License -! along with the GNU MP Library; see the file COPYING.LIB. If not, write to -! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -! -! Added PIC support - May/96, Miguel de Icaza -! -! INPUT PARAMETERS -! rem_ptr i0 -! n1 i1 -! n0 i2 -! d i3 - -#include <sysdep.h> -#undef ret /* Kludge for glibc */ - -#ifdef PIC - .text -#else - .section .rodata,#alloc -#endif - .align 8 - - .type two_to_32,@object - .size two_to_32,8 -two_to_32: - .double 0r4294967296 - - .type two_to_31,@object - .size two_to_31,8 -two_to_31: - .double 0r2147483648 - - .text -ENTRY(__udiv_qrnnd) - !#PROLOGUE# 0 - save %sp,-104,%sp - !#PROLOGUE# 1 - st %i1,[%fp-8] - ld [%fp-8],%f10 -#ifdef PIC -LOC(base): - call 1f - fitod %f10,%f4 -1: ldd [%o7-(LOC(base)-two_to_32)],%f8 -#else - sethi %hi(two_to_32),%o7 - fitod %f10,%f4 - ldd [%o7+%lo(two_to_32)],%f8 -#endif - cmp %i1,0 - bge LOC(248) - mov %i0,%i5 - faddd %f4,%f8,%f4 -LOC(248): - st %i2,[%fp-8] - ld [%fp-8],%f10 - fmuld %f4,%f8,%f6 - cmp %i2,0 - bge LOC(249) - fitod %f10,%f2 - faddd %f2,%f8,%f2 -LOC(249): - st %i3,[%fp-8] - faddd %f6,%f2,%f2 - ld [%fp-8],%f10 - cmp %i3,0 - bge LOC(250) - fitod %f10,%f4 - faddd %f4,%f8,%f4 -LOC(250): - fdivd %f2,%f4,%f2 -#ifdef PIC - ldd [%o7-(LOC(base)-two_to_31)],%f4 -#else - 
sethi %hi(two_to_31),%o7 - ldd [%o7+%lo(two_to_31)],%f4 -#endif - fcmped %f2,%f4 - nop - fbge,a LOC(251) - fsubd %f2,%f4,%f2 - fdtoi %f2,%f2 - st %f2,[%fp-8] - b LOC(252) - ld [%fp-8],%i4 -LOC(251): - fdtoi %f2,%f2 - st %f2,[%fp-8] - ld [%fp-8],%i4 - sethi %hi(-2147483648),%g2 - xor %i4,%g2,%i4 -LOC(252): - wr %g0,%i4,%y - sra %i3,31,%g2 - and %i4,%g2,%g2 - andcc %g0,0,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,0,%g1 - add %g1,%g2,%i0 - rd %y,%g3 - subcc %i2,%g3,%o7 - subxcc %i1,%i0,%g0 - be LOC(253) - cmp %o7,%i3 - - add %i4,-1,%i0 - add %o7,%i3,%o7 - st %o7,[%i5] - ret - restore -LOC(253): - blu LOC(246) - mov %i4,%i0 - add %i4,1,%i0 - sub %o7,%i3,%o7 -LOC(246): - st %o7,[%i5] - ret - restore - -END(__udiv_qrnnd) diff --git a/sysdeps/sparc/sparc64/Implies b/sysdeps/sparc/sparc64/Implies index 01bf14e73f..7abc50efcc 100644 --- a/sysdeps/sparc/sparc64/Implies +++ b/sysdeps/sparc/sparc64/Implies @@ -1,6 +1,7 @@ wordsize-64 # SPARC uses IEEE 754 floating point. 
ieee754/ldbl-128 +ieee754/dbl-64/wordsize-64 ieee754/dbl-64 ieee754/flt-32 sparc/sparc64/soft-fp diff --git a/sysdeps/sparc/sparc64/Makefile b/sysdeps/sparc/sparc64/Makefile index 3bb0238832..1a859dffc0 100644 --- a/sysdeps/sparc/sparc64/Makefile +++ b/sysdeps/sparc/sparc64/Makefile @@ -6,3 +6,7 @@ endif ifeq ($(subdir),csu) CFLAGS-initfini.s += -mcpu=v9 endif + +ifeq ($(subdir),string) +sysdep_routines += align-cpy +endif diff --git a/sysdeps/sparc/sparc64/align-cpy.S b/sysdeps/sparc/sparc64/align-cpy.S new file mode 100644 index 0000000000..bae788fe44 --- /dev/null +++ b/sysdeps/sparc/sparc64/align-cpy.S @@ -0,0 +1,85 @@ +/* Aligned copy routines specified by Sparc V9 ABI. + For 64-bit sparc. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ +#include <sysdep.h> + + .text + .align 8 +ENTRY(__align_cpy_8) +10: cmp %o0, %o1 + be,pn %xcc, 9f + mov %o0, %o3 + subcc %o2, 0x08, %o2 + be,pn %xcc, 8f +1: ldx [%o1 + 0x00], %o5 + ldx [%o1 + 0x08], %o4 + subcc %o2, 0x10, %o2 + add %o1, 0x10, %o1 + stx %o5, [%o3 + 0x00] + stx %o4, [%o3 + 0x08] + bg,pt %xcc, 1b + add %o3, 0x10, %o3 + bne,pn %xcc, 9f + nop + ldx [%o1 + 0x00], %o5 +8: stx %o5, [%o3 + 0x00] +9: retl + nop +END(__align_cpy_8) + + .align 8 +ENTRY(__align_cpy_4) +20: cmp %o0, %o1 + be,pn %xcc, 9f + mov %o0, %o3 + subcc %o2, 0x04, %o2 + be,pn %xcc, 8f +1: lduw [%o1 + 0x00], %o5 + lduw [%o1 + 0x04], %o4 + subcc %o2, 0x08, %o2 + add %o1, 0x08, %o1 + stw %o5, [%o3 + 0x00] + stw %o4, [%o3 + 0x04] + bg,pt %xcc, 1b + add %o3, 0x08, %o3 + bne,pn %xcc, 9f + nop + lduw [%o1 + 0x00], %o5 +8: stw %o5, [%o3 + 0x00] +9: retl + nop +END(__align_cpy_4) + + .align 8 +ENTRY(__align_cpy_2) + or %o0, %o1, %o3 + or %o2, %o3, %o3 + andcc %o3, 0x7, %g0 + be,pt %xcc, 10b + andcc %o3, 0x3, %g0 + be,pt %xcc, 20b + mov %o7, %g1 /* Save our return address for the sibling call below. */ + call HIDDEN_JUMPTARGET(memcpy) + mov %g1, %o7 /* call overwrote %o7; restore it so memcpy returns to our caller. */ +END(__align_cpy_2) + +weak_alias (__align_cpy_8, __align_cpy_16) +weak_alias (__align_cpy_2, __align_cpy_1) diff --git a/sysdeps/sparc/sparc64/bcopy.c b/sysdeps/sparc/sparc64/bcopy.c deleted file mode 100644 index 9a455f33c4..0000000000 --- a/sysdeps/sparc/sparc64/bcopy.c +++ /dev/null @@ -1 +0,0 @@ -/* bcopy is in memcpy.S */ diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h index 4c915eb586..82ab5a4547 100644 --- a/sysdeps/sparc/sparc64/dl-machine.h +++ b/sysdeps/sparc/sparc64/dl-machine.h @@ -513,11 +513,13 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, value = sym->st_value - sym_map->l_tls_offset + reloc->r_addend; if (r_type == R_SPARC_TLS_LE_HIX22) - *reloc_addr = (*reloc_addr & 0xffc00000) - | (((~value) >> 10) & 0x3fffff); + *(unsigned int *)reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffc00000) + | (((~value) >> 10) & 0x3fffff)); else
- *reloc_addr = (*reloc_addr & 0xffffe000) | (value & 0x3ff) - | 0x1c00; + *(unsigned int *)reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffffe000) | (value & 0x3ff) + | 0x1c00); } break; # endif @@ -661,7 +663,7 @@ elf_machine_lazy_rel (struct link_map *map, { /* 'high' is always zero, for large PLT entries the linker emits an R_SPARC_IRELATIVE. */ - sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 0); + sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 1); } else *reloc_addr = value; diff --git a/sysdeps/sparc/sparc64/dl-plt.h b/sysdeps/sparc/sparc64/dl-plt.h index e06be43a0a..ca2fe3bbd8 100644 --- a/sysdeps/sparc/sparc64/dl-plt.h +++ b/sysdeps/sparc/sparc64/dl-plt.h @@ -28,7 +28,14 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc, Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr; Elf64_Sxword disp = value - plt_vaddr; - /* Now move plt_vaddr up to the call instruction. */ + /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap, + in which case we'll be resolving all PLT entries and thus can + optimize by overwriting instructions starting at the first PLT entry + instruction and we need not be mindful of thread safety. + + Otherwise, 't' is '1'. + + Now move plt_vaddr up to the call instruction. */ plt_vaddr += ((t + 1) * 4); /* PLT entries .PLT32768 and above look always the same. */ @@ -39,10 +46,22 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc, /* Near destination. */ else if (disp >= -0x800000 && disp < 0x800000) { - /* As this is just one instruction, it is thread safe and so - we can avoid the unnecessary sethi FOO, %g1. 
- b,a target */ - insns[0] = 0x30800000 | ((disp >> 2) & 0x3fffff); + unsigned int insn; + + /* ba,a */ + insn = 0x30800000 | ((disp >> 2) & 0x3fffff); + + if (disp >= -0x100000 && disp < 0x100000) + { + /* ba,a,pt %icc */ + insn = 0x30480000 | ((disp >> 2) & 0x07ffff); + } + + /* As this is just one instruction, it is thread safe and so we + can avoid the unnecessary sethi FOO, %g1. Each 64-bit PLT + entry is 8 instructions long, so we can't run into the 'jmp' + delay slot problems 32-bit PLTs can. */ + insns[0] = insn; __asm __volatile ("flush %0" : : "r" (insns)); } /* 32-bit Sparc style, the target is in the lower 32-bits of diff --git a/sysdeps/sparc/sparc64/memcopy.h b/sysdeps/sparc/sparc64/memcopy.h new file mode 100644 index 0000000000..ec978e3c80 --- /dev/null +++ b/sysdeps/sparc/sparc64/memcopy.h @@ -0,0 +1 @@ +#include <sparc32/memcopy.h> diff --git a/sysdeps/sparc/sparc64/memcpy.S b/sysdeps/sparc/sparc64/memcpy.S index 5993358017..71e7100658 100644 --- a/sysdeps/sparc/sparc64/memcpy.S +++ b/sysdeps/sparc/sparc64/memcpy.S @@ -136,79 +136,8 @@ stx %t0, [%dst - offset - 0x10]; \ stx %t1, [%dst - offset - 0x08]; - /* Macros for non-VIS memmove code. 
*/ -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - .text .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 /* IEU0 Group */ - mov %o0, %g3 /* IEU1 */ - cmp %o4, %o2 /* IEU1 Group */ - mov %o1, %o0 /* IEU0 */ - bgeu,pt %XCC, 210f /* CTI */ - mov %g3, %o1 /* IEU0 
Group */ -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 */ -#endif - brnz,pn %o2, 220f /* CTI Group */ - add %o0, %o2, %o0 /* IEU0 */ - retl - nop -END(bcopy) - - .align 32 ENTRY(__memcpy_large) 200: be,pt %xcc, 201f /* CTI */ andcc %o0, 0x38, %g5 /* IEU1 Group */ @@ -446,65 +375,6 @@ ENTRY(__memcpy_large) mov %g4, %o0 END(__memcpy_large) -#ifdef USE_BPR - - /* void *__align_cpy_4(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3)) - */ - - .align 32 -ENTRY(__align_cpy_4) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, 15 /* IEU1 */ - bleu,pn %xcc, 208b /* CTI */ - cmp %o2, (64 * 6) /* IEU1 Group */ - bgeu,pn %xcc, 200b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - ba,pt %xcc, 216f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ -END(__align_cpy_4) - - /* void *__align_cpy_8(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7)) - */ - - .align 32 -ENTRY(__align_cpy_8) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, 15 /* IEU1 */ - bleu,pn %xcc, 208b /* CTI */ - cmp %o2, (64 * 6) /* IEU1 Group */ - bgeu,pn %xcc, 201b /* CTI */ - andcc %o0, 0x38, %g5 /* IEU1 Group */ - andcc %o2, -128, %g6 /* IEU1 Group */ - bne,a,pt %xcc, 82f + 4 /* CTI */ - ldx [%o1], %g1 /* Load */ - ba,pt %xcc, 41f /* CTI Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ -END(__align_cpy_8) - - /* void *__align_cpy_16(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15)) - */ - - .align 32 -ENTRY(__align_cpy_16) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, (64 * 6) /* IEU1 */ - bgeu,pn %xcc, 201b /* CTI */ - andcc %o0, 0x38, %g5 /* IEU1 Group */ - andcc %o2, -128, %g6 /* IEU1 Group */ - bne,a,pt %xcc, 82f + 4 /* CTI */ - ldx [%o1], %g1 /* Load */ - ba,pt %xcc, 41f /* CTI Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ -END(__align_cpy_16) - -#endif - .align 32 ENTRY(memcpy) 210: @@ 
-699,227 +569,4 @@ ENTRY(memcpy) mov %g4, %o0 END(memcpy) - .align 32 -ENTRY(__memmove_slowpath) -228: andcc %o2, 1, %g0 /* IEU1 Group */ - be,pt %icc, 2f+4 /* CTI */ -1: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 Group */ - be,pn %xcc, 229f /* CTI */ - stb %o5, [%o0] /* Store */ -2: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o0, 2, %o0 /* IEU0 */ - ldub [%o1 - 2], %g5 /* LOAD Group */ - sub %o1, 2, %o1 /* IEU0 */ - subcc %o2, 2, %o2 /* IEU1 Group */ - stb %o5, [%o0 + 1] /* Store */ - bne,pt %xcc, 2b /* CTI */ - stb %g5, [%o0] /* Store */ -229: retl - mov %g4, %o0 -219: retl - nop -END(__memmove_slowpath) - - .align 32 -ENTRY(memmove) -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 Group */ -#endif - brz,pn %o2, 219b /* CTI Group */ - sub %o0, %o1, %o4 /* IEU0 */ - cmp %o4, %o2 /* IEU1 Group */ - bgeu,pt %XCC, 218b /* CTI */ - mov %o0, %g4 /* IEU0 */ - add %o0, %o2, %o0 /* IEU0 Group */ -220: add %o1, %o2, %o1 /* IEU1 */ - cmp %o2, 15 /* IEU1 Group */ - bleu,pn %xcc, 228b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - sub %o0, %o1, %g5 /* IEU0 */ - andcc %g5, 3, %o5 /* IEU1 Group */ - bne,pn %xcc, 232f /* CTI */ - andcc %o1, 3, %g0 /* IEU1 Group */ - be,a,pt %xcc, 236f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - andcc %o1, 1, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - andcc %o1, 2, %g0 /* IEU1 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - sub %o2, 1, %o2 /* IEU0 Group */ - be,pn %xcc, 5f /* CTI Group */ - stb %g2, [%o0] /* Store */ -4: lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sub %o0, 2, %o0 /* IEU1 */ - sub %o2, 2, %o2 /* IEU0 */ - sth %g2, [%o0] /* Store Group + bubble */ -5: andcc %o1, 4, %g0 /* IEU1 */ -236: be,a,pn %xcc, 2f /* CTI */ - andcc %o2, -128, %g6 /* IEU1 Group */ - lduw [%o1 - 4], %g5 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - sub %o0, 4, %o0 /* IEU1 */ - sub %o2, 4, 
%o2 /* IEU0 Group */ - stw %g5, [%o0] /* Store */ - andcc %o2, -128, %g6 /* IEU1 Group */ -2: be,pn %xcc, 235f /* CTI */ - andcc %o0, 4, %g0 /* IEU1 Group */ - be,pn %xcc, 282f + 4 /* CTI Group */ -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 5b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ -235: andcc %o2, 0x70, %g6 /* IEU1 Group */ -41: be,pn %xcc, 280f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -279: rd %pc, %o5 /* PDU Group */ - sll %g6, 1, %g5 /* IEU0 Group */ - sub %o1, %g6, %o1 /* IEU1 */ - sub %o5, %g5, %o5 /* IEU0 Group */ - jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f /* CTI */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - stw %g2, [%o0 + 4] /* Store Group */ - sub %o1, 8, %o1 /* IEU1 */ - srlx %g2, 32, %g2 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -281: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - stw %g2, [%o0 - 4] /* Store Group */ - sub %o0, 4, %o0 /* IEU0 */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sth %g2, [%o0 - 2] /* Store Group */ - sub %o0, 2, %o0 /* IEU0 */ -1: be,pt %xcc, 211f /* CTI */ - nop /* IEU1 */ - ldub 
[%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 282b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ - be,pn %xcc, 284f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -283: rd %pc, %o5 /* PDU Group */ - sub %o1, %g6, %o1 /* IEU0 Group */ - sub %o5, %g6, %o5 /* IEU1 */ - jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f /* CTI Group */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - sub %o1, 8, %o1 /* IEU0 Group */ - stx %g2, [%o0] /* Store */ -285: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o0, 4, %o0 /* IEU0 */ - sub %o1, 4, %o1 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o0, 2, %o0 /* IEU0 */ - sub %o1, 2, %o1 /* IEU0 Group */ - sth %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - nop /* IEU0 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -1: retl - mov %g4, %o0 - -232: brz,pt %g2, 2f /* CTI Group */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1 - 1], %g5 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* 
IEU1 Group */ - bne,pt %xcc, 1b /* CTI */ - stb %g5, [%o0] /* Store */ -2: andn %o2, 7, %g5 /* IEU0 Group */ - and %o2, 7, %o2 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ -1: ldd [%g1 - 8], %f6 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f6, %f4, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 233f /* CTI */ - sub %o0, 8, %o0 /* IEU1 */ - ldd [%g1 - 8], %f4 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 */ - bne,pn %xcc, 1b /* CTI Group */ - sub %o0, 8, %o0 /* IEU0 */ -233: brz,pn %o2, 234f /* CTI Group */ - nop /* IEU0 */ -237: ldub [%o1 - 1], %g5 /* LOAD */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 237b /* CTI */ - stb %g5, [%o0] /* Store Group */ -234: wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 -END(memmove) - -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -#endif libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/memmove.c b/sysdeps/sparc/sparc64/memmove.c deleted file mode 100644 index a8d2d49948..0000000000 --- a/sysdeps/sparc/sparc64/memmove.c +++ /dev/null @@ -1 +0,0 @@ -/* memmove is in memcpy.S */ diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile new file mode 100644 index 0000000000..4d45042a95 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ + memset-niagara1 +endif diff --git a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S index ad2b0f742c..6a78295e81 100644 --- 
a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S @@ -36,34 +36,19 @@ #define XCC xcc #endif +#if !defined NOT_IN_libc + .register %g2,#scratch .register %g3,#scratch .register %g6,#scratch .text - .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp %o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) .align 32 -ENTRY(memcpy) -#ifndef USE_BPR +ENTRY(__memcpy_niagara1) +# ifndef USE_BPR srl %o2, 0, %o2 -#endif +# endif 100: /* %o0=dst, %o1=src, %o2=len */ mov %o0, %g5 cmp %o2, 0 @@ -352,245 +337,6 @@ ENTRY(memcpy) retl mov %g5, %o0 -END(memcpy) - -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define 
RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; +END(__memcpy_niagara1) - .align 32 -228: andcc %o2, 1, %g0 - be,pt %icc, 2f+4 -1: ldub [%o1 - 1], %o5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - be,pn %xcc, 229f - stb %o5, [%o0] -2: ldub [%o1 - 1], %o5 - sub %o0, 2, %o0 - ldub [%o1 - 2], %g5 - sub %o1, 2, %o1 - subcc %o2, 2, %o2 - stb %o5, [%o0 + 1] - bne,pt %xcc, 2b - stb %g5, [%o0] -229: retl - mov %g4, %o0 -out: retl - mov %g5, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brz,pn %o2, out - sub %o0, %o1, %o4 - cmp %o4, %o2 - bgeu,pt %XCC, 218b - mov %o0, %g4 - add %o0, %o2, %o0 -220: add %o1, %o2, %o1 - cmp %o2, 15 - bleu,pn %xcc, 228b - andcc %o0, 7, %g2 - sub %o0, %o1, %g5 - andcc %g5, 3, %o5 - bne,pn %xcc, 232f - andcc %o1, 3, %g0 - be,a,pt %xcc, 236f - andcc %o1, 4, %g0 - andcc %o1, 1, %g0 - be,pn %xcc, 4f - andcc %o1, 2, %g0 - ldub [%o1 - 1], %g2 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - sub %o2, 1, %o2 - be,pn %xcc, 5f - stb %g2, [%o0] -4: lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sub %o0, 2, %o0 - sub %o2, 2, %o2 - sth %g2, [%o0] -5: andcc %o1, 4, %g0 -236: be,a,pn %xcc, 2f - andcc %o2, -128, %g6 - lduw [%o1 - 4], %g5 - sub %o1, 4, %o1 - sub %o0, 4, %o0 - sub %o2, 4, %o2 - stw %g5, [%o0] - andcc %o2, -128, %g6 -2: be,pn %xcc, 235f - andcc %o0, 4, %g0 - be,pn %xcc, 282f + 4 -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, 
o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %xcc, 5b - sub %o0, 128, %o0 -235: andcc %o2, 0x70, %g6 -41: be,pn %xcc, 280f - andcc %o2, 8, %g0 - -279: rd %pc, %o5 - sll %g6, 1, %g5 - sub %o1, %g6, %o1 - sub %o5, %g5, %o5 - jmpl %o5 + %lo(280f - 279b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - stw %g2, [%o0 + 4] - sub %o1, 8, %o1 - srlx %g2, 32, %g2 - stw %g2, [%o0] -281: be,pt %xcc, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o1, 4, %o1 - stw %g2, [%o0 - 4] - sub %o0, 4, %o0 -1: be,pt %xcc, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o0, 2, %o0 -1: be,pt %xcc, 211f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %xcc, 282b - sub %o0, 128, %o0 - andcc %o2, 0x70, %g6 - be,pn %xcc, 284f - andcc %o2, 8, %g0 - -283: rd %pc, %o5 - sub %o1, %g6, %o1 - sub %o5, %g6, %o5 - jmpl %o5 + %lo(284f - 283b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - sub %o1, 8, %o1 - stx %g2, [%o0] -285: be,pt %xcc, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 
- sub %o0, 4, %o0 - sub %o1, 4, %o1 - stw %g2, [%o0] -1: be,pt %xcc, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o0, 2, %o0 - sub %o1, 2, %o1 - sth %g2, [%o0] -1: be,pt %xcc, 1f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - mov %g4, %o0 - -232: ldub [%o1 - 1], %g5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %xcc, 232b - stb %g5, [%o0] -234: retl - mov %g4, %o0 -END(memmove) - -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -weak_alias (memcpy, __align_cpy_4) -weak_alias (memcpy, __align_cpy_8) -weak_alias (memcpy, __align_cpy_16) #endif -libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S index b261f461a4..35f6989aca 100644 --- a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S @@ -138,34 +138,19 @@ LOAD(ldd, base + 0x28, %x5); \ LOAD(ldd, base + 0x30, %x6); +#if !defined NOT_IN_libc + .register %g2,#scratch .register %g3,#scratch .register %g6,#scratch .text - .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp %o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) .align 32 -ENTRY(memcpy) -#ifndef USE_BPR +ENTRY(__memcpy_niagara2) +# ifndef USE_BPR srl %o2, 0, %o2 -#endif +# endif 100: /* %o0=dst, %o1=src, %o2=len */ mov %o0, %g5 cmp %o2, 0 @@ -502,245 +487,6 @@ ENTRY(memcpy) retl mov %g5, %o0 -END(memcpy) - -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - 
offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; +END(__memcpy_niagara2) - .align 32 -228: andcc %o2, 1, %g0 - be,pt %icc, 2f+4 -1: ldub [%o1 - 1], %o5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - be,pn %XCC, 229f - stb %o5, [%o0] -2: ldub [%o1 - 1], %o5 - sub %o0, 2, %o0 - ldub [%o1 - 2], %g5 - sub %o1, 2, %o1 - subcc %o2, 2, %o2 - stb %o5, [%o0 + 1] - bne,pt %XCC, 2b - stb %g5, [%o0] -229: retl - mov %g4, %o0 -out: retl - mov %g5, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brz,pn %o2, out - sub %o0, %o1, %o4 - cmp %o4, %o2 - bgeu,pt %XCC, 218b - mov %o0, %g4 - add %o0, %o2, 
%o0 -220: add %o1, %o2, %o1 - cmp %o2, 15 - bleu,pn %XCC, 228b - andcc %o0, 7, %g2 - sub %o0, %o1, %g5 - andcc %g5, 3, %o5 - bne,pn %XCC, 232f - andcc %o1, 3, %g0 - be,a,pt %XCC, 236f - andcc %o1, 4, %g0 - andcc %o1, 1, %g0 - be,pn %XCC, 4f - andcc %o1, 2, %g0 - ldub [%o1 - 1], %g2 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - sub %o2, 1, %o2 - be,pn %XCC, 5f - stb %g2, [%o0] -4: lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sub %o0, 2, %o0 - sub %o2, 2, %o2 - sth %g2, [%o0] -5: andcc %o1, 4, %g0 -236: be,a,pn %XCC, 2f - andcc %o2, -128, %g6 - lduw [%o1 - 4], %g5 - sub %o1, 4, %o1 - sub %o0, 4, %o0 - sub %o2, 4, %o2 - stw %g5, [%o0] - andcc %o2, -128, %g6 -2: be,pn %XCC, 235f - andcc %o0, 4, %g0 - be,pn %XCC, 282f + 4 -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %XCC, 5b - sub %o0, 128, %o0 -235: andcc %o2, 0x70, %g6 -41: be,pn %XCC, 280f - andcc %o2, 8, %g0 - -279: rd %pc, %o5 - sll %g6, 1, %g5 - sub %o1, %g6, %o1 - sub %o5, %g5, %o5 - jmpl %o5 + %lo(280f - 279b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %XCC, 281f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - stw %g2, [%o0 + 4] - sub %o1, 8, %o1 - srlx %g2, 32, %g2 - stw %g2, [%o0] -281: be,pt %XCC, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o1, 4, %o1 - stw %g2, [%o0 - 4] - sub %o0, 4, %o0 -1: be,pt %XCC, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o0, 2, %o0 -1: be,pt %XCC, 211f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -211: retl 
- mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %XCC, 282b - sub %o0, 128, %o0 - andcc %o2, 0x70, %g6 - be,pn %XCC, 284f - andcc %o2, 8, %g0 - -283: rd %pc, %o5 - sub %o1, %g6, %o1 - sub %o5, %g6, %o5 - jmpl %o5 + %lo(284f - 283b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %XCC, 285f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - sub %o1, 8, %o1 - stx %g2, [%o0] -285: be,pt %XCC, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o0, 4, %o0 - sub %o1, 4, %o1 - stw %g2, [%o0] -1: be,pt %XCC, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o0, 2, %o0 - sub %o1, 2, %o1 - sth %g2, [%o0] -1: be,pt %XCC, 1f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - mov %g4, %o0 - -232: ldub [%o1 - 1], %g5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %XCC, 232b - stb %g5, [%o0] -234: retl - mov %g4, %o0 -END(memmove) - -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -weak_alias (memcpy, __align_cpy_4) -weak_alias (memcpy, __align_cpy_8) -weak_alias (memcpy, __align_cpy_16) #endif -libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S new file mode 100644 index 0000000000..34ca089f93 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S @@ -0,0 +1,320 @@ +/* Copy SIZE bytes from SRC to DEST. + For UltraSPARC-III. + Copyright (C) 2001, 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by David S. Miller (davem@redhat.com) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> + +#define ASI_BLK_P 0xf0 +#define FPRS_FEF 0x04 +#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs + +#ifndef XCC +#define USE_BPR +#define XCC xcc +#endif + +#if !defined NOT_IN_libc + + .register %g2,#scratch + .register %g3,#scratch + .register %g6,#scratch + + .text + + /* Special/non-trivial issues of this code: + * + * 1) %o5 is preserved from VISEntryHalf to VISExitHalf + * 2) Only low 32 FPU registers are used so that only the + * lower half of the FPU register set is dirtied by this + * code. This is especially important in the kernel. + * 3) This code never prefetches cachelines past the end + * of the source buffer. + * + * The cheetah's flexible spine, oversized liver, enlarged heart, + * slender muscular body, and claws make it the swiftest hunter + * in Africa and the fastest animal on land. Can reach speeds + * of up to 2.4GB per second. 
+ */ + .align 32 +ENTRY(__memcpy_ultra3) + +100: /* %o0=dst, %o1=src, %o2=len */ + mov %o0, %g5 + cmp %o2, 0 + be,pn %XCC, out +218: or %o0, %o1, %o3 + cmp %o2, 16 + bleu,a,pn %XCC, small_copy + or %o3, %o2, %o3 + + cmp %o2, 256 + blu,pt %XCC, medium_copy + andcc %o3, 0x7, %g0 + + ba,pt %xcc, enter + andcc %o0, 0x3f, %g2 + + /* Here len >= 256 and condition codes reflect execution + * of "andcc %o0, 0x3f, %g2", done by caller. + */ + .align 64 +enter: + /* Is 'dst' already aligned on a 64-byte boundary? */ + be,pt %XCC, 2f + + /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number + * of bytes to copy to make 'dst' 64-byte aligned. We pre- + * subtract this from 'len'. + */ + sub %g2, 0x40, %g2 + sub %g0, %g2, %g2 + sub %o2, %g2, %o2 + + /* Copy %g2 bytes from src to dst, one byte at a time. */ +1: ldub [%o1 + 0x00], %o3 + add %o1, 0x1, %o1 + add %o0, 0x1, %o0 + subcc %g2, 0x1, %g2 + + bg,pt %XCC, 1b + stb %o3, [%o0 + -1] + +2: VISEntryHalf + and %o1, 0x7, %g1 + ba,pt %xcc, begin + alignaddr %o1, %g0, %o1 + + .align 64 +begin: + prefetch [%o1 + 0x000], #one_read + prefetch [%o1 + 0x040], #one_read + andn %o2, (0x40 - 1), %o4 + prefetch [%o1 + 0x080], #one_read + prefetch [%o1 + 0x0c0], #one_read + ldd [%o1 + 0x000], %f0 + prefetch [%o1 + 0x100], #one_read + ldd [%o1 + 0x008], %f2 + prefetch [%o1 + 0x140], #one_read + ldd [%o1 + 0x010], %f4 + prefetch [%o1 + 0x180], #one_read + faligndata %f0, %f2, %f16 + ldd [%o1 + 0x018], %f6 + faligndata %f2, %f4, %f18 + ldd [%o1 + 0x020], %f8 + faligndata %f4, %f6, %f20 + ldd [%o1 + 0x028], %f10 + faligndata %f6, %f8, %f22 + + ldd [%o1 + 0x030], %f12 + faligndata %f8, %f10, %f24 + ldd [%o1 + 0x038], %f14 + faligndata %f10, %f12, %f26 + ldd [%o1 + 0x040], %f0 + + sub %o4, 0x80, %o4 + add %o1, 0x40, %o1 + ba,pt %xcc, loop + srl %o4, 6, %o3 + + .align 64 +loop: + ldd [%o1 + 0x008], %f2 + faligndata %f12, %f14, %f28 + ldd [%o1 + 0x010], %f4 + faligndata %f14, %f0, %f30 + stda %f16, [%o0] ASI_BLK_P + ldd [%o1 + 0x018], %f6
+ faligndata %f0, %f2, %f16 + + ldd [%o1 + 0x020], %f8 + faligndata %f2, %f4, %f18 + ldd [%o1 + 0x028], %f10 + faligndata %f4, %f6, %f20 + ldd [%o1 + 0x030], %f12 + faligndata %f6, %f8, %f22 + ldd [%o1 + 0x038], %f14 + faligndata %f8, %f10, %f24 + + ldd [%o1 + 0x040], %f0 + prefetch [%o1 + 0x180], #one_read + faligndata %f10, %f12, %f26 + subcc %o3, 0x01, %o3 + add %o1, 0x40, %o1 + bg,pt %XCC, loop + add %o0, 0x40, %o0 + + /* Finally we copy the last full 64-byte block. */ +loopfini: + ldd [%o1 + 0x008], %f2 + faligndata %f12, %f14, %f28 + ldd [%o1 + 0x010], %f4 + faligndata %f14, %f0, %f30 + stda %f16, [%o0] ASI_BLK_P + ldd [%o1 + 0x018], %f6 + faligndata %f0, %f2, %f16 + ldd [%o1 + 0x020], %f8 + faligndata %f2, %f4, %f18 + ldd [%o1 + 0x028], %f10 + faligndata %f4, %f6, %f20 + ldd [%o1 + 0x030], %f12 + faligndata %f6, %f8, %f22 + ldd [%o1 + 0x038], %f14 + faligndata %f8, %f10, %f24 + cmp %g1, 0 + be,pt %XCC, 1f + add %o0, 0x40, %o0 + ldd [%o1 + 0x040], %f0 +1: faligndata %f10, %f12, %f26 + faligndata %f12, %f14, %f28 + faligndata %f14, %f0, %f30 + stda %f16, [%o0] ASI_BLK_P + add %o0, 0x40, %o0 + add %o1, 0x40, %o1 + membar #Sync + + /* Now we copy the (len modulo 64) bytes at the end. + * Note how we borrow the %f0 loaded above. + * + * Also notice how this code is careful not to perform a + * load past the end of the src buffer. + */ +loopend: + and %o2, 0x3f, %o2 + andcc %o2, 0x38, %g2 + be,pn %XCC, endcruft + subcc %g2, 0x8, %g2 + be,pn %XCC, endcruft + cmp %g1, 0 + + be,a,pt %XCC, 1f + ldd [%o1 + 0x00], %f0 + +1: ldd [%o1 + 0x08], %f2 + add %o1, 0x8, %o1 + sub %o2, 0x8, %o2 + subcc %g2, 0x8, %g2 + faligndata %f0, %f2, %f8 + std %f8, [%o0 + 0x00] + be,pn %XCC, endcruft + add %o0, 0x8, %o0 + ldd [%o1 + 0x08], %f0 + add %o1, 0x8, %o1 + sub %o2, 0x8, %o2 + subcc %g2, 0x8, %g2 + faligndata %f2, %f0, %f8 + std %f8, [%o0 + 0x00] + bne,pn %XCC, 1b + add %o0, 0x8, %o0 + + /* If anything is left, we copy it one byte at a time.
+ * Note that %g1 is (src & 0x7) saved above before the + * alignaddr was performed. + */ +endcruft: + cmp %o2, 0 + add %o1, %g1, %o1 + VISExitHalf + be,pn %XCC, out + sub %o0, %o1, %o3 + + andcc %g1, 0x7, %g0 + bne,pn %icc, small_copy_unaligned + andcc %o2, 0x8, %g0 + be,pt %icc, 1f + nop + ldx [%o1], %o5 + stx %o5, [%o1 + %o3] + add %o1, 0x8, %o1 + +1: andcc %o2, 0x4, %g0 + be,pt %icc, 1f + nop + lduw [%o1], %o5 + stw %o5, [%o1 + %o3] + add %o1, 0x4, %o1 + +1: andcc %o2, 0x2, %g0 + be,pt %icc, 1f + nop + lduh [%o1], %o5 + sth %o5, [%o1 + %o3] + add %o1, 0x2, %o1 + +1: andcc %o2, 0x1, %g0 + be,pt %icc, out + nop + ldub [%o1], %o5 + ba,pt %xcc, out + stb %o5, [%o1 + %o3] + +medium_copy: /* 16 < len < 256 */ + bne,pn %XCC, small_copy_unaligned + sub %o0, %o1, %o3 + +medium_copy_aligned: + andn %o2, 0x7, %o4 + and %o2, 0x7, %o2 +1: subcc %o4, 0x8, %o4 + ldx [%o1], %o5 + stx %o5, [%o1 + %o3] + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 + andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + lduw [%o1], %o5 + stw %o5, [%o1 + %o3] + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, out + nop + ba,pt %xcc, small_copy_unaligned + nop + +small_copy: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, small_copy_unaligned + sub %o0, %o1, %o3 + +small_copy_aligned: + subcc %o2, 4, %o2 + lduw [%o1], %g1 + stw %g1, [%o1 + %o3] + bgu,pt %XCC, small_copy_aligned + add %o1, 4, %o1 + +out: retl + mov %g5, %o0 + + .align 32 +small_copy_unaligned: + subcc %o2, 1, %o2 + ldub [%o1], %g1 + stb %g1, [%o1 + %o3] + bgu,pt %XCC, small_copy_unaligned + add %o1, 1, %o1 + retl + mov %g5, %o0 + +END(__memcpy_ultra3) + +#endif
\ No newline at end of file diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy.S new file mode 100644 index 0000000000..a708de10e2 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memcpy.S @@ -0,0 +1,107 @@ +/* Multiple versions of memcpy + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + +#if !defined NOT_IN_libc + .text +ENTRY(memcpy) + .type memcpy, @gnu_indirect_function +# ifdef SHARED + mov %o7, %o5 + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3 + call 1f + or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3 +1: add %o7, %o3, %o3 + mov %o5, %o7 + sethi %hi(_rtld_global_ro), %o2 + or %o2, %lo(_rtld_global_ro), %o2 +# ifdef __arch64__ + ldx [%o3 + %o2], %o2 + ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2 +# else + ld [%o3 + %o2], %o2 + ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2 +# endif +# else + set _dl_hwcap, %o3 +# ifdef __arch64__ + ldx [%o3], %o2 +# else + ld [%o3 + 4], %o2 +# endif +# endif + andcc %o2, 0x80, %g0 ! HWCAP_SPARC_N2 + be 1f + andcc %o2, 0x40, %g0 ! 
HWCAP_SPARC_BLKINIT +# ifdef SHARED + sethi %gdop_hix22(__memcpy_niagara2), %o1 + xor %o1, %gdop_lox10(__memcpy_niagara2), %o1 +# else + set __memcpy_niagara2, %o1 +# endif + ba 10f + nop +1: be 1f + andcc %o2, 0x20, %g0 ! HWCAP_SPARC_ULTRA3 +# ifdef SHARED + sethi %gdop_hix22(__memcpy_niagara1), %o1 + xor %o1, %gdop_lox10(__memcpy_niagara1), %o1 +# else + set __memcpy_niagara1, %o1 +# endif + ba 10f + nop +1: be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__memcpy_ultra3), %o1 + xor %o1, %gdop_lox10(__memcpy_ultra3), %o1 +# else + set __memcpy_ultra3, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memcpy_ultra1), %o1 + xor %o1, %gdop_lox10(__memcpy_ultra1), %o1 +# else + set __memcpy_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(memcpy) + +# undef libc_hidden_builtin_def +/* IFUNC doesn't work with the hidden functions in a shared library. */ +# define libc_hidden_builtin_def(name) \ + .globl __GI_memcpy; __GI_memcpy = __memcpy_ultra1 + +#define memcpy __memcpy_ultra1 + +#endif + +#include "../memcpy.S" diff --git a/sysdeps/sparc/sparc64/sparcv9v/memset.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S index 64817b8871..20ea056216 100644 --- a/sysdeps/sparc/sparc64/sparcv9v/memset.S +++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S @@ -29,12 +29,14 @@ #define XCC xcc #endif +#if !defined NOT_IN_libc + .register %g2,#scratch .text .align 32 -ENTRY(memset) +ENTRY(__memset_niagara1) /* %o0=buf, %o1=pat, %o2=len */ and %o1, 0xff, %o3 mov %o2, %o1 @@ -45,14 +47,14 @@ ENTRY(memset) sllx %o2, 32, %g1 ba,pt %XCC, 1f or %g1, %o2, %o2 -END(memset) +END(__memset_niagara1) -ENTRY(__bzero) +ENTRY(__bzero_niagara1) clr %o2 1: -#ifndef USE_BRP +# ifndef USE_BRP srl %o1, 0, %o1 -#endif +# endif brz,pn %o1, 90f mov %o0, %o3 @@ -125,7 +127,6 @@ ENTRY(__bzero) 90: retl mov %o3, %o0 -END(__bzero) +END(__bzero_niagara1) -libc_hidden_builtin_def (memset) -weak_alias (__bzero, bzero) +#endif diff --git 
a/sysdeps/sparc/sparc64/multiarch/memset.S b/sysdeps/sparc/sparc64/multiarch/memset.S new file mode 100644 index 0000000000..23e513f18f --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memset.S @@ -0,0 +1,145 @@ +/* Multiple versions of memset and bzero + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + +#if !defined NOT_IN_libc + .text +ENTRY(memset) + .type memset, @gnu_indirect_function +# ifdef SHARED + mov %o7, %o5 + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3 + call 1f + or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3 +1: add %o7, %o3, %o3 + mov %o5, %o7 + sethi %hi(_rtld_global_ro), %o2 + or %o2, %lo(_rtld_global_ro), %o2 +# ifdef __arch64__ + ldx [%o3 + %o2], %o2 + ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2 +# else + ld [%o3 + %o2], %o2 + ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2 +# endif +# else + set _dl_hwcap, %o3 +# ifdef __arch64__ + ldx [%o3], %o2 +# else + ld [%o3 + 4], %o2 +# endif +# endif + andcc %o2, 0x40, %g0 ! 
HWCAP_SPARC_BLKINIT + be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__memset_niagara1), %o1 + xor %o1, %gdop_lox10(__memset_niagara1), %o1 +# else + set __memset_niagara1, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memset_ultra1), %o1 + xor %o1, %gdop_lox10(__memset_ultra1), %o1 +# else + set __memset_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(memset) + +ENTRY(__bzero) + .type bzero, @gnu_indirect_function +# ifdef SHARED + mov %o7, %o5 + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3 + call 1f + or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3 +1: add %o7, %o3, %o3 + mov %o5, %o7 + sethi %hi(_rtld_global_ro), %o2 + or %o2, %lo(_rtld_global_ro), %o2 +# ifdef __arch64__ + ldx [%o3 + %o2], %o2 + ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2 +# else + ld [%o3 + %o2], %o2 + ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2 +# endif +# else + set _dl_hwcap, %o3 +# ifdef __arch64__ + ldx [%o3], %o2 +# else + ld [%o3 + 4], %o2 +# endif +# endif + andcc %o2, 0x40, %g0 ! HWCAP_SPARC_BLKINIT + be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__bzero_niagara1), %o1 + xor %o1, %gdop_lox10(__bzero_niagara1), %o1 +# else + set __bzero_niagara1, %o1 +# endif + ba 10f + nop +9: /* Generic fallback: must resolve to the bzero entry in both the SHARED and static cases; the memset entry expects (buf, pat, len). */ +# ifdef SHARED + sethi %gdop_hix22(__bzero_ultra1), %o1 + xor %o1, %gdop_lox10(__bzero_ultra1), %o1 +# else + set __bzero_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__bzero) + +weak_alias (__bzero, bzero) + +# undef weak_alias +# define weak_alias(a, b) + +# undef libc_hidden_builtin_def +/* IFUNC doesn't work with the hidden functions in a shared library.
*/ +# define libc_hidden_builtin_def(name) \ + .globl __GI_memset; __GI_memset = __memset_ultra1 + +#define memset __memset_ultra1 +#define __bzero __bzero_ultra1 + +#endif + +#include "../memset.S" diff --git a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S b/sysdeps/sparc/sparc64/sparcv9b/memcpy.S deleted file mode 100644 index 760d526630..0000000000 --- a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S +++ /dev/null @@ -1,610 +0,0 @@ -/* Copy SIZE bytes from SRC to DEST. - For UltraSPARC-III. - Copyright (C) 2001, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@redhat.com) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. 
*/ - -#include <sysdep.h> - -#define ASI_BLK_P 0xf0 -#define FPRS_FEF 0x04 -#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs -#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs - -#ifndef XCC -#define USE_BPR -#define XCC xcc -#endif - - .register %g2,#scratch - .register %g3,#scratch - .register %g6,#scratch - - .text - .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp %o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) - - /* Special/non-trivial issues of this code: - * - * 1) %o5 is preserved from VISEntryHalf to VISExitHalf - * 2) Only low 32 FPU registers are used so that only the - * lower half of the FPU register set is dirtied by this - * code. This is especially important in the kernel. - * 3) This code never prefetches cachelines past the end - * of the source buffer. - * - * The cheetah's flexible spine, oversized liver, enlarged heart, - * slender muscular body, and claws make it the swiftest hunter - * in Africa and the fastest animal on land. Can reach speeds - * of up to 2.4GB per second. - */ - .align 32 -ENTRY(memcpy) - -100: /* %o0=dst, %o1=src, %o2=len */ - mov %o0, %g5 - cmp %o2, 0 - be,pn %XCC, out -218: or %o0, %o1, %o3 - cmp %o2, 16 - bleu,a,pn %XCC, small_copy - or %o3, %o2, %o3 - - cmp %o2, 256 - blu,pt %XCC, medium_copy - andcc %o3, 0x7, %g0 - - ba,pt %xcc, enter - andcc %o0, 0x3f, %g2 - - /* Here len >= 256 and condition codes reflect execution - * of "andcc %o0, 0x7, %g2", done by caller. - */ - .align 64 -enter: - /* Is 'dst' already aligned on an 64-byte boundary? */ - be,pt %XCC, 2f - - /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number - * of bytes to copy to make 'dst' 64-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x40, %g2 - sub %g0, %g2, %g2 - sub %o2, %g2, %o2 - - /* Copy %g2 bytes from src to dst, one byte at a time. 
*/ -1: ldub [%o1 + 0x00], %o3 - add %o1, 0x1, %o1 - add %o0, 0x1, %o0 - subcc %g2, 0x1, %g2 - - bg,pt %XCC, 1b - stb %o3, [%o0 + -1] - -2: VISEntryHalf - and %o1, 0x7, %g1 - ba,pt %xcc, begin - alignaddr %o1, %g0, %o1 - - .align 64 -begin: - prefetch [%o1 + 0x000], #one_read - prefetch [%o1 + 0x040], #one_read - andn %o2, (0x40 - 1), %o4 - prefetch [%o1 + 0x080], #one_read - prefetch [%o1 + 0x0c0], #one_read - ldd [%o1 + 0x000], %f0 - prefetch [%o1 + 0x100], #one_read - ldd [%o1 + 0x008], %f2 - prefetch [%o1 + 0x140], #one_read - ldd [%o1 + 0x010], %f4 - prefetch [%o1 + 0x180], #one_read - faligndata %f0, %f2, %f16 - ldd [%o1 + 0x018], %f6 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x020], %f8 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x028], %f10 - faligndata %f6, %f8, %f22 - - ldd [%o1 + 0x030], %f12 - faligndata %f8, %f10, %f24 - ldd [%o1 + 0x038], %f14 - faligndata %f10, %f12, %f26 - ldd [%o1 + 0x040], %f0 - - sub %o4, 0x80, %o4 - add %o1, 0x40, %o1 - ba,pt %xcc, loop - srl %o4, 6, %o3 - - .align 64 -loop: - ldd [%o1 + 0x008], %f2 - faligndata %f12, %f14, %f28 - ldd [%o1 + 0x010], %f4 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - ldd [%o1 + 0x018], %f6 - faligndata %f0, %f2, %f16 - - ldd [%o1 + 0x020], %f8 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x028], %f10 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x030], %f12 - faligndata %f6, %f8, %f22 - ldd [%o1 + 0x038], %f14 - faligndata %f8, %f10, %f24 - - ldd [%o1 + 0x040], %f0 - prefetch [%o1 + 0x180], #one_read - faligndata %f10, %f12, %f26 - subcc %o3, 0x01, %o3 - add %o1, 0x40, %o1 - bg,pt %XCC, loop - add %o0, 0x40, %o0 - - /* Finally we copy the last full 64-byte block. 
*/ -loopfini: - ldd [%o1 + 0x008], %f2 - faligndata %f12, %f14, %f28 - ldd [%o1 + 0x010], %f4 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - ldd [%o1 + 0x018], %f6 - faligndata %f0, %f2, %f16 - ldd [%o1 + 0x020], %f8 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x028], %f10 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x030], %f12 - faligndata %f6, %f8, %f22 - ldd [%o1 + 0x038], %f14 - faligndata %f8, %f10, %f24 - cmp %g1, 0 - be,pt %XCC, 1f - add %o0, 0x40, %o0 - ldd [%o1 + 0x040], %f0 -1: faligndata %f10, %f12, %f26 - faligndata %f12, %f14, %f28 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - add %o0, 0x40, %o0 - add %o1, 0x40, %o1 - membar #Sync - - /* Now we copy the (len modulo 64) bytes at the end. - * Note how we borrow the %f0 loaded above. - * - * Also notice how this code is careful not to perform a - * load past the end of the src buffer. - */ -loopend: - and %o2, 0x3f, %o2 - andcc %o2, 0x38, %g2 - be,pn %XCC, endcruft - subcc %g2, 0x8, %g2 - be,pn %XCC, endcruft - cmp %g1, 0 - - be,a,pt %XCC, 1f - ldd [%o1 + 0x00], %f0 - -1: ldd [%o1 + 0x08], %f2 - add %o1, 0x8, %o1 - sub %o2, 0x8, %o2 - subcc %g2, 0x8, %g2 - faligndata %f0, %f2, %f8 - std %f8, [%o0 + 0x00] - be,pn %XCC, endcruft - add %o0, 0x8, %o0 - ldd [%o1 + 0x08], %f0 - add %o1, 0x8, %o1 - sub %o2, 0x8, %o2 - subcc %g2, 0x8, %g2 - faligndata %f2, %f0, %f8 - std %f8, [%o0 + 0x00] - bne,pn %XCC, 1b - add %o0, 0x8, %o0 - - /* If anything is left, we copy it one byte at a time. - * Note that %g1 is (src & 0x3) saved above before the - * alignaddr was performed. 
- */ -endcruft: - cmp %o2, 0 - add %o1, %g1, %o1 - VISExitHalf - be,pn %XCC, out - sub %o0, %o1, %o3 - - andcc %g1, 0x7, %g0 - bne,pn %icc, small_copy_unaligned - andcc %o2, 0x8, %g0 - be,pt %icc, 1f - nop - ldx [%o1], %o5 - stx %o5, [%o1 + %o3] - add %o1, 0x8, %o1 - -1: andcc %o2, 0x4, %g0 - be,pt %icc, 1f - nop - lduw [%o1], %o5 - stw %o5, [%o1 + %o3] - add %o1, 0x4, %o1 - -1: andcc %o2, 0x2, %g0 - be,pt %icc, 1f - nop - lduh [%o1], %o5 - sth %o5, [%o1 + %o3] - add %o1, 0x2, %o1 - -1: andcc %o2, 0x1, %g0 - be,pt %icc, out - nop - ldub [%o1], %o5 - ba,pt %xcc, out - stb %o5, [%o1 + %o3] - -medium_copy: /* 16 < len <= 64 */ - bne,pn %XCC, small_copy_unaligned - sub %o0, %o1, %o3 - -medium_copy_aligned: - andn %o2, 0x7, %o4 - and %o2, 0x7, %o2 -1: subcc %o4, 0x8, %o4 - ldx [%o1], %o5 - stx %o5, [%o1 + %o3] - bgu,pt %XCC, 1b - add %o1, 0x8, %o1 - andcc %o2, 0x4, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x4, %o2 - lduw [%o1], %o5 - stw %o5, [%o1 + %o3] - add %o1, 0x4, %o1 -1: cmp %o2, 0 - be,pt %XCC, out - nop - ba,pt %xcc, small_copy_unaligned - nop - -small_copy: /* 0 < len <= 16 */ - andcc %o3, 0x3, %g0 - bne,pn %XCC, small_copy_unaligned - sub %o0, %o1, %o3 - -small_copy_aligned: - subcc %o2, 4, %o2 - lduw [%o1], %g1 - stw %g1, [%o1 + %o3] - bgu,pt %XCC, small_copy_aligned - add %o1, 4, %o1 - -out: retl - mov %g5, %o0 - - .align 32 -small_copy_unaligned: - subcc %o2, 1, %o2 - ldub [%o1], %g1 - stb %g1, [%o1 + %o3] - bgu,pt %XCC, small_copy_unaligned - add %o1, 1, %o1 - retl - mov %g5, %o0 - -END(memcpy) - -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw 
%t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - - .align 32 -228: andcc %o2, 1, %g0 /* IEU1 Group */ - be,pt %icc, 2f+4 /* CTI */ -1: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 Group */ - be,pn %xcc, 229f /* CTI */ - stb %o5, [%o0] /* Store */ -2: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o0, 2, %o0 /* IEU0 */ - ldub [%o1 - 2], %g5 /* LOAD Group */ - sub %o1, 2, %o1 /* IEU0 */ - subcc %o2, 2, %o2 /* IEU1 Group */ - stb %o5, [%o0 + 1] /* Store */ - bne,pt %xcc, 2b /* CTI */ - stb %g5, [%o0] /* Store */ -229: retl - mov %g4, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 Group */ -#endif - 
brz,pn %o2, out /* CTI Group */ - sub %o0, %o1, %o4 /* IEU0 */ - cmp %o4, %o2 /* IEU1 Group */ - bgeu,pt %XCC, 218b /* CTI */ - mov %o0, %g4 /* IEU0 */ - add %o0, %o2, %o0 /* IEU0 Group */ -220: add %o1, %o2, %o1 /* IEU1 */ - cmp %o2, 15 /* IEU1 Group */ - bleu,pn %xcc, 228b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - sub %o0, %o1, %g5 /* IEU0 */ - andcc %g5, 3, %o5 /* IEU1 Group */ - bne,pn %xcc, 232f /* CTI */ - andcc %o1, 3, %g0 /* IEU1 Group */ - be,a,pt %xcc, 236f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - andcc %o1, 1, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - andcc %o1, 2, %g0 /* IEU1 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - sub %o2, 1, %o2 /* IEU0 Group */ - be,pn %xcc, 5f /* CTI Group */ - stb %g2, [%o0] /* Store */ -4: lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sub %o0, 2, %o0 /* IEU1 */ - sub %o2, 2, %o2 /* IEU0 */ - sth %g2, [%o0] /* Store Group + bubble */ -5: andcc %o1, 4, %g0 /* IEU1 */ -236: be,a,pn %xcc, 2f /* CTI */ - andcc %o2, -128, %g6 /* IEU1 Group */ - lduw [%o1 - 4], %g5 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - sub %o0, 4, %o0 /* IEU1 */ - sub %o2, 4, %o2 /* IEU0 Group */ - stw %g5, [%o0] /* Store */ - andcc %o2, -128, %g6 /* IEU1 Group */ -2: be,pn %xcc, 235f /* CTI */ - andcc %o0, 4, %g0 /* IEU1 Group */ - be,pn %xcc, 282f + 4 /* CTI Group */ -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 5b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ -235: andcc %o2, 0x70, %g6 /* IEU1 Group */ -41: be,pn %xcc, 280f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -279: rd %pc, %o5 /* PDU Group */ - sll %g6, 1, %g5 /* IEU0 Group */ - sub %o1, %g6, 
%o1 /* IEU1 */ - sub %o5, %g5, %o5 /* IEU0 Group */ - jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f /* CTI */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - stw %g2, [%o0 + 4] /* Store Group */ - sub %o1, 8, %o1 /* IEU1 */ - srlx %g2, 32, %g2 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -281: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - stw %g2, [%o0 - 4] /* Store Group */ - sub %o0, 4, %o0 /* IEU0 */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sth %g2, [%o0 - 2] /* Store Group */ - sub %o0, 2, %o0 /* IEU0 */ -1: be,pt %xcc, 211f /* CTI */ - nop /* IEU1 */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 282b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ - be,pn %xcc, 284f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -283: rd %pc, %o5 /* PDU Group */ - sub %o1, %g6, %o1 /* IEU0 Group */ - sub %o5, %g6, %o5 /* IEU1 */ - jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - 
RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f /* CTI Group */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - sub %o1, 8, %o1 /* IEU0 Group */ - stx %g2, [%o0] /* Store */ -285: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o0, 4, %o0 /* IEU0 */ - sub %o1, 4, %o1 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o0, 2, %o0 /* IEU0 */ - sub %o1, 2, %o1 /* IEU0 Group */ - sth %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - nop /* IEU0 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -1: retl - mov %g4, %o0 - -232: brz,pt %g2, 2f /* CTI Group */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1 - 1], %g5 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* IEU1 Group */ - bne,pt %xcc, 1b /* CTI */ - stb %g5, [%o0] /* Store */ -2: andn %o2, 7, %g5 /* IEU0 Group */ - and %o2, 7, %o2 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ -1: ldd [%g1 - 8], %f6 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f6, %f4, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 233f /* CTI */ - sub %o0, 8, %o0 /* IEU1 */ - ldd [%g1 - 8], %f4 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 */ - bne,pn %xcc, 1b /* CTI Group */ - sub %o0, 8, %o0 
/* IEU0 */ -233: brz,pn %o2, 234f /* CTI Group */ - nop /* IEU0 */ -237: ldub [%o1 - 1], %g5 /* LOAD */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 237b /* CTI */ - stb %g5, [%o0] /* Store Group */ -234: wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 -END(memmove) - -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -weak_alias (memcpy, __align_cpy_4) -weak_alias (memcpy, __align_cpy_8) -weak_alias (memcpy, __align_cpy_16) -#endif -libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9v2/memset.S b/sysdeps/sparc/sparc64/sparcv9v2/memset.S deleted file mode 100644 index 809d3ed9c6..0000000000 --- a/sysdeps/sparc/sparc64/sparcv9v2/memset.S +++ /dev/null @@ -1 +0,0 @@ -#include <sparc64/sparcv9v/memset.S> diff --git a/sysdeps/sparc/sparc64/strlen.S b/sysdeps/sparc/sparc64/strlen.S index cc15e4e3fb..64350fb05e 100644 --- a/sysdeps/sparc/sparc64/strlen.S +++ b/sysdeps/sparc/sparc64/strlen.S @@ -1,8 +1,9 @@ /* Determine the length of a string. For SPARC v9. - Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc. + Copyright (C) 1998, 1999, 2003, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and - Jakub Jelinek <jj@ultra.linux.cz>. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>, + Jakub Jelinek <jj@ultra.linux.cz>, and + David S. Miller <davem@davemloft.net>. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,155 +21,66 @@ 02111-1307 USA. */ #include <sysdep.h> -#include <asm/asi.h> - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. 
It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. - */ + + .register %g2, #scratch + .register %g3, #scratch .text .align 32 ENTRY(strlen) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - ldub [%o0], %o3 /* Load */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - mov %o0, %o1 /* IEU1 */ - - sllx %g1, 32, %g4 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - or %g1, %g4, %g1 /* IEU0 Group */ - brz,pn %o3, 13f /* CTI+IEU1 */ - - sllx %g1, 7, %g4 /* IEU0 Group */ - bne,a,pn %icc, 15f /* CTI */ - add %o0, 1, %o0 /* IEU1 */ - /* %g1 = 0x0101010101010101 * - * %g4 = 0x8080808080808080 * - * %o0 = string pointer * - * %o1 = start of string */ -1: ldx [%o0], %o3 /* Load Group */ - - add %o0, 8, %o0 /* IEU1 */ -2: sub %o3, %g1, %o2 /* IEU0 Group */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %o3, %o5 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ -#ifdef EIGHTBIT_NOT_RARE - srlx %o5, 32, %o5 /* IEU0 */ - -3: andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - srlx %o2, 32, %o2 /* IEU0 */ - -3: andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - be,pn %xcc, 4f /* CTI */ - srlx %g5, 56, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 12f /* CTI */ - srlx %g5, 48, %o2 /* 
IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 11f /* CTI */ - - srlx %g5, 40, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 10f /* CTI */ - srlx %g5, 32, %o2 /* IEU0 */ - - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 9f /* CTI */ -4: srlx %g5, 24, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 8f /* CTI */ - srlx %g5, 16, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - - srlx %g5, 8, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 6f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ - ba,pt %xcc, 3b /* CTI */ - - srlx %o2, 32, %o2 /* IEU0 */ -5: add %o0, -9, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -6: add %o0, -10, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -7: add %o0, -11, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -8: add %o0, -12, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -9: add %o0, -13, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -10: add %o0, -14, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -11: add %o0, -15, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -12: add %o0, -16, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -13: retl /* CTI+IEU1 Group */ - mov 0, %o0 /* IEU0 */ - nop - -15: ldub [%o0], %o3 /* Load Group */ -16: andcc %o0, 7, %g0 /* IEU1 */ - be,pn %icc, 1b /* CTI */ - nop /* IEU0 Group */ - - add %o0, 1, %o0 /* IEU1 */ - andcc %o3, 0xff, %g0 /* IEU1 Group */ - bne,a,pt %icc, 16b /* CTI */ - 
lduba [%o0] ASI_PNF, %o3 /* Load */ - - add %o0, -1, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ + mov %o0, %o1 + andn %o0, 0x7, %o0 + + ldx [%o0], %o5 + and %o1, 0x7, %g1 + mov -1, %g5 + + sethi %hi(0x01010101), %o2 + sll %g1, 3, %g1 + + or %o2, %lo(0x01010101), %o2 + srlx %g5, %g1, %o3 + + sllx %o2, 32, %g1 + sethi %hi(0x0000ff00), %g5 + + orn %o5, %o3, %o5 + or %o2, %g1, %o2 + + sllx %o2, 7, %o3 +10: add %o0, 8, %o0 + + andn %o3, %o5, %g1 + sub %o5, %o2, %g2 + + andcc %g1, %g2, %g0 + be,a,pt %xcc, 10b + ldx [%o0], %o5 + srlx %o5, 32, %g1 + + andn %o3, %g1, %o4 + sub %g1, %o2, %g2 + + add %o0, 4, %g3 + andcc %o4, %g2, %g0 + movne %icc, %g1, %o5 + + move %icc, %g3, %o0 + or %g5, %lo(0x0000ff00), %g5 + mov 3 - 8, %g2 + + andcc %o5, %g5, %g0 + srlx %o5, 16, %g1 + move %icc, 2 - 8, %g2 + + andcc %g1, 0xff, %g0 + srl %o5, 24, %o5 + move %icc, 1 - 8, %g2 + + movrz %o5, 0 - 8, %g2 + sub %o0, %o1, %o0 + + retl + add %o0, %g2, %o0 END(strlen) libc_hidden_builtin_def (strlen) diff --git a/sysdeps/unix/sysv/linux/bits/in.h b/sysdeps/unix/sysv/linux/bits/in.h index b457a1790f..0aa0d6638a 100644 --- a/sysdeps/unix/sysv/linux/bits/in.h +++ b/sysdeps/unix/sysv/linux/bits/in.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1991-1999, 2000, 2004, 2008 Free Software Foundation, Inc. +/* Copyright (C) 1991-1999, 2000, 2004, 2008, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -56,14 +56,26 @@ # define MCAST_INCLUDE 1 #endif -#define IP_ROUTER_ALERT 5 /* bool */ -#define IP_PKTINFO 8 /* bool */ -#define IP_PKTOPTIONS 9 -#define IP_PMTUDISC 10 /* obsolete name? */ -#define IP_MTU_DISCOVER 10 /* int; see below */ -#define IP_RECVERR 11 /* bool */ -#define IP_RECVTTL 12 /* bool */ -#define IP_RECVTOS 13 /* bool */ +#define IP_ROUTER_ALERT 5 /* bool */ +#define IP_PKTINFO 8 /* bool */ +#define IP_PKTOPTIONS 9 +#define IP_PMTUDISC 10 /* obsolete name? 
*/ +#define IP_MTU_DISCOVER 10 /* int; see below */ +#define IP_RECVERR 11 /* bool */ +#define IP_RECVTTL 12 /* bool */ +#define IP_RECVTOS 13 /* bool */ +#define IP_MTU 14 /* int */ +#define IP_FREEBIND 15 +#define IP_IPSEC_POLICY 16 +#define IP_XFRM_POLICY 17 +#define IP_PASSSEC 18 +#define IP_TRANSPARENT 19 + +/* TProxy original addresses */ +#define IP_ORIGDSTADDR 20 +#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR + +#define IP_MINTTL 21 /* IP_MTU_DISCOVER arguments. */ diff --git a/sysdeps/unix/sysv/linux/internal_statvfs.c b/sysdeps/unix/sysv/linux/internal_statvfs.c index 28c1cb691f..59b173ed73 100644 --- a/sysdeps/unix/sysv/linux/internal_statvfs.c +++ b/sysdeps/unix/sysv/linux/internal_statvfs.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1998-2003, 2004, 2005, 2006 Free Software Foundation, Inc. +/* Copyright (C) 1998-2006, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. @@ -45,13 +45,15 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st) const char *fsname = NULL; const char *fsname2 = NULL; + const char *fsname3 = NULL; /* Map the filesystem type we got from the statfs call to a string. 
*/ switch (fstype) { case EXT2_SUPER_MAGIC: - fsname = "ext3"; - fsname2 = "ext2"; + fsname = "ext4"; + fsname2 = "ext3"; + fsname3 = "ext2"; break; case DEVPTS_SUPER_MAGIC: fsname= "devpts"; @@ -98,6 +100,9 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st) case NTFS_SUPER_MAGIC: fsname = "ntfs"; break; + case LOGFS_MAGIC_U32: + fsname = "logfs"; + break; } FILE *mtab = __setmntent ("/proc/mounts", "r"); @@ -126,7 +131,9 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st) else if (fsname != NULL && strcmp (fsname, mntbuf.mnt_type) != 0 && (fsname2 == NULL - || strcmp (fsname2, mntbuf.mnt_type) != 0)) + || strcmp (fsname2, mntbuf.mnt_type) != 0) + && (fsname3 == NULL + || strcmp (fsname3, mntbuf.mnt_type) != 0)) continue; /* Find out about the device the current entry is for. */ @@ -176,7 +183,7 @@ __statvfs_getflags (const char *name, int fstype, struct stat64 *st) { /* Try without a filesystem name. */ assert (fsname != NULL); - fsname = fsname2 = NULL; + fsname = fsname2 = fsname3 = NULL; } /* It is not strictly allowed to use rewind here. But diff --git a/sysdeps/unix/sysv/linux/linux_fsinfo.h b/sysdeps/unix/sysv/linux/linux_fsinfo.h index 8c6591ada3..b10e98b46f 100644 --- a/sysdeps/unix/sysv/linux/linux_fsinfo.h +++ b/sysdeps/unix/sysv/linux/linux_fsinfo.h @@ -1,5 +1,5 @@ /* Constants from kernel header for various FSes. - Copyright (C) 1998,1999,2000,2001,2002,2003,2005 Free Software Foundation, Inc. + Copyright (C) 1998-2003,2005,2010 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -25,22 +25,22 @@ filesystem types will become available we have to add the appropriate definitions here.*/ -/* Constants that identify the `adfs' filesystem. */ +/* Constant that identifies the `adfs' filesystem. */ #define ADFS_SUPER_MAGIC 0xadf5 -/* Constants that identify the `affs' filesystem. 
*/ +/* Constant that identifies the `affs' filesystem. */ #define AFFS_SUPER_MAGIC 0xadff -/* Constants that identify the `autofs' filesystem. */ +/* Constant that identifies the `autofs' filesystem. */ #define AUTOFS_SUPER_MAGIC 0x187 -/* Constants that identify the `bfs' filesystem. */ +/* Constant that identifies the `bfs' filesystem. */ #define BFS_MAGIC 0x1BADFACE -/* Constants that identify the `coda' filesystem. */ +/* Constant that identifies the `coda' filesystem. */ #define CODA_SUPER_MAGIC 0x73757245 -/* Constants that identify the `coherent' filesystem. */ +/* Constant that identifies the `coherent' filesystem. */ #define COH_SUPER_MAGIC 0x012ff7b7 /* Constant that identifies the `ramfs' filesystem. */ @@ -52,7 +52,7 @@ /* Constant that identifies the `devpts' filesystem. */ #define DEVPTS_SUPER_MAGIC 0x1cd1 -/* Constant that identifies the `efs' filesystem. */ +/* Constants that identify the `efs' filesystem. */ #define EFS_SUPER_MAGIC 0x414A53 #define EFS_MAGIC 0x072959 @@ -74,6 +74,9 @@ /* Constant that identifies the `jfs' filesystem. */ #define JFS_SUPER_MAGIC 0x3153464a +/* Constant that identifies the `logfs' filesystem. */ +#define LOGFS_MAGIC_U32 0xc97e8168u + /* Constants that identify the `minix2' filesystem. */ #define MINIX2_SUPER_MAGIC 0x2468 #define MINIX2_SUPER_MAGIC2 0x2478 @@ -82,62 +85,62 @@ #define MINIX_SUPER_MAGIC 0x137f #define MINIX_SUPER_MAGIC2 0x138F -/* Constants that identify the `msdos' filesystem. */ +/* Constant that identifies the `msdos' filesystem. */ #define MSDOS_SUPER_MAGIC 0x4d44 -/* Constants that identify the `ncp' filesystem. */ +/* Constant that identifies the `ncp' filesystem. */ #define NCP_SUPER_MAGIC 0x564c -/* Constants that identify the `nfs' filesystem. */ +/* Constant that identifies the `nfs' filesystem. */ #define NFS_SUPER_MAGIC 0x6969 -/* Constants that identify the `ntfs' filesystem. */ +/* Constant that identifies the `ntfs' filesystem.
*/ #define NTFS_SUPER_MAGIC 0x5346544e -/* Constants that identify the `proc' filesystem. */ +/* Constant that identifies the `proc' filesystem. */ #define PROC_SUPER_MAGIC 0x9fa0 /* Constant that identifies the `usbdevfs' filesystem. */ #define USBDEVFS_SUPER_MAGIC 0x9fa2 -/* Constants that identify the `qnx4' filesystem. */ +/* Constant that identifies the `qnx4' filesystem. */ #define QNX4_SUPER_MAGIC 0x002f -/* Constants that identify the `reiser' filesystem. */ +/* Constant that identifies the `reiser' filesystem. */ #define REISERFS_SUPER_MAGIC 0x52654973 /* Constant that identifies the `romfs' filesystem. */ #define ROMFS_SUPER_MAGIC 0x7275 -/* Constants that identify the `smb' filesystem. */ +/* Constant that identifies the `shm' filesystem. */ +#define SHMFS_SUPER_MAGIC 0x01021994 + +/* Constant that identifies the `smb' filesystem. */ #define SMB_SUPER_MAGIC 0x517b +/* Constant that identifies the `sysfs' filesystem. */ +#define SYSFS_MAGIC 0x62656572 + /* Constants that identify the `sysV' filesystem. */ #define SYSV2_SUPER_MAGIC 0x012ff7b6 #define SYSV4_SUPER_MAGIC 0x012ff7b5 -/* Constants that identify the `udf' filesystem. */ +/* Constant that identifies the `udf' filesystem. */ #define UDF_SUPER_MAGIC 0x15013346 -/* Constants that identify the `ufs' filesystem. */ +/* Constants that identify the `ufs' filesystem. */ #define UFS_MAGIC 0x00011954 #define UFS_CIGAM 0x54190100 /* byteswapped MAGIC */ -/* Constants that identify the `xenix' filesystem. */ -#define XENIX_SUPER_MAGIC 0x012ff7b4 +/* Constant that identifies the `vxfs' filesystem. */ +#define VXFS_SUPER_MAGIC 0xa501fcf5 -/* Constant that identifies the `shm' filesystem. */ -#define SHMFS_SUPER_MAGIC 0x01021994 +/* Constant that identifies the `xenix' filesystem. */ +#define XENIX_SUPER_MAGIC 0x012ff7b4 -/* Constants that identify the `xfs' filesystem. */ +/* Constant that identifies the `xfs' filesystem. */ #define XFS_SUPER_MAGIC 0x58465342 -/* Constants that identify the `vxfs' filesystem.
*/ -#define VXFS_SUPER_MAGIC 0xa501fcf5 - -/* Constants that identify the `sysfs´ filesystem. */ -#define SYSFS_MAGIC 0x62656572 - /* Maximum link counts. */ #define COH_LINK_MAX 10000 #define EXT2_LINK_MAX 32000 diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c b/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c new file mode 100644 index 0000000000..117762c779 --- /dev/null +++ b/sysdeps/unix/sysv/linux/sparc/sparc64/msgrcv.c @@ -0,0 +1,49 @@ +/* Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. 
*/ +#include <errno.h> +#include <sys/msg.h> +#include <ipc_priv.h> + +#include <sysdep-cancel.h> +#include <sys/syscall.h> + +#include <bp-checks.h> + /* Implementation behind msgrcv (see the weak alias below).  Forwards the request to the multiplexed `ipc' system call with operation IPCOP_msgrcv, passing the arguments in the order MSQID, MSGSZ, MSGFLG, MSGP, MSGTYP, and returns the system call's result.  MSGP is wrapped in CHECK_N (msgp, msgsz) from <bp-checks.h>.  */ +ssize_t +__libc_msgrcv (msqid, msgp, msgsz, msgtyp, msgflg) + int msqid; + void *msgp; + size_t msgsz; + long int msgtyp; + int msgflg; +{ /* Single-threaded case: issue the system call directly, without the cancellation bracket used below.  */ + if (SINGLE_THREAD_P) + return INLINE_SYSCALL (ipc, 6, IPCOP_msgrcv, msqid, msgsz, msgflg, + CHECK_N (msgp, msgsz), msgtyp); + /* Otherwise bracket the same call with LIBC_CANCEL_ASYNC / LIBC_CANCEL_RESET; presumably this makes the blocking call a cancellation point -- confirm against sysdep-cancel.h.  */ + int oldtype = LIBC_CANCEL_ASYNC (); + + ssize_t result = INLINE_SYSCALL (ipc, 6, IPCOP_msgrcv, msqid, msgsz, msgflg, + CHECK_N (msgp, msgsz), msgtyp); + + LIBC_CANCEL_RESET (oldtype); + + return result; +} +weak_alias (__libc_msgrcv, msgrcv) diff --git a/sysdeps/x86_64/Implies b/sysdeps/x86_64/Implies index 2b8412b0b6..2e0a323e13 100644 --- a/sysdeps/x86_64/Implies +++ b/sysdeps/x86_64/Implies @@ -1,4 +1,5 @@ wordsize-64 ieee754/ldbl-96 +ieee754/dbl-64/wordsize-64 ieee754/dbl-64 ieee754/flt-32 diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h index 61a0556d5e..f615e9591f 100644 --- a/sysdeps/x86_64/dl-machine.h +++ b/sysdeps/x86_64/dl-machine.h @@ -1,5 +1,5 @@ /* Machine-dependent ELF dynamic relocation inline functions. x86-64 version. - Copyright (C) 2001-2005, 2006, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 2001-2006, 2008, 2009, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Andreas Jaeger <aj@suse.de>. @@ -419,7 +419,7 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, case R_X86_64_PC32: value += reloc->r_addend - (Elf64_Addr) reloc_addr; *(unsigned int *) reloc_addr = value; - if (__builtin_expect (value != (unsigned int) value, 0)) + if (__builtin_expect (value != (int) value, 0)) { fmt = "\ %s: Symbol `%s' causes overflow in R_X86_64_PC32 relocation\n"; |