diff options
Diffstat (limited to 'sysdeps/sparc')
44 files changed, 917 insertions, 2481 deletions
diff --git a/sysdeps/sparc/Makefile b/sysdeps/sparc/Makefile index 73b926554e..735e4a40db 100644 --- a/sysdeps/sparc/Makefile +++ b/sysdeps/sparc/Makefile @@ -10,3 +10,8 @@ endif ifeq ($(subdir),db2) CPPFLAGS += -DHAVE_SPINLOCKS=1 -DHAVE_ASSEM_SPARC_GCC=1 endif + +ifeq ($(subdir),csu) +# get offset to rtld_global._dl_hwcap +gen-as-const-headers += rtld-global-offsets.sym +endif diff --git a/sysdeps/sparc/elf/rtld-global-offsets.sym b/sysdeps/sparc/elf/rtld-global-offsets.sym new file mode 100644 index 0000000000..ff4e97f2a6 --- /dev/null +++ b/sysdeps/sparc/elf/rtld-global-offsets.sym @@ -0,0 +1,7 @@ +#define SHARED 1 + +#include <ldsodefs.h> + +#define rtld_global_ro_offsetof(mem) offsetof (struct rtld_global_ro, mem) + +RTLD_GLOBAL_RO_DL_HWCAP_OFFSET rtld_global_ro_offsetof (_dl_hwcap) diff --git a/sysdeps/sparc/sparc32/bcopy.c b/sysdeps/sparc/sparc32/bcopy.c deleted file mode 100644 index 9a455f33c4..0000000000 --- a/sysdeps/sparc/sparc32/bcopy.c +++ /dev/null @@ -1 +0,0 @@ -/* bcopy is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/dl-machine.h b/sysdeps/sparc/sparc32/dl-machine.h index 53257104a6..9631db32e1 100644 --- a/sysdeps/sparc/sparc32/dl-machine.h +++ b/sysdeps/sparc/sparc32/dl-machine.h @@ -563,7 +563,7 @@ elf_machine_lazy_rel (struct link_map *map, { Elf32_Addr value = map->l_addr + reloc->r_addend; value = ((Elf32_Addr (*) (void)) value) (); - sparc_fixup_plt (reloc, reloc_addr, value, 0, 1); + sparc_fixup_plt (reloc, reloc_addr, value, 1, 1); } else if (r_type == R_SPARC_NONE) ; diff --git a/sysdeps/sparc/sparc32/dl-plt.h b/sysdeps/sparc/sparc32/dl-plt.h index edcc5c1374..bfb891fe69 100644 --- a/sysdeps/sparc/sparc32/dl-plt.h +++ b/sysdeps/sparc/sparc32/dl-plt.h @@ -25,19 +25,55 @@ #define OPCODE_JMP_G1 0x81c06000 /* jmp %g1+?; add lo 10 bits of value */ #define OPCODE_SAVE_SP 0x9de3bfa8 /* save %sp, -(16+6)*4, %sp */ #define OPCODE_BA 0x30800000 /* b,a ?; add PC-rel word address */ +#define OPCODE_BA_PT 0x30480000 /* ba,a,pt %icc, ?; add PC-rel word address */ static inline __attribute__ ((always_inline)) Elf32_Addr sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr, Elf32_Addr value, int t, int do_flush) { - Elf32_Sword disp = value - (Elf32_Addr) reloc_addr; + Elf32_Sword disp; - if (0 && disp >= -0x800000 && disp < 0x800000) + /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap, + in which case we'll be resolving all PLT entries and thus can + optimize by overwriting instructions starting at the first PLT entry + instruction and we need not be mindful of thread safety. + + Otherwise, 't' is '1'. */ + reloc_addr += t; + disp = value - (Elf32_Addr) reloc_addr; + + if (disp >= -0x800000 && disp < 0x800000) { - /* Don't need to worry about thread safety. We're writing just one - instruction. */ + unsigned int insn = OPCODE_BA | ((disp >> 2) & 0x3fffff); + +#ifdef __sparc_v9__ + /* On V9 we can do even better by using a branch with + prediction if we fit into the even smaller 19-bit + displacement field. */ + if (disp >= -0x100000 && disp < 0x100000) + insn = OPCODE_BA_PT | ((disp >> 2) & 0x07ffff); +#endif + + /* Even if we are writing just a single branch, we must not + ignore the 't' offset. Consider a case where we have some + PLT slots which can be optimized into a single branch and + some which cannot. Then we can end up with a PLT which looks + like: + + PLT4.0: sethi %(PLT_4_INDEX), %g1 + sethi %(fully_resolved_sym_4), %g1 + jmp %g1 + %lo(fully_resolved_sym_4) + PLT5.0: ba,a fully_resolved_sym_5 + ba,a PLT0.0 + ... + + The delay slot of that jmp must always be either a sethi to + %g1 or a nop. But if we try to place this displacement + branch there, PLT4.0 will jump to fully_resolved_sym_4 for 1 + instruction and then go immediately to + fully_resolved_sym_5. */ - reloc_addr[0] = OPCODE_BA | ((disp >> 2) & 0x3fffff); + reloc_addr[0] = insn; if (do_flush) __asm __volatile ("flush %0" : : "r"(reloc_addr)); } @@ -48,7 +84,6 @@ sparc_fixup_plt (const Elf32_Rela *reloc, Elf32_Addr *reloc_addr, need not be done during bootstrapping, since there are no threads. But we also can't tell if we _can_ use flush, so don't. */ - reloc_addr += t; reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff); if (do_flush) __asm __volatile ("flush %0+4" : : "r"(reloc_addr)); diff --git a/sysdeps/sparc/sparc32/memcpy.S b/sysdeps/sparc/sparc32/memcpy.S index 6bd55c06a1..748a0862fe 100644 --- a/sysdeps/sparc/sparc32/memcpy.S +++ b/sysdeps/sparc/sparc32/memcpy.S @@ -68,45 +68,6 @@ stb %t0, [%dst - offset - 0x02]; \ stb %t1, [%dst - offset - 0x01]; -/* Both these macros have to start with exactly the same insn */ -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src - offset - 0x20], %t0; \ - ldd [%src - offset - 0x18], %t2; \ - ldd [%src - offset - 0x10], %t4; \ - ldd [%src - offset - 0x08], %t6; \ - st %t0, [%dst - offset - 0x20]; \ - st %t1, [%dst - offset - 0x1c]; \ - st %t2, [%dst - offset - 0x18]; \ - st %t3, [%dst - offset - 0x14]; \ - st %t4, [%dst - offset - 0x10]; \ - st %t5, [%dst - offset - 0x0c]; \ - st %t6, [%dst - offset - 0x08]; \ - st %t7, [%dst - offset - 0x04]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ - ldd [%src - offset - 0x20], %t0; \ - ldd [%src - offset - 0x18], %t2; \ - ldd [%src - offset - 0x10], %t4; \ - ldd [%src - offset - 0x08], %t6; \ - std %t0, [%dst - offset - 0x20]; \ - std %t2, [%dst - offset - 0x18]; \ - std %t4, [%dst - offset - 0x10]; \ - std %t6, [%dst - offset - 0x08]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldd [%src + offset + 0x00], %t0; \ - ldd [%src + offset + 0x08], %t2; \ - st %t0, [%dst + offset + 0x00]; \ - st %t1, [%dst + offset + 0x04]; \ - st %t2, [%dst + offset + 0x08]; \ - st %t3, [%dst + offset + 0x0c]; - -#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ - ldub [%src + offset + 0x00], %t0; \ - ldub [%src + offset + 0x01], %t1; \ - stb %t0, [%dst + offset + 0x00]; \ - stb %t1, [%dst + offset + 0x01]; - #define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \ ldd [%src + offset + 0x00], %t0; \ ldd [%src + offset + 0x08], %t2; \ @@ -146,295 +107,20 @@ .text .align 4 -ENTRY(bcopy) - mov %o0, %o3 - mov %o1, %o0 - mov %o3, %o1 -END(bcopy) - -ENTRY(memmove) - cmp %o0, %o1 +ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ + sub %o0, %o1, %o4 st %o0, [%sp + 64] - bleu 9f - sub %o0, %o1, %o4 - - add %o1, %o2, %o3 - cmp %o3, %o0 - bleu 0f - andcc %o4, 3, %o5 - - add %o1, %o2, %o1 - add %o0, %o2, %o0 - bne 77f +9: andcc %o4, 3, %o5 +0: bne 86f cmp %o2, 15 - bleu 91f - andcc %o1, 3, %g0 - be 3f - nop - - andcc %o1, 1, %g0 - be 4f - andcc %o1, 2, %g0 - - ldub [%o1 - 1], %g2 - sub %o1, 1, %o1 - stb %g2, [%o0 - 1] - sub %o2, 1, %o2 - be 3f - sub %o0, 1, %o0 -4: lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o2, 2, %o2 - sub %o0, 2, %o0 - -3: andcc %o1, 4, %g0 - - be 2f - mov %o2, %g1 - - ld [%o1 - 4], %o4 - sub %g1, 4, %g1 - st %o4, [%o0 - 4] - sub %o1, 4, %o1 - sub %o0, 4, %o0 -2: andcc %g1, 0xffffff80, %g6 - be 3f - andcc %o0, 4, %g0 - - be 74f + 4 -5: RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne 5b - sub %o0, 128, %o0 - -3: andcc %g1, 0x70, %g6 - be 72f - andcc %g1, 8, %g0 - - srl %g6, 1, %o4 - mov %o7, %g2 - add %g6, %o4, %o4 -101: call 100f - sub %o1, %g6, %o1 - mov %g2, %o7 - jmpl %o5 + (72f - 101b), %g0 - sub %o0, %g6, %o0 - -71: RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) -72: be 73f - andcc %g1, 4, %g0 - - ldd [%o1 - 0x08], %g2 - sub %o0, 8, %o0 - sub %o1, 8, %o1 - st %g2, [%o0] - st %g3, [%o0 + 0x04] -73: be 1f - andcc %g1, 2, %g0 - - ld [%o1 - 4], %g2 - sub %o1, 4, %o1 - st %g2, [%o0 - 4] - sub %o0, 4, %o0 -1: be 1f - andcc %g1, 1, %g0 - - lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o0, 2, %o0 -1: be 1f - nop - - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - ld [%sp + 64], %o0 - -74: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne 74b - sub %o0, 128, %o0 - - andcc %g1, 0x70, %g6 - be 72b - andcc %g1, 8, %g0 - - srl %g6, 1, %o4 - mov %o7, %g2 - add %g6, %o4, %o4 -102: call 100f - sub %o1, %g6, %o1 - mov %g2, %o7 - jmpl %o5 + (72b - 102b), %g0 - sub %o0, %g6, %o0 - -75: and %o2, 0xe, %o3 - mov %o7, %g2 - sll %o3, 3, %o4 - sub %o0, %o3, %o0 -103: call 100f - sub %o1, %o3, %o1 - mov %g2, %o7 - jmpl %o5 + (76f - 103b), %g0 - andcc %o2, 1, %g0 - - RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) - RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) - -76: be 1f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - ld [%sp + 64], %o0 -91: bne 75b - andcc %o2, 8, %g0 - - be 1f - andcc %o2, 4, %g0 - - ld [%o1 - 0x08], %g2 - ld [%o1 - 0x04], %g3 - sub %o1, 8, %o1 - st %g2, [%o0 - 0x08] - st %g3, [%o0 - 0x04] - sub %o0, 8, %o0 -1: b 73b - mov %o2, %g1 - -77: cmp %o2, 15 - bleu 75b - andcc %o0, 3, %g0 - be 64f - andcc %o0, 1, %g0 - be 63f - andcc %o0, 2, %g0 - ldub [%o1 - 1], %g5 - sub %o1, 1, %o1 - stb %g5, [%o0 - 1] - sub %o0, 1, %o0 - be 64f - sub %o2, 1, %o2 - -63: ldub [%o1 - 1], %g5 - sub %o1, 2, %o1 - stb %g5, [%o0 - 1] - sub %o0, 2, %o0 - ldub [%o1], %g5 - sub %o2, 2, %o2 - stb %g5, [%o0] -64: and %o1, 3, %g2 - and %o1, -4, %o1 - and %o2, 0xc, %g3 - add %o1, 4, %o1 - cmp %g3, 4 - sll %g2, 3, %g4 - mov 32, %g2 - be 4f - sub %g2, %g4, %g6 - - blu 3f - cmp %g3, 8 - - be 2f - srl %o2, 2, %g3 - - ld [%o1 - 4], %o3 - add %o0, -8, %o0 - ld [%o1 - 8], %o4 - add %o1, -16, %o1 - b 7f - add %g3, 1, %g3 -2: ld [%o1 - 4], %o4 - add %o0, -4, %o0 - ld [%o1 - 8], %g1 - add %o1, -12, %o1 - b 8f - add %g3, 2, %g3 -3: ld [%o1 - 4], %o5 - add %o0, -12, %o0 - ld [%o1 - 8], %o3 - add %o1, -20, %o1 - b 6f - srl %o2, 2, %g3 -4: ld [%o1 - 4], %g1 - srl %o2, 2, %g3 - ld [%o1 - 8], %o5 - add %o1, -24, %o1 - add %o0, -16, %o0 - add %g3, -1, %g3 + bleu 90f + andcc %o1, 3, %g0 - ld [%o1 + 12], %o3 -5: sll %o5, %g4, %g2 - srl %g1, %g6, %g5 - or %g2, %g5, %g2 - st %g2, [%o0 + 12] -6: ld [%o1 + 8], %o4 - sll %o3, %g4, %g2 - srl %o5, %g6, %g5 - or %g2, %g5, %g2 - st %g2, [%o0 + 8] -7: ld [%o1 + 4], %g1 - sll %o4, %g4, %g2 - srl %o3, %g6, %g5 - or %g2, %g5, %g2 - st %g2, [%o0 + 4] -8: ld [%o1], %o5 - sll %g1, %g4, %g2 - srl %o4, %g6, %g5 - addcc %g3, -4, %g3 - or %g2, %g5, %g2 - add %o1, -16, %o1 - st %g2, [%o0] - add %o0, -16, %o0 - bne,a 5b - ld [%o1 + 12], %o3 - sll %o5, %g4, %g2 - srl %g1, %g6, %g5 - srl %g4, 3, %g3 - or %g2, %g5, %g2 - add %o1, %g3, %o1 - andcc %o2, 2, %g0 - st %g2, [%o0 + 12] - be 1f - andcc %o2, 1, %g0 - - ldub [%o1 + 15], %g5 - add %o1, -2, %o1 - stb %g5, [%o0 + 11] - add %o0, -2, %o0 - ldub [%o1 + 16], %g5 - stb %g5, [%o0 + 12] -1: be 1f - nop - ldub [%o1 + 15], %g5 - stb %g5, [%o0 + 11] -1: retl - ld [%sp + 64], %o0 + be 78f + andcc %o1, 4, %g0 -78: andcc %o1, 1, %g0 + andcc %o1, 1, %g0 be 4f andcc %o1, 2, %g0 @@ -442,30 +128,16 @@ ENTRY(memmove) add %o1, 1, %o1 stb %g2, [%o0] sub %o2, 1, %o2 - bne 3f + bne 77f add %o0, 1, %o0 4: lduh [%o1], %g2 add %o1, 2, %o1 sth %g2, [%o0] sub %o2, 2, %o2 - b 3f - add %o0, 2, %o0 -END(memmove) - -ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ - sub %o0, %o1, %o4 - st %o0, [%sp + 64] -9: andcc %o4, 3, %o5 -0: bne 86f - cmp %o2, 15 - - bleu 90f - andcc %o1, 3, %g0 - - bne 78b -3: andcc %o1, 4, %g0 + add %o0, 2, %o0 - be 2f +77: andcc %o1, 4, %g0 +78: be 2f mov %o2, %g1 ld [%o1], %o4 @@ -968,5 +640,5 @@ ENTRY(memcpy) /* %o0=dst %o1=src %o2=len */ 110: retl sub %o7, %g6, %o5 END(memcpy) + libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc32/memmove.c b/sysdeps/sparc/sparc32/memmove.c deleted file mode 100644 index a8d2d49948..0000000000 --- a/sysdeps/sparc/sparc32/memmove.c +++ /dev/null @@ -1 +0,0 @@ -/* memmove is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S b/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S deleted file mode 100644 index c3f097118f..0000000000 --- a/sysdeps/sparc/sparc32/sparcv8/udiv_qrnnd.S +++ /dev/null @@ -1,215 +0,0 @@ -! SPARC __udiv_qrnnd division support, used from longlong.h. - -! Copyright (C) 1993, 1994 Free Software Foundation, Inc. - -! This file is part of the GNU MP Library. - -! The GNU MP Library is free software; you can redistribute it and/or modify -! it under the terms of the GNU Lesser General Public License as published by -! the Free Software Foundation; either version 2.1 of the License, or (at your -! option) any later version. - -! The GNU MP Library is distributed in the hope that it will be useful, but -! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -! License for more details. - -! You should have received a copy of the GNU Lesser General Public License -! along with the GNU MP Library; see the file COPYING.LIB. If not, write to -! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - -! INPUT PARAMETERS -! rem_ptr o0 -! n1 o1 -! n0 o2 -! d o3 - -#include "sysdep.h" - -ENTRY(__udiv_qrnnd) - tst %o3 - bneg LOC(largedivisor) - mov 8,%g1 - - b LOC(p1) - addxcc %o2,%o2,%o2 - -LOC(plop): - bcc LOC(n1) - addxcc %o2,%o2,%o2 -LOC(p1): - addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc LOC(n2) - addxcc %o2,%o2,%o2 -LOC(p2): - addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc LOC(n3) - addxcc %o2,%o2,%o2 -LOC(p3): - addx %o1,%o1,%o1 - subcc %o1,%o3,%o4 - bcc LOC(n4) - addxcc %o2,%o2,%o2 -LOC(p4): - addx %o1,%o1,%o1 - addcc %g1,-1,%g1 - bne LOC(plop) - subcc %o1,%o3,%o4 - bcc LOC(n5) - addxcc %o2,%o2,%o2 -LOC(p5): - st %o1,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(nlop): - bcc LOC(p1) - addxcc %o2,%o2,%o2 -LOC(n1): - addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc LOC(p2) - addxcc %o2,%o2,%o2 -LOC(n2): - addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc LOC(p3) - addxcc %o2,%o2,%o2 -LOC(n3): - addx %o4,%o4,%o4 - subcc %o4,%o3,%o1 - bcc LOC(p4) - addxcc %o2,%o2,%o2 -LOC(n4): - addx %o4,%o4,%o4 - addcc %g1,-1,%g1 - bne LOC(nlop) - subcc %o4,%o3,%o1 - bcc LOC(p5) - addxcc %o2,%o2,%o2 -LOC(n5): - st %o4,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(largedivisor): - and %o2,1,%o5 ! %o5 = n0 & 1 - - srl %o2,1,%o2 - sll %o1,31,%g2 - or %g2,%o2,%o2 ! %o2 = lo(n1n0 >> 1) - srl %o1,1,%o1 ! %o1 = hi(n1n0 >> 1) - - and %o3,1,%g2 - srl %o3,1,%g3 ! %g3 = floor(d / 2) - add %g3,%g2,%g3 ! %g3 = ceil(d / 2) - - b LOC(Lp1) - addxcc %o2,%o2,%o2 - -LOC(Lplop): - bcc LOC(Ln1) - addxcc %o2,%o2,%o2 -LOC(Lp1): - addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc LOC(Ln2) - addxcc %o2,%o2,%o2 -LOC(Lp2): - addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc LOC(Ln3) - addxcc %o2,%o2,%o2 -LOC(Lp3): - addx %o1,%o1,%o1 - subcc %o1,%g3,%o4 - bcc LOC(Ln4) - addxcc %o2,%o2,%o2 -LOC(Lp4): - addx %o1,%o1,%o1 - addcc %g1,-1,%g1 - bne LOC(Lplop) - subcc %o1,%g3,%o4 - bcc LOC(Ln5) - addxcc %o2,%o2,%o2 -LOC(Lp5): - add %o1,%o1,%o1 ! << 1 - tst %g2 - bne LOC(Oddp) - add %o5,%o1,%o1 - st %o1,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(Lnlop): - bcc LOC(Lp1) - addxcc %o2,%o2,%o2 -LOC(Ln1): - addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc LOC(Lp2) - addxcc %o2,%o2,%o2 -LOC(Ln2): - addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc LOC(Lp3) - addxcc %o2,%o2,%o2 -LOC(Ln3): - addx %o4,%o4,%o4 - subcc %o4,%g3,%o1 - bcc LOC(Lp4) - addxcc %o2,%o2,%o2 -LOC(Ln4): - addx %o4,%o4,%o4 - addcc %g1,-1,%g1 - bne LOC(Lnlop) - subcc %o4,%g3,%o1 - bcc LOC(Lp5) - addxcc %o2,%o2,%o2 -LOC(Ln5): - add %o4,%o4,%o4 ! << 1 - tst %g2 - bne LOC(Oddn) - add %o5,%o4,%o4 - st %o4,[%o0] - retl - xnor %g0,%o2,%o0 - -LOC(Oddp): - xnor %g0,%o2,%o2 - ! q' in %o2. r' in %o1 - addcc %o1,%o2,%o1 - bcc LOC(Lp6) - addx %o2,0,%o2 - sub %o1,%o3,%o1 -LOC(Lp6): - subcc %o1,%o3,%g0 - bcs LOC(Lp7) - subx %o2,-1,%o2 - sub %o1,%o3,%o1 -LOC(Lp7): - st %o1,[%o0] - retl - mov %o2,%o0 - -LOC(Oddn): - xnor %g0,%o2,%o2 - ! q' in %o2. r' in %o4 - addcc %o4,%o2,%o4 - bcc LOC(Ln6) - addx %o2,0,%o2 - sub %o4,%o3,%o4 -LOC(Ln6): - subcc %o4,%o3,%g0 - bcs LOC(Ln7) - subx %o2,-1,%o2 - sub %o4,%o3,%o4 -LOC(Ln7): - st %o4,[%o0] - retl - mov %o2,%o0 - -END(__udiv_qrnnd) diff --git a/sysdeps/sparc/sparc32/sparcv9/bcopy.c b/sysdeps/sparc/sparc32/sparcv9/bcopy.c deleted file mode 100644 index 9a455f33c4..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/bcopy.c +++ /dev/null @@ -1 +0,0 @@ -/* bcopy is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/sparcv9/memmove.c b/sysdeps/sparc/sparc32/sparcv9/memmove.c deleted file mode 100644 index a8d2d49948..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/memmove.c +++ /dev/null @@ -1 +0,0 @@ -/* memmove is in memcpy.S */ diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile new file mode 100644 index 0000000000..4d45042a95 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ + memset-niagara1 +endif diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S new file mode 100644 index 0000000000..10aef85fe1 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara1.S @@ -0,0 +1,2 @@ +#define XCC icc +#include <sparc64/multiarch/memcpy-niagara1.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S new file mode 100644 index 0000000000..6b1bf6ea70 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-niagara2.S @@ -0,0 +1,2 @@ +#define XCC icc +#include <sparc64/multiarch/memcpy-niagara2.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S new file mode 100644 index 0000000000..77adf151aa --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy-ultra3.S @@ -0,0 +1,2 @@ +#define XCC icc +#include <sparc64/multiarch/memcpy-ultra3.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S new file mode 100644 index 0000000000..14df91e005 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memcpy.S @@ -0,0 +1,4 @@ +#define ASI_PNF 0x82 +#define ASI_BLK_P 0xf0 +#define XCC icc +#include <sparc64/multiarch/memcpy.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S new file mode 100644 index 0000000000..b432420876 --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara1.S @@ -0,0 +1,2 @@ +#define XCC icc +#include <sparc64/multiarch/memset-niagara1.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S new file mode 100644 index 0000000000..8f8264337d --- /dev/null +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset.S @@ -0,0 +1,4 @@ +#define ASI_PNF 0x82 +#define ASI_BLK_P 0xf0 +#define XCC icc +#include <sparc64/multiarch/memset.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S deleted file mode 100644 index 61960dce61..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9b/memcpy.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9b/memcpy.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S deleted file mode 100644 index 4c05f57bc2..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memcpy.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9v/memcpy.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S deleted file mode 100644 index 5e46c7489f..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v/memset.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9v/memset.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S deleted file mode 100644 index 7f4606037c..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memcpy.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9v2/memcpy.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S b/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S deleted file mode 100644 index 72de7bb0cf..0000000000 --- a/sysdeps/sparc/sparc32/sparcv9/sparcv9v2/memset.S +++ /dev/null @@ -1,2 +0,0 @@ -#define XCC icc -#include <sparc64/sparcv9v2/memset.S> diff --git a/sysdeps/sparc/sparc32/sparcv9/strlen.S b/sysdeps/sparc/sparc32/sparcv9/strlen.S index b8f4dba4f4..28a216c076 100644 --- a/sysdeps/sparc/sparc32/sparcv9/strlen.S +++ b/sysdeps/sparc/sparc32/sparcv9/strlen.S @@ -1,4 +1 @@ -#define ASI_PNF 0x82 -#define ASI_BLK_P 0xf0 -#define XCC icc #include <sparc64/strlen.S> diff --git a/sysdeps/sparc/sparc32/strlen.S b/sysdeps/sparc/sparc32/strlen.S index ed92f20e28..2945bb5484 100644 --- a/sysdeps/sparc/sparc32/strlen.S +++ b/sysdeps/sparc/sparc32/strlen.S @@ -1,8 +1,9 @@ /* Determine the length of a string. For SPARC v7. - Copyright (C) 1996, 1999, 2003 Free Software Foundation, Inc. + Copyright (C) 1996, 1999, 2003, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Jakub Jelinek <jj@ultra.linux.cz>. + Contributed by Jakub Jelinek <jj@ultra.linux.cz> and + David S. Miller <davem@davemloft.net>. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -21,86 +22,55 @@ #include <sysdep.h> - /* Normally, this uses ((xword - 0x01010101) & 0x80808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x01010101) & (~xword) & 0x80808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. - */ - .text .align 4 ENTRY(strlen) - mov %o0, %o1 - andcc %o0, 3, %g0 - be 20f - sethi %hi(0x80808080), %o4 - - ldub [%o0], %o5 - cmp %o5, 0 - be 21f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 4f - or %o4, %lo(0x80808080), %o3 - ldub [%o0], %o5 - cmp %o5, 0 - be 22f - add %o0, 1, %o0 - andcc %o0, 3, %g0 - be 5f - sethi %hi(0x01010101), %o4 - ldub [%o0], %o5 - cmp %o5, 0 - be 23f - add %o0, 1, %o0 - b 11f - or %o4, %lo(0x01010101), %o2 -21: retl - mov 0, %o0 -22: retl - mov 1, %o0 -23: retl - mov 2, %o0 - -20: or %o4, %lo(0x80808080), %o3 -4: sethi %hi(0x01010101), %o4 -5: or %o4, %lo(0x01010101), %o2 -11: ld [%o0], %o5 -12: sub %o5, %o2, %o4 -#ifdef EIGHTBIT_NOT_RARE - andn %o4, %o5, %o4 -#endif - andcc %o4, %o3, %g0 - be 11b - add %o0, 4, %o0 - - srl %o5, 24, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o0, -4, %o4 - srl %o5, 16, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o4, 1, %o4 - srl %o5, 8, %g5 - andcc %g5, 0xff, %g0 - be 13f - add %o4, 1, %o4 - andcc %o5, 0xff, %g0 - bne,a 12b - ld [%o0], %o5 - add %o4, 1, %o4 -13: retl - sub %o4, %o1, %o0 + mov %o0, %o1 + andn %o0, 0x3, %o0 + + ld [%o0], %o5 + and %o1, 0x3, %g1 + mov -1, %g5 + + sethi %hi(0x01010101), %o2 + sll %g1, 3, %g1 + + or %o2, %lo(0x01010101), %o2 + srl %g5, %g1, %g2 + + orn %o5, %g2, %o5 + sll %o2, 7, %o3 +10: add %o0, 4, %o0 + + andn %o3, %o5, %g1 + sub %o5, %o2, %g2 + + andcc %g1, %g2, %g0 + be,a 10b + ld [%o0], %o5 + + srl %o5, 24, %g1 + + andcc %g1, 0xff, %g0 + be 90f + sub %o0, 4, %o0 + + srl %o5, 16, %g2 + + andcc %g2, 0xff, %g0 + be 90f + add %o0, 1, %o0 + + srl %o5, 8, %g1 + + andcc %g1, 0xff, %g0 + be 90f + add %o0, 1, %o0 + + add %o0, 1, %o0 + +90: retl + sub %o0, %o1, %o0 END(strlen) libc_hidden_builtin_def (strlen) diff --git a/sysdeps/sparc/sparc32/udiv_qrnnd.S b/sysdeps/sparc/sparc32/udiv_qrnnd.S deleted file mode 100644 index 4955318a66..0000000000 --- a/sysdeps/sparc/sparc32/udiv_qrnnd.S +++ /dev/null @@ -1,168 +0,0 @@ -! SPARC __udiv_qrnnd division support, used from longlong.h. -! -! Copyright (C) 1993, 1994, 1997 Free Software Foundation, Inc. -! -! This file is part of the GNU MP Library. -! -! The GNU MP Library is free software; you can redistribute it and/or modify -! it under the terms of the GNU Lesser General Public License as published by -! the Free Software Foundation; either version 2.1 of the License, or (at your -! option) any later version. -! -! The GNU MP Library is distributed in the hope that it will be useful, but -! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -! License for more details. -! -! You should have received a copy of the GNU Lesser General Public License -! along with the GNU MP Library; see the file COPYING.LIB. If not, write to -! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -! -! Added PIC support - May/96, Miguel de Icaza -! -! INPUT PARAMETERS -! rem_ptr i0 -! n1 i1 -! n0 i2 -! d i3 - -#include <sysdep.h> -#undef ret /* Kludge for glibc */ - -#ifdef PIC - .text -#else - .section .rodata,#alloc -#endif - .align 8 - - .type two_to_32,@object - .size two_to_32,8 -two_to_32: - .double 0r4294967296 - - .type two_to_31,@object - .size two_to_31,8 -two_to_31: - .double 0r2147483648 - - .text -ENTRY(__udiv_qrnnd) - !#PROLOGUE# 0 - save %sp,-104,%sp - !#PROLOGUE# 1 - st %i1,[%fp-8] - ld [%fp-8],%f10 -#ifdef PIC -LOC(base): - call 1f - fitod %f10,%f4 -1: ldd [%o7-(LOC(base)-two_to_32)],%f8 -#else - sethi %hi(two_to_32),%o7 - fitod %f10,%f4 - ldd [%o7+%lo(two_to_32)],%f8 -#endif - cmp %i1,0 - bge LOC(248) - mov %i0,%i5 - faddd %f4,%f8,%f4 -LOC(248): - st %i2,[%fp-8] - ld [%fp-8],%f10 - fmuld %f4,%f8,%f6 - cmp %i2,0 - bge LOC(249) - fitod %f10,%f2 - faddd %f2,%f8,%f2 -LOC(249): - st %i3,[%fp-8] - faddd %f6,%f2,%f2 - ld [%fp-8],%f10 - cmp %i3,0 - bge LOC(250) - fitod %f10,%f4 - faddd %f4,%f8,%f4 -LOC(250): - fdivd %f2,%f4,%f2 -#ifdef PIC - ldd [%o7-(LOC(base)-two_to_31)],%f4 -#else - sethi %hi(two_to_31),%o7 - ldd [%o7+%lo(two_to_31)],%f4 -#endif - fcmped %f2,%f4 - nop - fbge,a LOC(251) - fsubd %f2,%f4,%f2 - fdtoi %f2,%f2 - st %f2,[%fp-8] - b LOC(252) - ld [%fp-8],%i4 -LOC(251): - fdtoi %f2,%f2 - st %f2,[%fp-8] - ld [%fp-8],%i4 - sethi %hi(-2147483648),%g2 - xor %i4,%g2,%i4 -LOC(252): - wr %g0,%i4,%y - sra %i3,31,%g2 - and %i4,%g2,%g2 - andcc %g0,0,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,%i3,%g1 - mulscc %g1,0,%g1 - add %g1,%g2,%i0 - rd %y,%g3 - subcc %i2,%g3,%o7 - subxcc %i1,%i0,%g0 - be LOC(253) - cmp %o7,%i3 - - add %i4,-1,%i0 - add %o7,%i3,%o7 - st %o7,[%i5] - ret - restore -LOC(253): - blu LOC(246) - mov %i4,%i0 - add %i4,1,%i0 - sub %o7,%i3,%o7 -LOC(246): - st %o7,[%i5] - ret - restore - -END(__udiv_qrnnd) diff --git a/sysdeps/sparc/sparc64/Implies b/sysdeps/sparc/sparc64/Implies index 01bf14e73f..7abc50efcc 100644 --- a/sysdeps/sparc/sparc64/Implies +++ b/sysdeps/sparc/sparc64/Implies @@ -1,6 +1,7 @@ wordsize-64 # SPARC uses IEEE 754 floating point. ieee754/ldbl-128 +ieee754/dbl-64/wordsize-64 ieee754/dbl-64 ieee754/flt-32 sparc/sparc64/soft-fp diff --git a/sysdeps/sparc/sparc64/Makefile b/sysdeps/sparc/sparc64/Makefile index 3bb0238832..1a859dffc0 100644 --- a/sysdeps/sparc/sparc64/Makefile +++ b/sysdeps/sparc/sparc64/Makefile @@ -6,3 +6,7 @@ endif ifeq ($(subdir),csu) CFLAGS-initfini.s += -mcpu=v9 endif + +ifeq ($(subdir),string) +sysdep_routines += align-cpy +endif diff --git a/sysdeps/sparc/sparc64/align-cpy.S b/sysdeps/sparc/sparc64/align-cpy.S new file mode 100644 index 0000000000..bae788fe44 --- /dev/null +++ b/sysdeps/sparc/sparc64/align-cpy.S @@ -0,0 +1,85 @@ +/* Aligned copy routines specified by Sparc V9 ABI. + For 64-bit sparc. + Copyright (C) 2010 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> + + .text + .align 8 +ENTRY(__align_cpy_8) +10: cmp %o0, %o1 + be,pn %xcc, 9f + mov %o0, %o3 + subcc %o2, 0x08, %o2 + be,pn %xcc, 8f +1: ldx [%o1 + 0x00], %o5 + ldx [%o1 + 0x08], %o4 + subcc %o2, 0x10, %o2 + add %o1, 0x10, %o1 + stx %o5, [%o3 + 0x00] + stx %o4, [%o3 + 0x08] + bg,pt %xcc, 1b + add %o3, 0x10, %o3 + bne,pn %xcc, 9f + nop + ldx [%o1 + 0x00], %o5 +8: stx %o5, [%o3 + 0x00] +9: retl + nop +END(__align_cpy_8) + + .align 8 +ENTRY(__align_cpy_4) +20: cmp %o0, %o1 + be,pn %xcc, 9f + mov %o0, %o3 + subcc %o2, 0x04, %o2 + be,pn %xcc, 8f +1: lduw [%o1 + 0x00], %o5 + lduw [%o1 + 0x04], %o4 + subcc %o2, 0x08, %o2 + add %o1, 0x08, %o1 + stw %o5, [%o3 + 0x00] + stw %o4, [%o3 + 0x04] + bg,pt %xcc, 1b + add %o3, 0x08, %o3 + bne,pn %xcc, 9f + nop + lduw [%o1 + 0x00], %o5 +8: stw %o5, [%o3 + 0x00] +9: retl + nop +END(__align_cpy_4) + + .align 8 +ENTRY(__align_cpy_2) + or %o0, %o1, %o3 + or %o2, %o3, %o3 + andcc %o3, 0x7, %g0 + be,pt %xcc, 10b + andcc %o3, 0x3, %g0 + be,pt %xcc, 20b + mov %o7, %g1 + call HIDDEN_JUMPTARGET(memcpy) + mov %o7, %g1 +END(__align_cpy_2) + +weak_alias (__align_cpy_8, __align_cpy_16) +weak_alias (__align_cpy_2, __align_cpy_1) diff --git a/sysdeps/sparc/sparc64/bcopy.c b/sysdeps/sparc/sparc64/bcopy.c deleted file mode 100644 index 9a455f33c4..0000000000 --- a/sysdeps/sparc/sparc64/bcopy.c +++ /dev/null @@ -1 +0,0 @@ -/* bcopy is in memcpy.S */ diff --git a/sysdeps/sparc/sparc64/dl-machine.h b/sysdeps/sparc/sparc64/dl-machine.h index 4c915eb586..82ab5a4547 100644 --- a/sysdeps/sparc/sparc64/dl-machine.h +++ b/sysdeps/sparc/sparc64/dl-machine.h @@ -513,11 +513,13 @@ elf_machine_rela (struct link_map *map, const Elf64_Rela *reloc, value = sym->st_value - sym_map->l_tls_offset + reloc->r_addend; if (r_type == R_SPARC_TLS_LE_HIX22) - *reloc_addr = (*reloc_addr & 0xffc00000) - | (((~value) >> 10) & 0x3fffff); + *(unsigned int *)reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffc00000) + | (((~value) >> 10) & 0x3fffff)); else - *reloc_addr = (*reloc_addr & 0xffffe000) | (value & 0x3ff) - | 0x1c00; + *(unsigned int *)reloc_addr = + ((*(unsigned int *)reloc_addr & 0xffffe000) | (value & 0x3ff) + | 0x1c00); } break; # endif @@ -661,7 +663,7 @@ elf_machine_lazy_rel (struct link_map *map, { /* 'high' is always zero, for large PLT entries the linker emits an R_SPARC_IRELATIVE. */ - sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 0); + sparc64_fixup_plt (map, reloc, reloc_addr, value, 0, 1); } else *reloc_addr = value; diff --git a/sysdeps/sparc/sparc64/dl-plt.h b/sysdeps/sparc/sparc64/dl-plt.h index e06be43a0a..ca2fe3bbd8 100644 --- a/sysdeps/sparc/sparc64/dl-plt.h +++ b/sysdeps/sparc/sparc64/dl-plt.h @@ -28,7 +28,14 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc, Elf64_Addr plt_vaddr = (Elf64_Addr) reloc_addr; Elf64_Sxword disp = value - plt_vaddr; - /* Now move plt_vaddr up to the call instruction. */ + /* 't' is '0' if we are resolving this PLT entry for RTLD bootstrap, + in which case we'll be resolving all PLT entries and thus can + optimize by overwriting instructions starting at the first PLT entry + instruction and we need not be mindful of thread safety. + + Otherwise, 't' is '1'. + + Now move plt_vaddr up to the call instruction. */ plt_vaddr += ((t + 1) * 4); /* PLT entries .PLT32768 and above look always the same. */ @@ -39,10 +46,22 @@ sparc64_fixup_plt (struct link_map *map, const Elf64_Rela *reloc, /* Near destination. */ else if (disp >= -0x800000 && disp < 0x800000) { - /* As this is just one instruction, it is thread safe and so - we can avoid the unnecessary sethi FOO, %g1. - b,a target */ - insns[0] = 0x30800000 | ((disp >> 2) & 0x3fffff); + unsigned int insn; + + /* ba,a */ + insn = 0x30800000 | ((disp >> 2) & 0x3fffff); + + if (disp >= -0x100000 && disp < 0x100000) + { + /* ba,a,pt %icc */ + insn = 0x30480000 | ((disp >> 2) & 0x07ffff); + } + + /* As this is just one instruction, it is thread safe and so we + can avoid the unnecessary sethi FOO, %g1. Each 64-bit PLT + entry is 8 instructions long, so we can't run into the 'jmp' + delay slot problems 32-bit PLTs can. */ + insns[0] = insn; __asm __volatile ("flush %0" : : "r" (insns)); } /* 32-bit Sparc style, the target is in the lower 32-bits of diff --git a/sysdeps/sparc/sparc64/memcopy.h b/sysdeps/sparc/sparc64/memcopy.h new file mode 100644 index 0000000000..ec978e3c80 --- /dev/null +++ b/sysdeps/sparc/sparc64/memcopy.h @@ -0,0 +1 @@ +#include <sparc32/memcopy.h> diff --git a/sysdeps/sparc/sparc64/memcpy.S b/sysdeps/sparc/sparc64/memcpy.S index 5993358017..71e7100658 100644 --- a/sysdeps/sparc/sparc64/memcpy.S +++ b/sysdeps/sparc/sparc64/memcpy.S @@ -136,79 +136,8 @@ stx %t0, [%dst - offset - 0x10]; \ stx %t1, [%dst - offset - 0x08]; - /* Macros for non-VIS memmove code. */ -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - .text .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 /* IEU0 Group */ - mov %o0, %g3 /* IEU1 */ - cmp %o4, %o2 /* IEU1 Group */ - mov %o1, %o0 /* IEU0 */ - bgeu,pt %XCC, 210f /* CTI */ - mov %g3, %o1 /* IEU0 Group */ -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 */ -#endif - brnz,pn %o2, 220f /* CTI Group */ - add %o0, %o2, %o0 /* IEU0 */ - retl - nop -END(bcopy) - - .align 32 ENTRY(__memcpy_large) 200: be,pt %xcc, 201f /* CTI */ andcc %o0, 0x38, %g5 /* IEU1 Group */ @@ -446,65 +375,6 @@ ENTRY(__memcpy_large) mov %g4, %o0 END(__memcpy_large) -#ifdef USE_BPR - - /* void *__align_cpy_4(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 3)) - */ - - .align 32 -ENTRY(__align_cpy_4) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, 15 /* IEU1 */ - bleu,pn %xcc, 208b /* CTI */ - cmp %o2, (64 * 6) /* IEU1 Group */ - bgeu,pn %xcc, 200b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - ba,pt %xcc, 216f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ -END(__align_cpy_4) - - /* void *__align_cpy_8(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 7)) - */ - - .align 32 -ENTRY(__align_cpy_8) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, 15 /* IEU1 */ - bleu,pn %xcc, 208b /* CTI */ - cmp %o2, (64 * 6) /* IEU1 Group */ - bgeu,pn %xcc, 201b /* CTI */ - andcc %o0, 0x38, %g5 /* IEU1 Group */ - andcc %o2, -128, %g6 /* IEU1 Group */ - bne,a,pt %xcc, 82f + 4 /* CTI */ - ldx [%o1], %g1 /* Load */ - ba,pt %xcc, 41f /* CTI Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ -END(__align_cpy_8) - - /* void *__align_cpy_16(void *dest, void *src, size_t n) - * SPARC v9 SYSV ABI - * Like memcpy, but results are undefined if (!n || ((dest | src | n) & 15)) - */ - - .align 32 -ENTRY(__align_cpy_16) - mov %o0, %g4 /* IEU0 Group */ - cmp %o2, (64 * 6) /* IEU1 */ - bgeu,pn %xcc, 201b /* CTI */ - andcc %o0, 0x38, %g5 /* IEU1 Group */ - andcc %o2, -128, %g6 /* IEU1 Group */ - bne,a,pt %xcc, 82f + 4 /* CTI */ - ldx [%o1], %g1 /* Load */ - ba,pt %xcc, 41f /* CTI Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ -END(__align_cpy_16) - -#endif - .align 32 ENTRY(memcpy) 210: @@ -699,227 +569,4 @@ ENTRY(memcpy) mov %g4, %o0 END(memcpy) - .align 32 -ENTRY(__memmove_slowpath) -228: andcc %o2, 1, %g0 /* IEU1 Group */ - be,pt %icc, 2f+4 /* CTI */ -1: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 Group */ - be,pn %xcc, 229f /* CTI */ - stb %o5, [%o0] /* Store */ -2: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o0, 2, %o0 /* IEU0 */ - ldub [%o1 - 2], %g5 /* LOAD Group */ - sub %o1, 2, %o1 /* IEU0 */ - subcc %o2, 2, %o2 /* IEU1 Group */ - stb %o5, [%o0 + 1] /* Store */ - bne,pt %xcc, 2b /* CTI */ - stb %g5, [%o0] /* Store */ -229: retl - mov %g4, %o0 -219: retl - nop -END(__memmove_slowpath) - - .align 32 -ENTRY(memmove) -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 Group */ -#endif - brz,pn %o2, 219b /* CTI Group */ - sub %o0, %o1, %o4 /* IEU0 */ - cmp %o4, %o2 /* IEU1 Group */ - bgeu,pt %XCC, 218b /* CTI */ - mov %o0, %g4 /* IEU0 */ - add %o0, %o2, %o0 /* IEU0 Group */ -220: add %o1, %o2, %o1 /* IEU1 */ - cmp %o2, 15 /* IEU1 Group */ - bleu,pn %xcc, 228b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - sub %o0, %o1, %g5 /* IEU0 */ - andcc %g5, 3, %o5 /* IEU1 Group */ - bne,pn %xcc, 232f /* CTI */ - andcc %o1, 3, %g0 /* IEU1 Group */ - be,a,pt %xcc, 236f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - andcc %o1, 1, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - andcc %o1, 2, %g0 /* IEU1 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - sub %o2, 1, %o2 /* IEU0 Group */ - be,pn %xcc, 5f /* CTI Group */ - stb %g2, [%o0] /* Store */ -4: lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sub %o0, 2, %o0 /* IEU1 */ - sub %o2, 2, %o2 /* IEU0 */ - sth %g2, [%o0] /* Store Group + bubble */ -5: andcc %o1, 4, %g0 /* IEU1 */ -236: be,a,pn %xcc, 2f /* CTI */ - andcc %o2, -128, %g6 /* IEU1 Group */ - lduw [%o1 - 4], %g5 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - sub %o0, 4, %o0 /* IEU1 */ - sub %o2, 4, %o2 /* IEU0 Group */ - stw %g5, [%o0] /* Store */ - andcc %o2, -128, %g6 /* IEU1 Group */ -2: be,pn %xcc, 235f /* CTI */ - andcc %o0, 4, %g0 /* IEU1 Group */ - be,pn %xcc, 282f + 4 /* CTI Group */ -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 5b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ -235: andcc %o2, 0x70, %g6 /* IEU1 Group */ -41: be,pn %xcc, 280f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -279: rd %pc, %o5 /* PDU Group */ - sll %g6, 1, %g5 /* IEU0 Group */ - sub %o1, %g6, %o1 /* IEU1 */ - sub %o5, %g5, %o5 /* IEU0 Group */ - jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f /* CTI */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - stw %g2, [%o0 + 4] /* Store Group */ - sub %o1, 8, %o1 /* IEU1 */ - srlx %g2, 32, %g2 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -281: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - stw %g2, [%o0 - 4] /* Store Group */ - sub %o0, 4, %o0 /* IEU0 */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sth %g2, [%o0 - 2] /* Store Group */ - sub %o0, 2, %o0 /* IEU0 */ -1: be,pt %xcc, 211f /* CTI */ - nop /* IEU1 */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 282b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ - be,pn %xcc, 284f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -283: rd %pc, %o5 /* PDU Group */ - sub %o1, %g6, %o1 /* IEU0 Group */ - sub %o5, %g6, %o5 /* IEU1 */ - jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f /* CTI Group */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - sub %o1, 8, %o1 /* IEU0 Group */ - stx %g2, [%o0] /* Store */ -285: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o0, 4, %o0 /* IEU0 */ - sub %o1, 4, %o1 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o0, 2, %o0 /* IEU0 */ - sub %o1, 2, %o1 /* IEU0 Group */ - sth %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - nop /* IEU0 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -1: retl - mov %g4, %o0 - -232: brz,pt %g2, 2f /* CTI Group */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1 - 1], %g5 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* IEU1 Group */ - bne,pt %xcc, 1b /* CTI */ - stb %g5, [%o0] /* Store */ -2: andn %o2, 7, %g5 /* IEU0 Group */ - and %o2, 7, %o2 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ -1: ldd [%g1 - 8], %f6 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f6, %f4, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 233f /* CTI */ - sub %o0, 8, %o0 /* IEU1 */ - ldd [%g1 - 8], %f4 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 */ - bne,pn %xcc, 1b /* CTI Group */ - sub %o0, 8, %o0 /* IEU0 */ -233: brz,pn %o2, 234f /* CTI Group */ - nop /* IEU0 */ -237: ldub [%o1 - 1], %g5 /* LOAD */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 237b /* CTI */ - stb %g5, [%o0] /* Store Group */ -234: wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 -END(memmove) - -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -#endif libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/memmove.c b/sysdeps/sparc/sparc64/memmove.c deleted file mode 100644 index a8d2d49948..0000000000 --- a/sysdeps/sparc/sparc64/memmove.c +++ /dev/null @@ -1 +0,0 @@ -/* memmove is in memcpy.S */ diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile new file mode 100644 index 0000000000..4d45042a95 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),string) +sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ + memset-niagara1 +endif diff --git a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S index ad2b0f742c..6a78295e81 100644 --- a/sysdeps/sparc/sparc64/sparcv9v/memcpy.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S @@ -36,34 +36,19 @@ #define XCC xcc #endif +#if !defined NOT_IN_libc + .register %g2,#scratch .register %g3,#scratch .register %g6,#scratch .text - .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp %o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) .align 32 -ENTRY(memcpy) -#ifndef USE_BPR +ENTRY(__memcpy_niagara1) +# ifndef USE_BPR srl %o2, 0, %o2 -#endif +# endif 100: /* %o0=dst, %o1=src, %o2=len */ mov %o0, %g5 cmp %o2, 0 @@ -352,245 +337,6 @@ ENTRY(memcpy) retl mov %g5, %o0 -END(memcpy) - -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; +END(__memcpy_niagara1) - .align 32 -228: andcc %o2, 1, %g0 - be,pt %icc, 2f+4 -1: ldub [%o1 - 1], %o5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - be,pn %xcc, 229f - stb %o5, [%o0] -2: ldub [%o1 - 1], %o5 - sub %o0, 2, %o0 - ldub [%o1 - 2], %g5 - sub %o1, 2, %o1 - subcc %o2, 2, %o2 - stb %o5, [%o0 + 1] - bne,pt %xcc, 2b - stb %g5, [%o0] -229: retl - mov %g4, %o0 -out: retl - mov %g5, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brz,pn %o2, out - sub %o0, %o1, %o4 - cmp %o4, %o2 - bgeu,pt %XCC, 218b - mov %o0, %g4 - add %o0, %o2, %o0 -220: add %o1, %o2, %o1 - cmp %o2, 15 - bleu,pn %xcc, 228b - andcc %o0, 7, %g2 - sub %o0, %o1, %g5 - andcc %g5, 3, %o5 - bne,pn %xcc, 232f - andcc %o1, 3, %g0 - be,a,pt %xcc, 236f - andcc %o1, 4, %g0 - andcc %o1, 1, %g0 - be,pn %xcc, 4f - andcc %o1, 2, %g0 - ldub [%o1 - 1], %g2 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - sub %o2, 1, %o2 - be,pn %xcc, 5f - stb %g2, [%o0] -4: lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sub %o0, 2, %o0 - sub %o2, 2, %o2 - sth %g2, [%o0] -5: andcc %o1, 4, %g0 -236: be,a,pn %xcc, 2f - andcc %o2, -128, %g6 - lduw [%o1 - 4], %g5 - sub %o1, 4, %o1 - sub %o0, 4, %o0 - sub %o2, 4, %o2 - stw %g5, [%o0] - andcc %o2, -128, %g6 -2: be,pn %xcc, 235f - andcc %o0, 4, %g0 - be,pn %xcc, 282f + 4 -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %xcc, 5b - sub %o0, 128, %o0 -235: andcc %o2, 0x70, %g6 -41: be,pn %xcc, 280f - andcc %o2, 8, %g0 - -279: rd %pc, %o5 - sll %g6, 1, %g5 - sub %o1, %g6, %o1 - sub %o5, %g5, %o5 - jmpl %o5 + %lo(280f - 279b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - stw %g2, [%o0 + 4] - sub %o1, 8, %o1 - srlx %g2, 32, %g2 - stw %g2, [%o0] -281: be,pt %xcc, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o1, 4, %o1 - stw %g2, [%o0 - 4] - sub %o0, 4, %o0 -1: be,pt %xcc, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o0, 2, %o0 -1: be,pt %xcc, 211f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %xcc, 282b - sub %o0, 128, %o0 - andcc %o2, 0x70, %g6 - be,pn %xcc, 284f - andcc %o2, 8, %g0 - -283: rd %pc, %o5 - sub %o1, %g6, %o1 - sub %o5, %g6, %o5 - jmpl %o5 + %lo(284f - 283b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - sub %o1, 8, %o1 - stx %g2, [%o0] -285: be,pt %xcc, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o0, 4, %o0 - sub %o1, 4, %o1 - stw %g2, [%o0] -1: be,pt %xcc, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o0, 2, %o0 - sub %o1, 2, %o1 - sth %g2, [%o0] -1: be,pt %xcc, 1f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - mov %g4, %o0 - -232: ldub [%o1 - 1], %g5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %xcc, 232b - stb %g5, [%o0] -234: retl - mov %g4, %o0 -END(memmove) - -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -weak_alias (memcpy, __align_cpy_4) -weak_alias (memcpy, __align_cpy_8) -weak_alias (memcpy, __align_cpy_16) #endif -libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S index b261f461a4..35f6989aca 100644 --- a/sysdeps/sparc/sparc64/sparcv9v2/memcpy.S +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S @@ -138,34 +138,19 @@ LOAD(ldd, base + 0x28, %x5); \ LOAD(ldd, base + 0x30, %x6); +#if !defined NOT_IN_libc + .register %g2,#scratch .register %g3,#scratch .register %g6,#scratch .text - .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp %o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) .align 32 -ENTRY(memcpy) -#ifndef USE_BPR +ENTRY(__memcpy_niagara2) +# ifndef USE_BPR srl %o2, 0, %o2 -#endif +# endif 100: /* %o0=dst, %o1=src, %o2=len */ mov %o0, %g5 cmp %o2, 0 @@ -502,245 +487,6 @@ ENTRY(memcpy) retl mov %g5, %o0 -END(memcpy) - -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; +END(__memcpy_niagara2) - .align 32 -228: andcc %o2, 1, %g0 - be,pt %icc, 2f+4 -1: ldub [%o1 - 1], %o5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - be,pn %XCC, 229f - stb %o5, [%o0] -2: ldub [%o1 - 1], %o5 - sub %o0, 2, %o0 - ldub [%o1 - 2], %g5 - sub %o1, 2, %o1 - subcc %o2, 2, %o2 - stb %o5, [%o0 + 1] - bne,pt %XCC, 2b - stb %g5, [%o0] -229: retl - mov %g4, %o0 -out: retl - mov %g5, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brz,pn %o2, out - sub %o0, %o1, %o4 - cmp %o4, %o2 - bgeu,pt %XCC, 218b - mov %o0, %g4 - add %o0, %o2, %o0 -220: add %o1, %o2, %o1 - cmp %o2, 15 - bleu,pn %XCC, 228b - andcc %o0, 7, %g2 - sub %o0, %o1, %g5 - andcc %g5, 3, %o5 - bne,pn %XCC, 232f - andcc %o1, 3, %g0 - be,a,pt %XCC, 236f - andcc %o1, 4, %g0 - andcc %o1, 1, %g0 - be,pn %XCC, 4f - andcc %o1, 2, %g0 - ldub [%o1 - 1], %g2 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - sub %o2, 1, %o2 - be,pn %XCC, 5f - stb %g2, [%o0] -4: lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sub %o0, 2, %o0 - sub %o2, 2, %o2 - sth %g2, [%o0] -5: andcc %o1, 4, %g0 -236: be,a,pn %XCC, 2f - andcc %o2, -128, %g6 - lduw [%o1 - 4], %g5 - sub %o1, 4, %o1 - sub %o0, 4, %o0 - sub %o2, 4, %o2 - stw %g5, [%o0] - andcc %o2, -128, %g6 -2: be,pn %XCC, 235f - andcc %o0, 4, %g0 - be,pn %XCC, 282f + 4 -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %XCC, 5b - sub %o0, 128, %o0 -235: andcc %o2, 0x70, %g6 -41: be,pn %XCC, 280f - andcc %o2, 8, %g0 - -279: rd %pc, %o5 - sll %g6, 1, %g5 - sub %o1, %g6, %o1 - sub %o5, %g5, %o5 - jmpl %o5 + %lo(280f - 279b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %XCC, 281f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - stw %g2, [%o0 + 4] - sub %o1, 8, %o1 - srlx %g2, 32, %g2 - stw %g2, [%o0] -281: be,pt %XCC, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o1, 4, %o1 - stw %g2, [%o0 - 4] - sub %o0, 4, %o0 -1: be,pt %XCC, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o1, 2, %o1 - sth %g2, [%o0 - 2] - sub %o0, 2, %o0 -1: be,pt %XCC, 211f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 - sub %o1, 128, %o1 - bne,pt %XCC, 282b - sub %o0, 128, %o0 - andcc %o2, 0x70, %g6 - be,pn %XCC, 284f - andcc %o2, 8, %g0 - -283: rd %pc, %o5 - sub %o1, %g6, %o1 - sub %o5, %g6, %o5 - jmpl %o5 + %lo(284f - 283b), %g0 - sub %o0, %g6, %o0 - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %XCC, 285f - andcc %o2, 4, %g0 - ldx [%o1 - 8], %g2 - sub %o0, 8, %o0 - sub %o1, 8, %o1 - stx %g2, [%o0] -285: be,pt %XCC, 1f - andcc %o2, 2, %g0 - lduw [%o1 - 4], %g2 - sub %o0, 4, %o0 - sub %o1, 4, %o1 - stw %g2, [%o0] -1: be,pt %XCC, 1f - andcc %o2, 1, %g0 - lduh [%o1 - 2], %g2 - sub %o0, 2, %o0 - sub %o1, 2, %o1 - sth %g2, [%o0] -1: be,pt %XCC, 1f - nop - ldub [%o1 - 1], %g2 - stb %g2, [%o0 - 1] -1: retl - mov %g4, %o0 - -232: ldub [%o1 - 1], %g5 - sub %o1, 1, %o1 - sub %o0, 1, %o0 - subcc %o2, 1, %o2 - bne,pt %XCC, 232b - stb %g5, [%o0] -234: retl - mov %g4, %o0 -END(memmove) - -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -weak_alias (memcpy, __align_cpy_4) -weak_alias (memcpy, __align_cpy_8) -weak_alias (memcpy, __align_cpy_16) #endif -libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S new file mode 100644 index 0000000000..34ca089f93 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S @@ -0,0 +1,320 @@ +/* Copy SIZE bytes from SRC to DEST. + For UltraSPARC-III. + Copyright (C) 2001, 2003 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@redhat.com) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> + +#define ASI_BLK_P 0xf0 +#define FPRS_FEF 0x04 +#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs +#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs + +#ifndef XCC +#define USE_BPR +#define XCC xcc +#endif + +#if !defined NOT_IN_libc + + .register %g2,#scratch + .register %g3,#scratch + .register %g6,#scratch + + .text + + /* Special/non-trivial issues of this code: + * + * 1) %o5 is preserved from VISEntryHalf to VISExitHalf + * 2) Only low 32 FPU registers are used so that only the + * lower half of the FPU register set is dirtied by this + * code. This is especially important in the kernel. + * 3) This code never prefetches cachelines past the end + * of the source buffer. + * + * The cheetah's flexible spine, oversized liver, enlarged heart, + * slender muscular body, and claws make it the swiftest hunter + * in Africa and the fastest animal on land. Can reach speeds + * of up to 2.4GB per second. + */ + .align 32 +ENTRY(__memcpy_ultra3) + +100: /* %o0=dst, %o1=src, %o2=len */ + mov %o0, %g5 + cmp %o2, 0 + be,pn %XCC, out +218: or %o0, %o1, %o3 + cmp %o2, 16 + bleu,a,pn %XCC, small_copy + or %o3, %o2, %o3 + + cmp %o2, 256 + blu,pt %XCC, medium_copy + andcc %o3, 0x7, %g0 + + ba,pt %xcc, enter + andcc %o0, 0x3f, %g2 + + /* Here len >= 256 and condition codes reflect execution + * of "andcc %o0, 0x7, %g2", done by caller. + */ + .align 64 +enter: + /* Is 'dst' already aligned on an 64-byte boundary? */ + be,pt %XCC, 2f + + /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number + * of bytes to copy to make 'dst' 64-byte aligned. We pre- + * subtract this from 'len'. + */ + sub %g2, 0x40, %g2 + sub %g0, %g2, %g2 + sub %o2, %g2, %o2 + + /* Copy %g2 bytes from src to dst, one byte at a time. */ +1: ldub [%o1 + 0x00], %o3 + add %o1, 0x1, %o1 + add %o0, 0x1, %o0 + subcc %g2, 0x1, %g2 + + bg,pt %XCC, 1b + stb %o3, [%o0 + -1] + +2: VISEntryHalf + and %o1, 0x7, %g1 + ba,pt %xcc, begin + alignaddr %o1, %g0, %o1 + + .align 64 +begin: + prefetch [%o1 + 0x000], #one_read + prefetch [%o1 + 0x040], #one_read + andn %o2, (0x40 - 1), %o4 + prefetch [%o1 + 0x080], #one_read + prefetch [%o1 + 0x0c0], #one_read + ldd [%o1 + 0x000], %f0 + prefetch [%o1 + 0x100], #one_read + ldd [%o1 + 0x008], %f2 + prefetch [%o1 + 0x140], #one_read + ldd [%o1 + 0x010], %f4 + prefetch [%o1 + 0x180], #one_read + faligndata %f0, %f2, %f16 + ldd [%o1 + 0x018], %f6 + faligndata %f2, %f4, %f18 + ldd [%o1 + 0x020], %f8 + faligndata %f4, %f6, %f20 + ldd [%o1 + 0x028], %f10 + faligndata %f6, %f8, %f22 + + ldd [%o1 + 0x030], %f12 + faligndata %f8, %f10, %f24 + ldd [%o1 + 0x038], %f14 + faligndata %f10, %f12, %f26 + ldd [%o1 + 0x040], %f0 + + sub %o4, 0x80, %o4 + add %o1, 0x40, %o1 + ba,pt %xcc, loop + srl %o4, 6, %o3 + + .align 64 +loop: + ldd [%o1 + 0x008], %f2 + faligndata %f12, %f14, %f28 + ldd [%o1 + 0x010], %f4 + faligndata %f14, %f0, %f30 + stda %f16, [%o0] ASI_BLK_P + ldd [%o1 + 0x018], %f6 + faligndata %f0, %f2, %f16 + + ldd [%o1 + 0x020], %f8 + faligndata %f2, %f4, %f18 + ldd [%o1 + 0x028], %f10 + faligndata %f4, %f6, %f20 + ldd [%o1 + 0x030], %f12 + faligndata %f6, %f8, %f22 + ldd [%o1 + 0x038], %f14 + faligndata %f8, %f10, %f24 + + ldd [%o1 + 0x040], %f0 + prefetch [%o1 + 0x180], #one_read + faligndata %f10, %f12, %f26 + subcc %o3, 0x01, %o3 + add %o1, 0x40, %o1 + bg,pt %XCC, loop + add %o0, 0x40, %o0 + + /* Finally we copy the last full 64-byte block. */ +loopfini: + ldd [%o1 + 0x008], %f2 + faligndata %f12, %f14, %f28 + ldd [%o1 + 0x010], %f4 + faligndata %f14, %f0, %f30 + stda %f16, [%o0] ASI_BLK_P + ldd [%o1 + 0x018], %f6 + faligndata %f0, %f2, %f16 + ldd [%o1 + 0x020], %f8 + faligndata %f2, %f4, %f18 + ldd [%o1 + 0x028], %f10 + faligndata %f4, %f6, %f20 + ldd [%o1 + 0x030], %f12 + faligndata %f6, %f8, %f22 + ldd [%o1 + 0x038], %f14 + faligndata %f8, %f10, %f24 + cmp %g1, 0 + be,pt %XCC, 1f + add %o0, 0x40, %o0 + ldd [%o1 + 0x040], %f0 +1: faligndata %f10, %f12, %f26 + faligndata %f12, %f14, %f28 + faligndata %f14, %f0, %f30 + stda %f16, [%o0] ASI_BLK_P + add %o0, 0x40, %o0 + add %o1, 0x40, %o1 + membar #Sync + + /* Now we copy the (len modulo 64) bytes at the end. + * Note how we borrow the %f0 loaded above. + * + * Also notice how this code is careful not to perform a + * load past the end of the src buffer. + */ +loopend: + and %o2, 0x3f, %o2 + andcc %o2, 0x38, %g2 + be,pn %XCC, endcruft + subcc %g2, 0x8, %g2 + be,pn %XCC, endcruft + cmp %g1, 0 + + be,a,pt %XCC, 1f + ldd [%o1 + 0x00], %f0 + +1: ldd [%o1 + 0x08], %f2 + add %o1, 0x8, %o1 + sub %o2, 0x8, %o2 + subcc %g2, 0x8, %g2 + faligndata %f0, %f2, %f8 + std %f8, [%o0 + 0x00] + be,pn %XCC, endcruft + add %o0, 0x8, %o0 + ldd [%o1 + 0x08], %f0 + add %o1, 0x8, %o1 + sub %o2, 0x8, %o2 + subcc %g2, 0x8, %g2 + faligndata %f2, %f0, %f8 + std %f8, [%o0 + 0x00] + bne,pn %XCC, 1b + add %o0, 0x8, %o0 + + /* If anything is left, we copy it one byte at a time. + * Note that %g1 is (src & 0x3) saved above before the + * alignaddr was performed. + */ +endcruft: + cmp %o2, 0 + add %o1, %g1, %o1 + VISExitHalf + be,pn %XCC, out + sub %o0, %o1, %o3 + + andcc %g1, 0x7, %g0 + bne,pn %icc, small_copy_unaligned + andcc %o2, 0x8, %g0 + be,pt %icc, 1f + nop + ldx [%o1], %o5 + stx %o5, [%o1 + %o3] + add %o1, 0x8, %o1 + +1: andcc %o2, 0x4, %g0 + be,pt %icc, 1f + nop + lduw [%o1], %o5 + stw %o5, [%o1 + %o3] + add %o1, 0x4, %o1 + +1: andcc %o2, 0x2, %g0 + be,pt %icc, 1f + nop + lduh [%o1], %o5 + sth %o5, [%o1 + %o3] + add %o1, 0x2, %o1 + +1: andcc %o2, 0x1, %g0 + be,pt %icc, out + nop + ldub [%o1], %o5 + ba,pt %xcc, out + stb %o5, [%o1 + %o3] + +medium_copy: /* 16 < len <= 64 */ + bne,pn %XCC, small_copy_unaligned + sub %o0, %o1, %o3 + +medium_copy_aligned: + andn %o2, 0x7, %o4 + and %o2, 0x7, %o2 +1: subcc %o4, 0x8, %o4 + ldx [%o1], %o5 + stx %o5, [%o1 + %o3] + bgu,pt %XCC, 1b + add %o1, 0x8, %o1 + andcc %o2, 0x4, %g0 + be,pt %XCC, 1f + nop + sub %o2, 0x4, %o2 + lduw [%o1], %o5 + stw %o5, [%o1 + %o3] + add %o1, 0x4, %o1 +1: cmp %o2, 0 + be,pt %XCC, out + nop + ba,pt %xcc, small_copy_unaligned + nop + +small_copy: /* 0 < len <= 16 */ + andcc %o3, 0x3, %g0 + bne,pn %XCC, small_copy_unaligned + sub %o0, %o1, %o3 + +small_copy_aligned: + subcc %o2, 4, %o2 + lduw [%o1], %g1 + stw %g1, [%o1 + %o3] + bgu,pt %XCC, small_copy_aligned + add %o1, 4, %o1 + +out: retl + mov %g5, %o0 + + .align 32 +small_copy_unaligned: + subcc %o2, 1, %o2 + ldub [%o1], %g1 + stb %g1, [%o1 + %o3] + bgu,pt %XCC, small_copy_unaligned + add %o1, 1, %o1 + retl + mov %g5, %o0 + +END(__memcpy_ultra3) + +#endif
\ No newline at end of file diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy.S new file mode 100644 index 0000000000..a708de10e2 --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memcpy.S @@ -0,0 +1,107 @@ +/* Multiple versions of memcpy + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + +#if !defined NOT_IN_libc + .text +ENTRY(memcpy) + .type memcpy, @gnu_indirect_function +# ifdef SHARED + mov %o7, %o5 + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3 + call 1f + or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3 +1: add %o7, %o3, %o3 + mov %o5, %o7 + sethi %hi(_rtld_global_ro), %o2 + or %o2, %lo(_rtld_global_ro), %o2 +# ifdef __arch64__ + ldx [%o3 + %o2], %o2 + ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2 +# else + ld [%o3 + %o2], %o2 + ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2 +# endif +# else + set _dl_hwcap, %o3 +# ifdef __arch64__ + ldx [%o3], %o2 +# else + ld [%o3 + 4], %o2 +# endif +# endif + andcc %o2, 0x80, %g0 ! HWCAP_SPARC_N2 + be 1f + andcc %o2, 0x40, %g0 ! HWCAP_SPARC_BLKINIT +# ifdef SHARED + sethi %gdop_hix22(__memcpy_niagara2), %o1 + xor %o1, %gdop_lox10(__memcpy_niagara2), %o1 +# else + set __memcpy_niagara2, %o1 +# endif + ba 10f + nop +1: be 1f + andcc %o2, 0x20, %g0 ! HWCAP_SPARC_ULTRA3 +# ifdef SHARED + sethi %gdop_hix22(__memcpy_niagara1), %o1 + xor %o1, %gdop_lox10(__memcpy_niagara1), %o1 +# else + set __memcpy_niagara1, %o1 +# endif + ba 10f + nop +1: be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__memcpy_ultra3), %o1 + xor %o1, %gdop_lox10(__memcpy_ultra3), %o1 +# else + set __memcpy_ultra3, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memcpy_ultra1), %o1 + xor %o1, %gdop_lox10(__memcpy_ultra1), %o1 +# else + set __memcpy_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(memcpy) + +# undef libc_hidden_builtin_def +/* IFUNC doesn't work with the hidden functions in a shared library. */ +# define libc_hidden_builtin_def(name) \ + .globl __GI_memcpy; __GI_memcpy = __memcpy_ultra1 + +#define memcpy __memcpy_ultra1 + +#endif + +#include "../memcpy.S" diff --git a/sysdeps/sparc/sparc64/sparcv9v/memset.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S index 64817b8871..20ea056216 100644 --- a/sysdeps/sparc/sparc64/sparcv9v/memset.S +++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara1.S @@ -29,12 +29,14 @@ #define XCC xcc #endif +#if !defined NOT_IN_libc + .register %g2,#scratch .text .align 32 -ENTRY(memset) +ENTRY(__memset_niagara1) /* %o0=buf, %o1=pat, %o2=len */ and %o1, 0xff, %o3 mov %o2, %o1 @@ -45,14 +47,14 @@ ENTRY(memset) sllx %o2, 32, %g1 ba,pt %XCC, 1f or %g1, %o2, %o2 -END(memset) +END(__memset_niagara1) -ENTRY(__bzero) +ENTRY(__bzero_niagara1) clr %o2 1: -#ifndef USE_BRP +# ifndef USE_BRP srl %o1, 0, %o1 -#endif +# endif brz,pn %o1, 90f mov %o0, %o3 @@ -125,7 +127,6 @@ ENTRY(__bzero) 90: retl mov %o3, %o0 -END(__bzero) +END(__bzero_niagara1) -libc_hidden_builtin_def (memset) -weak_alias (__bzero, bzero) +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memset.S b/sysdeps/sparc/sparc64/multiarch/memset.S new file mode 100644 index 0000000000..23e513f18f --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memset.S @@ -0,0 +1,145 @@ +/* Multiple versions of memset and bzero + Copyright (C) 2010 Free Software Foundation, Inc. + Contributed by David S. Miller (davem@davemloft.net) + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include <sysdep.h> +#include <rtld-global-offsets.h> + +#if !defined NOT_IN_libc + .text +ENTRY(memset) + .type memset, @gnu_indirect_function +# ifdef SHARED + mov %o7, %o5 + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3 + call 1f + or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3 +1: add %o7, %o3, %o3 + mov %o5, %o7 + sethi %hi(_rtld_global_ro), %o2 + or %o2, %lo(_rtld_global_ro), %o2 +# ifdef __arch64__ + ldx [%o3 + %o2], %o2 + ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2 +# else + ld [%o3 + %o2], %o2 + ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2 +# endif +# else + set _dl_hwcap, %o3 +# ifdef __arch64__ + ldx [%o3], %o2 +# else + ld [%o3 + 4], %o2 +# endif +# endif + andcc %o2, 0x40, %g0 ! HWCAP_SPARC_BLKINIT + be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__memset_niagara1), %o1 + xor %o1, %gdop_lox10(__memset_niagara1), %o1 +# else + set __memset_niagara1, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memset_ultra1), %o1 + xor %o1, %gdop_lox10(__memset_ultra1), %o1 +# else + set __memset_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(memset) + +ENTRY(__bzero) + .type bzero, @gnu_indirect_function +# ifdef SHARED + mov %o7, %o5 + sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %o3 + call 1f + or %o3, %lo(_GLOBAL_OFFSET_TABLE_+4), %o3 +1: add %o7, %o3, %o3 + mov %o5, %o7 + sethi %hi(_rtld_global_ro), %o2 + or %o2, %lo(_rtld_global_ro), %o2 +# ifdef __arch64__ + ldx [%o3 + %o2], %o2 + ldx [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET], %o2 +# else + ld [%o3 + %o2], %o2 + ld [%o2 + RTLD_GLOBAL_RO_DL_HWCAP_OFFSET + 4], %o2 +# endif +# else + set _dl_hwcap, %o3 +# ifdef __arch64__ + ldx [%o3], %o2 +# else + ld [%o3 + 4], %o2 +# endif +# endif + andcc %o2, 0x40, %g0 ! HWCAP_SPARC_BLKINIT + be 9f + nop +# ifdef SHARED + sethi %gdop_hix22(__bzero_niagara1), %o1 + xor %o1, %gdop_lox10(__bzero_niagara1), %o1 +# else + set __bzero_niagara1, %o1 +# endif + ba 10f + nop +9: +# ifdef SHARED + sethi %gdop_hix22(__memset_ultra1), %o1 + xor %o1, %gdop_lox10(__memset_ultra1), %o1 +# else + set __bzero_ultra1, %o1 +# endif +10: +# ifdef SHARED + add %o3, %o1, %o1 +# endif + retl + mov %o1, %o0 +END(__bzero) + +weak_alias (__bzero, bzero) + +# undef weak_alias +# define weak_alias(a, b) + +# undef libc_hidden_builtin_def +/* IFUNC doesn't work with the hidden functions in a shared library. */ +# define libc_hidden_builtin_def(name) \ + .globl __GI_memset; __GI_memset = __memset_ultra1 + +#define memset __memset_ultra1 +#define __bzero __bzero_ultra1 + +#endif + +#include "../memset.S" diff --git a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S b/sysdeps/sparc/sparc64/sparcv9b/memcpy.S deleted file mode 100644 index 760d526630..0000000000 --- a/sysdeps/sparc/sparc64/sparcv9b/memcpy.S +++ /dev/null @@ -1,610 +0,0 @@ -/* Copy SIZE bytes from SRC to DEST. - For UltraSPARC-III. - Copyright (C) 2001, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by David S. Miller (davem@redhat.com) - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#include <sysdep.h> - -#define ASI_BLK_P 0xf0 -#define FPRS_FEF 0x04 -#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs -#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs - -#ifndef XCC -#define USE_BPR -#define XCC xcc -#endif - - .register %g2,#scratch - .register %g3,#scratch - .register %g6,#scratch - - .text - .align 32 - -ENTRY(bcopy) - sub %o1, %o0, %o4 - mov %o0, %g4 - cmp %o4, %o2 - mov %o1, %o0 - bgeu,pt %XCC, 100f - mov %g4, %o1 -#ifndef USE_BPR - srl %o2, 0, %o2 -#endif - brnz,pn %o2, 220f - add %o0, %o2, %o0 - retl - nop -END(bcopy) - - /* Special/non-trivial issues of this code: - * - * 1) %o5 is preserved from VISEntryHalf to VISExitHalf - * 2) Only low 32 FPU registers are used so that only the - * lower half of the FPU register set is dirtied by this - * code. This is especially important in the kernel. - * 3) This code never prefetches cachelines past the end - * of the source buffer. - * - * The cheetah's flexible spine, oversized liver, enlarged heart, - * slender muscular body, and claws make it the swiftest hunter - * in Africa and the fastest animal on land. Can reach speeds - * of up to 2.4GB per second. - */ - .align 32 -ENTRY(memcpy) - -100: /* %o0=dst, %o1=src, %o2=len */ - mov %o0, %g5 - cmp %o2, 0 - be,pn %XCC, out -218: or %o0, %o1, %o3 - cmp %o2, 16 - bleu,a,pn %XCC, small_copy - or %o3, %o2, %o3 - - cmp %o2, 256 - blu,pt %XCC, medium_copy - andcc %o3, 0x7, %g0 - - ba,pt %xcc, enter - andcc %o0, 0x3f, %g2 - - /* Here len >= 256 and condition codes reflect execution - * of "andcc %o0, 0x7, %g2", done by caller. - */ - .align 64 -enter: - /* Is 'dst' already aligned on an 64-byte boundary? */ - be,pt %XCC, 2f - - /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number - * of bytes to copy to make 'dst' 64-byte aligned. We pre- - * subtract this from 'len'. - */ - sub %g2, 0x40, %g2 - sub %g0, %g2, %g2 - sub %o2, %g2, %o2 - - /* Copy %g2 bytes from src to dst, one byte at a time. */ -1: ldub [%o1 + 0x00], %o3 - add %o1, 0x1, %o1 - add %o0, 0x1, %o0 - subcc %g2, 0x1, %g2 - - bg,pt %XCC, 1b - stb %o3, [%o0 + -1] - -2: VISEntryHalf - and %o1, 0x7, %g1 - ba,pt %xcc, begin - alignaddr %o1, %g0, %o1 - - .align 64 -begin: - prefetch [%o1 + 0x000], #one_read - prefetch [%o1 + 0x040], #one_read - andn %o2, (0x40 - 1), %o4 - prefetch [%o1 + 0x080], #one_read - prefetch [%o1 + 0x0c0], #one_read - ldd [%o1 + 0x000], %f0 - prefetch [%o1 + 0x100], #one_read - ldd [%o1 + 0x008], %f2 - prefetch [%o1 + 0x140], #one_read - ldd [%o1 + 0x010], %f4 - prefetch [%o1 + 0x180], #one_read - faligndata %f0, %f2, %f16 - ldd [%o1 + 0x018], %f6 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x020], %f8 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x028], %f10 - faligndata %f6, %f8, %f22 - - ldd [%o1 + 0x030], %f12 - faligndata %f8, %f10, %f24 - ldd [%o1 + 0x038], %f14 - faligndata %f10, %f12, %f26 - ldd [%o1 + 0x040], %f0 - - sub %o4, 0x80, %o4 - add %o1, 0x40, %o1 - ba,pt %xcc, loop - srl %o4, 6, %o3 - - .align 64 -loop: - ldd [%o1 + 0x008], %f2 - faligndata %f12, %f14, %f28 - ldd [%o1 + 0x010], %f4 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - ldd [%o1 + 0x018], %f6 - faligndata %f0, %f2, %f16 - - ldd [%o1 + 0x020], %f8 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x028], %f10 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x030], %f12 - faligndata %f6, %f8, %f22 - ldd [%o1 + 0x038], %f14 - faligndata %f8, %f10, %f24 - - ldd [%o1 + 0x040], %f0 - prefetch [%o1 + 0x180], #one_read - faligndata %f10, %f12, %f26 - subcc %o3, 0x01, %o3 - add %o1, 0x40, %o1 - bg,pt %XCC, loop - add %o0, 0x40, %o0 - - /* Finally we copy the last full 64-byte block. */ -loopfini: - ldd [%o1 + 0x008], %f2 - faligndata %f12, %f14, %f28 - ldd [%o1 + 0x010], %f4 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - ldd [%o1 + 0x018], %f6 - faligndata %f0, %f2, %f16 - ldd [%o1 + 0x020], %f8 - faligndata %f2, %f4, %f18 - ldd [%o1 + 0x028], %f10 - faligndata %f4, %f6, %f20 - ldd [%o1 + 0x030], %f12 - faligndata %f6, %f8, %f22 - ldd [%o1 + 0x038], %f14 - faligndata %f8, %f10, %f24 - cmp %g1, 0 - be,pt %XCC, 1f - add %o0, 0x40, %o0 - ldd [%o1 + 0x040], %f0 -1: faligndata %f10, %f12, %f26 - faligndata %f12, %f14, %f28 - faligndata %f14, %f0, %f30 - stda %f16, [%o0] ASI_BLK_P - add %o0, 0x40, %o0 - add %o1, 0x40, %o1 - membar #Sync - - /* Now we copy the (len modulo 64) bytes at the end. - * Note how we borrow the %f0 loaded above. - * - * Also notice how this code is careful not to perform a - * load past the end of the src buffer. - */ -loopend: - and %o2, 0x3f, %o2 - andcc %o2, 0x38, %g2 - be,pn %XCC, endcruft - subcc %g2, 0x8, %g2 - be,pn %XCC, endcruft - cmp %g1, 0 - - be,a,pt %XCC, 1f - ldd [%o1 + 0x00], %f0 - -1: ldd [%o1 + 0x08], %f2 - add %o1, 0x8, %o1 - sub %o2, 0x8, %o2 - subcc %g2, 0x8, %g2 - faligndata %f0, %f2, %f8 - std %f8, [%o0 + 0x00] - be,pn %XCC, endcruft - add %o0, 0x8, %o0 - ldd [%o1 + 0x08], %f0 - add %o1, 0x8, %o1 - sub %o2, 0x8, %o2 - subcc %g2, 0x8, %g2 - faligndata %f2, %f0, %f8 - std %f8, [%o0 + 0x00] - bne,pn %XCC, 1b - add %o0, 0x8, %o0 - - /* If anything is left, we copy it one byte at a time. - * Note that %g1 is (src & 0x3) saved above before the - * alignaddr was performed. - */ -endcruft: - cmp %o2, 0 - add %o1, %g1, %o1 - VISExitHalf - be,pn %XCC, out - sub %o0, %o1, %o3 - - andcc %g1, 0x7, %g0 - bne,pn %icc, small_copy_unaligned - andcc %o2, 0x8, %g0 - be,pt %icc, 1f - nop - ldx [%o1], %o5 - stx %o5, [%o1 + %o3] - add %o1, 0x8, %o1 - -1: andcc %o2, 0x4, %g0 - be,pt %icc, 1f - nop - lduw [%o1], %o5 - stw %o5, [%o1 + %o3] - add %o1, 0x4, %o1 - -1: andcc %o2, 0x2, %g0 - be,pt %icc, 1f - nop - lduh [%o1], %o5 - sth %o5, [%o1 + %o3] - add %o1, 0x2, %o1 - -1: andcc %o2, 0x1, %g0 - be,pt %icc, out - nop - ldub [%o1], %o5 - ba,pt %xcc, out - stb %o5, [%o1 + %o3] - -medium_copy: /* 16 < len <= 64 */ - bne,pn %XCC, small_copy_unaligned - sub %o0, %o1, %o3 - -medium_copy_aligned: - andn %o2, 0x7, %o4 - and %o2, 0x7, %o2 -1: subcc %o4, 0x8, %o4 - ldx [%o1], %o5 - stx %o5, [%o1 + %o3] - bgu,pt %XCC, 1b - add %o1, 0x8, %o1 - andcc %o2, 0x4, %g0 - be,pt %XCC, 1f - nop - sub %o2, 0x4, %o2 - lduw [%o1], %o5 - stw %o5, [%o1 + %o3] - add %o1, 0x4, %o1 -1: cmp %o2, 0 - be,pt %XCC, out - nop - ba,pt %xcc, small_copy_unaligned - nop - -small_copy: /* 0 < len <= 16 */ - andcc %o3, 0x3, %g0 - bne,pn %XCC, small_copy_unaligned - sub %o0, %o1, %o3 - -small_copy_aligned: - subcc %o2, 4, %o2 - lduw [%o1], %g1 - stw %g1, [%o1 + %o3] - bgu,pt %XCC, small_copy_aligned - add %o1, 4, %o1 - -out: retl - mov %g5, %o0 - - .align 32 -small_copy_unaligned: - subcc %o2, 1, %o2 - ldub [%o1], %g1 - stb %g1, [%o1 + %o3] - bgu,pt %XCC, small_copy_unaligned - add %o1, 1, %o1 - retl - mov %g5, %o0 - -END(memcpy) - -#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stw %t0, [%dst - offset - 0x1c]; \ - srlx %t0, 32, %t0; \ - stw %t0, [%dst - offset - 0x20]; \ - stw %t1, [%dst - offset - 0x14]; \ - srlx %t1, 32, %t1; \ - stw %t1, [%dst - offset - 0x18]; \ - stw %t2, [%dst - offset - 0x0c]; \ - srlx %t2, 32, %t2; \ - stw %t2, [%dst - offset - 0x10]; \ - stw %t3, [%dst - offset - 0x04]; \ - srlx %t3, 32, %t3; \ - stw %t3, [%dst - offset - 0x08]; - -#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src - offset - 0x20], %t0; \ - ldx [%src - offset - 0x18], %t1; \ - ldx [%src - offset - 0x10], %t2; \ - ldx [%src - offset - 0x08], %t3; \ - stx %t0, [%dst - offset - 0x20]; \ - stx %t1, [%dst - offset - 0x18]; \ - stx %t2, [%dst - offset - 0x10]; \ - stx %t3, [%dst - offset - 0x08]; \ - ldx [%src - offset - 0x40], %t0; \ - ldx [%src - offset - 0x38], %t1; \ - ldx [%src - offset - 0x30], %t2; \ - ldx [%src - offset - 0x28], %t3; \ - stx %t0, [%dst - offset - 0x40]; \ - stx %t1, [%dst - offset - 0x38]; \ - stx %t2, [%dst - offset - 0x30]; \ - stx %t3, [%dst - offset - 0x28]; - -#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stw %t0, [%dst + offset + 0x04]; \ - srlx %t0, 32, %t2; \ - stw %t2, [%dst + offset + 0x00]; \ - stw %t1, [%dst + offset + 0x0c]; \ - srlx %t1, 32, %t3; \ - stw %t3, [%dst + offset + 0x08]; - -#define RMOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1) \ - ldx [%src + offset + 0x00], %t0; \ - ldx [%src + offset + 0x08], %t1; \ - stx %t0, [%dst + offset + 0x00]; \ - stx %t1, [%dst + offset + 0x08]; - - .align 32 -228: andcc %o2, 1, %g0 /* IEU1 Group */ - be,pt %icc, 2f+4 /* CTI */ -1: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 Group */ - be,pn %xcc, 229f /* CTI */ - stb %o5, [%o0] /* Store */ -2: ldub [%o1 - 1], %o5 /* LOAD Group */ - sub %o0, 2, %o0 /* IEU0 */ - ldub [%o1 - 2], %g5 /* LOAD Group */ - sub %o1, 2, %o1 /* IEU0 */ - subcc %o2, 2, %o2 /* IEU1 Group */ - stb %o5, [%o0 + 1] /* Store */ - bne,pt %xcc, 2b /* CTI */ - stb %g5, [%o0] /* Store */ -229: retl - mov %g4, %o0 - - .align 32 -ENTRY(memmove) - mov %o0, %g5 -#ifndef USE_BPR - srl %o2, 0, %o2 /* IEU1 Group */ -#endif - brz,pn %o2, out /* CTI Group */ - sub %o0, %o1, %o4 /* IEU0 */ - cmp %o4, %o2 /* IEU1 Group */ - bgeu,pt %XCC, 218b /* CTI */ - mov %o0, %g4 /* IEU0 */ - add %o0, %o2, %o0 /* IEU0 Group */ -220: add %o1, %o2, %o1 /* IEU1 */ - cmp %o2, 15 /* IEU1 Group */ - bleu,pn %xcc, 228b /* CTI */ - andcc %o0, 7, %g2 /* IEU1 Group */ - sub %o0, %o1, %g5 /* IEU0 */ - andcc %g5, 3, %o5 /* IEU1 Group */ - bne,pn %xcc, 232f /* CTI */ - andcc %o1, 3, %g0 /* IEU1 Group */ - be,a,pt %xcc, 236f /* CTI */ - andcc %o1, 4, %g0 /* IEU1 Group */ - andcc %o1, 1, %g0 /* IEU1 Group */ - be,pn %xcc, 4f /* CTI */ - andcc %o1, 2, %g0 /* IEU1 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - sub %o2, 1, %o2 /* IEU0 Group */ - be,pn %xcc, 5f /* CTI Group */ - stb %g2, [%o0] /* Store */ -4: lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sub %o0, 2, %o0 /* IEU1 */ - sub %o2, 2, %o2 /* IEU0 */ - sth %g2, [%o0] /* Store Group + bubble */ -5: andcc %o1, 4, %g0 /* IEU1 */ -236: be,a,pn %xcc, 2f /* CTI */ - andcc %o2, -128, %g6 /* IEU1 Group */ - lduw [%o1 - 4], %g5 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - sub %o0, 4, %o0 /* IEU1 */ - sub %o2, 4, %o2 /* IEU0 Group */ - stw %g5, [%o0] /* Store */ - andcc %o2, -128, %g6 /* IEU1 Group */ -2: be,pn %xcc, 235f /* CTI */ - andcc %o0, 4, %g0 /* IEU1 Group */ - be,pn %xcc, 282f + 4 /* CTI Group */ -5: RMOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - RMOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 5b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ -235: andcc %o2, 0x70, %g6 /* IEU1 Group */ -41: be,pn %xcc, 280f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -279: rd %pc, %o5 /* PDU Group */ - sll %g6, 1, %g5 /* IEU0 Group */ - sub %o1, %g6, %o1 /* IEU1 */ - sub %o5, %g5, %o5 /* IEU0 Group */ - jmpl %o5 + %lo(280f - 279b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5) - RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5) -280: be,pt %xcc, 281f /* CTI */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - stw %g2, [%o0 + 4] /* Store Group */ - sub %o1, 8, %o1 /* IEU1 */ - srlx %g2, 32, %g2 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -281: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o1, 4, %o1 /* IEU0 */ - stw %g2, [%o0 - 4] /* Store Group */ - sub %o0, 4, %o0 /* IEU0 */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o1, 2, %o1 /* IEU0 */ - sth %g2, [%o0 - 2] /* Store Group */ - sub %o0, 2, %o0 /* IEU0 */ -1: be,pt %xcc, 211f /* CTI */ - nop /* IEU1 */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -211: retl - mov %g4, %o0 - -282: RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5) - RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5) - subcc %g6, 128, %g6 /* IEU1 Group */ - sub %o1, 128, %o1 /* IEU0 */ - bne,pt %xcc, 282b /* CTI */ - sub %o0, 128, %o0 /* IEU0 Group */ - andcc %o2, 0x70, %g6 /* IEU1 */ - be,pn %xcc, 284f /* CTI */ - andcc %o2, 8, %g0 /* IEU1 Group */ - /* Clk1 8-( */ - /* Clk2 8-( */ - /* Clk3 8-( */ - /* Clk4 8-( */ -283: rd %pc, %o5 /* PDU Group */ - sub %o1, %g6, %o1 /* IEU0 Group */ - sub %o5, %g6, %o5 /* IEU1 */ - jmpl %o5 + %lo(284f - 283b), %g0 /* CTI Group brk forced*/ - sub %o0, %g6, %o0 /* IEU0 Group */ - RMOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3) - RMOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3) -284: be,pt %xcc, 285f /* CTI Group */ - andcc %o2, 4, %g0 /* IEU1 */ - ldx [%o1 - 8], %g2 /* Load Group */ - sub %o0, 8, %o0 /* IEU0 */ - sub %o1, 8, %o1 /* IEU0 Group */ - stx %g2, [%o0] /* Store */ -285: be,pt %xcc, 1f /* CTI */ - andcc %o2, 2, %g0 /* IEU1 Group */ - lduw [%o1 - 4], %g2 /* Load Group */ - sub %o0, 4, %o0 /* IEU0 */ - sub %o1, 4, %o1 /* IEU0 Group */ - stw %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - andcc %o2, 1, %g0 /* IEU1 Group */ - lduh [%o1 - 2], %g2 /* Load Group */ - sub %o0, 2, %o0 /* IEU0 */ - sub %o1, 2, %o1 /* IEU0 Group */ - sth %g2, [%o0] /* Store */ -1: be,pt %xcc, 1f /* CTI */ - nop /* IEU0 Group */ - ldub [%o1 - 1], %g2 /* Load Group */ - stb %g2, [%o0 - 1] /* Store Group + bubble */ -1: retl - mov %g4, %o0 - -232: brz,pt %g2, 2f /* CTI Group */ - sub %o2, %g2, %o2 /* IEU0 Group */ -1: ldub [%o1 - 1], %g5 /* Load Group */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %g2, 1, %g2 /* IEU1 Group */ - bne,pt %xcc, 1b /* CTI */ - stb %g5, [%o0] /* Store */ -2: andn %o2, 7, %g5 /* IEU0 Group */ - and %o2, 7, %o2 /* IEU1 */ - fmovd %f0, %f2 /* FPU */ - alignaddr %o1, %g0, %g1 /* GRU Group */ - ldd [%g1], %f4 /* Load Group */ -1: ldd [%g1 - 8], %f6 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 Group */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f6, %f4, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 Group */ - be,pn %xcc, 233f /* CTI */ - sub %o0, 8, %o0 /* IEU1 */ - ldd [%g1 - 8], %f4 /* Load Group */ - sub %g1, 8, %g1 /* IEU0 */ - subcc %g5, 8, %g5 /* IEU1 */ - faligndata %f4, %f6, %f0 /* GRU Group */ - std %f0, [%o0 - 8] /* Store */ - sub %o1, 8, %o1 /* IEU0 */ - bne,pn %xcc, 1b /* CTI Group */ - sub %o0, 8, %o0 /* IEU0 */ -233: brz,pn %o2, 234f /* CTI Group */ - nop /* IEU0 */ -237: ldub [%o1 - 1], %g5 /* LOAD */ - sub %o1, 1, %o1 /* IEU0 */ - sub %o0, 1, %o0 /* IEU1 */ - subcc %o2, 1, %o2 /* IEU1 */ - bne,pt %xcc, 237b /* CTI */ - stb %g5, [%o0] /* Store Group */ -234: wr %g0, FPRS_FEF, %fprs - retl - mov %g4, %o0 -END(memmove) - -#ifdef USE_BPR -weak_alias (memcpy, __align_cpy_1) -weak_alias (memcpy, __align_cpy_2) -weak_alias (memcpy, __align_cpy_4) -weak_alias (memcpy, __align_cpy_8) -weak_alias (memcpy, __align_cpy_16) -#endif -libc_hidden_builtin_def (memcpy) -libc_hidden_builtin_def (memmove) diff --git a/sysdeps/sparc/sparc64/sparcv9v2/memset.S b/sysdeps/sparc/sparc64/sparcv9v2/memset.S deleted file mode 100644 index 809d3ed9c6..0000000000 --- a/sysdeps/sparc/sparc64/sparcv9v2/memset.S +++ /dev/null @@ -1 +0,0 @@ -#include <sparc64/sparcv9v/memset.S> diff --git a/sysdeps/sparc/sparc64/strlen.S b/sysdeps/sparc/sparc64/strlen.S index cc15e4e3fb..64350fb05e 100644 --- a/sysdeps/sparc/sparc64/strlen.S +++ b/sysdeps/sparc/sparc64/strlen.S @@ -1,8 +1,9 @@ /* Determine the length of a string. For SPARC v9. - Copyright (C) 1998, 1999, 2003 Free Software Foundation, Inc. + Copyright (C) 1998, 1999, 2003, 2010 Free Software Foundation, Inc. This file is part of the GNU C Library. - Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz> and - Jakub Jelinek <jj@ultra.linux.cz>. + Contributed by Jan Vondrak <jvon4518@ss1000.ms.mff.cuni.cz>, + Jakub Jelinek <jj@ultra.linux.cz>, and + David S. Miller <davem@davemloft.net>. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -20,155 +21,66 @@ 02111-1307 USA. */ #include <sysdep.h> -#include <asm/asi.h> - - /* Normally, this uses - ((xword - 0x0101010101010101) & 0x8080808080808080) test - to find out if any byte in xword could be zero. This is fast, but - also gives false alarm for any byte in range 0x81-0xff. It does - not matter for correctness, as if this test tells us there could - be some zero byte, we check it byte by byte, but if bytes with - high bits set are common in the strings, then this will give poor - performance. You can #define EIGHTBIT_NOT_RARE and the algorithm - will use one tick slower, but more precise test - ((xword - 0x0101010101010101) & (~xword) & 0x8080808080808080), - which does not give any false alarms (but if some bits are set, - one cannot assume from it which bytes are zero and which are not). - It is yet to be measured, what is the correct default for glibc - in these days for an average user. - */ + + .register %g2, #scratch + .register %g3, #scratch .text .align 32 ENTRY(strlen) - sethi %hi(0x01010101), %g1 /* IEU0 Group */ - ldub [%o0], %o3 /* Load */ - or %g1, %lo(0x01010101), %g1 /* IEU0 Group */ - mov %o0, %o1 /* IEU1 */ - - sllx %g1, 32, %g4 /* IEU0 Group */ - andcc %o0, 7, %g0 /* IEU1 */ - or %g1, %g4, %g1 /* IEU0 Group */ - brz,pn %o3, 13f /* CTI+IEU1 */ - - sllx %g1, 7, %g4 /* IEU0 Group */ - bne,a,pn %icc, 15f /* CTI */ - add %o0, 1, %o0 /* IEU1 */ - /* %g1 = 0x0101010101010101 * - * %g4 = 0x8080808080808080 * - * %o0 = string pointer * - * %o1 = start of string */ -1: ldx [%o0], %o3 /* Load Group */ - - add %o0, 8, %o0 /* IEU1 */ -2: sub %o3, %g1, %o2 /* IEU0 Group */ -#ifdef EIGHTBIT_NOT_RARE - andn %o2, %o3, %o5 /* IEU0 Group */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ -#ifdef EIGHTBIT_NOT_RARE - srlx %o5, 32, %o5 /* IEU0 */ - -3: andcc %o5, %g4, %g0 /* IEU1 Group */ -#else - srlx %o2, 32, %o2 /* IEU0 */ - -3: andcc %o2, %g4, %g0 /* IEU1 Group */ -#endif - be,pn %xcc, 4f /* CTI */ - srlx %g5, 56, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 12f /* CTI */ - srlx %g5, 48, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 11f /* CTI */ - - srlx %g5, 40, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 10f /* CTI */ - srlx %g5, 32, %o2 /* IEU0 */ - - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 9f /* CTI */ -4: srlx %g5, 24, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - - be,pn %icc, 8f /* CTI */ - srlx %g5, 16, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 7f /* CTI */ - - srlx %g5, 8, %o2 /* IEU0 */ - andcc %o2, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 6f /* CTI */ - sub %o3, %g1, %o2 /* IEU0 */ - - andcc %g5, 0xff, %g0 /* IEU1 Group */ - be,pn %icc, 5f /* CTI */ - ldxa [%o0] ASI_PNF, %o3 /* Load */ - andcc %o2, %g4, %g0 /* IEU1 Group */ - - be,pt %xcc, 2b /* CTI */ - add %o0, 8, %o0 /* IEU0 */ - addcc %o2, %g1, %g5 /* IEU1 Group */ - ba,pt %xcc, 3b /* CTI */ - - srlx %o2, 32, %o2 /* IEU0 */ -5: add %o0, -9, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -6: add %o0, -10, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -7: add %o0, -11, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -8: add %o0, -12, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -9: add %o0, -13, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ - -10: add %o0, -14, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -11: add %o0, -15, %o0 /* IEU0 Group */ - - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ -12: add %o0, -16, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - - sub %o0, %o1, %o0 /* IEU0 */ -13: retl /* CTI+IEU1 Group */ - mov 0, %o0 /* IEU0 */ - nop - -15: ldub [%o0], %o3 /* Load Group */ -16: andcc %o0, 7, %g0 /* IEU1 */ - be,pn %icc, 1b /* CTI */ - nop /* IEU0 Group */ - - add %o0, 1, %o0 /* IEU1 */ - andcc %o3, 0xff, %g0 /* IEU1 Group */ - bne,a,pt %icc, 16b /* CTI */ - lduba [%o0] ASI_PNF, %o3 /* Load */ - - add %o0, -1, %o0 /* IEU0 Group */ - retl /* CTI+IEU1 Group */ - sub %o0, %o1, %o0 /* IEU0 */ + mov %o0, %o1 + andn %o0, 0x7, %o0 + + ldx [%o0], %o5 + and %o1, 0x7, %g1 + mov -1, %g5 + + sethi %hi(0x01010101), %o2 + sll %g1, 3, %g1 + + or %o2, %lo(0x01010101), %o2 + srlx %g5, %g1, %o3 + + sllx %o2, 32, %g1 + sethi %hi(0x0000ff00), %g5 + + orn %o5, %o3, %o5 + or %o2, %g1, %o2 + + sllx %o2, 7, %o3 +10: add %o0, 8, %o0 + + andn %o3, %o5, %g1 + sub %o5, %o2, %g2 + + andcc %g1, %g2, %g0 + be,a,pt %xcc, 10b + ldx [%o0], %o5 + srlx %o5, 32, %g1 + + andn %o3, %g1, %o4 + sub %g1, %o2, %g2 + + add %o0, 4, %g3 + andcc %o4, %g2, %g0 + movne %icc, %g1, %o5 + + move %icc, %g3, %o0 + or %g5, %lo(0x0000ff00), %g5 + mov 3 - 8, %g2 + + andcc %o5, %g5, %g0 + srlx %o5, 16, %g1 + move %icc, 2 - 8, %g2 + + andcc %g1, 0xff, %g0 + srl %o5, 24, %o5 + move %icc, 1 - 8, %g2 + + movrz %o5, 0 - 8, %g2 + sub %o0, %o1, %o0 + + retl + add %o0, %g2, %o0 END(strlen) libc_hidden_builtin_def (strlen) |