/* Highly optimized version for i586.
   Copyright (C) 1997, 2000, 2003, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper, 1997.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <sysdep.h>
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"

/* BEWARE: `#ifdef memcpy' means that memcpy is redefined as `mempcpy',
   and the return value is the byte after the last one copied in
   the destination.  */
#define MEMPCPY_P (defined memcpy)

/* Stack offsets of the arguments, valid once both %edi and %esi have
   been pushed.  */
#define PARMS	LINKAGE+8	/* space for 2 saved regs */
#define RTN	PARMS
#define DEST	RTN+RTN_SIZE
#define SRC	DEST+PTR_SIZE
#define LEN	SRC+PTR_SIZE

/* memcpy (DEST, SRC, LEN) -- AT&T syntax, 32-bit x86, arguments on the
   stack.

   Register use inside the routine:
     %edi   destination cursor (saved/restored here)
     %esi   source cursor (saved/restored here)
     %ecx   byte counter
     %eax   return value; also scratch inside the block loop
     %edx   scratch inside the block loop

   Strategy:
     LEN <= 32:  one `rep movsb' copies everything.
     LEN >  32:  copy (-DEST & 3) bytes so that %edi is 4-byte aligned,
		 copy 32 bytes per iteration with word moves while at
		 least 32 bytes remain, then finish with `rep movsb'.

   Returns in %eax the original DEST, or, when MEMPCPY_P is true, the
   address one past the last byte written.  */

	.text
ENTRY (BP_SYM (memcpy))
	ENTER

	pushl	%edi
	cfi_adjust_cfa_offset (4)
	pushl	%esi
	cfi_adjust_cfa_offset (4)

	/* %edi was pushed first, so it now lives at 4(%esp) and %esi at
	   0(%esp); the CFI offsets below must reflect exactly that.  */
	movl	DEST(%esp), %edi
	cfi_rel_offset (edi, 4)
	movl	SRC(%esp), %esi
	cfi_rel_offset (esi, 0)
	movl	LEN(%esp), %ecx
	CHECK_BOUNDS_BOTH_WIDE (%edi, DEST(%esp), %ecx)
	CHECK_BOUNDS_BOTH_WIDE (%esi, SRC(%esp), %ecx)
	movl	%edi, %eax		/* return value for the short path */

	/* We need this in any case.  */
	cld

	/* Cutoff for the big loop is a size of 32 bytes since otherwise
	   the loop will never be entered.  */
	cmpl	$32, %ecx
	jbe	L(1)

	/* %eax = (-DEST) & 3: number of bytes needed to bring %edi up to
	   a 4-byte boundary.  Copy those with `rep movsb' while the
	   remaining length is parked in %eax.  */
	negl	%eax
	andl	$3, %eax
	subl	%eax, %ecx
	xchgl	%eax, %ecx

	rep; movsb

	/* %ecx = remaining length - 32.  A negative result means there is
	   not even one full 32-byte block left.  */
	movl	%eax, %ecx
	subl	$32, %ecx
	js	L(2)

	/* Read ahead to make sure we write in the cache since the stupid
	   i586 designers haven't implemented read-on-write-miss.  */
	movl	(%edi), %eax
L(3):	movl	28(%edi), %edx

	/* Now correct the loop counter.  Please note that in the following
	   code the flags are not changed anymore.  The `jns' at the bottom
	   of the loop tests the sign produced by this `subl'.  */
	subl	$32, %ecx

	movl	(%esi), %eax
	movl	4(%esi), %edx
	movl	%eax, (%edi)
	movl	%edx, 4(%edi)
	movl	8(%esi), %eax
	movl	12(%esi), %edx
	movl	%eax, 8(%edi)
	movl	%edx, 12(%edi)
	movl	16(%esi), %eax
	movl	20(%esi), %edx
	movl	%eax, 16(%edi)
	movl	%edx, 20(%edi)
	movl	24(%esi), %eax
	movl	28(%esi), %edx
	movl	%eax, 24(%edi)
	movl	%edx, 28(%edi)

	/* Advance both cursors with `leal' so the flags stay untouched.  */
	leal	32(%esi), %esi
	leal	32(%edi), %edi

	jns	L(3)

	/* Correct extra loop counter modification.  */
L(2):	addl	$32, %ecx
#if !MEMPCPY_P
	/* %eax was used as scratch above; reload the original DEST.  */
	movl	DEST(%esp), %eax
#endif

	/* Copy whatever is left (or everything, on the short path).  */
L(1):	rep; movsb

#if MEMPCPY_P
	/* mempcpy returns the address just past the last byte written.  */
	movl	%edi, %eax
#endif

	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	LEAVE
	RET_PTR
END (BP_SYM (memcpy))
#if !MEMPCPY_P
libc_hidden_builtin_def (memcpy)
#endif