From 21208604353a51f9c6430db9b33f9bb85ff8b8b9 Mon Sep 17 00:00:00 2001
From: Ulrich Drepper
Date: Wed, 9 Apr 2008 20:01:52 +0000
Subject: [BZ #4314] * sysdeps/i386/i686/memcpy.S: Optimize copying of aligned buffers.

---
 sysdeps/i386/i686/memcpy.S | 57 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 46 insertions(+), 11 deletions(-)

(limited to 'sysdeps')

diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S
index 00e84ec2e5..ff5c66e9d4 100644
--- a/sysdeps/i386/i686/memcpy.S
+++ b/sysdeps/i386/i686/memcpy.S
@@ -1,7 +1,7 @@
 /* Copy memory block and return pointer to beginning of destination block
    For Intel 80x86, x>=6.
    This file is part of the GNU C Library.
-   Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2003, 2004, 2008 Free Software Foundation, Inc.
    Contributed by Ulrich Drepper , 1999.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -41,29 +41,64 @@ END (__memcpy_chk)
 ENTRY (BP_SYM (memcpy))
 	ENTER
 
-	movl	LEN(%esp), %ecx
 	movl	%edi, %eax
 	movl	DEST(%esp), %edi
 	movl	%esi, %edx
 	movl	SRC(%esp), %esi
-	CHECK_BOUNDS_BOTH_WIDE (%edi, DEST(%esp), %ecx)
-	CHECK_BOUNDS_BOTH_WIDE (%esi, SRC(%esp), %ecx)
 
+	movl	%edi, %ecx
+	xorl	%esi, %ecx
+	andl	$3, %ecx
+	movl	LEN(%esp), %ecx
 	cld
-	shrl	$1, %ecx
-	jnc	1f
+	jne	.Lunaligned
+
+	cmpl	$3, %ecx
+	jbe	.Lunaligned
+
+	testl	$3, %esi
+	je	1f
 	movsb
-1:	shrl	$1, %ecx
-	jnc	2f
-	movsw
-2:	rep
+	decl	%ecx
+	testl	$3, %esi
+	je	1f
+	movsb
+	decl	%ecx
+	testl	$3, %esi
+	je	1f
+	movsb
+	decl	%ecx
+1:	pushl	%eax
+	movl	%ecx, %eax
+	shrl	$2, %ecx
+	rep
 	movsl
-	movl	%eax, %edi
+	movl	%eax, %ecx
+	andl	$3, %ecx
+	rep
+	movsb
+	popl	%eax
+
+.Lend:	movl	%eax, %edi
 	movl	%edx, %esi
 	movl	DEST(%esp), %eax
 	RETURN_BOUNDED_POINTER (DEST(%esp))
 
 	LEAVE
 	RET_PTR
+
+	/* When we come here the pointers do not have the same
+	   alignment or the length is too short.  No need to optimize for
+	   aligned memory accesses.  */
+.Lunaligned:
+	shrl	$1, %ecx
+	jnc	1f
+	movsb
+1:	shrl	$1, %ecx
+	jnc	2f
+	movsw
+2:	rep
+	movsl
+	jmp	.Lend
 END (BP_SYM (memcpy))
 libc_hidden_builtin_def (memcpy)
-- 
cgit v1.2.3