summaryrefslogtreecommitdiff
path: root/arch/arm/lib/memcpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/lib/memcpy.S')
-rw-r--r--arch/arm/lib/memcpy.S393
1 files changed, 393 insertions, 0 deletions
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
new file mode 100644
index 00000000000..f5a593ceb8c
--- /dev/null
+++ b/arch/arm/lib/memcpy.S
@@ -0,0 +1,393 @@
+/*
+ * linux/arch/arm/lib/memcpy.S
+ *
+ * Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+
+#define ENTER \
+ mov ip,sp ;\
+ stmfd sp!,{r0,r4-r9,fp,ip,lr,pc} ;\
+ sub fp,ip,#4
+
+#define EXIT \
+ LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})
+
+#define EXITEQ \
+ LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})
+
+/*
+ * Prototype: void memcpy(void *to,const void *from,unsigned long n);
+ */
+ENTRY(memcpy)
+ENTRY(memmove)
+ ENTER
+ cmp r1, r0
+ bcc 23f
+ subs r2, r2, #4
+ blt 6f
+ PLD( pld [r1, #0] )
+ ands ip, r0, #3
+ bne 7f
+ ands ip, r1, #3
+ bne 8f
+
+1: subs r2, r2, #8
+ blt 5f
+ subs r2, r2, #20
+ blt 4f
+ PLD( pld [r1, #28] )
+ PLD( subs r2, r2, #64 )
+ PLD( blt 3f )
+2: PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
+ ldmia r1!, {r3 - r9, ip}
+ subs r2, r2, #32
+ stmgeia r0!, {r3 - r9, ip}
+ ldmgeia r1!, {r3 - r9, ip}
+ subges r2, r2, #32
+ stmia r0!, {r3 - r9, ip}
+ bge 2b
+3: PLD( ldmia r1!, {r3 - r9, ip} )
+ PLD( adds r2, r2, #32 )
+ PLD( stmgeia r0!, {r3 - r9, ip} )
+ PLD( ldmgeia r1!, {r3 - r9, ip} )
+ PLD( subges r2, r2, #32 )
+ PLD( stmia r0!, {r3 - r9, ip} )
+4: cmn r2, #16
+ ldmgeia r1!, {r3 - r6}
+ subge r2, r2, #16
+ stmgeia r0!, {r3 - r6}
+ adds r2, r2, #20
+ ldmgeia r1!, {r3 - r5}
+ subge r2, r2, #12
+ stmgeia r0!, {r3 - r5}
+5: adds r2, r2, #8
+ blt 6f
+ subs r2, r2, #4
+ ldrlt r3, [r1], #4
+ ldmgeia r1!, {r4, r5}
+ subge r2, r2, #4
+ strlt r3, [r0], #4
+ stmgeia r0!, {r4, r5}
+
+6: adds r2, r2, #4
+ EXITEQ
+ cmp r2, #2
+ ldrb r3, [r1], #1
+ ldrgeb r4, [r1], #1
+ ldrgtb r5, [r1], #1
+ strb r3, [r0], #1
+ strgeb r4, [r0], #1
+ strgtb r5, [r0], #1
+ EXIT
+
+7: rsb ip, ip, #4
+ cmp ip, #2
+ ldrb r3, [r1], #1
+ ldrgeb r4, [r1], #1
+ ldrgtb r5, [r1], #1
+ strb r3, [r0], #1
+ strgeb r4, [r0], #1
+ strgtb r5, [r0], #1
+ subs r2, r2, ip
+ blt 6b
+ ands ip, r1, #3
+ beq 1b
+
+8: bic r1, r1, #3
+ ldr r7, [r1], #4
+ cmp ip, #2
+ bgt 18f
+ beq 13f
+ cmp r2, #12
+ blt 11f
+ PLD( pld [r1, #12] )
+ sub r2, r2, #12
+ PLD( subs r2, r2, #32 )
+ PLD( blt 10f )
+ PLD( pld [r1, #28] )
+9: PLD( pld [r1, #44] )
+10: mov r3, r7, pull #8
+ ldmia r1!, {r4 - r7}
+ subs r2, r2, #16
+ orr r3, r3, r4, push #24
+ mov r4, r4, pull #8
+ orr r4, r4, r5, push #24
+ mov r5, r5, pull #8
+ orr r5, r5, r6, push #24
+ mov r6, r6, pull #8
+ orr r6, r6, r7, push #24
+ stmia r0!, {r3 - r6}
+ bge 9b
+ PLD( cmn r2, #32 )
+ PLD( bge 10b )
+ PLD( add r2, r2, #32 )
+ adds r2, r2, #12
+ blt 12f
+11: mov r3, r7, pull #8
+ ldr r7, [r1], #4
+ subs r2, r2, #4
+ orr r3, r3, r7, push #24
+ str r3, [r0], #4
+ bge 11b
+12: sub r1, r1, #3
+ b 6b
+
+13: cmp r2, #12
+ blt 16f
+ PLD( pld [r1, #12] )
+ sub r2, r2, #12
+ PLD( subs r2, r2, #32 )
+ PLD( blt 15f )
+ PLD( pld [r1, #28] )
+14: PLD( pld [r1, #44] )
+15: mov r3, r7, pull #16
+ ldmia r1!, {r4 - r7}
+ subs r2, r2, #16
+ orr r3, r3, r4, push #16
+ mov r4, r4, pull #16
+ orr r4, r4, r5, push #16
+ mov r5, r5, pull #16
+ orr r5, r5, r6, push #16
+ mov r6, r6, pull #16
+ orr r6, r6, r7, push #16
+ stmia r0!, {r3 - r6}
+ bge 14b
+ PLD( cmn r2, #32 )
+ PLD( bge 15b )
+ PLD( add r2, r2, #32 )
+ adds r2, r2, #12
+ blt 17f
+16: mov r3, r7, pull #16
+ ldr r7, [r1], #4
+ subs r2, r2, #4
+ orr r3, r3, r7, push #16
+ str r3, [r0], #4
+ bge 16b
+17: sub r1, r1, #2
+ b 6b
+
+18: cmp r2, #12
+ blt 21f
+ PLD( pld [r1, #12] )
+ sub r2, r2, #12
+ PLD( subs r2, r2, #32 )
+ PLD( blt 20f )
+ PLD( pld [r1, #28] )
+19: PLD( pld [r1, #44] )
+20: mov r3, r7, pull #24
+ ldmia r1!, {r4 - r7}
+ subs r2, r2, #16
+ orr r3, r3, r4, push #8
+ mov r4, r4, pull #24
+ orr r4, r4, r5, push #8
+ mov r5, r5, pull #24
+ orr r5, r5, r6, push #8
+ mov r6, r6, pull #24
+ orr r6, r6, r7, push #8
+ stmia r0!, {r3 - r6}
+ bge 19b
+ PLD( cmn r2, #32 )
+ PLD( bge 20b )
+ PLD( add r2, r2, #32 )
+ adds r2, r2, #12
+ blt 22f
+21: mov r3, r7, pull #24
+ ldr r7, [r1], #4
+ subs r2, r2, #4
+ orr r3, r3, r7, push #8
+ str r3, [r0], #4
+ bge 21b
+22: sub r1, r1, #1
+ b 6b
+
+
+23: add r1, r1, r2
+ add r0, r0, r2
+ subs r2, r2, #4
+ blt 29f
+ PLD( pld [r1, #-4] )
+ ands ip, r0, #3
+ bne 30f
+ ands ip, r1, #3
+ bne 31f
+
+24: subs r2, r2, #8
+ blt 28f
+ subs r2, r2, #20
+ blt 27f
+ PLD( pld [r1, #-32] )
+ PLD( subs r2, r2, #64 )
+ PLD( blt 26f )
+25: PLD( pld [r1, #-64] )
+ PLD( pld [r1, #-96] )
+ ldmdb r1!, {r3 - r9, ip}
+ subs r2, r2, #32
+ stmgedb r0!, {r3 - r9, ip}
+ ldmgedb r1!, {r3 - r9, ip}
+ subges r2, r2, #32
+ stmdb r0!, {r3 - r9, ip}
+ bge 25b
+26: PLD( ldmdb r1!, {r3 - r9, ip} )
+ PLD( adds r2, r2, #32 )
+ PLD( stmgedb r0!, {r3 - r9, ip} )
+ PLD( ldmgedb r1!, {r3 - r9, ip} )
+ PLD( subges r2, r2, #32 )
+ PLD( stmdb r0!, {r3 - r9, ip} )
+27: cmn r2, #16
+ ldmgedb r1!, {r3 - r6}
+ subge r2, r2, #16
+ stmgedb r0!, {r3 - r6}
+ adds r2, r2, #20
+ ldmgedb r1!, {r3 - r5}
+ subge r2, r2, #12
+ stmgedb r0!, {r3 - r5}
+28: adds r2, r2, #8
+ blt 29f
+ subs r2, r2, #4
+ ldrlt r3, [r1, #-4]!
+ ldmgedb r1!, {r4, r5}
+ subge r2, r2, #4
+ strlt r3, [r0, #-4]!
+ stmgedb r0!, {r4, r5}
+
+29: adds r2, r2, #4
+ EXITEQ
+ cmp r2, #2
+ ldrb r3, [r1, #-1]!
+ ldrgeb r4, [r1, #-1]!
+ ldrgtb r5, [r1, #-1]!
+ strb r3, [r0, #-1]!
+ strgeb r4, [r0, #-1]!
+ strgtb r5, [r0, #-1]!
+ EXIT
+
+30: cmp ip, #2
+ ldrb r3, [r1, #-1]!
+ ldrgeb r4, [r1, #-1]!
+ ldrgtb r5, [r1, #-1]!
+ strb r3, [r0, #-1]!
+ strgeb r4, [r0, #-1]!
+ strgtb r5, [r0, #-1]!
+ subs r2, r2, ip
+ blt 29b
+ ands ip, r1, #3
+ beq 24b
+
+31: bic r1, r1, #3
+ ldr r3, [r1], #0
+ cmp ip, #2
+ blt 41f
+ beq 36f
+ cmp r2, #12
+ blt 34f
+ PLD( pld [r1, #-16] )
+ sub r2, r2, #12
+ PLD( subs r2, r2, #32 )
+ PLD( blt 33f )
+ PLD( pld [r1, #-32] )
+32: PLD( pld [r1, #-48] )
+33: mov r7, r3, push #8
+ ldmdb r1!, {r3, r4, r5, r6}
+ subs r2, r2, #16
+ orr r7, r7, r6, pull #24
+ mov r6, r6, push #8
+ orr r6, r6, r5, pull #24
+ mov r5, r5, push #8
+ orr r5, r5, r4, pull #24
+ mov r4, r4, push #8
+ orr r4, r4, r3, pull #24
+ stmdb r0!, {r4, r5, r6, r7}
+ bge 32b
+ PLD( cmn r2, #32 )
+ PLD( bge 33b )
+ PLD( add r2, r2, #32 )
+ adds r2, r2, #12
+ blt 35f
+34: mov ip, r3, push #8
+ ldr r3, [r1, #-4]!
+ subs r2, r2, #4
+ orr ip, ip, r3, pull #24
+ str ip, [r0, #-4]!
+ bge 34b
+35: add r1, r1, #3
+ b 29b
+
+36: cmp r2, #12
+ blt 39f
+ PLD( pld [r1, #-16] )
+ sub r2, r2, #12
+ PLD( subs r2, r2, #32 )
+ PLD( blt 38f )
+ PLD( pld [r1, #-32] )
+37: PLD( pld [r1, #-48] )
+38: mov r7, r3, push #16
+ ldmdb r1!, {r3, r4, r5, r6}
+ subs r2, r2, #16
+ orr r7, r7, r6, pull #16
+ mov r6, r6, push #16
+ orr r6, r6, r5, pull #16
+ mov r5, r5, push #16
+ orr r5, r5, r4, pull #16
+ mov r4, r4, push #16
+ orr r4, r4, r3, pull #16
+ stmdb r0!, {r4, r5, r6, r7}
+ bge 37b
+ PLD( cmn r2, #32 )
+ PLD( bge 38b )
+ PLD( add r2, r2, #32 )
+ adds r2, r2, #12
+ blt 40f
+39: mov ip, r3, push #16
+ ldr r3, [r1, #-4]!
+ subs r2, r2, #4
+ orr ip, ip, r3, pull #16
+ str ip, [r0, #-4]!
+ bge 39b
+40: add r1, r1, #2
+ b 29b
+
+41: cmp r2, #12
+ blt 44f
+ PLD( pld [r1, #-16] )
+ sub r2, r2, #12
+ PLD( subs r2, r2, #32 )
+ PLD( blt 43f )
+ PLD( pld [r1, #-32] )
+42: PLD( pld [r1, #-48] )
+43: mov r7, r3, push #24
+ ldmdb r1!, {r3, r4, r5, r6}
+ subs r2, r2, #16
+ orr r7, r7, r6, pull #8
+ mov r6, r6, push #24
+ orr r6, r6, r5, pull #8
+ mov r5, r5, push #24
+ orr r5, r5, r4, pull #8
+ mov r4, r4, push #24
+ orr r4, r4, r3, pull #8
+ stmdb r0!, {r4, r5, r6, r7}
+ bge 42b
+ PLD( cmn r2, #32 )
+ PLD( bge 43b )
+ PLD( add r2, r2, #32 )
+ adds r2, r2, #12
+ blt 45f
+44: mov ip, r3, push #24
+ ldr r3, [r1, #-4]!
+ subs r2, r2, #4
+ orr ip, ip, r3, pull #8
+ str ip, [r0, #-4]!
+ bge 44b
+45: add r1, r1, #1
+ b 29b
+