patch-2.4.0-test8 linux/arch/arm/lib/copy_page.S

Next file: linux/arch/arm/lib/delay.S
Previous file: linux/arch/arm/lib/Makefile
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.0-test7/linux/arch/arm/lib/copy_page.S linux/arch/arm/lib/copy_page.S
@@ -10,26 +10,26 @@
 #include <asm/assembler.h>
 #include "constants.h"
 
-		.text 
+		.text
+		.align	5
 /*
  * StrongARM optimised copy_page routine
- * now 1.72bytes/cycle, was 1.60 bytes/cycle
- * (50MHz bus -> 86MB/s)
+ * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s)
+ * Note that we probably achieve closer to the 100MB/s target with
+ * the core clock switching.
  */
-
 ENTRY(copy_page)
 		stmfd	sp!, {r4, lr}			@	2
 		mov	r2, #PAGE_SZ/64			@	1
-1:		ldmia	r1!, {r3, r4, ip, lr}		@	4
-		subs	r2, r2, #1			@	1
-		stmia	r0!, {r3, r4, ip, lr}		@	4
 		ldmia	r1!, {r3, r4, ip, lr}		@	4+1
-		stmia	r0!, {r3, r4, ip, lr}		@	4
+1:		stmia	r0!, {r3, r4, ip, lr}		@	4
 		ldmia	r1!, {r3, r4, ip, lr}		@	4+1
 		stmia	r0!, {r3, r4, ip, lr}		@	4
 		ldmia	r1!, {r3, r4, ip, lr}		@	4+1
 		stmia	r0!, {r3, r4, ip, lr}		@	4
+		ldmia	r1!, {r3, r4, ip, lr}		@	4
+		subs	r2, r2, #1			@	1
+		stmia	r0!, {r3, r4, ip, lr}		@	4
+		ldmneia	r1!, {r3, r4, ip, lr}		@	4
 		bne	1b				@	1
 		LOADREGS(fd, sp!, {r4, pc})		@	3
-
-

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)