patch-2.4.0-test9 linux/arch/ppc/lib/string.S
Next file: linux/arch/ppc/mbxboot/misc.c
Previous file: linux/arch/ppc/kernel/xics.c
Back to the patch index
Back to the overall index
- Lines: 330
- Date: Tue Sep 19 08:31:53 2000
- Orig file: v2.4.0-test8/linux/arch/ppc/lib/string.S
- Orig date: Sun Feb 13 10:47:01 2000
diff -u --recursive --new-file v2.4.0-test8/linux/arch/ppc/lib/string.S linux/arch/ppc/lib/string.S
@@ -9,13 +9,74 @@
* 2 of the License, or (at your option) any later version.
*/
#include "../kernel/ppc_asm.tmpl"
+#include <linux/config.h>
#include <asm/processor.h>
#include <asm/errno.h>
-CACHELINE_BYTES = 32
-LG_CACHELINE_BYTES = 5
-CACHELINE_MASK = 0x1f
-CACHELINE_WORDS = 8
+#if defined(CONFIG_4xx) || defined(CONFIG_8xx)
+#define CACHE_LINE_SIZE 16
+#define LG_CACHE_LINE_SIZE 4
+#define MAX_COPY_PREFETCH 1
+#elif !defined(CONFIG_PPC64BRIDGE)
+#define CACHE_LINE_SIZE 32
+#define LG_CACHE_LINE_SIZE 5
+#define MAX_COPY_PREFETCH 4
+#else
+#define CACHE_LINE_SIZE 128
+#define LG_CACHE_LINE_SIZE 7
+#define MAX_COPY_PREFETCH 1
+#endif /* CONFIG_4xx || CONFIG_8xx */
+
+#define COPY_16_BYTES \
+ lwz r7,4(r4); \
+ lwz r8,8(r4); \
+ lwz r9,12(r4); \
+ lwzu r10,16(r4); \
+ stw r7,4(r6); \
+ stw r8,8(r6); \
+ stw r9,12(r6); \
+ stwu r10,16(r6)
+
+#define COPY_16_BYTES_WITHEX(n) \
+8 ## n ## 0: \
+ lwz r7,4(r4); \
+8 ## n ## 1: \
+ lwz r8,8(r4); \
+8 ## n ## 2: \
+ lwz r9,12(r4); \
+8 ## n ## 3: \
+ lwzu r10,16(r4); \
+8 ## n ## 4: \
+ stw r7,4(r6); \
+8 ## n ## 5: \
+ stw r8,8(r6); \
+8 ## n ## 6: \
+ stw r9,12(r6); \
+8 ## n ## 7: \
+ stwu r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n) \
+9 ## n ## 0: \
+ addi r5,r5,-(16 * n); \
+ b 104f; \
+9 ## n ## 1: \
+ addi r5,r5,-(16 * n); \
+ b 105f; \
+.section __ex_table,"a"; \
+ .align 2; \
+ .long 8 ## n ## 0b,9 ## n ## 0b; \
+ .long 8 ## n ## 1b,9 ## n ## 0b; \
+ .long 8 ## n ## 2b,9 ## n ## 0b; \
+ .long 8 ## n ## 3b,9 ## n ## 0b; \
+ .long 8 ## n ## 4b,9 ## n ## 1b; \
+ .long 8 ## n ## 5b,9 ## n ## 1b; \
+ .long 8 ## n ## 6b,9 ## n ## 1b; \
+ .long 8 ## n ## 7b,9 ## n ## 1b; \
+.text
+
+CACHELINE_BYTES = CACHE_LINE_SIZE
+LG_CACHELINE_BYTES = LG_CACHE_LINE_SIZE
+CACHELINE_MASK = (CACHE_LINE_SIZE-1)
.globl strcpy
strcpy:
@@ -105,7 +166,14 @@
bdnz 4b
3: mtctr r9
li r7,4
+#if !defined(CONFIG_8xx)
10: dcbz r7,r6
+#else
+10: stw r4, 4(r6)
+ stw r4, 8(r6)
+ stw r4, 12(r6)
+ stw r4, 16(r6)
+#endif
addi r6,r6,CACHELINE_BYTES
bdnz 10b
clrlwi r5,r8,32-LG_CACHELINE_BYTES
@@ -202,23 +270,24 @@
li r11,4
mtctr r0
beq 63f
-53: dcbz r11,r6
- lwz r7,4(r4)
- lwz r8,8(r4)
- lwz r9,12(r4)
- lwzu r10,16(r4)
- stw r7,4(r6)
- stw r8,8(r6)
- stw r9,12(r6)
- stwu r10,16(r6)
- lwz r7,4(r4)
- lwz r8,8(r4)
- lwz r9,12(r4)
- lwzu r10,16(r4)
- stw r7,4(r6)
- stw r8,8(r6)
- stw r9,12(r6)
- stwu r10,16(r6)
+53:
+#if !defined(CONFIG_8xx)
+ dcbz r11,r6
+#endif
+ COPY_16_BYTES
+#if CACHE_LINE_SIZE >= 32
+ COPY_16_BYTES
+#if CACHE_LINE_SIZE >= 64
+ COPY_16_BYTES
+ COPY_16_BYTES
+#if CACHE_LINE_SIZE >= 128
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+#endif
+#endif
+#endif
bdnz 53b
63: srwi. r0,r5,2
@@ -380,25 +449,59 @@
58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
clrlwi r5,r5,32-LG_CACHELINE_BYTES
li r11,4
- mtctr r0
beq 63f
-53: dcbz r11,r6
-10: lwz r7,4(r4)
-11: lwz r8,8(r4)
-12: lwz r9,12(r4)
-13: lwzu r10,16(r4)
-14: stw r7,4(r6)
-15: stw r8,8(r6)
-16: stw r9,12(r6)
-17: stwu r10,16(r6)
-20: lwz r7,4(r4)
-21: lwz r8,8(r4)
-22: lwz r9,12(r4)
-23: lwzu r10,16(r4)
-24: stw r7,4(r6)
-25: stw r8,8(r6)
-26: stw r9,12(r6)
-27: stwu r10,16(r6)
+
+#if !defined(CONFIG_8xx)
+ /* Here we decide how far ahead to prefetch the source */
+#if MAX_COPY_PREFETCH > 1
+ /* Heuristically, for large transfers we prefetch
+ MAX_COPY_PREFETCH cachelines ahead. For small transfers
+ we prefetch 1 cacheline ahead. */
+ cmpwi r0,MAX_COPY_PREFETCH
+ li r7,1
+ li r3,4
+ ble 111f
+ li r7,MAX_COPY_PREFETCH
+111: mtctr r7
+112: dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+ bdnz 112b
+#else /* MAX_COPY_PREFETCH == 1 */
+ li r3,CACHELINE_BYTES + 4
+ dcbt r11,r4
+#endif /* MAX_COPY_PREFETCH */
+#endif /* CONFIG_8xx */
+
+ mtctr r0
+53:
+#if !defined(CONFIG_8xx)
+ dcbt r3,r4
+ dcbz r11,r6
+#endif
+/* had to move these to keep extable in order */
+ .section __ex_table,"a"
+ .align 2
+ .long 70b,100f
+ .long 71b,101f
+ .long 72b,102f
+ .long 73b,103f
+ .long 53b,105f
+ .text
+/* the main body of the cacheline loop */
+ COPY_16_BYTES_WITHEX(0)
+#if CACHE_LINE_SIZE >= 32
+ COPY_16_BYTES_WITHEX(1)
+#if CACHE_LINE_SIZE >= 64
+ COPY_16_BYTES_WITHEX(2)
+ COPY_16_BYTES_WITHEX(3)
+#if CACHE_LINE_SIZE >= 128
+ COPY_16_BYTES_WITHEX(4)
+ COPY_16_BYTES_WITHEX(5)
+ COPY_16_BYTES_WITHEX(6)
+ COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
bdnz 53b
63: srwi. r0,r5,2
@@ -434,15 +537,31 @@
103: li r4,1
91: li r3,2
b 99f
-/* read fault in 2nd half of cacheline loop */
-106: addi r5,r5,-16
-/* read fault in 1st half of cacheline loop */
+
+/*
+ * this stuff handles faults in the cacheline loop and branches to either
+ * 104f (if in read part) or 105f (if in write part), after updating r5
+ */
+ COPY_16_BYTES_EXCODE(0)
+#if CACHE_LINE_SIZE >= 32
+ COPY_16_BYTES_EXCODE(1)
+#if CACHE_LINE_SIZE >= 64
+ COPY_16_BYTES_EXCODE(2)
+ COPY_16_BYTES_EXCODE(3)
+#if CACHE_LINE_SIZE >= 128
+ COPY_16_BYTES_EXCODE(4)
+ COPY_16_BYTES_EXCODE(5)
+ COPY_16_BYTES_EXCODE(6)
+ COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+/* read fault in cacheline loop */
104: li r4,0
b 92f
-/* write fault in 2nd half of cacheline loop */
-107: addi r5,r5,-16
/* fault on dcbz (effectively a write fault) */
-/* or write fault in 1st half of cacheline loop */
+/* or write fault in cacheline loop */
105: li r4,1
92: li r3,LG_CACHELINE_BYTES
b 99f
@@ -485,36 +604,15 @@
bdnz 114b
120: blr
-.section __ex_table,"a"
+ .section __ex_table,"a"
.align 2
- .long 70b,100b
- .long 71b,101b
- .long 72b,102b
- .long 73b,103b
- .long 53b,105b
- .long 10b,104b
- .long 11b,104b
- .long 12b,104b
- .long 13b,104b
- .long 14b,105b
- .long 15b,105b
- .long 16b,105b
- .long 17b,105b
- .long 20b,106b
- .long 21b,106b
- .long 22b,106b
- .long 23b,106b
- .long 24b,107b
- .long 25b,107b
- .long 26b,107b
- .long 27b,107b
.long 30b,108b
.long 31b,109b
.long 40b,110b
.long 41b,111b
.long 112b,120b
.long 114b,120b
-.text
+ .text
.globl __clear_user
__clear_user:
@@ -546,12 +644,13 @@
blr
99: li r3,-EFAULT
blr
-.section __ex_table,"a"
+
+ .section __ex_table,"a"
.align 2
.long 11b,99b
.long 1b,99b
.long 8b,99b
-.text
+ .text
.globl __strncpy_from_user
__strncpy_from_user:
@@ -570,10 +669,11 @@
blr
99: li r3,-EFAULT
blr
-.section __ex_table,"a"
+
+ .section __ex_table,"a"
.align 2
.long 1b,99b
-.text
+ .text
/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
.globl __strnlen_user
@@ -596,6 +696,7 @@
blr
99: li r3,0 /* bad address, return 0 */
blr
-.section __ex_table,"a"
+
+ .section __ex_table,"a"
.align 2
.long 1b,99b
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)