patch-2.4.21 linux-2.4.21/arch/ppc64/lib/string.S

diff -urN linux-2.4.20/arch/ppc64/lib/string.S linux-2.4.21/arch/ppc64/lib/string.S
@@ -12,61 +12,6 @@
 #include <asm/processor.h>
 #include <asm/errno.h>
 
-#define CACHE_LINE_SIZE		128
-#define LG_CACHE_LINE_SIZE	7
-#define MAX_COPY_PREFETCH	1
-
-#define COPY_16_BYTES		\
-	lwz	r7,4(r4);	\
-	lwz	r8,8(r4);	\
-	lwz	r9,12(r4);	\
-	lwzu	r10,16(r4);	\
-	stw	r7,4(r6);	\
-	stw	r8,8(r6);	\
-	stw	r9,12(r6);	\
-	stwu	r10,16(r6)
-
-#define COPY_16_BYTES_WITHEX(n)	\
-8 ## n ## 0:			\
-	lwz	r7,4(r4);	\
-8 ## n ## 1:			\
-	lwz	r8,8(r4);	\
-8 ## n ## 2:			\
-	lwz	r9,12(r4);	\
-8 ## n ## 3:			\
-	lwzu	r10,16(r4);	\
-8 ## n ## 4:			\
-	stw	r7,4(r6);	\
-8 ## n ## 5:			\
-	stw	r8,8(r6);	\
-8 ## n ## 6:			\
-	stw	r9,12(r6);	\
-8 ## n ## 7:			\
-	stwu	r10,16(r6)
-
-#define COPY_16_BYTES_EXCODE(n)			\
-9 ## n ## 0:					\
-	addi	r5,r5,-(16 * n);		\
-	b	104f;				\
-9 ## n ## 1:					\
-	addi	r5,r5,-(16 * n);		\
-	b	105f;				\
-.section __ex_table,"a";			\
-	.align	3;				\
-	.llong	8 ## n ## 0b,9 ## n ## 0b;	\
-	.llong	8 ## n ## 1b,9 ## n ## 0b;	\
-	.llong	8 ## n ## 2b,9 ## n ## 0b;	\
-	.llong	8 ## n ## 3b,9 ## n ## 0b;	\
-	.llong	8 ## n ## 4b,9 ## n ## 1b;	\
-	.llong	8 ## n ## 5b,9 ## n ## 1b;	\
-	.llong	8 ## n ## 6b,9 ## n ## 1b;	\
-	.llong	8 ## n ## 7b,9 ## n ## 1b;	\
-.text
-
-CACHELINE_BYTES = CACHE_LINE_SIZE
-LG_CACHELINE_BYTES = LG_CACHE_LINE_SIZE
-CACHELINE_MASK = (CACHE_LINE_SIZE-1)
-
 _GLOBAL(strcpy)
 	addi	r5,r3,-1
 	addi	r4,r4,-1
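
The COPY_16_BYTES_WITHEX/COPY_16_BYTES_EXCODE macros removed above pair every load and store that may fault with a fixup stub: each .llong line is one __ex_table entry of (faulting instruction address, fixup address). When one of those instructions faults, the page-fault handler looks the saved PC up in this table and resumes at the fixup instead of oopsing; the lookup itself lives in arch/ppc64/mm/extable.c (the next file in this patch). A minimal C sketch of that lookup, assuming a 2.4-style sorted table with linker-provided bounds (the function name here is illustrative):

	struct exception_table_entry {
		unsigned long insn;	/* address of the instruction that may fault */
		unsigned long fixup;	/* address to resume execution at */
	};

	/* Linker-provided section bounds (see arch/ppc64/mm/extable.c). */
	extern const struct exception_table_entry __start___ex_table[];
	extern const struct exception_table_entry __stop___ex_table[];

	/* Binary search over the table, which must be sorted by faulting
	 * address -- hence the "had to move these to keep extable in
	 * order" comment further down in the removed code. */
	static unsigned long find_fixup(unsigned long pc)
	{
		const struct exception_table_entry *lo = __start___ex_table;
		const struct exception_table_entry *hi = __stop___ex_table - 1;

		while (lo <= hi) {
			const struct exception_table_entry *mid = lo + (hi - lo) / 2;

			if (mid->insn == pc)
				return mid->fixup;	/* resume here */
			if (mid->insn < pc)
				lo = mid + 1;
			else
				hi = mid - 1;
		}
		return 0;	/* no entry: the fault is a real kernel bug */
	}
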
@@ -120,54 +65,6 @@
 	subf	r3,r3,r4
 	blr
 
-/*
- * Use dcbz on the complete cache lines in the destination
- * to set them to zero.  This requires that the destination
- * area is cacheable.  -- paulus
- */
-_GLOBAL(cacheable_memzero)
-	mr	r5,r4
-	li	r4,0
-	addi	r6,r3,-4
-	cmplwi	0,r5,4
-	blt	7f
-	stwu	r4,4(r6)
-	beqlr
-	andi.	r0,r6,3
-	add	r5,r0,r5
-	subf	r6,r0,r6
-	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
-	add	r8,r7,r5
-	srwi	r9,r8,LG_CACHELINE_BYTES
-	addic.	r9,r9,-1	/* total number of complete cachelines */
-	ble	2f
-	xori	r0,r7,CACHELINE_MASK & ~3
-	srwi.	r0,r0,2
-	beq	3f
-	mtctr	r0
-4:	stwu	r4,4(r6)
-	bdnz	4b
-3:	mtctr	r9
-	li	r7,4
-10:	dcbz	r7,r6
-	addi	r6,r6,CACHELINE_BYTES
-	bdnz	10b
-	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
-	addi	r5,r5,4
-2:	srwi	r0,r5,2
-	mtctr	r0
-	bdz	6f
-1:	stwu	r4,4(r6)
-	bdnz	1b
-6:	andi.	r5,r5,3
-7:	cmpwi	0,r5,0
-	beqlr
-	mtctr	r5
-	addi	r6,r6,3
-8:	stbu	r4,1(r6)
-	bdnz	8b
-	blr
-
 _GLOBAL(memset)
 	rlwimi	r4,r4,8,16,23
 	rlwimi	r4,r4,16,0,15
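
The cacheable_memzero routine removed above leans on dcbz, which establishes a whole data-cache line (128 bytes here) as zero without reading it from memory or issuing per-word stores; that only works when the destination is cacheable, as the original comment warns. A rough C equivalent of its align/dcbz/tail structure, assuming GCC inline assembly on a 128-byte-line machine:

	#define CACHELINE_BYTES 128	/* matches the CACHE_LINE_SIZE removed above */

	static void zero_cacheable(void *dst, unsigned long n)
	{
		char *p = dst;

		/* Store leading bytes until p is cache-line aligned. */
		while (n && ((unsigned long)p & (CACHELINE_BYTES - 1))) {
			*p++ = 0;
			n--;
		}
		/* One dcbz zeroes an entire line in the cache, with no
		 * memory read; it traps if the destination is not cacheable. */
		while (n >= CACHELINE_BYTES) {
			__asm__ volatile("dcbz 0,%0" : : "r"(p) : "memory");
			p += CACHELINE_BYTES;
			n -= CACHELINE_BYTES;
		}
		/* Trailing bytes. */
		while (n--)
			*p++ = 0;
	}
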
@@ -196,48 +93,7 @@
 _GLOBAL(memmove)
 	cmplw	0,r3,r4
 	bgt	.backwards_memcpy
-	/* fall through */
-
-_GLOBAL(memcpy)
-	srwi.	r7,r5,3
-	addi	r6,r3,-4
-	addi	r4,r4,-4
-	beq	2f			/* if less than 8 bytes to do */
-	andi.	r0,r6,3			/* get dest word aligned */
-	mtctr	r7
-	bne	5f
-1:	lwz	r7,4(r4)
-	lwzu	r8,8(r4)
-	stw	r7,4(r6)
-	stwu	r8,8(r6)
-	bdnz	1b
-	andi.	r5,r5,7
-2:	cmplwi	0,r5,4
-	blt	3f
-	lwzu	r0,4(r4)
-	addi	r5,r5,-4
-	stwu	r0,4(r6)
-3:	cmpwi	0,r5,0
-	beqlr
-	mtctr	r5
-	addi	r4,r4,3
-	addi	r6,r6,3
-4:	lbzu	r0,1(r4)
-	stbu	r0,1(r6)
-	bdnz	4b
-	blr
-5:	subfic	r0,r0,4
-	mtctr	r0
-6:	lbz	r7,4(r4)
-	addi	r4,r4,1
-	stb	r7,4(r6)
-	addi	r6,r6,1
-	bdnz	6b
-	subf	r5,r0,r5
-	rlwinm.	r7,r5,32-3,3,31
-	beq	2b
-	mtctr	r7
-	b	1b
+	b	.memcpy
 
 _GLOBAL(backwards_memcpy)
 	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
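
With the 32-bit memcpy gone, memmove above reduces to a direction test: if the destination begins above the source, a forward copy could overwrite source bytes before they are read, so it takes backwards_memcpy; otherwise it branches to the new .memcpy (added in arch/ppc64/lib/memcpy.S, the previous file in this patch). The same rule in C:

	static void *memmove_sketch(void *dst, const void *src, unsigned long n)
	{
		char *d = dst;
		const char *s = src;

		if (d > s) {
			/* backwards_memcpy: copy from the end so an overlapping
			 * destination never clobbers bytes still to be read */
			while (n--)
				d[n] = s[n];
		} else {
			/* .memcpy: a plain forward copy is safe */
			while (n--)
				*d++ = *s++;
		}
		return dst;
	}
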
@@ -301,195 +157,6 @@
 2:	li	r3,0
 	blr
 
-_GLOBAL(__copy_tofrom_user)
-	addi	r4,r4,-4
-	addi	r6,r3,-4
-	neg	r0,r3
-	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
-	beq	58f
-
-	cmplw	0,r5,r0			/* is this more than total to do? */
-	blt	63f			/* if not much to do */
-	andi.	r8,r0,3			/* get it word-aligned first */
-	mtctr	r8
-	beq+	61f
-70:	lbz	r9,4(r4)		/* do some bytes */
-71:	stb	r9,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
-	bdnz	70b
-61:	subf	r5,r0,r5
-	srwi.	r0,r0,2
-	mtctr	r0
-	beq	58f
-72:	lwzu	r9,4(r4)		/* do some words */
-73:	stwu	r9,4(r6)
-	bdnz	72b
-
-58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
-	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
-	li	r11,4
-	beq	63f
-
-	/* Here we decide how far ahead to prefetch the source */
-#if MAX_COPY_PREFETCH > 1
-	/* Heuristically, for large transfers we prefetch
-	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
-	   we prefetch 1 cacheline ahead. */
-	cmpwi	r0,MAX_COPY_PREFETCH
-	li	r7,1
-	li	r3,4
-	ble	111f
-	li	r7,MAX_COPY_PREFETCH
-111:	mtctr	r7
-112:	dcbt	r3,r4
-	addi	r3,r3,CACHELINE_BYTES
-	bdnz	112b
-#else /* MAX_COPY_PREFETCH == 1 */
-	li	r3,CACHELINE_BYTES + 4
-	dcbt	r11,r4
-#endif /* MAX_COPY_PREFETCH */
-
-	mtctr	r0
-	dcbt	r3,r4
-53:
-54:	dcbz	r11,r6
-/* had to move these to keep extable in order */
-	.section __ex_table,"a"
-	.align	3
-	.llong	70b,100f
-	.llong	71b,101f
-	.llong	72b,102f
-	.llong	73b,103f
-	.llong	54b,105f
-	.text
-/* the main body of the cacheline loop */
-	COPY_16_BYTES_WITHEX(0)
-#if CACHE_LINE_SIZE >= 32
-	COPY_16_BYTES_WITHEX(1)
-#if CACHE_LINE_SIZE >= 64
-	COPY_16_BYTES_WITHEX(2)
-	COPY_16_BYTES_WITHEX(3)
-#if CACHE_LINE_SIZE >= 128
-	COPY_16_BYTES_WITHEX(4)
-	COPY_16_BYTES_WITHEX(5)
-	COPY_16_BYTES_WITHEX(6)
-	COPY_16_BYTES_WITHEX(7)
-#endif
-#endif
-#endif
-	bdnz	53b
-
-63:	srwi.	r0,r5,2
-	mtctr	r0
-	beq	64f
-30:	lwzu	r0,4(r4)
-31:	stwu	r0,4(r6)
-	bdnz	30b
-
-64:	andi.	r0,r5,3
-	mtctr	r0
-	beq+	65f
-40:	lbz	r0,4(r4)
-41:	stb	r0,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
-	bdnz	40b
-65:	li	r3,0
-	blr
-
-/* read fault, initial single-byte copy */
-100:	li	r4,0
-	b	90f
-/* write fault, initial single-byte copy */
-101:	li	r4,1
-90:	subf	r5,r8,r5
-	li	r3,0
-	b	99f
-/* read fault, initial word copy */
-102:	li	r4,0
-	b	91f
-/* write fault, initial word copy */
-103:	li	r4,1
-91:	li	r3,2
-	b	99f
-
-/*
- * this stuff handles faults in the cacheline loop and branches to either
- * 104f (if in read part) or 105f (if in write part), after updating r5
- */
-	COPY_16_BYTES_EXCODE(0)
-#if CACHE_LINE_SIZE >= 32
-	COPY_16_BYTES_EXCODE(1)
-#if CACHE_LINE_SIZE >= 64
-	COPY_16_BYTES_EXCODE(2)
-	COPY_16_BYTES_EXCODE(3)
-#if CACHE_LINE_SIZE >= 128
-	COPY_16_BYTES_EXCODE(4)
-	COPY_16_BYTES_EXCODE(5)
-	COPY_16_BYTES_EXCODE(6)
-	COPY_16_BYTES_EXCODE(7)
-#endif
-#endif
-#endif
-
-/* read fault in cacheline loop */
-104:	li	r4,0
-	b	92f
-/* fault on dcbz (effectively a write fault) */
-/* or write fault in cacheline loop */
-105:	li	r4,1
-92:	li	r3,LG_CACHELINE_BYTES
-	b	99f
-/* read fault in final word loop */
-108:	li	r4,0
-	b	93f
-/* write fault in final word loop */
-109:	li	r4,1
-93:	andi.	r5,r5,3
-	li	r3,2
-	b	99f
-/* read fault in final byte loop */
-110:	li	r4,0
-	b	94f
-/* write fault in final byte loop */
-111:	li	r4,1
-94:	li	r5,0
-	li	r3,0
-/*
- * At this stage the number of bytes not copied is
- * r5 + (ctr << r3), and r4 is 0 for read or 1 for write.
- */
-99:	mfctr	r0
-	slw	r3,r0,r3
-	add	r3,r3,r5
-	cmpwi	0,r4,0
-	bne	120f
-/* for read fault, clear out the destination: r3 bytes starting at 4(r6) */
-	srwi.	r0,r3,2
-	li	r9,0
-	mtctr	r0
-	beq	113f
-112:	stwu	r9,4(r6)
-	bdnz	112b
-113:	andi.	r0,r3,3
-	mtctr	r0
-	beq	120f
-114:	stb	r9,4(r6)
-	addi	r6,r6,1
-	bdnz	114b
-120:	blr
-
-	.section __ex_table,"a"
-	.align	3
-	.llong	30b,108b
-	.llong	31b,109b
-	.llong	40b,110b
-	.llong	41b,111b
-	.llong	112b,120b
-	.llong	114b,120b
-	.text
-
 _GLOBAL(__clear_user)
 	addi	r6,r3,-4
 	li	r3,0

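One detail worth noting in the removed __copy_tofrom_user is its fault bookkeeping: every fixup stub loads r3 with the log2 of the chunk size its loop was moving (0 in the byte loops, 2 in the word loops, LG_CACHELINE_BYTES in the cacheline loop), so the number of bytes not copied is always r5 + (ctr << r3); on a read fault the routine also clears the uncopied tail of the destination so the caller never consumes uninitialized kernel memory. A small C restatement of that arithmetic (names illustrative):

	/* r5 = bytes outside the faulting loop, ctr = iterations the loop
	 * had left, r3 = log2 of the chunk size, set by the fixup stub. */
	static unsigned long bytes_not_copied(unsigned long r5, unsigned long ctr,
					      unsigned long r3)
	{
		return r5 + (ctr << r3);
	}

	/* For a read (copy-from-user) fault, the epilogue then zeroes the
	 * uncopied tail of the kernel destination buffer. */
	static void clear_tail(char *p, unsigned long left)
	{
		while (left--)
			*p++ = 0;
	}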