patch-2.4.0-prerelease linux/arch/alpha/lib/memmove.S

Next file: linux/arch/alpha/math-emu/Makefile
Previous file: linux/arch/alpha/lib/ev67-strrchr.S
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.0-test12/linux/arch/alpha/lib/memmove.S linux/arch/alpha/lib/memmove.S
@@ -26,12 +26,16 @@
 	bne $1,memcpy
 
 	and $2,7,$2			/* Test for src/dest co-alignment.  */
-	bne $2,$misaligned
+	and $16,7,$1
+	cmpule $16,$17,$3
+	bne $3,$memmove_up		/* dest <= src */
 
 	and $4,7,$1
-	beq $1,$skip_aligned_byte_loop_head
+	bne $2,$misaligned_dn
+	unop
+	beq $1,$skip_aligned_byte_loop_head_dn
 
-$aligned_byte_loop_head:
+$aligned_byte_loop_head_dn:
 	lda $4,-1($4)
 	lda $5,-1($5)
 	unop
@@ -48,13 +52,13 @@
 	and $4,7,$6
 
 	stq_u $1,0($4)
-	bne $6,$aligned_byte_loop_head
+	bne $6,$aligned_byte_loop_head_dn
 
-$skip_aligned_byte_loop_head:
+$skip_aligned_byte_loop_head_dn:
 	lda $18,-8($18)
-	blt $18,$skip_aligned_word_loop
+	blt $18,$skip_aligned_word_loop_dn
 
-$aligned_word_loop:
+$aligned_word_loop_dn:
 	ldq $1,-8($5)
 	nop
 	lda $5,-8($5)
@@ -63,22 +67,22 @@
 	stq $1,-8($4)
 	nop
 	lda $4,-8($4)
-	bge $18,$aligned_word_loop
+	bge $18,$aligned_word_loop_dn
 
-$skip_aligned_word_loop:
+$skip_aligned_word_loop_dn:
 	lda $18,8($18)
-	bgt $18,$byte_loop_tail
+	bgt $18,$byte_loop_tail_dn
 	unop
 	ret $31,($26),1
 
 	.align 4
-$misaligned:
+$misaligned_dn:
 	nop
 	fnop
 	unop
 	beq $18,$egress
 
-$byte_loop_tail:
+$byte_loop_tail_dn:
 	ldq_u $3,-1($5)
 	ldq_u $2,-1($4)
 	lda $5,-1($5)
@@ -91,8 +95,77 @@
 
 	bis $1,$2,$1
 	stq_u $1,0($4)
+	bgt $18,$byte_loop_tail_dn
+	br $egress
+
+$memmove_up:
+	mov $16,$4
+	mov $17,$5
+	bne $2,$misaligned_up
+	beq $1,$skip_aligned_byte_loop_head_up
+
+$aligned_byte_loop_head_up:
+	unop
+	ble $18,$egress
+	ldq_u $3,0($5)
+	ldq_u $2,0($4)
+
+	lda $18,-1($18)
+	extbl $3,$5,$1
+	insbl $1,$4,$1
+	mskbl $2,$4,$2
+
+	bis $1,$2,$1
+	lda $5,1($5)
+	stq_u $1,0($4)
+	lda $4,1($4)
+
+	and $4,7,$6
+	bne $6,$aligned_byte_loop_head_up
+
+$skip_aligned_byte_loop_head_up:
+	lda $18,-8($18)
+	blt $18,$skip_aligned_word_loop_up
+
+$aligned_word_loop_up:
+	ldq $1,0($5)
+	nop
+	lda $5,8($5)
+	lda $18,-8($18)
+
+	stq $1,0($4)
+	nop
+	lda $4,8($4)
+	bge $18,$aligned_word_loop_up
+
+$skip_aligned_word_loop_up:
+	lda $18,8($18)
+	bgt $18,$byte_loop_tail_up
+	unop
+	ret $31,($26),1
+
+	.align 4
+$misaligned_up:
+	nop
+	fnop
+	unop
+	beq $18,$egress
+
+$byte_loop_tail_up:
+	ldq_u $3,0($5)
+	ldq_u $2,0($4)
+	lda $18,-1($18)
+	extbl $3,$5,$1
+
+	insbl $1,$4,$1
+	mskbl $2,$4,$2
+	bis $1,$2,$1
+	stq_u $1,0($4)
+
+	lda $5,1($5)
+	lda $4,1($4)
 	nop
-	bgt $18,$byte_loop_tail
+	bgt $18,$byte_loop_tail_up
 
 $egress:
 	ret $31,($26),1

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)