patch-2.4.0-test12 linux/arch/mips64/lib/memcpy.S
- Lines: 225
- Date: Tue Dec 5 23:15:12 2000
- Orig file: v2.4.0-test11/linux/arch/mips64/lib/memcpy.S
- Orig date: Sat May 13 08:30:17 2000
diff -u --recursive --new-file v2.4.0-test11/linux/arch/mips64/lib/memcpy.S linux/arch/mips64/lib/memcpy.S
@@ -1,13 +1,12 @@
-/* $Id: memcpy.S,v 1.2 1999/10/19 20:51:51 ralf Exp $
- *
+/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Unified implementation of memcpy, memmove and the __copy_user backend.
*
- * Copyright (C) 1998, 1999 Ralf Baechle
- * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 1998, 1999, 2000 Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
*
* For __rmemcpy and memmove an exception is always a kernel bug, therefore
* they're not protected. In order to keep the exception fixup routine
@@ -28,12 +27,20 @@
#define uswU swr
#define ulwL lwl
#define ulwU lwr
+#define usdL sdl
+#define usdU sdr
+#define uldL ldl
+#define uldU ldr
#endif
#ifdef __MIPSEL__
#define uswL swr
#define uswU swl
#define ulwL lwr
#define ulwU lwl
+#define usdL sdr
+#define usdU sdl
+#define uldL ldr
+#define uldU ldl
#endif
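The usdL/usdU and uldL/uldU pairs added above map the two halves of an unaligned doubleword store/load onto sdl/sdr and ldl/ldr; which instruction handles which end of the doubleword swaps between big-endian (__MIPSEB__) and little-endian (__MIPSEL__) builds. A minimal C model of what a uldL/uldU pair accomplishes (a sketch only; memcpy stands in for the two partial accesses, and the name uld_model is made up):

    #include <stdint.h>
    #include <string.h>

    /* Model of an unaligned 8-byte load as performed by the
     * uldL/uldU (ldl/ldr) pair: fetch a doubleword from an address
     * with no alignment guarantee. */
    static inline uint64_t uld_model(const void *p)
    {
        uint64_t v;
        memcpy(&v, p, sizeof v);  /* unaligned-safe on any host */
        return v;
    }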
#define EX(insn,reg,addr,handler) \
@@ -50,6 +57,33 @@
PTR 10b, handler; \
.previous
+#define UEXD(insn,reg,addr,handler) \
+9: insn ## L reg, addr; \
+10: insn ## U reg, 7 + addr; \
+ .section __ex_table,"a"; \
+ PTR 9b, handler; \
+ PTR 10b, handler; \
+ .previous
+
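Like the existing EX macro, the new UEXD macro tags both halves of the unaligned doubleword access with entries in the __ex_table section, pairing each instruction's address (the 9: and 10: labels) with a fixup handler. Conceptually, each PTR pair builds one record like the following (a sketch; 2.4-era kernels declare a similar structure in their uaccess headers, and the field names here may differ):

    /* One __ex_table record: if the load/store at 'insn' faults on a
     * bad user pointer, the fault handler resumes at 'fixup'
     * (l_fixup or s_fixup here) instead of oopsing. */
    struct exception_table_entry {
        unsigned long insn;   /* address of the faulting access */
        unsigned long fixup;  /* address to branch to on a fault */
    };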
+/* ascending order, destination aligned */
+#define MOVE_BIGGERCHUNK(src, dst, offset, t0, t1, t2, t3) \
+ EX(ld, t0, (offset + 0x00)(src), l_fixup); \
+ EX(ld, t1, (offset + 0x08)(src), l_fixup); \
+ EX(ld, t2, (offset + 0x10)(src), l_fixup); \
+ EX(ld, t3, (offset + 0x18)(src), l_fixup); \
+ EX(sd, t0, (offset + 0x00)(dst), s_fixup); \
+ EX(sd, t1, (offset + 0x08)(dst), s_fixup); \
+ EX(sd, t2, (offset + 0x10)(dst), s_fixup); \
+ EX(sd, t3, (offset + 0x18)(dst), s_fixup); \
+ EX(ld, t0, (offset + 0x20)(src), l_fixup); \
+ EX(ld, t1, (offset + 0x28)(src), l_fixup); \
+ EX(ld, t2, (offset + 0x30)(src), l_fixup); \
+ EX(ld, t3, (offset + 0x38)(src), l_fixup); \
+ EX(sd, t0, (offset + 0x20)(dst), s_fixup); \
+ EX(sd, t1, (offset + 0x28)(dst), s_fixup); \
+ EX(sd, t2, (offset + 0x30)(dst), s_fixup); \
+ EX(sd, t3, (offset + 0x38)(dst), s_fixup)
+
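MOVE_BIGGERCHUNK copies 64 bytes per expansion with doubleword loads and stores, half as many memory operations as the 32-bit MOVE_BIGCHUNK it supersedes, and it issues four loads before the four matching stores so the loads' latency can overlap. Roughly in C, one 32-byte group looks like this (a sketch with a made-up name; it assumes 8-byte-aligned src and dst, as the macro does):

    #include <stdint.h>

    /* Half of one MOVE_BIGGERCHUNK expansion: batch four 64-bit
     * loads before any store so the CPU can overlap their latency. */
    static void move32(uint64_t *dst, const uint64_t *src)
    {
        uint64_t t0 = src[0], t1 = src[1], t2 = src[2], t3 = src[3];
        dst[0] = t0; dst[1] = t1; dst[2] = t2; dst[3] = t3;
    }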
/* ascending order, destination aligned */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
EX(lw, t0, (offset + 0x00)(src), l_fixup); \
@@ -70,6 +104,25 @@
EX(sw, t3, (offset + 0x1c)(dst), s_fixup)
/* ascending order, destination unaligned */
+#define UMOVE_BIGGERCHUNK(src, dst, offset, t0, t1, t2, t3) \
+ EX(ld, t0, (offset + 0x00)(src), l_fixup); \
+ EX(ld, t1, (offset + 0x08)(src), l_fixup); \
+ EX(ld, t2, (offset + 0x10)(src), l_fixup); \
+ EX(ld, t3, (offset + 0x18)(src), l_fixup); \
+ UEXD(usd, t0, (offset + 0x00)(dst), s_fixup); \
+ UEXD(usd, t1, (offset + 0x08)(dst), s_fixup); \
+ UEXD(usd, t2, (offset + 0x10)(dst), s_fixup); \
+ UEXD(usd, t3, (offset + 0x18)(dst), s_fixup); \
+ EX(ld, t0, (offset + 0x20)(src), l_fixup); \
+ EX(ld, t1, (offset + 0x28)(src), l_fixup); \
+ EX(ld, t2, (offset + 0x30)(src), l_fixup); \
+ EX(ld, t3, (offset + 0x38)(src), l_fixup); \
+ UEXD(usd, t0, (offset + 0x20)(dst), s_fixup); \
+ UEXD(usd, t1, (offset + 0x28)(dst), s_fixup); \
+ UEXD(usd, t2, (offset + 0x30)(dst), s_fixup); \
+ UEXD(usd, t3, (offset + 0x38)(dst), s_fixup)
+
+/* ascending order, destination unaligned */
#define UMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
EX(lw, t0, (offset + 0x00)(src), l_fixup); \
EX(lw, t1, (offset + 0x04)(src), l_fixup); \
@@ -93,12 +146,12 @@
.set noat
.align 5
-LEAF(xxmemcpy) /* a0=dst a1=src a2=len */
+LEAF(memcpy) /* a0=dst a1=src a2=len */
move v0, a0 /* return value */
__memcpy:
FEXPORT(__copy_user)
xor ta0, a0, a1
- andi ta0, ta0, 0x3
+ andi ta0, ta0, 0x7
move t3, a0
beqz ta0, can_align
sltiu t8, a2, 0x8
@@ -127,7 +180,7 @@
word_align:
beqz t8, dword_align
sltiu t8, a2, 56
-
+
EX(lh, ta0, (a1), l_fixup)
dsubu a2, a2, 0x2
EX(sh, ta0, (a0), s_fixup)
@@ -185,10 +238,10 @@
andi ta2, a2, 0x40
move_128bytes:
- MOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
- MOVE_BIGCHUNK(a1, a0, 0x20, ta0, ta1, ta3, t0)
- MOVE_BIGCHUNK(a1, a0, 0x40, ta0, ta1, ta3, t0)
- MOVE_BIGCHUNK(a1, a0, 0x60, ta0, ta1, ta3, t0)
+ pref 0, 2*128(a0)
+ pref 1, 2*128(a1)
+ MOVE_BIGGERCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
+ MOVE_BIGGERCHUNK(a1, a0, 0x40, ta0, ta1, ta3, t0)
dsubu t8, t8, 0x01
daddu a1, a1, 0x80
bnez t8, move_128bytes
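The rewritten move_128bytes loop still moves 128 bytes per iteration, now as two 64-byte MOVE_BIGGERCHUNK expansions preceded by pref instructions that touch both streams two iterations (2*128 bytes) ahead. The same idea in C (a sketch; copy_blocks is a made-up name, and __builtin_prefetch's second argument is 0 for an expected read, 1 for an expected write):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Prefetched block-copy loop: touch both streams 256 bytes
     * (two 128-byte iterations) ahead, then copy the current block. */
    static void copy_blocks(uint64_t *dst, const uint64_t *src, size_t nblocks)
    {
        while (nblocks--) {
            __builtin_prefetch(src + 32, 0);  /* 2*128 bytes ahead, read */
            __builtin_prefetch(dst + 32, 1);  /* 2*128 bytes ahead, write */
            memcpy(dst, src, 128);  /* stands in for the unrolled moves */
            src += 16;              /* 16 * 8 = 128 bytes */
            dst += 16;
        }
    }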
@@ -199,8 +252,7 @@
andi ta2, a2, 0x20
move_64bytes:
- MOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
- MOVE_BIGCHUNK(a1, a0, 0x20, ta0, ta1, ta3, t0)
+ MOVE_BIGGERCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
daddu a1, a1, 0x40
daddu a0, a0, 0x40
@@ -257,10 +309,8 @@
xori ta0, 7
dsubu ta0, a1
- UEX(ulw, ta1, 0(a1), l_fixup) # dword alignment
- UEX(ulw, ta2, 4(a1), l_fixup)
- UEX(usw, ta1, 0(a0), s_fixup)
- UEX(usw, ta2, 4(a0), s_fixup)
+ UEXD(uld, ta1, 0(a1), l_fixup) # dword alignment
+ UEXD(usd, ta1, 0(a0), s_fixup)
daddu a1, ta0 # src
daddu a0, ta0 # dst
@@ -276,11 +326,9 @@
beqz t8, u_oword_align
andi t8, a1, 0x10
- EX(lw, ta0, 0x00(a1), l_fixup)
- EX(lw, ta1, 0x04(a1), l_fixup)
+ EX(ld, ta0, 0x00(a1), l_fixup)
dsubu a2, a2, 0x8
- UEX(usw, ta0, 0x00(a0), s_fixup)
- UEX(usw, ta1, 0x04(a0), s_fixup)
+ UEXD(usd, ta0, 0x00(a0), s_fixup)
daddu a1, a1, 0x8
andi t8, a1, 0x10
daddu a0, a0, 0x8
@@ -307,10 +355,8 @@
andi ta2, a2, 0x40
u_move_128bytes:
- UMOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
- UMOVE_BIGCHUNK(a1, a0, 0x20, ta0, ta1, ta3, t0)
- UMOVE_BIGCHUNK(a1, a0, 0x40, ta0, ta1, ta3, t0)
- UMOVE_BIGCHUNK(a1, a0, 0x60, ta0, ta1, ta3, t0)
+ UMOVE_BIGGERCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
+ UMOVE_BIGGERCHUNK(a1, a0, 0x40, ta0, ta1, ta3, t0)
dsubu t8, t8, 0x01
daddu a1, a1, 0x80
bnez t8, u_move_128bytes
@@ -321,8 +367,7 @@
andi ta2, a2, 0x20
u_move_64bytes:
- UMOVE_BIGCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
- UMOVE_BIGCHUNK(a1, a0, 0x20, ta0, ta1, ta3, t0)
+ UMOVE_BIGGERCHUNK(a1, a0, 0x00, ta0, ta1, ta3, t0)
daddu a1, a1, 0x40
daddu a0, a0, 0x40
@@ -365,7 +410,7 @@
jr ra
move a2, zero
- END(xxmemcpy)
+ END(memcpy)
/* descending order, destination aligned */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3) \
@@ -406,14 +451,15 @@
usw t3, (offset + 0x0c)(dst)
.align 5
-LEAF(xxmemmove)
+LEAF(memmove)
sltu ta0, a0, a1 # dst < src -> memcpy
- bnez ta0, xxmemcpy
+ bnez ta0, memcpy
daddu v0, a0, a2
sltu ta0, v0, a1 # dst + len < src -> non-
bnez ta0, __memcpy # overlapping, can use memcpy
move v0, a0 /* return value */
- END(xxmemmove)
+ beqz a2, r_out
+ END(memmove)
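memmove's dispatch is unchanged in substance: an ascending copy (memcpy or __memcpy) whenever it cannot clobber source bytes that are still unread, and the descending __rmemcpy below otherwise; the added beqz skips the descending copy when the length is zero. The decision in C (a sketch of the dispatch only, not the kernel's exact code; memmove_model is a made-up name):

    #include <stddef.h>

    /* Model of the overlap test: copy forward when dst is below src
     * or the regions do not overlap; otherwise copy descending. */
    static void *memmove_model(void *dst, const void *src, size_t n)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        if (d < s || d >= s + n) {  /* forward copy cannot clobber src */
            while (n--)
                *d++ = *s++;        /* ascending, like __memcpy */
        } else {
            d += n; s += n;
            while (n--)
                *--d = *--s;        /* descending, like __rmemcpy */
        }
        return dst;
    }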
LEAF(__rmemcpy) /* a0=dst a1=src a2=len */
daddu a0, a2 # dst = dst + len