patch-2.4.21 linux-2.4.21/arch/cris/lib/usercopy.c

diff -urN linux-2.4.20/arch/cris/lib/usercopy.c linux-2.4.21/arch/cris/lib/usercopy.c
@@ -88,11 +88,11 @@
        If you want to check that the allocation was right; then
        check the equalities in the first comment.  It should say
        "r13=r13, r11=r11, r12=r12".  */
-    __asm__ volatile ("
-	;; Check that the following is true (same register names on
-	;; both sides of equal sign, as in r8=r8):
-	;; %0=r13, %1=r11, %2=r12 %3=r10
-	;;
+    __asm__ volatile ("\
+	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
+	.err								\n\
+	.endif								\n\
+
 	;; Save the registers we'll use in the movem process
 	;; on the stack.
 	subq	11*4,$sp
@@ -189,7 +189,8 @@
 }
 
 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
-   userland.  */
+   userland.  The return-value is the number of bytes that were
+   inaccessible.  */
 
 unsigned long
 __copy_user_zeroing (void *pdst, const void *psrc, unsigned long pn)
@@ -207,30 +208,34 @@
   register int n __asm__ ("r12") = pn;
   register int retn __asm__ ("r10") = 0;
 
-  /* When src is aligned but not dst, this makes a few extra needless
-     cycles.  I believe it would take as many to check that the
-     re-alignment was unnecessary.  */
-  if (((unsigned long) dst & 3) != 0
-      /* Don't align if we wouldn't copy more than a few bytes; so we
-	 don't have to check further for overflows.  */
-      && n >= 3)
+  /* The best reason to align src is that we then know that a read-fault
+     was for aligned bytes; there's no 1..3 remaining good bytes to
+     pickle.  */
+  if (((unsigned long) src & 3) != 0)
   {
-    if ((unsigned long) dst & 1)
+    if (((unsigned long) src & 1) && n != 0)
     {
       __asm_copy_from_user_1 (dst, src, retn);
       n--;
     }
 
-    if ((unsigned long) dst & 2)
+    if (((unsigned long) src & 2) && n >= 2)
     {
       __asm_copy_from_user_2 (dst, src, retn);
       n -= 2;
     }
+
+    /* We only need one check after the unalignment-adjustments, because
+       if both adjustments were done, either both or neither reference
+       had an exception.  */
+    if (retn != 0)
+      goto copy_exception_bytes;
   }
 
   /* Decide which copying method to use. */
   if (n >= 44*2)		/* Break even between movem and
-				   move16 is at 38.7*2, but modulo 44. */
+				   move16 is at 38.7*2, but modulo 44.
+				   FIXME: We use move4 now.  */
   {
     /* For large copies we use 'movem' */
 
@@ -249,10 +254,10 @@
        check the equalities in the first comment.  It should say
        "r13=r13, r11=r11, r12=r12" */
     __asm__ volatile ("
-	;; Check that the following is true (same register names on
-	;; both sides of equal sign, as in r8=r8):
-	;; %0=r13, %1=r11, %2=r12 %3=r10
-	;;
+	.ifnc %0%1%2%3,$r13$r11$r12$r10					\n\
+	.err								\n\
+	.endif								\n\
+
 	;; Save the registers we'll use in the movem process
 	;; on the stack.
 	subq	11*4,$sp
@@ -273,73 +278,30 @@
 	movem	$r10,[$r13+]
 
 	addq   44,$r12  ;; compensate for last loop underflowing n
-8:
+
 	;; Restore registers from stack
 	movem [$sp+],$r10
-
+4:
 	.section .fixup,\"ax\"
 
 ;; Do not jump back into the loop if we fail.  For some uses, we get a
-;; page fault but for performance reasons we care to not get further
-;; faults.  For example, fs/super.c at one time did
+;; page fault somewhere on the line.  Without checking for page limits,
+;; we don't know where, but we need to copy accurately and keep an
+;; accurate count; not just clear the whole line.  To do that, we fall
+;; down in the code below, proceeding with smaller amounts.  It should
+;; be kept in mind that we have to cater to code like what at one time
+;; was in fs/super.c:
 ;;  i = size - copy_from_user((void *)page, data, size);
 ;; which would cause repeated faults while clearing the remainder of
 ;; the SIZE bytes at PAGE after the first fault.
+;; A caveat here is that we must not fall through from a failing page
+;; to a valid page.
 
 3:
-	move.d [$sp],$r10
-
-;; Number of remaining bytes, cleared but not copied, is r12 + 44.
-
-	add.d $r12,$r10
-	addq 44,$r10
-
-	move.d $r10,[$sp]
-	clear.d $r0
-	clear.d $r1
-	clear.d $r2
-	clear.d $r3
-	clear.d $r4
-	clear.d $r5
-	clear.d $r6
-	clear.d $r7
-	clear.d $r8
-	clear.d $r9
-	clear.d $r10
-
-;; Perform clear similar to the copy-loop.
-
-4:
-	subq 44,$r12
-	bge 4b
-	movem $r10,[$r13+]
-
-;; Clear by four for the remaining multiples.
-
-	addq 40,$r12
-	bmi 6f
-	nop
-5:
-	subq 4,$r12
-	bpl 5b
-	clear.d [$r13+]
-6:
-	addq 4,$r12
-	beq 7f
-	nop
-
-	subq 1,$r12
-	beq 7f
-	clear.b [$r13+]
-
-	subq 1,$r12
-	beq 7f
-	clear.b [$r13+]
-
-	clear.d $r12
-	clear.b [$r13+]
-7:
-	jump 8b
+	movem  [$sp+],$r10
+	addq	44,$r12 ;; Get back count before faulting point.
+	subq	44,$r11 ;; Get back pointer to faulting movem-line.
+	jump	4b	;; Fall through, pretending the fault didn't happen.
 
 	.previous
 	.section __ex_table,\"a\"
@@ -354,25 +316,30 @@
   /* Either we directly start copying here, using dword copying in a loop,
      or we copy as much as possible with 'movem' and then the last block
      (<44 bytes) is copied here.  This will work since 'movem' will have
-     updated src, dst and n. */
+     updated src, dst and n.  (Except with failing src.)
 
-  while (n >= 16)
-  {
-    __asm_copy_from_user_16 (dst, src, retn);
-    n -= 16;
-  }
+     Since we want to keep src accurate, we can't use
+     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
+     retn, but not src (by design; its value is ignored elsewhere).  */
 
-  /* Having a separate by-four loops cuts down on cache footprint.
-     FIXME:  Test with and without; increasing switch to be 0..15.  */
   while (n >= 4)
   {
     __asm_copy_from_user_4 (dst, src, retn);
     n -= 4;
+
+    if (retn)
+      goto copy_exception_bytes;
   }
 
+  /* If we get here, there were no memory read faults.  */
   switch (n)
   {
+    /* These copies are at least "naturally aligned" (so we don't have
+       to check each byte), due to the src alignment code before the
+       movem loop.  The *_3 case *will* get the correct count for retn.  */
     case 0:
+      /* This case deliberately left in (if you have doubts check the
+	 generated assembly code).  */
       break;
     case 1:
       __asm_copy_from_user_1 (dst, src, retn);
@@ -385,7 +352,22 @@
       break;
   }
 
+  /* If we get here, retn correctly reflects the number of failing
+     bytes.  */
   return retn;
+
+copy_exception_bytes:
+  /* We already have "retn" bytes cleared, and need to clear the
+     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
+     memset is preferred here, since this isn't speed-critical code and
+     we'd rather have this a leaf-function than calling memset.  */
+  {
+    char *endp;
+    for (endp = dst + n; dst < endp; dst++)
+      *dst = 0;
+  }
+
+  return retn + n;
 }
 
 /* Zero userspace.  */
@@ -444,10 +426,10 @@
       check the equalities in the first comment.  It should say
       something like "r13=r13, r11=r11, r12=r12". */
     __asm__ volatile ("
-	;; Check that the following is true (same register names on
-	;; both sides of equal sign, as in r8=r8):
-	;; %0=r13, %1=r12 %2=r10
-	;;
+	.ifnc %0%1%2,$r13$r12$r10					\n\
+	.err								\n\
+	.endif								\n\
+
 	;; Save the registers we'll clobber in the movem process
 	;; on the stack.  Don't mention them to gcc, it will only be
 	;; upset.

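The .ifnc/.err/.endif lines added in three places above turn the old
"check the register names in the comment" convention into an
assembly-time assertion: if gcc does not allocate the asm operands to
the exact registers the hand-written movem code expects, the build
fails instead of silently producing wrong code.  A minimal stand-alone
illustration of the same trick for a single operand (hypothetical
helper, CRIS-targeted, not part of the patch):

/* The register-asm variable pins val to r10, so %0 should expand to
   $r10; .ifnc compares the two strings and, on any mismatch, .err
   makes the assembler abort the build.  */
static inline void assert_operand_is_r10(int pval)
{
	register int val __asm__ ("r10") = pval;

	__asm__ volatile ("\
	.ifnc %0,$r10							\n\
	.err								\n\
	.endif"
	: /* no outputs */
	: "r" (val));
}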
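The fixup comment above cites the old fs/super.c idiom
"i = size - copy_from_user((void *)page, data, size)".  The sketch
below (an illustrative caller with assumed names, written against the
2.4-era interface) shows why the patch makes __copy_user_zeroing
return exactly the number of inaccessible bytes and zero them: the
caller keeps the partial result and never touches the faulting page
again.

#include <asm/uaccess.h>

/* read_user_chunk() is a hypothetical caller in the fs/super.c style.
   It relies on two guarantees: copy_from_user() returns exactly the
   number of bytes it could not read, and those trailing bytes of the
   destination have already been cleared.  */
static int read_user_chunk(void *page, const void *data, unsigned long size)
{
	unsigned long not_copied = copy_from_user(page, data, size);

	/* Bytes actually transferred; the last not_copied bytes of
	   page hold zeroes rather than stale kernel memory.  */
	return size - not_copied;
}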
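For the overall shape of the rewritten __copy_user_zeroing, the
schematic C rendering below may help.  It is a sketch under stated
assumptions, not the real implementation: the actual function does the
large blocks with a movem loop, an __ex_table fixup and the
__asm_copy_from_user_N macros.  copy_chunk() is a stand-in that models
one guarded user read; in this user-space model it always succeeds,
whereas the real fixups add the failing byte count to retn and clear
the corresponding destination bytes.

#include <string.h>

/* Copies len bytes, advances both pointers, and returns the number of
   bytes that could NOT be read (always 0 in this model).  */
static unsigned long copy_chunk(char **dst, const char **src, unsigned long len)
{
	memcpy(*dst, *src, len);
	*dst += len;
	*src += len;
	return 0;
}

unsigned long copy_user_zeroing_sketch(void *pdst, const void *psrc, unsigned long pn)
{
	char *dst = pdst;
	const char *src = psrc;
	unsigned long n = pn;
	unsigned long retn = 0;

	/* Align src, not dst: a later read fault is then known to hit
	   an aligned word, with no 1..3 stray good bytes to salvage.  */
	if (((unsigned long) src & 3) != 0) {
		if (((unsigned long) src & 1) && n != 0) {
			retn += copy_chunk(&dst, &src, 1);
			n--;
		}
		if (((unsigned long) src & 2) && n >= 2) {
			retn += copy_chunk(&dst, &src, 2);
			n -= 2;
		}
		/* One check covers both adjustments: either both or
		   neither of them touched an unreadable page.  */
		if (retn != 0)
			goto copy_exception_bytes;
	}

	/* Word loop (the real code uses movem for blocks of 44*2 bytes
	   or more and falls back to a loop like this); checking after
	   every chunk keeps src and n accurate, which the old
	   clear-everything fixup did not.  */
	while (n >= 4) {
		retn += copy_chunk(&dst, &src, 4);
		n -= 4;
		if (retn)
			goto copy_exception_bytes;
	}

	/* 0..3 tail bytes; any failure here is already counted in retn
	   and zero-filled by the chunk helper.  */
	if (n) {
		retn += copy_chunk(&dst, &src, n);
		n = 0;
	}
	return retn;

copy_exception_bytes:
	/* Zero the bytes that were never read and include them in the
	   return value, as the new copy_exception_bytes code does.  */
	memset(dst, 0, n);
	return retn + n;
}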