patch-2.4.0-test2 linux/arch/ppc/kernel/hashtable.S

diff -u --recursive --new-file v2.4.0-test1/linux/arch/ppc/kernel/hashtable.S linux/arch/ppc/kernel/hashtable.S
@@ -52,6 +52,13 @@
 	
 	.globl	hash_page
 hash_page:
+#ifdef CONFIG_PPC64BRIDGE
+	mfmsr	r0
+	clrldi	r0,r0,1		/* make sure it's in 32-bit mode */
+	sync
+	MTMSRD(r0)
+	isync
+#endif
 #ifdef CONFIG_SMP
 	SAVE_2GPRS(7,r21)
 	eieio
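Annotation: the new PPC64BRIDGE prologue drops the processor into 32-bit mode before hash_page does anything else. clrldi r0,r0,1 clears the most-significant bit of the 64-bit MSR, which is MSR[SF] (sixty-four-bit mode); the sync/MTMSRD/isync sequence makes the mode switch take effect safely. A minimal C sketch of the bit manipulation (illustrative only; the kernel does this inline with mfmsr/mtmsrd):

    #include <stdint.h>

    /* Clear MSR[SF], the MSB of the 64-bit MSR, as clrldi r0,r0,1 does.
     * With SF clear, the CPU fetches and translates in 32-bit mode. */
    static inline uint64_t msr_enter_32bit_mode(uint64_t msr)
    {
        return msr & ~(1ULL << 63);
    }
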
@@ -120,28 +127,183 @@
 	ori	r4,r4,0xe04		/* clear out reserved bits */
 	andc	r6,r6,r4		/* PP=2 or 0, when _PAGE_HWWRITE */
 
+#ifdef CONFIG_POWER4
+	/*
+	 * XXX hack hack hack - translate 32-bit "physical" addresses
+	 * in the linux page tables to 42-bit real addresses in such
+	 * a fashion that we can get at the I/O we need to access.
+	 *	-- paulus
+	 */
+	cmpwi	0,r6,0
+	rlwinm	r4,r6,16,16,30
+	bge	57f
+	cmplwi	0,r4,0xfe00
+	li	r5,0x3fd
+	bne	56f
+	li	r5,0x3ff
+56:	sldi	r5,r5,32
+	or	r6,r6,r5
+57:
+#endif
+
+#ifdef CONFIG_PPC64BRIDGE
 	/* Construct the high word of the PPC-style PTE */
 	mfsrin	r5,r3			/* get segment reg for segment */
-#ifdef CONFIG_PPC64
+	rlwinm	r5,r5,0,5,31
 	sldi	r5,r5,12
-#else /* CONFIG_PPC64 */
-	rlwinm	r5,r5,7,1,24		/* put VSID in 0x7fffff80 bits */
-#endif /* CONFIG_PPC64 */
 	
 #ifndef CONFIG_SMP				/* do this later for SMP */
-#ifdef CONFIG_PPC64
 	ori	r5,r5,1			/* set V (valid) bit */
-#else /* CONFIG_PPC64 */
+#endif
+	
+	rlwimi	r5,r3,16,20,24		/* put in API (abbrev page index) */
+	/* Get the address of the primary PTE group in the hash table */
+	.globl	hash_page_patch_A
+hash_page_patch_A:
+	lis	r4,Hash_base@h		/* base address of hash table */
+	rlwimi	r4,r5,32-5,25-Hash_bits,24	/* (VSID & hash_mask) << 7 */
+	rlwinm	r0,r3,32-5,25-Hash_bits,24	/* (PI & hash_mask) << 7 */
+	xor	r4,r4,r0		/* make primary hash */
+
+	/* See whether it was a PTE not found exception or a
+	   protection violation. */
+	andis.	r0,r20,0x4000
+	li	r2,8			/* PTEs/group */
+	bne	10f			/* no PTE: go look for an empty slot */
+	tlbie	r3			/* invalidate TLB entry */
+
+	/* Search the primary PTEG for a PTE whose 1st dword matches r5 */
+	mtctr	r2
+	addi	r3,r4,-16
+1:	ldu	r0,16(r3)		/* get next PTE */
+	cmpd	0,r0,r5
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	found_slot
+
+	/* Search the secondary PTEG for a matching PTE */
+	ori	r5,r5,0x2		/* set H (secondary hash) bit */
+	.globl	hash_page_patch_B
+hash_page_patch_B:
+	xoris	r3,r4,Hash_msk>>16	/* compute secondary hash */
+	xori	r3,r3,0xff80
+	addi	r3,r3,-16
+	mtctr	r2
+2:	ldu	r0,16(r3)
+	cmpd	0,r0,r5
+	bdnzf	2,2b
+	beq+	found_slot
+	xori	r5,r5,0x2		/* clear H bit again */
+
+	/* Search the primary PTEG for an empty slot */
+10:	mtctr	r2
+	addi	r3,r4,-16		/* search primary PTEG */
+1:	ldu	r0,16(r3)		/* get next PTE */
+	andi.	r0,r0,1
+	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
+	beq+	found_empty
+
+	/* Search the secondary PTEG for an empty slot */
+	ori	r5,r5,0x2		/* set H (secondary hash) bit */
+	.globl	hash_page_patch_C
+hash_page_patch_C:
+	xoris	r3,r4,Hash_msk>>16	/* compute secondary hash */
+	xori	r3,r3,0xff80
+	addi	r3,r3,-16
+	mtctr	r2
+2:	ldu	r0,16(r3)
+	andi.	r0,r0,1
+	bdnzf	2,2b
+	beq+	found_empty
+
+	/*
+	 * Choose an arbitrary slot in the primary PTEG to overwrite.
+	 * Since both the primary and secondary PTEGs are full, and we
+	 * have no information that the PTEs in the primary PTEG are
+	 * more important or useful than those in the secondary PTEG,
+	 * and we know there is a definite (although small) speed
+	 * advantage to putting the PTE in the primary PTEG, we always
+	 * put the PTE in the primary PTEG.
+	 */
+	xori	r5,r5,0x2		/* clear H bit again */
+	lis	r3,next_slot@ha
+	tophys(r3,r3)
+	lwz	r2,next_slot@l(r3)
+	addi	r2,r2,16
+	andi.	r2,r2,0x70
+#ifdef CONFIG_POWER4
+	/*
+	 * Since we don't have BATs on POWER4, we rely on always having
+	 * PTEs in the hash table to map the hash table and the code
+	 * that manipulates it in virtual mode, namely flush_hash_page and
+	 * flush_hash_segments.  Otherwise we can get a DSI inside those
+	 * routines which leads to a deadlock on the hash_table_lock on
+	 * SMP machines.  We avoid this by never overwriting the first
+	 * PTE of each PTEG if it is already valid.
+	 *	-- paulus.
+	 */
+	bne	102f
+	li	r2,0x10
+102:
+#endif /* CONFIG_POWER4 */
+	stw	r2,next_slot@l(r3)
+	add	r3,r4,r2
+11:
+	/* update counter of evicted pages */
+	lis	r2,htab_evicts@ha
+	tophys(r2,r2)
+	lwz	r4,htab_evicts@l(r2)
+	addi	r4,r4,1
+	stw	r4,htab_evicts@l(r2)
+
+#ifndef CONFIG_SMP
+	/* Store PTE in PTEG */
+found_empty:
+	std	r5,0(r3)
+found_slot:
+	std	r6,8(r3)
+	sync
+
+#else /* CONFIG_SMP */
+/*
+ * Between the tlbie above and updating the hash table entry below,
+ * another CPU could read the hash table entry and put it in its TLB.
+ * There are 3 cases:
+ * 1. using an empty slot
+ * 2. updating an earlier entry to change permissions (i.e. enable write)
+ * 3. taking over the PTE for an unrelated address
+ *
+ * In each case it doesn't really matter if the other CPUs have the old
+ * PTE in their TLB.  So we don't need to bother with another tlbie here,
+ * which is convenient as we've overwritten the register that had the
+ * address. :-)  The tlbie above is mainly to make sure that this CPU comes
+ * and gets the new PTE from the hash table.
+ *
+ * We do however have to make sure that the PTE is never in an invalid
+ * state with the V bit set.
+ */
+found_empty:
+found_slot:
+	std	r5,0(r3)	/* clear V (valid) bit in PTE */
+	sync
+	tlbsync
+	sync
+	std	r6,8(r3)	/* put in correct RPN, WIMG, PP bits */
+	sync
+	ori	r5,r5,1
+	std	r5,0(r3)	/* finally set V bit in PTE */
+#endif /* CONFIG_SMP */
+
+#else /* CONFIG_PPC64BRIDGE */
+
+	/* Construct the high word of the PPC-style PTE */
+	mfsrin	r5,r3			/* get segment reg for segment */
+	rlwinm	r5,r5,7,1,24		/* put VSID in 0x7fffff80 bits */
+	
+#ifndef CONFIG_SMP				/* do this later for SMP */
 	oris	r5,r5,0x8000		/* set V (valid) bit */
-#endif /* CONFIG_PPC64 */
 #endif
 	
-#ifdef CONFIG_PPC64
-/* XXX:	 does this insert the api correctly? -- Cort */
-	rlwimi	r5,r3,17,21,25		/* put in API (abbrev page index) */
-#else /* CONFIG_PPC64 */
 	rlwimi	r5,r3,10,26,31		/* put in API (abbrev page index) */
-#endif /* CONFIG_PPC64 */
 	/* Get the address of the primary PTE group in the hash table */
 	.globl	hash_page_patch_A
 hash_page_patch_A:
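Annotation: the CONFIG_POWER4 block near the top of this hunk is the "hack hack hack" paulus flags in his comment: addresses with the top bit set are treated as I/O, and a high-order field is OR'd in to form a 42-bit real address, 0x3ff for the 0xfe000000 window and 0x3fd for everything else. A hedged C reading of those five instructions (function and parameter names are invented for illustration):

    #include <stdint.h>

    /* Map a 32-bit linux "physical" address to a 42-bit POWER4 real
     * address.  Addresses below 0x80000000 are RAM and pass through;
     * the window whose top 15 bits match 0xfe000000 gets high bits
     * 0x3ff, all other I/O gets 0x3fd.  (A reading of the
     * cmpwi/rlwinm/cmplwi sequence above.) */
    static uint64_t power4_real_addr(uint32_t phys)
    {
        uint64_t real = phys;
        if (phys & 0x80000000u) {
            if ((phys >> 17) == (0xfe000000u >> 17))
                real |= 0x3ffULL << 32;
            else
                real |= 0x3fdULL << 32;
        }
        return real;
    }
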
@@ -160,89 +322,44 @@
 	/* Search the primary PTEG for a PTE whose 1st word matches r5 */
 	mtctr	r2
 	addi	r3,r4,-8
-1:		
-#ifdef CONFIG_PPC64	
-	lwzu	r0,16(r3)		/* get next PTE */
-#else	
-	lwzu	r0,8(r3)		/* get next PTE */
-#endif	
+1:	lwzu	r0,8(r3)		/* get next PTE */
 	cmp	0,r0,r5
 	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
 	beq+	found_slot
 
 	/* Search the secondary PTEG for a matching PTE */
-#ifdef CONFIG_PPC64
-	ori	r5,r5,0x2		/* set H (secondary hash) bit */
-#else	
 	ori	r5,r5,0x40		/* set H (secondary hash) bit */
-#endif	
 	.globl	hash_page_patch_B
 hash_page_patch_B:
 	xoris	r3,r4,Hash_msk>>16	/* compute secondary hash */
 	xori	r3,r3,0xffc0
-#ifdef CONFIG_PPC64
-	addi	r3,r3,-16
-#else	
 	addi	r3,r3,-8
-#endif	
 	mtctr	r2
-2:
-#ifdef CONFIG_PPC64
-	lwzu	r0,16(r3)
-#else	
-	lwzu	r0,8(r3)
-#endif	
+2:	lwzu	r0,8(r3)
 	cmp	0,r0,r5
 	bdnzf	2,2b
 	beq+	found_slot
-#ifdef CONFIG_PPC64
-	xori	r5,r5,0x2		/* clear H bit again */
-#else	
 	xori	r5,r5,0x40		/* clear H bit again */
-#endif	
 
 	/* Search the primary PTEG for an empty slot */
 10:	mtctr	r2
-#ifdef CONFIG_PPC64
-	addi	r3,r4,-16		/* search primary PTEG */
-#else	
 	addi	r3,r4,-8		/* search primary PTEG */
-#endif	
-1:
-#ifdef CONFIG_PPC64
-	lwzu	r0,16(r3)		/* get next PTE */
-	andi.	r0,r0,1
-#else	
-	lwzu	r0,8(r3)		/* get next PTE */
+1:	lwzu	r0,8(r3)		/* get next PTE */
 	rlwinm.	r0,r0,0,0,0		/* only want to check valid bit */
-#endif	
 	bdnzf	2,1b			/* loop while ctr != 0 && !cr0.eq */
 	beq+	found_empty
 
 	/* Search the secondary PTEG for an empty slot */
-#ifdef CONFIG_PPC64
-	ori	r5,r5,0x2		/* set H (secondary hash) bit */
-#else	
 	ori	r5,r5,0x40		/* set H (secondary hash) bit */
-#endif	
 	.globl	hash_page_patch_C
 hash_page_patch_C:
 	xoris	r3,r4,Hash_msk>>16	/* compute secondary hash */
 	xori	r3,r3,0xffc0
-#ifdef CONFIG_PPC64
-	addi	r3,r3,-16
-#else	
 	addi	r3,r3,-8
-#endif	
 	mtctr	r2
 2:
-#ifdef CONFIG_PPC64
-	lwzu	r0,16(r3)
-	andi.	r0,r0,1
-#else	
 	lwzu	r0,8(r3)
 	rlwinm.	r0,r0,0,0,0		/* only want to check valid bit */
-#endif	
 	bdnzf	2,2b
 	beq+	found_empty
 
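Annotation: both PTE formats use the same hashed-page-table lookup that hash_page_patch_A/B/C get patched with at boot: the primary hash is (VSID XOR page index) masked by Hash_mask, the secondary hash is its one's complement, and the hash selects a PTE group of eight entries. Only the sizes differ: 8-byte PTEs (64-byte PTEGs, shift 6, H bit 0x40) in the classic format versus 16-byte PTEs (128-byte PTEGs, shift 7, H bit 0x2) under CONFIG_PPC64BRIDGE. A minimal C sketch of the address computation (names are illustrative, not the kernel's symbols):

    #include <stdint.h>

    /* pteg_shift is 6 for the classic 32-bit PTE format (8 x 8-byte
     * PTEs per group) and 7 for PPC64BRIDGE (8 x 16-byte PTEs). */
    static uintptr_t pteg_addr(uintptr_t hash_base, uint32_t hash_mask,
                               uint32_t vsid, uint32_t pgidx,
                               unsigned int pteg_shift, int secondary)
    {
        uint32_t hash = (vsid ^ pgidx) & hash_mask;
        if (secondary)
            hash = ~hash & hash_mask;
        return hash_base + ((uintptr_t)hash << pteg_shift);
    }
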
@@ -255,21 +372,12 @@
 	 * advantage to putting the PTE in the primary PTEG, we always
 	 * put the PTE in the primary PTEG.
 	 */
-#ifdef CONFIG_PPC64
-	xori	r5,r5,0x2		/* clear H bit again */
-#else
 	xori	r5,r5,0x40		/* clear H bit again */
-#endif		
 	lis	r3,next_slot@ha
 	tophys(r3,r3)
 	lwz	r2,next_slot@l(r3)
-#ifdef CONFIG_PPC64	
-	addi	r2,r2,16
-	andi.	r2,r2,0x78
-#else
 	addi	r2,r2,8
 	andi.	r2,r2,0x38
-#endif	
 	stw	r2,next_slot@l(r3)
 	add	r3,r4,r2
 11:		
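Annotation: when both PTEGs are full, the code evicts round-robin through the eight slots of the primary PTEG: next_slot advances by one PTE size and wraps via the mask (0x38 for 8-byte PTEs, 0x70 for 16-byte ones). The POWER4 variant additionally skips offset 0, so the first PTE of each group, which keeps the hash table and its flush code mapped, is never evicted. A minimal C sketch of the pointer arithmetic (parameter names are illustrative):

    /* Advance the round-robin eviction offset within one 8-slot PTEG.
     * pte_size is 8 (classic) or 16 (PPC64BRIDGE); 7 * pte_size is the
     * wrap mask, 0x38 or 0x70.  Returns the byte offset to overwrite. */
    static unsigned int next_evict_slot(unsigned int *next_slot,
                                        unsigned int pte_size)
    {
        *next_slot = (*next_slot + pte_size) & (7 * pte_size);
        return *next_slot;
    }
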
@@ -283,17 +391,9 @@
 #ifndef CONFIG_SMP
 	/* Store PTE in PTEG */
 found_empty:
-#ifdef CONFIG_PPC64
-	std	r5,0(r3)
-#else		
 	stw	r5,0(r3)
-#endif	
 found_slot:
-#ifdef CONFIG_PPC64
-	std	r6,8(r3)
-#else
 	stw	r6,4(r3)
-#endif	
 	sync
 
 #else /* CONFIG_SMP */
@@ -325,6 +425,7 @@
 	oris	r5,r5,0x8000
 	stw	r5,0(r3)	/* finally set V bit in PTE */
 #endif /* CONFIG_SMP */
+#endif /* CONFIG_PPC64BRIDGE */
 
 /*
  * Update the hash table miss count.  We only want misses here
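Annotation: on SMP, both the 64-bit and 32-bit paths finish with the same carefully ordered store sequence: write the tag word with V clear, sync, tlbsync, sync, write the RPN/WIMG/PP word, sync, then set V. As the comment in the patch explains, this ensures no other CPU can ever load a half-written PTE with the valid bit set. A minimal C transliteration using the classic 32-bit layout (the barrier functions are empty stand-ins for the sync/tlbsync instructions):

    #include <stdint.h>

    struct hpte {
        uint32_t tag;        /* V | VSID | H | API (32-bit PTE format) */
        uint32_t rpn_flags;  /* RPN | WIMG | PP */
    };

    static void sync_barrier(void)    { /* stands in for "sync" */ }
    static void tlbsync_barrier(void) { /* stands in for "tlbsync" */ }

    static void hpte_set(volatile struct hpte *pte, uint32_t tag,
                         uint32_t rpn_flags)
    {
        pte->tag = tag & ~0x80000000u;  /* write tag with V clear */
        sync_barrier();
        tlbsync_barrier();              /* wait for the earlier tlbie
                                           to complete everywhere */
        sync_barrier();
        pte->rpn_flags = rpn_flags;     /* fill in RPN, WIMG, PP */
        sync_barrier();
        pte->tag = tag | 0x80000000u;   /* finally set V */
    }
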
@@ -371,7 +472,7 @@
 	lwz	r22,GPR22(r21)
 	lwz	r23,GPR23(r21)
 	lwz	r21,GPR21(r21)
-	rfi
+	RFI
 	
 #ifdef CONFIG_SMP
 hash_page_out:
@@ -410,7 +511,7 @@
 #endif
 	blr
 99:
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC64BRIDGE)
 	/* Note - we had better not do anything which could generate
 	   a hash table miss while we have the hash table locked,
 	   or we'll get a deadlock.  -paulus */
@@ -419,6 +520,8 @@
 	rlwinm	r0,r10,0,17,15	/* clear bit 16 (MSR_EE) */
 	mtmsr	r0
 	SYNC
+#endif
+#ifdef CONFIG_SMP
 	lis	r9,hash_table_lock@h
 	ori	r9,r9,hash_table_lock@l
 	lwz	r8,PROCESSOR(r2)
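Annotation: the widened #if here is why the flush routines now run with external interrupts off on PPC64BRIDGE even without SMP: a hash table miss taken while the table is being modified (or, on SMP, while hash_table_lock is held) would deadlock, as the in-line comment warns. Masking MSR_EE is just clearing bit 0x8000, which is what rlwinm r0,r10,0,17,15 does; as a trivial C helper (illustrative, not a kernel API):

    /* Clear MSR_EE (external interrupt enable), bit 0x8000 of the MSR,
     * exactly as rlwinm rD,rS,0,17,15 does in the patch. */
    static inline unsigned long msr_disable_ee(unsigned long msr)
    {
        return msr & ~0x8000UL;
    }
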
@@ -430,6 +533,7 @@
 	bne-	10b
 	eieio
 #endif
+#ifndef CONFIG_PPC64BRIDGE
 	rlwinm	r3,r3,7,1,24		/* put VSID lower limit in position */
 	oris	r3,r3,0x8000		/* set V bit */
 	rlwinm	r4,r4,7,1,24		/* put VSID upper limit in position */
@@ -448,6 +552,26 @@
 	blt	2f			/* branch if out of range */
 	stw	r0,0(r5)		/* invalidate entry */
 2:	bdnz	1b			/* continue with loop */
+#else /* CONFIG_PPC64BRIDGE */
+	rldic	r3,r3,12,20		/* put VSID lower limit in position */
+	ori	r3,r3,1			/* set V bit */
+	rldic	r4,r4,12,20		/* put VSID upper limit in position */
+	ori	r4,r4,0xfff		/* set V bit, API etc. */
+	lis	r6,Hash_size@ha
+	lwz	r6,Hash_size@l(r6)	/* size in bytes */
+	srwi	r6,r6,4			/* # PTEs */
+	mtctr	r6
+	addi	r5,r5,-16
+	li	r0,0
+1:	ldu	r6,16(r5)		/* get next tag word */
+	cmpld	0,r6,r3
+	cmpld	1,r6,r4
+	cror	0,0,5			/* set cr0.lt if out of range */
+	blt	2f			/* branch if out of range */
+	std	r0,0(r5)		/* invalidate entry */
+2:	bdnz	1b			/* continue with loop */
+#endif /* CONFIG_PPC64BRIDGE */
+
 	sync
 	tlbia
 	sync
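Annotation: the new PPC64BRIDGE branch of flush_hash_segments builds two boundary tags (the VSID lower limit with just V set, the VSID upper limit with API, H and V all set) and walks every 16-byte PTE, invalidating any whose tag doubleword falls inside the range; the cmpld/cmpld/cror triple folds "below low" and "above high" into a single out-of-range test. A minimal C sketch of the loop (names are illustrative):

    #include <stdint.h>
    #include <stddef.h>

    /* Zero the tag doubleword of every PTE whose tag lies in
     * [low_tag, high_tag].  The table is an array of 16-byte PTEs,
     * tag first, so PTE i's tag is table[2 * i]. */
    static void flush_vsid_range(uint64_t *table, size_t n_ptes,
                                 uint64_t low_tag, uint64_t high_tag)
    {
        for (size_t i = 0; i < n_ptes; i++) {
            if (table[2 * i] >= low_tag && table[2 * i] <= high_tag)
                table[2 * i] = 0;   /* clear V: entry invalidated */
        }
    }
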
@@ -456,6 +580,8 @@
 	sync
 	lis	r3,hash_table_lock@ha
 	stw	r0,hash_table_lock@l(r3)
+#endif
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC64BRIDGE)
 	mtmsr	r10
 	SYNC
 #endif
@@ -479,7 +605,7 @@
 #endif
 	blr
 99:
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC64BRIDGE)
 	/* Note - we had better not do anything which could generate
 	   a hash table miss while we have the hash table locked,
 	   or we'll get a deadlock.  -paulus */
@@ -488,6 +614,8 @@
 	rlwinm	r0,r10,0,17,15		/* clear bit 16 (MSR_EE) */
 	mtmsr	r0
 	SYNC
+#endif
+#ifdef CONFIG_SMP
 	lis	r9,hash_table_lock@h
 	ori	r9,r9,hash_table_lock@l
 	lwz	r8,PROCESSOR(r2)
@@ -499,6 +627,7 @@
 	bne-	10b
 	eieio
 #endif
+#ifndef CONFIG_PPC64BRIDGE
 	rlwinm	r3,r3,11,1,20		/* put context into vsid */
 	rlwimi	r3,r4,11,21,24		/* put top 4 bits of va into vsid */
 	oris	r3,r3,0x8000		/* set V (valid) bit */
@@ -528,6 +657,37 @@
 	bne	4f			/* if we didn't find it */
 3:	li	r0,0
 	stw	r0,0(r7)		/* invalidate entry */
+#else /* CONFIG_PPC64BRIDGE */
+	rldic	r3,r3,16,16		/* put context into vsid (<< 12) */
+	rlwimi	r3,r4,16,16,24		/* top 4 bits of va and API */
+	ori	r3,r3,1			/* set V (valid) bit */
+	rlwinm	r7,r4,32-5,9,24		/* get page index << 7 */
+	srdi	r5,r3,5			/* vsid << 7 */
+	rlwinm	r5,r5,0,1,24		/* vsid << 7 (limited to 24 bits) */
+	xor	r7,r7,r5		/* primary hash << 7 */
+	lis	r5,Hash_mask@ha
+	lwz	r5,Hash_mask@l(r5)	/* hash mask */
+	slwi	r5,r5,7			/*  << 7 */
+	and	r7,r7,r5
+	add	r6,r6,r7		/* address of primary PTEG */
+	li	r8,8
+	mtctr	r8
+	addi	r7,r6,-16
+1:	ldu	r0,16(r7)		/* get next PTE */
+	cmpd	0,r0,r3			/* see if tag matches */
+	bdnzf	2,1b			/* while --ctr != 0 && !cr0.eq */
+	beq	3f			/* if we found it */
+	ori	r3,r3,2			/* set H (alt. hash) bit */
+	xor	r6,r6,r5		/* address of secondary PTEG */
+	mtctr	r8
+	addi	r7,r6,-16
+2:	ldu	r0,16(r7)		/* get next PTE */
+	cmpd	0,r0,r3			/* see if tag matches */
+	bdnzf	2,2b			/* while --ctr != 0 && !cr0.eq */
+	bne	4f			/* if we didn't find it */
+3:	li	r0,0
+	std	r0,0(r7)		/* invalidate entry */
+#endif /* CONFIG_PPC64BRIDGE */
 4:	sync
 	tlbie	r4			/* in hw tlb too */
 	sync
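Annotation: the PPC64BRIDGE branch of flush_hash_page rebuilds the full PTE tag (VSID from the context, API from the virtual address, V set), derives the primary PTEG address from the same hash as hash_page, and searches its eight slots; on a miss it sets the H bit and searches the secondary PTEG before giving up. A minimal C sketch of the two-PTEG search (the pteg pointers stand for the addresses the asm computes from Hash_mask):

    #include <stdint.h>
    #include <stddef.h>

    /* Return a pointer to the matching tag doubleword, or NULL.
     * Each PTEG is 8 PTEs of 16 bytes; the tag is the first
     * doubleword of each PTE. */
    static uint64_t *find_hpte(uint64_t *primary, uint64_t *secondary,
                               uint64_t tag)
    {
        for (int i = 0; i < 8; i++)
            if (primary[2 * i] == tag)
                return &primary[2 * i];
        tag |= 2;                       /* set H for the secondary hash */
        for (int i = 0; i < 8; i++)
            if (secondary[2 * i] == tag)
                return &secondary[2 * i];
        return NULL;
    }

The caller then zeroes the returned tag (the std r0,0(r7) above) and issues tlbie for the address.
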
@@ -536,6 +696,8 @@
 	sync
 	li	r0,0
 	stw	r0,0(r9)		/* clear hash_table_lock */
+#endif
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC64BRIDGE)
 	mtmsr	r10
 	SYNC
 #endif
