patch-2.4.0-test10 linux/mm/page_alloc.c

Next file: linux/mm/swap.c
Previous file: linux/mm/oom_kill.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.0-test9/linux/mm/page_alloc.c linux/mm/page_alloc.c
--- v2.4.0-test9/linux/mm/page_alloc.c
+++ linux/mm/page_alloc.c
@@ -17,13 +17,6 @@
 #include <linux/pagemap.h>
 #include <linux/bootmem.h>
 
-/* Use NUMNODES instead of numnodes for better code inside kernel APIs */
-#ifndef CONFIG_DISCONTIGMEM
-#define NUMNODES 1
-#else
-#define NUMNODES numnodes
-#endif
-
 int nr_swap_pages;
 int nr_active_pages;
 int nr_inactive_dirty_pages;
@@ -294,7 +287,7 @@
 	zone_t **zone;
 	int direct_reclaim = 0;
 	unsigned int gfp_mask = zonelist->gfp_mask;
-	struct page * page = NULL;
+	struct page * page;
 
 	/*
 	 * Allocations put pressure on the VM subsystem.
@@ -329,7 +322,7 @@
 	 * wake up bdflush.
 	 */
 	else if (free_shortage() && nr_inactive_dirty_pages > free_shortage()
-			&& nr_inactive_dirty_pages > freepages.high)
+			&& nr_inactive_dirty_pages >= freepages.high)
 		wakeup_bdflush(0);
 
 try_again:
@@ -347,7 +340,7 @@
 		if (!z->size)
 			BUG();
 
-		if (z->free_pages > z->pages_low) {
+		if (z->free_pages >= z->pages_low) {
 			page = rmqueue(z, order);
 			if (page)
 				return page;
@@ -517,17 +510,17 @@
 		 * happen when the OOM killer selects this task for
 		 * instant execution...
 		 */
-		if (direct_reclaim)
+		if (direct_reclaim) {
 			page = reclaim_page(z);
-		if (page)
-			return page;
+			if (page)
+				return page;
+		}
 
 		/* XXX: is pages_min/4 a good amount to reserve for this? */
 		if (z->free_pages < z->pages_min / 4 &&
 				!(current->flags & PF_MEMALLOC))
 			continue;
-		if (!page)
-			page = rmqueue(z, order);
+		page = rmqueue(z, order);
 		if (page)
 			return page;
 	}
@@ -588,12 +581,14 @@
 {
 	unsigned int sum;
 	zone_t *zone;
-	int i;
+	pg_data_t *pgdat = pgdat_list;
 
 	sum = 0;
-	for (i = 0; i < NUMNODES; i++)
-		for (zone = NODE_DATA(i)->node_zones; zone < NODE_DATA(i)->node_zones + MAX_NR_ZONES; zone++)
+	while (pgdat) {
+		for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++)
 			sum += zone->free_pages;
+		pgdat = pgdat->node_next;
+	}
 	return sum;
 }
 
@@ -604,12 +599,14 @@
 {
 	unsigned int sum;
 	zone_t *zone;
-	int i;
+	pg_data_t *pgdat = pgdat_list;
 
 	sum = 0;
-	for (i = 0; i < NUMNODES; i++)
-		for (zone = NODE_DATA(i)->node_zones; zone < NODE_DATA(i)->node_zones + MAX_NR_ZONES; zone++)
+	while (pgdat) {
+		for (zone = pgdat->node_zones; zone < pgdat->node_zones + MAX_NR_ZONES; zone++)
 			sum += zone->inactive_clean_pages;
+		pgdat = pgdat->node_next;
+	}
 	return sum;
 }
 
@@ -644,11 +641,13 @@
 #if CONFIG_HIGHMEM
 unsigned int nr_free_highpages (void)
 {
-	int i;
+	pg_data_t *pgdat = pgdat_list;
 	unsigned int pages = 0;
 
-	for (i = 0; i < NUMNODES; i++)
-		pages += NODE_DATA(i)->node_zones[ZONE_HIGHMEM].free_pages;
+	while (pgdat) {
+		pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
+		pgdat = pgdat->node_next;
+	}
 	return pages;
 }
 #endif
@@ -658,7 +657,7 @@
  * We also calculate the percentage fragmentation. We do this by counting the
  * memory on each free list with the exception of the first item on the list.
  */
-void show_free_areas_core(int nid)
+void show_free_areas_core(pg_data_t *pgdat)
 {
  	unsigned long order;
 	unsigned type;
@@ -678,7 +677,7 @@
 
 	for (type = 0; type < MAX_NR_ZONES; type++) {
 		struct list_head *head, *curr;
-		zone_t *zone = NODE_DATA(nid)->node_zones + type;
+		zone_t *zone = pgdat->node_zones + type;
  		unsigned long nr, total, flags;
 
 		total = 0;
@@ -710,7 +709,7 @@
 
 void show_free_areas(void)
 {
-	show_free_areas_core(0);
+	show_free_areas_core(pgdat_list);
 }
 
 /*
@@ -780,9 +779,6 @@
 	unsigned long totalpages, offset, realtotalpages;
 	unsigned int cumulative = 0;
 
-	pgdat->node_next = pgdat_list;
-	pgdat_list = pgdat;
-
 	totalpages = 0;
 	for (i = 0; i < MAX_NR_ZONES; i++) {
 		unsigned long size = zones_size[i];
@@ -795,21 +791,6 @@
 			
 	printk("On node %d totalpages: %lu\n", nid, realtotalpages);
 
-	/*
-	 * Select nr of pages we try to keep free for important stuff
-	 * with a minimum of 10 pages and a maximum of 256 pages, so
-	 * that we don't waste too much memory on large systems.
-	 * This is fairly arbitrary, but based on some behaviour
-	 * analysis.
-	 */
-	i = realtotalpages >> 7;
-	if (i < 10)
-		i = 10;
-	if (i > 256)
-		i = 256;
-	freepages.min += i;
-	freepages.low += i * 2;
-	freepages.high += i * 3;
 	memlist_init(&active_list);
 	memlist_init(&inactive_dirty_list);
 
@@ -822,7 +803,7 @@
 	 */
 	map_size = (totalpages + 1)*sizeof(struct page);
 	if (lmem_map == (struct page *)0) {
-		lmem_map = (struct page *) alloc_bootmem_node(nid, map_size);
+		lmem_map = (struct page *) alloc_bootmem_node(pgdat, map_size);
 		lmem_map = (struct page *)(PAGE_OFFSET + 
 			MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET));
 	}
@@ -875,6 +856,20 @@
 		zone->pages_min = mask;
 		zone->pages_low = mask*2;
 		zone->pages_high = mask*3;
+		/*
+		 * Add these free targets to the global free target;
+		 * we have to be SURE that freepages.high is higher
+		 * than SUM [zone->pages_min] for all zones, otherwise
+		 * we may have bad bad problems.
+		 *
+		 * This means we cannot make the freepages array writable
+		 * in /proc, but have to add a separate extra_free_target
+		 * for people who require it to catch load spikes in eg.
+		 * gigabit ethernet routing...
+		 */
+		freepages.min += mask;
+		freepages.low += mask*2;
+		freepages.high += mask*3;
 		zone->zone_mem_map = mem_map + offset;
 		zone->zone_start_mapnr = offset;
 		zone->zone_start_paddr = zone_start_paddr;
@@ -900,7 +895,7 @@
 			bitmap_size = (bitmap_size + 7) >> 3;
 			bitmap_size = LONG_ALIGN(bitmap_size);
 			zone->free_area[i].map = 
-			  (unsigned int *) alloc_bootmem_node(nid, bitmap_size);
+			  (unsigned int *) alloc_bootmem_node(pgdat, bitmap_size);
 		}
 	}
 	build_zonelists(pgdat);
@@ -908,7 +903,7 @@
 
 void __init free_area_init(unsigned long *zones_size)
 {
-	free_area_init_core(0, NODE_DATA(0), &mem_map, zones_size, 0, 0, 0);
+	free_area_init_core(0, &contig_page_data, &mem_map, zones_size, 0, 0, 0);
 }
 
 static int __init setup_mem_frac(char *str)

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)