patch-2.4.0-test5 linux/drivers/block/md.c

Next file: linux/drivers/block/ps2esdi.c
Previous file: linux/drivers/block/ll_rw_blk.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.0-test4/linux/drivers/block/md.c linux/drivers/block/md.c
@@ -28,6 +28,7 @@
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
 
+#include <linux/module.h>
 #include <linux/config.h>
 #include <linux/raid/md.h>
 #include <linux/raid/xor.h>
@@ -100,6 +101,7 @@
  */
 struct hd_struct md_hd_struct[MAX_MD_DEVS];
 static int md_blocksizes[MAX_MD_DEVS];
+static int md_hardsect_sizes[MAX_MD_DEVS];
 static int md_maxreadahead[MAX_MD_DEVS];
 static mdk_thread_t *md_recovery_thread = NULL;
 
@@ -122,37 +124,6 @@
 	fops: &md_fops,
 };
 
-void md_plug_device (request_queue_t *mdqueue, kdev_t dev)
-{
-	mdk_rdev_t * rdev;
-	struct md_list_head *tmp;
-	request_queue_t *q;
-	mddev_t *mddev;
-
-	if (!md_test_and_set_bit(0, (atomic_t *)&mdqueue->plugged)) {
- 		mddev = kdev_to_mddev(dev);
-		ITERATE_RDEV(mddev,rdev,tmp) {
-			q = blk_get_queue(rdev->dev);
-			generic_unplug_device(q);
-		}
-		queue_task(&mdqueue->plug_tq, &tq_disk);
-	}
-}
-
-static void md_unplug_device (void * data)
-{
-	mdk_rdev_t * rdev;
-	struct md_list_head *tmp;
-	mddev_t *mddev = (mddev_t *)data;
-	request_queue_t *mdqueue = &mddev->queue, *q;
-
-	clear_bit(0, (atomic_t *)&mdqueue->plugged);
-	ITERATE_RDEV(mddev,rdev,tmp) {
-		q = blk_get_queue(rdev->dev);
-		generic_unplug_device(q);
-	}
-}
-
 /*
  * Enables to iterate over all existing md arrays
  */
@@ -197,28 +168,12 @@
 	mddev_map[minor].data = NULL;
 }
 
-static request_queue_t *md_get_queue (kdev_t dev)
-{
-	mddev_t *mddev = kdev_to_mddev(dev);
-
-	if (!mddev)
-		return NULL;
-	return &mddev->queue;
-}
-
-static void do_md_request (request_queue_t * q)
-{
-	printk(KERN_ALERT "Got md request, not good...");
-	BUG();
-	return;
-}
-
 static int md_make_request (request_queue_t *q, int rw, struct buffer_head * bh)
 {
 	mddev_t *mddev = kdev_to_mddev(bh->b_rdev);
 
 	if (mddev && mddev->pers)
-		return mddev->pers->make_request(q, mddev, rw, bh);
+		return mddev->pers->make_request(mddev, rw, bh);
 	else {
 		buffer_IO_error(bh);
 		return -1;
@@ -227,7 +182,6 @@
 
 static mddev_t * alloc_mddev (kdev_t dev)
 {
-	request_queue_t *q;
 	mddev_t *mddev;
 
 	if (MAJOR(dev) != MD_MAJOR) {
@@ -247,15 +201,6 @@
 	MD_INIT_LIST_HEAD(&mddev->disks);
 	MD_INIT_LIST_HEAD(&mddev->all_mddevs);
 
-	q = &mddev->queue;
-	blk_init_queue(q, DEVICE_REQUEST);
-	blk_queue_pluggable(q, md_plug_device);
-	blk_queue_make_request(q, md_make_request);
-	
-	q->plug_tq.sync = 0;
-	q->plug_tq.routine = &md_unplug_device;
-	q->plug_tq.data = mddev;
-
 	/*
 	 * The 'base' mddev is the one with data NULL.
 	 * personalities can create additional mddevs
@@ -264,6 +209,8 @@
 	add_mddev_mapping(mddev, dev, 0);
 	md_list_add(&mddev->all_mddevs, &all_mddevs);
 
+	MOD_INC_USE_COUNT;
+
 	return mddev;
 }
 
@@ -569,7 +516,7 @@
 		printk (NO_SB,partition_name(rdev->dev));
 		goto abort;
 	}
-	printk(" [events: %08lx]\n", (unsigned long)get_unaligned(&rdev->sb->events));
+	printk(" [events: %08lx]\n", (unsigned long)rdev->sb->events_lo);
 	ret = 0;
 abort:
 	if (bh)
@@ -804,8 +751,8 @@
 	del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev)));
 	md_list_del(&mddev->all_mddevs);
 	MD_INIT_LIST_HEAD(&mddev->all_mddevs);
-	blk_cleanup_queue(&mddev->queue);
 	kfree(mddev);
+	MOD_DEC_USE_COUNT;
 }
 
 #undef BAD_CSUM
@@ -834,7 +781,7 @@
 	printk("     UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%08lx\n",
 		sb->utime, sb->state, sb->active_disks, sb->working_disks,
 		sb->failed_disks, sb->spare_disks,
-		sb->sb_csum, (unsigned long)get_unaligned(&sb->events));
+		sb->sb_csum, (unsigned long)sb->events_lo);
 
 	for (i = 0; i < MD_SB_DISKS; i++) {
 		mdp_disk_t *desc;
@@ -1064,22 +1011,20 @@
 	int first, err, count = 100;
 	struct md_list_head *tmp;
 	mdk_rdev_t *rdev;
-	__u64 ev;
 
 repeat:
 	mddev->sb->utime = CURRENT_TIME;
-	ev = get_unaligned(&mddev->sb->events);
-	++ev;
-	put_unaligned(ev,&mddev->sb->events);
-	if (ev == (__u64)0) {
+	if ((++mddev->sb->events_lo)==0)
+		++mddev->sb->events_hi;
+
+	if ((mddev->sb->events_lo|mddev->sb->events_hi)==0) {
 		/*
 		 * oops, this 64-bit counter should never wrap.
 		 * Either we are in around ~1 trillion A.C., assuming
 		 * 1 reboot per second, or we have a bug:
 		 */
 		MD_BUG();
-		--ev;
-		put_unaligned(ev,&mddev->sb->events);
+		mddev->sb->events_lo = mddev->sb->events_hi = 0xffffffff;
 	}
 	sync_sbs(mddev);
 
@@ -1105,7 +1050,7 @@
 		printk("%s ", partition_name(rdev->dev));
 		if (!rdev->faulty) {
 			printk("[events: %08lx]",
-				(unsigned long)get_unaligned(&rdev->sb->events));
+				(unsigned long)rdev->sb->events_lo);
 			err += write_disk_sb(rdev);
 		} else
 			printk(")\n");
@@ -1288,15 +1233,13 @@
 		 * one event)
 		 */
 		if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) {
-			__u64 ev = get_unaligned(&rdev->sb->events);
-			if (ev != (__u64)0) {
-				--ev;
-				put_unaligned(ev,&rdev->sb->events);
-			}
+			if (rdev->sb->events_lo || rdev->sb->events_hi)
+				if ((rdev->sb->events_lo--)==0)
+					rdev->sb->events_hi--;
 		}
 
 		printk("%s's event counter: %08lx\n", partition_name(rdev->dev),
-			(unsigned long)get_unaligned(&rdev->sb->events));
+			(unsigned long)rdev->sb->events_lo);
 		if (!freshest) {
 			freshest = rdev;
 			continue;
@@ -1304,8 +1247,8 @@
 		/*
 		 * Find the newest superblock version
 		 */
-		ev1 = get_unaligned(&rdev->sb->events);
-		ev2 = get_unaligned(&freshest->sb->events);
+		ev1 = md_event(rdev->sb);
+		ev2 = md_event(freshest->sb);
 		if (ev1 != ev2) {
 			out_of_date = 1;
 			if (ev1 > ev2)
@@ -1329,8 +1272,8 @@
 		 * Kick all non-fresh devices faulty
 		 */
 		__u64 ev1, ev2;
-		ev1 = get_unaligned(&rdev->sb->events);
-		ev2 = get_unaligned(&sb->events);
+		ev1 = md_event(rdev->sb);
+		ev2 = md_event(sb);
 		++ev1;
 		if (ev1 < ev2) {
 			printk("md: kicking non-fresh %s from array!\n",
@@ -1350,8 +1293,8 @@
 			MD_BUG();
 			goto abort;
 		}
-		ev1 = get_unaligned(&rdev->sb->events);
-		ev2 = get_unaligned(&sb->events);
+		ev1 = md_event(rdev->sb);
+		ev2 = md_event(sb);
 		ev3 = ev2;
 		--ev3;
 		if ((rdev->dev != rdev->old_dev) &&
@@ -1694,14 +1637,22 @@
 	 * Drop all container device buffers, from now on
 	 * the only valid external interface is through the md
 	 * device.
+	 * Also find largest hardsector size
 	 */
+	md_hardsect_sizes[mdidx(mddev)] = 512;
 	ITERATE_RDEV(mddev,rdev,tmp) {
 		if (rdev->faulty)
 			continue;
 		fsync_dev(rdev->dev);
 		invalidate_buffers(rdev->dev);
-	}
-
+		if (get_hardsect_size(rdev->dev)
+		    > md_hardsect_sizes[mdidx(mddev)]) 
+			md_hardsect_sizes[mdidx(mddev)] =
+				get_hardsect_size(rdev->dev);
+	}
+	md_blocksizes[mdidx(mddev)] = 1024;
+	if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)])
+		md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)];
 	mddev->pers = pers[pnum];
 
 	err = mddev->pers->run(mddev);
@@ -2080,10 +2031,10 @@
  */
 #ifdef CONFIG_AUTODETECT_RAID
 static int detected_devices[128] md__initdata;
-static int dev_cnt md__initdata=0;
-void md__init md_autodetect_dev(kdev_t dev)
+static int dev_cnt=0;
+void md_autodetect_dev(kdev_t dev)
 {
-	if (dev_cnt < 127)
+	if (dev_cnt >= 0 && dev_cnt < 127)	/* dev_cnt is set to -1 elsewhere so that later calls are ignored */
 		detected_devices[dev_cnt++] = dev;
 }
 #endif
@@ -2094,36 +2045,39 @@
 	mdk_rdev_t *rdev;
 	int i;
 
-	if (raid_setup_args.noautodetect) {
+	if (raid_setup_args.noautodetect)
 		printk(KERN_INFO "skipping autodetection of RAID arrays\n");
-		return;
-	}
-	printk(KERN_INFO "autodetecting RAID arrays\n");
+	else {
 
-	for (i=0; i<dev_cnt; i++) {
-		kdev_t dev = detected_devices[i];
+		printk(KERN_INFO "autodetecting RAID arrays\n");
 
-		if (md_import_device(dev,1)) {
-			printk(KERN_ALERT "could not import %s!\n",
-			       partition_name(dev));
-			continue;
-		}
-		/*
-		 * Sanity checks:
-		 */
-		rdev = find_rdev_all(dev);
-		if (!rdev) {
-			MD_BUG();
-			continue;
-		}
-		if (rdev->faulty) {
-			MD_BUG();
-			continue;
+		for (i=0; i<dev_cnt; i++) {
+			kdev_t dev = detected_devices[i];
+
+			if (md_import_device(dev,1)) {
+				printk(KERN_ALERT "could not import %s!\n",
+				       partition_name(dev));
+				continue;
+			}
+			/*
+			 * Sanity checks:
+			 */
+			rdev = find_rdev_all(dev);
+			if (!rdev) {
+				MD_BUG();
+				continue;
+			}
+			if (rdev->faulty) {
+				MD_BUG();
+				continue;
+			}
+			md_list_add(&rdev->pending, &pending_raid_disks);
 		}
-		md_list_add(&rdev->pending, &pending_raid_disks);
+
+		autorun_devices();
 	}
 
-	autorun_devices();
+	dev_cnt = -1; /* make sure further calls to md_autodetect_dev are ignored */
 #endif
 #ifdef CONFIG_MD_BOOT
 	md_setup_drive();
@@ -2731,11 +2685,9 @@
 			goto done_unlock;
 
 		case STOP_ARRAY:
-			err = do_md_stop (mddev, 0);
-			if (err)
-				goto done_unlock;
-			else
-				goto done;
+			if (!(err = do_md_stop (mddev, 0)))
+				mddev = NULL;
+			goto done_unlock;
 
 		case STOP_ARRAY_RO:
 			err = do_md_stop (mddev, 1);
@@ -2837,7 +2789,8 @@
 			 */
 			if (err) {
 				mddev->sb_dirty = 0;
-				do_md_stop (mddev, 0);
+				if (!do_md_stop (mddev, 0))
+					mddev = NULL;
 			}
 			goto done_unlock;
 		}
@@ -2852,8 +2805,6 @@
 abort_unlock:
 	if (mddev)
 		unlock_mddev(mddev);
-	else
-		printk("huh11?\n");
 
 	return err;
 done:
@@ -2890,7 +2841,7 @@
 	/*
 	 * Detach thread
 	 */
-	sys_setsid();
+	daemonize();
 	sprintf(current->comm, thread->name);
 	md_init_signals();
 	md_flush_signals();
@@ -3248,6 +3199,19 @@
 	return NULL;
 }
 
+static unsigned int sync_io[DK_MAX_MAJOR][DK_MAX_DISK];
+/* Add nr_sectors to dev's sync_io[][] slot (read by is_mddev_idle). */
+void md_sync_acct(kdev_t dev, unsigned long nr_sectors)
+{
+	unsigned int maj = MAJOR(dev);
+	unsigned int slot = disk_index(dev);
+
+	/* devices that fall outside the sync_io table are not accounted */
+	if ((slot < DK_MAX_DISK) && (maj < DK_MAX_MAJOR))
+		sync_io[maj][slot] += nr_sectors;
+}
+
 static int is_mddev_idle (mddev_t *mddev)
 {
 	mdk_rdev_t * rdev;
@@ -3260,8 +3224,12 @@
 		int major = MAJOR(rdev->dev);
 		int idx = disk_index(rdev->dev);
 
+		if ((idx >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+			continue;
+
 		curr_events = kstat.dk_drive_rblk[major][idx] +
 						kstat.dk_drive_wblk[major][idx] ;
+		curr_events -= sync_io[major][idx];
 //		printk("events(major: %d, idx: %d): %ld\n", major, idx, curr_events);
 		if (curr_events != rdev->last_events) {
 //			printk("!I(%ld)", curr_events - rdev->last_events);
@@ -3560,34 +3528,30 @@
 	NULL,
 	0
 };
-
-void md__init raid_setup(char *str, int *ints)
+#ifndef MODULE
+/* Parse the comma-separated words of the "raid=" option; always returns 1. */
+static int md__init raid_setup(char *str)
 {
-	char tmpline[100];
-	int len, pos, nr, i;
+	int len, pos;
 
 	len = strlen(str) + 1;
-	nr = 0;
 	pos = 0;
 
-	for (i = 0; i < len; i++) {
-		char c = str[i];
-
-		if (c == ',' || !c) {
-			tmpline[pos] = 0;
-			if (!strcmp(tmpline,"noautodetect"))
-				raid_setup_args.noautodetect = 1;
-			nr++;
-			pos = 0;
-			continue;
-		}
-		tmpline[pos] = c;
-		pos++;
+	while (pos < len) {
+		char *comma = strchr(str+pos, ',');
+		int wlen = comma ? (comma-str)-pos : (len-1)-pos;
+
+		/* match the word at str+pos exactly, not a prefix of str */
+		if (wlen == (int)strlen("noautodetect") &&
+		    strncmp(str+pos, "noautodetect", wlen) == 0)
+			raid_setup_args.noautodetect = 1;
+		pos += wlen+1;
 	}
 	raid_setup_args.set = 1;
-	return;
+	return 1;
 }
-
+__setup("raid=", raid_setup);
+#endif
 static void md_geninit (void)
 {
 	int i;
@@ -3595,12 +3559,14 @@
 	for(i = 0; i < MAX_MD_DEVS; i++) {
 		md_blocksizes[i] = 1024;
 		md_size[i] = 0;
+		md_hardsect_sizes[i] = 512;
 		md_maxreadahead[i] = MD_READAHEAD;
 		register_disk(&md_gendisk, MKDEV(MAJOR_NR,i), 1, &md_fops, 0);
 	}
-	blksize_size[MD_MAJOR] = md_blocksizes;
+	blksize_size[MAJOR_NR] = md_blocksizes;
 	blk_size[MAJOR_NR] = md_size;
-	max_readahead[MD_MAJOR] = md_maxreadahead;
+	max_readahead[MAJOR_NR] = md_maxreadahead;
+	hardsect_size[MAJOR_NR] = md_hardsect_sizes;
 
 	printk("md.c: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
 
@@ -3623,9 +3589,9 @@
 			MD_MAJOR_VERSION, MD_MINOR_VERSION,
 			MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MAX_REAL);
 
-	if (devfs_register_blkdev (MD_MAJOR, "md", &md_fops))
+	if (devfs_register_blkdev (MAJOR_NR, "md", &md_fops))
 	{
-		printk (KERN_ALERT "Unable to get major %d for md\n", MD_MAJOR);
+		printk (KERN_ALERT "Unable to get major %d for md\n", MAJOR_NR);
 		return (-1);
 	}
 	devfs_handle = devfs_mk_dir (NULL, "md", NULL);
@@ -3633,9 +3599,11 @@
 				MAJOR_NR, 0, S_IFBLK | S_IRUSR | S_IWUSR,
 				&md_fops, NULL);
 
-	blk_dev[MD_MAJOR].queue = md_get_queue;
+	/* forward all md requests to md_make_request */
+	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), md_make_request);
+	
 
-	read_ahead[MD_MAJOR] = INT_MAX;
+	read_ahead[MAJOR_NR] = INT_MAX;
 	md_gendisk.next = gendisk_head;
 
 	gendisk_head = &md_gendisk;
@@ -3659,12 +3627,6 @@
 #ifdef CONFIG_MD_RAID5
 	raid5_init ();
 #endif
-#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE)
-	/*
-	 * pick a XOR routine, runtime.
-	 */
-	calibrate_xor_block();
-#endif
 	md_geninit();
 	return (0);
 }
@@ -3835,6 +3797,54 @@
 __setup("md=", md_setup);
 #endif
 
+#ifdef MODULE
+int init_module (void)
+{
+	return md_init();	/* module load just runs md_init(); its result is returned */
+}
+
+/* Empty device_names: unlink each node from the list, then kfree() it. */
+static void free_device_names(void)
+{
+	struct list_head *node;
+	while ((node = device_names.next) != &device_names) {
+		list_del(node);
+		kfree(node);	/* NOTE(review): assumes node starts its allocation - confirm */
+	}
+}
+
+void cleanup_module (void)
+{
+	struct gendisk **gendisk_ptr;
+	/* Module unload: undo md's registrations and reset its MAJOR_NR tables. */
+	md_unregister_thread(md_recovery_thread);
+	devfs_unregister(devfs_handle);
+	/* drop the block major, reboot notifier, sysctl table and mdstat proc entry */
+	devfs_unregister_blkdev(MAJOR_NR,"md");
+	unregister_reboot_notifier(&md_notifier);
+	unregister_sysctl_table(raid_table_header);
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("mdstat", NULL);
+#endif
+	/* unlink md_gendisk from the list headed by gendisk_head, if present */
+	gendisk_ptr = &gendisk_head;
+	while (*gendisk_ptr) {
+		if (*gendisk_ptr == &md_gendisk) {
+			*gendisk_ptr = md_gendisk.next;
+			break;
+		}
+		gendisk_ptr = & (*gendisk_ptr)->next;
+	}
+	blk_dev[MAJOR_NR].queue = NULL;
+	blksize_size[MAJOR_NR] = NULL;
+	blk_size[MAJOR_NR] = NULL;
+	max_readahead[MAJOR_NR] = NULL;
+	hardsect_size[MAJOR_NR] = NULL;
+	/* last step: release every entry still on device_names */
+	free_device_names();
+	/* NOTE(review): read_ahead[MAJOR_NR] is not reset here - confirm intended */
+}
+#endif
 
 MD_EXPORT_SYMBOL(md_size);
 MD_EXPORT_SYMBOL(register_md_personality);
@@ -3842,6 +3852,7 @@
 MD_EXPORT_SYMBOL(partition_name);
 MD_EXPORT_SYMBOL(md_error);
 MD_EXPORT_SYMBOL(md_do_sync);
+MD_EXPORT_SYMBOL(md_sync_acct);
 MD_EXPORT_SYMBOL(md_done_sync);
 MD_EXPORT_SYMBOL(md_recover_arrays);
 MD_EXPORT_SYMBOL(md_register_thread);

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)