patch-2.4.0-test5 linux/drivers/block/md.c
- Lines: 584
- Date: Wed Jul 26 19:47:48 2000
- Orig file: v2.4.0-test4/linux/drivers/block/md.c
- Orig date: Fri Jul 14 12:12:08 2000
diff -u --recursive --new-file v2.4.0-test4/linux/drivers/block/md.c linux/drivers/block/md.c
@@ -28,6 +28,7 @@
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#include <linux/module.h>
#include <linux/config.h>
#include <linux/raid/md.h>
#include <linux/raid/xor.h>
@@ -100,6 +101,7 @@
*/
struct hd_struct md_hd_struct[MAX_MD_DEVS];
static int md_blocksizes[MAX_MD_DEVS];
+static int md_hardsect_sizes[MAX_MD_DEVS];
static int md_maxreadahead[MAX_MD_DEVS];
static mdk_thread_t *md_recovery_thread = NULL;
@@ -122,37 +124,6 @@
fops: &md_fops,
};
-void md_plug_device (request_queue_t *mdqueue, kdev_t dev)
-{
- mdk_rdev_t * rdev;
- struct md_list_head *tmp;
- request_queue_t *q;
- mddev_t *mddev;
-
- if (!md_test_and_set_bit(0, (atomic_t *)&mdqueue->plugged)) {
- mddev = kdev_to_mddev(dev);
- ITERATE_RDEV(mddev,rdev,tmp) {
- q = blk_get_queue(rdev->dev);
- generic_unplug_device(q);
- }
- queue_task(&mdqueue->plug_tq, &tq_disk);
- }
-}
-
-static void md_unplug_device (void * data)
-{
- mdk_rdev_t * rdev;
- struct md_list_head *tmp;
- mddev_t *mddev = (mddev_t *)data;
- request_queue_t *mdqueue = &mddev->queue, *q;
-
- clear_bit(0, (atomic_t *)&mdqueue->plugged);
- ITERATE_RDEV(mddev,rdev,tmp) {
- q = blk_get_queue(rdev->dev);
- generic_unplug_device(q);
- }
-}
-
/*
* Enables to iterate over all existing md arrays
*/
@@ -197,28 +168,12 @@
mddev_map[minor].data = NULL;
}
-static request_queue_t *md_get_queue (kdev_t dev)
-{
- mddev_t *mddev = kdev_to_mddev(dev);
-
- if (!mddev)
- return NULL;
- return &mddev->queue;
-}
-
-static void do_md_request (request_queue_t * q)
-{
- printk(KERN_ALERT "Got md request, not good...");
- BUG();
- return;
-}
-
static int md_make_request (request_queue_t *q, int rw, struct buffer_head * bh)
{
mddev_t *mddev = kdev_to_mddev(bh->b_rdev);
if (mddev && mddev->pers)
- return mddev->pers->make_request(q, mddev, rw, bh);
+ return mddev->pers->make_request(mddev, rw, bh);
else {
buffer_IO_error(bh);
return -1;
@@ -227,7 +182,6 @@
static mddev_t * alloc_mddev (kdev_t dev)
{
- request_queue_t *q;
mddev_t *mddev;
if (MAJOR(dev) != MD_MAJOR) {
@@ -247,15 +201,6 @@
MD_INIT_LIST_HEAD(&mddev->disks);
MD_INIT_LIST_HEAD(&mddev->all_mddevs);
- q = &mddev->queue;
- blk_init_queue(q, DEVICE_REQUEST);
- blk_queue_pluggable(q, md_plug_device);
- blk_queue_make_request(q, md_make_request);
-
- q->plug_tq.sync = 0;
- q->plug_tq.routine = &md_unplug_device;
- q->plug_tq.data = mddev;
-
/*
* The 'base' mddev is the one with data NULL.
* personalities can create additional mddevs
@@ -264,6 +209,8 @@
add_mddev_mapping(mddev, dev, 0);
md_list_add(&mddev->all_mddevs, &all_mddevs);
+ MOD_INC_USE_COUNT;
+
return mddev;
}
@@ -569,7 +516,7 @@
printk (NO_SB,partition_name(rdev->dev));
goto abort;
}
- printk(" [events: %08lx]\n", (unsigned long)get_unaligned(&rdev->sb->events));
+ printk(" [events: %08lx]\n", (unsigned long)rdev->sb->events_lo);
ret = 0;
abort:
if (bh)
@@ -804,8 +751,8 @@
del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev)));
md_list_del(&mddev->all_mddevs);
MD_INIT_LIST_HEAD(&mddev->all_mddevs);
- blk_cleanup_queue(&mddev->queue);
kfree(mddev);
+ MOD_DEC_USE_COUNT;
}
#undef BAD_CSUM
@@ -834,7 +781,7 @@
printk(" UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%08lx\n",
sb->utime, sb->state, sb->active_disks, sb->working_disks,
sb->failed_disks, sb->spare_disks,
- sb->sb_csum, (unsigned long)get_unaligned(&sb->events));
+ sb->sb_csum, (unsigned long)sb->events_lo);
for (i = 0; i < MD_SB_DISKS; i++) {
mdp_disk_t *desc;
@@ -1064,22 +1011,20 @@
int first, err, count = 100;
struct md_list_head *tmp;
mdk_rdev_t *rdev;
- __u64 ev;
repeat:
mddev->sb->utime = CURRENT_TIME;
- ev = get_unaligned(&mddev->sb->events);
- ++ev;
- put_unaligned(ev,&mddev->sb->events);
- if (ev == (__u64)0) {
+ if ((++mddev->sb->events_lo)==0)
+ ++mddev->sb->events_hi;
+
+ if ((mddev->sb->events_lo|mddev->sb->events_hi)==0) {
/*
* oops, this 64-bit counter should never wrap.
* Either we are in around ~1 trillion A.C., assuming
* 1 reboot per second, or we have a bug:
*/
MD_BUG();
- --ev;
- put_unaligned(ev,&mddev->sb->events);
+ mddev->sb->events_lo = mddev->sb->events_hi = 0xffffffff;
}
sync_sbs(mddev);
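
The hunk above replaces the get_unaligned()/put_unaligned() accesses to the packed 64-bit event counter with two explicit 32-bit halves, events_lo and events_hi, plus a manual carry. A minimal userspace sketch of that carry logic follows; the struct and helper are illustrative, only the field names and the 0xffffffff pin on wraparound come from the patch.

/* Sketch of the split event counter: two 32-bit halves with a manual
 * carry, so no unaligned 64-bit load or store is ever needed.
 * Illustrative userspace code, not the kernel's. */
#include <stdint.h>
#include <stdio.h>

struct sb_events {
	uint32_t events_lo;	/* low 32 bits of the event counter  */
	uint32_t events_hi;	/* high 32 bits of the event counter */
};

static void events_inc(struct sb_events *sb)
{
	if (++sb->events_lo == 0)	/* low word wrapped: carry */
		++sb->events_hi;

	if ((sb->events_lo | sb->events_hi) == 0)
		/* a full 64-bit wrap should never happen; pin at the max */
		sb->events_lo = sb->events_hi = 0xffffffff;
}

int main(void)
{
	struct sb_events sb = { 0xffffffff, 0 };

	events_inc(&sb);
	printf("%08x:%08x\n", (unsigned)sb.events_hi, (unsigned)sb.events_lo);
	return 0;
}
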
@@ -1105,7 +1050,7 @@
printk("%s ", partition_name(rdev->dev));
if (!rdev->faulty) {
printk("[events: %08lx]",
- (unsigned long)get_unaligned(&rdev->sb->events));
+ (unsigned long)rdev->sb->events_lo);
err += write_disk_sb(rdev);
} else
printk(")\n");
@@ -1288,15 +1233,13 @@
* one event)
*/
if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) {
- __u64 ev = get_unaligned(&rdev->sb->events);
- if (ev != (__u64)0) {
- --ev;
- put_unaligned(ev,&rdev->sb->events);
- }
+ if (rdev->sb->events_lo || rdev->sb->events_hi)
+ if ((rdev->sb->events_lo--)==0)
+ rdev->sb->events_hi--;
}
printk("%s's event counter: %08lx\n", partition_name(rdev->dev),
- (unsigned long)get_unaligned(&rdev->sb->events));
+ (unsigned long)rdev->sb->events_lo);
if (!freshest) {
freshest = rdev;
continue;
@@ -1304,8 +1247,8 @@
/*
* Find the newest superblock version
*/
- ev1 = get_unaligned(&rdev->sb->events);
- ev2 = get_unaligned(&freshest->sb->events);
+ ev1 = md_event(rdev->sb);
+ ev2 = md_event(freshest->sb);
if (ev1 != ev2) {
out_of_date = 1;
if (ev1 > ev2)
@@ -1329,8 +1272,8 @@
* Kick all non-fresh devices faulty
*/
__u64 ev1, ev2;
- ev1 = get_unaligned(&rdev->sb->events);
- ev2 = get_unaligned(&sb->events);
+ ev1 = md_event(rdev->sb);
+ ev2 = md_event(sb);
++ev1;
if (ev1 < ev2) {
printk("md: kicking non-fresh %s from array!\n",
@@ -1350,8 +1293,8 @@
MD_BUG();
goto abort;
}
- ev1 = get_unaligned(&rdev->sb->events);
- ev2 = get_unaligned(&sb->events);
+ ev1 = md_event(rdev->sb);
+ ev2 = md_event(sb);
ev3 = ev2;
--ev3;
if ((rdev->dev != rdev->old_dev) &&
@@ -1694,14 +1637,22 @@
* Drop all container device buffers, from now on
* the only valid external interface is through the md
* device.
+ * Also find largest hardsector size
*/
+ md_hardsect_sizes[mdidx(mddev)] = 512;
ITERATE_RDEV(mddev,rdev,tmp) {
if (rdev->faulty)
continue;
fsync_dev(rdev->dev);
invalidate_buffers(rdev->dev);
- }
-
+ if (get_hardsect_size(rdev->dev)
+ > md_hardsect_sizes[mdidx(mddev)])
+ md_hardsect_sizes[mdidx(mddev)] =
+ get_hardsect_size(rdev->dev);
+ }
+ md_blocksizes[mdidx(mddev)] = 1024;
+ if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)])
+ md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)];
mddev->pers = pers[pnum];
err = mddev->pers->run(mddev);
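
The hunk above teaches md_run() to record the largest hardware sector size among the member devices and to raise the array's soft block size to at least that value. A hedged userspace sketch of the same rule; pick_sizes() is a made-up helper, only the 512/1024 defaults mirror the patch.

/* Sketch of the sizing rule: the array's hard-sector size is the
 * maximum over its members, and the soft block size must not be
 * smaller than that.  pick_sizes() is invented for illustration. */
#include <stdio.h>

static void pick_sizes(const int *member_hardsect, int n,
		       int *hardsect, int *blocksize)
{
	int i;

	*hardsect = 512;		/* default, as in the patch */
	for (i = 0; i < n; i++)
		if (member_hardsect[i] > *hardsect)
			*hardsect = member_hardsect[i];

	*blocksize = 1024;		/* default soft block size */
	if (*blocksize < *hardsect)
		*blocksize = *hardsect;	/* never below the hardware sector */
}

int main(void)
{
	int members[] = { 512, 2048, 512 };
	int hs, bs;

	pick_sizes(members, 3, &hs, &bs);
	printf("hardsect=%d blocksize=%d\n", hs, bs);	/* 2048 2048 */
	return 0;
}
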
@@ -2080,10 +2031,10 @@
*/
#ifdef CONFIG_AUTODETECT_RAID
static int detected_devices[128] md__initdata;
-static int dev_cnt md__initdata=0;
-void md__init md_autodetect_dev(kdev_t dev)
+static int dev_cnt=0;
+void md_autodetect_dev(kdev_t dev)
{
- if (dev_cnt < 127)
+ if (dev_cnt >= 0 && dev_cnt < 127)
detected_devices[dev_cnt++] = dev;
}
#endif
@@ -2094,36 +2045,39 @@
mdk_rdev_t *rdev;
int i;
- if (raid_setup_args.noautodetect) {
+ if (raid_setup_args.noautodetect)
printk(KERN_INFO "skipping autodetection of RAID arrays\n");
- return;
- }
- printk(KERN_INFO "autodetecting RAID arrays\n");
+ else {
- for (i=0; i<dev_cnt; i++) {
- kdev_t dev = detected_devices[i];
+ printk(KERN_INFO "autodetecting RAID arrays\n");
- if (md_import_device(dev,1)) {
- printk(KERN_ALERT "could not import %s!\n",
- partition_name(dev));
- continue;
- }
- /*
- * Sanity checks:
- */
- rdev = find_rdev_all(dev);
- if (!rdev) {
- MD_BUG();
- continue;
- }
- if (rdev->faulty) {
- MD_BUG();
- continue;
+ for (i=0; i<dev_cnt; i++) {
+ kdev_t dev = detected_devices[i];
+
+ if (md_import_device(dev,1)) {
+ printk(KERN_ALERT "could not import %s!\n",
+ partition_name(dev));
+ continue;
+ }
+ /*
+ * Sanity checks:
+ */
+ rdev = find_rdev_all(dev);
+ if (!rdev) {
+ MD_BUG();
+ continue;
+ }
+ if (rdev->faulty) {
+ MD_BUG();
+ continue;
+ }
+ md_list_add(&rdev->pending, &pending_raid_disks);
}
- md_list_add(&rdev->pending, &pending_raid_disks);
+
+ autorun_devices();
}
- autorun_devices();
+ dev_cnt = -1; /* make sure further calls to md_autodetect_dev are ignored */
#endif
#ifdef CONFIG_MD_BOOT
md_setup_drive();
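
In the autodetection rework above, dev_cnt doubles as a sentinel: once autorun_devices() has consumed the detected list it is set to -1, so any later md_autodetect_dev() call is ignored rather than written past the array. A small illustrative sketch of that pattern; the names are stand-ins for the kernel's statics.

/* Sketch of the sentinel pattern: once autodetection has run, the
 * counter goes to -1 and later registrations are silently dropped. */
#include <stdio.h>

#define MAX_DETECTED 128

static int detected[MAX_DETECTED];
static int dev_cnt;			/* -1 once autodetection is done */

static void autodetect_dev(int dev)
{
	if (dev_cnt >= 0 && dev_cnt < MAX_DETECTED - 1)
		detected[dev_cnt++] = dev;	/* else: too many, or too late */
}

static void run_autodetect(void)
{
	int i;

	for (i = 0; i < dev_cnt; i++)
		printf("importing device %d\n", detected[i]);
	dev_cnt = -1;			/* further calls are ignored */
}

int main(void)
{
	autodetect_dev(3);
	run_autodetect();
	autodetect_dev(5);		/* ignored: dev_cnt is -1 */
	run_autodetect();		/* prints nothing */
	return 0;
}
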
@@ -2731,11 +2685,9 @@
goto done_unlock;
case STOP_ARRAY:
- err = do_md_stop (mddev, 0);
- if (err)
- goto done_unlock;
- else
- goto done;
+ if (!(err = do_md_stop (mddev, 0)))
+ mddev = NULL;
+ goto done_unlock;
case STOP_ARRAY_RO:
err = do_md_stop (mddev, 1);
@@ -2837,7 +2789,8 @@
*/
if (err) {
mddev->sb_dirty = 0;
- do_md_stop (mddev, 0);
+ if (!do_md_stop (mddev, 0))
+ mddev = NULL;
}
goto done_unlock;
}
@@ -2852,8 +2805,6 @@
abort_unlock:
if (mddev)
unlock_mddev(mddev);
- else
- printk("huh11?\n");
return err;
done:
@@ -2890,7 +2841,7 @@
/*
* Detach thread
*/
- sys_setsid();
+ daemonize();
sprintf(current->comm, thread->name);
md_init_signals();
md_flush_signals();
@@ -3248,6 +3199,19 @@
return NULL;
}
+static unsigned int sync_io[DK_MAX_MAJOR][DK_MAX_DISK];
+void md_sync_acct(kdev_t dev, unsigned long nr_sectors)
+{
+ unsigned int major = MAJOR(dev);
+ unsigned int index;
+
+ index = disk_index(dev);
+ if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+ return;
+
+ sync_io[major][index] += nr_sectors;
+}
+
static int is_mddev_idle (mddev_t *mddev)
{
mdk_rdev_t * rdev;
@@ -3260,8 +3224,12 @@
int major = MAJOR(rdev->dev);
int idx = disk_index(rdev->dev);
+ if ((idx >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
+ continue;
+
curr_events = kstat.dk_drive_rblk[major][idx] +
kstat.dk_drive_wblk[major][idx] ;
+ curr_events -= sync_io[major][idx];
// printk("events(major: %d, idx: %d): %ld\n", major, idx, curr_events);
if (curr_events != rdev->last_events) {
// printk("!I(%ld)", curr_events - rdev->last_events);
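
md_sync_acct() above gives resync I/O its own per-disk counter, and is_mddev_idle() subtracts it from the raw kstat block counts, so resync traffic no longer makes the array look busy. A simplified per-disk sketch of that idle test; struct disk_stats and disk_is_idle() are hypothetical stand-ins for the kstat arrays.

/* Per-disk sketch of the idle test: sectors issued for resync are
 * counted separately and subtracted, so only "real" I/O advances the
 * counter that the idle check compares against. */
#include <stdbool.h>
#include <stdio.h>

struct disk_stats {
	unsigned long total;	/* all sectors read + written */
	unsigned long sync;	/* sectors issued by the resync thread */
	unsigned long last;	/* snapshot from the previous check */
};

static bool disk_is_idle(struct disk_stats *d)
{
	unsigned long curr = d->total - d->sync;	/* ignore resync I/O */
	bool idle = (curr == d->last);

	d->last = curr;
	return idle;
}

int main(void)
{
	struct disk_stats d = { 1000, 0, 1000 };

	d.total += 64;				/* resync wrote 64 sectors... */
	d.sync += 64;				/* ...and accounted for them  */
	printf("idle=%d\n", disk_is_idle(&d));	/* 1: still idle */
	return 0;
}
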
@@ -3560,34 +3528,30 @@
NULL,
0
};
-
-void md__init raid_setup(char *str, int *ints)
+#ifndef MODULE
+static int md__init raid_setup(char *str)
{
- char tmpline[100];
- int len, pos, nr, i;
+ int len, pos;
len = strlen(str) + 1;
- nr = 0;
pos = 0;
- for (i = 0; i < len; i++) {
- char c = str[i];
-
- if (c == ',' || !c) {
- tmpline[pos] = 0;
- if (!strcmp(tmpline,"noautodetect"))
- raid_setup_args.noautodetect = 1;
- nr++;
- pos = 0;
- continue;
- }
- tmpline[pos] = c;
- pos++;
+ while (pos < len) {
+ char *comma = strchr(str+pos, ',');
+ int wlen;
+ if (comma)
+ wlen = (comma-str)-pos;
+ else wlen = (len-1)-pos;
+
+ if (strncmp(str, "noautodetect", wlen) == 0)
+ raid_setup_args.noautodetect = 1;
+ pos += wlen+1;
}
raid_setup_args.set = 1;
- return;
+ return 1;
}
-
+__setup("raid=", raid_setup);
+#endif
static void md_geninit (void)
{
int i;
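
The rewritten raid_setup() above walks the comma-separated option string in place with strchr() instead of copying words into a fixed 100-byte buffer, and registers itself via __setup("raid=", ...). A standalone sketch of that word-by-word walk, assuming a hypothetical parse_raid_opts() helper that, like the kernel parser, only recognises "noautodetect".

/* Sketch of the in-place, comma-separated option walk; no fixed-size
 * copy buffer is needed. */
#include <stdio.h>
#include <string.h>

static int noautodetect;

static void parse_raid_opts(const char *str)
{
	int len = strlen(str) + 1;
	int pos = 0;

	while (pos < len) {
		const char *comma = strchr(str + pos, ',');
		int wlen = comma ? (int)(comma - str) - pos : (len - 1) - pos;

		if (wlen == (int)strlen("noautodetect") &&
		    strncmp(str + pos, "noautodetect", wlen) == 0)
			noautodetect = 1;
		pos += wlen + 1;	/* skip the word and its comma */
	}
}

int main(void)
{
	parse_raid_opts("noautodetect");
	printf("noautodetect=%d\n", noautodetect);	/* 1 */
	return 0;
}
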
@@ -3595,12 +3559,14 @@
for(i = 0; i < MAX_MD_DEVS; i++) {
md_blocksizes[i] = 1024;
md_size[i] = 0;
+ md_hardsect_sizes[i] = 512;
md_maxreadahead[i] = MD_READAHEAD;
register_disk(&md_gendisk, MKDEV(MAJOR_NR,i), 1, &md_fops, 0);
}
- blksize_size[MD_MAJOR] = md_blocksizes;
+ blksize_size[MAJOR_NR] = md_blocksizes;
blk_size[MAJOR_NR] = md_size;
- max_readahead[MD_MAJOR] = md_maxreadahead;
+ max_readahead[MAJOR_NR] = md_maxreadahead;
+ hardsect_size[MAJOR_NR] = md_hardsect_sizes;
printk("md.c: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
@@ -3623,9 +3589,9 @@
MD_MAJOR_VERSION, MD_MINOR_VERSION,
MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MAX_REAL);
- if (devfs_register_blkdev (MD_MAJOR, "md", &md_fops))
+ if (devfs_register_blkdev (MAJOR_NR, "md", &md_fops))
{
- printk (KERN_ALERT "Unable to get major %d for md\n", MD_MAJOR);
+ printk (KERN_ALERT "Unable to get major %d for md\n", MAJOR_NR);
return (-1);
}
devfs_handle = devfs_mk_dir (NULL, "md", NULL);
@@ -3633,9 +3599,11 @@
MAJOR_NR, 0, S_IFBLK | S_IRUSR | S_IWUSR,
&md_fops, NULL);
- blk_dev[MD_MAJOR].queue = md_get_queue;
+ /* forward all md request to md_make_request */
+ blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), md_make_request);
+
- read_ahead[MD_MAJOR] = INT_MAX;
+ read_ahead[MAJOR_NR] = INT_MAX;
md_gendisk.next = gendisk_head;
gendisk_head = &md_gendisk;
@@ -3659,12 +3627,6 @@
#ifdef CONFIG_MD_RAID5
raid5_init ();
#endif
-#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE)
- /*
- * pick a XOR routine, runtime.
- */
- calibrate_xor_block();
-#endif
md_geninit();
return (0);
}
@@ -3835,6 +3797,54 @@
__setup("md=", md_setup);
#endif
+#ifdef MODULE
+int init_module (void)
+{
+ return md_init();
+}
+
+static void free_device_names(void)
+{
+ while (device_names.next != &device_names) {
+ struct list_head *tmp = device_names.next;
+ list_del(tmp);
+ kfree(tmp);
+ }
+}
+
+
+void cleanup_module (void)
+{
+ struct gendisk **gendisk_ptr;
+
+ md_unregister_thread(md_recovery_thread);
+ devfs_unregister(devfs_handle);
+
+ devfs_unregister_blkdev(MAJOR_NR,"md");
+ unregister_reboot_notifier(&md_notifier);
+ unregister_sysctl_table(raid_table_header);
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("mdstat", NULL);
+#endif
+
+ gendisk_ptr = &gendisk_head;
+ while (*gendisk_ptr) {
+ if (*gendisk_ptr == &md_gendisk) {
+ *gendisk_ptr = md_gendisk.next;
+ break;
+ }
+ gendisk_ptr = & (*gendisk_ptr)->next;
+ }
+ blk_dev[MAJOR_NR].queue = NULL;
+ blksize_size[MAJOR_NR] = NULL;
+ blk_size[MAJOR_NR] = NULL;
+ max_readahead[MAJOR_NR] = NULL;
+ hardsect_size[MAJOR_NR] = NULL;
+
+ free_device_names();
+
+}
+#endif
MD_EXPORT_SYMBOL(md_size);
MD_EXPORT_SYMBOL(register_md_personality);
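
cleanup_module() above unlinks md_gendisk from the singly linked gendisk list by walking a pointer-to-pointer, which handles the head and middle cases alike without a special case. A tiny illustrative example of that unlink idiom; struct node and unlink_node() are invented for the sketch.

/* Sketch of the pointer-to-pointer unlink: walking &head and then each
 * &node->next removes a node without special-casing the list head. */
#include <stdio.h>

struct node { int val; struct node *next; };

static void unlink_node(struct node **head, struct node *victim)
{
	struct node **pp;

	for (pp = head; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			*pp = victim->next;	/* splice it out */
			break;
		}
	}
}

int main(void)
{
	struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
	struct node *head = &a;

	unlink_node(&head, &b);
	printf("%d -> %d\n", head->val, head->next->val);	/* 1 -> 3 */
	return 0;
}
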
@@ -3842,6 +3852,7 @@
MD_EXPORT_SYMBOL(partition_name);
MD_EXPORT_SYMBOL(md_error);
MD_EXPORT_SYMBOL(md_do_sync);
+MD_EXPORT_SYMBOL(md_sync_acct);
MD_EXPORT_SYMBOL(md_done_sync);
MD_EXPORT_SYMBOL(md_recover_arrays);
MD_EXPORT_SYMBOL(md_register_thread);