patch-2.4.0-test3 linux/fs/super.c

Next file: linux/fs/sysv/fsync.c
Previous file: linux/fs/stat.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.4.0-test2/linux/fs/super.c linux/fs/super.c
@@ -260,7 +260,7 @@
 	return len;
 }
 
-static struct file_system_type *get_fs_type(const char *name)
+struct file_system_type *get_fs_type(const char *name)
 {
 	struct file_system_type *fs;
 	
@@ -281,14 +281,28 @@
 
 static LIST_HEAD(vfsmntlist);
 
-static struct vfsmount *add_vfsmnt(struct super_block *sb,
-				struct dentry *mountpoint,
+/**
+ *	add_vfsmnt - add a new mount node
+ *	@nd: location of mountpoint or %NULL if we want a root node
+ *	@root: root of (sub)tree to be mounted
+ *	@dev_name: device name to show in /proc/mounts
+ *
+ *	This is the VFS idea of a mount. A new node is allocated, bound to the
+ *	tree we are mounting and optionally (OK, usually) registered as mounted
+ *	on a given mountpoint. Returns a pointer to the new node or %NULL in
+ *	case of failure.
+ *
+ *	A potential reason for failure (aside from trivial lack of memory) is
+ *	a deleted mountpoint. Caller must hold ->i_zombie on the mountpoint
+ *	dentry (if any).
+ */
+
+static struct vfsmount *add_vfsmnt(struct nameidata *nd,
 				struct dentry *root,
-				struct vfsmount *parent,
-				const char *dev_name,
-				const char *dir_name)
+				const char *dev_name)
 {
 	struct vfsmount *mnt;
+	struct super_block *sb = root->d_inode->i_sb;
 	char *name;
 
 	mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
@@ -296,13 +310,7 @@
 		goto out;
 	memset(mnt, 0, sizeof(struct vfsmount));
 
-	atomic_set(&mnt->mnt_count,1);
-	mnt->mnt_sb = sb;
-	mnt->mnt_mountpoint = dget(mountpoint);
-	mnt->mnt_root = dget(root);
-	mnt->mnt_parent = parent ? mntget(parent) : mnt;
-
-	/* N.B. Is it really OK to have a vfsmount without names? */
+	/* dev_name may be NULL; mnt_devname then simply stays NULL */
 	if (dev_name) {
 		name = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
 		if (name) {
@@ -310,51 +318,57 @@
 			mnt->mnt_devname = name;
 		}
 	}
-	name = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
-	if (name) {
-		strcpy(name, dir_name);
-		mnt->mnt_dirname = name;
-	}
 	mnt->mnt_owner = current->uid;
+	atomic_set(&mnt->mnt_count,1);
+	mnt->mnt_sb = sb;
 
-	if (parent)
-		list_add(&mnt->mnt_child, &parent->mnt_mounts);
-	else
+	spin_lock(&dcache_lock);
+	if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
+		goto fail;
+	mnt->mnt_root = dget(root);
+	mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root);
+	mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt;
+
+	if (nd) {
+		list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+		list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt);
+	} else {
 		INIT_LIST_HEAD(&mnt->mnt_child);
+		INIT_LIST_HEAD(&mnt->mnt_clash);
+	}
 	INIT_LIST_HEAD(&mnt->mnt_mounts);
 	list_add(&mnt->mnt_instances, &sb->s_mounts);
-	list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
 	list_add(&mnt->mnt_list, vfsmntlist.prev);
+	spin_unlock(&dcache_lock);
 out:
 	return mnt;
+fail:
+	spin_unlock(&dcache_lock);
+	kfree(mnt->mnt_devname);
+	kfree(mnt);
+	return NULL;
 }
 
 static void move_vfsmnt(struct vfsmount *mnt,
 			struct dentry *mountpoint,
 			struct vfsmount *parent,
-			const char *dev_name,
-			const char *dir_name)
+			const char *dev_name)
 {
-	struct dentry *old_mountpoint = mnt->mnt_mountpoint;
-	struct vfsmount *old_parent = mnt->mnt_parent;
-	char *new_devname = NULL, *new_dirname = NULL;
+	struct dentry *old_mountpoint;
+	struct vfsmount *old_parent;
+	char *new_devname = NULL;
 
 	if (dev_name) {
 		new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
 		if (new_devname)
 			strcpy(new_devname, dev_name);
 	}
-	if (dir_name) {
-		new_dirname = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
-		if (new_dirname)
-			strcpy(new_dirname, dir_name);
-	}
+
+	spin_lock(&dcache_lock);
+	old_mountpoint = mnt->mnt_mountpoint;
+	old_parent = mnt->mnt_parent;
 
 	/* flip names */
-	if (new_dirname) {
-		kfree(mnt->mnt_dirname);
-		mnt->mnt_dirname = new_dirname;
-	}
 	if (new_devname) {
 		kfree(mnt->mnt_devname);
 		mnt->mnt_devname = new_devname;
@@ -365,11 +379,14 @@
 	mnt->mnt_parent = parent ? mntget(parent) : mnt;
 	list_del(&mnt->mnt_clash);
 	list_del(&mnt->mnt_child);
-	list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
-	if (parent)
+	if (parent) {
 		list_add(&mnt->mnt_child, &parent->mnt_mounts);
-	else
+		list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
+	} else {
 		INIT_LIST_HEAD(&mnt->mnt_child);
+		INIT_LIST_HEAD(&mnt->mnt_clash);
+	}
+	spin_unlock(&dcache_lock);
 
 	/* put the old stuff */
 	dput(old_mountpoint);
@@ -377,6 +394,9 @@
 		mntput(old_parent);
 }
 
+/*
+ * Called with dcache_lock held; releases it before returning.
+ */
 static void remove_vfsmnt(struct vfsmount *mnt)
 {
 	/* First of all, remove it from all lists */
@@ -384,6 +404,7 @@
 	list_del(&mnt->mnt_clash);
 	list_del(&mnt->mnt_list);
 	list_del(&mnt->mnt_child);
+	spin_unlock(&dcache_lock);
 	/* Now we can work safely */
 	if (mnt->mnt_parent != mnt)
 		mntput(mnt->mnt_parent);
@@ -391,7 +412,6 @@
 	dput(mnt->mnt_mountpoint);
 	dput(mnt->mnt_root);
 	kfree(mnt->mnt_devname);
-	kfree(mnt->mnt_dirname);
 	kfree(mnt);
 }
 
@@ -590,6 +610,7 @@
 
 	lock_kernel();
         s = get_super(to_kdev_t(dev));
+	unlock_kernel();
         if (s == NULL)
                 goto out;
 	err = vfs_statfs(s, &sbuf);
@@ -602,7 +623,6 @@
 
         err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
 out:
-	unlock_kernel();
 	return err;
 }
 
@@ -738,10 +758,6 @@
 	/* Done with lookups, semaphore down */
 	down(&mount_sem);
 	dev = to_kdev_t(bdev->bd_dev);
-	check_disk_change(dev);
-	error = -EACCES;
-	if (!(flags & MS_RDONLY) && is_read_only(dev))
-		goto out;
 	sb = get_super(dev);
 	if (sb) {
 		if (fs_type == sb->s_type) {
@@ -755,6 +771,10 @@
 		error = blkdev_get(bdev, mode, 0, BDEV_FS);
 		if (error)
 			goto out;
+		check_disk_change(dev);
+		error = -EACCES;
+		if (!(flags & MS_RDONLY) && is_read_only(dev))
+			goto out1;
 		error = -EINVAL;
 		sb = read_super(dev, bdev, fs_type, flags, data, 0);
 		if (sb) {
@@ -762,6 +782,7 @@
 			path_release(&nd);
 			return sb;
 		}
+out1:
 		blkdev_put(bdev, BDEV_FS);
 	}
 out:
@@ -812,8 +833,14 @@
 {
 	struct block_device *bdev;
 	kdev_t dev;
-	dput(sb->s_root);
+	struct dentry *root = sb->s_root;
 	sb->s_root = NULL;
+	/* Need to clean after the sucker */
+	if (sb->s_type->fs_flags & FS_LITTER)
+		d_genocide(root);
+	if (sb->s_type->fs_flags & (FS_SINGLE|FS_LITTER))
+		shrink_dcache_parent(root);
+	dput(root);
 	lock_super(sb);
 	if (sb->s_op) {
 		if (sb->s_op->write_super && sb->s_dirt)
@@ -895,7 +922,7 @@
 		put_unnamed_dev(dev);
 		return ERR_PTR(-EINVAL);
 	}
-	mnt = add_vfsmnt(sb, sb->s_root, sb->s_root, NULL, "none", type->name);
+	mnt = add_vfsmnt(NULL, sb->s_root, "none");
 	if (!mnt) {
 		kill_super(sb, 0);
 		return ERR_PTR(-ENOMEM);
@@ -909,10 +936,8 @@
 void kern_umount(struct vfsmount *mnt)
 {
 	struct super_block *sb = mnt->mnt_sb;
-	struct dentry *root = sb->s_root;
+	spin_lock(&dcache_lock);
 	remove_vfsmnt(mnt);
-	dput(root);
-	sb->s_root = NULL;
 	kill_super(sb, 0);
 }
 
@@ -932,6 +957,16 @@
 {
 	struct super_block * sb = mnt->mnt_sb;
 
+	/*
+	 * No sense to grab the lock for this test, but test itself looks
+	 * somewhat bogus. Suggestions for better replacement?
+	 * Ho-hum... In principle, we might treat that as umount + switch
+	 * to rootfs. GC would eventually take care of the old vfsmount.
+	 * The problem being: we have to implement rootfs and GC for that ;-)
+	 * Actually it makes sense, especially if rootfs would contain a
+	 * /reboot - static binary that would close all descriptors and
+	 * call reboot(9). Then init(8) could umount root and exec /reboot.
+	 */
 	if (mnt == current->fs->rootmnt && !umount_root) {
 		int retval = 0;
 		/*
@@ -944,7 +979,9 @@
 		return retval;
 	}
 
+	spin_lock(&dcache_lock);
 	if (atomic_read(&mnt->mnt_count) > 2) {
+		spin_unlock(&dcache_lock);
 		mntput(mnt);
 		return -EBUSY;
 	}
@@ -952,10 +989,12 @@
 	if (mnt->mnt_instances.next != mnt->mnt_instances.prev) {
 		if (sb->s_type->fs_flags & FS_SINGLE)
 			put_filesystem(sb->s_type);
+		/* We hold two references, so mntput() is safe */
 		mntput(mnt);
 		remove_vfsmnt(mnt);
 		return 0;
 	}
+	spin_unlock(&dcache_lock);
 
 	/*
 	 * Before checking whether the filesystem is still busy,
@@ -988,14 +1027,16 @@
 	shrink_dcache_sb(sb);
 	fsync_dev(sb->s_dev);
 
-	/* Something might grab it again - redo checks */
-
-	if (atomic_read(&mnt->mnt_count) > 2) {
+	if (sb->s_root->d_inode->i_state) {
 		mntput(mnt);
 		return -EBUSY;
 	}
 
-	if (sb->s_root->d_inode->i_state) {
+	/* Something might grab it again - redo checks */
+
+	spin_lock(&dcache_lock);
+	if (atomic_read(&mnt->mnt_count) > 2) {
+		spin_unlock(&dcache_lock);
 		mntput(mnt);
 		return -EBUSY;
 	}
@@ -1067,6 +1108,8 @@
 {
 	if (capable(CAP_SYS_ADMIN))
 		return 0;
+	return -EPERM;
+#ifdef notyet
 	if (S_ISLNK(nd->dentry->d_inode->i_mode))
 		return -EPERM;
 	if (nd->dentry->d_inode->i_mode & S_ISVTX) {
@@ -1076,6 +1119,7 @@
 	if (permission(nd->dentry->d_inode, MAY_WRITE))
 		return -EPERM;
 	return 0;
+#endif
 }
 
 /*
@@ -1102,22 +1146,22 @@
 	if (S_ISDIR(new_nd.dentry->d_inode->i_mode) !=
 	      S_ISDIR(old_nd.dentry->d_inode->i_mode))
 		goto out2;
-		
-	down(&mount_sem);
-	err = -ENOENT;
-	if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
-		goto out3;
-	if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
-		goto out3;
-	/* there we go */
+
 	err = -ENOMEM;
 	if (old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE)
 		get_filesystem(old_nd.mnt->mnt_sb->s_type);
-	if (add_vfsmnt(old_nd.mnt->mnt_sb, new_nd.dentry, old_nd.dentry,
-	               new_nd.mnt, old_nd.mnt->mnt_devname, new_name))
+		
+	down(&mount_sem);
+	/* there we go */
+	down(&new_nd.dentry->d_inode->i_zombie);
+	if (IS_DEADDIR(new_nd.dentry->d_inode))
+		err = -ENOENT;
+	else if (add_vfsmnt(&new_nd, old_nd.dentry, old_nd.mnt->mnt_devname))
 		err = 0;
-out3:
+	up(&new_nd.dentry->d_inode->i_zombie);
 	up(&mount_sem);
+	if (err && old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE)
+		put_filesystem(old_nd.mnt->mnt_sb->s_type);
 out2:
 	path_release(&new_nd);
 out1:
@@ -1215,7 +1259,7 @@
 {
 	struct file_system_type * fstype;
 	struct nameidata nd;
-	struct vfsmount *mnt;
+	struct vfsmount *mnt = NULL;
 	struct super_block *sb;
 	int retval = 0;
 	unsigned long flags = 0;
@@ -1224,8 +1268,6 @@
 
 	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
 		return -EINVAL;
-	if (!type_page || !memchr(type_page, 0, PAGE_SIZE))
-		return -EINVAL;
 	if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
 		return -EINVAL;
 
@@ -1239,6 +1281,11 @@
 	if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL)
 		flags = new_flags & ~MS_MGC_MSK;
 
+	/* For the rest we need the type */
+
+	if (!type_page || !memchr(type_page, 0, PAGE_SIZE))
+		return -EINVAL;
+
 	/* loopback mount? This is special - requires fewer capabilities */
 	if (strcmp(type_page, "bind")==0)
 		return do_loopback(dev_name, dir_name);
@@ -1272,16 +1319,18 @@
 	if (IS_ERR(sb))
 		goto dput_out;
 
-	retval = -ENOENT;
-	if (d_unhashed(nd.dentry) && !IS_ROOT(nd.dentry))
-		goto fail;
-
 	/* Something was mounted here while we slept */
 	while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry))
 		;
-
-	retval = -ENOMEM;
-	mnt = add_vfsmnt(sb, nd.dentry, sb->s_root, nd.mnt, dev_name, dir_name);
+	retval = -ENOENT;
+	if (!nd.dentry->d_inode)
+		goto fail;
+	down(&nd.dentry->d_inode->i_zombie);
+	if (!IS_DEADDIR(nd.dentry->d_inode)) {
+		retval = -ENOMEM;
+		mnt = add_vfsmnt(&nd, sb->s_root, dev_name);
+	}
+	up(&nd.dentry->d_inode->i_zombie);
 	if (!mnt)
 		goto fail;
 	retval = 0;
@@ -1312,15 +1361,6 @@
 	if (retval < 0)
 		return retval;
 
-	/* copy_mount_options allows a NULL user pointer,
-	 * and just returns zero in that case.  But if we
-	 * allow the type to be NULL we will crash.
-	 * Previously we did not check this case.
-	 */
-	if (type_page == 0)
-		return -EINVAL;
-
-	lock_kernel();
 	dir_page = getname(dir_name);
 	retval = PTR_ERR(dir_page);
 	if (IS_ERR(dir_page))
@@ -1331,8 +1371,10 @@
 		goto out2;
 	retval = copy_mount_options (data, &data_page);
 	if (retval >= 0) {
+		lock_kernel();
 		retval = do_mount((char*)dev_page,dir_page,(char*)type_page,
 				      new_flags, (void*)data_page);
+		unlock_kernel();
 		free_page(data_page);
 	}
 	free_page(dev_page);
@@ -1340,7 +1382,6 @@
 	putname(dir_page);
 out1:
 	free_page(type_page);
-	unlock_kernel();
 	return retval;
 }
 
@@ -1414,7 +1455,7 @@
 #endif
 
 	devfs_make_root (root_device_name);
-	handle = devfs_find_handle (NULL, ROOT_DEVICE_NAME, 0,
+	handle = devfs_find_handle (NULL, ROOT_DEVICE_NAME,
 	                            MAJOR (ROOT_DEV), MINOR (ROOT_DEV),
 				    DEVFS_SPECIAL_BLK, 1);
 	if (handle)  /*  Sigh: bd*() functions only paper over the cracks  */
@@ -1485,17 +1526,14 @@
 		fs_type->name,
 		(sb->s_flags & MS_RDONLY) ? " readonly" : "");
 	if (path_start >= 0) {
-		devfs_mk_symlink (NULL,
-				  "root", 0, DEVFS_FL_DEFAULT,
-				  path + 5 + path_start, 0,
-				  NULL, NULL);
+		devfs_mk_symlink (NULL, "root", DEVFS_FL_DEFAULT,
+				  path + 5 + path_start, NULL, NULL);
 		memcpy (path + path_start, "/dev/", 5);
-		vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL,
-					path + path_start, "/");
+		vfsmnt = add_vfsmnt(NULL, sb->s_root, path + path_start);
 	}
 	else
-		vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL,
-					"/dev/root", "/");
+		vfsmnt = add_vfsmnt(NULL, sb->s_root, "/dev/root");
+	/* FIXME: if something will try to umount us right now... */
 	if (vfsmnt) {
 		set_fs_root(current->fs, vfsmnt, sb->s_root);
 		set_fs_pwd(current->fs, vfsmnt, sb->s_root);
@@ -1516,6 +1554,7 @@
 
 	read_lock(&tasklist_lock);
 	for_each_task(p) {
+		/* FIXME - unprotected usage of ->fs + (harmless) race */
 		if (!p->fs) continue;
 		if (p->fs->root == old_root && p->fs->rootmnt == old_rootmnt)
 			set_fs_root(p->fs, new_rootmnt, new_root);
@@ -1573,10 +1612,15 @@
 	if (error)
 		goto out1;
 
+	read_lock(&current->fs->lock);
 	root_mnt = mntget(current->fs->rootmnt);
 	root = dget(current->fs->root);
+	read_unlock(&current->fs->lock);
 	down(&mount_sem);
+	down(&old_nd.dentry->d_inode->i_zombie);
 	error = -ENOENT;
+	if (IS_DEADDIR(new_nd.dentry->d_inode))
+		goto out2;
 	if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
 		goto out2;
 	if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
@@ -1586,32 +1630,27 @@
 		goto out2; /* loop */
 	error = -EINVAL;
 	tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
+	spin_lock(&dcache_lock);
 	if (tmp != new_nd.mnt) {
 		for (;;) {
 			if (tmp->mnt_parent == tmp)
-				goto out2;
+				goto out3;
 			if (tmp->mnt_parent == new_nd.mnt)
 				break;
 			tmp = tmp->mnt_parent;
 		}
 		if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry))
-			goto out2;
+			goto out3;
 	} else if (!is_subdir(old_nd.dentry, new_nd.dentry))
-		goto out2;
-
-	error = -ENOMEM;
-	name = __getname();
-	if (!name)
-		goto out2;
+		goto out3;
+	spin_unlock(&dcache_lock);
 
-	move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL, "/");
-	move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL,
-			__d_path(old_nd.dentry, old_nd.mnt, new_nd.dentry,
-				new_nd.mnt, name, PAGE_SIZE));
-	putname(name);
+	move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL);
+	move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL);
 	chroot_fs_refs(root,root_mnt,new_nd.dentry,new_nd.mnt);
 	error = 0;
 out2:
+	up(&old_nd.dentry->d_inode->i_zombie);
 	up(&mount_sem);
 	dput(root);
 	mntput(root_mnt);
@@ -1621,6 +1660,9 @@
 out0:
 	unlock_kernel();
 	return error;
+out3:
+	spin_unlock(&dcache_lock);
+	goto out2;
 }
 
 
@@ -1628,17 +1670,17 @@
 
 int __init change_root(kdev_t new_root_dev,const char *put_old)
 {
-	kdev_t old_root_dev = ROOT_DEV;
-	struct vfsmount *old_rootmnt = mntget(current->fs->rootmnt);
+	struct vfsmount *old_rootmnt;
 	struct nameidata devfs_nd, nd;
 	int error = 0;
 
+	read_lock(&current->fs->lock);
+	old_rootmnt = mntget(current->fs->rootmnt);
+	read_unlock(&current->fs->lock);
 	/*  First unmount devfs if mounted  */
 	if (path_init("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE, &devfs_nd))
 		error = path_walk("/dev", &devfs_nd);
 	if (!error) {
-		struct super_block *sb = devfs_nd.dentry->d_inode->i_sb;
-
 		if (devfs_nd.mnt->mnt_sb->s_magic == DEVFS_SUPER_MAGIC &&
 		    devfs_nd.dentry == devfs_nd.mnt->mnt_root) {
 			dput(devfs_nd.dentry);
@@ -1654,7 +1696,7 @@
 #if 1
 	shrink_dcache();
 	printk("change_root: old root has d_count=%d\n", 
-	       old_rootmnt->mnt_root->d_count);
+	       atomic_read(&old_rootmnt->mnt_root->d_count));
 #endif
 	mount_devfs_fs ();
 	/*
@@ -1672,10 +1714,11 @@
 			printk("okay\n");
 			return 0;
 		}
-		printk(KERN_ERR "error %ld\n",blivet);
+		printk(KERN_ERR "error %d\n",blivet);
 		return error;
 	}
-	move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old", put_old);
+	/* FIXME: we should hold i_zombie on nd.dentry */
+	move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old");
 	mntput(old_rootmnt);
 	path_release(&nd);
 	return 0;

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)