patch-2.4.0-test3 linux/fs/super.c
Next file: linux/fs/sysv/fsync.c
Previous file: linux/fs/stat.c
Back to the patch index
Back to the overall index
- Lines: 621
- Date: Wed Jul 5 16:10:44 2000
- Orig file: v2.4.0-test2/linux/fs/super.c
- Orig date: Mon Jun 19 16:32:00 2000
diff -u --recursive --new-file v2.4.0-test2/linux/fs/super.c linux/fs/super.c
@@ -260,7 +260,7 @@
return len;
}
-static struct file_system_type *get_fs_type(const char *name)
+struct file_system_type *get_fs_type(const char *name)
{
struct file_system_type *fs;
@@ -281,14 +281,28 @@
static LIST_HEAD(vfsmntlist);
-static struct vfsmount *add_vfsmnt(struct super_block *sb,
- struct dentry *mountpoint,
+/**
+ * add_vfsmnt - add a new mount node
+ * @nd: location of mountpoint or %NULL if we want a root node
+ * @root: root of (sub)tree to be mounted
+ * @dev_name: device name to show in /proc/mounts
+ *
+ * This is VFS idea of mount. New node is allocated, bound to a tree
+ * we are mounting and optionally (OK, usually) registered as mounted
+ * on a given mountpoint. Returns a pointer to new node or %NULL in
+ * case of failure.
+ *
+ * Potential reason for failure (aside of trivial lack of memory) is a
+ * deleted mountpoint. Caller must hold ->i_zombie on mountpoint
+ * dentry (if any).
+ */
+
+static struct vfsmount *add_vfsmnt(struct nameidata *nd,
struct dentry *root,
- struct vfsmount *parent,
- const char *dev_name,
- const char *dir_name)
+ const char *dev_name)
{
struct vfsmount *mnt;
+ struct super_block *sb = root->d_inode->i_sb;
char *name;
mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
@@ -296,13 +310,7 @@
goto out;
memset(mnt, 0, sizeof(struct vfsmount));
- atomic_set(&mnt->mnt_count,1);
- mnt->mnt_sb = sb;
- mnt->mnt_mountpoint = dget(mountpoint);
- mnt->mnt_root = dget(root);
- mnt->mnt_parent = parent ? mntget(parent) : mnt;
-
- /* N.B. Is it really OK to have a vfsmount without names? */
+ /* It may be NULL, but who cares? */
if (dev_name) {
name = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
if (name) {
@@ -310,51 +318,57 @@
mnt->mnt_devname = name;
}
}
- name = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
- if (name) {
- strcpy(name, dir_name);
- mnt->mnt_dirname = name;
- }
mnt->mnt_owner = current->uid;
+ atomic_set(&mnt->mnt_count,1);
+ mnt->mnt_sb = sb;
- if (parent)
- list_add(&mnt->mnt_child, &parent->mnt_mounts);
- else
+ spin_lock(&dcache_lock);
+ if (nd && !IS_ROOT(nd->dentry) && d_unhashed(nd->dentry))
+ goto fail;
+ mnt->mnt_root = dget(root);
+ mnt->mnt_mountpoint = nd ? dget(nd->dentry) : dget(root);
+ mnt->mnt_parent = nd ? mntget(nd->mnt) : mnt;
+
+ if (nd) {
+ list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+ list_add(&mnt->mnt_clash, &nd->dentry->d_vfsmnt);
+ } else {
INIT_LIST_HEAD(&mnt->mnt_child);
+ INIT_LIST_HEAD(&mnt->mnt_clash);
+ }
INIT_LIST_HEAD(&mnt->mnt_mounts);
list_add(&mnt->mnt_instances, &sb->s_mounts);
- list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
list_add(&mnt->mnt_list, vfsmntlist.prev);
+ spin_unlock(&dcache_lock);
out:
return mnt;
+fail:
+ spin_unlock(&dcache_lock);
+ kfree(mnt->mnt_devname);
+ kfree(mnt);
+ return NULL;
}
static void move_vfsmnt(struct vfsmount *mnt,
struct dentry *mountpoint,
struct vfsmount *parent,
- const char *dev_name,
- const char *dir_name)
+ const char *dev_name)
{
- struct dentry *old_mountpoint = mnt->mnt_mountpoint;
- struct vfsmount *old_parent = mnt->mnt_parent;
- char *new_devname = NULL, *new_dirname = NULL;
+ struct dentry *old_mountpoint;
+ struct vfsmount *old_parent;
+ char *new_devname = NULL;
if (dev_name) {
new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
if (new_devname)
strcpy(new_devname, dev_name);
}
- if (dir_name) {
- new_dirname = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
- if (new_dirname)
- strcpy(new_dirname, dir_name);
- }
+
+ spin_lock(&dcache_lock);
+ old_mountpoint = mnt->mnt_mountpoint;
+ old_parent = mnt->mnt_parent;
/* flip names */
- if (new_dirname) {
- kfree(mnt->mnt_dirname);
- mnt->mnt_dirname = new_dirname;
- }
if (new_devname) {
kfree(mnt->mnt_devname);
mnt->mnt_devname = new_devname;
@@ -365,11 +379,14 @@
mnt->mnt_parent = parent ? mntget(parent) : mnt;
list_del(&mnt->mnt_clash);
list_del(&mnt->mnt_child);
- list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
- if (parent)
+ if (parent) {
list_add(&mnt->mnt_child, &parent->mnt_mounts);
- else
+ list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
+ } else {
INIT_LIST_HEAD(&mnt->mnt_child);
+ INIT_LIST_HEAD(&mnt->mnt_clash);
+ }
+ spin_unlock(&dcache_lock);
/* put the old stuff */
dput(old_mountpoint);
@@ -377,6 +394,9 @@
mntput(old_parent);
}
+/*
+ * Called with spinlock held, releases it.
+ */
static void remove_vfsmnt(struct vfsmount *mnt)
{
/* First of all, remove it from all lists */
@@ -384,6 +404,7 @@
list_del(&mnt->mnt_clash);
list_del(&mnt->mnt_list);
list_del(&mnt->mnt_child);
+ spin_unlock(&dcache_lock);
/* Now we can work safely */
if (mnt->mnt_parent != mnt)
mntput(mnt->mnt_parent);
@@ -391,7 +412,6 @@
dput(mnt->mnt_mountpoint);
dput(mnt->mnt_root);
kfree(mnt->mnt_devname);
- kfree(mnt->mnt_dirname);
kfree(mnt);
}
@@ -590,6 +610,7 @@
lock_kernel();
s = get_super(to_kdev_t(dev));
+ unlock_kernel();
if (s == NULL)
goto out;
err = vfs_statfs(s, &sbuf);
@@ -602,7 +623,6 @@
err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
out:
- unlock_kernel();
return err;
}
@@ -738,10 +758,6 @@
/* Done with lookups, semaphore down */
down(&mount_sem);
dev = to_kdev_t(bdev->bd_dev);
- check_disk_change(dev);
- error = -EACCES;
- if (!(flags & MS_RDONLY) && is_read_only(dev))
- goto out;
sb = get_super(dev);
if (sb) {
if (fs_type == sb->s_type) {
@@ -755,6 +771,10 @@
error = blkdev_get(bdev, mode, 0, BDEV_FS);
if (error)
goto out;
+ check_disk_change(dev);
+ error = -EACCES;
+ if (!(flags & MS_RDONLY) && is_read_only(dev))
+ goto out1;
error = -EINVAL;
sb = read_super(dev, bdev, fs_type, flags, data, 0);
if (sb) {
@@ -762,6 +782,7 @@
path_release(&nd);
return sb;
}
+out1:
blkdev_put(bdev, BDEV_FS);
}
out:
@@ -812,8 +833,14 @@
{
struct block_device *bdev;
kdev_t dev;
- dput(sb->s_root);
+ struct dentry *root = sb->s_root;
sb->s_root = NULL;
+ /* Need to clean after the sucker */
+ if (sb->s_type->fs_flags & FS_LITTER)
+ d_genocide(root);
+ if (sb->s_type->fs_flags & (FS_SINGLE|FS_LITTER))
+ shrink_dcache_parent(root);
+ dput(root);
lock_super(sb);
if (sb->s_op) {
if (sb->s_op->write_super && sb->s_dirt)
@@ -895,7 +922,7 @@
put_unnamed_dev(dev);
return ERR_PTR(-EINVAL);
}
- mnt = add_vfsmnt(sb, sb->s_root, sb->s_root, NULL, "none", type->name);
+ mnt = add_vfsmnt(NULL, sb->s_root, "none");
if (!mnt) {
kill_super(sb, 0);
return ERR_PTR(-ENOMEM);
@@ -909,10 +936,8 @@
void kern_umount(struct vfsmount *mnt)
{
struct super_block *sb = mnt->mnt_sb;
- struct dentry *root = sb->s_root;
+ spin_lock(&dcache_lock);
remove_vfsmnt(mnt);
- dput(root);
- sb->s_root = NULL;
kill_super(sb, 0);
}
@@ -932,6 +957,16 @@
{
struct super_block * sb = mnt->mnt_sb;
+ /*
+ * No sense to grab the lock for this test, but test itself looks
+ * somewhat bogus. Suggestions for better replacement?
+ * Ho-hum... In principle, we might treat that as umount + switch
+ * to rootfs. GC would eventually take care of the old vfsmount.
+ * The problem being: we have to implement rootfs and GC for that ;-)
+ * Actually it makes sense, especially if rootfs would contain a
+ * /reboot - static binary that would close all descriptors and
+ * call reboot(9). Then init(8) could umount root and exec /reboot.
+ */
if (mnt == current->fs->rootmnt && !umount_root) {
int retval = 0;
/*
@@ -944,7 +979,9 @@
return retval;
}
+ spin_lock(&dcache_lock);
if (atomic_read(&mnt->mnt_count) > 2) {
+ spin_unlock(&dcache_lock);
mntput(mnt);
return -EBUSY;
}
@@ -952,10 +989,12 @@
if (mnt->mnt_instances.next != mnt->mnt_instances.prev) {
if (sb->s_type->fs_flags & FS_SINGLE)
put_filesystem(sb->s_type);
+ /* We hold two references, so mntput() is safe */
mntput(mnt);
remove_vfsmnt(mnt);
return 0;
}
+ spin_unlock(&dcache_lock);
/*
* Before checking whether the filesystem is still busy,
@@ -988,14 +1027,16 @@
shrink_dcache_sb(sb);
fsync_dev(sb->s_dev);
- /* Something might grab it again - redo checks */
-
- if (atomic_read(&mnt->mnt_count) > 2) {
+ if (sb->s_root->d_inode->i_state) {
mntput(mnt);
return -EBUSY;
}
- if (sb->s_root->d_inode->i_state) {
+ /* Something might grab it again - redo checks */
+
+ spin_lock(&dcache_lock);
+ if (atomic_read(&mnt->mnt_count) > 2) {
+ spin_unlock(&dcache_lock);
mntput(mnt);
return -EBUSY;
}
@@ -1067,6 +1108,8 @@
{
if (capable(CAP_SYS_ADMIN))
return 0;
+ return -EPERM;
+#ifdef notyet
if (S_ISLNK(nd->dentry->d_inode->i_mode))
return -EPERM;
if (nd->dentry->d_inode->i_mode & S_ISVTX) {
@@ -1076,6 +1119,7 @@
if (permission(nd->dentry->d_inode, MAY_WRITE))
return -EPERM;
return 0;
+#endif
}
/*
@@ -1102,22 +1146,22 @@
if (S_ISDIR(new_nd.dentry->d_inode->i_mode) !=
S_ISDIR(old_nd.dentry->d_inode->i_mode))
goto out2;
-
- down(&mount_sem);
- err = -ENOENT;
- if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
- goto out3;
- if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
- goto out3;
- /* there we go */
+
err = -ENOMEM;
if (old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE)
get_filesystem(old_nd.mnt->mnt_sb->s_type);
- if (add_vfsmnt(old_nd.mnt->mnt_sb, new_nd.dentry, old_nd.dentry,
- new_nd.mnt, old_nd.mnt->mnt_devname, new_name))
+
+ down(&mount_sem);
+ /* there we go */
+ down(&new_nd.dentry->d_inode->i_zombie);
+ if (IS_DEADDIR(new_nd.dentry->d_inode))
+ err = -ENOENT;
+ else if (add_vfsmnt(&new_nd, old_nd.dentry, old_nd.mnt->mnt_devname))
err = 0;
-out3:
+ up(&new_nd.dentry->d_inode->i_zombie);
up(&mount_sem);
+ if (err && old_nd.mnt->mnt_sb->s_type->fs_flags & FS_SINGLE)
+ put_filesystem(old_nd.mnt->mnt_sb->s_type);
out2:
path_release(&new_nd);
out1:
@@ -1215,7 +1259,7 @@
{
struct file_system_type * fstype;
struct nameidata nd;
- struct vfsmount *mnt;
+ struct vfsmount *mnt = NULL;
struct super_block *sb;
int retval = 0;
unsigned long flags = 0;
@@ -1224,8 +1268,6 @@
if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
return -EINVAL;
- if (!type_page || !memchr(type_page, 0, PAGE_SIZE))
- return -EINVAL;
if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
return -EINVAL;
@@ -1239,6 +1281,11 @@
if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL)
flags = new_flags & ~MS_MGC_MSK;
+ /* For the rest we need the type */
+
+ if (!type_page || !memchr(type_page, 0, PAGE_SIZE))
+ return -EINVAL;
+
/* loopback mount? This is special - requires fewer capabilities */
if (strcmp(type_page, "bind")==0)
return do_loopback(dev_name, dir_name);
@@ -1272,16 +1319,18 @@
if (IS_ERR(sb))
goto dput_out;
- retval = -ENOENT;
- if (d_unhashed(nd.dentry) && !IS_ROOT(nd.dentry))
- goto fail;
-
/* Something was mounted here while we slept */
while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry))
;
-
- retval = -ENOMEM;
- mnt = add_vfsmnt(sb, nd.dentry, sb->s_root, nd.mnt, dev_name, dir_name);
+ retval = -ENOENT;
+ if (!nd.dentry->d_inode)
+ goto fail;
+ down(&nd.dentry->d_inode->i_zombie);
+ if (!IS_DEADDIR(nd.dentry->d_inode)) {
+ retval = -ENOMEM;
+ mnt = add_vfsmnt(&nd, sb->s_root, dev_name);
+ }
+ up(&nd.dentry->d_inode->i_zombie);
if (!mnt)
goto fail;
retval = 0;
@@ -1312,15 +1361,6 @@
if (retval < 0)
return retval;
- /* copy_mount_options allows a NULL user pointer,
- * and just returns zero in that case. But if we
- * allow the type to be NULL we will crash.
- * Previously we did not check this case.
- */
- if (type_page == 0)
- return -EINVAL;
-
- lock_kernel();
dir_page = getname(dir_name);
retval = PTR_ERR(dir_page);
if (IS_ERR(dir_page))
@@ -1331,8 +1371,10 @@
goto out2;
retval = copy_mount_options (data, &data_page);
if (retval >= 0) {
+ lock_kernel();
retval = do_mount((char*)dev_page,dir_page,(char*)type_page,
new_flags, (void*)data_page);
+ unlock_kernel();
free_page(data_page);
}
free_page(dev_page);
@@ -1340,7 +1382,6 @@
putname(dir_page);
out1:
free_page(type_page);
- unlock_kernel();
return retval;
}
@@ -1414,7 +1455,7 @@
#endif
devfs_make_root (root_device_name);
- handle = devfs_find_handle (NULL, ROOT_DEVICE_NAME, 0,
+ handle = devfs_find_handle (NULL, ROOT_DEVICE_NAME,
MAJOR (ROOT_DEV), MINOR (ROOT_DEV),
DEVFS_SPECIAL_BLK, 1);
if (handle) /* Sigh: bd*() functions only paper over the cracks */
@@ -1485,17 +1526,14 @@
fs_type->name,
(sb->s_flags & MS_RDONLY) ? " readonly" : "");
if (path_start >= 0) {
- devfs_mk_symlink (NULL,
- "root", 0, DEVFS_FL_DEFAULT,
- path + 5 + path_start, 0,
- NULL, NULL);
+ devfs_mk_symlink (NULL, "root", DEVFS_FL_DEFAULT,
+ path + 5 + path_start, NULL, NULL);
memcpy (path + path_start, "/dev/", 5);
- vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL,
- path + path_start, "/");
+ vfsmnt = add_vfsmnt(NULL, sb->s_root, path + path_start);
}
else
- vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL,
- "/dev/root", "/");
+ vfsmnt = add_vfsmnt(NULL, sb->s_root, "/dev/root");
+ /* FIXME: if something will try to umount us right now... */
if (vfsmnt) {
set_fs_root(current->fs, vfsmnt, sb->s_root);
set_fs_pwd(current->fs, vfsmnt, sb->s_root);
@@ -1516,6 +1554,7 @@
read_lock(&tasklist_lock);
for_each_task(p) {
+ /* FIXME - unprotected usage of ->fs + (harmless) race */
if (!p->fs) continue;
if (p->fs->root == old_root && p->fs->rootmnt == old_rootmnt)
set_fs_root(p->fs, new_rootmnt, new_root);
@@ -1573,10 +1612,15 @@
if (error)
goto out1;
+ read_lock(&current->fs->lock);
root_mnt = mntget(current->fs->rootmnt);
root = dget(current->fs->root);
+ read_unlock(&current->fs->lock);
down(&mount_sem);
+ down(&old_nd.dentry->d_inode->i_zombie);
error = -ENOENT;
+ if (IS_DEADDIR(new_nd.dentry->d_inode))
+ goto out2;
if (d_unhashed(new_nd.dentry) && !IS_ROOT(new_nd.dentry))
goto out2;
if (d_unhashed(old_nd.dentry) && !IS_ROOT(old_nd.dentry))
@@ -1586,32 +1630,27 @@
goto out2; /* loop */
error = -EINVAL;
tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
+ spin_lock(&dcache_lock);
if (tmp != new_nd.mnt) {
for (;;) {
if (tmp->mnt_parent == tmp)
- goto out2;
+ goto out3;
if (tmp->mnt_parent == new_nd.mnt)
break;
tmp = tmp->mnt_parent;
}
if (!is_subdir(tmp->mnt_mountpoint, new_nd.dentry))
- goto out2;
+ goto out3;
} else if (!is_subdir(old_nd.dentry, new_nd.dentry))
- goto out2;
-
- error = -ENOMEM;
- name = __getname();
- if (!name)
- goto out2;
+ goto out3;
+ spin_unlock(&dcache_lock);
- move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL, "/");
- move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL,
- __d_path(old_nd.dentry, old_nd.mnt, new_nd.dentry,
- new_nd.mnt, name, PAGE_SIZE));
- putname(name);
+ move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL);
+ move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL);
chroot_fs_refs(root,root_mnt,new_nd.dentry,new_nd.mnt);
error = 0;
out2:
+ up(&old_nd.dentry->d_inode->i_zombie);
up(&mount_sem);
dput(root);
mntput(root_mnt);
@@ -1621,6 +1660,9 @@
out0:
unlock_kernel();
return error;
+out3:
+ spin_unlock(&dcache_lock);
+ goto out2;
}
@@ -1628,17 +1670,17 @@
int __init change_root(kdev_t new_root_dev,const char *put_old)
{
- kdev_t old_root_dev = ROOT_DEV;
- struct vfsmount *old_rootmnt = mntget(current->fs->rootmnt);
+ struct vfsmount *old_rootmnt;
struct nameidata devfs_nd, nd;
int error = 0;
+ read_lock(&current->fs->lock);
+ old_rootmnt = mntget(current->fs->rootmnt);
+ read_unlock(&current->fs->lock);
/* First unmount devfs if mounted */
if (path_init("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE, &devfs_nd))
error = path_walk("/dev", &devfs_nd);
if (!error) {
- struct super_block *sb = devfs_nd.dentry->d_inode->i_sb;
-
if (devfs_nd.mnt->mnt_sb->s_magic == DEVFS_SUPER_MAGIC &&
devfs_nd.dentry == devfs_nd.mnt->mnt_root) {
dput(devfs_nd.dentry);
@@ -1654,7 +1696,7 @@
#if 1
shrink_dcache();
printk("change_root: old root has d_count=%d\n",
- old_rootmnt->mnt_root->d_count);
+ atomic_read(&old_rootmnt->mnt_root->d_count));
#endif
mount_devfs_fs ();
/*
@@ -1672,10 +1714,11 @@
printk("okay\n");
return 0;
}
- printk(KERN_ERR "error %ld\n",blivet);
+ printk(KERN_ERR "error %d\n",blivet);
return error;
}
- move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old", put_old);
+ /* FIXME: we should hold i_zombie on nd.dentry */
+ move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old");
mntput(old_rootmnt);
path_release(&nd);
return 0;
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)