[PATCH] in VFS automounting

David Howells (dhowells@redhat.com)
Fri, 13 Jun 2003 11:46:02 +0100


This is a MIME-formatted message. If you see this text it means that your
E-mail software does not support MIME-formatted messages.

--=_courier-870-1055501214-0001-2
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Hi Al,

I've come up with a way of doing automounting from within the VFS (see patch)
for my AFS filesystem. The core kernel changes aren't particularly extensive,
but I'd like your opinion of them. I don't see any obvious races, but that's
not to say there aren't any.

This patch involves adding the following features:

(1) A new dentry operation that (a) marks a dentry as being an automount
point, and (b) gets called by the VFS to come up with a vfsmount
structure which the VFS then stitches into the mount tree fabric at the
appropriate place.

(2) A new lookup flag that is used by sys_*stat() to prevent automounting of
the path endpoint. This means "ls -l" in an automounter directory doesn't
cause a mount storm, but will display all the mountpoints in that
directory as subdirectories (either the underlying mountpoint dir or the
root dir of the mounted fs if the mountpoint has been triggered already).

(3) do_kern_mount() is now exported.

As an example, I've implemented this operation for mountpoints in my AFS
filesystem client:

/*
 * Dentry operations for AFS mountpoint dentries.  Supplying .d_automount is
 * what marks such a dentry as an automount point; the VFS calls it to obtain
 * the vfsmount that it then stitches into the mount tree.
 */
static struct dentry_operations afs_fs_mntpt_dentry_operations = {
.d_revalidate = afs_d_revalidate,
.d_delete = afs_d_delete,
.d_automount = afs_mntpt_d_automount,
};

/*
 * Automount handler for an AFS mountpoint dentry.
 *
 * The data held in page 0 of the mountpoint inode's mapping (the AFS special
 * symlink) is copied into a scratch page and handed to do_kern_mount() as
 * the device name of an "afs" mount.
 *
 * Returns whatever do_kern_mount() returns, or an ERR_PTR():
 *   -EINVAL  the inode's size does not fit in a page with a byte to spare
 *   -ENOMEM  the scratch page could not be allocated
 *   -EIO     the page is not up to date, or is flagged in error
 *   or the error encoded in read_cache_page()'s return value.
 */
struct vfsmount *afs_mntpt_d_automount(struct dentry *mntpt)
{
	struct vfsmount *mounted;
	struct page *symlink_page;
	char *target;
	char *mapped;
	size_t len = mntpt->d_inode->i_size;

	/* at most PAGE_SIZE - 1 bytes are copied into the zeroed scratch
	 * page, so the device name stays NUL-terminated */
	if (len > PAGE_SIZE - 1)
		return ERR_PTR(-EINVAL);

	target = (char *) get_zeroed_page(GFP_KERNEL);
	if (!target)
		return ERR_PTR(-ENOMEM);

	/* read the contents of the AFS special symlink */
	symlink_page = read_cache_page(mntpt->d_inode->i_mapping,
				       0,
				       (filler_t*)mntpt->d_inode->i_mapping->a_ops->readpage,
				       NULL);
	if (IS_ERR(symlink_page)) {
		mounted = ERR_PTR(PTR_ERR(symlink_page));
		goto out_free;
	}

	wait_on_page_locked(symlink_page);
	if (!PageUptodate(symlink_page) || PageError(symlink_page)) {
		/* drop the page reference before the scratch page */
		page_cache_release(symlink_page);
		mounted = ERR_PTR(-EIO);
		goto out_free;
	}

	mapped = kmap(symlink_page);
	memcpy(target, mapped, len);
	kunmap(symlink_page);
	page_cache_release(symlink_page);

	mounted = do_kern_mount("afs", 0, target, NULL);

out_free:
	/* the scratch page is released on success and failure alike */
	free_page((unsigned long)target);
	return mounted;
}

As you can see, the inode attached to the underlying mountpoint can be used to
determine _what_ should be mounted.

I've also got some ideas on automatic automount expiry in the VFS, but this
involves adding the following:

#define MNT_AUTOEXPIRE 8

struct vfsmount {
...
time_t expiry;
struct namespace *namespace;
};

struct namespace {
...
struct timer expiry_timer;
};

And then mntput() would timestamp the vfsmount and start the timer going (if it's
not already active) when mnt_count==1, and the timer routine would walk the
namespace's list of mounts looking for expired vfsmounts (obviously this would
need to be done in process context somehow).

What do you think?

David

--=_courier-870-1055501214-0001-2
Content-Type: application/octet-stream; type=patch
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="automount-2570.diff"

diff -uNr linux-2.5.70/fs/namei.c linux-2.5.70-auto/fs/namei.c
--- linux-2.5.70/fs/namei.c 2003-06-12 11:35:52.000000000 +0100
+++ linux-2.5.70-auto/fs/namei.c 2003-06-12 15:46:15.000000000 +0100
@@ -434,23 +434,35 @@
return 1;
}

-static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static int follow_mount(struct vfsmount **mnt, struct dentry **dentry, unsigned int flags)
{
int res = 0;
- while (d_mountpoint(*dentry)) {
- struct vfsmount *mounted;
- spin_lock(&dcache_lock);
- mounted = lookup_mnt(*mnt, *dentry);
- if (!mounted) {
+ for (;;) {
+
+ if (d_mountpoint(*dentry)) {
+ struct vfsmount *mounted;
+ spin_lock(&dcache_lock);
+ mounted = lookup_mnt(*mnt, *dentry);
+ if (!mounted) {
+ spin_unlock(&dcache_lock);
+ break;
+ }
+ *mnt = mntget(mounted);
spin_unlock(&dcache_lock);
+ dput(*dentry);
+ mntput(mounted->mnt_parent);
+ *dentry = dget(mounted->mnt_root);
+ res = 1;
+
+ } else if (d_automount_point(*dentry)) {
+ if (flags & LOOKUP_NOAUTOMOUNT)
+ break;
+ res = kern_automount(*mnt, *dentry);
+ if (res < 0)
+ break;
+ } else {
break;
}
- *mnt = mntget(mounted);
- spin_unlock(&dcache_lock);
- dput(*dentry);
- mntput(mounted->mnt_parent);
- *dentry = dget(mounted->mnt_root);
- res = 1;
}
return res;
}
@@ -510,7 +522,7 @@
mntput(*mnt);
*mnt = parent;
}
- follow_mount(mnt, dentry);
+ follow_mount(mnt, dentry, 0);
}

struct path {
@@ -643,7 +655,9 @@
if (err)
break;
/* Check mountpoints.. */
- follow_mount(&next.mnt, &next.dentry);
+ err = follow_mount(&next.mnt, &next.dentry, 0);
+ if (err < 0)
+ goto out_dput;

err = -ENOENT;
inode = next.dentry->d_inode;
@@ -703,7 +717,10 @@
err = do_lookup(nd, &this, &next, 0);
if (err)
break;
- follow_mount(&next.mnt, &next.dentry);
+ err = follow_mount(&next.mnt, &next.dentry, nd->flags & LOOKUP_NOAUTOMOUNT);
+ if (err < 0)
+ goto out_dput;
+
inode = next.dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW)
&& inode && inode->i_op && inode->i_op->follow_link) {
diff -uNr linux-2.5.70/fs/namespace.c linux-2.5.70-auto/fs/namespace.c
--- linux-2.5.70/fs/namespace.c 2003-06-12 11:35:52.000000000 +0100
+++ linux-2.5.70-auto/fs/namespace.c 2003-06-12 15:07:00.000000000 +0100
@@ -674,6 +674,43 @@
return err;
}

+int kern_automount(struct vfsmount *on_mnt, struct dentry *on_dentry)
+{
+ struct nameidata nd;
+ struct vfsmount *mnt;
+ int err;
+
+ if (!on_dentry->d_inode || !S_ISDIR(on_dentry->d_inode->i_mode))
+ return -ENOTDIR;
+
+ mnt = on_dentry->d_op->d_automount(on_dentry);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+
+ memset(&nd,0,sizeof(nd));
+ nd.dentry = on_dentry;
+ nd.mnt = on_mnt;
+
+ down_write(&current->namespace->sem);
+ /* Something was mounted here while we slept */
+ while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry))
+ ;
+ err = -EINVAL;
+ if (!check_mnt(nd.mnt))
+ goto unlock;
+
+ /* Refuse the same filesystem on the same mount point */
+ err = -EBUSY;
+ if (nd.mnt->mnt_sb == mnt->mnt_sb && nd.mnt->mnt_root == nd.dentry)
+ goto unlock;
+
+ err = graft_tree(mnt, &nd);
+unlock:
+ up_write(&current->namespace->sem);
+ mntput(mnt);
+ return err;
+}
+
static int copy_mount_options (const void __user *data, unsigned long *where)
{
int i;
diff -uNr linux-2.5.70/fs/stat.c linux-2.5.70-auto/fs/stat.c
--- linux-2.5.70/fs/stat.c 2003-06-12 11:35:52.000000000 +0100
+++ linux-2.5.70-auto/fs/stat.c 2003-06-12 14:16:29.000000000 +0100
@@ -61,7 +61,7 @@
struct nameidata nd;
int error;

- error = user_path_walk(name, &nd);
+ error = user_path_walk_stat(name, &nd);
if (!error) {
error = vfs_getattr(nd.mnt, nd.dentry, stat);
path_release(&nd);
@@ -74,7 +74,7 @@
struct nameidata nd;
int error;

- error = user_path_walk_link(name, &nd);
+ error = user_path_walk_link_stat(name, &nd);
if (!error) {
error = vfs_getattr(nd.mnt, nd.dentry, stat);
path_release(&nd);
diff -uNr linux-2.5.70/fs/super.c linux-2.5.70-auto/fs/super.c
--- linux-2.5.70/fs/super.c 2003-06-12 11:35:53.000000000 +0100
+++ linux-2.5.70-auto/fs/super.c 2003-06-12 16:15:06.000000000 +0100
@@ -21,6 +21,7 @@
*/

#include <linux/config.h>
+#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/acct.h>
@@ -697,6 +698,8 @@
return (struct vfsmount *)sb;
}

+EXPORT_SYMBOL_GPL(do_kern_mount);
+
struct vfsmount *kern_mount(struct file_system_type *type)
{
return do_kern_mount(type->name, 0, type->name, NULL);
diff -uNr linux-2.5.70/include/linux/dcache.h linux-2.5.70-auto/include/linux/dcache.h
--- linux-2.5.70/include/linux/dcache.h 2003-06-12 11:32:25.000000000 +0100
+++ linux-2.5.70-auto/include/linux/dcache.h 2003-06-12 14:57:27.000000000 +0100
@@ -112,6 +112,7 @@
int (*d_delete)(struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
+ struct vfsmount *(*d_automount)(struct dentry *);
};

/* the dentry parameter passed to d_hash and d_compare is the parent
@@ -305,6 +306,11 @@
return dentry->d_mounted;
}

+static inline int d_automount_point(struct dentry *dentry)
+{
+ return dentry->d_op && dentry->d_op->d_automount;
+}
+
extern struct vfsmount *lookup_mnt(struct vfsmount *, struct dentry *);
#endif /* __KERNEL__ */

diff -uNr linux-2.5.70/include/linux/namei.h linux-2.5.70-auto/include/linux/namei.h
--- linux-2.5.70/include/linux/namei.h 2003-06-12 11:32:24.000000000 +0100
+++ linux-2.5.70-auto/include/linux/namei.h 2003-06-12 15:02:19.000000000 +0100
@@ -31,6 +31,7 @@
#define LOOKUP_CONTINUE 4
#define LOOKUP_PARENT 16
#define LOOKUP_NOALT 32
+#define LOOKUP_NOAUTOMOUNT 64


extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
@@ -38,6 +39,10 @@
__user_walk(name, LOOKUP_FOLLOW, nd)
#define user_path_walk_link(name,nd) \
__user_walk(name, 0, nd)
+#define user_path_walk_stat(name,nd) \
+ __user_walk(name, LOOKUP_FOLLOW|LOOKUP_NOAUTOMOUNT, nd)
+#define user_path_walk_link_stat(name,nd) \
+ __user_walk(name, LOOKUP_NOAUTOMOUNT, nd)
extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
extern int FASTCALL(path_walk(const char *, struct nameidata *));
extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
@@ -52,4 +57,6 @@
extern struct dentry *lock_rename(struct dentry *, struct dentry *);
extern void unlock_rename(struct dentry *, struct dentry *);

+extern int kern_automount(struct vfsmount *mnt, struct dentry *dentry);
+
#endif /* _LINUX_NAMEI_H */

--=_courier-870-1055501214-0001-2--