From: Pekka Enberg The revokeat(2) system call ensures that after a successful revocation you can only access an inode via a file descriptor that is obtained from a subsequent open(2) call. The open(2) system call can be blocked by the caller with chmod(2) and chown(2) prior to calling revokeat(2) to gain exclusive access to the inode. After a successful revocation, operations on file descriptors fail with the EBADF or ENXIO error code for regular and device files, respectively. Attempting to read from or write to a revoked mapping causes SIGBUS. What the revokeat(2) system call guarantees is: (1) open file descriptors are revoked, (2) file descriptors created by fork(2) and dup(2) during the operation are revoked, (3) file descriptors obtained via an SCM_RIGHTS datagram during or after the revoke operation are revoked, (4) in-flight read(2) and write(2) operations are either completed or aborted before revokeat(2) returns successfully, (5) attempting to read from or write to a shared memory mapping raises SIGBUS, and (6) copy-on-write to a private memory mapping after a successful revokeat(2) call does not reveal any data written after the system call has returned. As not all struct file pointers are attached to a file descriptor, it's not enough that we clean up the file descriptor tables. Instead, we must cannibalize the struct files so that they can no longer be used to access the inode. This simplifies things as we no longer need to worry about fork(2) and dup(2), which always use the same struct file pointer and simply bump up the reference count. We also don't need to worry about descriptors obtained via an SCM_RIGHTS datagram; as soon as the struct file is updated, it can no longer be used to access the inode. Updating the contents of struct file is safe because we freeze the whole block device to which the inode belongs with freeze_bdev(). This effectively guarantees that any write(2) operation within the VFS is blocked by vfs_check_frozen(). 
Signed-off-by: Pekka Enberg --- arch/i386/kernel/syscall_table.S | 1 arch/x86_64/ia32/ia32entry.S | 1 fs/Makefile | 1 fs/ext2/file.c | 1 fs/revoke.c | 365 +++++++++++++++++++++++++++++++++++ fs/revoked_inode.c | 406 +++++++++++++++++++++++++++++++++++++++ include/asm-i386/unistd.h | 3 include/linux/fs.h | 9 include/linux/magic.h | 2 include/linux/mm.h | 2 mm/memory.c | 3 mm/mmap.c | 23 +- 12 files changed, 811 insertions(+), 6 deletions(-) Index: 2.6/fs/revoke.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2.6/fs/revoke.c 2007-07-19 09:50:03.000000000 +0300 @@ -0,0 +1,365 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +static struct vfsmount *revokefs_mnt; + +static inline bool can_revoke_vma(struct vm_area_struct *vma, + struct inode *inode) +{ + struct file *file = vma->vm_file; + + if (vma->vm_flags & VM_REVOKED || !file) + return false; + + return file->f_path.dentry->d_inode == inode; +} + +static int __revoke_break_cow(struct task_struct *tsk, struct inode *inode) +{ + struct mm_struct *mm = tsk->mm; + struct vm_area_struct *vma; + int err = 0; + + if (!mm) + return 0; + + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + int ret; + + if (vma->vm_flags & VM_SHARED) + continue; + + if (!can_revoke_vma(vma, inode)) + continue; + + ret = get_user_pages(tsk, tsk->mm, vma->vm_start, + vma_pages(vma), 1, 1, NULL, NULL); + if (ret < 0) { + err = ret; + break; + } + unlink_file_vma(vma); + fput(vma->vm_file); + vma->vm_file = NULL; + } + up_read(&mm->mmap_sem); + return err; +} + +static int revoke_break_cow(struct inode *inode) +{ + struct task_struct *g, *p; + int err = 0; + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + err = __revoke_break_cow(p, inode); + if (err) + goto exit_loop; + } + while_each_thread(g, p); + exit_loop: + read_unlock(&tasklist_lock); + + return err; +} + +/* + * LOCKING: 
down_write(&mm->mmap_sem) + * -> spin_lock(&mapping->i_mmap_lock) + */ +static int revoke_vma(struct vm_area_struct *vma, struct zap_details *details) +{ + unsigned long restart_addr, start_addr, end_addr; + int need_break; + + start_addr = vma->vm_start; + end_addr = vma->vm_end; + + again: + restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr, + details); + + need_break = need_resched() || need_lockbreak(details->i_mmap_lock); + if (need_break) + goto out_need_break; + + if (restart_addr < end_addr) { + start_addr = restart_addr; + goto again; + } + vma->vm_flags |= VM_REVOKED; + return 0; + + out_need_break: + spin_unlock(details->i_mmap_lock); + cond_resched(); + spin_lock(details->i_mmap_lock); + return -EINTR; +} + +/* + * LOCKING: spin_lock(&mapping->i_mmap_lock) + */ +static int revoke_mm(struct mm_struct *mm, struct address_space *mapping) +{ + struct vm_area_struct *vma; + struct zap_details details; + int err = 0; + + details.i_mmap_lock = &mapping->i_mmap_lock; + + /* + * If ->mmap_sem is under contention, we continue scanning other + * mms and try again later. 
+ */ + if (!down_write_trylock(&mm->mmap_sem)) { + err = -EAGAIN; + goto out; + } + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + if (!(vma->vm_flags & VM_SHARED)) + continue; + + if (!can_revoke_vma(vma, mapping->host)) + continue; + + err = revoke_vma(vma, &details); + if (err) + break; + + __unlink_file_vma(vma); + fput(vma->vm_file); + vma->vm_file = NULL; + } + up_write(&mm->mmap_sem); + out: + return err; +} + +/* + * LOCKING: spin_lock(&mapping->i_mmap_lock) + */ +static void revoke_mapping_tree(struct address_space *mapping) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + int try_again = 0; + + restart: + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) { + int err; + + if (!(vma->vm_flags & VM_SHARED)) + continue; + + if (likely(!can_revoke_vma(vma, mapping->host))) + continue; + + err = revoke_mm(vma->vm_mm, mapping); + if (err == -EAGAIN) + try_again = 1; + + goto restart; + } + if (try_again) { + cond_resched(); + goto restart; + } +} + +/* + * LOCKING: spin_lock(&mapping->i_mmap_lock) + */ +static void revoke_mapping_list(struct address_space *mapping) +{ + struct vm_area_struct *vma; + int try_again = 0; + + restart: + list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) { + int err; + + if (likely(!can_revoke_vma(vma, mapping->host))) + continue; + + err = revoke_mm(vma->vm_mm, mapping); + if (err == -EAGAIN) { + try_again = 1; + continue; + } + if (err == -EINTR) + goto restart; + } + if (try_again) { + cond_resched(); + goto restart; + } +} + +static void revoke_mapping(struct address_space *mapping) +{ + spin_lock(&mapping->i_mmap_lock); + if (unlikely(!prio_tree_empty(&mapping->i_mmap))) + revoke_mapping_tree(mapping); + if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) + revoke_mapping_list(mapping); + spin_unlock(&mapping->i_mmap_lock); +} + +static int do_revoke(struct inode *inode) +{ + struct address_space *mapping = inode->i_mapping; + struct inode *revoke_inode; + 
struct super_block *sb; + struct dentry *dentry; + struct file *file; + struct qstr name; + int err = 0; + + if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) + return -EPERM; + + if (!inode->i_sb->s_bdev || !inode->i_fop->revoke) + return -EOPNOTSUPP; + + revoke_inode = new_inode(revokefs_mnt->mnt_sb); + if (!revoke_inode) + return -ENOMEM; + + revoke_inode->i_mode = inode->i_mode; + make_revoked_inode(revoke_inode); + name.name = "revoked_file"; + name.len = strlen(name.name); + dentry = d_alloc(revokefs_mnt->mnt_sb->s_root, &name); + if (!dentry) { + iput(revoke_inode); + return -ENOMEM; + } + d_instantiate(dentry, revoke_inode); + + sb = freeze_bdev(inode->i_sb->s_bdev); + if (!sb) { + err = -EINVAL; + goto failed_freeze_bdev; + } + + revoke_mapping(mapping); + + err = revoke_break_cow(inode); + if (err) + goto failed_break_cow; + + file_list_lock(); + list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + struct dentry *dentry = file->f_path.dentry; + + if (dentry->d_inode != inode) + continue; + + /* We raced with another revoke() */ + if (!file->f_op->revoke) + continue; + + err = file->f_op->revoke(file, revoke_inode, dentry, revokefs_mnt); + if (err) + break; + } + file_list_unlock(); + + failed_break_cow: + thaw_bdev(sb->s_bdev, sb); + failed_freeze_bdev: + dput(dentry); + return err; +} + +asmlinkage long sys_revokeat(int dfd, const char __user *filename) +{ + struct nameidata nd; + int err; + + err = __user_walk_fd(dfd, filename, 0, &nd); + if (!err) { + err = do_revoke(nd.dentry->d_inode); + path_release(&nd); + } + return err; +} + +int generic_file_revoke(struct file *file, struct inode *inode, struct dentry *dentry, struct vfsmount *mnt) +{ + struct address_space *mapping = file->f_mapping; + int err; + + /* + * Flush pending writes. 
+ */ + err = do_fsync(file, 1); + if (err) + goto out; + + file->f_mapping = inode->i_mapping; + file->f_dentry = dget(dentry); + file->f_vfsmnt = mntget(mnt); + file->f_op = fops_get(inode->i_fop); + file->f_pos = 0; + + /* + * Make pending reads fail. + */ + err = invalidate_inode_pages2(mapping); + out: + return err; +} +EXPORT_SYMBOL(generic_file_revoke); + +/* + * Filesystem for revoked files. + */ + +static struct super_operations revokefs_super_ops = { + .drop_inode = generic_delete_inode, +}; + +static int revokefs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) +{ + return get_sb_pseudo(fs_type, "revoke:", &revokefs_super_ops, REVOKEFS_MAGIC, mnt); +} + +static struct file_system_type revokefs_fs_type = { + .name = "revokefs", + .get_sb = revokefs_get_sb, + .kill_sb = kill_anon_super +}; + +static int __init revokefs_init(void) +{ + int err = -ENOMEM; + + err = register_filesystem(&revokefs_fs_type); + if (err) + goto out; + + revokefs_mnt = kern_mount(&revokefs_fs_type); + if (IS_ERR(revokefs_mnt)) { + err = PTR_ERR(revokefs_mnt); + goto err_mnt; + } + out: + return err; + err_mnt: + unregister_filesystem(&revokefs_fs_type); + return err; +} + +late_initcall(revokefs_init); Index: 2.6/fs/Makefile =================================================================== --- 2.6.orig/fs/Makefile 2007-07-16 10:24:54.000000000 +0300 +++ 2.6/fs/Makefile 2007-07-19 09:09:00.000000000 +0300 @@ -19,6 +19,7 @@ else obj-y += no-block.o endif +obj-$(CONFIG_MMU) += revoke.o revoked_inode.o obj-$(CONFIG_INOTIFY) += inotify.o obj-$(CONFIG_INOTIFY_USER) += inotify_user.o obj-$(CONFIG_EPOLL) += eventpoll.o Index: 2.6/arch/i386/kernel/syscall_table.S =================================================================== --- 2.6.orig/arch/i386/kernel/syscall_table.S 2007-07-16 10:24:54.000000000 +0300 +++ 2.6/arch/i386/kernel/syscall_table.S 2007-07-19 09:09:00.000000000 +0300 @@ -323,3 +323,4 @@ .long sys_utimensat /* 
320 */ .long sys_signalfd .long sys_timerfd .long sys_eventfd + .long sys_revokeat Index: 2.6/arch/x86_64/ia32/ia32entry.S =================================================================== --- 2.6.orig/arch/x86_64/ia32/ia32entry.S 2007-07-17 11:10:12.000000000 +0300 +++ 2.6/arch/x86_64/ia32/ia32entry.S 2007-07-19 09:09:00.000000000 +0300 @@ -719,4 +719,5 @@ .quad compat_sys_utimensat /* 320 */ .quad compat_sys_signalfd .quad compat_sys_timerfd .quad sys_eventfd + .quad sys_revokeat ia32_syscall_end: Index: 2.6/include/asm-i386/unistd.h =================================================================== --- 2.6.orig/include/asm-i386/unistd.h 2007-07-16 10:24:54.000000000 +0300 +++ 2.6/include/asm-i386/unistd.h 2007-07-19 09:09:00.000000000 +0300 @@ -329,10 +329,11 @@ #define __NR_utimensat 320 #define __NR_signalfd 321 #define __NR_timerfd 322 #define __NR_eventfd 323 +#define __NR_revokeat 324 #ifdef __KERNEL__ -#define NR_syscalls 324 +#define NR_syscalls 325 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR Index: 2.6/fs/ext2/file.c =================================================================== --- 2.6.orig/fs/ext2/file.c 2007-07-17 11:10:25.000000000 +0300 +++ 2.6/fs/ext2/file.c 2007-07-19 09:09:01.000000000 +0300 @@ -55,6 +55,7 @@ const struct file_operations ext2_file_o .fsync = ext2_sync_file, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, + .revoke = generic_file_revoke, }; #ifdef CONFIG_EXT2_FS_XIP Index: 2.6/include/linux/fs.h =================================================================== --- 2.6.orig/include/linux/fs.h 2007-07-17 11:10:38.000000000 +0300 +++ 2.6/include/linux/fs.h 2007-07-19 09:09:01.000000000 +0300 @@ -1116,6 +1116,7 @@ struct file_operations { int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, 
size_t, unsigned int); + int (*revoke) (struct file *, struct inode *, struct dentry *, struct vfsmount *); }; struct inode_operations { @@ -1786,6 +1787,14 @@ extern ssize_t generic_splice_sendpage(s extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, size_t len, unsigned int flags); +/* fs/revoke.c */ +#ifdef CONFIG_MMU +extern void make_revoked_inode(struct inode *); +extern int generic_file_revoke(struct file *, struct inode *, struct dentry *, struct vfsmount *); +#else +#define generic_file_revoke NULL +#endif + extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); Index: 2.6/fs/revoked_inode.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ 2.6/fs/revoked_inode.c 2007-07-19 09:09:01.000000000 +0300 @@ -0,0 +1,406 @@ +/* + * fs/revoked_inode.c + * + * Copyright (C) 2007 Pekka Enberg + * + * Provide stub functions for revoked inodes. Based on fs/bad_inode.c which is + * + * Copyright (C) 1997 Stephen Tweedie + * + * This file is released under the GPLv2. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +static loff_t revoked_file_llseek(struct file *file, loff_t offset, int origin) +{ + return -EBADF; +} + +static ssize_t revoked_file_read(struct file *filp, char __user * buf, + size_t size, loff_t * ppos) +{ + return -EBADF; +} + +static ssize_t revoked_special_file_read(struct file *filp, char __user * buf, + size_t size, loff_t * ppos) +{ + return 0; +} + +static ssize_t revoked_file_write(struct file *filp, const char __user * buf, + size_t siz, loff_t * ppos) +{ + return -EBADF; +} + +static ssize_t revoked_file_aio_read(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + return -EBADF; +} + +static ssize_t revoked_file_aio_write(struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + return -EBADF; +} + +static int revoked_file_readdir(struct file *filp, void *dirent, + filldir_t filldir) +{ + return -EBADF; +} + +static unsigned int revoked_file_poll(struct file *filp, poll_table * wait) +{ + return POLLERR; +} + +static int revoked_file_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return -EBADF; +} + +static long revoked_file_unlocked_ioctl(struct file *file, unsigned cmd, + unsigned long arg) +{ + return -EBADF; +} + +static long revoked_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return -EBADF; +} + +static int revoked_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + return -EBADF; +} + +static int revoked_file_open(struct inode *inode, struct file *filp) +{ + return -EBADF; +} + +static int revoked_file_flush(struct file *file, fl_owner_t id) +{ + return 0; +} + +static int revoked_file_release(struct inode *inode, struct file *filp) +{ + return -EBADF; +} + +static int revoked_file_fsync(struct file *file, struct dentry *dentry, + int datasync) +{ + return -EBADF; +} + +static int revoked_file_aio_fsync(struct 
kiocb *iocb, int datasync) +{ + return -EBADF; +} + +static int revoked_file_fasync(int fd, struct file *filp, int on) +{ + return -EBADF; +} + +static int revoked_file_lock(struct file *file, int cmd, struct file_lock *fl) +{ + return -EBADF; +} + +static ssize_t revoked_file_sendpage(struct file *file, struct page *page, + int off, size_t len, loff_t * pos, + int more) +{ + return -EBADF; +} + +static unsigned long revoked_file_get_unmapped_area(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + return -EBADF; +} + +static int revoked_file_check_flags(int flags) +{ + return -EBADF; +} + +static int revoked_file_dir_notify(struct file *file, unsigned long arg) +{ + return -EBADF; +} + +static int revoked_file_flock(struct file *filp, int cmd, struct file_lock *fl) +{ + return -EBADF; +} + +static ssize_t revoked_file_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t * ppos, + size_t len, unsigned int flags) +{ + return -EBADF; +} + +static ssize_t revoked_file_splice_read(struct file *in, loff_t * ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags) +{ + return -EBADF; +} + +static const struct file_operations revoked_file_ops = { + .llseek = revoked_file_llseek, + .read = revoked_file_read, + .write = revoked_file_write, + .aio_read = revoked_file_aio_read, + .aio_write = revoked_file_aio_write, + .readdir = revoked_file_readdir, + .poll = revoked_file_poll, + .ioctl = revoked_file_ioctl, + .unlocked_ioctl = revoked_file_unlocked_ioctl, + .compat_ioctl = revoked_file_compat_ioctl, + .mmap = revoked_file_mmap, + .open = revoked_file_open, + .flush = revoked_file_flush, + .release = revoked_file_release, + .fsync = revoked_file_fsync, + .aio_fsync = revoked_file_aio_fsync, + .fasync = revoked_file_fasync, + .lock = revoked_file_lock, + .sendpage = revoked_file_sendpage, + .get_unmapped_area = revoked_file_get_unmapped_area, + .check_flags = revoked_file_check_flags, 
+ .dir_notify = revoked_file_dir_notify, + .flock = revoked_file_flock, + .splice_write = revoked_file_splice_write, + .splice_read = revoked_file_splice_read, +}; + +static const struct file_operations revoked_special_file_ops = { + .llseek = revoked_file_llseek, + .read = revoked_special_file_read, + .write = revoked_file_write, + .aio_read = revoked_file_aio_read, + .aio_write = revoked_file_aio_write, + .readdir = revoked_file_readdir, + .poll = revoked_file_poll, + .ioctl = revoked_file_ioctl, + .unlocked_ioctl = revoked_file_unlocked_ioctl, + .compat_ioctl = revoked_file_compat_ioctl, + .mmap = revoked_file_mmap, + .open = revoked_file_open, + .flush = revoked_file_flush, + .release = revoked_file_release, + .fsync = revoked_file_fsync, + .aio_fsync = revoked_file_aio_fsync, + .fasync = revoked_file_fasync, + .lock = revoked_file_lock, + .sendpage = revoked_file_sendpage, + .get_unmapped_area = revoked_file_get_unmapped_area, + .check_flags = revoked_file_check_flags, + .dir_notify = revoked_file_dir_notify, + .flock = revoked_file_flock, + .splice_write = revoked_file_splice_write, + .splice_read = revoked_file_splice_read, +}; + +static int revoked_inode_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + return -EBADF; +} + +static struct dentry *revoked_inode_lookup(struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + return ERR_PTR(-EBADF); +} + +static int revoked_inode_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + return -EBADF; +} + +static int revoked_inode_unlink(struct inode *dir, struct dentry *dentry) +{ + return -EBADF; +} + +static int revoked_inode_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + return -EBADF; +} + +static int revoked_inode_mkdir(struct inode *dir, struct dentry *dentry, + int mode) +{ + return -EBADF; +} + +static int revoked_inode_rmdir(struct inode *dir, struct dentry *dentry) +{ + return -EBADF; +} + 
+static int revoked_inode_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + return -EBADF; +} + +static int revoked_inode_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry) +{ + return -EBADF; +} + +static int revoked_inode_readlink(struct dentry *dentry, char __user * buffer, + int buflen) +{ + return -EBADF; +} + +static int revoked_inode_permission(struct inode *inode, int mask, + struct nameidata *nd) +{ + return -EBADF; +} + +static int revoked_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + return -EBADF; +} + +static int revoked_inode_setattr(struct dentry *direntry, struct iattr *attrs) +{ + return -EBADF; +} + +static int revoked_inode_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + return -EBADF; +} + +static ssize_t revoked_inode_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + return -EBADF; +} + +static ssize_t revoked_inode_listxattr(struct dentry *dentry, char *buffer, + size_t buffer_size) +{ + return -EBADF; +} + +static int revoked_inode_removexattr(struct dentry *dentry, const char *name) +{ + return -EBADF; +} + +static struct inode_operations revoked_inode_ops = { + .create = revoked_inode_create, + .lookup = revoked_inode_lookup, + .link = revoked_inode_link, + .unlink = revoked_inode_unlink, + .symlink = revoked_inode_symlink, + .mkdir = revoked_inode_mkdir, + .rmdir = revoked_inode_rmdir, + .mknod = revoked_inode_mknod, + .rename = revoked_inode_rename, + .readlink = revoked_inode_readlink, + /* follow_link must be no-op, otherwise unmounting this inode + won't work */ + /* put_link returns void */ + /* truncate returns void */ + .permission = revoked_inode_permission, + .getattr = revoked_inode_getattr, + .setattr = revoked_inode_setattr, + .setxattr = revoked_inode_setxattr, + .getxattr = revoked_inode_getxattr, + .listxattr = 
revoked_inode_listxattr, + .removexattr = revoked_inode_removexattr, + /* truncate_range returns void */ +}; + +static int revoked_readpage(struct file *file, struct page *page) +{ + return -EIO; +} + +static int revoked_writepage(struct page *page, struct writeback_control *wbc) +{ + return -EIO; +} + +static int revoked_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return -EIO; +} + +static int revoked_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + return -EIO; +} + +static ssize_t revoked_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + return -EIO; +} + +static const struct address_space_operations revoked_aops = { + .readpage = revoked_readpage, + .writepage = revoked_writepage, + .prepare_write = revoked_prepare_write, + .commit_write = revoked_commit_write, + .direct_IO = revoked_direct_IO, +}; + +void make_revoked_inode(struct inode *inode) +{ + remove_inode_hash(inode); + + inode->i_atime = inode->i_mtime = inode->i_ctime = + current_fs_time(inode->i_sb); + inode->i_op = &revoked_inode_ops; + + if (special_file(inode->i_mode)) + inode->i_fop = &revoked_special_file_ops; + else + inode->i_fop = &revoked_file_ops; + + inode->i_mapping->a_ops = &revoked_aops; +} Index: 2.6/include/linux/mm.h =================================================================== --- 2.6.orig/include/linux/mm.h 2007-07-17 11:10:38.000000000 +0300 +++ 2.6/include/linux/mm.h 2007-07-19 09:09:01.000000000 +0300 @@ -167,6 +167,7 @@ #define VM_NONLINEAR 0x00800000 /* Is no #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ +#define VM_REVOKED 0x08000000 /* Mapping has been revoked */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define 
VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS @@ -1056,6 +1057,7 @@ extern int split_vma(struct mm_struct *, extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, struct rb_node **, struct rb_node *); +extern void __unlink_file_vma(struct vm_area_struct *); extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); Index: 2.6/mm/memory.c =================================================================== --- 2.6.orig/mm/memory.c 2007-07-17 11:10:41.000000000 +0300 +++ 2.6/mm/memory.c 2007-07-19 09:09:01.000000000 +0300 @@ -2603,6 +2603,9 @@ int __handle_mm_fault(struct mm_struct * if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, write_access); + if (unlikely(vma->vm_flags & VM_REVOKED)) + return VM_FAULT_SIGBUS; + pgd = pgd_offset(mm, address); pud = pud_alloc(mm, pgd, address); if (!pud) Index: 2.6/mm/mmap.c =================================================================== --- 2.6.orig/mm/mmap.c 2007-07-17 11:10:42.000000000 +0300 +++ 2.6/mm/mmap.c 2007-07-19 09:09:01.000000000 +0300 @@ -202,6 +202,17 @@ static void __remove_shared_vm_struct(st } /* + * Requires inode->i_mapping->i_mmap_lock + */ +void __unlink_file_vma(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct address_space *mapping = file->f_mapping; + + __remove_shared_vm_struct(vma, file, mapping); +} + +/* * Unlink a file-based vm structure from its prio_tree, to hide * vma from rmap and vmtruncate before freeing its page tables. 
*/ @@ -1046,10 +1057,14 @@ unsigned long charged = 0; error = -ENOMEM; munmap_back: vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); - if (vma && vma->vm_start < addr + len) { - if (do_munmap(mm, addr, len)) - return -ENOMEM; - goto munmap_back; + if (vma) { + if (unlikely(vma->vm_flags & VM_REVOKED)) + return -ENODEV; + if (vma->vm_start < addr + len) { + if (do_munmap(mm, addr, len)) + return -ENOMEM; + goto munmap_back; + } } /* Check against address space limit. */ Index: 2.6/include/linux/magic.h =================================================================== --- 2.6.orig/include/linux/magic.h 2007-07-16 10:24:54.000000000 +0300 +++ 2.6/include/linux/magic.h 2007-07-19 09:09:01.000000000 +0300 @@ -35,7 +35,7 @@ #define REISERFS_SUPER_MAGIC 0x52654973 #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" - +#define REVOKEFS_MAGIC 0x5245564B /* REVK */ #define SMB_SUPER_MAGIC 0x517B #define USBDEVICE_SUPER_MAGIC 0x9fa2