summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-05-01 17:51:54 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-05-01 17:51:54 -0700
commit20b4fb485227404329e41ad15588afad3df23050 (patch)
treef3e099f0ab3da8a93b447203e294d2bb22f6dc05 /fs
parentb9394d8a657cd3c064fa432aa0905c1b58b38fe9 (diff)
parentac3e3c5b1164397656df81b9e9ab4991184d3236 (diff)
downloadlinux-stable-20b4fb485227404329e41ad15588afad3df23050.tar.gz
linux-stable-20b4fb485227404329e41ad15588afad3df23050.tar.bz2
linux-stable-20b4fb485227404329e41ad15588afad3df23050.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull VFS updates from Al Viro, Misc cleanups all over the place, mainly wrt /proc interfaces (switch create_proc_entry to proc_create(), get rid of the deprecated create_proc_read_entry() in favor of using proc_create_data() and seq_file etc). 7kloc removed. * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (204 commits) don't bother with deferred freeing of fdtables proc: Move non-public stuff from linux/proc_fs.h to fs/proc/internal.h proc: Make the PROC_I() and PDE() macros internal to procfs proc: Supply a function to remove a proc entry by PDE take cgroup_open() and cpuset_open() to fs/proc/base.c ppc: Clean up scanlog ppc: Clean up rtas_flash driver somewhat hostap: proc: Use remove_proc_subtree() drm: proc: Use remove_proc_subtree() drm: proc: Use minor->index to label things, not PDE->name drm: Constify drm_proc_list[] zoran: Don't print proc_dir_entry data in debug reiserfs: Don't access the proc_dir_entry in r_open(), r_start() r_show() proc: Supply an accessor for getting the data from a PDE's parent airo: Use remove_proc_subtree() rtl8192u: Don't need to save device proc dir PDE rtl8187se: Use a dir under /proc/net/r8180/ proc: Add proc_mkdir_data() proc: Move some bits from linux/proc_fs.h to linux/{of.h,signal.h,tty.h} proc: Move PDE_NET() to fs/proc/proc_net.c ...
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile2
-rw-r--r--fs/afs/proc.c8
-rw-r--r--fs/aio.c4
-rw-r--r--fs/binfmt_aout.c25
-rw-r--r--fs/binfmt_elf_fdpic.c7
-rw-r--r--fs/binfmt_flat.c37
-rw-r--r--fs/btrfs/file.c3
-rw-r--r--fs/cachefiles/rdwr.c2
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/coda/file.c2
-rw-r--r--fs/compat.c184
-rw-r--r--fs/coredump.c6
-rw-r--r--fs/efivarfs/file.c1
-rw-r--r--fs/efivarfs/inode.c1
-rw-r--r--fs/efivarfs/super.c2
-rw-r--r--fs/exec.c9
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/f2fs/acl.c2
-rw-r--r--fs/f2fs/dir.c2
-rw-r--r--fs/f2fs/file.c4
-rw-r--r--fs/fifo.c153
-rw-r--r--fs/file.c68
-rw-r--r--fs/fuse/dev.c2
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/hpfs/file.c36
-rw-r--r--fs/hppfs/hppfs.c20
-rw-r--r--fs/inode.c2
-rw-r--r--fs/internal.h5
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/mount.h7
-rw-r--r--fs/namespace.c341
-rw-r--r--fs/nfsd/nfsctl.c4
-rw-r--r--fs/notify/inotify/inotify_user.c3
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ocfs2/file.c9
-rw-r--r--fs/pipe.c458
-rw-r--r--fs/pnode.c10
-rw-r--r--fs/pnode.h7
-rw-r--r--fs/proc/base.c56
-rw-r--r--fs/proc/fd.h5
-rw-r--r--fs/proc/generic.c377
-rw-r--r--fs/proc/inode.c283
-rw-r--r--fs/proc/internal.h313
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/proc/namespaces.c17
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_net.c4
-rw-r--r--fs/proc/root.c2
-rw-r--r--fs/proc/self.c47
-rw-r--r--fs/proc/vmcore.c5
-rw-r--r--fs/read_write.c205
-rw-r--r--fs/read_write.h5
-rw-r--r--fs/reiserfs/file.c61
-rw-r--r--fs/reiserfs/procfs.c62
-rw-r--r--fs/seq_file.c18
-rw-r--r--fs/splice.c14
-rw-r--r--fs/xfs/xfs_file.c3
58 files changed, 1307 insertions, 1613 deletions
diff --git a/fs/Makefile b/fs/Makefile
index f0db9c941a5f..4fe6df3ec28f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -7,7 +7,7 @@
obj-y := open.o read_write.o file_table.o super.o \
char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
- ioctl.o readdir.o select.o fifo.o dcache.o inode.o \
+ ioctl.o readdir.o select.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o \
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 096b23f821a1..526e4bbbde59 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -190,7 +190,7 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
return ret;
m = file->private_data;
- m->private = PDE(inode)->data;
+ m->private = PDE_DATA(inode);
return 0;
}
@@ -448,7 +448,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
struct seq_file *m;
int ret;
- cell = PDE(inode)->data;
+ cell = PDE_DATA(inode);
if (!cell)
return -ENOENT;
@@ -554,7 +554,7 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
struct seq_file *m;
int ret;
- cell = PDE(inode)->data;
+ cell = PDE_DATA(inode);
if (!cell)
return -ENOENT;
@@ -659,7 +659,7 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
struct seq_file *m;
int ret;
- cell = PDE(inode)->data;
+ cell = PDE_DATA(inode);
if (!cell)
return -ENOENT;
diff --git a/fs/aio.c b/fs/aio.c
index 6db8745c2edd..351afe7ac78e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1324,6 +1324,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
if (iocb->ki_pos < 0)
return -EINVAL;
+ if (opcode == IOCB_CMD_PWRITEV)
+ file_start_write(file);
do {
ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
iocb->ki_nr_segs - iocb->ki_cur_seg,
@@ -1336,6 +1338,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
} while (ret > 0 && iocb->ki_left > 0 &&
(opcode == IOCB_CMD_PWRITEV ||
(!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
+ if (opcode == IOCB_CMD_PWRITEV)
+ file_end_write(file);
/* This means we must have transferred all that we could */
/* No need to retry anymore */
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 02fe378fc506..bce87694f7b0 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -286,15 +286,12 @@ static int load_aout_binary(struct linux_binprm * bprm)
return error;
}
- error = bprm->file->f_op->read(bprm->file,
- (char __user *)text_addr,
- ex.a_text+ex.a_data, &pos);
+ error = read_code(bprm->file, text_addr, pos,
+ ex.a_text+ex.a_data);
if ((signed long)error < 0) {
send_sig(SIGKILL, current, 0);
return error;
}
-
- flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
} else {
if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
(N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
@@ -310,14 +307,9 @@ static int load_aout_binary(struct linux_binprm * bprm)
}
if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
- loff_t pos = fd_offset;
vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
- bprm->file->f_op->read(bprm->file,
- (char __user *)N_TXTADDR(ex),
- ex.a_text+ex.a_data, &pos);
- flush_icache_range((unsigned long) N_TXTADDR(ex),
- (unsigned long) N_TXTADDR(ex) +
- ex.a_text+ex.a_data);
+ read_code(bprm->file, N_TXTADDR(ex), fd_offset,
+ ex.a_text + ex.a_data);
goto beyond_if;
}
@@ -396,8 +388,6 @@ static int load_aout_library(struct file *file)
start_addr = ex.a_entry & 0xfffff000;
if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
- loff_t pos = N_TXTOFF(ex);
-
if (printk_ratelimit())
{
printk(KERN_WARNING
@@ -406,11 +396,8 @@ static int load_aout_library(struct file *file)
}
vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
- file->f_op->read(file, (char __user *)start_addr,
- ex.a_text + ex.a_data, &pos);
- flush_icache_range((unsigned long) start_addr,
- (unsigned long) start_addr + ex.a_text + ex.a_data);
-
+ read_code(file, start_addr, N_TXTOFF(ex),
+ ex.a_text + ex.a_data);
retval = 0;
goto out;
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c1cc06aed601..9dac212fc6f9 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -926,7 +926,6 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
struct elf32_fdpic_loadseg *seg;
struct elf32_phdr *phdr;
unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags;
- loff_t fpos;
int loop, ret;
load_addr = params->load_addr;
@@ -964,14 +963,12 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
if (params->phdrs[loop].p_type != PT_LOAD)
continue;
- fpos = phdr->p_offset;
-
seg->addr = maddr + (phdr->p_vaddr - base);
seg->p_vaddr = phdr->p_vaddr;
seg->p_memsz = phdr->p_memsz;
- ret = file->f_op->read(file, (void *) seg->addr,
- phdr->p_filesz, &fpos);
+ ret = read_code(file, seg->addr, phdr->p_offset,
+ phdr->p_filesz);
if (ret < 0)
return ret;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 2036d21baaef..d50bbe59da1e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -207,11 +207,12 @@ static int decompress_exec(
/* Read in first chunk of data and parse gzip header. */
fpos = offset;
- ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos);
+ ret = kernel_read(bprm->file, offset, buf, LBUFSIZE);
strm.next_in = buf;
strm.avail_in = ret;
strm.total_in = 0;
+ fpos += ret;
retval = -ENOEXEC;
@@ -277,7 +278,7 @@ static int decompress_exec(
}
while ((ret = zlib_inflate(&strm, Z_NO_FLUSH)) == Z_OK) {
- ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos);
+ ret = kernel_read(bprm->file, fpos, buf, LBUFSIZE);
if (ret <= 0)
break;
len -= ret;
@@ -285,6 +286,7 @@ static int decompress_exec(
strm.next_in = buf;
strm.avail_in = ret;
strm.total_in = 0;
+ fpos += ret;
}
if (ret < 0) {
@@ -428,6 +430,7 @@ static int load_flat_file(struct linux_binprm * bprm,
unsigned long textpos = 0, datapos = 0, result;
unsigned long realdatastart = 0;
unsigned long text_len, data_len, bss_len, stack_len, flags;
+ unsigned long full_data;
unsigned long len, memp = 0;
unsigned long memp_size, extra, rlim;
unsigned long *reloc = 0, *rp;
@@ -451,6 +454,7 @@ static int load_flat_file(struct linux_binprm * bprm,
relocs = ntohl(hdr->reloc_count);
flags = ntohl(hdr->flags);
rev = ntohl(hdr->rev);
+ full_data = data_len + relocs * sizeof(unsigned long);
if (strncmp(hdr->magic, "bFLT", 4)) {
/*
@@ -577,12 +581,12 @@ static int load_flat_file(struct linux_binprm * bprm,
#ifdef CONFIG_BINFMT_ZFLAT
if (flags & FLAT_FLAG_GZDATA) {
result = decompress_exec(bprm, fpos, (char *) datapos,
- data_len + (relocs * sizeof(unsigned long)), 0);
+ full_data, 0);
} else
#endif
{
- result = bprm->file->f_op->read(bprm->file, (char *) datapos,
- data_len + (relocs * sizeof(unsigned long)), &fpos);
+ result = read_code(bprm->file, datapos, fpos,
+ full_data);
}
if (IS_ERR_VALUE(result)) {
printk("Unable to read data+bss, errno %d\n", (int)-result);
@@ -627,30 +631,25 @@ static int load_flat_file(struct linux_binprm * bprm,
if (flags & FLAT_FLAG_GZIP) {
result = decompress_exec(bprm, sizeof (struct flat_hdr),
(((char *) textpos) + sizeof (struct flat_hdr)),
- (text_len + data_len + (relocs * sizeof(unsigned long))
+ (text_len + full_data
- sizeof (struct flat_hdr)),
0);
memmove((void *) datapos, (void *) realdatastart,
- data_len + (relocs * sizeof(unsigned long)));
+ full_data);
} else if (flags & FLAT_FLAG_GZDATA) {
- fpos = 0;
- result = bprm->file->f_op->read(bprm->file,
- (char *) textpos, text_len, &fpos);
+ result = read_code(bprm->file, textpos, 0, text_len);
if (!IS_ERR_VALUE(result))
result = decompress_exec(bprm, text_len, (char *) datapos,
- data_len + (relocs * sizeof(unsigned long)), 0);
+ full_data, 0);
}
else
#endif
{
- fpos = 0;
- result = bprm->file->f_op->read(bprm->file,
- (char *) textpos, text_len, &fpos);
- if (!IS_ERR_VALUE(result)) {
- fpos = ntohl(hdr->data_start);
- result = bprm->file->f_op->read(bprm->file, (char *) datapos,
- data_len + (relocs * sizeof(unsigned long)), &fpos);
- }
+ result = read_code(bprm->file, textpos, 0, text_len);
+ if (!IS_ERR_VALUE(result))
+ result = read_code(bprm->file, datapos,
+ ntohl(hdr->data_start),
+ full_data);
}
if (IS_ERR_VALUE(result)) {
printk("Unable to read code+data+bss, errno %d\n",(int)-result);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ade03e6f7bd2..bb8b7a0e28a6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1514,8 +1514,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
size_t count, ocount;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
- sb_start_write(inode->i_sb);
-
mutex_lock(&inode->i_mutex);
err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@ -1617,7 +1615,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
if (sync)
atomic_dec(&BTRFS_I(inode)->sync_writers);
out:
- sb_end_write(inode->i_sb);
current->backing_dev_info = NULL;
return num_written ? num_written : err;
}
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 480992259707..317f9ee9c991 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -962,12 +962,14 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
}
data = kmap(page);
+ file_start_write(file);
old_fs = get_fs();
set_fs(KERNEL_DS);
ret = file->f_op->write(
file, (const void __user *) data, len, &pos);
set_fs(old_fs);
kunmap(page);
+ file_end_write(file);
if (ret != len)
ret = -EIO;
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7a0dd99e4507..2d4a231dd70b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2520,8 +2520,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
BUG_ON(iocb->ki_pos != pos);
- sb_start_write(inode->i_sb);
-
/*
* We need to hold the sem to be sure nobody modifies lock list
* with a brlock that prevents writing.
@@ -2545,7 +2543,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
}
up_read(&cinode->lock_sem);
- sb_end_write(inode->i_sb);
return rc;
}
diff --git a/fs/coda/file.c b/fs/coda/file.c
index fa4c100bdc7d..380b798f8443 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -79,6 +79,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
return -EINVAL;
host_inode = file_inode(host_file);
+ file_start_write(host_file);
mutex_lock(&coda_inode->i_mutex);
ret = host_file->f_op->write(host_file, buf, count, ppos);
@@ -87,6 +88,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
mutex_unlock(&coda_inode->i_mutex);
+ file_end_write(host_file);
return ret;
}
diff --git a/fs/compat.c b/fs/compat.c
index 5f83ffa42115..d0560c93973d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1068,190 +1068,6 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
}
#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */
-static ssize_t compat_do_readv_writev(int type, struct file *file,
- const struct compat_iovec __user *uvector,
- unsigned long nr_segs, loff_t *pos)
-{
- compat_ssize_t tot_len;
- struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov = iovstack;
- ssize_t ret;
- io_fn_t fn;
- iov_fn_t fnv;
-
- ret = -EINVAL;
- if (!file->f_op)
- goto out;
-
- ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
- UIO_FASTIOV, iovstack, &iov);
- if (ret <= 0)
- goto out;
-
- tot_len = ret;
- ret = rw_verify_area(type, file, pos, tot_len);
- if (ret < 0)
- goto out;
-
- fnv = NULL;
- if (type == READ) {
- fn = file->f_op->read;
- fnv = file->f_op->aio_read;
- } else {
- fn = (io_fn_t)file->f_op->write;
- fnv = file->f_op->aio_write;
- }
-
- if (fnv)
- ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
- pos, fnv);
- else
- ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
-
-out:
- if (iov != iovstack)
- kfree(iov);
- if ((ret + (type == READ)) > 0) {
- if (type == READ)
- fsnotify_access(file);
- else
- fsnotify_modify(file);
- }
- return ret;
-}
-
-static size_t compat_readv(struct file *file,
- const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos)
-{
- ssize_t ret = -EBADF;
-
- if (!(file->f_mode & FMODE_READ))
- goto out;
-
- ret = -EINVAL;
- if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
- goto out;
-
- ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
-
-out:
- if (ret > 0)
- add_rchar(current, ret);
- inc_syscr(current);
- return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
- unsigned long vlen)
-{
- struct fd f = fdget(fd);
- ssize_t ret;
- loff_t pos;
-
- if (!f.file)
- return -EBADF;
- pos = f.file->f_pos;
- ret = compat_readv(f.file, vec, vlen, &pos);
- f.file->f_pos = pos;
- fdput(f);
- return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos)
-{
- struct fd f;
- ssize_t ret;
-
- if (pos < 0)
- return -EINVAL;
- f = fdget(fd);
- if (!f.file)
- return -EBADF;
- ret = -ESPIPE;
- if (f.file->f_mode & FMODE_PREAD)
- ret = compat_readv(f.file, vec, vlen, &pos);
- fdput(f);
- return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
- unsigned long vlen, u32 pos_low, u32 pos_high)
-{
- loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return compat_sys_preadv64(fd, vec, vlen, pos);
-}
-
-static size_t compat_writev(struct file *file,
- const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos)
-{
- ssize_t ret = -EBADF;
-
- if (!(file->f_mode & FMODE_WRITE))
- goto out;
-
- ret = -EINVAL;
- if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
- goto out;
-
- ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
-
-out:
- if (ret > 0)
- add_wchar(current, ret);
- inc_syscw(current);
- return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
- unsigned long vlen)
-{
- struct fd f = fdget(fd);
- ssize_t ret;
- loff_t pos;
-
- if (!f.file)
- return -EBADF;
- pos = f.file->f_pos;
- ret = compat_writev(f.file, vec, vlen, &pos);
- f.file->f_pos = pos;
- fdput(f);
- return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos)
-{
- struct fd f;
- ssize_t ret;
-
- if (pos < 0)
- return -EINVAL;
- f = fdget(fd);
- if (!f.file)
- return -EBADF;
- ret = -ESPIPE;
- if (f.file->f_mode & FMODE_PWRITE)
- ret = compat_writev(f.file, vec, vlen, &pos);
- fdput(f);
- return ret;
-}
-
-asmlinkage ssize_t
-compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
- unsigned long vlen, u32 pos_low, u32 pos_high)
-{
- loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return compat_sys_pwritev64(fd, vec, vlen, pos);
-}
-
/*
* Exactly like fs/open.c:sys_open(), except that it doesn't set the
* O_LARGEFILE flag.
diff --git a/fs/coredump.c b/fs/coredump.c
index ec306cc9a28a..a9abe313e8d5 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -432,9 +432,7 @@ static bool dump_interrupted(void)
static void wait_for_dump_helpers(struct file *file)
{
- struct pipe_inode_info *pipe;
-
- pipe = file_inode(file)->i_pipe;
+ struct pipe_inode_info *pipe = file->private_data;
pipe_lock(pipe);
pipe->readers++;
@@ -656,7 +654,9 @@ void do_coredump(siginfo_t *siginfo)
goto close_fail;
if (displaced)
put_files_struct(displaced);
+ file_start_write(cprm.file);
core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm);
+ file_end_write(cprm.file);
if (ispipe && core_pipe_limit)
wait_for_dump_helpers(cprm.file);
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index ede07fc7309f..bfb531564319 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -9,6 +9,7 @@
#include <linux/efi.h>
#include <linux/fs.h>
+#include <linux/slab.h>
#include "internal.h"
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 640e289d522e..7e787fb90293 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -10,6 +10,7 @@
#include <linux/efi.h>
#include <linux/fs.h>
#include <linux/ctype.h>
+#include <linux/slab.h>
#include "internal.h"
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 525a2a1ac16c..141aee31884f 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -13,6 +13,8 @@
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/ucs2_string.h>
+#include <linux/slab.h>
+#include <linux/magic.h>
#include "internal.h"
diff --git a/fs/exec.c b/fs/exec.c
index 963f510a25ab..643019585574 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -802,6 +802,15 @@ int kernel_read(struct file *file, loff_t offset,
EXPORT_SYMBOL(kernel_read);
+ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
+{
+ ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos);
+ if (res > 0)
+ flush_icache_range(addr, addr + len);
+ return res;
+}
+EXPORT_SYMBOL(read_code);
+
static int exec_mmap(struct mm_struct *mm)
{
struct task_struct *tsk;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a11ea4d6164c..b1ed9e07434b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2260,7 +2260,7 @@ static const struct seq_operations ext4_mb_seq_groups_ops = {
static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
{
- struct super_block *sb = PDE(inode)->data;
+ struct super_block *sb = PDE_DATA(inode);
int rc;
rc = seq_open(file, &ext4_mb_seq_groups_ops);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dbc7c090c13a..24a146bde742 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1806,7 +1806,7 @@ static int options_seq_show(struct seq_file *seq, void *offset)
static int options_open_fs(struct inode *inode, struct file *file)
{
- return single_open(file, options_seq_show, PDE(inode)->data);
+ return single_open(file, options_seq_show, PDE_DATA(inode));
}
static const struct file_operations ext4_seq_options_fops = {
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 137af4255da6..44abc2f286e0 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -299,7 +299,7 @@ int f2fs_acl_chmod(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
struct posix_acl *acl;
int error;
- mode_t mode = get_inode_mode(inode);
+ umode_t mode = get_inode_mode(inode);
if (!test_opt(sbi, POSIX_ACL))
return 0;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a1f38443ecee..1be948768e2f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -60,7 +60,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
{
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 958a46da19ae..db626282d424 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -590,7 +590,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
unsigned int oldflags;
- ret = mnt_want_write(filp->f_path.mnt);
+ ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -627,7 +627,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
out:
- mnt_drop_write(filp->f_path.mnt);
+ mnt_drop_write_file(filp);
return ret;
}
default:
diff --git a/fs/fifo.c b/fs/fifo.c
deleted file mode 100644
index cf6f4345ceb0..000000000000
--- a/fs/fifo.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * linux/fs/fifo.c
- *
- * written by Paul H. Hargrove
- *
- * Fixes:
- * 10-06-1999, AV: fixed OOM handling in fifo_open(), moved
- * initialization there, switched to external
- * allocation of pipe_inode_info.
- */
-
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/pipe_fs_i.h>
-
-static int wait_for_partner(struct inode* inode, unsigned int *cnt)
-{
- int cur = *cnt;
-
- while (cur == *cnt) {
- pipe_wait(inode->i_pipe);
- if (signal_pending(current))
- break;
- }
- return cur == *cnt ? -ERESTARTSYS : 0;
-}
-
-static void wake_up_partner(struct inode* inode)
-{
- wake_up_interruptible(&inode->i_pipe->wait);
-}
-
-static int fifo_open(struct inode *inode, struct file *filp)
-{
- struct pipe_inode_info *pipe;
- int ret;
-
- mutex_lock(&inode->i_mutex);
- pipe = inode->i_pipe;
- if (!pipe) {
- ret = -ENOMEM;
- pipe = alloc_pipe_info(inode);
- if (!pipe)
- goto err_nocleanup;
- inode->i_pipe = pipe;
- }
- filp->f_version = 0;
-
- /* We can only do regular read/write on fifos */
- filp->f_mode &= (FMODE_READ | FMODE_WRITE);
-
- switch (filp->f_mode) {
- case FMODE_READ:
- /*
- * O_RDONLY
- * POSIX.1 says that O_NONBLOCK means return with the FIFO
- * opened, even when there is no process writing the FIFO.
- */
- filp->f_op = &read_pipefifo_fops;
- pipe->r_counter++;
- if (pipe->readers++ == 0)
- wake_up_partner(inode);
-
- if (!pipe->writers) {
- if ((filp->f_flags & O_NONBLOCK)) {
- /* suppress POLLHUP until we have
- * seen a writer */
- filp->f_version = pipe->w_counter;
- } else {
- if (wait_for_partner(inode, &pipe->w_counter))
- goto err_rd;
- }
- }
- break;
-
- case FMODE_WRITE:
- /*
- * O_WRONLY
- * POSIX.1 says that O_NONBLOCK means return -1 with
- * errno=ENXIO when there is no process reading the FIFO.
- */
- ret = -ENXIO;
- if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
- goto err;
-
- filp->f_op = &write_pipefifo_fops;
- pipe->w_counter++;
- if (!pipe->writers++)
- wake_up_partner(inode);
-
- if (!pipe->readers) {
- if (wait_for_partner(inode, &pipe->r_counter))
- goto err_wr;
- }
- break;
-
- case FMODE_READ | FMODE_WRITE:
- /*
- * O_RDWR
- * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
- * This implementation will NEVER block on a O_RDWR open, since
- * the process can at least talk to itself.
- */
- filp->f_op = &rdwr_pipefifo_fops;
-
- pipe->readers++;
- pipe->writers++;
- pipe->r_counter++;
- pipe->w_counter++;
- if (pipe->readers == 1 || pipe->writers == 1)
- wake_up_partner(inode);
- break;
-
- default:
- ret = -EINVAL;
- goto err;
- }
-
- /* Ok! */
- mutex_unlock(&inode->i_mutex);
- return 0;
-
-err_rd:
- if (!--pipe->readers)
- wake_up_interruptible(&pipe->wait);
- ret = -ERESTARTSYS;
- goto err;
-
-err_wr:
- if (!--pipe->writers)
- wake_up_interruptible(&pipe->wait);
- ret = -ERESTARTSYS;
- goto err;
-
-err:
- if (!pipe->readers && !pipe->writers)
- free_pipe_info(inode);
-
-err_nocleanup:
- mutex_unlock(&inode->i_mutex);
- return ret;
-}
-
-/*
- * Dummy default file-operations: the only thing this does
- * is contain the open that then fills in the correct operations
- * depending on the access mode of the file...
- */
-const struct file_operations def_fifo_fops = {
- .open = fifo_open, /* will set read_ or write_pipefifo_fops */
- .llseek = noop_llseek,
-};
diff --git a/fs/file.c b/fs/file.c
index 3906d9577a18..4a78f981557a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -23,24 +23,10 @@
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
-struct fdtable_defer {
- spinlock_t lock;
- struct work_struct wq;
- struct fdtable *next;
-};
-
int sysctl_nr_open __read_mostly = 1024*1024;
int sysctl_nr_open_min = BITS_PER_LONG;
int sysctl_nr_open_max = 1024 * 1024; /* raised later */
-/*
- * We use this list to defer free fdtables that have vmalloced
- * sets/arrays. By keeping a per-cpu list, we avoid having to embed
- * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
- * this per-task structure.
- */
-static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
-
static void *alloc_fdmem(size_t size)
{
/*
@@ -67,46 +53,9 @@ static void __free_fdtable(struct fdtable *fdt)
kfree(fdt);
}
-static void free_fdtable_work(struct work_struct *work)
-{
- struct fdtable_defer *f =
- container_of(work, struct fdtable_defer, wq);
- struct fdtable *fdt;
-
- spin_lock_bh(&f->lock);
- fdt = f->next;
- f->next = NULL;
- spin_unlock_bh(&f->lock);
- while(fdt) {
- struct fdtable *next = fdt->next;
-
- __free_fdtable(fdt);
- fdt = next;
- }
-}
-
static void free_fdtable_rcu(struct rcu_head *rcu)
{
- struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
- struct fdtable_defer *fddef;
-
- BUG_ON(!fdt);
- BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
-
- if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
- kfree(fdt->fd);
- kfree(fdt->open_fds);
- kfree(fdt);
- } else {
- fddef = &get_cpu_var(fdtable_defer_list);
- spin_lock(&fddef->lock);
- fdt->next = fddef->next;
- fddef->next = fdt;
- /* vmallocs are handled from the workqueue context */
- schedule_work(&fddef->wq);
- spin_unlock(&fddef->lock);
- put_cpu_var(fdtable_defer_list);
- }
+ __free_fdtable(container_of(rcu, struct fdtable, rcu));
}
/*
@@ -174,7 +123,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
fdt->open_fds = data;
data += nr / BITS_PER_BYTE;
fdt->close_on_exec = data;
- fdt->next = NULL;
return fdt;
@@ -221,7 +169,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
/* Continue as planned */
copy_fdtable(new_fdt, cur_fdt);
rcu_assign_pointer(files->fdt, new_fdt);
- if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
+ if (cur_fdt != &files->fdtab)
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
} else {
/* Somebody else expanded, so undo our attempt */
@@ -316,7 +264,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
new_fdt->close_on_exec = newf->close_on_exec_init;
new_fdt->open_fds = newf->open_fds_init;
new_fdt->fd = &newf->fd_array[0];
- new_fdt->next = NULL;
spin_lock(&oldf->file_lock);
old_fdt = files_fdtable(oldf);
@@ -490,19 +437,8 @@ void exit_files(struct task_struct *tsk)
}
}
-static void fdtable_defer_list_init(int cpu)
-{
- struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
- spin_lock_init(&fddef->lock);
- INIT_WORK(&fddef->wq, free_fdtable_work);
- fddef->next = NULL;
-}
-
void __init files_defer_init(void)
{
- int i;
- for_each_possible_cpu(i)
- fdtable_defer_list_init(i);
sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
-BITS_PER_LONG;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 11dfa0c3fb46..9bfd1a3214e6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1319,7 +1319,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
page_nr++;
ret += buf->len;
- if (pipe->inode)
+ if (pipe->files)
do_wakeup = 1;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 34b80ba95bad..d15c6f21c17f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -971,7 +971,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
return err;
count = ocount;
- sb_start_write(inode->i_sb);
mutex_lock(&inode->i_mutex);
/* We can write back this queue in page reclaim */
@@ -1030,7 +1029,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
out:
current->backing_dev_info = NULL;
mutex_unlock(&inode->i_mutex);
- sb_end_write(inode->i_sb);
return written ? written : err;
}
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 9f9dbeceeee7..3027f4dbbab5 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -131,6 +131,24 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,
return ret;
}
+static int hpfs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *pagep, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ int err;
+ err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
+ if (err < len)
+ hpfs_write_failed(mapping, pos + len);
+ if (!(err < 0)) {
+ /* make sure we write it on close, if not earlier */
+ hpfs_lock(inode->i_sb);
+ hpfs_i(inode)->i_dirty = 1;
+ hpfs_unlock(inode->i_sb);
+ }
+ return err;
+}
+
static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping,block,hpfs_get_block);
@@ -140,30 +158,16 @@ const struct address_space_operations hpfs_aops = {
.readpage = hpfs_readpage,
.writepage = hpfs_writepage,
.write_begin = hpfs_write_begin,
- .write_end = generic_write_end,
+ .write_end = hpfs_write_end,
.bmap = _hpfs_bmap
};
-static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- ssize_t retval;
-
- retval = do_sync_write(file, buf, count, ppos);
- if (retval > 0) {
- hpfs_lock(file->f_path.dentry->d_sb);
- hpfs_i(file_inode(file))->i_dirty = 1;
- hpfs_unlock(file->f_path.dentry->d_sb);
- }
- return retval;
-}
-
const struct file_operations hpfs_file_ops =
{
.llseek = generic_file_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
- .write = hpfs_file_write,
+ .write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.release = hpfs_file_release,
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 126d3c2e2dee..cd3e38972c86 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -436,7 +436,6 @@ static int hppfs_open(struct inode *inode, struct file *file)
path.mnt = inode->i_sb->s_fs_info;
path.dentry = HPPFS_I(inode)->proc_dentry;
- /* XXX This isn't closed anywhere */
data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred);
err = PTR_ERR(data->proc_file);
if (IS_ERR(data->proc_file))
@@ -523,12 +522,23 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
return default_llseek(file, off, where);
}
+static int hppfs_release(struct inode *inode, struct file *file)
+{
+ struct hppfs_private *data = file->private_data;
+ struct file *proc_file = data->proc_file;
+ if (proc_file)
+ fput(proc_file);
+ kfree(data);
+ return 0;
+}
+
static const struct file_operations hppfs_file_fops = {
.owner = NULL,
.llseek = hppfs_llseek,
.read = hppfs_read,
.write = hppfs_write,
.open = hppfs_open,
+ .release = hppfs_release,
};
struct hppfs_dirent {
@@ -570,18 +580,12 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
return err;
}
-static int hppfs_fsync(struct file *file, loff_t start, loff_t end,
- int datasync)
-{
- return filemap_write_and_wait_range(file->f_mapping, start, end);
-}
-
static const struct file_operations hppfs_dir_fops = {
.owner = NULL,
.readdir = hppfs_readdir,
.open = hppfs_dir_open,
- .fsync = hppfs_fsync,
.llseek = default_llseek,
+ .release = hppfs_release,
};
static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf)
diff --git a/fs/inode.c b/fs/inode.c
index a898b3d43ccf..00d5fc3b86e1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1803,7 +1803,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
inode->i_fop = &def_blk_fops;
inode->i_rdev = rdev;
} else if (S_ISFIFO(mode))
- inode->i_fop = &def_fifo_fops;
+ inode->i_fop = &pipefifo_fops;
else if (S_ISSOCK(mode))
inode->i_fop = &bad_sock_fops;
else
diff --git a/fs/internal.h b/fs/internal.h
index 4be78237d896..eaa75f75b625 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -130,3 +130,8 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
* read_write.c
*/
extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
+
+/*
+ * pipe.c
+ */
+extern const struct file_operations pipefifo_fops;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f6c5ba027f4f..95457576e434 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -979,7 +979,7 @@ static const struct seq_operations jbd2_seq_info_ops = {
static int jbd2_seq_info_open(struct inode *inode, struct file *file)
{
- journal_t *journal = PDE(inode)->data;
+ journal_t *journal = PDE_DATA(inode);
struct jbd2_stats_proc_session *s;
int rc, size;
diff --git a/fs/mount.h b/fs/mount.h
index cd5007980400..64a858143ff9 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -18,6 +18,12 @@ struct mnt_pcp {
int mnt_writers;
};
+struct mountpoint {
+ struct list_head m_hash;
+ struct dentry *m_dentry;
+ int m_count;
+};
+
struct mount {
struct list_head mnt_hash;
struct mount *mnt_parent;
@@ -40,6 +46,7 @@ struct mount {
struct list_head mnt_slave; /* slave list entry */
struct mount *mnt_master; /* slave is on master->mnt_slave_list */
struct mnt_namespace *mnt_ns; /* containing namespace */
+ struct mountpoint *mnt_mp; /* where is it mounted */
#ifdef CONFIG_FSNOTIFY
struct hlist_head mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
diff --git a/fs/namespace.c b/fs/namespace.c
index 341d3f564082..b4f96a5230a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -21,7 +21,8 @@
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
#include <linux/uaccess.h>
-#include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
+#include <linux/magic.h>
#include "pnode.h"
#include "internal.h"
@@ -36,6 +37,7 @@ static int mnt_id_start = 0;
static int mnt_group_start = 1;
static struct list_head *mount_hashtable __read_mostly;
+static struct list_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static struct rw_semaphore namespace_sem;
@@ -605,6 +607,51 @@ struct vfsmount *lookup_mnt(struct path *path)
}
}
+static struct mountpoint *new_mountpoint(struct dentry *dentry)
+{
+ struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
+ struct mountpoint *mp;
+
+ list_for_each_entry(mp, chain, m_hash) {
+ if (mp->m_dentry == dentry) {
+ /* might be worth a WARN_ON() */
+ if (d_unlinked(dentry))
+ return ERR_PTR(-ENOENT);
+ mp->m_count++;
+ return mp;
+ }
+ }
+
+ mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+ if (!mp)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock(&dentry->d_lock);
+ if (d_unlinked(dentry)) {
+ spin_unlock(&dentry->d_lock);
+ kfree(mp);
+ return ERR_PTR(-ENOENT);
+ }
+ dentry->d_flags |= DCACHE_MOUNTED;
+ spin_unlock(&dentry->d_lock);
+ mp->m_dentry = dentry;
+ mp->m_count = 1;
+ list_add(&mp->m_hash, chain);
+ return mp;
+}
+
+static void put_mountpoint(struct mountpoint *mp)
+{
+ if (!--mp->m_count) {
+ struct dentry *dentry = mp->m_dentry;
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_MOUNTED;
+ spin_unlock(&dentry->d_lock);
+ list_del(&mp->m_hash);
+ kfree(mp);
+ }
+}
+
static inline int check_mnt(struct mount *mnt)
{
return mnt->mnt_ns == current->nsproxy->mnt_ns;
@@ -633,27 +680,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
}
/*
- * Clear dentry's mounted state if it has no remaining mounts.
- * vfsmount_lock must be held for write.
- */
-static void dentry_reset_mounted(struct dentry *dentry)
-{
- unsigned u;
-
- for (u = 0; u < HASH_SIZE; u++) {
- struct mount *p;
-
- list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
- if (p->mnt_mountpoint == dentry)
- return;
- }
- }
- spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~DCACHE_MOUNTED;
- spin_unlock(&dentry->d_lock);
-}
-
-/*
* vfsmount lock must be held for write
*/
static void detach_mnt(struct mount *mnt, struct path *old_path)
@@ -664,32 +690,35 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
list_del_init(&mnt->mnt_child);
list_del_init(&mnt->mnt_hash);
- dentry_reset_mounted(old_path->dentry);
+ put_mountpoint(mnt->mnt_mp);
+ mnt->mnt_mp = NULL;
}
/*
* vfsmount lock must be held for write
*/
-void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry,
+void mnt_set_mountpoint(struct mount *mnt,
+ struct mountpoint *mp,
struct mount *child_mnt)
{
+ mp->m_count++;
mnt_add_count(mnt, 1); /* essentially, that's mntget */
- child_mnt->mnt_mountpoint = dget(dentry);
+ child_mnt->mnt_mountpoint = dget(mp->m_dentry);
child_mnt->mnt_parent = mnt;
- spin_lock(&dentry->d_lock);
- dentry->d_flags |= DCACHE_MOUNTED;
- spin_unlock(&dentry->d_lock);
+ child_mnt->mnt_mp = mp;
}
/*
* vfsmount lock must be held for write
*/
-static void attach_mnt(struct mount *mnt, struct path *path)
+static void attach_mnt(struct mount *mnt,
+ struct mount *parent,
+ struct mountpoint *mp)
{
- mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt);
+ mnt_set_mountpoint(parent, mp, mnt);
list_add_tail(&mnt->mnt_hash, mount_hashtable +
- hash(path->mnt, path->dentry));
- list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
+ hash(&parent->mnt, mp->m_dentry));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}
/*
@@ -1095,11 +1124,23 @@ int may_umount(struct vfsmount *mnt)
EXPORT_SYMBOL(may_umount);
-void release_mounts(struct list_head *head)
+static LIST_HEAD(unmounted); /* protected by namespace_sem */
+
+static void namespace_unlock(void)
{
struct mount *mnt;
- while (!list_empty(head)) {
- mnt = list_first_entry(head, struct mount, mnt_hash);
+ LIST_HEAD(head);
+
+ if (likely(list_empty(&unmounted))) {
+ up_write(&namespace_sem);
+ return;
+ }
+
+ list_splice_init(&unmounted, &head);
+ up_write(&namespace_sem);
+
+ while (!list_empty(&head)) {
+ mnt = list_first_entry(&head, struct mount, mnt_hash);
list_del_init(&mnt->mnt_hash);
if (mnt_has_parent(mnt)) {
struct dentry *dentry;
@@ -1119,11 +1160,16 @@ void release_mounts(struct list_head *head)
}
}
+static inline void namespace_lock(void)
+{
+ down_write(&namespace_sem);
+}
+
/*
* vfsmount lock must be held for write
* namespace_sem must be held for write
*/
-void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
+void umount_tree(struct mount *mnt, int propagate)
{
LIST_HEAD(tmp_list);
struct mount *p;
@@ -1142,20 +1188,20 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
list_del_init(&p->mnt_child);
if (mnt_has_parent(p)) {
p->mnt_parent->mnt_ghosts++;
- dentry_reset_mounted(p->mnt_mountpoint);
+ put_mountpoint(p->mnt_mp);
+ p->mnt_mp = NULL;
}
change_mnt_propagation(p, MS_PRIVATE);
}
- list_splice(&tmp_list, kill);
+ list_splice(&tmp_list, &unmounted);
}
-static void shrink_submounts(struct mount *mnt, struct list_head *umounts);
+static void shrink_submounts(struct mount *mnt);
static int do_umount(struct mount *mnt, int flags)
{
struct super_block *sb = mnt->mnt.mnt_sb;
int retval;
- LIST_HEAD(umount_list);
retval = security_sb_umount(&mnt->mnt, flags);
if (retval)
@@ -1222,22 +1268,21 @@ static int do_umount(struct mount *mnt, int flags)
return retval;
}
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
event++;
if (!(flags & MNT_DETACH))
- shrink_submounts(mnt, &umount_list);
+ shrink_submounts(mnt);
retval = -EBUSY;
if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
if (!list_empty(&mnt->mnt_list))
- umount_tree(mnt, 1, &umount_list);
+ umount_tree(mnt, 1);
retval = 0;
}
br_write_unlock(&vfsmount_lock);
- up_write(&namespace_sem);
- release_mounts(&umount_list);
+ namespace_unlock();
return retval;
}
@@ -1310,13 +1355,13 @@ static bool mnt_ns_loop(struct path *path)
* mount namespace loop?
*/
struct inode *inode = path->dentry->d_inode;
- struct proc_inode *ei;
+ struct proc_ns *ei;
struct mnt_namespace *mnt_ns;
if (!proc_ns_inode(inode))
return false;
- ei = PROC_I(inode);
+ ei = get_proc_ns(inode);
if (ei->ns_ops != &mntns_operations)
return false;
@@ -1327,8 +1372,7 @@ static bool mnt_ns_loop(struct path *path)
struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
int flag)
{
- struct mount *res, *p, *q, *r;
- struct path path;
+ struct mount *res, *p, *q, *r, *parent;
if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
return ERR_PTR(-EINVAL);
@@ -1355,25 +1399,22 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
q = q->mnt_parent;
}
p = s;
- path.mnt = &q->mnt;
- path.dentry = p->mnt_mountpoint;
+ parent = q;
q = clone_mnt(p, p->mnt.mnt_root, flag);
if (IS_ERR(q))
goto out;
br_write_lock(&vfsmount_lock);
list_add_tail(&q->mnt_list, &res->mnt_list);
- attach_mnt(q, &path);
+ attach_mnt(q, parent, p->mnt_mp);
br_write_unlock(&vfsmount_lock);
}
}
return res;
out:
if (res) {
- LIST_HEAD(umount_list);
br_write_lock(&vfsmount_lock);
- umount_tree(res, 0, &umount_list);
+ umount_tree(res, 0);
br_write_unlock(&vfsmount_lock);
- release_mounts(&umount_list);
}
return q;
}
@@ -1383,10 +1424,10 @@ out:
struct vfsmount *collect_mounts(struct path *path)
{
struct mount *tree;
- down_write(&namespace_sem);
+ namespace_lock();
tree = copy_tree(real_mount(path->mnt), path->dentry,
CL_COPY_ALL | CL_PRIVATE);
- up_write(&namespace_sem);
+ namespace_unlock();
if (IS_ERR(tree))
return NULL;
return &tree->mnt;
@@ -1394,13 +1435,11 @@ struct vfsmount *collect_mounts(struct path *path)
void drop_collected_mounts(struct vfsmount *mnt)
{
- LIST_HEAD(umount_list);
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
- umount_tree(real_mount(mnt), 0, &umount_list);
+ umount_tree(real_mount(mnt), 0);
br_write_unlock(&vfsmount_lock);
- up_write(&namespace_sem);
- release_mounts(&umount_list);
+ namespace_unlock();
}
int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
@@ -1509,11 +1548,11 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
* in allocations.
*/
static int attach_recursive_mnt(struct mount *source_mnt,
- struct path *path, struct path *parent_path)
+ struct mount *dest_mnt,
+ struct mountpoint *dest_mp,
+ struct path *parent_path)
{
LIST_HEAD(tree_list);
- struct mount *dest_mnt = real_mount(path->mnt);
- struct dentry *dest_dentry = path->dentry;
struct mount *child, *p;
int err;
@@ -1522,7 +1561,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
if (err)
goto out;
}
- err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
+ err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
if (err)
goto out_cleanup_ids;
@@ -1534,10 +1573,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
}
if (parent_path) {
detach_mnt(source_mnt, parent_path);
- attach_mnt(source_mnt, path);
+ attach_mnt(source_mnt, dest_mnt, dest_mp);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
- mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
+ mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
commit_tree(source_mnt);
}
@@ -1556,46 +1595,53 @@ static int attach_recursive_mnt(struct mount *source_mnt,
return err;
}
-static int lock_mount(struct path *path)
+static struct mountpoint *lock_mount(struct path *path)
{
struct vfsmount *mnt;
+ struct dentry *dentry = path->dentry;
retry:
- mutex_lock(&path->dentry->d_inode->i_mutex);
- if (unlikely(cant_mount(path->dentry))) {
- mutex_unlock(&path->dentry->d_inode->i_mutex);
- return -ENOENT;
+ mutex_lock(&dentry->d_inode->i_mutex);
+ if (unlikely(cant_mount(dentry))) {
+ mutex_unlock(&dentry->d_inode->i_mutex);
+ return ERR_PTR(-ENOENT);
}
- down_write(&namespace_sem);
+ namespace_lock();
mnt = lookup_mnt(path);
- if (likely(!mnt))
- return 0;
- up_write(&namespace_sem);
+ if (likely(!mnt)) {
+ struct mountpoint *mp = new_mountpoint(dentry);
+ if (IS_ERR(mp)) {
+ namespace_unlock();
+ mutex_unlock(&dentry->d_inode->i_mutex);
+ return mp;
+ }
+ return mp;
+ }
+ namespace_unlock();
mutex_unlock(&path->dentry->d_inode->i_mutex);
path_put(path);
path->mnt = mnt;
- path->dentry = dget(mnt->mnt_root);
+ dentry = path->dentry = dget(mnt->mnt_root);
goto retry;
}
-static void unlock_mount(struct path *path)
+static void unlock_mount(struct mountpoint *where)
{
- up_write(&namespace_sem);
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ struct dentry *dentry = where->m_dentry;
+ put_mountpoint(where);
+ namespace_unlock();
+ mutex_unlock(&dentry->d_inode->i_mutex);
}
-static int graft_tree(struct mount *mnt, struct path *path)
+static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
{
if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
return -EINVAL;
- if (S_ISDIR(path->dentry->d_inode->i_mode) !=
+ if (S_ISDIR(mp->m_dentry->d_inode->i_mode) !=
S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
return -ENOTDIR;
- if (d_unlinked(path->dentry))
- return -ENOENT;
-
- return attach_recursive_mnt(mnt, path, NULL);
+ return attach_recursive_mnt(mnt, p, mp, NULL);
}
/*
@@ -1633,7 +1679,7 @@ static int do_change_type(struct path *path, int flag)
if (!type)
return -EINVAL;
- down_write(&namespace_sem);
+ namespace_lock();
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
@@ -1646,7 +1692,7 @@ static int do_change_type(struct path *path, int flag)
br_write_unlock(&vfsmount_lock);
out_unlock:
- up_write(&namespace_sem);
+ namespace_unlock();
return err;
}
@@ -1656,9 +1702,9 @@ static int do_change_type(struct path *path, int flag)
static int do_loopback(struct path *path, const char *old_name,
int recurse)
{
- LIST_HEAD(umount_list);
struct path old_path;
- struct mount *mnt = NULL, *old;
+ struct mount *mnt = NULL, *old, *parent;
+ struct mountpoint *mp;
int err;
if (!old_name || !*old_name)
return -EINVAL;
@@ -1670,17 +1716,19 @@ static int do_loopback(struct path *path, const char *old_name,
if (mnt_ns_loop(&old_path))
goto out;
- err = lock_mount(path);
- if (err)
+ mp = lock_mount(path);
+ err = PTR_ERR(mp);
+ if (IS_ERR(mp))
goto out;
old = real_mount(old_path.mnt);
+ parent = real_mount(path->mnt);
err = -EINVAL;
if (IS_MNT_UNBINDABLE(old))
goto out2;
- if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
+ if (!check_mnt(parent) || !check_mnt(old))
goto out2;
if (recurse)
@@ -1693,15 +1741,14 @@ static int do_loopback(struct path *path, const char *old_name,
goto out2;
}
- err = graft_tree(mnt, path);
+ err = graft_tree(mnt, parent, mp);
if (err) {
br_write_lock(&vfsmount_lock);
- umount_tree(mnt, 0, &umount_list);
+ umount_tree(mnt, 0);
br_write_unlock(&vfsmount_lock);
}
out2:
- unlock_mount(path);
- release_mounts(&umount_list);
+ unlock_mount(mp);
out:
path_put(&old_path);
return err;
@@ -1786,6 +1833,7 @@ static int do_move_mount(struct path *path, const char *old_name)
struct path old_path, parent_path;
struct mount *p;
struct mount *old;
+ struct mountpoint *mp;
int err;
if (!old_name || !*old_name)
return -EINVAL;
@@ -1793,8 +1841,9 @@ static int do_move_mount(struct path *path, const char *old_name)
if (err)
return err;
- err = lock_mount(path);
- if (err < 0)
+ mp = lock_mount(path);
+ err = PTR_ERR(mp);
+ if (IS_ERR(mp))
goto out;
old = real_mount(old_path.mnt);
@@ -1804,9 +1853,6 @@ static int do_move_mount(struct path *path, const char *old_name)
if (!check_mnt(p) || !check_mnt(old))
goto out1;
- if (d_unlinked(path->dentry))
- goto out1;
-
err = -EINVAL;
if (old_path.dentry != old_path.mnt->mnt_root)
goto out1;
@@ -1833,7 +1879,7 @@ static int do_move_mount(struct path *path, const char *old_name)
if (p == old)
goto out1;
- err = attach_recursive_mnt(old, path, &parent_path);
+ err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
if (err)
goto out1;
@@ -1841,7 +1887,7 @@ static int do_move_mount(struct path *path, const char *old_name)
* automatically */
list_del_init(&old->mnt_expire);
out1:
- unlock_mount(path);
+ unlock_mount(mp);
out:
if (!err)
path_put(&parent_path);
@@ -1877,21 +1923,24 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
*/
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
+ struct mountpoint *mp;
+ struct mount *parent;
int err;
mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
- err = lock_mount(path);
- if (err)
- return err;
+ mp = lock_mount(path);
+ if (IS_ERR(mp))
+ return PTR_ERR(mp);
+ parent = real_mount(path->mnt);
err = -EINVAL;
- if (unlikely(!check_mnt(real_mount(path->mnt)))) {
+ if (unlikely(!check_mnt(parent))) {
/* that's acceptable only for automounts done in private ns */
if (!(mnt_flags & MNT_SHRINKABLE))
goto unlock;
/* ... and for those we'd better have mountpoint still alive */
- if (!real_mount(path->mnt)->mnt_ns)
+ if (!parent->mnt_ns)
goto unlock;
}
@@ -1906,10 +1955,10 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
goto unlock;
newmnt->mnt.mnt_flags = mnt_flags;
- err = graft_tree(newmnt, path);
+ err = graft_tree(newmnt, parent, mp);
unlock:
- unlock_mount(path);
+ unlock_mount(mp);
return err;
}
@@ -1982,11 +2031,11 @@ int finish_automount(struct vfsmount *m, struct path *path)
fail:
/* remove m from any expiration list it may be on */
if (!list_empty(&mnt->mnt_expire)) {
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
list_del_init(&mnt->mnt_expire);
br_write_unlock(&vfsmount_lock);
- up_write(&namespace_sem);
+ namespace_unlock();
}
mntput(m);
mntput(m);
@@ -2000,13 +2049,13 @@ fail:
*/
void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
{
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
br_write_unlock(&vfsmount_lock);
- up_write(&namespace_sem);
+ namespace_unlock();
}
EXPORT_SYMBOL(mnt_set_expiry);
@@ -2019,12 +2068,11 @@ void mark_mounts_for_expiry(struct list_head *mounts)
{
struct mount *mnt, *next;
LIST_HEAD(graveyard);
- LIST_HEAD(umounts);
if (list_empty(mounts))
return;
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
/* extract from the expiration list every vfsmount that matches the
@@ -2042,12 +2090,10 @@ void mark_mounts_for_expiry(struct list_head *mounts)
while (!list_empty(&graveyard)) {
mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
touch_mnt_namespace(mnt->mnt_ns);
- umount_tree(mnt, 1, &umounts);
+ umount_tree(mnt, 1);
}
br_write_unlock(&vfsmount_lock);
- up_write(&namespace_sem);
-
- release_mounts(&umounts);
+ namespace_unlock();
}
EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
@@ -2104,7 +2150,7 @@ resume:
*
* vfsmount_lock must be held for write
*/
-static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
+static void shrink_submounts(struct mount *mnt)
{
LIST_HEAD(graveyard);
struct mount *m;
@@ -2115,7 +2161,7 @@ static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
m = list_first_entry(&graveyard, struct mount,
mnt_expire);
touch_mnt_namespace(m->mnt_ns);
- umount_tree(m, 1, umounts);
+ umount_tree(m, 1);
}
}
}
@@ -2342,14 +2388,14 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
if (IS_ERR(new_ns))
return new_ns;
- down_write(&namespace_sem);
+ namespace_lock();
/* First pass: copy the tree topology */
copy_flags = CL_COPY_ALL | CL_EXPIRE;
if (user_ns != mnt_ns->user_ns)
copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
if (IS_ERR(new)) {
- up_write(&namespace_sem);
+ namespace_unlock();
free_mnt_ns(new_ns);
return ERR_CAST(new);
}
@@ -2380,7 +2426,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
p = next_mnt(p, old);
q = next_mnt(q, new);
}
- up_write(&namespace_sem);
+ namespace_unlock();
if (rootmnt)
mntput(rootmnt);
@@ -2550,7 +2596,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
const char __user *, put_old)
{
struct path new, old, parent_path, root_parent, root;
- struct mount *new_mnt, *root_mnt;
+ struct mount *new_mnt, *root_mnt, *old_mnt;
+ struct mountpoint *old_mp, *root_mp;
int error;
if (!may_mount())
@@ -2569,14 +2616,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out2;
get_fs_root(current->fs, &root);
- error = lock_mount(&old);
- if (error)
+ old_mp = lock_mount(&old);
+ error = PTR_ERR(old_mp);
+ if (IS_ERR(old_mp))
goto out3;
error = -EINVAL;
new_mnt = real_mount(new.mnt);
root_mnt = real_mount(root.mnt);
- if (IS_MNT_SHARED(real_mount(old.mnt)) ||
+ old_mnt = real_mount(old.mnt);
+ if (IS_MNT_SHARED(old_mnt) ||
IS_MNT_SHARED(new_mnt->mnt_parent) ||
IS_MNT_SHARED(root_mnt->mnt_parent))
goto out4;
@@ -2585,37 +2634,37 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
error = -ENOENT;
if (d_unlinked(new.dentry))
goto out4;
- if (d_unlinked(old.dentry))
- goto out4;
error = -EBUSY;
- if (new.mnt == root.mnt ||
- old.mnt == root.mnt)
+ if (new_mnt == root_mnt || old_mnt == root_mnt)
goto out4; /* loop, on the same file system */
error = -EINVAL;
if (root.mnt->mnt_root != root.dentry)
goto out4; /* not a mountpoint */
if (!mnt_has_parent(root_mnt))
goto out4; /* not attached */
+ root_mp = root_mnt->mnt_mp;
if (new.mnt->mnt_root != new.dentry)
goto out4; /* not a mountpoint */
if (!mnt_has_parent(new_mnt))
goto out4; /* not attached */
/* make sure we can reach put_old from new_root */
- if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new))
+ if (!is_path_reachable(old_mnt, old.dentry, &new))
goto out4;
+ root_mp->m_count++; /* pin it so it won't go away */
br_write_lock(&vfsmount_lock);
detach_mnt(new_mnt, &parent_path);
detach_mnt(root_mnt, &root_parent);
/* mount old root on put_old */
- attach_mnt(root_mnt, &old);
+ attach_mnt(root_mnt, old_mnt, old_mp);
/* mount new_root on / */
- attach_mnt(new_mnt, &root_parent);
+ attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
touch_mnt_namespace(current->nsproxy->mnt_ns);
br_write_unlock(&vfsmount_lock);
chroot_fs_refs(&root, &new);
+ put_mountpoint(root_mp);
error = 0;
out4:
- unlock_mount(&old);
+ unlock_mount(old_mp);
if (!error) {
path_put(&root_parent);
path_put(&parent_path);
@@ -2670,14 +2719,17 @@ void __init mnt_init(void)
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
+ mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
- if (!mount_hashtable)
+ if (!mount_hashtable || !mountpoint_hashtable)
panic("Failed to allocate mount hash table\n");
printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
for (u = 0; u < HASH_SIZE; u++)
INIT_LIST_HEAD(&mount_hashtable[u]);
+ for (u = 0; u < HASH_SIZE; u++)
+ INIT_LIST_HEAD(&mountpoint_hashtable[u]);
br_lock_init(&vfsmount_lock);
@@ -2694,16 +2746,13 @@ void __init mnt_init(void)
void put_mnt_ns(struct mnt_namespace *ns)
{
- LIST_HEAD(umount_list);
-
if (!atomic_dec_and_test(&ns->count))
return;
- down_write(&namespace_sem);
+ namespace_lock();
br_write_lock(&vfsmount_lock);
- umount_tree(ns->root, 0, &umount_list);
+ umount_tree(ns->root, 0);
br_write_unlock(&vfsmount_lock);
- up_write(&namespace_sem);
- release_mounts(&umount_list);
+ namespace_unlock();
free_mnt_ns(ns);
}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f33455b4d957..5bee0313dffd 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -177,7 +177,7 @@ static int export_features_open(struct inode *inode, struct file *file)
return single_open(file, export_features_show, NULL);
}
-static struct file_operations export_features_operations = {
+static const struct file_operations export_features_operations = {
.open = export_features_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -196,7 +196,7 @@ static int supported_enctypes_open(struct inode *inode, struct file *file)
return single_open(file, supported_enctypes_show, NULL);
}
-static struct file_operations supported_enctypes_ops = {
+static const struct file_operations supported_enctypes_ops = {
.open = supported_enctypes_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index c616a70e8cf9..959815c1e017 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -287,9 +287,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
pr_debug("%s: group=%p\n", __func__, group);
- if (file->f_flags & FASYNC)
- fsnotify_fasync(-1, file, 0);
-
/* free this group, matching get was inotify_init->fsnotify_obtain_group */
fsnotify_destroy_group(group);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5b2d4f0853ac..1da4b81e6f76 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2129,7 +2129,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
BUG_ON(iocb->ki_pos != pos);
- sb_start_write(inode->i_sb);
mutex_lock(&inode->i_mutex);
ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
@@ -2138,7 +2137,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0)
ret = err;
}
- sb_end_write(inode->i_sb);
return ret;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6474cb44004d..8a7509f9e6f5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2248,8 +2248,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
if (iocb->ki_left == 0)
return 0;
- sb_start_write(inode->i_sb);
-
appending = file->f_flags & O_APPEND ? 1 : 0;
direct_io = file->f_flags & O_DIRECT ? 1 : 0;
@@ -2423,7 +2421,6 @@ out_sems:
ocfs2_iocb_clear_sem_locked(iocb);
mutex_unlock(&inode->i_mutex);
- sb_end_write(inode->i_sb);
if (written)
ret = written;
@@ -2468,8 +2465,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
out->f_path.dentry->d_name.len,
out->f_path.dentry->d_name.name, len);
- if (pipe->inode)
- mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
+ pipe_lock(pipe);
splice_from_pipe_begin(&sd);
do {
@@ -2489,8 +2485,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
} while (ret > 0);
splice_from_pipe_end(pipe, &sd);
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
+ pipe_unlock(pipe);
if (sd.num_spliced)
ret = sd.num_spliced;
diff --git a/fs/pipe.c b/fs/pipe.c
index 2234f3f61f8d..a029a14bacf1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -25,6 +25,8 @@
#include <asm/uaccess.h>
#include <asm/ioctls.h>
+#include "internal.h"
+
/*
* The max size that a non-root user is allowed to grow the pipe. Can
* be set by root in /proc/sys/fs/pipe-max-size
@@ -53,8 +55,8 @@ unsigned int pipe_min_size = PAGE_SIZE;
static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
{
- if (pipe->inode)
- mutex_lock_nested(&pipe->inode->i_mutex, subclass);
+ if (pipe->files)
+ mutex_lock_nested(&pipe->mutex, subclass);
}
void pipe_lock(struct pipe_inode_info *pipe)
@@ -68,11 +70,21 @@ EXPORT_SYMBOL(pipe_lock);
void pipe_unlock(struct pipe_inode_info *pipe)
{
- if (pipe->inode)
- mutex_unlock(&pipe->inode->i_mutex);
+ if (pipe->files)
+ mutex_unlock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_unlock);
+static inline void __pipe_lock(struct pipe_inode_info *pipe)
+{
+ mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
+static inline void __pipe_unlock(struct pipe_inode_info *pipe)
+{
+ mutex_unlock(&pipe->mutex);
+}
+
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
@@ -361,8 +373,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
unsigned long nr_segs, loff_t pos)
{
struct file *filp = iocb->ki_filp;
- struct inode *inode = file_inode(filp);
- struct pipe_inode_info *pipe;
+ struct pipe_inode_info *pipe = filp->private_data;
int do_wakeup;
ssize_t ret;
struct iovec *iov = (struct iovec *)_iov;
@@ -375,8 +386,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
do_wakeup = 0;
ret = 0;
- mutex_lock(&inode->i_mutex);
- pipe = inode->i_pipe;
+ __pipe_lock(pipe);
for (;;) {
int bufs = pipe->nrbufs;
if (bufs) {
@@ -464,7 +474,7 @@ redo:
}
pipe_wait(pipe);
}
- mutex_unlock(&inode->i_mutex);
+ __pipe_unlock(pipe);
/* Signal writers asynchronously that there is more room. */
if (do_wakeup) {
@@ -486,8 +496,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
unsigned long nr_segs, loff_t ppos)
{
struct file *filp = iocb->ki_filp;
- struct inode *inode = file_inode(filp);
- struct pipe_inode_info *pipe;
+ struct pipe_inode_info *pipe = filp->private_data;
ssize_t ret;
int do_wakeup;
struct iovec *iov = (struct iovec *)_iov;
@@ -501,8 +510,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
do_wakeup = 0;
ret = 0;
- mutex_lock(&inode->i_mutex);
- pipe = inode->i_pipe;
+ __pipe_lock(pipe);
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
@@ -649,7 +657,7 @@ redo2:
pipe->waiting_writers--;
}
out:
- mutex_unlock(&inode->i_mutex);
+ __pipe_unlock(pipe);
if (do_wakeup) {
wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
@@ -662,29 +670,14 @@ out:
return ret;
}
-static ssize_t
-bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
-{
- return -EBADF;
-}
-
-static ssize_t
-bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
- loff_t *ppos)
-{
- return -EBADF;
-}
-
static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
- struct inode *inode = file_inode(filp);
- struct pipe_inode_info *pipe;
+ struct pipe_inode_info *pipe = filp->private_data;
int count, buf, nrbufs;
switch (cmd) {
case FIONREAD:
- mutex_lock(&inode->i_mutex);
- pipe = inode->i_pipe;
+ __pipe_lock(pipe);
count = 0;
buf = pipe->curbuf;
nrbufs = pipe->nrbufs;
@@ -692,7 +685,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
count += pipe->bufs[buf].len;
buf = (buf+1) & (pipe->buffers - 1);
}
- mutex_unlock(&inode->i_mutex);
+ __pipe_unlock(pipe);
return put_user(count, (int __user *)arg);
default:
@@ -705,8 +698,7 @@ static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
unsigned int mask;
- struct inode *inode = file_inode(filp);
- struct pipe_inode_info *pipe = inode->i_pipe;
+ struct pipe_inode_info *pipe = filp->private_data;
int nrbufs;
poll_wait(filp, &pipe->wait, wait);
@@ -734,197 +726,56 @@ pipe_poll(struct file *filp, poll_table *wait)
}
static int
-pipe_release(struct inode *inode, int decr, int decw)
+pipe_release(struct inode *inode, struct file *file)
{
- struct pipe_inode_info *pipe;
+ struct pipe_inode_info *pipe = inode->i_pipe;
+ int kill = 0;
- mutex_lock(&inode->i_mutex);
- pipe = inode->i_pipe;
- pipe->readers -= decr;
- pipe->writers -= decw;
+ __pipe_lock(pipe);
+ if (file->f_mode & FMODE_READ)
+ pipe->readers--;
+ if (file->f_mode & FMODE_WRITE)
+ pipe->writers--;
- if (!pipe->readers && !pipe->writers) {
- free_pipe_info(inode);
- } else {
+ if (pipe->readers || pipe->writers) {
wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- mutex_unlock(&inode->i_mutex);
-
- return 0;
-}
-
-static int
-pipe_read_fasync(int fd, struct file *filp, int on)
-{
- struct inode *inode = file_inode(filp);
- int retval;
-
- mutex_lock(&inode->i_mutex);
- retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
- mutex_unlock(&inode->i_mutex);
-
- return retval;
-}
-
-
-static int
-pipe_write_fasync(int fd, struct file *filp, int on)
-{
- struct inode *inode = file_inode(filp);
- int retval;
+ spin_lock(&inode->i_lock);
+ if (!--pipe->files) {
+ inode->i_pipe = NULL;
+ kill = 1;
+ }
+ spin_unlock(&inode->i_lock);
+ __pipe_unlock(pipe);
- mutex_lock(&inode->i_mutex);
- retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
- mutex_unlock(&inode->i_mutex);
+ if (kill)
+ free_pipe_info(pipe);
- return retval;
+ return 0;
}
-
static int
-pipe_rdwr_fasync(int fd, struct file *filp, int on)
+pipe_fasync(int fd, struct file *filp, int on)
{
- struct inode *inode = file_inode(filp);
- struct pipe_inode_info *pipe = inode->i_pipe;
- int retval;
+ struct pipe_inode_info *pipe = filp->private_data;
+ int retval = 0;
- mutex_lock(&inode->i_mutex);
- retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
- if (retval >= 0) {
+ __pipe_lock(pipe);
+ if (filp->f_mode & FMODE_READ)
+ retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
+ if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
- if (retval < 0) /* this can happen only if on == T */
+ if (retval < 0 && (filp->f_mode & FMODE_READ))
+ /* this can happen only if on == T */
fasync_helper(-1, filp, 0, &pipe->fasync_readers);
}
- mutex_unlock(&inode->i_mutex);
+ __pipe_unlock(pipe);
return retval;
}
-
-static int
-pipe_read_release(struct inode *inode, struct file *filp)
-{
- return pipe_release(inode, 1, 0);
-}
-
-static int
-pipe_write_release(struct inode *inode, struct file *filp)
-{
- return pipe_release(inode, 0, 1);
-}
-
-static int
-pipe_rdwr_release(struct inode *inode, struct file *filp)
-{
- int decr, decw;
-
- decr = (filp->f_mode & FMODE_READ) != 0;
- decw = (filp->f_mode & FMODE_WRITE) != 0;
- return pipe_release(inode, decr, decw);
-}
-
-static int
-pipe_read_open(struct inode *inode, struct file *filp)
-{
- int ret = -ENOENT;
-
- mutex_lock(&inode->i_mutex);
-
- if (inode->i_pipe) {
- ret = 0;
- inode->i_pipe->readers++;
- }
-
- mutex_unlock(&inode->i_mutex);
-
- return ret;
-}
-
-static int
-pipe_write_open(struct inode *inode, struct file *filp)
-{
- int ret = -ENOENT;
-
- mutex_lock(&inode->i_mutex);
-
- if (inode->i_pipe) {
- ret = 0;
- inode->i_pipe->writers++;
- }
-
- mutex_unlock(&inode->i_mutex);
-
- return ret;
-}
-
-static int
-pipe_rdwr_open(struct inode *inode, struct file *filp)
-{
- int ret = -ENOENT;
-
- if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
- return -EINVAL;
-
- mutex_lock(&inode->i_mutex);
-
- if (inode->i_pipe) {
- ret = 0;
- if (filp->f_mode & FMODE_READ)
- inode->i_pipe->readers++;
- if (filp->f_mode & FMODE_WRITE)
- inode->i_pipe->writers++;
- }
-
- mutex_unlock(&inode->i_mutex);
-
- return ret;
-}
-
-/*
- * The file_operations structs are not static because they
- * are also used in linux/fs/fifo.c to do operations on FIFOs.
- *
- * Pipes reuse fifos' file_operations structs.
- */
-const struct file_operations read_pipefifo_fops = {
- .llseek = no_llseek,
- .read = do_sync_read,
- .aio_read = pipe_read,
- .write = bad_pipe_w,
- .poll = pipe_poll,
- .unlocked_ioctl = pipe_ioctl,
- .open = pipe_read_open,
- .release = pipe_read_release,
- .fasync = pipe_read_fasync,
-};
-
-const struct file_operations write_pipefifo_fops = {
- .llseek = no_llseek,
- .read = bad_pipe_r,
- .write = do_sync_write,
- .aio_write = pipe_write,
- .poll = pipe_poll,
- .unlocked_ioctl = pipe_ioctl,
- .open = pipe_write_open,
- .release = pipe_write_release,
- .fasync = pipe_write_fasync,
-};
-
-const struct file_operations rdwr_pipefifo_fops = {
- .llseek = no_llseek,
- .read = do_sync_read,
- .aio_read = pipe_read,
- .write = do_sync_write,
- .aio_write = pipe_write,
- .poll = pipe_poll,
- .unlocked_ioctl = pipe_ioctl,
- .open = pipe_rdwr_open,
- .release = pipe_rdwr_release,
- .fasync = pipe_rdwr_fasync,
-};
-
-struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
+struct pipe_inode_info *alloc_pipe_info(void)
{
struct pipe_inode_info *pipe;
@@ -934,8 +785,8 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
if (pipe->bufs) {
init_waitqueue_head(&pipe->wait);
pipe->r_counter = pipe->w_counter = 1;
- pipe->inode = inode;
pipe->buffers = PIPE_DEF_BUFFERS;
+ mutex_init(&pipe->mutex);
return pipe;
}
kfree(pipe);
@@ -944,7 +795,7 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
return NULL;
}
-void __free_pipe_info(struct pipe_inode_info *pipe)
+void free_pipe_info(struct pipe_inode_info *pipe)
{
int i;
@@ -959,12 +810,6 @@ void __free_pipe_info(struct pipe_inode_info *pipe)
kfree(pipe);
}
-void free_pipe_info(struct inode *inode)
-{
- __free_pipe_info(inode->i_pipe);
- inode->i_pipe = NULL;
-}
-
static struct vfsmount *pipe_mnt __read_mostly;
/*
@@ -990,13 +835,14 @@ static struct inode * get_pipe_inode(void)
inode->i_ino = get_next_ino();
- pipe = alloc_pipe_info(inode);
+ pipe = alloc_pipe_info();
if (!pipe)
goto fail_iput;
- inode->i_pipe = pipe;
+ inode->i_pipe = pipe;
+ pipe->files = 2;
pipe->readers = pipe->writers = 1;
- inode->i_fop = &rdwr_pipefifo_fops;
+ inode->i_fop = &pipefifo_fops;
/*
* Mark the inode dirty from the very beginning,
@@ -1039,17 +885,19 @@ int create_pipe_files(struct file **res, int flags)
d_instantiate(path.dentry, inode);
err = -ENFILE;
- f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
+ f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
if (IS_ERR(f))
goto err_dentry;
f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
+ f->private_data = inode->i_pipe;
- res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
+ res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
if (IS_ERR(res[0]))
goto err_file;
path_get(&path);
+ res[0]->private_data = inode->i_pipe;
res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
res[1] = f;
return 0;
@@ -1057,12 +905,12 @@ int create_pipe_files(struct file **res, int flags)
err_file:
put_filp(f);
err_dentry:
- free_pipe_info(inode);
+ free_pipe_info(inode->i_pipe);
path_put(&path);
return err;
err_inode:
- free_pipe_info(inode);
+ free_pipe_info(inode->i_pipe);
iput(inode);
return err;
}
@@ -1144,6 +992,168 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
return sys_pipe2(fildes, 0);
}
+static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
+{
+ int cur = *cnt;
+
+ while (cur == *cnt) {
+ pipe_wait(pipe);
+ if (signal_pending(current))
+ break;
+ }
+ return cur == *cnt ? -ERESTARTSYS : 0;
+}
+
+static void wake_up_partner(struct pipe_inode_info *pipe)
+{
+ wake_up_interruptible(&pipe->wait);
+}
+
+static int fifo_open(struct inode *inode, struct file *filp)
+{
+ struct pipe_inode_info *pipe;
+ bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
+ int kill = 0;
+ int ret;
+
+ filp->f_version = 0;
+
+ spin_lock(&inode->i_lock);
+ if (inode->i_pipe) {
+ pipe = inode->i_pipe;
+ pipe->files++;
+ spin_unlock(&inode->i_lock);
+ } else {
+ spin_unlock(&inode->i_lock);
+ pipe = alloc_pipe_info();
+ if (!pipe)
+ return -ENOMEM;
+ pipe->files = 1;
+ spin_lock(&inode->i_lock);
+ if (unlikely(inode->i_pipe)) {
+ inode->i_pipe->files++;
+ spin_unlock(&inode->i_lock);
+ free_pipe_info(pipe);
+ pipe = inode->i_pipe;
+ } else {
+ inode->i_pipe = pipe;
+ spin_unlock(&inode->i_lock);
+ }
+ }
+ filp->private_data = pipe;
+ /* OK, we have a pipe and it's pinned down */
+
+ __pipe_lock(pipe);
+
+ /* We can only do regular read/write on fifos */
+ filp->f_mode &= (FMODE_READ | FMODE_WRITE);
+
+ switch (filp->f_mode) {
+ case FMODE_READ:
+ /*
+ * O_RDONLY
+ * POSIX.1 says that O_NONBLOCK means return with the FIFO
+ * opened, even when there is no process writing the FIFO.
+ */
+ pipe->r_counter++;
+ if (pipe->readers++ == 0)
+ wake_up_partner(pipe);
+
+ if (!is_pipe && !pipe->writers) {
+ if ((filp->f_flags & O_NONBLOCK)) {
+ /* suppress POLLHUP until we have
+ * seen a writer */
+ filp->f_version = pipe->w_counter;
+ } else {
+ if (wait_for_partner(pipe, &pipe->w_counter))
+ goto err_rd;
+ }
+ }
+ break;
+
+ case FMODE_WRITE:
+ /*
+ * O_WRONLY
+ * POSIX.1 says that O_NONBLOCK means return -1 with
+ * errno=ENXIO when there is no process reading the FIFO.
+ */
+ ret = -ENXIO;
+ if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
+ goto err;
+
+ pipe->w_counter++;
+ if (!pipe->writers++)
+ wake_up_partner(pipe);
+
+ if (!is_pipe && !pipe->readers) {
+ if (wait_for_partner(pipe, &pipe->r_counter))
+ goto err_wr;
+ }
+ break;
+
+ case FMODE_READ | FMODE_WRITE:
+ /*
+ * O_RDWR
+ * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
+ * This implementation will NEVER block on a O_RDWR open, since
+ * the process can at least talk to itself.
+ */
+
+ pipe->readers++;
+ pipe->writers++;
+ pipe->r_counter++;
+ pipe->w_counter++;
+ if (pipe->readers == 1 || pipe->writers == 1)
+ wake_up_partner(pipe);
+ break;
+
+ default:
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /* Ok! */
+ __pipe_unlock(pipe);
+ return 0;
+
+err_rd:
+ if (!--pipe->readers)
+ wake_up_interruptible(&pipe->wait);
+ ret = -ERESTARTSYS;
+ goto err;
+
+err_wr:
+ if (!--pipe->writers)
+ wake_up_interruptible(&pipe->wait);
+ ret = -ERESTARTSYS;
+ goto err;
+
+err:
+ spin_lock(&inode->i_lock);
+ if (!--pipe->files) {
+ inode->i_pipe = NULL;
+ kill = 1;
+ }
+ spin_unlock(&inode->i_lock);
+ __pipe_unlock(pipe);
+ if (kill)
+ free_pipe_info(pipe);
+ return ret;
+}
+
+const struct file_operations pipefifo_fops = {
+ .open = fifo_open,
+ .llseek = no_llseek,
+ .read = do_sync_read,
+ .aio_read = pipe_read,
+ .write = do_sync_write,
+ .aio_write = pipe_write,
+ .poll = pipe_poll,
+ .unlocked_ioctl = pipe_ioctl,
+ .release = pipe_release,
+ .fasync = pipe_fasync,
+};
+
/*
* Allocate a new array of pipe buffers and copy the info over. Returns the
* pipe size if successful, or return -ERROR on error.
@@ -1229,9 +1239,7 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
*/
struct pipe_inode_info *get_pipe_info(struct file *file)
{
- struct inode *i = file_inode(file);
-
- return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
+ return file->f_op == &pipefifo_fops ? file->private_data : NULL;
}
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1243,7 +1251,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
if (!pipe)
return -EBADF;
- mutex_lock(&pipe->inode->i_mutex);
+ __pipe_lock(pipe);
switch (cmd) {
case F_SETPIPE_SZ: {
@@ -1272,7 +1280,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
}
out:
- mutex_unlock(&pipe->inode->i_mutex);
+ __pipe_unlock(pipe);
return ret;
}
diff --git a/fs/pnode.c b/fs/pnode.c
index 8b29d2164da6..3d2a7141b87a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -218,7 +218,7 @@ static struct mount *get_source(struct mount *dest,
* @source_mnt: source mount.
* @tree_list : list of heads of trees to be attached.
*/
-int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
+int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
struct mount *source_mnt, struct list_head *tree_list)
{
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
@@ -227,7 +227,6 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
struct mount *prev_dest_mnt = dest_mnt;
struct mount *prev_src_mnt = source_mnt;
LIST_HEAD(tmp_list);
- LIST_HEAD(umount_list);
for (m = propagation_next(dest_mnt, dest_mnt); m;
m = propagation_next(m, dest_mnt)) {
@@ -250,8 +249,8 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
goto out;
}
- if (is_subdir(dest_dentry, m->mnt.mnt_root)) {
- mnt_set_mountpoint(m, dest_dentry, child);
+ if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
+ mnt_set_mountpoint(m, dest_mp, child);
list_add_tail(&child->mnt_hash, tree_list);
} else {
/*
@@ -267,10 +266,9 @@ out:
br_write_lock(&vfsmount_lock);
while (!list_empty(&tmp_list)) {
child = list_first_entry(&tmp_list, struct mount, mnt_hash);
- umount_tree(child, 0, &umount_list);
+ umount_tree(child, 0);
}
br_write_unlock(&vfsmount_lock);
- release_mounts(&umount_list);
return ret;
}
diff --git a/fs/pnode.h b/fs/pnode.h
index a0493d5ebfbf..b091445c1c4a 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -32,17 +32,16 @@ static inline void set_mnt_shared(struct mount *mnt)
}
void change_mnt_propagation(struct mount *, int);
-int propagate_mnt(struct mount *, struct dentry *, struct mount *,
+int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
struct list_head *);
int propagate_umount(struct list_head *);
int propagate_mount_busy(struct mount *, int);
void mnt_release_group_id(struct mount *);
int get_dominating_id(struct mount *mnt, const struct path *root);
unsigned int mnt_get_count(struct mount *mnt);
-void mnt_set_mountpoint(struct mount *, struct dentry *,
+void mnt_set_mountpoint(struct mount *, struct mountpoint *,
struct mount *);
-void release_mounts(struct list_head *);
-void umount_tree(struct mount *, int, struct list_head *);
+void umount_tree(struct mount *, int);
struct mount *copy_tree(struct mount *, struct dentry *, int);
bool is_path_reachable(struct mount *, struct dentry *,
const struct path *root);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3861bcec41ff..dd51e50001fe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -405,6 +405,37 @@ static const struct file_operations proc_lstats_operations = {
#endif
+#ifdef CONFIG_CGROUPS
+static int cgroup_open(struct inode *inode, struct file *file)
+{
+ struct pid *pid = PROC_I(inode)->pid;
+ return single_open(file, proc_cgroup_show, pid);
+}
+
+static const struct file_operations proc_cgroup_operations = {
+ .open = cgroup_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
+#ifdef CONFIG_PROC_PID_CPUSET
+
+static int cpuset_open(struct inode *inode, struct file *file)
+{
+ struct pid *pid = PROC_I(inode)->pid;
+ return single_open(file, proc_cpuset_show, pid);
+}
+
+static const struct file_operations proc_cpuset_operations = {
+ .open = cpuset_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif
+
static int proc_oom_score(struct task_struct *task, char *buffer)
{
unsigned long totalpages = totalram_pages + total_swap_pages;
@@ -1621,6 +1652,15 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
+int pid_delete_dentry(const struct dentry *dentry)
+{
+ /* Is the task we represent dead?
+ * If so, then don't put the dentry on the lru list,
+ * kill it immediately.
+ */
+ return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+}
+
const struct dentry_operations pid_dentry_operations =
{
.d_revalidate = pid_revalidate,
@@ -2893,7 +2933,7 @@ retry:
return iter;
}
-#define TGID_OFFSET (FIRST_PROCESS_ENTRY)
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1)
static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
struct tgid_iter iter)
@@ -2916,13 +2956,21 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
struct tgid_iter iter;
struct pid_namespace *ns;
filldir_t __filldir;
+ loff_t pos = filp->f_pos;
- if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
+ if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
goto out;
- ns = filp->f_dentry->d_sb->s_fs_info;
+ if (pos == TGID_OFFSET - 1) {
+ if (proc_fill_cache(filp, dirent, filldir, "self", 4,
+ NULL, NULL, NULL) < 0)
+ goto out;
+ iter.tgid = 0;
+ } else {
+ iter.tgid = pos - TGID_OFFSET;
+ }
iter.task = NULL;
- iter.tgid = filp->f_pos - TGID_OFFSET;
+ ns = filp->f_dentry->d_sb->s_fs_info;
for (iter = next_tgid(ns, iter);
iter.task;
iter.tgid += 1, iter = next_tgid(ns, iter)) {
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
index cbb1d47deda8..7c047f256ae2 100644
--- a/fs/proc/fd.h
+++ b/fs/proc/fd.h
@@ -11,4 +11,9 @@ extern const struct inode_operations proc_fdinfo_inode_operations;
extern int proc_fd_permission(struct inode *inode, int mask);
+static inline int proc_fd(struct inode *inode)
+{
+ return PROC_I(inode)->fd;
+}
+
#endif /* __PROCFS_FD_H__ */
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 21e1a8f1659d..a2596afffae6 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -36,212 +36,6 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry
return !memcmp(name, de->name, len);
}
-/* buffer size is one page but our output routines use some slack for overruns */
-#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
-
-static ssize_t
-__proc_file_read(struct file *file, char __user *buf, size_t nbytes,
- loff_t *ppos)
-{
- struct inode * inode = file_inode(file);
- char *page;
- ssize_t retval=0;
- int eof=0;
- ssize_t n, count;
- char *start;
- struct proc_dir_entry * dp;
- unsigned long long pos;
-
- /*
- * Gaah, please just use "seq_file" instead. The legacy /proc
- * interfaces cut loff_t down to off_t for reads, and ignore
- * the offset entirely for writes..
- */
- pos = *ppos;
- if (pos > MAX_NON_LFS)
- return 0;
- if (nbytes > MAX_NON_LFS - pos)
- nbytes = MAX_NON_LFS - pos;
-
- dp = PDE(inode);
- if (!(page = (char*) __get_free_page(GFP_TEMPORARY)))
- return -ENOMEM;
-
- while ((nbytes > 0) && !eof) {
- count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
-
- start = NULL;
- if (dp->read_proc) {
- /*
- * How to be a proc read function
- * ------------------------------
- * Prototype:
- * int f(char *buffer, char **start, off_t offset,
- * int count, int *peof, void *dat)
- *
- * Assume that the buffer is "count" bytes in size.
- *
- * If you know you have supplied all the data you
- * have, set *peof.
- *
- * You have three ways to return data:
- * 0) Leave *start = NULL. (This is the default.)
- * Put the data of the requested offset at that
- * offset within the buffer. Return the number (n)
- * of bytes there are from the beginning of the
- * buffer up to the last byte of data. If the
- * number of supplied bytes (= n - offset) is
- * greater than zero and you didn't signal eof
- * and the reader is prepared to take more data
- * you will be called again with the requested
- * offset advanced by the number of bytes
- * absorbed. This interface is useful for files
- * no larger than the buffer.
- * 1) Set *start = an unsigned long value less than
- * the buffer address but greater than zero.
- * Put the data of the requested offset at the
- * beginning of the buffer. Return the number of
- * bytes of data placed there. If this number is
- * greater than zero and you didn't signal eof
- * and the reader is prepared to take more data
- * you will be called again with the requested
- * offset advanced by *start. This interface is
- * useful when you have a large file consisting
- * of a series of blocks which you want to count
- * and return as wholes.
- * (Hack by Paul.Russell@rustcorp.com.au)
- * 2) Set *start = an address within the buffer.
- * Put the data of the requested offset at *start.
- * Return the number of bytes of data placed there.
- * If this number is greater than zero and you
- * didn't signal eof and the reader is prepared to
- * take more data you will be called again with the
- * requested offset advanced by the number of bytes
- * absorbed.
- */
- n = dp->read_proc(page, &start, *ppos,
- count, &eof, dp->data);
- } else
- break;
-
- if (n == 0) /* end of file */
- break;
- if (n < 0) { /* error */
- if (retval == 0)
- retval = n;
- break;
- }
-
- if (start == NULL) {
- if (n > PAGE_SIZE) /* Apparent buffer overflow */
- n = PAGE_SIZE;
- n -= *ppos;
- if (n <= 0)
- break;
- if (n > count)
- n = count;
- start = page + *ppos;
- } else if (start < page) {
- if (n > PAGE_SIZE) /* Apparent buffer overflow */
- n = PAGE_SIZE;
- if (n > count) {
- /*
- * Don't reduce n because doing so might
- * cut off part of a data block.
- */
- pr_warn("proc_file_read: count exceeded\n");
- }
- } else /* start >= page */ {
- unsigned long startoff = (unsigned long)(start - page);
- if (n > (PAGE_SIZE - startoff)) /* buffer overflow? */
- n = PAGE_SIZE - startoff;
- if (n > count)
- n = count;
- }
-
- n -= copy_to_user(buf, start < page ? page : start, n);
- if (n == 0) {
- if (retval == 0)
- retval = -EFAULT;
- break;
- }
-
- *ppos += start < page ? (unsigned long)start : n;
- nbytes -= n;
- buf += n;
- retval += n;
- }
- free_page((unsigned long) page);
- return retval;
-}
-
-static ssize_t
-proc_file_read(struct file *file, char __user *buf, size_t nbytes,
- loff_t *ppos)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- ssize_t rv = -EIO;
-
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
- return rv;
- }
- pde->pde_users++;
- spin_unlock(&pde->pde_unload_lock);
-
- rv = __proc_file_read(file, buf, nbytes, ppos);
-
- pde_users_dec(pde);
- return rv;
-}
-
-static ssize_t
-proc_file_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *ppos)
-{
- struct proc_dir_entry *pde = PDE(file_inode(file));
- ssize_t rv = -EIO;
-
- if (pde->write_proc) {
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
- return rv;
- }
- pde->pde_users++;
- spin_unlock(&pde->pde_unload_lock);
-
- /* FIXME: does this routine need ppos? probably... */
- rv = pde->write_proc(file, buffer, count, pde->data);
- pde_users_dec(pde);
- }
- return rv;
-}
-
-
-static loff_t
-proc_file_lseek(struct file *file, loff_t offset, int orig)
-{
- loff_t retval = -EINVAL;
- switch (orig) {
- case 1:
- offset += file->f_pos;
- /* fallthrough */
- case 0:
- if (offset < 0 || offset > MAX_NON_LFS)
- break;
- file->f_pos = retval = offset;
- }
- return retval;
-}
-
-static const struct file_operations proc_file_operations = {
- .llseek = proc_file_lseek,
- .read = proc_file_read,
- .write = proc_file_write,
-};
-
static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = dentry->d_inode;
@@ -371,7 +165,7 @@ void proc_free_inum(unsigned int inum)
static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- nd_set_link(nd, PDE(dentry->d_inode)->data);
+ nd_set_link(nd, __PDE_DATA(dentry->d_inode));
return NULL;
}
@@ -541,19 +335,17 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
return ret;
if (S_ISDIR(dp->mode)) {
- if (dp->proc_iops == NULL) {
- dp->proc_fops = &proc_dir_operations;
- dp->proc_iops = &proc_dir_inode_operations;
- }
+ dp->proc_fops = &proc_dir_operations;
+ dp->proc_iops = &proc_dir_inode_operations;
dir->nlink++;
} else if (S_ISLNK(dp->mode)) {
- if (dp->proc_iops == NULL)
- dp->proc_iops = &proc_link_inode_operations;
+ dp->proc_iops = &proc_link_inode_operations;
} else if (S_ISREG(dp->mode)) {
- if (dp->proc_fops == NULL)
- dp->proc_fops = &proc_file_operations;
- if (dp->proc_iops == NULL)
- dp->proc_iops = &proc_file_inode_operations;
+ BUG_ON(dp->proc_fops == NULL);
+ dp->proc_iops = &proc_file_inode_operations;
+ } else {
+ WARN_ON(1);
+ return -EINVAL;
}
spin_lock(&proc_subdir_lock);
@@ -636,13 +428,17 @@ struct proc_dir_entry *proc_symlink(const char *name,
}
EXPORT_SYMBOL(proc_symlink);
-struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
- struct proc_dir_entry *parent)
+struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
+ struct proc_dir_entry *parent, void *data)
{
struct proc_dir_entry *ent;
+ if (mode == 0)
+ mode = S_IRUGO | S_IXUGO;
+
ent = __proc_create(&parent, name, S_IFDIR | mode, 2);
if (ent) {
+ ent->data = data;
if (proc_register(parent, ent) < 0) {
kfree(ent);
ent = NULL;
@@ -650,82 +446,39 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
}
return ent;
}
-EXPORT_SYMBOL(proc_mkdir_mode);
+EXPORT_SYMBOL_GPL(proc_mkdir_data);
-struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
- struct proc_dir_entry *parent)
+struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
+ struct proc_dir_entry *parent)
{
- struct proc_dir_entry *ent;
-
- ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2);
- if (ent) {
- ent->data = net;
- if (proc_register(parent, ent) < 0) {
- kfree(ent);
- ent = NULL;
- }
- }
- return ent;
+ return proc_mkdir_data(name, mode, parent, NULL);
}
-EXPORT_SYMBOL_GPL(proc_net_mkdir);
+EXPORT_SYMBOL(proc_mkdir_mode);
struct proc_dir_entry *proc_mkdir(const char *name,
struct proc_dir_entry *parent)
{
- return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
+ return proc_mkdir_data(name, 0, parent, NULL);
}
EXPORT_SYMBOL(proc_mkdir);
-struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
- struct proc_dir_entry *parent)
-{
- struct proc_dir_entry *ent;
- nlink_t nlink;
-
- if (S_ISDIR(mode)) {
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO | S_IXUGO;
- nlink = 2;
- } else {
- if ((mode & S_IFMT) == 0)
- mode |= S_IFREG;
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO;
- nlink = 1;
- }
-
- ent = __proc_create(&parent, name, mode, nlink);
- if (ent) {
- if (proc_register(parent, ent) < 0) {
- kfree(ent);
- ent = NULL;
- }
- }
- return ent;
-}
-EXPORT_SYMBOL(create_proc_entry);
-
struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
struct proc_dir_entry *parent,
const struct file_operations *proc_fops,
void *data)
{
struct proc_dir_entry *pde;
- nlink_t nlink;
+ if ((mode & S_IFMT) == 0)
+ mode |= S_IFREG;
- if (S_ISDIR(mode)) {
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO | S_IXUGO;
- nlink = 2;
- } else {
- if ((mode & S_IFMT) == 0)
- mode |= S_IFREG;
- if ((mode & S_IALLUGO) == 0)
- mode |= S_IRUGO;
- nlink = 1;
+ if (!S_ISREG(mode)) {
+ WARN_ON(1); /* use proc_mkdir() */
+ return NULL;
}
- pde = __proc_create(&parent, name, mode, nlink);
+ if ((mode & S_IALLUGO) == 0)
+ mode |= S_IRUGO;
+ pde = __proc_create(&parent, name, mode, 1);
if (!pde)
goto out;
pde->proc_fops = proc_fops;
@@ -739,6 +492,19 @@ out:
return NULL;
}
EXPORT_SYMBOL(proc_create_data);
+
+void proc_set_size(struct proc_dir_entry *de, loff_t size)
+{
+ de->size = size;
+}
+EXPORT_SYMBOL(proc_set_size);
+
+void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid)
+{
+ de->uid = uid;
+ de->gid = gid;
+}
+EXPORT_SYMBOL(proc_set_user);
static void free_proc_entry(struct proc_dir_entry *de)
{
@@ -755,41 +521,6 @@ void pde_put(struct proc_dir_entry *pde)
free_proc_entry(pde);
}
-static void entry_rundown(struct proc_dir_entry *de)
-{
- spin_lock(&de->pde_unload_lock);
- /*
- * Stop accepting new callers into module. If you're
- * dynamically allocating ->proc_fops, save a pointer somewhere.
- */
- de->proc_fops = NULL;
- /* Wait until all existing callers into module are done. */
- if (de->pde_users > 0) {
- DECLARE_COMPLETION_ONSTACK(c);
-
- if (!de->pde_unload_completion)
- de->pde_unload_completion = &c;
-
- spin_unlock(&de->pde_unload_lock);
-
- wait_for_completion(de->pde_unload_completion);
-
- spin_lock(&de->pde_unload_lock);
- }
-
- while (!list_empty(&de->pde_openers)) {
- struct pde_opener *pdeo;
-
- pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
- list_del(&pdeo->lh);
- spin_unlock(&de->pde_unload_lock);
- pdeo->release(pdeo->inode, pdeo->file);
- kfree(pdeo);
- spin_lock(&de->pde_unload_lock);
- }
- spin_unlock(&de->pde_unload_lock);
-}
-
/*
* Remove a /proc entry and free it if it's not currently in use.
*/
@@ -821,7 +552,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
return;
}
- entry_rundown(de);
+ proc_entry_rundown(de);
if (S_ISDIR(de->mode))
parent->nlink--;
@@ -870,7 +601,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
}
spin_unlock(&proc_subdir_lock);
- entry_rundown(de);
+ proc_entry_rundown(de);
next = de->parent;
if (S_ISDIR(de->mode))
next->nlink--;
@@ -886,3 +617,23 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
return 0;
}
EXPORT_SYMBOL(remove_proc_subtree);
+
+void *proc_get_parent_data(const struct inode *inode)
+{
+ struct proc_dir_entry *de = PDE(inode);
+ return de->parent->data;
+}
+EXPORT_SYMBOL_GPL(proc_get_parent_data);
+
+void proc_remove(struct proc_dir_entry *de)
+{
+ if (de)
+ remove_proc_subtree(de->name, de->parent);
+}
+EXPORT_SYMBOL(proc_remove);
+
+void *PDE_DATA(const struct inode *inode)
+{
+ return __PDE_DATA(inode);
+}
+EXPORT_SYMBOL(PDE_DATA);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 869116c2afbe..073aea60cf8f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -22,6 +22,7 @@
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/mount.h>
+#include <linux/magic.h>
#include <asm/uaccess.h>
@@ -50,8 +51,8 @@ static void proc_evict_inode(struct inode *inode)
sysctl_head_put(head);
}
/* Release any associated namespace */
- ns_ops = PROC_I(inode)->ns_ops;
- ns = PROC_I(inode)->ns;
+ ns_ops = PROC_I(inode)->ns.ns_ops;
+ ns = PROC_I(inode)->ns.ns;
if (ns_ops && ns)
ns_ops->put(ns);
}
@@ -72,8 +73,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
ei->pde = NULL;
ei->sysctl = NULL;
ei->sysctl_entry = NULL;
- ei->ns = NULL;
- ei->ns_ops = NULL;
+ ei->ns.ns = NULL;
+ ei->ns.ns_ops = NULL;
inode = &ei->vfs_inode;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
return inode;
@@ -129,96 +130,100 @@ static const struct super_operations proc_sops = {
.show_options = proc_show_options,
};
-static void __pde_users_dec(struct proc_dir_entry *pde)
+enum {BIAS = -1U<<31};
+
+static inline int use_pde(struct proc_dir_entry *pde)
+{
+ return atomic_inc_unless_negative(&pde->in_use);
+}
+
+static void unuse_pde(struct proc_dir_entry *pde)
{
- pde->pde_users--;
- if (pde->pde_unload_completion && pde->pde_users == 0)
+ if (atomic_dec_return(&pde->in_use) == BIAS)
complete(pde->pde_unload_completion);
}
-void pde_users_dec(struct proc_dir_entry *pde)
+/* pde is locked */
+static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
{
- spin_lock(&pde->pde_unload_lock);
- __pde_users_dec(pde);
- spin_unlock(&pde->pde_unload_lock);
+ if (pdeo->closing) {
+ /* somebody else is doing that, just wait */
+ DECLARE_COMPLETION_ONSTACK(c);
+ pdeo->c = &c;
+ spin_unlock(&pde->pde_unload_lock);
+ wait_for_completion(&c);
+ spin_lock(&pde->pde_unload_lock);
+ } else {
+ struct file *file;
+ pdeo->closing = 1;
+ spin_unlock(&pde->pde_unload_lock);
+ file = pdeo->file;
+ pde->proc_fops->release(file_inode(file), file);
+ spin_lock(&pde->pde_unload_lock);
+ list_del_init(&pdeo->lh);
+ if (pdeo->c)
+ complete(pdeo->c);
+ kfree(pdeo);
+ }
+}
+
+void proc_entry_rundown(struct proc_dir_entry *de)
+{
+ DECLARE_COMPLETION_ONSTACK(c);
+ /* Wait until all existing callers into module are done. */
+ de->pde_unload_completion = &c;
+ if (atomic_add_return(BIAS, &de->in_use) != BIAS)
+ wait_for_completion(&c);
+
+ spin_lock(&de->pde_unload_lock);
+ while (!list_empty(&de->pde_openers)) {
+ struct pde_opener *pdeo;
+ pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
+ close_pdeo(de, pdeo);
+ }
+ spin_unlock(&de->pde_unload_lock);
}
static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
loff_t rv = -EINVAL;
- loff_t (*llseek)(struct file *, loff_t, int);
-
- spin_lock(&pde->pde_unload_lock);
- /*
- * remove_proc_entry() is going to delete PDE (as part of module
- * cleanup sequence). No new callers into module allowed.
- */
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
- return rv;
+ if (use_pde(pde)) {
+ loff_t (*llseek)(struct file *, loff_t, int);
+ llseek = pde->proc_fops->llseek;
+ if (!llseek)
+ llseek = default_llseek;
+ rv = llseek(file, offset, whence);
+ unuse_pde(pde);
}
- /*
- * Bump refcount so that remove_proc_entry will wail for ->llseek to
- * complete.
- */
- pde->pde_users++;
- /*
- * Save function pointer under lock, to protect against ->proc_fops
- * NULL'ifying right after ->pde_unload_lock is dropped.
- */
- llseek = pde->proc_fops->llseek;
- spin_unlock(&pde->pde_unload_lock);
-
- if (!llseek)
- llseek = default_llseek;
- rv = llseek(file, offset, whence);
-
- pde_users_dec(pde);
return rv;
}
static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
+ ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
- ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
-
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
- return rv;
+ if (use_pde(pde)) {
+ read = pde->proc_fops->read;
+ if (read)
+ rv = read(file, buf, count, ppos);
+ unuse_pde(pde);
}
- pde->pde_users++;
- read = pde->proc_fops->read;
- spin_unlock(&pde->pde_unload_lock);
-
- if (read)
- rv = read(file, buf, count, ppos);
-
- pde_users_dec(pde);
return rv;
}
static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
{
+ ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
- ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
-
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
- return rv;
+ if (use_pde(pde)) {
+ write = pde->proc_fops->write;
+ if (write)
+ rv = write(file, buf, count, ppos);
+ unuse_pde(pde);
}
- pde->pde_users++;
- write = pde->proc_fops->write;
- spin_unlock(&pde->pde_unload_lock);
-
- if (write)
- rv = write(file, buf, count, ppos);
-
- pde_users_dec(pde);
return rv;
}
@@ -227,20 +232,12 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p
struct proc_dir_entry *pde = PDE(file_inode(file));
unsigned int rv = DEFAULT_POLLMASK;
unsigned int (*poll)(struct file *, struct poll_table_struct *);
-
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
- return rv;
+ if (use_pde(pde)) {
+ poll = pde->proc_fops->poll;
+ if (poll)
+ rv = poll(file, pts);
+ unuse_pde(pde);
}
- pde->pde_users++;
- poll = pde->proc_fops->poll;
- spin_unlock(&pde->pde_unload_lock);
-
- if (poll)
- rv = poll(file, pts);
-
- pde_users_dec(pde);
return rv;
}
@@ -249,20 +246,12 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
long (*ioctl)(struct file *, unsigned int, unsigned long);
-
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
- return rv;
+ if (use_pde(pde)) {
+ ioctl = pde->proc_fops->unlocked_ioctl;
+ if (ioctl)
+ rv = ioctl(file, cmd, arg);
+ unuse_pde(pde);
}
- pde->pde_users++;
- ioctl = pde->proc_fops->unlocked_ioctl;
- spin_unlock(&pde->pde_unload_lock);
-
- if (ioctl)
- rv = ioctl(file, cmd, arg);
-
- pde_users_dec(pde);
return rv;
}
@@ -272,20 +261,12 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
-
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
- return rv;
+ if (use_pde(pde)) {
+ compat_ioctl = pde->proc_fops->compat_ioctl;
+ if (compat_ioctl)
+ rv = compat_ioctl(file, cmd, arg);
+ unuse_pde(pde);
}
- pde->pde_users++;
- compat_ioctl = pde->proc_fops->compat_ioctl;
- spin_unlock(&pde->pde_unload_lock);
-
- if (compat_ioctl)
- rv = compat_ioctl(file, cmd, arg);
-
- pde_users_dec(pde);
return rv;
}
#endif
@@ -295,20 +276,12 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
struct proc_dir_entry *pde = PDE(file_inode(file));
int rv = -EIO;
int (*mmap)(struct file *, struct vm_area_struct *);
-
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
- return rv;
+ if (use_pde(pde)) {
+ mmap = pde->proc_fops->mmap;
+ if (mmap)
+ rv = mmap(file, vma);
+ unuse_pde(pde);
}
- pde->pde_users++;
- mmap = pde->proc_fops->mmap;
- spin_unlock(&pde->pde_unload_lock);
-
- if (mmap)
- rv = mmap(file, vma);
-
- pde_users_dec(pde);
return rv;
}
@@ -330,91 +303,47 @@ static int proc_reg_open(struct inode *inode, struct file *file)
* by hand in remove_proc_entry(). For this, save opener's credentials
* for later.
*/
- pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
+ pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
if (!pdeo)
return -ENOMEM;
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ if (!use_pde(pde)) {
kfree(pdeo);
return -ENOENT;
}
- pde->pde_users++;
open = pde->proc_fops->open;
release = pde->proc_fops->release;
- spin_unlock(&pde->pde_unload_lock);
if (open)
rv = open(inode, file);
- spin_lock(&pde->pde_unload_lock);
if (rv == 0 && release) {
/* To know what to release. */
- pdeo->inode = inode;
pdeo->file = file;
/* Strictly for "too late" ->release in proc_reg_release(). */
- pdeo->release = release;
+ spin_lock(&pde->pde_unload_lock);
list_add(&pdeo->lh, &pde->pde_openers);
+ spin_unlock(&pde->pde_unload_lock);
} else
kfree(pdeo);
- __pde_users_dec(pde);
- spin_unlock(&pde->pde_unload_lock);
- return rv;
-}
-
-static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
- struct inode *inode, struct file *file)
-{
- struct pde_opener *pdeo;
- list_for_each_entry(pdeo, &pde->pde_openers, lh) {
- if (pdeo->inode == inode && pdeo->file == file)
- return pdeo;
- }
- return NULL;
+ unuse_pde(pde);
+ return rv;
}
static int proc_reg_release(struct inode *inode, struct file *file)
{
struct proc_dir_entry *pde = PDE(inode);
- int rv = 0;
- int (*release)(struct inode *, struct file *);
struct pde_opener *pdeo;
-
spin_lock(&pde->pde_unload_lock);
- pdeo = find_pde_opener(pde, inode, file);
- if (!pde->proc_fops) {
- /*
- * Can't simply exit, __fput() will think that everything is OK,
- * and move on to freeing struct file. remove_proc_entry() will
- * find slacker in opener's list and will try to do non-trivial
- * things with struct file. Therefore, remove opener from list.
- *
- * But if opener is removed from list, who will ->release it?
- */
- if (pdeo) {
- list_del(&pdeo->lh);
- spin_unlock(&pde->pde_unload_lock);
- rv = pdeo->release(inode, file);
- kfree(pdeo);
- } else
- spin_unlock(&pde->pde_unload_lock);
- return rv;
- }
- pde->pde_users++;
- release = pde->proc_fops->release;
- if (pdeo) {
- list_del(&pdeo->lh);
- kfree(pdeo);
+ list_for_each_entry(pdeo, &pde->pde_openers, lh) {
+ if (pdeo->file == file) {
+ close_pdeo(pde, pdeo);
+ break;
+ }
}
spin_unlock(&pde->pde_unload_lock);
-
- if (release)
- rv = release(inode, file);
-
- pde_users_dec(pde);
- return rv;
+ return 0;
}
static const struct file_operations proc_reg_file_ops = {
@@ -462,8 +391,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
inode->i_size = de->size;
if (de->nlink)
set_nlink(inode, de->nlink);
- if (de->proc_iops)
- inode->i_op = de->proc_iops;
+ WARN_ON(!de->proc_iops);
+ inode->i_op = de->proc_iops;
if (de->proc_fops) {
if (S_ISREG(inode->i_mode)) {
#ifdef CONFIG_COMPAT
@@ -506,5 +435,5 @@ int proc_fill_super(struct super_block *s)
return -ENOMEM;
}
- return 0;
+ return proc_setup_self(s);
}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 75710357a517..d600fb098b6a 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -1,4 +1,4 @@
-/* internal.h: internal procfs definitions
+/* Internal procfs definitions
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
@@ -9,62 +9,83 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/sched.h>
#include <linux/proc_fs.h>
+#include <linux/proc_ns.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
#include <linux/binfmts.h>
-struct ctl_table_header;
-struct mempolicy;
-extern struct proc_dir_entry proc_root;
-extern void proc_self_init(void);
-#ifdef CONFIG_PROC_SYSCTL
-extern int proc_sys_init(void);
-extern void sysctl_head_put(struct ctl_table_header *head);
-#else
-static inline void proc_sys_init(void) { }
-static inline void sysctl_head_put(struct ctl_table_header *head) { }
-#endif
-#ifdef CONFIG_NET
-extern int proc_net_init(void);
-#else
-static inline int proc_net_init(void) { return 0; }
-#endif
+struct ctl_table_header;
+struct mempolicy;
-extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
- struct pid *pid, struct task_struct *task);
-extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
- struct pid *pid, struct task_struct *task);
-extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
- struct pid *pid, struct task_struct *task);
-extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns,
- struct pid *pid, struct task_struct *task);
-extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
+/*
+ * This is not completely implemented yet. The idea is to
+ * create an in-memory tree (like the actual /proc filesystem
+ * tree) of these proc_dir_entries, so that we can dynamically
+ * add new files to /proc.
+ *
+ * The "next" pointer creates a linked list of one /proc directory,
+ * while parent/subdir create the directory structure (every
+ * /proc file has a parent, but "subdir" is NULL for all
+ * non-directory entries).
+ */
+struct proc_dir_entry {
+ unsigned int low_ino;
+ umode_t mode;
+ nlink_t nlink;
+ kuid_t uid;
+ kgid_t gid;
+ loff_t size;
+ const struct inode_operations *proc_iops;
+ const struct file_operations *proc_fops;
+ struct proc_dir_entry *next, *parent, *subdir;
+ void *data;
+ atomic_t count; /* use count */
+ atomic_t in_use; /* number of callers into module in progress; */
+ /* negative -> it's going away RSN */
+ struct completion *pde_unload_completion;
+ struct list_head pde_openers; /* who did ->open, but not ->release */
+ spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
+ u8 namelen;
+ char name[];
+};
-extern const struct file_operations proc_tid_children_operations;
-extern const struct file_operations proc_pid_maps_operations;
-extern const struct file_operations proc_tid_maps_operations;
-extern const struct file_operations proc_pid_numa_maps_operations;
-extern const struct file_operations proc_tid_numa_maps_operations;
-extern const struct file_operations proc_pid_smaps_operations;
-extern const struct file_operations proc_tid_smaps_operations;
-extern const struct file_operations proc_clear_refs_operations;
-extern const struct file_operations proc_pagemap_operations;
-extern const struct file_operations proc_net_operations;
-extern const struct inode_operations proc_net_inode_operations;
-extern const struct inode_operations proc_pid_link_inode_operations;
+union proc_op {
+ int (*proc_get_link)(struct dentry *, struct path *);
+ int (*proc_read)(struct task_struct *task, char *page);
+ int (*proc_show)(struct seq_file *m,
+ struct pid_namespace *ns, struct pid *pid,
+ struct task_struct *task);
+};
-struct proc_maps_private {
+struct proc_inode {
struct pid *pid;
- struct task_struct *task;
-#ifdef CONFIG_MMU
- struct vm_area_struct *tail_vma;
-#endif
-#ifdef CONFIG_NUMA
- struct mempolicy *task_mempolicy;
-#endif
+ int fd;
+ union proc_op op;
+ struct proc_dir_entry *pde;
+ struct ctl_table_header *sysctl;
+ struct ctl_table *sysctl_entry;
+ struct proc_ns ns;
+ struct inode vfs_inode;
};
-void proc_init_inodecache(void);
+/*
+ * General functions
+ */
+static inline struct proc_inode *PROC_I(const struct inode *inode)
+{
+ return container_of(inode, struct proc_inode, vfs_inode);
+}
+
+static inline struct proc_dir_entry *PDE(const struct inode *inode)
+{
+ return PROC_I(inode)->pde;
+}
+
+static inline void *__PDE_DATA(const struct inode *inode)
+{
+ return PDE(inode)->data;
+}
static inline struct pid *proc_pid(struct inode *inode)
{
@@ -76,11 +97,6 @@ static inline struct task_struct *get_proc_task(struct inode *inode)
return get_pid_task(proc_pid(inode), PIDTYPE_PID);
}
-static inline int proc_fd(struct inode *inode)
-{
- return PROC_I(inode)->fd;
-}
-
static inline int task_dumpable(struct task_struct *task)
{
int dumpable = 0;
@@ -96,15 +112,6 @@ static inline int task_dumpable(struct task_struct *task)
return 0;
}
-static inline int pid_delete_dentry(const struct dentry * dentry)
-{
- /* Is the task we represent dead?
- * If so, then don't put the dentry on the lru list,
- * kill it immediately.
- */
- return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
-}
-
static inline unsigned name_to_int(struct dentry *dentry)
{
const char *name = dentry->d_name.name;
@@ -127,63 +134,165 @@ out:
return ~0U;
}
-struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
- struct dentry *dentry);
-int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
- filldir_t filldir);
+/*
+ * Offset of the first process in the /proc root directory..
+ */
+#define FIRST_PROCESS_ENTRY 256
+
+/* Worst case buffer size needed for holding an integer. */
+#define PROC_NUMBUF 13
-struct pde_opener {
- struct inode *inode;
- struct file *file;
- int (*release)(struct inode *, struct file *);
- struct list_head lh;
-};
-void pde_users_dec(struct proc_dir_entry *pde);
+/*
+ * array.c
+ */
+extern const struct file_operations proc_tid_children_operations;
+extern int proc_tid_stat(struct seq_file *, struct pid_namespace *,
+ struct pid *, struct task_struct *);
+extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *,
+ struct pid *, struct task_struct *);
+extern int proc_pid_status(struct seq_file *, struct pid_namespace *,
+ struct pid *, struct task_struct *);
+extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
+ struct pid *, struct task_struct *);
+
+/*
+ * base.c
+ */
+extern const struct dentry_operations pid_dentry_operations;
+extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int proc_setattr(struct dentry *, struct iattr *);
+extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *);
+extern int pid_revalidate(struct dentry *, unsigned int);
+extern int pid_delete_dentry(const struct dentry *);
+extern int proc_pid_readdir(struct file *, void *, filldir_t);
+extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int);
+extern loff_t mem_lseek(struct file *, loff_t, int);
+
+/* Lookups */
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+ struct task_struct *, const void *);
+extern int proc_fill_cache(struct file *, void *, filldir_t, const char *, int,
+ instantiate_t, struct task_struct *, const void *);
+
+/*
+ * generic.c
+ */
extern spinlock_t proc_subdir_lock;
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int);
-int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
-unsigned long task_vsize(struct mm_struct *);
-unsigned long task_statm(struct mm_struct *,
- unsigned long *, unsigned long *, unsigned long *, unsigned long *);
-void task_mem(struct seq_file *, struct mm_struct *);
+extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
+extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
+ struct dentry *);
+extern int proc_readdir(struct file *, void *, filldir_t);
+extern int proc_readdir_de(struct proc_dir_entry *, struct file *, void *, filldir_t);
static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
{
atomic_inc(&pde->count);
return pde;
}
-void pde_put(struct proc_dir_entry *pde);
-
-int proc_fill_super(struct super_block *);
-struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
-int proc_remount(struct super_block *sb, int *flags, char *data);
+extern void pde_put(struct proc_dir_entry *);
/*
- * These are generic /proc routines that use the internal
- * "struct proc_dir_entry" tree to traverse the filesystem.
- *
- * The /proc root directory has extended versions to take care
- * of the /proc/<pid> subdirectories.
+ * inode.c
*/
-int proc_readdir(struct file *, void *, filldir_t);
-struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
+struct pde_opener {
+ struct file *file;
+ struct list_head lh;
+ int closing;
+ struct completion *c;
+};
+extern const struct inode_operations proc_pid_link_inode_operations;
+extern void proc_init_inodecache(void);
+extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
+extern int proc_fill_super(struct super_block *);
+extern void proc_entry_rundown(struct proc_dir_entry *);
-/* Lookups */
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
- struct task_struct *, const void *);
-int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
- const char *name, int len,
- instantiate_t instantiate, struct task_struct *task, const void *ptr);
-int pid_revalidate(struct dentry *dentry, unsigned int flags);
-struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
-extern const struct dentry_operations pid_dentry_operations;
-int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
-int proc_setattr(struct dentry *dentry, struct iattr *attr);
+/*
+ * proc_devtree.c
+ */
+#ifdef CONFIG_PROC_DEVICETREE
+extern void proc_device_tree_init(void);
+#endif
+/*
+ * proc_namespaces.c
+ */
extern const struct inode_operations proc_ns_dir_inode_operations;
extern const struct file_operations proc_ns_dir_operations;
+/*
+ * proc_net.c
+ */
+extern const struct file_operations proc_net_operations;
+extern const struct inode_operations proc_net_inode_operations;
+
+#ifdef CONFIG_NET
+extern int proc_net_init(void);
+#else
+static inline int proc_net_init(void) { return 0; }
+#endif
+
+/*
+ * proc_self.c
+ */
+extern int proc_setup_self(struct super_block *);
+
+/*
+ * proc_sysctl.c
+ */
+#ifdef CONFIG_PROC_SYSCTL
+extern int proc_sys_init(void);
+extern void sysctl_head_put(struct ctl_table_header *);
+#else
+static inline void proc_sys_init(void) { }
+static inline void sysctl_head_put(struct ctl_table_header *head) { }
+#endif
+
+/*
+ * proc_tty.c
+ */
+#ifdef CONFIG_TTY
+extern void proc_tty_init(void);
+#else
+static inline void proc_tty_init(void) {}
+#endif
+
+/*
+ * root.c
+ */
+extern struct proc_dir_entry proc_root;
+
+extern void proc_self_init(void);
+extern int proc_remount(struct super_block *, int *, char *);
+
+/*
+ * task_[no]mmu.c
+ */
+struct proc_maps_private {
+ struct pid *pid;
+ struct task_struct *task;
+#ifdef CONFIG_MMU
+ struct vm_area_struct *tail_vma;
+#endif
+#ifdef CONFIG_NUMA
+ struct mempolicy *task_mempolicy;
+#endif
+};
+
+extern const struct file_operations proc_pid_maps_operations;
+extern const struct file_operations proc_tid_maps_operations;
+extern const struct file_operations proc_pid_numa_maps_operations;
+extern const struct file_operations proc_tid_numa_maps_operations;
+extern const struct file_operations proc_pid_smaps_operations;
+extern const struct file_operations proc_tid_smaps_operations;
+extern const struct file_operations proc_clear_refs_operations;
+extern const struct file_operations proc_pagemap_operations;
+
+extern unsigned long task_vsize(struct mm_struct *);
+extern unsigned long task_statm(struct mm_struct *,
+ unsigned long *, unsigned long *,
+ unsigned long *, unsigned long *);
+extern void task_mem(struct seq_file *, struct mm_struct *);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index f6a13f489e30..0a22194e5d58 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/proc_fs.h>
+#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/capability.h>
#include <linux/elf.h>
@@ -28,6 +29,7 @@
#include <linux/ioport.h>
#include <linux/memory.h>
#include <asm/sections.h>
+#include "internal.h"
#define CORE_STR "CORE"
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 66b51c0383da..54bdc6701e9f 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -51,7 +51,7 @@ static int ns_delete_dentry(const struct dentry *dentry)
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
{
struct inode *inode = dentry->d_inode;
- const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
+ const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
ns_ops->name, inode->i_ino);
@@ -95,8 +95,8 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb,
inode->i_op = &ns_inode_operations;
inode->i_mode = S_IFREG | S_IRUGO;
inode->i_fop = &ns_file_operations;
- ei->ns_ops = ns_ops;
- ei->ns = ns;
+ ei->ns.ns_ops = ns_ops;
+ ei->ns.ns = ns;
unlock_new_inode(inode);
} else {
ns_ops->put(ns);
@@ -128,7 +128,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_put_task;
- ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops);
+ ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops);
if (IS_ERR(ns_path.dentry)) {
error = ERR_CAST(ns_path.dentry);
goto out_put_task;
@@ -148,7 +148,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
{
struct inode *inode = dentry->d_inode;
struct proc_inode *ei = PROC_I(inode);
- const struct proc_ns_operations *ns_ops = ei->ns_ops;
+ const struct proc_ns_operations *ns_ops = ei->ns.ns_ops;
struct task_struct *task;
void *ns;
char name[50];
@@ -202,7 +202,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
ei = PROC_I(inode);
inode->i_mode = S_IFLNK|S_IRWXUGO;
inode->i_op = &proc_ns_link_inode_operations;
- ei->ns_ops = ns_ops;
+ ei->ns.ns_ops = ns_ops;
d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
@@ -337,6 +337,11 @@ out_invalid:
return ERR_PTR(-EINVAL);
}
+struct proc_ns *get_proc_ns(struct inode *inode)
+{
+ return &PROC_I(inode)->ns;
+}
+
bool proc_ns_inode(struct inode *inode)
{
return inode->i_fop == &ns_file_operations;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 30b590f5bd35..505afc950e0a 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -41,7 +41,7 @@ static int property_proc_show(struct seq_file *m, void *v)
static int property_proc_open(struct inode *inode, struct file *file)
{
- return single_open(file, property_proc_show, PDE(inode)->data);
+ return single_open(file, property_proc_show, __PDE_DATA(inode));
}
static const struct file_operations property_proc_fops = {
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index b4ac6572474f..986e83220d56 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -26,6 +26,10 @@
#include "internal.h"
+static inline struct net *PDE_NET(struct proc_dir_entry *pde)
+{
+ return pde->parent->data;
+}
static struct net *get_proc_net(const struct inode *inode)
{
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9c7fab1d23f0..41a6ea93f486 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -141,6 +141,8 @@ static void proc_kill_sb(struct super_block *sb)
struct pid_namespace *ns;
ns = (struct pid_namespace *)sb->s_fs_info;
+ if (ns->proc_self)
+ dput(ns->proc_self);
kill_anon_super(sb);
put_pid_ns(ns);
}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index aa5cc3bff140..6b6a993b5c25 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -1,6 +1,8 @@
-#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/namei.h>
+#include <linux/slab.h>
+#include <linux/pid_namespace.h>
+#include "internal.h"
/*
* /proc/self:
@@ -48,12 +50,43 @@ static const struct inode_operations proc_self_inode_operations = {
.put_link = proc_self_put_link,
};
-void __init proc_self_init(void)
+static unsigned self_inum;
+
+int proc_setup_self(struct super_block *s)
{
- struct proc_dir_entry *proc_self_symlink;
- mode_t mode;
+ struct inode *root_inode = s->s_root->d_inode;
+ struct pid_namespace *ns = s->s_fs_info;
+ struct dentry *self;
+
+ mutex_lock(&root_inode->i_mutex);
+ self = d_alloc_name(s->s_root, "self");
+ if (self) {
+ struct inode *inode = new_inode_pseudo(s);
+ if (inode) {
+ inode->i_ino = self_inum;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mode = S_IFLNK | S_IRWXUGO;
+ inode->i_uid = GLOBAL_ROOT_UID;
+ inode->i_gid = GLOBAL_ROOT_GID;
+ inode->i_op = &proc_self_inode_operations;
+ d_add(self, inode);
+ } else {
+ dput(self);
+ self = ERR_PTR(-ENOMEM);
+ }
+ } else {
+ self = ERR_PTR(-ENOMEM);
+ }
+ mutex_unlock(&root_inode->i_mutex);
+ if (IS_ERR(self)) {
+ pr_err("proc_fill_super: can't allocate /proc/self\n");
+ return PTR_ERR(self);
+ }
+ ns->proc_self = self;
+ return 0;
+}
- mode = S_IFLNK | S_IRWXUGO;
- proc_self_symlink = proc_create("self", mode, NULL, NULL );
- proc_self_symlink->proc_iops = &proc_self_inode_operations;
+void __init proc_self_init(void)
+{
+ proc_alloc_inum(&self_inum);
}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index b870f740ab5a..17f7e080d7ff 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -8,7 +8,7 @@
*/
#include <linux/mm.h>
-#include <linux/proc_fs.h>
+#include <linux/kcore.h>
#include <linux/user.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
@@ -22,6 +22,7 @@
#include <linux/list.h>
#include <asm/uaccess.h>
#include <asm/io.h>
+#include "internal.h"
/* List representing chunks of contiguous memory areas and their offsets in
* vmcore file.
@@ -698,7 +699,7 @@ void vmcore_cleanup(void)
struct list_head *pos, *next;
if (proc_vmcore) {
- remove_proc_entry(proc_vmcore->name, proc_vmcore->parent);
+ proc_remove(proc_vmcore);
proc_vmcore = NULL;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 8274a794253b..605dbbcb1973 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -459,6 +459,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
ret = rw_verify_area(WRITE, file, pos, count);
if (ret >= 0) {
count = ret;
+ file_start_write(file);
if (file->f_op->write)
ret = file->f_op->write(file, buf, count, pos);
else
@@ -468,6 +469,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
add_wchar(current, ret);
}
inc_syscw(current);
+ file_end_write(file);
}
return ret;
@@ -576,7 +578,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
}
EXPORT_SYMBOL(iov_shorten);
-ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
+static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
{
struct kiocb kiocb;
@@ -601,7 +603,7 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
}
/* Do it by hand, with file-ops */
-ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
{
struct iovec *vector = iov;
@@ -743,6 +745,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
} else {
fn = (io_fn_t)file->f_op->write;
fnv = file->f_op->aio_write;
+ file_start_write(file);
}
if (fnv)
@@ -751,6 +754,9 @@ static ssize_t do_readv_writev(int type, struct file *file,
else
ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+ if (type != READ)
+ file_end_write(file);
+
out:
if (iov != iovstack)
kfree(iov);
@@ -881,6 +887,201 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
return ret;
}
+#ifdef CONFIG_COMPAT
+
+static ssize_t compat_do_readv_writev(int type, struct file *file,
+ const struct compat_iovec __user *uvector,
+ unsigned long nr_segs, loff_t *pos)
+{
+ compat_ssize_t tot_len;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ ssize_t ret;
+ io_fn_t fn;
+ iov_fn_t fnv;
+
+ ret = -EINVAL;
+ if (!file->f_op)
+ goto out;
+
+ ret = -EFAULT;
+ if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
+ goto out;
+
+ ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
+ UIO_FASTIOV, iovstack, &iov);
+ if (ret <= 0)
+ goto out;
+
+ tot_len = ret;
+ ret = rw_verify_area(type, file, pos, tot_len);
+ if (ret < 0)
+ goto out;
+
+ fnv = NULL;
+ if (type == READ) {
+ fn = file->f_op->read;
+ fnv = file->f_op->aio_read;
+ } else {
+ fn = (io_fn_t)file->f_op->write;
+ fnv = file->f_op->aio_write;
+ file_start_write(file);
+ }
+
+ if (fnv)
+ ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+ pos, fnv);
+ else
+ ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+
+ if (type != READ)
+ file_end_write(file);
+
+out:
+ if (iov != iovstack)
+ kfree(iov);
+ if ((ret + (type == READ)) > 0) {
+ if (type == READ)
+ fsnotify_access(file);
+ else
+ fsnotify_modify(file);
+ }
+ return ret;
+}
+
+static size_t compat_readv(struct file *file,
+ const struct compat_iovec __user *vec,
+ unsigned long vlen, loff_t *pos)
+{
+ ssize_t ret = -EBADF;
+
+ if (!(file->f_mode & FMODE_READ))
+ goto out;
+
+ ret = -EINVAL;
+ if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
+ goto out;
+
+ ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
+
+out:
+ if (ret > 0)
+ add_rchar(current, ret);
+ inc_syscr(current);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd,
+ const struct compat_iovec __user *,vec,
+ unsigned long, vlen)
+{
+ struct fd f = fdget(fd);
+ ssize_t ret;
+ loff_t pos;
+
+ if (!f.file)
+ return -EBADF;
+ pos = f.file->f_pos;
+ ret = compat_readv(f.file, vec, vlen, &pos);
+ f.file->f_pos = pos;
+ fdput(f);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
+ const struct compat_iovec __user *,vec,
+ unsigned long, vlen, loff_t, pos)
+{
+ struct fd f;
+ ssize_t ret;
+
+ if (pos < 0)
+ return -EINVAL;
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+ ret = -ESPIPE;
+ if (f.file->f_mode & FMODE_PREAD)
+ ret = compat_readv(f.file, vec, vlen, &pos);
+ fdput(f);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd,
+ const struct compat_iovec __user *,vec,
+ unsigned long, vlen, u32, pos_low, u32, pos_high)
+{
+ loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+ return compat_sys_preadv64(fd, vec, vlen, pos);
+}
+
+static size_t compat_writev(struct file *file,
+ const struct compat_iovec __user *vec,
+ unsigned long vlen, loff_t *pos)
+{
+ ssize_t ret = -EBADF;
+
+ if (!(file->f_mode & FMODE_WRITE))
+ goto out;
+
+ ret = -EINVAL;
+ if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
+ goto out;
+
+ ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
+
+out:
+ if (ret > 0)
+ add_wchar(current, ret);
+ inc_syscw(current);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd,
+ const struct compat_iovec __user *, vec,
+ unsigned long, vlen)
+{
+ struct fd f = fdget(fd);
+ ssize_t ret;
+ loff_t pos;
+
+ if (!f.file)
+ return -EBADF;
+ pos = f.file->f_pos;
+ ret = compat_writev(f.file, vec, vlen, &pos);
+ f.file->f_pos = pos;
+ fdput(f);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
+ const struct compat_iovec __user *,vec,
+ unsigned long, vlen, loff_t, pos)
+{
+ struct fd f;
+ ssize_t ret;
+
+ if (pos < 0)
+ return -EINVAL;
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+ ret = -ESPIPE;
+ if (f.file->f_mode & FMODE_PWRITE)
+ ret = compat_writev(f.file, vec, vlen, &pos);
+ fdput(f);
+ return ret;
+}
+
+COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd,
+ const struct compat_iovec __user *,vec,
+ unsigned long, vlen, u32, pos_low, u32, pos_high)
+{
+ loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+ return compat_sys_pwritev64(fd, vec, vlen, pos);
+}
+#endif
+
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
size_t count, loff_t max)
{
diff --git a/fs/read_write.h b/fs/read_write.h
index d07b954c6e0c..0ec530d9305b 100644
--- a/fs/read_write.h
+++ b/fs/read_write.h
@@ -7,8 +7,3 @@
typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
-
-ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
- unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
-ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
- unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 6165bd4784f6..dcaafcfc23b0 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -234,68 +234,9 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
return ret;
}
-/* Write @count bytes at position @ppos in a file indicated by @file
- from the buffer @buf.
-
- generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
- something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was
- written for (ext2/3). This is for several reasons:
-
- * It has no understanding of any filesystem specific optimizations.
-
- * It enters the filesystem repeatedly for each page that is written.
-
- * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key
- * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time
- * to reiserfs which allows for fewer tree traversals.
-
- * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks.
-
- * Asking the block allocation code for blocks one at a time is slightly less efficient.
-
- All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
- use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make
- things right finally.
-
- Future Features: providing search_by_key with hints.
-
-*/
-static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */
- const char __user * buf, /* pointer to user supplied data
- (in userspace) */
- size_t count, /* amount of bytes to write */
- loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to
- * new current position before returning. */
- )
-{
- struct inode *inode = file_inode(file); // Inode of the file that we are writing to.
- /* To simplify coding at this time, we store
- locked pages in array for now */
- struct reiserfs_transaction_handle th;
- th.t_trans_id = 0;
-
- /* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items
- * lying around (most of the disk, in fact). Despite the filesystem
- * now being a v3.6 format, the old items still can't support large
- * file sizes. Catch this case here, as the rest of the VFS layer is
- * oblivious to the different limitations between old and new items.
- * reiserfs_setattr catches this for truncates. This chunk is lifted
- * from generic_write_checks. */
- if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 &&
- *ppos + count > MAX_NON_LFS) {
- if (*ppos >= MAX_NON_LFS) {
- return -EFBIG;
- }
- if (count > MAX_NON_LFS - (unsigned long)*ppos)
- count = MAX_NON_LFS - (unsigned long)*ppos;
- }
-
- return do_sync_write(file, buf, count, ppos);
-}
-
const struct file_operations reiserfs_file_operations = {
.read = do_sync_read,
- .write = reiserfs_file_write,
+ .write = do_sync_write,
.unlocked_ioctl = reiserfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = reiserfs_compat_ioctl,
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 9cc0740adffa..33532f79b4f7 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -394,20 +394,24 @@ static int set_sb(struct super_block *sb, void *data)
return -ENOENT;
}
+struct reiserfs_seq_private {
+ struct super_block *sb;
+ int (*show) (struct seq_file *, struct super_block *);
+};
+
static void *r_start(struct seq_file *m, loff_t * pos)
{
- struct proc_dir_entry *de = m->private;
- struct super_block *s = de->parent->data;
+ struct reiserfs_seq_private *priv = m->private;
loff_t l = *pos;
if (l)
return NULL;
- if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s)))
+ if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb)))
return NULL;
- up_write(&s->s_umount);
- return s;
+ up_write(&priv->sb->s_umount);
+ return priv->sb;
}
static void *r_next(struct seq_file *m, void *v, loff_t * pos)
@@ -426,9 +430,8 @@ static void r_stop(struct seq_file *m, void *v)
static int r_show(struct seq_file *m, void *v)
{
- struct proc_dir_entry *de = m->private;
- int (*show) (struct seq_file *, struct super_block *) = de->data;
- return show(m, v);
+ struct reiserfs_seq_private *priv = m->private;
+ return priv->show(m, v);
}
static const struct seq_operations r_ops = {
@@ -440,11 +443,15 @@ static const struct seq_operations r_ops = {
static int r_open(struct inode *inode, struct file *file)
{
- int ret = seq_open(file, &r_ops);
+ struct reiserfs_seq_private *priv;
+ int ret = seq_open_private(file, &r_ops,
+ sizeof(struct reiserfs_seq_private));
if (!ret) {
struct seq_file *m = file->private_data;
- m->private = PDE(inode);
+ priv = m->private;
+ priv->sb = proc_get_parent_data(inode);
+ priv->show = PDE_DATA(inode);
}
return ret;
}
@@ -453,7 +460,7 @@ static const struct file_operations r_file_operations = {
.open = r_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = seq_release_private,
.owner = THIS_MODULE,
};
@@ -479,9 +486,8 @@ int reiserfs_proc_info_init(struct super_block *sb)
*s = '!';
spin_lock_init(&__PINFO(sb).lock);
- REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root);
+ REISERFS_SB(sb)->procdir = proc_mkdir_data(b, 0, proc_info_root, sb);
if (REISERFS_SB(sb)->procdir) {
- REISERFS_SB(sb)->procdir->data = sb;
add_file(sb, "version", show_version);
add_file(sb, "super", show_super);
add_file(sb, "per-level", show_per_level);
@@ -499,29 +505,17 @@ int reiserfs_proc_info_init(struct super_block *sb)
int reiserfs_proc_info_done(struct super_block *sb)
{
struct proc_dir_entry *de = REISERFS_SB(sb)->procdir;
- char b[BDEVNAME_SIZE];
- char *s;
+ if (de) {
+ char b[BDEVNAME_SIZE];
+ char *s;
- /* Some block devices use /'s */
- strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
- s = strchr(b, '/');
- if (s)
- *s = '!';
+ /* Some block devices use /'s */
+ strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
+ s = strchr(b, '/');
+ if (s)
+ *s = '!';
- if (de) {
- remove_proc_entry("journal", de);
- remove_proc_entry("oidmap", de);
- remove_proc_entry("on-disk-super", de);
- remove_proc_entry("bitmap", de);
- remove_proc_entry("per-level", de);
- remove_proc_entry("super", de);
- remove_proc_entry("version", de);
- }
- spin_lock(&__PINFO(sb).lock);
- __PINFO(sb).exiting = 1;
- spin_unlock(&__PINFO(sb).lock);
- if (proc_info_root) {
- remove_proc_entry(b, proc_info_root);
+ remove_proc_subtree(b, proc_info_root);
REISERFS_SB(sb)->procdir = NULL;
}
return 0;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 38bb59f3f2ad..774c1eb7f1c9 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -599,6 +599,24 @@ int single_open(struct file *file, int (*show)(struct seq_file *, void *),
}
EXPORT_SYMBOL(single_open);
+int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
+ void *data, size_t size)
+{
+ char *buf = kmalloc(size, GFP_KERNEL);
+ int ret;
+ if (!buf)
+ return -ENOMEM;
+ ret = single_open(file, show, data);
+ if (ret) {
+ kfree(buf);
+ return ret;
+ }
+ ((struct seq_file *)file->private_data)->buf = buf;
+ ((struct seq_file *)file->private_data)->size = size;
+ return 0;
+}
+EXPORT_SYMBOL(single_open_size);
+
int single_release(struct inode *inode, struct file *file)
{
const struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
diff --git a/fs/splice.c b/fs/splice.c
index 6b485b8753bd..e6b25598c8c4 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -219,7 +219,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
page_nr++;
ret += buf->len;
- if (pipe->inode)
+ if (pipe->files)
do_wakeup = 1;
if (!--spd->nr_pages)
@@ -829,7 +829,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
ops->release(pipe, buf);
pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
pipe->nrbufs--;
- if (pipe->inode)
+ if (pipe->files)
sd->need_wakeup = true;
}
@@ -1001,8 +1001,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
};
ssize_t ret;
- sb_start_write(inode->i_sb);
-
pipe_lock(pipe);
splice_from_pipe_begin(&sd);
@@ -1038,7 +1036,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
*ppos += ret;
balance_dirty_pages_ratelimited(mapping);
}
- sb_end_write(inode->i_sb);
return ret;
}
@@ -1118,7 +1115,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
else
splice_write = default_file_splice_write;
- return splice_write(pipe, out, ppos, len, flags);
+ file_start_write(out);
+ ret = splice_write(pipe, out, ppos, len, flags);
+ file_end_write(out);
+ return ret;
}
/*
@@ -1184,7 +1184,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
*/
pipe = current->splice_pipe;
if (unlikely(!pipe)) {
- pipe = alloc_pipe_info(NULL);
+ pipe = alloc_pipe_info();
if (!pipe)
return -ENOMEM;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f03bf1a456fb..3800128d2171 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -775,8 +775,6 @@ xfs_file_aio_write(
if (ocount == 0)
return 0;
- sb_start_write(inode->i_sb);
-
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
ret = -EIO;
goto out;
@@ -800,7 +798,6 @@ xfs_file_aio_write(
}
out:
- sb_end_write(inode->i_sb);
return ret;
}