summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-03-10 12:57:26 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-03-10 12:57:26 -0700
commite6a4b6f5eaa8478b7a0b9a17e40c51463631db1a (patch)
tree35e164226b3a590481469a1739a2eb2a7b1217c1
parent2b64c5434d1303646388e748b7add69624a1cfee (diff)
parentbd2a31d522344b3ac2fb680bd2366e77a9bd8209 (diff)
downloadlinux-e6a4b6f5eaa8478b7a0b9a17e40c51463631db1a.tar.gz
linux-e6a4b6f5eaa8478b7a0b9a17e40c51463631db1a.tar.bz2
linux-e6a4b6f5eaa8478b7a0b9a17e40c51463631db1a.zip
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs fixes from Al Viro. Clean up file table accesses (get rid of fget_light() in favor of the fdget() interface), add proper file position locking. * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: get rid of fget_light() sockfd_lookup_light(): switch to fdget^W^Waway from fget_light vfs: atomic f_pos accesses as per POSIX ocfs2 syncs the wrong range...
-rw-r--r--fs/file.c56
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/namei.c2
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/open.c4
-rw-r--r--fs/read_write.c40
-rw-r--r--include/linux/file.h27
-rw-r--r--include/linux/fs.h8
-rw-r--r--net/socket.c13
9 files changed, 107 insertions, 52 deletions
diff --git a/fs/file.c b/fs/file.c
index db25c2bdfe46..60a45e9f5323 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -683,35 +683,65 @@ EXPORT_SYMBOL(fget_raw);
* The fput_needed flag returned by fget_light should be passed to the
* corresponding fput_light.
*/
-struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed)
+static unsigned long __fget_light(unsigned int fd, fmode_t mask)
{
struct files_struct *files = current->files;
struct file *file;
- *fput_needed = 0;
if (atomic_read(&files->count) == 1) {
file = __fcheck_files(files, fd);
- if (file && (file->f_mode & mask))
- file = NULL;
+ if (!file || unlikely(file->f_mode & mask))
+ return 0;
+ return (unsigned long)file;
} else {
file = __fget(fd, mask);
- if (file)
- *fput_needed = 1;
+ if (!file)
+ return 0;
+ return FDPUT_FPUT | (unsigned long)file;
}
-
- return file;
}
-struct file *fget_light(unsigned int fd, int *fput_needed)
+unsigned long __fdget(unsigned int fd)
{
- return __fget_light(fd, FMODE_PATH, fput_needed);
+ return __fget_light(fd, FMODE_PATH);
}
-EXPORT_SYMBOL(fget_light);
+EXPORT_SYMBOL(__fdget);
-struct file *fget_raw_light(unsigned int fd, int *fput_needed)
+unsigned long __fdget_raw(unsigned int fd)
{
- return __fget_light(fd, 0, fput_needed);
+ return __fget_light(fd, 0);
+}
+
+unsigned long __fdget_pos(unsigned int fd)
+{
+ struct files_struct *files = current->files;
+ struct file *file;
+ unsigned long v;
+
+ if (atomic_read(&files->count) == 1) {
+ file = __fcheck_files(files, fd);
+ v = 0;
+ } else {
+ file = __fget(fd, 0);
+ v = FDPUT_FPUT;
+ }
+ if (!file)
+ return 0;
+
+ if (file->f_mode & FMODE_ATOMIC_POS) {
+ if (file_count(file) > 1) {
+ v |= FDPUT_POS_UNLOCK;
+ mutex_lock(&file->f_pos_lock);
+ }
+ }
+ return v | (unsigned long)file;
}
+/*
+ * We only lock f_pos if we have threads or if the file might be
+ * shared with another process. In both cases we'll have an elevated
+ * file count (done either by fdget() or by fork()).
+ */
+
void set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
diff --git a/fs/file_table.c b/fs/file_table.c
index 5fff9030be34..5b24008ea4f6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -135,6 +135,7 @@ struct file *get_empty_filp(void)
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
spin_lock_init(&f->f_lock);
+ mutex_init(&f->f_pos_lock);
eventpoll_init_file(f);
/* f->f_version: 0 */
return f;
diff --git a/fs/namei.c b/fs/namei.c
index 385f7817bfcc..2f730ef9b4b3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1884,7 +1884,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
- if (f.need_put)
+ if (f.flags & FDPUT_FPUT)
*fp = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
rcu_read_lock();
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8450262bcf2a..51632c40e896 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2393,8 +2393,8 @@ out_dio:
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
- ret = filemap_fdatawrite_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
if (ret < 0)
written = ret;
@@ -2407,8 +2407,8 @@ out_dio:
}
if (!ret)
- ret = filemap_fdatawait_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawait_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
}
/*
diff --git a/fs/open.c b/fs/open.c
index 4b3e1edf2fe4..b9ed8b25c108 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -705,6 +705,10 @@ static int do_dentry_open(struct file *f,
return 0;
}
+ /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
+ if (S_ISREG(inode->i_mode))
+ f->f_mode |= FMODE_ATOMIC_POS;
+
f->f_op = fops_get(inode->i_fop);
if (unlikely(WARN_ON(!f->f_op))) {
error = -ENODEV;
diff --git a/fs/read_write.c b/fs/read_write.c
index edc5746a902a..54e19b9392dc 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -264,10 +264,22 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
}
EXPORT_SYMBOL(vfs_llseek);
+static inline struct fd fdget_pos(int fd)
+{
+ return __to_fd(__fdget_pos(fd));
+}
+
+static inline void fdput_pos(struct fd f)
+{
+ if (f.flags & FDPUT_POS_UNLOCK)
+ mutex_unlock(&f.file->f_pos_lock);
+ fdput(f);
+}
+
SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
off_t retval;
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
if (!f.file)
return -EBADF;
@@ -278,7 +290,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
}
- fdput(f);
+ fdput_pos(f);
return retval;
}
@@ -498,7 +510,7 @@ static inline void file_pos_write(struct file *file, loff_t pos)
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
@@ -506,7 +518,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
ret = vfs_read(f.file, buf, count, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
- fdput(f);
+ fdput_pos(f);
}
return ret;
}
@@ -514,7 +526,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
size_t, count)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
@@ -522,7 +534,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
ret = vfs_write(f.file, buf, count, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
- fdput(f);
+ fdput_pos(f);
}
return ret;
@@ -797,7 +809,7 @@ EXPORT_SYMBOL(vfs_writev);
SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
@@ -805,7 +817,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
ret = vfs_readv(f.file, vec, vlen, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
- fdput(f);
+ fdput_pos(f);
}
if (ret > 0)
@@ -817,7 +829,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
@@ -825,7 +837,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
ret = vfs_writev(f.file, vec, vlen, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
- fdput(f);
+ fdput_pos(f);
}
if (ret > 0)
@@ -968,7 +980,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
compat_ulong_t, vlen)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret;
loff_t pos;
@@ -978,7 +990,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
ret = compat_readv(f.file, vec, vlen, &pos);
if (ret >= 0)
f.file->f_pos = pos;
- fdput(f);
+ fdput_pos(f);
return ret;
}
@@ -1035,7 +1047,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
const struct compat_iovec __user *, vec,
compat_ulong_t, vlen)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret;
loff_t pos;
@@ -1045,7 +1057,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
ret = compat_writev(f.file, vec, vlen, &pos);
if (ret >= 0)
f.file->f_pos = pos;
- fdput(f);
+ fdput_pos(f);
return ret;
}
diff --git a/include/linux/file.h b/include/linux/file.h
index cbacf4faf447..4d69123377a2 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -28,33 +28,36 @@ static inline void fput_light(struct file *file, int fput_needed)
struct fd {
struct file *file;
- int need_put;
+ unsigned int flags;
};
+#define FDPUT_FPUT 1
+#define FDPUT_POS_UNLOCK 2
static inline void fdput(struct fd fd)
{
- if (fd.need_put)
+ if (fd.flags & FDPUT_FPUT)
fput(fd.file);
}
extern struct file *fget(unsigned int fd);
-extern struct file *fget_light(unsigned int fd, int *fput_needed);
+extern struct file *fget_raw(unsigned int fd);
+extern unsigned long __fdget(unsigned int fd);
+extern unsigned long __fdget_raw(unsigned int fd);
+extern unsigned long __fdget_pos(unsigned int fd);
-static inline struct fd fdget(unsigned int fd)
+static inline struct fd __to_fd(unsigned long v)
{
- int b;
- struct file *f = fget_light(fd, &b);
- return (struct fd){f,b};
+ return (struct fd){(struct file *)(v & ~3),v & 3};
}
-extern struct file *fget_raw(unsigned int fd);
-extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
+static inline struct fd fdget(unsigned int fd)
+{
+ return __to_fd(__fdget(fd));
+}
static inline struct fd fdget_raw(unsigned int fd)
{
- int b;
- struct file *f = fget_raw_light(fd, &b);
- return (struct fd){f,b};
+ return __to_fd(__fdget_raw(fd));
}
extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 60829565e552..23b2a35d712e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -123,6 +123,9 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)0x4000)
+/* File needs atomic accesses to f_pos */
+#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
@@ -780,13 +783,14 @@ struct file {
const struct file_operations *f_op;
/*
- * Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR.
+ * Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
+ struct mutex f_pos_lock;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
@@ -808,7 +812,7 @@ struct file {
#ifdef CONFIG_DEBUG_WRITECOUNT
unsigned long f_mnt_write_state;
#endif
-};
+} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
struct file_handle {
__u32 handle_bytes;
diff --git a/net/socket.c b/net/socket.c
index 879933aaed4c..fd8d86e06f95 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -450,16 +450,17 @@ EXPORT_SYMBOL(sockfd_lookup);
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
- struct file *file;
+ struct fd f = fdget(fd);
struct socket *sock;
*err = -EBADF;
- file = fget_light(fd, fput_needed);
- if (file) {
- sock = sock_from_file(file, err);
- if (sock)
+ if (f.file) {
+ sock = sock_from_file(f.file, err);
+ if (likely(sock)) {
+ *fput_needed = f.flags;
return sock;
- fput_light(file, *fput_needed);
+ }
+ fdput(f);
}
return NULL;
}