summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/readdir.c63
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/inode.c7
-rw-r--r--fs/io-wq.c10
-rw-r--r--fs/io_uring.c690
-rw-r--r--fs/locks.c2
-rw-r--r--fs/notify/fsnotify.c4
-rw-r--r--fs/quota/dquot.c1
-rw-r--r--fs/super.c4
11 files changed, 430 insertions, 356 deletions
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ce9bac756c2a..40705e862451 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1693,6 +1693,7 @@ struct cifs_fattr {
struct timespec64 cf_atime;
struct timespec64 cf_mtime;
struct timespec64 cf_ctime;
+ u32 cf_cifstag;
};
static inline void free_dfs_info_param(struct dfs_info3_param *param)
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 3925a7bfc74d..d17587c2c4ab 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -139,6 +139,28 @@ retry:
dput(dentry);
}
+static bool reparse_file_needs_reval(const struct cifs_fattr *fattr)
+{
+ if (!(fattr->cf_cifsattrs & ATTR_REPARSE))
+ return false;
+ /*
+ * The DFS tags should be only intepreted by server side as per
+ * MS-FSCC 2.1.2.1, but let's include them anyway.
+ *
+ * Besides, if cf_cifstag is unset (0), then we still need it to be
+ * revalidated to know exactly what reparse point it is.
+ */
+ switch (fattr->cf_cifstag) {
+ case IO_REPARSE_TAG_DFS:
+ case IO_REPARSE_TAG_DFSR:
+ case IO_REPARSE_TAG_SYMLINK:
+ case IO_REPARSE_TAG_NFS:
+ case 0:
+ return true;
+ }
+ return false;
+}
+
static void
cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
{
@@ -158,7 +180,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
* is a symbolic link, DFS referral or a reparse point with a direct
* access like junctions, deduplicated files, NFS symlinks.
*/
- if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ if (reparse_file_needs_reval(fattr))
fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
/* non-unix readdir doesn't provide nlink */
@@ -194,19 +216,37 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
}
}
+static void __dir_info_to_fattr(struct cifs_fattr *fattr, const void *info)
+{
+ const FILE_DIRECTORY_INFO *fi = info;
+
+ memset(fattr, 0, sizeof(*fattr));
+ fattr->cf_cifsattrs = le32_to_cpu(fi->ExtFileAttributes);
+ fattr->cf_eof = le64_to_cpu(fi->EndOfFile);
+ fattr->cf_bytes = le64_to_cpu(fi->AllocationSize);
+ fattr->cf_createtime = le64_to_cpu(fi->CreationTime);
+ fattr->cf_atime = cifs_NTtimeToUnix(fi->LastAccessTime);
+ fattr->cf_ctime = cifs_NTtimeToUnix(fi->ChangeTime);
+ fattr->cf_mtime = cifs_NTtimeToUnix(fi->LastWriteTime);
+}
+
void
cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
struct cifs_sb_info *cifs_sb)
{
- memset(fattr, 0, sizeof(*fattr));
- fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
- fattr->cf_eof = le64_to_cpu(info->EndOfFile);
- fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
- fattr->cf_createtime = le64_to_cpu(info->CreationTime);
- fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
- fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
- fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
+ __dir_info_to_fattr(fattr, info);
+ cifs_fill_common_info(fattr, cifs_sb);
+}
+static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr,
+ SEARCH_ID_FULL_DIR_INFO *info,
+ struct cifs_sb_info *cifs_sb)
+{
+ __dir_info_to_fattr(fattr, info);
+
+ /* See MS-FSCC 2.4.18 FileIdFullDirectoryInformation */
+ if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ fattr->cf_cifstag = le32_to_cpu(info->EaSize);
cifs_fill_common_info(fattr, cifs_sb);
}
@@ -755,6 +795,11 @@ static int cifs_filldir(char *find_entry, struct file *file,
(FIND_FILE_STANDARD_INFO *)find_entry,
cifs_sb);
break;
+ case SMB_FIND_FILE_ID_FULL_DIR_INFO:
+ cifs_fulldir_info_to_fattr(&fattr,
+ (SEARCH_ID_FULL_DIR_INFO *)find_entry,
+ cifs_sb);
+ break;
default:
cifs_dir_info_to_fattr(&fattr,
(FILE_DIRECTORY_INFO *)find_entry,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 8b0b512c5792..afe1f03aabe3 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -67,7 +67,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
goto out;
- if (oparms->tcon->use_resilient) {
+ if (oparms->tcon->use_resilient) {
/* default timeout is 0, servers pick default (120 seconds) */
nr_ioctl_req.Timeout =
cpu_to_le32(oparms->tcon->handle_timeout);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index d31b6c72b476..dc1a1d5d825b 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -35,11 +35,11 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inode_list_lock);
- cond_resched();
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/inode.c b/fs/inode.c
index fef457a42882..96d62d97694e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -676,6 +676,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
struct inode *inode, *next;
LIST_HEAD(dispose);
+again:
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
@@ -698,6 +699,12 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
inode_lru_list_del(inode);
spin_unlock(&inode->i_lock);
list_add(&inode->i_lru, &dispose);
+ if (need_resched()) {
+ spin_unlock(&sb->s_inode_list_lock);
+ cond_resched();
+ dispose_list(&dispose);
+ goto again;
+ }
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 11e80b7252a8..541c8a3e0bbb 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -92,7 +92,6 @@ struct io_wqe {
struct io_wqe_acct acct[2];
struct hlist_nulls_head free_list;
- struct hlist_nulls_head busy_list;
struct list_head all_list;
struct io_wq *wq;
@@ -327,7 +326,6 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
if (worker->flags & IO_WORKER_F_FREE) {
worker->flags &= ~IO_WORKER_F_FREE;
hlist_nulls_del_init_rcu(&worker->nulls_node);
- hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->busy_list);
}
/*
@@ -365,7 +363,6 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
{
if (!(worker->flags & IO_WORKER_F_FREE)) {
worker->flags |= IO_WORKER_F_FREE;
- hlist_nulls_del_init_rcu(&worker->nulls_node);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
}
@@ -432,6 +429,8 @@ next:
if (signal_pending(current))
flush_signals(current);
+ cond_resched();
+
spin_lock_irq(&worker->lock);
worker->cur_work = work;
spin_unlock_irq(&worker->lock);
@@ -798,10 +797,6 @@ void io_wq_cancel_all(struct io_wq *wq)
set_bit(IO_WQ_BIT_CANCEL, &wq->state);
- /*
- * Browse both lists, as there's a gap between handing work off
- * to a worker and the worker putting itself on the busy_list
- */
rcu_read_lock();
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
@@ -1049,7 +1044,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list);
INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
- INIT_HLIST_NULLS_HEAD(&wqe->busy_list, 1);
INIT_LIST_HEAD(&wqe->all_list);
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f084e3cf835..562e3a1a1bf9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -330,6 +330,26 @@ struct io_timeout {
struct file *file;
u64 addr;
int flags;
+ unsigned count;
+};
+
+struct io_rw {
+ /* NOTE: kiocb has the file as the first member, so don't do it here */
+ struct kiocb kiocb;
+ u64 addr;
+ u64 len;
+};
+
+struct io_connect {
+ struct file *file;
+ struct sockaddr __user *addr;
+ int addr_len;
+};
+
+struct io_sr_msg {
+ struct file *file;
+ struct user_msghdr __user *msg;
+ int msg_flags;
};
struct io_async_connect {
@@ -351,7 +371,6 @@ struct io_async_rw {
};
struct io_async_ctx {
- struct io_uring_sqe sqe;
union {
struct io_async_rw rw;
struct io_async_msghdr msg;
@@ -369,15 +388,16 @@ struct io_async_ctx {
struct io_kiocb {
union {
struct file *file;
- struct kiocb rw;
+ struct io_rw rw;
struct io_poll_iocb poll;
struct io_accept accept;
struct io_sync sync;
struct io_cancel cancel;
struct io_timeout timeout;
+ struct io_connect connect;
+ struct io_sr_msg sr_msg;
};
- const struct io_uring_sqe *sqe;
struct io_async_ctx *io;
struct file *ring_file;
int ring_fd;
@@ -411,7 +431,6 @@ struct io_kiocb {
#define REQ_F_INFLIGHT 16384 /* on inflight list */
#define REQ_F_COMP_LOCKED 32768 /* completion under lock */
#define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */
-#define REQ_F_PREPPED 131072 /* request already opcode prepared */
u64 user_data;
u32 result;
u32 sequence;
@@ -609,33 +628,31 @@ static inline bool io_prep_async_work(struct io_kiocb *req,
{
bool do_hashed = false;
- if (req->sqe) {
- switch (req->opcode) {
- case IORING_OP_WRITEV:
- case IORING_OP_WRITE_FIXED:
- /* only regular files should be hashed for writes */
- if (req->flags & REQ_F_ISREG)
- do_hashed = true;
- /* fall-through */
- case IORING_OP_READV:
- case IORING_OP_READ_FIXED:
- case IORING_OP_SENDMSG:
- case IORING_OP_RECVMSG:
- case IORING_OP_ACCEPT:
- case IORING_OP_POLL_ADD:
- case IORING_OP_CONNECT:
- /*
- * We know REQ_F_ISREG is not set on some of these
- * opcodes, but this enables us to keep the check in
- * just one place.
- */
- if (!(req->flags & REQ_F_ISREG))
- req->work.flags |= IO_WQ_WORK_UNBOUND;
- break;
- }
- if (io_req_needs_user(req))
- req->work.flags |= IO_WQ_WORK_NEEDS_USER;
+ switch (req->opcode) {
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ /* only regular files should be hashed for writes */
+ if (req->flags & REQ_F_ISREG)
+ do_hashed = true;
+ /* fall-through */
+ case IORING_OP_READV:
+ case IORING_OP_READ_FIXED:
+ case IORING_OP_SENDMSG:
+ case IORING_OP_RECVMSG:
+ case IORING_OP_ACCEPT:
+ case IORING_OP_POLL_ADD:
+ case IORING_OP_CONNECT:
+ /*
+ * We know REQ_F_ISREG is not set on some of these
+ * opcodes, but this enables us to keep the check in
+ * just one place.
+ */
+ if (!(req->flags & REQ_F_ISREG))
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
+ break;
}
+ if (io_req_needs_user(req))
+ req->work.flags |= IO_WQ_WORK_NEEDS_USER;
*link = io_prep_linked_timeout(req);
return do_hashed;
@@ -1180,7 +1197,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
ret = 0;
list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
/*
* Move completed entries to our local list. If we find a
@@ -1335,7 +1352,7 @@ static inline void req_set_fail_links(struct io_kiocb *req)
static void io_complete_rw_common(struct kiocb *kiocb, long res)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
@@ -1347,7 +1364,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
io_complete_rw_common(kiocb, res);
io_put_req(req);
@@ -1355,7 +1372,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
struct io_kiocb *nxt = NULL;
io_complete_rw_common(kiocb, res);
@@ -1366,7 +1383,7 @@ static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
@@ -1400,7 +1417,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
list);
- if (list_req->rw.ki_filp != req->rw.ki_filp)
+ if (list_req->file != req->file)
ctx->poll_multi_file = true;
}
@@ -1471,11 +1488,11 @@ static bool io_file_supports_async(struct file *file)
return false;
}
-static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
+static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
unsigned ioprio;
int ret;
@@ -1524,6 +1541,12 @@ static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
return -EINVAL;
kiocb->ki_complete = io_complete_rw;
}
+
+ req->rw.addr = READ_ONCE(sqe->addr);
+ req->rw.len = READ_ONCE(sqe->len);
+ /* we own ->private, reuse it for the buffer index */
+ req->rw.kiocb.private = (void *) (unsigned long)
+ READ_ONCE(sqe->buf_index);
return 0;
}
@@ -1557,11 +1580,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
io_rw_done(kiocb, ret);
}
-static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
- const struct io_uring_sqe *sqe,
+static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
struct iov_iter *iter)
{
- size_t len = READ_ONCE(sqe->len);
+ struct io_ring_ctx *ctx = req->ctx;
+ size_t len = req->rw.len;
struct io_mapped_ubuf *imu;
unsigned index, buf_index;
size_t offset;
@@ -1571,13 +1594,13 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
if (unlikely(!ctx->user_bufs))
return -EFAULT;
- buf_index = READ_ONCE(sqe->buf_index);
+ buf_index = (unsigned long) req->rw.kiocb.private;
if (unlikely(buf_index >= ctx->nr_user_bufs))
return -EFAULT;
index = array_index_nospec(buf_index, ctx->nr_user_bufs);
imu = &ctx->user_bufs[index];
- buf_addr = READ_ONCE(sqe->addr);
+ buf_addr = req->rw.addr;
/* overflow */
if (buf_addr + len < buf_addr)
@@ -1634,25 +1657,20 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
struct iovec **iovec, struct iov_iter *iter)
{
- const struct io_uring_sqe *sqe = req->sqe;
- void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
- size_t sqe_len = READ_ONCE(sqe->len);
+ void __user *buf = u64_to_user_ptr(req->rw.addr);
+ size_t sqe_len = req->rw.len;
u8 opcode;
- /*
- * We're reading ->opcode for the second time, but the first read
- * doesn't care whether it's _FIXED or not, so it doesn't matter
- * whether ->opcode changes concurrently. The first read does care
- * about whether it is a READ or a WRITE, so we don't trust this read
- * for that purpose and instead let the caller pass in the read/write
- * flag.
- */
opcode = req->opcode;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
*iovec = NULL;
- return io_import_fixed(req->ctx, rw, sqe, iter);
+ return io_import_fixed(req, rw, iter);
}
+ /* buffer index only valid with fixed read/write */
+ if (req->rw.kiocb.private)
+ return -EINVAL;
+
if (req->io) {
struct io_async_rw *iorw = &req->io->rw;
@@ -1750,13 +1768,7 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
static int io_alloc_async_ctx(struct io_kiocb *req)
{
req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
- if (req->io) {
- memcpy(&req->io->sqe, req->sqe, sizeof(req->io->sqe));
- req->sqe = &req->io->sqe;
- return 0;
- }
-
- return 1;
+ return req->io == NULL;
}
static void io_rw_async(struct io_wq_work **workptr)
@@ -1782,46 +1794,52 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
return 0;
}
-static int io_read_prep(struct io_kiocb *req, struct iovec **iovec,
- struct iov_iter *iter, bool force_nonblock)
+static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
+ struct io_async_ctx *io;
+ struct iov_iter iter;
ssize_t ret;
- ret = io_prep_rw(req, force_nonblock);
+ ret = io_prep_rw(req, sqe, force_nonblock);
if (ret)
return ret;
if (unlikely(!(req->file->f_mode & FMODE_READ)))
return -EBADF;
- return io_import_iovec(READ, req, iovec, iter);
+ if (!req->io)
+ return 0;
+
+ io = req->io;
+ io->rw.iov = io->rw.fast_iov;
+ req->io = NULL;
+ ret = io_import_iovec(READ, req, &io->rw.iov, &iter);
+ req->io = io;
+ if (ret < 0)
+ return ret;
+
+ io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+ return 0;
}
static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter;
- struct file *file;
size_t iov_count;
ssize_t io_size, ret;
- if (!req->io) {
- ret = io_read_prep(req, &iovec, &iter, force_nonblock);
- if (ret < 0)
- return ret;
- } else {
- ret = io_import_iovec(READ, req, &iovec, &iter);
- if (ret < 0)
- return ret;
- }
+ ret = io_import_iovec(READ, req, &iovec, &iter);
+ if (ret < 0)
+ return ret;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
- req->rw.ki_flags &= ~IOCB_NOWAIT;
+ req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
- file = req->file;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -1830,20 +1848,20 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
* we know to async punt it even if it was opened O_NONBLOCK
*/
- if (force_nonblock && !io_file_supports_async(file)) {
+ if (force_nonblock && !io_file_supports_async(req->file)) {
req->flags |= REQ_F_MUST_PUNT;
goto copy_iov;
}
iov_count = iov_iter_count(&iter);
- ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
- if (file->f_op->read_iter)
- ret2 = call_read_iter(file, kiocb, &iter);
+ if (req->file->f_op->read_iter)
+ ret2 = call_read_iter(req->file, kiocb, &iter);
else
- ret2 = loop_rw_iter(READ, file, kiocb, &iter);
+ ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
/*
* In case of a short read, punt to async. This can happen
@@ -1875,46 +1893,52 @@ out_free:
return ret;
}
-static int io_write_prep(struct io_kiocb *req, struct iovec **iovec,
- struct iov_iter *iter, bool force_nonblock)
+static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
+ struct io_async_ctx *io;
+ struct iov_iter iter;
ssize_t ret;
- ret = io_prep_rw(req, force_nonblock);
+ ret = io_prep_rw(req, sqe, force_nonblock);
if (ret)
return ret;
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
return -EBADF;
- return io_import_iovec(WRITE, req, iovec, iter);
+ if (!req->io)
+ return 0;
+
+ io = req->io;
+ io->rw.iov = io->rw.fast_iov;
+ req->io = NULL;
+ ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter);
+ req->io = io;
+ if (ret < 0)
+ return ret;
+
+ io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+ return 0;
}
static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter;
- struct file *file;
size_t iov_count;
ssize_t ret, io_size;
- if (!req->io) {
- ret = io_write_prep(req, &iovec, &iter, force_nonblock);
- if (ret < 0)
- return ret;
- } else {
- ret = io_import_iovec(WRITE, req, &iovec, &iter);
- if (ret < 0)
- return ret;
- }
+ ret = io_import_iovec(WRITE, req, &iovec, &iter);
+ if (ret < 0)
+ return ret;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
- req->rw.ki_flags &= ~IOCB_NOWAIT;
+ req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
- file = kiocb->ki_filp;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -1934,7 +1958,7 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
goto copy_iov;
iov_count = iov_iter_count(&iter);
- ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
@@ -1946,17 +1970,17 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
* we return to userspace.
*/
if (req->flags & REQ_F_ISREG) {
- __sb_start_write(file_inode(file)->i_sb,
+ __sb_start_write(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE, true);
- __sb_writers_release(file_inode(file)->i_sb,
+ __sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE);
}
kiocb->ki_flags |= IOCB_WRITE;
- if (file->f_op->write_iter)
- ret2 = call_write_iter(file, kiocb, &iter);
+ if (req->file->f_op->write_iter)
+ ret2 = call_write_iter(req->file, kiocb, &iter);
else
- ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
+ ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
if (!force_nonblock || ret2 != -EAGAIN) {
kiocb_done(kiocb, ret2, nxt, req->in_async);
} else {
@@ -1989,13 +2013,10 @@ static int io_nop(struct io_kiocb *req)
return 0;
}
-static int io_prep_fsync(struct io_kiocb *req)
+static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (!req->file)
return -EBADF;
@@ -2010,7 +2031,6 @@ static int io_prep_fsync(struct io_kiocb *req)
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->len);
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2036,7 +2056,7 @@ static void io_fsync_finish(struct io_wq_work **workptr)
if (io_req_cancelled(req))
return;
- ret = vfs_fsync_range(req->rw.ki_filp, req->sync.off,
+ ret = vfs_fsync_range(req->file, req->sync.off,
end > 0 ? end : LLONG_MAX,
req->sync.flags & IORING_FSYNC_DATASYNC);
if (ret < 0)
@@ -2051,11 +2071,6 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct io_wq_work *work, *old_work;
- int ret;
-
- ret = io_prep_fsync(req);
- if (ret)
- return ret;
/* fsync always requires a blocking context */
if (force_nonblock) {
@@ -2071,13 +2086,10 @@ static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
return 0;
}
-static int io_prep_sfr(struct io_kiocb *req)
+static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (!req->file)
return -EBADF;
@@ -2089,7 +2101,6 @@ static int io_prep_sfr(struct io_kiocb *req)
req->sync.off = READ_ONCE(sqe->off);
req->sync.len = READ_ONCE(sqe->len);
req->sync.flags = READ_ONCE(sqe->sync_range_flags);
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2102,7 +2113,7 @@ static void io_sync_file_range_finish(struct io_wq_work **workptr)
if (io_req_cancelled(req))
return;
- ret = sync_file_range(req->rw.ki_filp, req->sync.off, req->sync.len,
+ ret = sync_file_range(req->file, req->sync.off, req->sync.len,
req->sync.flags);
if (ret < 0)
req_set_fail_links(req);
@@ -2116,11 +2127,6 @@ static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct io_wq_work *work, *old_work;
- int ret;
-
- ret = io_prep_sfr(req);
- if (ret)
- return ret;
/* sync_file_range always requires a blocking context */
if (force_nonblock) {
@@ -2149,19 +2155,23 @@ static void io_sendrecv_async(struct io_wq_work **workptr)
}
#endif
-static int io_sendmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct user_msghdr __user *msg;
- unsigned flags;
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct io_async_ctx *io = req->io;
+
+ sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ if (!io)
+ return 0;
- flags = READ_ONCE(sqe->msg_flags);
- msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
io->msg.iov = io->msg.fast_iov;
- return sendmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.iov);
+ return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+ &io->msg.iov);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
@@ -2169,7 +2179,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -2183,12 +2192,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
struct sockaddr_storage addr;
unsigned flags;
- flags = READ_ONCE(sqe->msg_flags);
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
- flags |= MSG_DONTWAIT;
-
if (req->io) {
kmsg = &req->io->msg;
kmsg->msg.msg_name = &addr;
@@ -2197,13 +2200,24 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
+ struct io_sr_msg *sr = &req->sr_msg;
+
kmsg = &io.msg;
kmsg->msg.msg_name = &addr;
- ret = io_sendmsg_prep(req, &io);
+
+ io.msg.iov = io.msg.fast_iov;
+ ret = sendmsg_copy_msghdr(&io.msg.msg, sr->msg,
+ sr->msg_flags, &io.msg.iov);
if (ret)
- goto out;
+ return ret;
}
+ flags = req->sr_msg.msg_flags;
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (force_nonblock && ret == -EAGAIN) {
if (req->io)
@@ -2218,7 +2232,6 @@ static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR;
}
-out:
if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
io_cqring_add_event(req, ret);
@@ -2231,20 +2244,24 @@ out:
#endif
}
-static int io_recvmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_recvmsg_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct user_msghdr __user *msg;
- unsigned flags;
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct io_async_ctx *io = req->io;
+
+ sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ if (!io)
+ return 0;
- flags = READ_ONCE(sqe->msg_flags);
- msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
io->msg.iov = io->msg.fast_iov;
- return recvmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.uaddr,
- &io->msg.iov);
+ return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+ &io->msg.uaddr, &io->msg.iov);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
@@ -2252,7 +2269,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -2262,19 +2278,10 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
sock = sock_from_file(req->file, &ret);
if (sock) {
- struct user_msghdr __user *msg;
struct io_async_ctx io;
struct sockaddr_storage addr;
unsigned flags;
- flags = READ_ONCE(sqe->msg_flags);
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
- flags |= MSG_DONTWAIT;
-
- msg = (struct user_msghdr __user *) (unsigned long)
- READ_ONCE(sqe->addr);
if (req->io) {
kmsg = &req->io->msg;
kmsg->msg.msg_name = &addr;
@@ -2283,14 +2290,27 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
kmsg->iov = kmsg->fast_iov;
kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
+ struct io_sr_msg *sr = &req->sr_msg;
+
kmsg = &io.msg;
kmsg->msg.msg_name = &addr;
- ret = io_recvmsg_prep(req, &io);
+
+ io.msg.iov = io.msg.fast_iov;
+ ret = recvmsg_copy_msghdr(&io.msg.msg, sr->msg,
+ sr->msg_flags, &io.msg.uaddr,
+ &io.msg.iov);
if (ret)
- goto out;
+ return ret;
}
- ret = __sys_recvmsg_sock(sock, &kmsg->msg, msg, kmsg->uaddr, flags);
+ flags = req->sr_msg.msg_flags;
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
+ ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
+ kmsg->uaddr, flags);
if (force_nonblock && ret == -EAGAIN) {
if (req->io)
return -EAGAIN;
@@ -2304,7 +2324,6 @@ static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
ret = -EINTR;
}
-out:
if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
io_cqring_add_event(req, ret);
@@ -2317,25 +2336,19 @@ out:
#endif
}
-static int io_accept_prep(struct io_kiocb *req)
+static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_accept *accept = &req->accept;
- if (req->flags & REQ_F_PREPPED)
- return 0;
-
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->len || sqe->buf_index)
return -EINVAL;
- accept->addr = (struct sockaddr __user *)
- (unsigned long) READ_ONCE(sqe->addr);
- accept->addr_len = (int __user *) (unsigned long) READ_ONCE(sqe->addr2);
+ accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
- req->flags |= REQ_F_PREPPED;
return 0;
#else
return -EOPNOTSUPP;
@@ -2383,10 +2396,6 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
#if defined(CONFIG_NET)
int ret;
- ret = io_accept_prep(req);
- if (ret)
- return ret;
-
ret = __io_accept(req, nxt, force_nonblock);
if (ret == -EAGAIN && force_nonblock) {
req->work.func = io_accept_finish;
@@ -2400,18 +2409,27 @@ static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
#endif
}
-static int io_connect_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct sockaddr __user *addr;
- int addr_len;
+ struct io_connect *conn = &req->connect;
+ struct io_async_ctx *io = req->io;
+
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
+ return -EINVAL;
- addr = (struct sockaddr __user *) (unsigned long) READ_ONCE(sqe->addr);
- addr_len = READ_ONCE(sqe->addr2);
- return move_addr_to_kernel(addr, addr_len, &io->connect.address);
+ conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ conn->addr_len = READ_ONCE(sqe->addr2);
+
+ if (!io)
+ return 0;
+
+ return move_addr_to_kernel(conn->addr, conn->addr_len,
+ &io->connect.address);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
@@ -2419,30 +2437,25 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
struct io_async_ctx __io, *io;
unsigned file_flags;
- int addr_len, ret;
-
- if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
- return -EINVAL;
- if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
- return -EINVAL;
-
- addr_len = READ_ONCE(sqe->addr2);
- file_flags = force_nonblock ? O_NONBLOCK : 0;
+ int ret;
if (req->io) {
io = req->io;
} else {
- ret = io_connect_prep(req, &__io);
+ ret = move_addr_to_kernel(req->connect.addr,
+ req->connect.addr_len,
+ &__io.connect.address);
if (ret)
goto out;
io = &__io;
}
- ret = __sys_connect_file(req->file, &io->connect.address, addr_len,
- file_flags);
+ file_flags = force_nonblock ? O_NONBLOCK : 0;
+
+ ret = __sys_connect_file(req->file, &io->connect.address,
+ req->connect.addr_len, file_flags);
if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
if (req->io)
return -EAGAIN;
@@ -2513,12 +2526,9 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
return -ENOENT;
}
-static int io_poll_remove_prep(struct io_kiocb *req)
+static int io_poll_remove_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
-
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
@@ -2526,7 +2536,6 @@ static int io_poll_remove_prep(struct io_kiocb *req)
return -EINVAL;
req->poll.addr = READ_ONCE(sqe->addr);
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2540,10 +2549,6 @@ static int io_poll_remove(struct io_kiocb *req)
u64 addr;
int ret;
- ret = io_poll_remove_prep(req);
- if (ret)
- return ret;
-
addr = req->poll.addr;
spin_lock_irq(&ctx->completion_lock);
ret = io_poll_cancel(ctx, addr);
@@ -2681,14 +2686,11 @@ static void io_poll_req_insert(struct io_kiocb *req)
hlist_add_head(&req->hash_node, list);
}
-static int io_poll_add_prep(struct io_kiocb *req)
+static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_poll_iocb *poll = &req->poll;
u16 events;
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
@@ -2696,7 +2698,6 @@ static int io_poll_add_prep(struct io_kiocb *req)
if (!poll->file)
return -EBADF;
- req->flags |= REQ_F_PREPPED;
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
return 0;
@@ -2709,11 +2710,6 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
struct io_poll_table ipt;
bool cancel = false;
__poll_t mask;
- int ret;
-
- ret = io_poll_add_prep(req);
- if (ret)
- return ret;
INIT_IO_WORK(&req->work, io_poll_complete_work);
INIT_HLIST_NODE(&req->hash_node);
@@ -2832,12 +2828,9 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
return 0;
}
-static int io_timeout_remove_prep(struct io_kiocb *req)
+static int io_timeout_remove_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
-
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len)
@@ -2848,7 +2841,6 @@ static int io_timeout_remove_prep(struct io_kiocb *req)
if (req->timeout.flags)
return -EINVAL;
- req->flags |= REQ_F_PREPPED;
return 0;
}
@@ -2860,10 +2852,6 @@ static int io_timeout_remove(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
int ret;
- ret = io_timeout_remove_prep(req);
- if (ret)
- return ret;
-
spin_lock_irq(&ctx->completion_lock);
ret = io_timeout_cancel(ctx, req->timeout.addr);
@@ -2877,10 +2865,9 @@ static int io_timeout_remove(struct io_kiocb *req)
return 0;
}
-static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
+static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool is_timeout_link)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_timeout_data *data;
unsigned flags;
@@ -2894,7 +2881,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
if (flags & ~IORING_TIMEOUT_ABS)
return -EINVAL;
- data = &io->timeout;
+ req->timeout.count = READ_ONCE(sqe->off);
+
+ if (!req->io && io_alloc_async_ctx(req))
+ return -ENOMEM;
+
+ data = &req->io->timeout;
data->req = req;
req->flags |= REQ_F_TIMEOUT;
@@ -2912,21 +2904,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
static int io_timeout(struct io_kiocb *req)
{
- const struct io_uring_sqe *sqe = req->sqe;
unsigned count;
struct io_ring_ctx *ctx = req->ctx;
struct io_timeout_data *data;
struct list_head *entry;
unsigned span = 0;
- int ret;
- if (!req->io) {
- if (io_alloc_async_ctx(req))
- return -ENOMEM;
- ret = io_timeout_prep(req, req->io, false);
- if (ret)
- return ret;
- }
data = &req->io->timeout;
/*
@@ -2934,7 +2917,7 @@ static int io_timeout(struct io_kiocb *req)
* timeout event to be satisfied. If it isn't set, then this is
* a pure timeout request, sequence isn't used.
*/
- count = READ_ONCE(sqe->off);
+ count = req->timeout.count;
if (!count) {
req->flags |= REQ_F_TIMEOUT_NOSEQ;
spin_lock_irq(&ctx->completion_lock);
@@ -3052,19 +3035,15 @@ done:
io_put_req_find_next(req, nxt);
}
-static int io_async_cancel_prep(struct io_kiocb *req)
+static int io_async_cancel_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- const struct io_uring_sqe *sqe = req->sqe;
-
- if (req->flags & REQ_F_PREPPED)
- return 0;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->flags || sqe->ioprio || sqe->off || sqe->len ||
sqe->cancel_flags)
return -EINVAL;
- req->flags |= REQ_F_PREPPED;
req->cancel.addr = READ_ONCE(sqe->addr);
return 0;
}
@@ -3072,21 +3051,14 @@ static int io_async_cancel_prep(struct io_kiocb *req)
static int io_async_cancel(struct io_kiocb *req, struct io_kiocb **nxt)
{
struct io_ring_ctx *ctx = req->ctx;
- int ret;
-
- ret = io_async_cancel_prep(req);
- if (ret)
- return ret;
io_async_find_and_cancel(ctx, req, req->cancel.addr, nxt, 0);
return 0;
}
-static int io_req_defer_prep(struct io_kiocb *req)
+static int io_req_defer_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct io_async_ctx *io = req->io;
- struct iov_iter iter;
ssize_t ret = 0;
switch (req->opcode) {
@@ -3094,61 +3066,47 @@ static int io_req_defer_prep(struct io_kiocb *req)
break;
case IORING_OP_READV:
case IORING_OP_READ_FIXED:
- /* ensure prep does right import */
- req->io = NULL;
- ret = io_read_prep(req, &iovec, &iter, true);
- req->io = io;
- if (ret < 0)
- break;
- io_req_map_rw(req, ret, iovec, inline_vecs, &iter);
- ret = 0;
+ ret = io_read_prep(req, sqe, true);
break;
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
- /* ensure prep does right import */
- req->io = NULL;
- ret = io_write_prep(req, &iovec, &iter, true);
- req->io = io;
- if (ret < 0)
- break;
- io_req_map_rw(req, ret, iovec, inline_vecs, &iter);
- ret = 0;
+ ret = io_write_prep(req, sqe, true);
break;
case IORING_OP_POLL_ADD:
- ret = io_poll_add_prep(req);
+ ret = io_poll_add_prep(req, sqe);
break;
case IORING_OP_POLL_REMOVE:
- ret = io_poll_remove_prep(req);
+ ret = io_poll_remove_prep(req, sqe);
break;
case IORING_OP_FSYNC:
- ret = io_prep_fsync(req);
+ ret = io_prep_fsync(req, sqe);
break;
case IORING_OP_SYNC_FILE_RANGE:
- ret = io_prep_sfr(req);
+ ret = io_prep_sfr(req, sqe);
break;
case IORING_OP_SENDMSG:
- ret = io_sendmsg_prep(req, io);
+ ret = io_sendmsg_prep(req, sqe);
break;
case IORING_OP_RECVMSG:
- ret = io_recvmsg_prep(req, io);
+ ret = io_recvmsg_prep(req, sqe);
break;
case IORING_OP_CONNECT:
- ret = io_connect_prep(req, io);
+ ret = io_connect_prep(req, sqe);
break;
case IORING_OP_TIMEOUT:
- ret = io_timeout_prep(req, io, false);
+ ret = io_timeout_prep(req, sqe, false);
break;
case IORING_OP_TIMEOUT_REMOVE:
- ret = io_timeout_remove_prep(req);
+ ret = io_timeout_remove_prep(req, sqe);
break;
case IORING_OP_ASYNC_CANCEL:
- ret = io_async_cancel_prep(req);
+ ret = io_async_cancel_prep(req, sqe);
break;
case IORING_OP_LINK_TIMEOUT:
- ret = io_timeout_prep(req, io, true);
+ ret = io_timeout_prep(req, sqe, true);
break;
case IORING_OP_ACCEPT:
- ret = io_accept_prep(req);
+ ret = io_accept_prep(req, sqe);
break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
@@ -3160,7 +3118,7 @@ static int io_req_defer_prep(struct io_kiocb *req)
return ret;
}
-static int io_req_defer(struct io_kiocb *req)
+static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
int ret;
@@ -3169,10 +3127,10 @@ static int io_req_defer(struct io_kiocb *req)
if (!req_need_defer(req) && list_empty(&ctx->defer_list))
return 0;
- if (io_alloc_async_ctx(req))
+ if (!req->io && io_alloc_async_ctx(req))
return -EAGAIN;
- ret = io_req_defer_prep(req);
+ ret = io_req_defer_prep(req, sqe);
if (ret < 0)
return ret;
@@ -3188,9 +3146,8 @@ static int io_req_defer(struct io_kiocb *req)
return -EIOCBQUEUED;
}
-__attribute__((nonnull))
-static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ struct io_kiocb **nxt, bool force_nonblock)
{
struct io_ring_ctx *ctx = req->ctx;
int ret;
@@ -3200,52 +3157,109 @@ static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
ret = io_nop(req);
break;
case IORING_OP_READV:
- if (unlikely(req->sqe->buf_index))
- return -EINVAL;
- ret = io_read(req, nxt, force_nonblock);
- break;
- case IORING_OP_WRITEV:
- if (unlikely(req->sqe->buf_index))
- return -EINVAL;
- ret = io_write(req, nxt, force_nonblock);
- break;
case IORING_OP_READ_FIXED:
+ if (sqe) {
+ ret = io_read_prep(req, sqe, force_nonblock);
+ if (ret < 0)
+ break;
+ }
ret = io_read(req, nxt, force_nonblock);
break;
+ case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
+ if (sqe) {
+ ret = io_write_prep(req, sqe, force_nonblock);
+ if (ret < 0)
+ break;
+ }
ret = io_write(req, nxt, force_nonblock);
break;
case IORING_OP_FSYNC:
+ if (sqe) {
+ ret = io_prep_fsync(req, sqe);
+ if (ret < 0)
+ break;
+ }
ret = io_fsync(req, nxt, force_nonblock);
break;
case IORING_OP_POLL_ADD:
+ if (sqe) {
+ ret = io_poll_add_prep(req, sqe);
+ if (ret)
+ break;
+ }
ret = io_poll_add(req, nxt);
break;
case IORING_OP_POLL_REMOVE:
+ if (sqe) {
+ ret = io_poll_remove_prep(req, sqe);
+ if (ret < 0)
+ break;
+ }
ret = io_poll_remove(req);
break;
case IORING_OP_SYNC_FILE_RANGE:
+ if (sqe) {
+ ret = io_prep_sfr(req, sqe);
+ if (ret < 0)
+ break;
+ }
ret = io_sync_file_range(req, nxt, force_nonblock);
break;
case IORING_OP_SENDMSG:
+ if (sqe) {
+ ret = io_sendmsg_prep(req, sqe);
+ if (ret < 0)
+ break;
+ }
ret = io_sendmsg(req, nxt, force_nonblock);
break;
case IORING_OP_RECVMSG:
+ if (sqe) {
+ ret = io_recvmsg_prep(req, sqe);
+ if (ret)
+ break;
+ }
ret = io_recvmsg(req, nxt, force_nonblock);
break;
case IORING_OP_TIMEOUT:
+ if (sqe) {
+ ret = io_timeout_prep(req, sqe, false);
+ if (ret)
+ break;
+ }
ret = io_timeout(req);
break;
case IORING_OP_TIMEOUT_REMOVE:
+ if (sqe) {
+ ret = io_timeout_remove_prep(req, sqe);
+ if (ret)
+ break;
+ }
ret = io_timeout_remove(req);
break;
case IORING_OP_ACCEPT:
+ if (sqe) {
+ ret = io_accept_prep(req, sqe);
+ if (ret)
+ break;
+ }
ret = io_accept(req, nxt, force_nonblock);
break;
case IORING_OP_CONNECT:
+ if (sqe) {
+ ret = io_connect_prep(req, sqe);
+ if (ret)
+ break;
+ }
ret = io_connect(req, nxt, force_nonblock);
break;
case IORING_OP_ASYNC_CANCEL:
+ if (sqe) {
+ ret = io_async_cancel_prep(req, sqe);
+ if (ret)
+ break;
+ }
ret = io_async_cancel(req, nxt);
break;
default:
@@ -3289,7 +3303,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
req->has_user = (work->flags & IO_WQ_WORK_HAS_MM) != 0;
req->in_async = true;
do {
- ret = io_issue_sqe(req, &nxt, false);
+ ret = io_issue_sqe(req, NULL, &nxt, false);
/*
* We can get EAGAIN for polled IO even though we're
* forcing a sync submission from here, since we can't
@@ -3355,14 +3369,15 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
return table->files[index & IORING_FILE_TABLE_MASK];
}
-static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req)
+static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned flags;
int fd, ret;
- flags = READ_ONCE(req->sqe->flags);
- fd = READ_ONCE(req->sqe->fd);
+ flags = READ_ONCE(sqe->flags);
+ fd = READ_ONCE(sqe->fd);
if (flags & IOSQE_IO_DRAIN)
req->flags |= REQ_F_IO_DRAIN;
@@ -3494,7 +3509,7 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
return nxt;
}
-static void __io_queue_sqe(struct io_kiocb *req)
+static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_kiocb *linked_timeout;
struct io_kiocb *nxt = NULL;
@@ -3503,7 +3518,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
again:
linked_timeout = io_prep_linked_timeout(req);
- ret = io_issue_sqe(req, &nxt, true);
+ ret = io_issue_sqe(req, sqe, &nxt, true);
/*
* We async punt it if the file wasn't marked NOWAIT, or if the file
@@ -3550,7 +3565,7 @@ done_req:
}
}
-static void io_queue_sqe(struct io_kiocb *req)
+static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
int ret;
@@ -3560,7 +3575,7 @@ static void io_queue_sqe(struct io_kiocb *req)
}
req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK);
- ret = io_req_defer(req);
+ ret = io_req_defer(req, sqe);
if (ret) {
if (ret != -EIOCBQUEUED) {
io_cqring_add_event(req, ret);
@@ -3568,7 +3583,7 @@ static void io_queue_sqe(struct io_kiocb *req)
io_double_put_req(req);
}
} else
- __io_queue_sqe(req);
+ __io_queue_sqe(req, sqe);
}
static inline void io_queue_link_head(struct io_kiocb *req)
@@ -3577,25 +3592,25 @@ static inline void io_queue_link_head(struct io_kiocb *req)
io_cqring_add_event(req, -ECANCELED);
io_double_put_req(req);
} else
- io_queue_sqe(req);
+ io_queue_sqe(req, NULL);
}
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
IOSQE_IO_HARDLINK)
-static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
- struct io_kiocb **link)
+static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ struct io_submit_state *state, struct io_kiocb **link)
{
struct io_ring_ctx *ctx = req->ctx;
int ret;
/* enforce forwards compatibility on users */
- if (unlikely(req->sqe->flags & ~SQE_VALID_FLAGS)) {
+ if (unlikely(sqe->flags & ~SQE_VALID_FLAGS)) {
ret = -EINVAL;
goto err_req;
}
- ret = io_req_set_file(state, req);
+ ret = io_req_set_file(state, req, sqe);
if (unlikely(ret)) {
err_req:
io_cqring_add_event(req, ret);
@@ -3613,10 +3628,10 @@ err_req:
if (*link) {
struct io_kiocb *prev = *link;
- if (req->sqe->flags & IOSQE_IO_DRAIN)
+ if (sqe->flags & IOSQE_IO_DRAIN)
(*link)->flags |= REQ_F_DRAIN_LINK | REQ_F_IO_DRAIN;
- if (req->sqe->flags & IOSQE_IO_HARDLINK)
+ if (sqe->flags & IOSQE_IO_HARDLINK)
req->flags |= REQ_F_HARDLINK;
if (io_alloc_async_ctx(req)) {
@@ -3624,7 +3639,7 @@ err_req:
goto err_req;
}
- ret = io_req_defer_prep(req);
+ ret = io_req_defer_prep(req, sqe);
if (ret) {
/* fail even hard links since we don't submit */
prev->flags |= REQ_F_FAIL_LINK;
@@ -3632,15 +3647,18 @@ err_req:
}
trace_io_uring_link(ctx, req, prev);
list_add_tail(&req->link_list, &prev->link_list);
- } else if (req->sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
+ } else if (sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
req->flags |= REQ_F_LINK;
- if (req->sqe->flags & IOSQE_IO_HARDLINK)
+ if (sqe->flags & IOSQE_IO_HARDLINK)
req->flags |= REQ_F_HARDLINK;
INIT_LIST_HEAD(&req->link_list);
+ ret = io_req_defer_prep(req, sqe);
+ if (ret)
+ req->flags |= REQ_F_FAIL_LINK;
*link = req;
} else {
- io_queue_sqe(req);
+ io_queue_sqe(req, sqe);
}
return true;
@@ -3685,14 +3703,15 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
}
/*
- * Fetch an sqe, if one is available. Note that req->sqe will point to memory
+ * Fetch an sqe, if one is available. Note that sqe_ptr will point to memory
* that is mapped by userspace. This means that care needs to be taken to
* ensure that reads are stable, as we cannot rely on userspace always
* being a good citizen. If members of the sqe are validated and then later
* used, it's important that those reads are done through READ_ONCE() to
* prevent a re-load down the line.
*/
-static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req)
+static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ const struct io_uring_sqe **sqe_ptr)
{
struct io_rings *rings = ctx->rings;
u32 *sq_array = ctx->sq_array;
@@ -3719,9 +3738,9 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req)
* link list.
*/
req->sequence = ctx->cached_sq_head;
- req->sqe = &ctx->sq_sqes[head];
- req->opcode = READ_ONCE(req->sqe->opcode);
- req->user_data = READ_ONCE(req->sqe->user_data);
+ *sqe_ptr = &ctx->sq_sqes[head];
+ req->opcode = READ_ONCE((*sqe_ptr)->opcode);
+ req->user_data = READ_ONCE((*sqe_ptr)->user_data);
ctx->cached_sq_head++;
return true;
}
@@ -3753,6 +3772,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
}
for (i = 0; i < nr; i++) {
+ const struct io_uring_sqe *sqe;
struct io_kiocb *req;
unsigned int sqe_flags;
@@ -3762,7 +3782,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
submitted = -EAGAIN;
break;
}
- if (!io_get_sqring(ctx, req)) {
+ if (!io_get_sqring(ctx, req, &sqe)) {
__io_free_req(req);
break;
}
@@ -3776,7 +3796,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
}
submitted++;
- sqe_flags = req->sqe->flags;
+ sqe_flags = sqe->flags;
req->ring_file = ring_file;
req->ring_fd = ring_fd;
@@ -3784,7 +3804,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
req->in_async = async;
req->needs_fixed_file = async;
trace_io_uring_submit_sqe(ctx, req->user_data, true, async);
- if (!io_submit_sqe(req, statep, &link))
+ if (!io_submit_sqe(req, sqe, statep, &link))
break;
/*
* If previous wasn't linked and we have a linked command,
@@ -4702,7 +4722,7 @@ static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
return -EFAULT;
- dst->iov_base = (void __user *) (unsigned long) ciov.iov_base;
+ dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
dst->iov_len = ciov.iov_len;
return 0;
}
diff --git a/fs/locks.c b/fs/locks.c
index 6970f55daf54..44b6da032842 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2853,7 +2853,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
}
if (inode) {
/* userspace relies on this representation of dev_t */
- seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
+ seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev), inode->i_ino);
} else {
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 3e77b728a22b..46f225580009 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -57,6 +57,9 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
* doing an __iget/iput with SB_ACTIVE clear would actually
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
+ * However, we should have been called /after/ evict_inodes
+ * removed all zero refcount inodes, in any case. Test to
+ * be sure.
*/
if (!atomic_read(&inode->i_count)) {
spin_unlock(&inode->i_lock);
@@ -77,6 +80,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
iput_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b0688c02dc90..b6a4f692d345 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -984,6 +984,7 @@ static int add_dquot_ref(struct super_block *sb, int type)
* later.
*/
old_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/super.c b/fs/super.c
index cfadab2cbf35..cd352530eca9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -448,10 +448,12 @@ void generic_shutdown_super(struct super_block *sb)
sync_filesystem(sb);
sb->s_flags &= ~SB_ACTIVE;
- fsnotify_sb_delete(sb);
cgroup_writeback_umount();
+ /* evict all inodes with zero refcount */
evict_inodes(sb);
+ /* only nonzero refcount inodes can have marks */
+ fsnotify_sb_delete(sb);
if (sb->s_dio_done_wq) {
destroy_workqueue(sb->s_dio_done_wq);