summary | refs | log | tree | commit | diff | stats
path: root/io_uring
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring')
-rw-r--r-- io_uring/io-wq.c 10
-rw-r--r-- io_uring/io_uring.c 6
-rw-r--r-- io_uring/io_uring.h 2
-rw-r--r-- io_uring/memmap.c 7
-rw-r--r-- io_uring/napi.c 24
-rw-r--r-- io_uring/net.c 32
-rw-r--r-- io_uring/opdef.c 5
-rw-r--r-- io_uring/register.c 4
-rw-r--r-- io_uring/rw.c 4
-rw-r--r-- io_uring/sqpoll.c 6
10 files changed, 63 insertions, 37 deletions
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index d1c47a9d9215..7d3316fe9bfc 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -927,7 +927,11 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
{
struct io_wq_acct *acct = io_work_get_acct(wq, work);
unsigned long work_flags = work->flags;
- struct io_cb_cancel_data match;
+ struct io_cb_cancel_data match = {
+ .fn = io_wq_work_match_item,
+ .data = work,
+ .cancel_all = false,
+ };
bool do_create;
/*
@@ -965,10 +969,6 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
raw_spin_unlock(&wq->lock);
/* fatal condition, failed to create the first worker */
- match.fn = io_wq_work_match_item,
- match.data = work,
- match.cancel_all = false,
-
io_acct_cancel_pending_work(wq, acct, &match);
}
}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 86fd72f6a1c2..816e93e7f949 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2597,13 +2597,11 @@ static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries
#endif
if (ctx->flags & IORING_SETUP_NO_SQARRAY) {
- if (sq_offset)
- *sq_offset = SIZE_MAX;
+ *sq_offset = SIZE_MAX;
return off;
}
- if (sq_offset)
- *sq_offset = off;
+ *sq_offset = off;
sq_array_size = array_size(sizeof(u32), sq_entries);
if (sq_array_size == SIZE_MAX)
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 624ca9076a50..726e6367af4d 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -433,7 +433,7 @@ static inline bool io_file_can_poll(struct io_kiocb *req)
{
if (req->flags & REQ_F_CAN_POLL)
return true;
- if (file_can_poll(req->file)) {
+ if (req->file && file_can_poll(req->file)) {
req->flags |= REQ_F_CAN_POLL;
return true;
}
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 523d982af2b0..a0f32a255fd1 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -244,6 +244,7 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
struct io_ring_ctx *ctx = file->private_data;
size_t sz = vma->vm_end - vma->vm_start;
long offset = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned int npages;
void *ptr;
ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz);
@@ -253,8 +254,8 @@ __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
switch (offset & IORING_OFF_MMAP_MASK) {
case IORING_OFF_SQ_RING:
case IORING_OFF_CQ_RING:
- return io_uring_mmap_pages(ctx, vma, ctx->ring_pages,
- ctx->n_ring_pages);
+ npages = min(ctx->n_ring_pages, (sz + PAGE_SIZE - 1) >> PAGE_SHIFT);
+ return io_uring_mmap_pages(ctx, vma, ctx->ring_pages, npages);
case IORING_OFF_SQES:
return io_uring_mmap_pages(ctx, vma, ctx->sqe_pages,
ctx->n_sqe_pages);
@@ -305,7 +306,7 @@ unsigned long io_uring_get_unmapped_area(struct file *filp, unsigned long addr,
#else
addr = 0UL;
#endif
- return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+ return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags);
}
#else /* !CONFIG_MMU */
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 883a1a665907..8c18ede595c4 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -261,12 +261,14 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
}
/*
- * __io_napi_adjust_timeout() - Add napi id to the busy poll list
+ * __io_napi_adjust_timeout() - adjust busy loop timeout
* @ctx: pointer to io-uring context structure
* @iowq: pointer to io wait queue
* @ts: pointer to timespec or NULL
*
* Adjust the busy loop timeout according to timespec and busy poll timeout.
+ * If the specified NAPI timeout is bigger than the wait timeout, then adjust
+ * the NAPI timeout accordingly.
*/
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
struct timespec64 *ts)
@@ -274,16 +276,16 @@ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iow
unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);
if (ts) {
- struct timespec64 poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
-
- if (timespec64_compare(ts, &poll_to_ts) > 0) {
- *ts = timespec64_sub(*ts, poll_to_ts);
- } else {
- u64 to = timespec64_to_ns(ts);
-
- do_div(to, 1000);
- ts->tv_sec = 0;
- ts->tv_nsec = 0;
+ struct timespec64 poll_to_ts;
+
+ poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
+ if (timespec64_compare(ts, &poll_to_ts) < 0) {
+ s64 poll_to_ns = timespec64_to_ns(ts);
+ if (poll_to_ns > 0) {
+ u64 val = poll_to_ns + 999;
+ do_div(val, (s64) 1000);
+ poll_to = val;
+ }
}
}
diff --git a/io_uring/net.c b/io_uring/net.c
index 070dea9a4eda..7c98c4d50946 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1127,6 +1127,9 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
flags |= MSG_DONTWAIT;
retry_multishot:
+ kmsg->msg.msg_inq = -1;
+ kmsg->msg.msg_flags = 0;
+
if (io_do_buffer_select(req)) {
ret = io_recv_buf_select(req, kmsg, &len, issue_flags);
if (unlikely(ret))
@@ -1134,9 +1137,6 @@ retry_multishot:
sr->buf = NULL;
}
- kmsg->msg.msg_inq = -1;
- kmsg->msg.msg_flags = 0;
-
if (flags & MSG_WAITALL)
min_ret = iov_iter_count(&kmsg->msg.msg_iter);
@@ -1528,9 +1528,12 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_accept *accept = io_kiocb_to_cmd(req, struct io_accept);
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
- unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
bool fixed = !!accept->file_slot;
+ struct proto_accept_arg arg = {
+ .flags = force_nonblock ? O_NONBLOCK : 0,
+ };
struct file *file;
+ unsigned cflags;
int ret, fd;
if (!(req->flags & REQ_F_POLLED) &&
@@ -1543,7 +1546,9 @@ retry:
if (unlikely(fd < 0))
return fd;
}
- file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
+ arg.err = 0;
+ arg.is_empty = -1;
+ file = do_accept(req->file, &arg, accept->addr, accept->addr_len,
accept->flags);
if (IS_ERR(file)) {
if (!fixed)
@@ -1571,17 +1576,26 @@ retry:
accept->file_slot);
}
+ cflags = 0;
+ if (!arg.is_empty)
+ cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
- io_req_set_res(req, ret, 0);
+ io_req_set_res(req, ret, cflags);
return IOU_OK;
}
if (ret < 0)
return ret;
- if (io_req_post_cqe(req, ret, IORING_CQE_F_MORE))
- goto retry;
+ if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
+ if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
+ goto retry;
+ if (issue_flags & IO_URING_F_MULTISHOT)
+ return IOU_ISSUE_SKIP_COMPLETE;
+ return -EAGAIN;
+ }
- io_req_set_res(req, ret, 0);
+ io_req_set_res(req, ret, cflags);
return IOU_STOP_MULTISHOT;
}
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 2de5cca9504e..2e3b7b16effb 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -516,10 +516,12 @@ const struct io_cold_def io_cold_defs[] = {
},
[IORING_OP_READ_FIXED] = {
.name = "READ_FIXED",
+ .cleanup = io_readv_writev_cleanup,
.fail = io_rw_fail,
},
[IORING_OP_WRITE_FIXED] = {
.name = "WRITE_FIXED",
+ .cleanup = io_readv_writev_cleanup,
.fail = io_rw_fail,
},
[IORING_OP_POLL_ADD] = {
@@ -582,10 +584,12 @@ const struct io_cold_def io_cold_defs[] = {
},
[IORING_OP_READ] = {
.name = "READ",
+ .cleanup = io_readv_writev_cleanup,
.fail = io_rw_fail,
},
[IORING_OP_WRITE] = {
.name = "WRITE",
+ .cleanup = io_readv_writev_cleanup,
.fail = io_rw_fail,
},
[IORING_OP_FADVISE] = {
@@ -692,6 +696,7 @@ const struct io_cold_def io_cold_defs[] = {
},
[IORING_OP_READ_MULTISHOT] = {
.name = "READ_MULTISHOT",
+ .cleanup = io_readv_writev_cleanup,
},
[IORING_OP_WAITID] = {
.name = "WAITID",
diff --git a/io_uring/register.c b/io_uring/register.c
index ef8c908346a4..c0010a66a6f2 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -355,8 +355,10 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
}
if (sqd) {
+ mutex_unlock(&ctx->uring_lock);
mutex_unlock(&sqd->lock);
io_put_sq_data(sqd);
+ mutex_lock(&ctx->uring_lock);
}
if (copy_to_user(arg, new_count, sizeof(new_count)))
@@ -380,8 +382,10 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
return 0;
err:
if (sqd) {
+ mutex_unlock(&ctx->uring_lock);
mutex_unlock(&sqd->lock);
io_put_sq_data(sqd);
+ mutex_lock(&ctx->uring_lock);
}
return ret;
}
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 894c43a5fc0e..1a2128459cb4 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -759,7 +759,7 @@ static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter)
struct file *file = rw->kiocb.ki_filp;
if (likely(file->f_op->read_iter))
- return call_read_iter(file, &rw->kiocb, iter);
+ return file->f_op->read_iter(&rw->kiocb, iter);
else if (file->f_op->read)
return loop_rw_iter(READ, rw, iter);
else
@@ -1046,7 +1046,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
kiocb->ki_flags |= IOCB_WRITE;
if (likely(req->file->f_op->write_iter))
- ret2 = call_write_iter(req->file, kiocb, &io->iter);
+ ret2 = req->file->f_op->write_iter(kiocb, &io->iter);
else if (req->file->f_op->write)
ret2 = loop_rw_iter(WRITE, rw, &io->iter);
else
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 554c7212aa46..b3722e5275e7 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -238,11 +238,13 @@ static unsigned int io_sq_tw(struct llist_node **retry_list, int max_entries)
if (*retry_list) {
*retry_list = io_handle_tw_list(*retry_list, &count, max_entries);
if (count >= max_entries)
- return count;
+ goto out;
max_entries -= count;
}
-
*retry_list = tctx_task_work_run(tctx, max_entries, &count);
+out:
+ if (task_work_pending(current))
+ task_work_run();
return count;
}