diff options
Diffstat (limited to 'io_uring')
-rw-r--r-- | io_uring/futex.c | 1 | ||||
-rw-r--r-- | io_uring/io_uring.c | 120 | ||||
-rw-r--r-- | io_uring/kbuf.c | 138 | ||||
-rw-r--r-- | io_uring/kbuf.h | 10 | ||||
-rw-r--r-- | io_uring/net.c | 10 | ||||
-rw-r--r-- | io_uring/poll.c | 4 | ||||
-rw-r--r-- | io_uring/rw.c | 11 | ||||
-rw-r--r-- | io_uring/sqpoll.c | 6 | ||||
-rw-r--r-- | io_uring/waitid.c | 7 |
9 files changed, 152 insertions, 155 deletions
diff --git a/io_uring/futex.c b/io_uring/futex.c index 3c3575303c3d..792a03df58de 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -159,6 +159,7 @@ bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task, hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) { if (!io_match_task_safe(req, task, cancel_all)) continue; + hlist_del_init(&req->hash_node); __io_futex_cancel(ctx, req); found = true; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index cf348c33f485..c170a2b8d2cf 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -147,6 +147,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx, static void io_queue_sqe(struct io_kiocb *req); struct kmem_cache *req_cachep; +static struct workqueue_struct *iou_wq __ro_after_init; static int __read_mostly sysctl_io_uring_disabled; static int __read_mostly sysctl_io_uring_group = -1; @@ -350,7 +351,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) err: kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); - kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); kfree(ctx); return NULL; @@ -1982,10 +1982,15 @@ fail: err = -EBADFD; if (!io_file_can_poll(req)) goto fail; - err = -ECANCELED; - if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK) - goto fail; - return; + if (req->file->f_flags & O_NONBLOCK || + req->file->f_mode & FMODE_NOWAIT) { + err = -ECANCELED; + if (io_arm_poll_handler(req, issue_flags) != IO_APOLL_OK) + goto fail; + return; + } else { + req->flags &= ~REQ_F_APOLL_MULTISHOT; + } } if (req->flags & REQ_F_FORCE_ASYNC) { @@ -2181,6 +2186,13 @@ static void io_init_req_drain(struct io_kiocb *req) } } +static __cold int io_init_fail_req(struct io_kiocb *req, int err) +{ + /* ensure per-opcode data is cleared if we fail before prep */ + memset(&req->cmd.data, 0, sizeof(req->cmd.data)); + return err; +} + static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct io_uring_sqe *sqe) __must_hold(&ctx->uring_lock) @@ -2202,29 +2214,29 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, if (unlikely(opcode >= IORING_OP_LAST)) { req->opcode = 0; - return -EINVAL; + return io_init_fail_req(req, -EINVAL); } def = &io_issue_defs[opcode]; if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) { /* enforce forwards compatibility on users */ if (sqe_flags & ~SQE_VALID_FLAGS) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); if (sqe_flags & IOSQE_BUFFER_SELECT) { if (!def->buffer_select) - return -EOPNOTSUPP; + return io_init_fail_req(req, -EOPNOTSUPP); req->buf_index = READ_ONCE(sqe->buf_group); } if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS) ctx->drain_disabled = true; if (sqe_flags & IOSQE_IO_DRAIN) { if (ctx->drain_disabled) - return -EOPNOTSUPP; + return io_init_fail_req(req, -EOPNOTSUPP); io_init_req_drain(req); } } if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) { if (ctx->restricted && !io_check_restriction(ctx, req, sqe_flags)) - return -EACCES; + return io_init_fail_req(req, -EACCES); /* knock it to the slow queue path, will be drained there */ if (ctx->drain_active) req->flags |= REQ_F_FORCE_ASYNC; @@ -2237,9 +2249,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, } if (!def->ioprio && sqe->ioprio) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); if (def->needs_file) { struct io_submit_state *state = &ctx->submit_state; @@ -2263,12 +2275,12 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->creds = xa_load(&ctx->personalities, personality); if (!req->creds) - return -EINVAL; + return io_init_fail_req(req, -EINVAL); get_cred(req->creds); ret = security_uring_override_creds(req->creds); if (ret) { put_cred(req->creds); - return ret; + return io_init_fail_req(req, ret); } req->flags |= REQ_F_CREDS; } @@ -2539,7 +2551,7 @@ static bool current_pending_io(void) static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { - int io_wait, ret; + int ret; if (unlikely(READ_ONCE(ctx->check_cq))) return 1; @@ -2557,7 +2569,6 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, * can take into account that the task is waiting for IO - turns out * to be important for low QD IO. */ - io_wait = current->in_iowait; if (current_pending_io()) current->in_iowait = 1; ret = 0; @@ -2565,7 +2576,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, schedule(); else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS)) ret = -ETIME; - current->in_iowait = io_wait; + current->in_iowait = 0; return ret; } @@ -2591,19 +2602,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (__io_cqring_events_user(ctx) >= min_events) return 0; - if (sig) { -#ifdef CONFIG_COMPAT - if (in_compat_syscall()) - ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, - sigsz); - else -#endif - ret = set_user_sigmask(sig, sigsz); - - if (ret) - return ret; - } - init_waitqueue_func_entry(&iowq.wq, io_wake_function); iowq.wq.private = current; INIT_LIST_HEAD(&iowq.wq.entry); @@ -2622,6 +2620,19 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, io_napi_adjust_timeout(ctx, &iowq, &ts); } + if (sig) { +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, + sigsz); + else +#endif + ret = set_user_sigmask(sig, sigsz); + + if (ret) + return ret; + } + io_napi_busy_loop(ctx, &iowq); trace_io_uring_cqring_wait(ctx, min_events); @@ -2697,13 +2708,9 @@ void io_mem_free(void *ptr) static void io_pages_free(struct page ***pages, int npages) { - struct page **page_array; + struct page **page_array = *pages; int i; - if (!pages) - return; - - page_array = *pages; if (!page_array) return; @@ -2719,7 +2726,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, struct page **page_array; unsigned int nr_pages; void *page_addr; - int ret, i; + int ret, i, pinned; *npages = 0; @@ -2733,12 +2740,12 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages, if (!page_array) return ERR_PTR(-ENOMEM); - ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM, - page_array); - if (ret != nr_pages) { -err: - io_pages_free(&page_array, ret > 0 ? ret : 0); - return ret < 0 ? ERR_PTR(ret) : ERR_PTR(-EFAULT); + + pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM, + page_array); + if (pinned != nr_pages) { + ret = (pinned < 0) ? pinned : -EFAULT; + goto free_pages; } page_addr = page_address(page_array[0]); @@ -2752,7 +2759,7 @@ err: * didn't support this feature. */ if (PageHighMem(page_array[i])) - goto err; + goto free_pages; /* * No support for discontig pages for now, should either be a @@ -2761,13 +2768,17 @@ err: * just fail them with EINVAL. */ if (page_address(page_array[i]) != page_addr) - goto err; + goto free_pages; page_addr += PAGE_SIZE; } *pages = page_array; *npages = nr_pages; return page_to_virt(page_array[0]); + +free_pages: + io_pages_free(&page_array, pinned > 0 ? pinned : 0); + return ERR_PTR(ret); } static void *io_rings_map(struct io_ring_ctx *ctx, unsigned long uaddr, @@ -2789,14 +2800,15 @@ static void io_rings_free(struct io_ring_ctx *ctx) if (!(ctx->flags & IORING_SETUP_NO_MMAP)) { io_mem_free(ctx->rings); io_mem_free(ctx->sq_sqes); - ctx->rings = NULL; - ctx->sq_sqes = NULL; } else { io_pages_free(&ctx->ring_pages, ctx->n_ring_pages); ctx->n_ring_pages = 0; io_pages_free(&ctx->sqe_pages, ctx->n_sqe_pages); ctx->n_sqe_pages = 0; } + + ctx->rings = NULL; + ctx->sq_sqes = NULL; } void *io_mem_alloc(size_t size) @@ -2919,7 +2931,6 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_napi_free(ctx); kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); - kfree(ctx->io_bl); xa_destroy(&ctx->io_bl_xa); kfree(ctx); } @@ -3154,7 +3165,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) * noise and overhead, there's no discernable change in runtime * over using system_wq. */ - queue_work(system_unbound_wq, &ctx->exit_work); + queue_work(iou_wq, &ctx->exit_work); } static int io_uring_release(struct inode *inode, struct file *file) @@ -3436,14 +3447,15 @@ static void *io_uring_validate_mmap_request(struct file *file, ptr = ctx->sq_sqes; break; case IORING_OFF_PBUF_RING: { + struct io_buffer_list *bl; unsigned int bgid; bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; - rcu_read_lock(); - ptr = io_pbuf_get_address(ctx, bgid); - rcu_read_unlock(); - if (!ptr) - return ERR_PTR(-EINVAL); + bl = io_pbuf_get_bl(ctx, bgid); + if (IS_ERR(bl)) + return bl; + ptr = bl->buf_ring; + io_put_bl(ctx, bl); break; } default: @@ -4178,6 +4190,8 @@ static int __init io_uring_init(void) io_buf_cachep = KMEM_CACHE(io_buffer, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT); + iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64); + #ifdef CONFIG_SYSCTL register_sysctl_init("kernel", kernel_io_uring_disabled_table); #endif diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 9be42bff936b..3aa16e27f509 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -17,8 +17,6 @@ #define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) -#define BGID_ARRAY 64 - /* BIDs are addressed by a 16-bit field in a CQE */ #define MAX_BIDS_PER_BGID (1 << 16) @@ -40,13 +38,9 @@ struct io_buf_free { int inuse; }; -static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, - struct io_buffer_list *bl, - unsigned int bgid) +static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx, + unsigned int bgid) { - if (bl && bgid < BGID_ARRAY) - return &bl[bgid]; - return xa_load(&ctx->io_bl_xa, bgid); } @@ -55,7 +49,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, { lockdep_assert_held(&ctx->uring_lock); - return __io_buffer_get_list(ctx, ctx->io_bl, bgid); + return __io_buffer_get_list(ctx, bgid); } static int io_buffer_add_list(struct io_ring_ctx *ctx, @@ -67,11 +61,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx, * always under the ->uring_lock, but the RCU lookup from mmap does. */ bl->bgid = bgid; - smp_store_release(&bl->is_ready, 1); - - if (bgid < BGID_ARRAY) - return 0; - + atomic_set(&bl->refs, 1); return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } @@ -199,7 +189,7 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, bl = io_buffer_get_list(ctx, req->buf_index); if (likely(bl)) { - if (bl->is_mapped) + if (bl->is_buf_ring) ret = io_ring_buffer_select(req, len, bl, issue_flags); else ret = io_provided_buffer_select(req, len, bl); @@ -208,24 +198,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, return ret; } -static __cold int io_init_bl_list(struct io_ring_ctx *ctx) -{ - struct io_buffer_list *bl; - int i; - - bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL); - if (!bl) - return -ENOMEM; - - for (i = 0; i < BGID_ARRAY; i++) { - INIT_LIST_HEAD(&bl[i].buf_list); - bl[i].bgid = i; - } - - smp_store_release(&ctx->io_bl, bl); - return 0; -} - /* * Mark the given mapped range as free for reuse */ @@ -253,7 +225,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, if (!nbufs) return 0; - if (bl->is_mapped) { + if (bl->is_buf_ring) { i = bl->buf_ring->tail - bl->head; if (bl->is_mmap) { /* @@ -274,7 +246,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, } /* make sure it's seen as empty */ INIT_LIST_HEAD(&bl->buf_list); - bl->is_mapped = 0; + bl->is_buf_ring = 0; return i; } @@ -294,24 +266,24 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, return i; } +void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) +{ + if (atomic_dec_and_test(&bl->refs)) { + __io_remove_buffers(ctx, bl, -1U); + kfree_rcu(bl, rcu); + } +} + void io_destroy_buffers(struct io_ring_ctx *ctx) { struct io_buffer_list *bl; struct list_head *item, *tmp; struct io_buffer *buf; unsigned long index; - int i; - - for (i = 0; i < BGID_ARRAY; i++) { - if (!ctx->io_bl) - break; - __io_remove_buffers(ctx, &ctx->io_bl[i], -1U); - } xa_for_each(&ctx->io_bl_xa, index, bl) { xa_erase(&ctx->io_bl_xa, bl->bgid); - __io_remove_buffers(ctx, bl, -1U); - kfree_rcu(bl, rcu); + io_put_bl(ctx, bl); } /* @@ -361,7 +333,7 @@ int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags) if (bl) { ret = -EINVAL; /* can't use provide/remove buffers command on mapped buffers */ - if (!bl->is_mapped) + if (!bl->is_buf_ring) ret = __io_remove_buffers(ctx, bl, p->nbufs); } io_ring_submit_unlock(ctx, issue_flags); @@ -489,12 +461,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) io_ring_submit_lock(ctx, issue_flags); - if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) { - ret = io_init_bl_list(ctx); - if (ret) - goto err; - } - bl = io_buffer_get_list(ctx, p->bgid); if (unlikely(!bl)) { bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT); @@ -507,19 +473,14 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) if (ret) { /* * Doesn't need rcu free as it was never visible, but - * let's keep it consistent throughout. Also can't - * be a lower indexed array group, as adding one - * where lookup failed cannot happen. + * let's keep it consistent throughout. */ - if (p->bgid >= BGID_ARRAY) - kfree_rcu(bl, rcu); - else - WARN_ON_ONCE(1); + kfree_rcu(bl, rcu); goto err; } } /* can't add buffers via this command for a mapped buffer ring */ - if (bl->is_mapped) { + if (bl->is_buf_ring) { ret = -EINVAL; goto err; } @@ -575,7 +536,7 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, bl->buf_pages = pages; bl->buf_nr_pages = nr_pages; bl->buf_ring = br; - bl->is_mapped = 1; + bl->is_buf_ring = 1; bl->is_mmap = 0; return 0; error_unpin: @@ -642,7 +603,7 @@ static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx, } ibf->inuse = 1; bl->buf_ring = ibf->mem; - bl->is_mapped = 1; + bl->is_buf_ring = 1; bl->is_mmap = 1; return 0; } @@ -679,16 +640,10 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) if (reg.ring_entries >= 65536) return -EINVAL; - if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { - int ret = io_init_bl_list(ctx); - if (ret) - return ret; - } - bl = io_buffer_get_list(ctx, reg.bgid); if (bl) { /* if mapped buffer ring OR classic exists, don't allow */ - if (bl->is_mapped || !list_empty(&bl->buf_list)) + if (bl->is_buf_ring || !list_empty(&bl->buf_list)) return -EEXIST; } else { free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL); @@ -730,14 +685,11 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) bl = io_buffer_get_list(ctx, reg.bgid); if (!bl) return -ENOENT; - if (!bl->is_mapped) + if (!bl->is_buf_ring) return -EINVAL; - __io_remove_buffers(ctx, bl, -1U); - if (bl->bgid >= BGID_ARRAY) { - xa_erase(&ctx->io_bl_xa, bl->bgid); - kfree_rcu(bl, rcu); - } + xa_erase(&ctx->io_bl_xa, bl->bgid); + io_put_bl(ctx, bl); return 0; } @@ -757,7 +709,7 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg) bl = io_buffer_get_list(ctx, buf_status.buf_group); if (!bl) return -ENOENT; - if (!bl->is_mapped) + if (!bl->is_buf_ring) return -EINVAL; buf_status.head = bl->head; @@ -767,23 +719,35 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg) return 0; } -void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid) +struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, + unsigned long bgid) { struct io_buffer_list *bl; + bool ret; - bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid); - - if (!bl || !bl->is_mmap) - return NULL; /* - * Ensure the list is fully setup. Only strictly needed for RCU lookup - * via mmap, and in that case only for the array indexed groups. For - * the xarray lookups, it's either visible and ready, or not at all. + * We have to be a bit careful here - we're inside mmap and cannot grab + * the uring_lock. This means the buffer_list could be simultaneously + * going away, if someone is trying to be sneaky. Look it up under rcu + * so we know it's not going away, and attempt to grab a reference to + * it. If the ref is already zero, then fail the mapping. If successful, + * the caller will call io_put_bl() to drop the the reference at at the + * end. This may then safely free the buffer_list (and drop the pages) + * at that point, vm_insert_pages() would've already grabbed the + * necessary vma references. */ - if (!smp_load_acquire(&bl->is_ready)) - return NULL; - - return bl->buf_ring; + rcu_read_lock(); + bl = xa_load(&ctx->io_bl_xa, bgid); + /* must be a mmap'able buffer ring and have pages */ + ret = false; + if (bl && bl->is_mmap) + ret = atomic_inc_not_zero(&bl->refs); + rcu_read_unlock(); + + if (ret) + return bl; + + return ERR_PTR(-EINVAL); } /* diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 5218bfd79e87..df365b8860cf 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -25,12 +25,12 @@ struct io_buffer_list { __u16 head; __u16 mask; + atomic_t refs; + /* ring mapped provided buffers */ - __u8 is_mapped; + __u8 is_buf_ring; /* ring mapped provided buffers, but mmap'ed by application */ __u8 is_mmap; - /* bl is visible from an RCU point of view for lookup */ - __u8 is_ready; }; struct io_buffer { @@ -61,7 +61,9 @@ void __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags); bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags); -void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid); +void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl); +struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx, + unsigned long bgid); static inline bool io_kbuf_recycle_ring(struct io_kiocb *req) { diff --git a/io_uring/net.c b/io_uring/net.c index 19451f0dbf81..4afb475d4197 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -326,7 +326,10 @@ int io_send_prep_async(struct io_kiocb *req) struct io_async_msghdr *io; int ret; - if (!zc->addr || req_has_async_data(req)) + if (req_has_async_data(req)) + return 0; + zc->done_io = 0; + if (!zc->addr) return 0; io = io_msg_alloc_async_prep(req); if (!io) @@ -353,8 +356,10 @@ static int io_setup_async_addr(struct io_kiocb *req, int io_sendmsg_prep_async(struct io_kiocb *req) { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); int ret; + sr->done_io = 0; if (!io_msg_alloc_async_prep(req)) return -ENOMEM; ret = io_sendmsg_copy_hdr(req, req->async_data); @@ -608,9 +613,11 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req, int io_recvmsg_prep_async(struct io_kiocb *req) { + struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr *iomsg; int ret; + sr->done_io = 0; if (!io_msg_alloc_async_prep(req)) return -ENOMEM; iomsg = req->async_data; @@ -1269,6 +1276,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) if (req_has_async_data(req)) { kmsg = req->async_data; + kmsg->msg.msg_control_user = sr->msg_control; } else { ret = io_sendmsg_copy_hdr(req, &iomsg); if (ret) diff --git a/io_uring/poll.c b/io_uring/poll.c index 5f779139cae1..6db1dcb2c797 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -996,7 +996,6 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) struct io_hash_bucket *bucket; struct io_kiocb *preq; int ret2, ret = 0; - struct io_tw_state ts = { .locked = true }; io_ring_submit_lock(ctx, issue_flags); preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket); @@ -1045,7 +1044,8 @@ found: req_set_fail(preq); io_req_set_res(preq, -ECANCELED, 0); - io_req_task_complete(preq, &ts); + preq->io_task_work.func = io_req_task_complete; + io_req_task_work_add(preq); out: io_ring_submit_unlock(ctx, issue_flags); if (ret < 0) { diff --git a/io_uring/rw.c b/io_uring/rw.c index 47e097ab5d7e..c8d48287439e 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -937,6 +937,13 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) ret = __io_read(req, issue_flags); /* + * If the file doesn't support proper NOWAIT, then disable multishot + * and stay in single shot mode. + */ + if (!io_file_supports_nowait(req)) + req->flags &= ~REQ_F_APOLL_MULTISHOT; + + /* * If we get -EAGAIN, recycle our buffer and just let normal poll * handling arm it. */ @@ -947,13 +954,15 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) */ if (io_kbuf_recycle(req, issue_flags)) rw->len = 0; + if (issue_flags & IO_URING_F_MULTISHOT) + return IOU_ISSUE_SKIP_COMPLETE; return -EAGAIN; } /* * Any successful return value will keep the multishot read armed. */ - if (ret > 0) { + if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) { /* * Put our buffer and post a CQE. If we fail to post a CQE, then * jump to the termination path. This request is then done. diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 363052b4ea76..3983708cef5b 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -274,6 +274,10 @@ static int io_sq_thread(void *data) char buf[TASK_COMM_LEN]; DEFINE_WAIT(wait); + /* offload context creation failed, just exit */ + if (!current->io_uring) + goto err_out; + snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); set_task_comm(current, buf); @@ -371,7 +375,7 @@ static int io_sq_thread(void *data) atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); io_run_task_work(); mutex_unlock(&sqd->lock); - +err_out: complete(&sqd->exited); do_exit(0); } diff --git a/io_uring/waitid.c b/io_uring/waitid.c index 6f851978606d..77d340666cb9 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -125,12 +125,6 @@ static void io_waitid_complete(struct io_kiocb *req, int ret) lockdep_assert_held(&req->ctx->uring_lock); - /* - * Did cancel find it meanwhile? - */ - if (hlist_unhashed(&req->hash_node)) - return; - hlist_del_init(&req->hash_node); ret = io_waitid_finish(req, ret); @@ -202,6 +196,7 @@ bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task, hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) { if (!io_match_task_safe(req, task, cancel_all)) continue; + hlist_del_init(&req->hash_node); __io_waitid_cancel(ctx, req); found = true; } |