Mirror of https://github.com/torvalds/linux.git, synced 2025-12-07 20:06:24 +00:00
Merge branch 'io_uring-6.18' into for-6.19/io_uring
Merge 6.18-rc io_uring fixes, as certain coming changes depend on some of these.

* io_uring-6.18:
  io_uring/rsrc: don't use blk_rq_nr_phys_segments() as number of bvecs
  io_uring/query: return number of available queries
  io_uring/rw: ensure allocated iovec gets cleared for early failure
  io_uring: fix regbuf vector size truncation
  io_uring: fix types for region size calulation
  io_uring/zcrx: remove sync refill uapi
  io_uring: fix buffer auto-commit for multishot uring_cmd
  io_uring: correct __must_hold annotation in io_install_fixed_file
  io_uring zcrx: add MAINTAINERS entry
  io_uring: Fix code indentation error
  io_uring/sqpoll: be smarter on when to update the stime usage
  io_uring/sqpoll: switch away from getrusage() for CPU accounting
  io_uring: fix incorrect unlikely() usage in io_waitid_prep()

Signed-off-by: Jens Axboe <axboe@kernel.dk>
@@ -13111,6 +13111,15 @@ F: include/uapi/linux/io_uring.h
 F:	include/uapi/linux/io_uring/
 F:	io_uring/
 
+IO_URING ZCRX
+M:	Pavel Begunkov <asml.silence@gmail.com>
+L:	io-uring@vger.kernel.org
+L:	netdev@vger.kernel.org
+T:	git https://github.com/isilence/linux.git zcrx/for-next
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git
+S:	Maintained
+F:	io_uring/zcrx.*
+
 IPMI SUBSYSTEM
 M:	Corey Minyard <corey@minyard.net>
 L:	openipmi-developer@lists.sourceforge.net (moderated for non-subscribers)
@@ -697,9 +697,6 @@ enum io_uring_register_op {
 	/* query various aspects of io_uring, see linux/io_uring/query.h */
 	IORING_REGISTER_QUERY = 35,
 
-	/* return zcrx buffers back into circulation */
-	IORING_REGISTER_ZCRX_REFILL = 36,
-
 	/* this goes last */
 	IORING_REGISTER_LAST,
 
@@ -1081,15 +1078,6 @@ struct io_uring_zcrx_ifq_reg {
 	__u64	__resv[3];
 };
 
-struct io_uring_zcrx_sync_refill {
-	__u32	zcrx_id;
-	/* the number of entries to return */
-	__u32	nr_entries;
-	/* pointer to an array of struct io_uring_zcrx_rqe */
-	__u64	rqes;
-	__u64	__resv[2];
-};
-
 #ifdef __cplusplus
 }
 #endif
@@ -36,6 +36,9 @@ struct io_uring_query_opcode {
 	__u64	enter_flags;
 	/* Bitmask of all supported IOSQE_* flags */
 	__u64	sqe_flags;
+	/* The number of available query opcodes */
+	__u32	nr_query_opcodes;
+	__u32	__pad;
 };
 
 #endif
@@ -61,7 +61,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 {
 	struct io_overflow_cqe *ocqe;
 	struct io_rings *r = ctx->rings;
-	struct rusage sq_usage;
 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
 	unsigned int sq_head = READ_ONCE(r->sq.head);
 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
@@ -179,14 +178,15 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 	 * thread termination.
 	 */
 	if (tsk) {
+		u64 usec;
+
 		get_task_struct(tsk);
 		rcu_read_unlock();
-		getrusage(tsk, RUSAGE_SELF, &sq_usage);
+		usec = io_sq_cpu_usec(tsk);
 		put_task_struct(tsk);
 		sq_pid = sq->task_pid;
 		sq_cpu = sq->sq_cpu;
-		sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
-				 + sq_usage.ru_stime.tv_usec);
+		sq_total_time = usec;
 		sq_work_time = sq->work_time;
 	} else {
 		rcu_read_unlock();
@@ -57,7 +57,7 @@ void io_free_file_tables(struct io_ring_ctx *ctx, struct io_file_table *table)
 
 static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 				 u32 slot_index)
-	__must_hold(&req->ctx->uring_lock)
+	__must_hold(&ctx->uring_lock)
 {
 	struct io_rsrc_node *node;
 
@@ -846,7 +846,7 @@ static inline struct io_cqe io_init_cqe(u64 user_data, s32 res, u32 cflags)
 }
 
 static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe,
-                                  struct io_big_cqe *big_cqe)
+				   struct io_big_cqe *big_cqe)
 {
 	struct io_overflow_cqe *ocqe;
 
@@ -155,6 +155,27 @@ static int io_provided_buffers_select(struct io_kiocb *req, size_t *len,
 	return 1;
 }
 
+static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags)
+{
+	/*
+	 * If we came in unlocked, we have no choice but to consume the
+	 * buffer here, otherwise nothing ensures that the buffer won't
+	 * get used by others. This does mean it'll be pinned until the
+	 * IO completes, coming in unlocked means we're being called from
+	 * io-wq context and there may be further retries in async hybrid
+	 * mode. For the locked case, the caller must call commit when
+	 * the transfer completes (or if we get -EAGAIN and must poll of
+	 * retry).
+	 */
+	if (issue_flags & IO_URING_F_UNLOCKED)
+		return true;
+
+	/* uring_cmd commits kbuf upfront, no need to auto-commit */
+	if (!io_file_can_poll(req) && req->opcode != IORING_OP_URING_CMD)
+		return true;
+	return false;
+}
+
 static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 					      struct io_buffer_list *bl,
 					      unsigned int issue_flags)
@@ -181,17 +202,7 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 		sel.buf_list = bl;
 		sel.addr = u64_to_user_ptr(buf->addr);
 
-		if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
-			/*
-			 * If we came in unlocked, we have no choice but to consume the
-			 * buffer here, otherwise nothing ensures that the buffer won't
-			 * get used by others. This does mean it'll be pinned until the
-			 * IO completes, coming in unlocked means we're being called from
-			 * io-wq context and there may be further retries in async hybrid
-			 * mode. For the locked case, the caller must call commit when
-			 * the transfer completes (or if we get -EAGAIN and must poll of
-			 * retry).
-			 */
+		if (io_should_commit(req, issue_flags)) {
 			io_kbuf_commit(req, sel.buf_list, *len, 1);
 			sel.buf_list = NULL;
 		}
@@ -383,7 +383,7 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return 0;
 
 	if (sr->flags & IORING_SEND_VECTORIZED)
-	        return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
+		return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
 
 	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
 }
@@ -22,6 +22,8 @@ static ssize_t io_query_ops(union io_query_data *data)
 	e->ring_setup_flags = IORING_SETUP_FLAGS;
 	e->enter_flags = IORING_ENTER_FLAGS;
 	e->sqe_flags = SQE_VALID_FLAGS;
+	e->nr_query_opcodes = __IO_URING_QUERY_MAX;
+	e->__pad = 0;
 	return sizeof(*e);
 }
 
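The query.c change above pairs with the new nr_query_opcodes field in struct io_uring_query_opcode: the kernel now reports __IO_URING_QUERY_MAX so userspace can tell which query opcodes exist before issuing them. Query opcodes appear to be a dense 0..N-1 range, so a bounds check is enough. A minimal userspace sketch, assuming the uapi header installs as <linux/io_uring/query.h> (the path named in the register-opcode comment above) and that the structure has already been filled in via IORING_REGISTER_QUERY:

/* Sketch only: the IORING_REGISTER_QUERY fetch itself is omitted here. */
#include <stdbool.h>
#include <linux/io_uring/query.h>

static bool query_op_supported(const struct io_uring_query_opcode *e,
			       unsigned int op)
{
	/* anything below the advertised count is available on this kernel */
	return op < e->nr_query_opcodes;
}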
@@ -826,9 +826,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	case IORING_REGISTER_QUERY:
 		ret = io_query(ctx, arg, nr_args);
 		break;
-	case IORING_REGISTER_ZCRX_REFILL:
-		ret = io_zcrx_return_bufs(ctx, arg, nr_args);
-		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -945,8 +945,8 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
 	struct req_iterator rq_iter;
 	struct io_mapped_ubuf *imu;
 	struct io_rsrc_node *node;
-	struct bio_vec bv, *bvec;
-	u16 nr_bvecs;
+	struct bio_vec bv;
+	unsigned int nr_bvecs = 0;
 	int ret = 0;
 
 	io_ring_submit_lock(ctx, issue_flags);
@@ -967,8 +967,11 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
 		goto unlock;
 	}
 
-	nr_bvecs = blk_rq_nr_phys_segments(rq);
-	imu = io_alloc_imu(ctx, nr_bvecs);
+	/*
+	 * blk_rq_nr_phys_segments() may overestimate the number of bvecs
+	 * but avoids needing to iterate over the bvecs
+	 */
+	imu = io_alloc_imu(ctx, blk_rq_nr_phys_segments(rq));
 	if (!imu) {
 		kfree(node);
 		ret = -ENOMEM;
@@ -979,16 +982,15 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
 	imu->len = blk_rq_bytes(rq);
 	imu->acct_pages = 0;
 	imu->folio_shift = PAGE_SHIFT;
-	imu->nr_bvecs = nr_bvecs;
 	refcount_set(&imu->refs, 1);
 	imu->release = release;
 	imu->priv = rq;
 	imu->is_kbuf = true;
 	imu->dir = 1 << rq_data_dir(rq);
 
-	bvec = imu->bvec;
 	rq_for_each_bvec(bv, rq, rq_iter)
-		*bvec++ = bv;
+		imu->bvec[nr_bvecs++] = bv;
+	imu->nr_bvecs = nr_bvecs;
 
 	node->buf = imu;
 	data->nodes[index] = node;
@@ -1405,8 +1407,11 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs,
 	size_t max_segs = 0;
 	unsigned i;
 
-	for (i = 0; i < nr_iovs; i++)
+	for (i = 0; i < nr_iovs; i++) {
 		max_segs += (iov[i].iov_len >> shift) + 2;
+		if (max_segs > INT_MAX)
+			return -EOVERFLOW;
+	}
 	return max_segs;
 }
 
@@ -1512,7 +1517,11 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
 		if (unlikely(ret))
 			return ret;
 	} else {
-		nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu);
+		int ret = io_estimate_bvec_size(iov, nr_iovs, imu);
+
+		if (ret < 0)
+			return ret;
+		nr_segs = ret;
 	}
 
 	if (sizeof(struct bio_vec) > sizeof(struct iovec)) {
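The rsrc.c fixes above both guard against integer truncation: io_buffer_register_bvec() used to keep the bvec count in a u16 and trust blk_rq_nr_phys_segments(), and io_estimate_bvec_size() could produce a size_t estimate that no longer fits the int it is returned through. A standalone userspace illustration of the failure mode (not kernel code; the counts are only examples):

/* Demonstrates the silent wrap the fixes avoid: a count wider than the
 * destination type is truncated without warning. Builds with any C compiler. */
#include <errno.h>
#include <limits.h>
#include <stdio.h>

static int checked_estimate(size_t max_segs)
{
	/* mirrors the new bail-out: refuse estimates that don't fit an int */
	if (max_segs > INT_MAX)
		return -EOVERFLOW;
	return (int)max_segs;
}

int main(void)
{
	unsigned int nr_segs = 70000;		/* more than a u16 can hold */
	unsigned short as_u16 = nr_segs;	/* old field width */

	printf("u16 truncation: %u -> %u\n", nr_segs, as_u16);
	printf("checked estimate: %d\n", checked_estimate((size_t)INT_MAX + 1));
	return 0;
}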
@@ -463,7 +463,10 @@ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 void io_readv_writev_cleanup(struct io_kiocb *req)
 {
+	struct io_async_rw *rw = req->async_data;
+
 	lockdep_assert_held(&req->ctx->uring_lock);
+	io_vec_free(&rw->vec);
 	io_rw_recycle(req, 0);
 }
 
@@ -11,6 +11,7 @@
 #include <linux/audit.h>
 #include <linux/security.h>
 #include <linux/cpuset.h>
+#include <linux/sched/cputime.h>
 #include <linux/io_uring.h>
 
 #include <uapi/linux/io_uring.h>
@@ -170,7 +171,38 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
 	return READ_ONCE(sqd->state);
 }
 
-static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
+struct io_sq_time {
+	bool started;
+	u64 usec;
+};
+
+u64 io_sq_cpu_usec(struct task_struct *tsk)
+{
+	u64 utime, stime;
+
+	task_cputime_adjusted(tsk, &utime, &stime);
+	do_div(stime, 1000);
+	return stime;
+}
+
+static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist)
+{
+	if (!ist->started)
+		return;
+	ist->started = false;
+	sqd->work_time += io_sq_cpu_usec(current) - ist->usec;
+}
+
+static void io_sq_start_worktime(struct io_sq_time *ist)
+{
+	if (ist->started)
+		return;
+	ist->started = true;
+	ist->usec = io_sq_cpu_usec(current);
+}
+
+static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
+			  bool cap_entries, struct io_sq_time *ist)
 {
 	unsigned int to_submit;
 	int ret = 0;
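The key detail in the sqpoll accounting rework above is the unit handling: sqd->work_time and the fdinfo output stay in microseconds (the old getrusage() path built them from ru_stime.tv_sec * 1000000 + ru_stime.tv_usec), while task_cputime_adjusted() is assumed to report the system time as a u64 nanosecond count, which is what the do_div(stime, 1000) in io_sq_cpu_usec() converts. A small userspace sketch of that conversion, under the nanosecond-input assumption:

/* Userspace illustration only; the constants are examples. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t cpu_usec_from_ns(uint64_t stime_ns)
{
	return stime_ns / 1000;		/* what do_div(stime, 1000) does */
}

int main(void)
{
	uint64_t ns = 2500000000ULL;	/* 2.5 seconds of system time */

	printf("%" PRIu64 " ns -> %" PRIu64 " us\n", ns, cpu_usec_from_ns(ns));
	return 0;
}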
@@ -183,6 +215,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 	if (to_submit || !wq_list_empty(&ctx->iopoll_list)) {
 		const struct cred *creds = NULL;
 
+		io_sq_start_worktime(ist);
+
 		if (ctx->sq_creds != current_cred())
 			creds = override_creds(ctx->sq_creds);
 
@@ -256,23 +290,11 @@ static bool io_sq_tw_pending(struct llist_node *retry_list)
 	return retry_list || !llist_empty(&tctx->task_list);
 }
 
-static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
-{
-	struct rusage end;
-
-	getrusage(current, RUSAGE_SELF, &end);
-	end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
-	end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
-
-	sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
-}
-
 static int io_sq_thread(void *data)
 {
 	struct llist_node *retry_list = NULL;
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
-	struct rusage start;
 	unsigned long timeout = 0;
 	char buf[TASK_COMM_LEN] = {};
 	DEFINE_WAIT(wait);
@@ -310,6 +332,7 @@ static int io_sq_thread(void *data)
 	mutex_lock(&sqd->lock);
 	while (1) {
 		bool cap_entries, sqt_spin = false;
+		struct io_sq_time ist = { };
 
 		if (io_sqd_events_pending(sqd) || signal_pending(current)) {
 			if (io_sqd_handle_event(sqd))
@@ -318,9 +341,8 @@ static int io_sq_thread(void *data)
 		}
 
 		cap_entries = !list_is_singular(&sqd->ctx_list);
-		getrusage(current, RUSAGE_SELF, &start);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
-			int ret = __io_sq_thread(ctx, cap_entries);
+			int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist);
 
 			if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
@@ -328,15 +350,18 @@ static int io_sq_thread(void *data)
 		if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
 			sqt_spin = true;
 
-		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-			if (io_napi(ctx))
+		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+			if (io_napi(ctx)) {
+				io_sq_start_worktime(&ist);
 				io_napi_sqpoll_busy_poll(ctx);
+			}
+		}
 
+		io_sq_update_worktime(sqd, &ist);
+
 		if (sqt_spin || !time_after(jiffies, timeout)) {
-			if (sqt_spin) {
-				io_sq_update_worktime(sqd, &start);
+			if (sqt_spin)
 				timeout = jiffies + sqd->sq_thread_idle;
-			}
 			if (unlikely(need_resched())) {
 				mutex_unlock(&sqd->lock);
 				cond_resched();
@@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd);
 void io_put_sq_data(struct io_sq_data *sqd);
 void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
 int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
+u64 io_sq_cpu_usec(struct task_struct *tsk);
 
 static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
 {
@@ -266,7 +266,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -EINVAL;
 
 	iwa = io_uring_alloc_async_data(NULL, req);
-	if (!unlikely(iwa))
+	if (unlikely(!iwa))
 		return -ENOMEM;
 	iwa->req = req;
 
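The waitid fix above is an operand-ordering issue: !unlikely(iwa) and unlikely(!iwa) test the same condition, but only the second form attaches the branch-prediction hint to the value the branch actually checks. A userspace illustration using the same __builtin_expect() expansion the kernel uses for unlikely():

/* Not kernel code: shows why the operand order of unlikely() matters.
 * In the kernel, unlikely(x) expands to __builtin_expect(!!(x), 0). */
#define unlikely(x)	__builtin_expect(!!(x), 0)

int old_check(void *iwa)
{
	/* hint says "iwa is usually NULL", then the result is negated */
	return !unlikely(iwa);
}

int new_check(void *iwa)
{
	/* hint says "allocation rarely fails", matching reality */
	return unlikely(!iwa);
}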
@@ -941,74 +941,6 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = {
 	.uninstall = io_pp_uninstall,
 };
 
-#define IO_ZCRX_MAX_SYS_REFILL_BUFS	(1 << 16)
-#define IO_ZCRX_SYS_REFILL_BATCH	32
-
-static void io_return_buffers(struct io_zcrx_ifq *ifq,
-			      struct io_uring_zcrx_rqe *rqes, unsigned nr)
-{
-	int i;
-
-	for (i = 0; i < nr; i++) {
-		struct net_iov *niov;
-		netmem_ref netmem;
-
-		if (!io_parse_rqe(&rqes[i], ifq, &niov))
-			continue;
-
-		scoped_guard(spinlock_bh, &ifq->rq_lock) {
-			if (!io_zcrx_put_niov_uref(niov))
-				continue;
-		}
-
-		netmem = net_iov_to_netmem(niov);
-		if (!page_pool_unref_and_test(netmem))
-			continue;
-		io_zcrx_return_niov(niov);
-	}
-}
-
-int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
-			void __user *arg, unsigned nr_arg)
-{
-	struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH];
-	struct io_uring_zcrx_rqe __user *user_rqes;
-	struct io_uring_zcrx_sync_refill zr;
-	struct io_zcrx_ifq *ifq;
-	unsigned nr, i;
-
-	if (nr_arg)
-		return -EINVAL;
-	if (copy_from_user(&zr, arg, sizeof(zr)))
-		return -EFAULT;
-	if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS)
-		return -EINVAL;
-	if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv)))
-		return -EINVAL;
-
-	ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id);
-	if (!ifq)
-		return -EINVAL;
-	nr = zr.nr_entries;
-	user_rqes = u64_to_user_ptr(zr.rqes);
-
-	for (i = 0; i < nr;) {
-		unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH);
-		size_t size = batch * sizeof(rqes[0]);
-
-		if (copy_from_user(rqes, user_rqes + i, size))
-			return i ? i : -EFAULT;
-		io_return_buffers(ifq, rqes, batch);
-
-		i += batch;
-
-		if (fatal_signal_pending(current))
-			return i;
-		cond_resched();
-	}
-	return nr;
-}
-
 static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 			      struct io_zcrx_ifq *ifq, int off, int len)
 {
@@ -65,8 +65,6 @@ struct io_zcrx_ifq {
 };
 
 #if defined(CONFIG_IO_URING_ZCRX)
-int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
-			void __user *arg, unsigned nr_arg);
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			 struct io_uring_zcrx_ifq_reg __user *arg);
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx);
@@ -95,11 +93,6 @@ static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ct
 {
 	return NULL;
 }
-static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
-				      void __user *arg, unsigned nr_arg)
-{
-	return -EOPNOTSUPP;
-}
 #endif
 
 int io_recvzc(struct io_kiocb *req, unsigned int issue_flags);