ublk: use copy_{to,from}_iter() for user copy

ublk_copy_user_pages()/ublk_copy_io_pages() currently use
iov_iter_get_pages2() to extract the pages from the iov_iter and
memcpy() between the bvec_iter and the iov_iter's pages one page at a
time. Switch to copy_to_iter()/copy_from_iter() instead. This avoids
incrementing and decrementing the user page reference counts and
splitting the memcpy() at user page boundaries. It also simplifies the
code considerably.

Ming reports a 40% throughput improvement when issuing I/O to the
selftests null ublk server with zero-copy disabled.

Signed-off-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

commit 2299ceec36 (parent 15638d52cb)
Author:    Caleb Sander Mateos
Date:      2025-11-06 10:16:46 -07:00
Committer: Jens Axboe

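For readers new to the iov_iter helpers this patch switches to, here is a minimal sketch, not part of the patch (copy_kbuf_to_user() is a hypothetical name), of the pattern the driver now relies on: describe the user buffer as an iov_iter with import_ubuf(), then let copy_to_iter() do the user access and the splitting at page boundaries, returning how many bytes were actually copied.

#include <linux/uio.h>
#include <linux/types.h>

/* Hypothetical helper: copy a kernel buffer into a user buffer via an iov_iter. */
static size_t copy_kbuf_to_user(void __user *ubuf, const void *kbuf, size_t len)
{
	struct iov_iter uiter;

	/* Describe the destination user buffer as an ITER_DEST iov_iter. */
	if (import_ubuf(ITER_DEST, ubuf, len, &uiter))
		return 0;

	/*
	 * copy_to_iter() goes through the normal user-copy path, so no page
	 * pinning or manual split at page boundaries is needed; a short
	 * return value means the copy faulted partway through.
	 */
	return copy_to_iter(kbuf, len, &uiter);
}

ublk_copy_io_pages() in the diff below applies the same pattern per bio_vec segment, in both directions, which is why the explicit page pinning, dirtying, and unpinning in ublk_copy_user_pages() can be dropped.
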
@@ -913,47 +913,35 @@ static const struct block_device_operations ub_fops = {
 	.report_zones	= ublk_report_zones,
 };
 
-#define UBLK_MAX_PIN_PAGES	32
-
 struct ublk_io_iter {
-	struct page *pages[UBLK_MAX_PIN_PAGES];
 	struct bio *bio;
 	struct bvec_iter iter;
 };
 
-/* return how many pages are copied */
-static void ublk_copy_io_pages(struct ublk_io_iter *data,
-		size_t total, size_t pg_off, int dir)
+/* return how many bytes are copied */
+static size_t ublk_copy_io_pages(struct ublk_io_iter *data,
+		struct iov_iter *uiter, int dir)
 {
-	unsigned done = 0;
-	unsigned pg_idx = 0;
+	size_t done = 0;
 
-	while (done < total) {
+	for (;;) {
 		struct bio_vec bv = bio_iter_iovec(data->bio, data->iter);
-		unsigned int bytes = min3(bv.bv_len, (unsigned)total - done,
-				(unsigned)(PAGE_SIZE - pg_off));
 		void *bv_buf = bvec_kmap_local(&bv);
-		void *pg_buf = kmap_local_page(data->pages[pg_idx]);
+		size_t copied;
 
 		if (dir == ITER_DEST)
-			memcpy(pg_buf + pg_off, bv_buf, bytes);
+			copied = copy_to_iter(bv_buf, bv.bv_len, uiter);
 		else
-			memcpy(bv_buf, pg_buf + pg_off, bytes);
+			copied = copy_from_iter(bv_buf, bv.bv_len, uiter);
 
-		kunmap_local(pg_buf);
 		kunmap_local(bv_buf);
 
-		/* advance page array */
-		pg_off += bytes;
-		if (pg_off == PAGE_SIZE) {
-			pg_idx += 1;
-			pg_off = 0;
-		}
-
-		done += bytes;
+		done += copied;
+		if (copied < bv.bv_len)
+			break;
 
 		/* advance bio */
-		bio_advance_iter_single(data->bio, &data->iter, bytes);
+		bio_advance_iter_single(data->bio, &data->iter, copied);
 		if (!data->iter.bi_size) {
 			data->bio = data->bio->bi_next;
 			if (data->bio == NULL)
@@ -961,6 +949,7 @@ static void ublk_copy_io_pages(struct ublk_io_iter *data,
 			data->iter = data->bio->bi_iter;
 		}
 	}
+	return done;
 }
 
 static bool ublk_advance_io_iter(const struct request *req,
@@ -988,34 +977,11 @@ static size_t ublk_copy_user_pages(const struct request *req,
 		unsigned offset, struct iov_iter *uiter, int dir)
 {
 	struct ublk_io_iter iter;
-	size_t done = 0;
 
 	if (!ublk_advance_io_iter(req, &iter, offset))
 		return 0;
 
-	while (iov_iter_count(uiter) && iter.bio) {
-		unsigned nr_pages;
-		ssize_t len;
-		size_t off;
-		int i;
-
-		len = iov_iter_get_pages2(uiter, iter.pages,
-				iov_iter_count(uiter),
-				UBLK_MAX_PIN_PAGES, &off);
-		if (len <= 0)
-			return done;
-
-		ublk_copy_io_pages(&iter, len, off, dir);
-		nr_pages = DIV_ROUND_UP(len + off, PAGE_SIZE);
-		for (i = 0; i < nr_pages; i++) {
-			if (dir == ITER_DEST)
-				set_page_dirty(iter.pages[i]);
-			put_page(iter.pages[i]);
-		}
-		done += len;
-	}
-
-	return done;
+	return ublk_copy_io_pages(&iter, uiter, dir);
 }
 
 static inline bool ublk_need_map_req(const struct request *req)