iomap: track pending read bytes more optimally

Instead of incrementing read_bytes_pending for every folio range read in
(which requires taking the ifs->state_lock spinlock each time), set
read_bytes_pending to the folio size plus a +1 bias when the first range
is read in asynchronously, keep a running count of how many bytes in
total are submitted for asynchronous reads, and settle read_bytes_pending
against that count once, after requests have been issued for all the
necessary ranges. The bias ensures the read cannot be ended on the folio
before that final settlement, even if every submitted range has already
completed.

iomap_read_folio_ctx->cur_folio_in_bio can be removed since a non-zero
value for pending bytes necessarily indicates the folio is in the bio.
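
A minimal userspace model of the pattern may help. The names read_init(),
range_done() and read_end() below are illustrative stand-ins, not the
kernel API, and a bare counter replaces the state_lock-protected
ifs->read_bytes_pending. It walks a 16K folio in which only two 4K ranges
actually need IO:

	#include <assert.h>
	#include <stddef.h>

	static size_t read_bytes_pending;	/* ifs->state_lock guards this in the kernel */

	/* Charge the whole folio up front, plus a +1 bias, on first submission. */
	static void read_init(size_t folio_size)
	{
		read_bytes_pending += folio_size + 1;
	}

	/* Completion of one submitted range; returns 1 if it ends the read. */
	static int range_done(size_t plen)
	{
		read_bytes_pending -= plen;
		return read_bytes_pending == 0;
	}

	/*
	 * Settlement once all ranges have been issued: drop the bias plus
	 * every byte that never needed IO. Whichever path brings the counter
	 * to zero ends the read. A submitted count of zero means no IO helper
	 * ever took ownership of the folio (this is what makes
	 * cur_folio_in_bio redundant), so the caller just unlocks it.
	 */
	static int read_end(size_t folio_size, size_t submitted)
	{
		if (!submitted)
			return 1;	/* caller unlocks the folio itself */
		read_bytes_pending -= folio_size + 1 - submitted;
		return read_bytes_pending == 0;
	}

	int main(void)
	{
		read_init(16384);		/* pending = 16384 + 1	*/
		/* Two 4K ranges are submitted; the other 8K was already uptodate. */
		assert(!range_done(4096));	/* pending = 12289	*/
		assert(!range_done(4096));	/* pending = 8193	*/
		assert(read_end(16384, 8192));	/* settles to 0: end the read */
		return 0;
	}

Whichever order the completions and the settlement run in, exactly one
path observes zero and ends the read.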

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Suggested-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
Author:    Joanne Koong
Date:      2025-09-25 17:26:02 -07:00
Committer: Christian Brauner
Commit:    d43558ae67 (parent: 87a13819dd)

@@ -362,7 +362,6 @@ static void iomap_read_end_io(struct bio *bio)
 struct iomap_read_folio_ctx {
 	struct folio		*cur_folio;
-	bool			cur_folio_in_bio;
 	void			*read_ctx;
 	struct readahead_control *rac;
 };
@@ -380,19 +379,11 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
 {
 	struct folio *folio = ctx->cur_folio;
 	const struct iomap *iomap = &iter->iomap;
-	struct iomap_folio_state *ifs = folio->private;
 	size_t poff = offset_in_folio(folio, pos);
 	loff_t length = iomap_length(iter);
 	sector_t sector;
 	struct bio *bio = ctx->read_ctx;
 
-	ctx->cur_folio_in_bio = true;
-	if (ifs) {
-		spin_lock_irq(&ifs->state_lock);
-		ifs->read_bytes_pending += plen;
-		spin_unlock_irq(&ifs->state_lock);
-	}
-
 	sector = iomap_sector(iomap, pos);
 	if (!bio || bio_end_sector(bio) != sector ||
 	    !bio_add_folio(bio, folio, plen, poff)) {
@@ -422,8 +413,93 @@ static void iomap_bio_read_folio_range(const struct iomap_iter *iter,
 	}
 }
 
+static void iomap_read_init(struct folio *folio)
+{
+	struct iomap_folio_state *ifs = folio->private;
+
+	if (ifs) {
+		size_t len = folio_size(folio);
+
+		/*
+		 * ifs->read_bytes_pending is used to track how many bytes are
+		 * read in asynchronously by the IO helper. We need to track
+		 * this so that we can know when the IO helper has finished
+		 * reading in all the necessary ranges of the folio and can end
+		 * the read.
+		 *
+		 * Increase ->read_bytes_pending by the folio size to start, and
+		 * add a +1 bias. We'll subtract the bias and any uptodate /
+		 * zeroed ranges that did not require IO in iomap_read_end()
+		 * after we're done processing the folio.
+		 *
+		 * We do this because otherwise, we would have to increment
+		 * ifs->read_bytes_pending every time a range in the folio needs
+		 * to be read in, which can get expensive since the spinlock
+		 * needs to be held whenever modifying ifs->read_bytes_pending.
+		 *
+		 * We add the bias to ensure the read has not been ended on the
+		 * folio when iomap_read_end() is called, even if the IO helper
+		 * has already finished reading in the entire folio.
+		 */
+		spin_lock_irq(&ifs->state_lock);
+		ifs->read_bytes_pending += len + 1;
+		spin_unlock_irq(&ifs->state_lock);
+	}
+}
+
+/*
+ * This ends IO if no bytes were submitted to an IO helper.
+ *
+ * Otherwise, this calibrates ifs->read_bytes_pending to represent only the
+ * submitted bytes (see comment in iomap_read_init()). If all bytes submitted
+ * have already been completed by the IO helper, then this will end the read.
+ * Else the IO helper will end the read after all submitted ranges have been
+ * read.
+ */
+static void iomap_read_end(struct folio *folio, size_t bytes_pending)
+{
+	struct iomap_folio_state *ifs;
+
+	/*
+	 * If there are no bytes pending, this means we are responsible for
+	 * unlocking the folio here, since no IO helper has taken ownership of
+	 * it.
+	 */
+	if (!bytes_pending) {
+		folio_unlock(folio);
+		return;
+	}
+
+	ifs = folio->private;
+	if (ifs) {
+		bool end_read, uptodate;
+		/*
+		 * Subtract any bytes that were initially accounted to
+		 * read_bytes_pending but skipped for IO.
+		 * The +1 accounts for the bias we added in iomap_read_init().
+		 */
+		size_t bytes_accounted = folio_size(folio) + 1 -
+			bytes_pending;
+
+		spin_lock_irq(&ifs->state_lock);
+		ifs->read_bytes_pending -= bytes_accounted;
+		/*
+		 * If !ifs->read_bytes_pending, this means all pending reads
+		 * by the IO helper have already completed, which means we need
+		 * to end the folio read here. If ifs->read_bytes_pending != 0,
+		 * the IO helper will end the folio read.
+		 */
+		end_read = !ifs->read_bytes_pending;
+		if (end_read)
+			uptodate = ifs_is_fully_uptodate(folio, ifs);
+		spin_unlock_irq(&ifs->state_lock);
+		if (end_read)
+			folio_end_read(folio, uptodate);
+	}
+}
+
 static int iomap_read_folio_iter(struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx)
+		struct iomap_read_folio_ctx *ctx, size_t *bytes_pending)
 {
 	const struct iomap *iomap = &iter->iomap;
 	loff_t pos = iter->pos;
@@ -460,6 +536,9 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
 			folio_zero_range(folio, poff, plen);
 			iomap_set_range_uptodate(folio, poff, plen);
 		} else {
+			if (!*bytes_pending)
+				iomap_read_init(folio);
+			*bytes_pending += plen;
 			iomap_bio_read_folio_range(iter, ctx, pos, plen);
 		}
@@ -482,17 +561,18 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
 	struct iomap_read_folio_ctx ctx = {
 		.cur_folio = folio,
 	};
+	size_t bytes_pending = 0;
 	int ret;
 
 	trace_iomap_readpage(iter.inode, 1);
 
 	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.status = iomap_read_folio_iter(&iter, &ctx);
+		iter.status = iomap_read_folio_iter(&iter, &ctx,
+				&bytes_pending);
 
 	iomap_bio_submit_read(&ctx);
 
-	if (!ctx.cur_folio_in_bio)
-		folio_unlock(folio);
+	iomap_read_end(folio, bytes_pending);
 
 	/*
 	 * Just like mpage_readahead and block_read_full_folio, we always
@@ -504,24 +584,23 @@ int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops)
 EXPORT_SYMBOL_GPL(iomap_read_folio);
 
 static int iomap_readahead_iter(struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx)
+		struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_pending)
 {
 	int ret;
 
 	while (iomap_length(iter)) {
 		if (ctx->cur_folio &&
 		    offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
-			if (!ctx->cur_folio_in_bio)
-				folio_unlock(ctx->cur_folio);
+			iomap_read_end(ctx->cur_folio, *cur_bytes_pending);
 			ctx->cur_folio = NULL;
 		}
 		if (!ctx->cur_folio) {
 			ctx->cur_folio = readahead_folio(ctx->rac);
 			if (WARN_ON_ONCE(!ctx->cur_folio))
 				return -EINVAL;
-			ctx->cur_folio_in_bio = false;
+			*cur_bytes_pending = 0;
 		}
-		ret = iomap_read_folio_iter(iter, ctx);
+		ret = iomap_read_folio_iter(iter, ctx, cur_bytes_pending);
 		if (ret)
 			return ret;
 	}
@@ -554,16 +633,18 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
 	struct iomap_read_folio_ctx ctx = {
 		.rac = rac,
 	};
+	size_t cur_bytes_pending;
 
 	trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
 
 	while (iomap_iter(&iter, ops) > 0)
-		iter.status = iomap_readahead_iter(&iter, &ctx);
+		iter.status = iomap_readahead_iter(&iter, &ctx,
+				&cur_bytes_pending);
 
 	iomap_bio_submit_read(&ctx);
 
-	if (ctx.cur_folio && !ctx.cur_folio_in_bio)
-		folio_unlock(ctx.cur_folio);
+	if (ctx.cur_folio)
+		iomap_read_end(ctx.cur_folio, cur_bytes_pending);
 }
 EXPORT_SYMBOL_GPL(iomap_readahead);
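
Reusing the userspace model from the sketch above, the fully-submitted
case shows what the +1 bias buys. Even when every byte of the folio needs
IO and all completions finish before the submitter settles the counter,
the bias keeps the count nonzero until the settlement runs (again a
hypothetical walkthrough, not kernel code):

	read_init(8192);		/* pending = 8192 + 1 (bias)	*/
	assert(!range_done(4096));	/* pending = 4097		*/
	assert(!range_done(4096));	/* pending = 1: bias holds	*/
	assert(read_end(8192, 8192));	/* drop the bias; end the read	*/

Without the bias, the second completion would take the counter to zero
and end the read while the settlement in iomap_read_end() was still
pending, leaving two paths able to end the read on the same folio.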