From 62bcbdca0ea9b1add9c22f400b51c56184902053 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 11 Nov 2025 09:11:58 +1030 Subject: [PATCH] btrfs: make btrfs_csum_one_bio() handle bs > ps without large folios For bs > ps cases, all folios passed into btrfs_csum_one_bio() are ensured to be backed by large folios. But that requirement excludes features like direct IO and encoded writes. To support bs > ps without large folios, enhance btrfs_csum_one_bio() by: - Split btrfs_calculate_block_csum() into two versions * btrfs_calculate_block_csum_folio() For call sites where a fs block is always backed by a large folio. This will do extra checks on the folio size, build a paddrs[] array, and pass it into the newer btrfs_calculate_block_csum_pages() helper. For now btrfs_check_block_csum() is still using this version. * btrfs_calculate_block_csum_pages() For call sites that may hit a fs block backed by noncontiguous pages. The pages are represented by paddrs[] array, which includes the offset inside the page. This function will do the proper sub-block handling. - Make btrfs_csum_one_bio() to use btrfs_calculate_block_csum_pages() This means we will need to build a local paddrs[] array, and after filling a fs block, do the checksum calculation. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/btrfs_inode.h | 6 ++-- fs/btrfs/file-item.c | 15 +++++++-- fs/btrfs/inode.c | 73 ++++++++++++++++++++++++++++++------------ 3 files changed, 68 insertions(+), 26 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index af38d47fc131..00671b724079 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -543,8 +543,10 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode) #endif } -void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, - u8 *dest); +void btrfs_calculate_block_csum_folio(struct btrfs_fs_info *fs_info, + const phys_addr_t paddr, u8 *dest); +void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info, + const phys_addr_t paddrs[], u8 *dest); int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, const u8 * const csum_expected); bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 72be3ede0edf..e7c219e83ff0 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -775,13 +775,22 @@ static void csum_one_bio(struct btrfs_bio *bbio, struct bvec_iter *src) struct bvec_iter iter = *src; phys_addr_t paddr; const u32 blocksize = fs_info->sectorsize; + const u32 step = min(blocksize, PAGE_SIZE); + const u32 nr_steps = blocksize / step; + phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; + u32 offset = 0; int index = 0; shash->tfm = fs_info->csum_shash; - btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) { - btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index); - index += fs_info->csum_size; + btrfs_bio_for_each_block(paddr, bio, &iter, step) { + paddrs[(offset / step) % nr_steps] = paddr; + offset += step; + + if (IS_ALIGNED(offset, blocksize)) { + btrfs_calculate_block_csum_pages(fs_info, paddrs, sums->sums + index); + index += fs_info->csum_size; + } } } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5929cc5fc01f..c083a67d0091 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3343,36 +3343,67 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered) return btrfs_finish_one_ordered(ordered); } -void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, - u8 *dest) +/* + * Calculate the checksum of an fs block at physical memory address @paddr, + * and save the result to @dest. + * + * The folio containing @paddr must be large enough to contain a full fs block. + */ +void btrfs_calculate_block_csum_folio(struct btrfs_fs_info *fs_info, + const phys_addr_t paddr, u8 *dest) { struct folio *folio = page_folio(phys_to_page(paddr)); const u32 blocksize = fs_info->sectorsize; - SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + const u32 step = min(blocksize, PAGE_SIZE); + const u32 nr_steps = blocksize / step; + phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; - shash->tfm = fs_info->csum_shash; /* The full block must be inside the folio. */ ASSERT(offset_in_folio(folio, paddr) + blocksize <= folio_size(folio)); - if (folio_test_partial_kmap(folio)) { - size_t cur = paddr; + for (int i = 0; i < nr_steps; i++) { + u32 pindex = offset_in_folio(folio, paddr + i * step) >> PAGE_SHIFT; - crypto_shash_init(shash); - while (cur < paddr + blocksize) { - void *kaddr; - size_t len = min(paddr + blocksize - cur, - PAGE_SIZE - offset_in_page(cur)); - - kaddr = kmap_local_folio(folio, offset_in_folio(folio, cur)); - crypto_shash_update(shash, kaddr, len); - kunmap_local(kaddr); - cur += len; - } - crypto_shash_final(shash, dest); - } else { - crypto_shash_digest(shash, phys_to_virt(paddr), blocksize, dest); + /* + * For bs <= ps cases, we will only run the loop once, so the offset + * inside the page will only added to paddrs[0]. + * + * For bs > ps cases, the block must be page aligned, thus offset + * inside the page will always be 0. + */ + paddrs[i] = page_to_phys(folio_page(folio, pindex)) + offset_in_page(paddr); } + return btrfs_calculate_block_csum_pages(fs_info, paddrs, dest); } + +/* + * Calculate the checksum of a fs block backed by multiple noncontiguous pages + * at @paddrs[] and save the result to @dest. + * + * The folio containing @paddr must be large enough to contain a full fs block. + */ +void btrfs_calculate_block_csum_pages(struct btrfs_fs_info *fs_info, + const phys_addr_t paddrs[], u8 *dest) +{ + const u32 blocksize = fs_info->sectorsize; + const u32 step = min(blocksize, PAGE_SIZE); + const u32 nr_steps = blocksize / step; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); + for (int i = 0; i < nr_steps; i++) { + const phys_addr_t paddr = paddrs[i]; + void *kaddr; + + ASSERT(offset_in_page(paddr) + step <= PAGE_SIZE); + kaddr = kmap_local_page(phys_to_page(paddr)) + offset_in_page(paddr); + crypto_shash_update(shash, kaddr, step); + kunmap_local(kaddr); + } + crypto_shash_final(shash, dest); +} + /* * Verify the checksum for a single sector without any extra action that depend * on the type of I/O. @@ -3382,7 +3413,7 @@ void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum, const u8 * const csum_expected) { - btrfs_calculate_block_csum(fs_info, paddr, csum); + btrfs_calculate_block_csum_folio(fs_info, paddr, csum); if (unlikely(memcmp(csum, csum_expected, fs_info->csum_size) != 0)) return -EIO; return 0;