Merge tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull writeback updates from Christian Brauner:
 "Features:

   - Allow file systems to increase the minimum writeback chunk size.

     The relatively low minimal writeback size of 4MiB means that
     written back inodes on rotational media are switched a lot. Besides
     introducing additional seeks, this also can lead to extreme file
     fragmentation on zoned devices when a lot of files are cached
     relative to the available writeback bandwidth.

     This adds a superblock field that allows the file system to
     override the default size, and sets it to the zone size for zoned
     XFS.

   - Add logging for slow writeback when it exceeds
     sysctl_hung_task_timeout_secs. This helps identify tasks waiting
     for a long time and pinpoint potential issues. Recording the
     starting jiffies is also useful when debugging a crashed vmcore.

   - Wake up waiting tasks when finishing the writeback of a chunk

  Cleanups:

   - filemap_* writeback interface cleanups.

     Adding filemap_fdatawrite_wbc ended up being a mistake, as all but
     the original btrfs caller should be using better high level
     interfaces instead.

     This series removes all these low-level interfaces, switches btrfs
     to a more specific interface, and cleans up other too low-level
     interfaces. With this the writeback_control that is passed to the
     writeback code is only initialized in three places.

   - Remove __filemap_fdatawrite, __filemap_fdatawrite_range, and
     filemap_fdatawrite_wbc

   - Add filemap_flush_nr helper for btrfs

   - Push struct writeback_control into start_delalloc_inodes in btrfs

   - Rename filemap_fdatawrite_range_kick to filemap_flush_range

   - Stop opencoding filemap_fdatawrite_range in 9p, ocfs2, and mm

   - Make wbc_to_tag() inline and use it in fs"

* tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs: Make wbc_to_tag() inline and use it in fs.
  xfs: set s_min_writeback_pages for zoned file systems
  writeback: allow the file system to override MIN_WRITEBACK_PAGES
  writeback: cleanup writeback_chunk_size
  mm: rename filemap_fdatawrite_range_kick to filemap_flush_range
  mm: remove __filemap_fdatawrite_range
  mm: remove filemap_fdatawrite_wbc
  mm: remove __filemap_fdatawrite
  mm,btrfs: add a filemap_flush_nr helper
  btrfs: push struct writeback_control into start_delalloc_inodes
  btrfs: use the local tmp_inode variable in start_delalloc_inodes
  ocfs2: don't opencode filemap_fdatawrite_range in ocfs2_journal_submit_inode_data_buffers
  9p: don't opencode filemap_fdatawrite_range in v9fs_mmap_vm_close
  mm: don't opencode filemap_fdatawrite_range in filemap_invalidate_inode
  writeback: Add logging for slow writeback (exceeds sysctl_hung_task_timeout_secs)
  writeback: Wake up waiting tasks when finishing the writeback of a chunk.
This commit is contained in:
Linus Torvalds
2025-12-01 09:20:51 -08:00
19 changed files with 157 additions and 187 deletions

View File

@@ -111,8 +111,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
spin_unlock(&file->f_lock);
break;
case POSIX_FADV_DONTNEED:
__filemap_fdatawrite_range(mapping, offset, endbyte,
WB_SYNC_NONE);
filemap_flush_range(mapping, offset, endbyte);
/*
* First and last FULL page! Partial pages are deliberately

View File

@@ -366,83 +366,60 @@ static int filemap_check_and_keep_errors(struct address_space *mapping)
return 0;
}
/**
* filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range
* @mapping: address space structure to write
* @wbc: the writeback_control controlling the writeout
*
* Call writepages on the mapping using the provided wbc to control the
* writeout.
*
* Return: %0 on success, negative error code otherwise.
*/
int filemap_fdatawrite_wbc(struct address_space *mapping,
struct writeback_control *wbc)
static int filemap_writeback(struct address_space *mapping, loff_t start,
loff_t end, enum writeback_sync_modes sync_mode,
long *nr_to_write)
{
struct writeback_control wbc = {
.sync_mode = sync_mode,
.nr_to_write = nr_to_write ? *nr_to_write : LONG_MAX,
.range_start = start,
.range_end = end,
};
int ret;
if (!mapping_can_writeback(mapping) ||
!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
return 0;
wbc_attach_fdatawrite_inode(wbc, mapping->host);
ret = do_writepages(mapping, wbc);
wbc_detach_inode(wbc);
wbc_attach_fdatawrite_inode(&wbc, mapping->host);
ret = do_writepages(mapping, &wbc);
wbc_detach_inode(&wbc);
if (!ret && nr_to_write)
*nr_to_write = wbc.nr_to_write;
return ret;
}
EXPORT_SYMBOL(filemap_fdatawrite_wbc);
/**
* __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
* filemap_fdatawrite_range - start writeback on mapping dirty pages in range
* @mapping: address space structure to write
* @start: offset in bytes where the range starts
* @end: offset in bytes where the range ends (inclusive)
* @sync_mode: enable synchronous operation
*
* Start writeback against all of a mapping's dirty pages that lie
* within the byte offsets <start, end> inclusive.
*
* If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
* opposed to a regular memory cleansing writeback. The difference between
* these two operations is that if a dirty page/buffer is encountered, it must
* be waited upon, and not just skipped over.
* This is a data integrity operation that waits upon dirty or in writeback
* pages.
*
* Return: %0 on success, negative error code otherwise.
*/
int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
loff_t end, int sync_mode)
{
struct writeback_control wbc = {
.sync_mode = sync_mode,
.nr_to_write = LONG_MAX,
.range_start = start,
.range_end = end,
};
return filemap_fdatawrite_wbc(mapping, &wbc);
}
static inline int __filemap_fdatawrite(struct address_space *mapping,
int sync_mode)
{
return __filemap_fdatawrite_range(mapping, 0, LLONG_MAX, sync_mode);
}
int filemap_fdatawrite(struct address_space *mapping)
{
return __filemap_fdatawrite(mapping, WB_SYNC_ALL);
}
EXPORT_SYMBOL(filemap_fdatawrite);
int filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
loff_t end)
loff_t end)
{
return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_ALL);
return filemap_writeback(mapping, start, end, WB_SYNC_ALL, NULL);
}
EXPORT_SYMBOL(filemap_fdatawrite_range);
int filemap_fdatawrite(struct address_space *mapping)
{
return filemap_fdatawrite_range(mapping, 0, LLONG_MAX);
}
EXPORT_SYMBOL(filemap_fdatawrite);
/**
* filemap_fdatawrite_range_kick - start writeback on a range
* filemap_flush_range - start writeback on a range
* @mapping: target address_space
* @start: index to start writeback on
* @end: last (inclusive) index for writeback
@@ -452,12 +429,12 @@ EXPORT_SYMBOL(filemap_fdatawrite_range);
*
* Return: %0 on success, negative error code otherwise.
*/
int filemap_fdatawrite_range_kick(struct address_space *mapping, loff_t start,
int filemap_flush_range(struct address_space *mapping, loff_t start,
loff_t end)
{
return __filemap_fdatawrite_range(mapping, start, end, WB_SYNC_NONE);
return filemap_writeback(mapping, start, end, WB_SYNC_NONE, NULL);
}
EXPORT_SYMBOL_GPL(filemap_fdatawrite_range_kick);
EXPORT_SYMBOL_GPL(filemap_flush_range);
/**
* filemap_flush - mostly a non-blocking flush
@@ -470,10 +447,22 @@ EXPORT_SYMBOL_GPL(filemap_fdatawrite_range_kick);
*/
int filemap_flush(struct address_space *mapping)
{
return __filemap_fdatawrite(mapping, WB_SYNC_NONE);
return filemap_flush_range(mapping, 0, LLONG_MAX);
}
EXPORT_SYMBOL(filemap_flush);
/*
* Start writeback on @nr_to_write pages from @mapping. No one but the existing
* btrfs caller should be using this. Talk to linux-mm if you think adding a
* new caller is a good idea.
*/
int filemap_flush_nr(struct address_space *mapping, long *nr_to_write)
{
return filemap_writeback(mapping, 0, LLONG_MAX, WB_SYNC_NONE,
nr_to_write);
}
EXPORT_SYMBOL_FOR_MODULES(filemap_flush_nr, "btrfs");
/**
* filemap_range_has_page - check if a page exists in range.
* @mapping: address space within which to check
@@ -691,8 +680,7 @@ int filemap_write_and_wait_range(struct address_space *mapping,
return 0;
if (mapping_needs_writeback(mapping)) {
err = __filemap_fdatawrite_range(mapping, lstart, lend,
WB_SYNC_ALL);
err = filemap_fdatawrite_range(mapping, lstart, lend);
/*
* Even if the above returned error, the pages may be
* written partially (e.g. -ENOSPC), so we wait for it.
@@ -794,8 +782,7 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
return 0;
if (mapping_needs_writeback(mapping)) {
err = __filemap_fdatawrite_range(mapping, lstart, lend,
WB_SYNC_ALL);
err = filemap_fdatawrite_range(mapping, lstart, lend);
/* See comment of filemap_write_and_wait() */
if (err != -EIO)
__filemap_fdatawait_range(mapping, lstart, lend);
@@ -4528,16 +4515,8 @@ int filemap_invalidate_inode(struct inode *inode, bool flush,
unmap_mapping_pages(mapping, first, nr, false);
/* Write back the data if we're asked to. */
if (flush) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = start,
.range_end = end,
};
filemap_fdatawrite_wbc(mapping, &wbc);
}
if (flush)
filemap_fdatawrite_range(mapping, start, end);
/* Wait for writeback to complete on all folios and discard. */
invalidate_inode_pages2_range(mapping, start / PAGE_SIZE, end / PAGE_SIZE);

View File

@@ -2434,12 +2434,6 @@ static bool folio_prepare_writeback(struct address_space *mapping,
return true;
}
static xa_mark_t wbc_to_tag(struct writeback_control *wbc)
{
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
return PAGECACHE_TAG_TOWRITE;
return PAGECACHE_TAG_DIRTY;
}
static pgoff_t wbc_end(struct writeback_control *wbc)
{