mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
btrfs: introduce a new shutdown state
A new fs state EMERGENCY_SHUTDOWN is introduced, which is btrfs' equivalent of XFS_IOC_GOINGDOWN or EXT4_IOC_SHUTDOWN. After entering the emergency shutdown state, all operations will return errors (-EIO), and the fs cannot be brought back to the normal state until unmount. The new state will reject the following file operations: - read_iter() - write_iter() - mmap() - open() - remap_file_range() - uring_cmd() - splice_read() This requires a small wrapper to do the extra shutdown check, then call the regular filemap_splice_read() function. This should reject most of the file operations on a shutdown btrfs. And for the existing dirty folios, extra shutdown checks are introduced to the following functions: - run_delalloc_nocow() - run_delalloc_compressed() - cow_file_range() So that dirty ranges will still be properly cleaned without being submitted. Finally the shutdown state will also set the fs error, so that no new transaction will be committed, protecting the metadata from any possible further corruption. And when the fs enters shutdown mode for the first time, a critical level kernel message will show up to indicate the incident. That message will be important for end users, as rejected delalloc ranges will output error messages; hopefully that shutdown message and the fact that all fs operations are returning errors will prevent end users from getting too confused about the delalloc error messages. Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: Anand Jain <asj@kernel.org> Tested-by: Anand Jain <asj@kernel.org> Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
@@ -1440,6 +1440,8 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
|
||||
ssize_t num_written, num_sync;
|
||||
|
||||
if (unlikely(btrfs_is_shutdown(inode->root->fs_info)))
|
||||
return -EIO;
|
||||
/*
|
||||
* If the fs flips readonly due to some impossible error, although we
|
||||
* have opened a file as writable, we have to stop this write operation
|
||||
@@ -2042,6 +2044,8 @@ static int btrfs_file_mmap_prepare(struct vm_area_desc *desc)
|
||||
struct file *filp = desc->file;
|
||||
struct address_space *mapping = filp->f_mapping;
|
||||
|
||||
if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(filp)))))
|
||||
return -EIO;
|
||||
if (!mapping->a_ops->read_folio)
|
||||
return -ENOEXEC;
|
||||
|
||||
@@ -3111,6 +3115,9 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
int blocksize = BTRFS_I(inode)->root->fs_info->sectorsize;
|
||||
int ret;
|
||||
|
||||
if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode))))
|
||||
return -EIO;
|
||||
|
||||
/* Do not allow fallocate in ZONED mode */
|
||||
if (btrfs_is_zoned(inode_to_fs_info(inode)))
|
||||
return -EOPNOTSUPP;
|
||||
@@ -3802,6 +3809,9 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (unlikely(btrfs_is_shutdown(inode_to_fs_info(inode))))
|
||||
return -EIO;
|
||||
|
||||
filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
|
||||
|
||||
ret = fsverity_file_open(inode, filp);
|
||||
@@ -3814,6 +3824,9 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
ssize_t ret = 0;
|
||||
|
||||
if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(iocb->ki_filp)))))
|
||||
return -EIO;
|
||||
|
||||
if (iocb->ki_flags & IOCB_DIRECT) {
|
||||
ret = btrfs_direct_read(iocb, to);
|
||||
if (ret < 0 || !iov_iter_count(to) ||
|
||||
@@ -3824,10 +3837,20 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
return filemap_read(iocb, to, ret);
|
||||
}
|
||||
|
||||
static ssize_t btrfs_file_splice_read(struct file *in, loff_t *ppos,
|
||||
struct pipe_inode_info *pipe,
|
||||
size_t len, unsigned int flags)
|
||||
{
|
||||
if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(in)))))
|
||||
return -EIO;
|
||||
|
||||
return filemap_splice_read(in, ppos, pipe, len, flags);
|
||||
}
|
||||
|
||||
const struct file_operations btrfs_file_operations = {
|
||||
.llseek = btrfs_file_llseek,
|
||||
.read_iter = btrfs_file_read_iter,
|
||||
.splice_read = filemap_splice_read,
|
||||
.splice_read = btrfs_file_splice_read,
|
||||
.write_iter = btrfs_file_write_iter,
|
||||
.splice_write = iter_file_splice_write,
|
||||
.mmap_prepare = btrfs_file_mmap_prepare,
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "extent-io-tree.h"
|
||||
#include "async-thread.h"
|
||||
#include "block-rsv.h"
|
||||
#include "messages.h"
|
||||
|
||||
struct inode;
|
||||
struct super_block;
|
||||
@@ -124,6 +125,12 @@ enum {
|
||||
/* No more delayed iput can be queued. */
|
||||
BTRFS_FS_STATE_NO_DELAYED_IPUT,
|
||||
|
||||
/*
|
||||
* Emergency shutdown, a step further than transaction aborted by
|
||||
* rejecting all operations.
|
||||
*/
|
||||
BTRFS_FS_STATE_EMERGENCY_SHUTDOWN,
|
||||
|
||||
BTRFS_FS_STATE_COUNT
|
||||
};
|
||||
|
||||
@@ -1120,6 +1127,27 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
|
||||
(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \
|
||||
&(fs_info)->fs_state)))
|
||||
|
||||
static inline bool btrfs_is_shutdown(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return test_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state);
|
||||
}
|
||||
|
||||
static inline void btrfs_force_shutdown(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
/*
|
||||
* Here we do not want to use handle_fs_error(), which will mark the fs
|
||||
* read-only.
|
||||
* Some call sites like shutdown ioctl will mark the fs shutdown when
|
||||
* the fs is frozen. But thaw path will handle RO and RW fs
|
||||
* differently.
|
||||
*
|
||||
* So here we only mark the fs error without flipping it RO.
|
||||
*/
|
||||
WRITE_ONCE(fs_info->fs_error, -EIO);
|
||||
if (!test_and_set_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state))
|
||||
btrfs_crit(fs_info, "emergency shutdown");
|
||||
}
|
||||
|
||||
/*
|
||||
* We use folio flag owner_2 to indicate there is an ordered extent with
|
||||
* unfinished IO.
|
||||
|
||||
@@ -864,7 +864,7 @@ static void compress_file_range(struct btrfs_work *work)
|
||||
u64 actual_end;
|
||||
u64 i_size;
|
||||
int ret = 0;
|
||||
struct folio **folios;
|
||||
struct folio **folios = NULL;
|
||||
unsigned long nr_folios;
|
||||
unsigned long total_compressed = 0;
|
||||
unsigned long total_in = 0;
|
||||
@@ -873,6 +873,9 @@ static void compress_file_range(struct btrfs_work *work)
|
||||
int compress_type = fs_info->compress_type;
|
||||
int compress_level = fs_info->compress_level;
|
||||
|
||||
if (unlikely(btrfs_is_shutdown(fs_info)))
|
||||
goto cleanup_and_bail_uncompressed;
|
||||
|
||||
inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
|
||||
|
||||
/*
|
||||
@@ -1288,6 +1291,11 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
unsigned long page_ops;
|
||||
int ret = 0;
|
||||
|
||||
if (unlikely(btrfs_is_shutdown(fs_info))) {
|
||||
ret = -EIO;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (btrfs_is_free_space_inode(inode)) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
@@ -2006,7 +2014,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_path *path = NULL;
|
||||
u64 cow_start = (u64)-1;
|
||||
/*
|
||||
* If not 0, represents the inclusive end of the last fallback_to_cow()
|
||||
@@ -2036,6 +2044,10 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
|
||||
*/
|
||||
ASSERT(!btrfs_is_zoned(fs_info) || btrfs_is_data_reloc_root(root));
|
||||
|
||||
if (unlikely(btrfs_is_shutdown(fs_info))) {
|
||||
ret = -EIO;
|
||||
goto error;
|
||||
}
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
ret = -ENOMEM;
|
||||
|
||||
@@ -5077,6 +5077,9 @@ out_acct:
|
||||
|
||||
int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(cmd->file)))))
|
||||
return -EIO;
|
||||
|
||||
switch (cmd->cmd_op) {
|
||||
case BTRFS_IOC_ENCODED_READ:
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
|
||||
@@ -24,6 +24,7 @@ static const char fs_state_chars[] = {
|
||||
[BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C',
|
||||
[BTRFS_FS_STATE_SKIP_META_CSUMS] = 'S',
|
||||
[BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
|
||||
[BTRFS_FS_STATE_EMERGENCY_SHUTDOWN] = 'E',
|
||||
};
|
||||
|
||||
static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
|
||||
|
||||
@@ -868,6 +868,9 @@ loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
|
||||
bool same_inode = dst_inode == src_inode;
|
||||
int ret;
|
||||
|
||||
if (unlikely(btrfs_is_shutdown(inode_to_fs_info(file_inode(src_file)))))
|
||||
return -EIO;
|
||||
|
||||
if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
|
||||
return -EINVAL;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user