mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges
[BUG]
For the following case, btrfs can underflow qgroup reserved space
at an error path:
(Page size 4K, function name without "btrfs_" prefix)
Task A | Task B
----------------------------------------------------------------------
Buffered_write [0, 2K) |
|- check_data_free_space() |
| |- qgroup_reserve_data() |
| Range aligned to page |
| range [0, 4K) <<< |
| 4K bytes reserved <<< |
|- copy pages to page cache |
| Buffered_write [2K, 4K)
| |- check_data_free_space()
| | |- qgroup_reserved_data()
| | Range alinged to page
| | range [0, 4K)
| | Already reserved by A <<<
| | 0 bytes reserved <<<
| |- delalloc_reserve_metadata()
| | And it *FAILED* (Maybe EQUOTA)
| |- free_reserved_data_space()
|- qgroup_free_data()
Range aligned to page range
[0, 4K)
Freeing 4K
(Special thanks to Chandan for the detailed report and analyse)
[CAUSE]
Above Task B is freeing reserved data range [0, 4K) which is actually
reserved by Task A.
And at writeback time, page dirty by Task A will go through writeback
routine, which will free 4K reserved data space at file extent insert
time, causing the qgroup underflow.
[FIX]
For btrfs_qgroup_free_data(), add @reserved parameter to only free
data ranges reserved by previous btrfs_qgroup_reserve_data().
So in above case, Task B will try to free 0 byte, so no underflow.
Reported-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
@@ -345,7 +345,7 @@ out:
|
||||
* And at reserve time, it's always aligned to page size, so
|
||||
* just free one page here.
|
||||
*/
|
||||
btrfs_qgroup_free_data(inode, 0, PAGE_SIZE);
|
||||
btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
|
||||
btrfs_free_path(path);
|
||||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
@@ -2935,7 +2935,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
* space for NOCOW range.
|
||||
* As NOCOW won't cause a new delayed ref, just free the space
|
||||
*/
|
||||
btrfs_qgroup_free_data(inode, ordered_extent->file_offset,
|
||||
btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
|
||||
ordered_extent->len);
|
||||
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
|
||||
if (nolock)
|
||||
@@ -4794,7 +4794,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
|
||||
again:
|
||||
page = find_or_create_page(mapping, index, mask);
|
||||
if (!page) {
|
||||
btrfs_delalloc_release_space(inode,
|
||||
btrfs_delalloc_release_space(inode, data_reserved,
|
||||
round_down(from, blocksize),
|
||||
blocksize);
|
||||
ret = -ENOMEM;
|
||||
@@ -4866,7 +4866,7 @@ again:
|
||||
|
||||
out_unlock:
|
||||
if (ret)
|
||||
btrfs_delalloc_release_space(inode, block_start,
|
||||
btrfs_delalloc_release_space(inode, data_reserved, block_start,
|
||||
blocksize);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
@@ -5266,7 +5266,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
|
||||
* Note, end is the bytenr of last byte, so we need + 1 here.
|
||||
*/
|
||||
if (state->state & EXTENT_DELALLOC)
|
||||
btrfs_qgroup_free_data(inode, start, end - start + 1);
|
||||
btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
|
||||
|
||||
clear_extent_bit(io_tree, start, end,
|
||||
EXTENT_LOCKED | EXTENT_DIRTY |
|
||||
@@ -8792,8 +8792,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
|
||||
current->journal_info = NULL;
|
||||
if (ret < 0 && ret != -EIOCBQUEUED) {
|
||||
if (dio_data.reserve)
|
||||
btrfs_delalloc_release_space(inode, offset,
|
||||
dio_data.reserve);
|
||||
btrfs_delalloc_release_space(inode, data_reserved,
|
||||
offset, dio_data.reserve);
|
||||
/*
|
||||
* On error we might have left some ordered extents
|
||||
* without submitting corresponding bios for them, so
|
||||
@@ -8808,8 +8808,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
|
||||
dio_data.unsubmitted_oe_range_start,
|
||||
false);
|
||||
} else if (ret >= 0 && (size_t)ret < count)
|
||||
btrfs_delalloc_release_space(inode, offset,
|
||||
count - (size_t)ret);
|
||||
btrfs_delalloc_release_space(inode, data_reserved,
|
||||
offset, count - (size_t)ret);
|
||||
}
|
||||
out:
|
||||
if (wakeup)
|
||||
@@ -9008,7 +9008,7 @@ again:
|
||||
* free the entire extent.
|
||||
*/
|
||||
if (PageDirty(page))
|
||||
btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
|
||||
btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
|
||||
if (!inode_evicting) {
|
||||
clear_extent_bit(tree, page_start, page_end,
|
||||
EXTENT_LOCKED | EXTENT_DIRTY |
|
||||
@@ -9130,8 +9130,8 @@ again:
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
btrfs_delalloc_release_space(inode, page_start,
|
||||
PAGE_SIZE - reserved_space);
|
||||
btrfs_delalloc_release_space(inode, data_reserved,
|
||||
page_start, PAGE_SIZE - reserved_space);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9187,7 +9187,8 @@ out_unlock:
|
||||
}
|
||||
unlock_page(page);
|
||||
out:
|
||||
btrfs_delalloc_release_space(inode, page_start, reserved_space);
|
||||
btrfs_delalloc_release_space(inode, data_reserved, page_start,
|
||||
reserved_space);
|
||||
out_noreserve:
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
extent_changeset_free(data_reserved);
|
||||
@@ -10557,7 +10558,7 @@ next:
|
||||
btrfs_end_transaction(trans);
|
||||
}
|
||||
if (cur_offset < end)
|
||||
btrfs_free_reserved_data_space(inode, cur_offset,
|
||||
btrfs_free_reserved_data_space(inode, NULL, cur_offset,
|
||||
end - cur_offset + 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user