btrfs: qgroup: Fix qgroup reserved space underflow by only freeing reserved ranges

[BUG]
For the following case, btrfs can underflow qgroup reserved space
at an error path:
(Page size 4K, function name without "btrfs_" prefix)

         Task A                  |             Task B
----------------------------------------------------------------------
Buffered_write [0, 2K)           |
|- check_data_free_space()       |
|  |- qgroup_reserve_data()      |
|     Range aligned to page      |
|     range [0, 4K)          <<< |
|     4K bytes reserved      <<< |
|- copy pages to page cache      |
                                 | Buffered_write [2K, 4K)
                                 | |- check_data_free_space()
                                 | |  |- qgroup_reserved_data()
                                 | |     Range alinged to page
                                 | |     range [0, 4K)
                                 | |     Already reserved by A <<<
                                 | |     0 bytes reserved      <<<
                                 | |- delalloc_reserve_metadata()
                                 | |  And it *FAILED* (Maybe EQUOTA)
                                 | |- free_reserved_data_space()
                                      |- qgroup_free_data()
                                         Range aligned to page range
                                         [0, 4K)
                                         Freeing 4K
(Special thanks to Chandan for the detailed report and analyse)

[CAUSE]
Above Task B is freeing reserved data range [0, 4K) which is actually
reserved by Task A.

And at writeback time, page dirty by Task A will go through writeback
routine, which will free 4K reserved data space at file extent insert
time, causing the qgroup underflow.

[FIX]
For btrfs_qgroup_free_data(), add @reserved parameter to only free
data ranges reserved by previous btrfs_qgroup_reserve_data().
So in above case, Task B will try to free 0 byte, so no underflow.

Reported-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Tested-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo
2017-02-27 15:10:39 +08:00
committed by David Sterba
parent 364ecf3651
commit bc42bda223
8 changed files with 117 additions and 46 deletions

View File

@@ -345,7 +345,7 @@ out:
* And at reserve time, it's always aligned to page size, so
* just free one page here.
*/
btrfs_qgroup_free_data(inode, 0, PAGE_SIZE);
btrfs_qgroup_free_data(inode, NULL, 0, PAGE_SIZE);
btrfs_free_path(path);
btrfs_end_transaction(trans);
return ret;
@@ -2935,7 +2935,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
* space for NOCOW range.
* As NOCOW won't cause a new delayed ref, just free the space
*/
btrfs_qgroup_free_data(inode, ordered_extent->file_offset,
btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
ordered_extent->len);
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (nolock)
@@ -4794,7 +4794,7 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode,
btrfs_delalloc_release_space(inode, data_reserved,
round_down(from, blocksize),
blocksize);
ret = -ENOMEM;
@@ -4866,7 +4866,7 @@ again:
out_unlock:
if (ret)
btrfs_delalloc_release_space(inode, block_start,
btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize);
unlock_page(page);
put_page(page);
@@ -5266,7 +5266,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
* Note, end is the bytenr of last byte, so we need + 1 here.
*/
if (state->state & EXTENT_DELALLOC)
btrfs_qgroup_free_data(inode, start, end - start + 1);
btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
clear_extent_bit(io_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY |
@@ -8792,8 +8792,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED) {
if (dio_data.reserve)
btrfs_delalloc_release_space(inode, offset,
dio_data.reserve);
btrfs_delalloc_release_space(inode, data_reserved,
offset, dio_data.reserve);
/*
* On error we might have left some ordered extents
* without submitting corresponding bios for them, so
@@ -8808,8 +8808,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.unsubmitted_oe_range_start,
false);
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, offset,
count - (size_t)ret);
btrfs_delalloc_release_space(inode, data_reserved,
offset, count - (size_t)ret);
}
out:
if (wakeup)
@@ -9008,7 +9008,7 @@ again:
* free the entire extent.
*/
if (PageDirty(page))
btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY |
@@ -9130,8 +9130,8 @@ again:
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode, page_start,
PAGE_SIZE - reserved_space);
btrfs_delalloc_release_space(inode, data_reserved,
page_start, PAGE_SIZE - reserved_space);
}
}
@@ -9187,7 +9187,8 @@ out_unlock:
}
unlock_page(page);
out:
btrfs_delalloc_release_space(inode, page_start, reserved_space);
btrfs_delalloc_release_space(inode, data_reserved, page_start,
reserved_space);
out_noreserve:
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
@@ -10557,7 +10558,7 @@ next:
btrfs_end_transaction(trans);
}
if (cur_offset < end)
btrfs_free_reserved_data_space(inode, cur_offset,
btrfs_free_reserved_data_space(inode, NULL, cur_offset,
end - cur_offset + 1);
return ret;
}