bcache: reduce gc latency by processing fewer nodes and sleeping less time
When the bcache device is busy with high I/O load, there are two ways to reduce garbage collection latency:
- Process fewer nodes in each loop of incremental garbage collection in btree_gc_recurse().
- Sleep less time between two incremental GC loops in bch_btree_gc().

This patch introduces two helper routines that provide different garbage collection node counts and sleep intervals.
- btree_gc_min_nodes()
  If there is no front-end I/O, return 1/128 of the total btree node count (but no fewer than 10 nodes) to process in each incremental loop; otherwise only 10 nodes are returned, so front-end I/O can access the btree earlier.
- btree_gc_sleep_ms()
  If nobody is synchronously waiting for bucket allocation, sleep 100 ms between two incremental GC loops. Otherwise sleep only 10 ms before the next incremental GC loop, so a faster GC can provide available buckets earlier and avoid most bcache working threads being starved by bucket allocation.

The idea is inspired by work from Mingzhe Zou and Robert Pang, but it is much simpler and its expected behavior is more predictable.

Signed-off-by: Coly Li <colyli@fnnas.com>
Signed-off-by: Robert Pang <robertpang@google.com>
Signed-off-by: Mingzhe Zou <mingzhe.zou@easystack.cn>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
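As an illustration of the pacing policy described above, the sketch below pulls the decision logic out of bcache into a standalone, compilable C program. The four #define values match the patch; struct gc_pacing_state, pacing_min_nodes(), pacing_sleep_ms() and the sample numbers in main() are made up for this sketch and do not exist in the kernel sources. The real implementations are btree_gc_min_nodes() and btree_gc_sleep_ms() in the diff below.

    /* Sketch only: stand-in state instead of struct cache_set / gc_stat. */
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_GC_TIMES_SHIFT      7       /* aim for roughly 128 incremental loops */
    #define GC_NODES_MIN            10
    #define GC_SLEEP_MS_MIN         10
    #define GC_SLEEP_MS             100

    struct gc_pacing_state {
            int     search_inflight;        /* front-end requests in flight */
            int     bucket_wait_cnt;        /* allocators sleeping for free buckets */
            size_t  btree_nodes;            /* current number of btree nodes */
    };

    /*
     * Nodes to process before yielding: only a handful when front-end I/O
     * is in flight, otherwise about 1/128 of the btree so a full GC still
     * finishes within a bounded number of incremental loops.
     */
    static size_t pacing_min_nodes(const struct gc_pacing_state *s)
    {
            size_t min_nodes = GC_NODES_MIN;

            if (s->search_inflight == 0) {
                    size_t n = s->btree_nodes >> MAX_GC_TIMES_SHIFT;

                    if (min_nodes < n)
                            min_nodes = n;
            }

            return min_nodes;
    }

    /*
     * Sleep between incremental loops: shorten it when somebody is blocked
     * waiting for a free bucket, so GC can hand buckets back sooner.
     */
    static uint64_t pacing_sleep_ms(const struct gc_pacing_state *s)
    {
            return s->bucket_wait_cnt > 0 ? GC_SLEEP_MS_MIN : GC_SLEEP_MS;
    }

    int main(void)
    {
            struct gc_pacing_state busy = {
                    .search_inflight = 4, .bucket_wait_cnt = 2, .btree_nodes = 100000,
            };
            struct gc_pacing_state idle = {
                    .search_inflight = 0, .bucket_wait_cnt = 0, .btree_nodes = 100000,
            };

            printf("busy: %zu nodes, %llu ms\n", pacing_min_nodes(&busy),
                   (unsigned long long)pacing_sleep_ms(&busy));
            printf("idle: %zu nodes, %llu ms\n", pacing_min_nodes(&idle),
                   (unsigned long long)pacing_sleep_ms(&idle));
            return 0;
    }

With these sample numbers the busy case yields after 10 nodes and sleeps only 10 ms, while the idle case processes 781 nodes (100000 >> 7) per pass and sleeps the full 100 ms, which is the same trade-off the two new helpers make in the diff below.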
@@ -399,7 +399,11 @@ long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
 				TASK_UNINTERRUPTIBLE);
 
 		mutex_unlock(&ca->set->bucket_lock);
+
+		atomic_inc(&ca->set->bucket_wait_cnt);
 		schedule();
+		atomic_dec(&ca->set->bucket_wait_cnt);
+
 		mutex_lock(&ca->set->bucket_lock);
 	} while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
 		 !fifo_pop(&ca->free[reserve], r));
@@ -604,6 +604,7 @@ struct cache_set {
 	 */
 	atomic_t		prio_blocked;
 	wait_queue_head_t	bucket_wait;
+	atomic_t		bucket_wait_cnt;
 
 	/*
 	 * For any bio we don't skip we subtract the number of sectors from
@@ -89,8 +89,9 @@
  * Test module load/unload
  */
 
-#define MAX_GC_TIMES		100
-#define MIN_GC_NODES		100
+#define MAX_GC_TIMES_SHIFT	7	/* 128 loops */
+#define GC_NODES_MIN		10
+#define GC_SLEEP_MS_MIN		10
 #define GC_SLEEP_MS		100
 
 #define PTR_DIRTY_BIT		(((uint64_t) 1 << 36))
@@ -1578,29 +1579,29 @@ static unsigned int btree_gc_count_keys(struct btree *b)
 
 static size_t btree_gc_min_nodes(struct cache_set *c)
 {
-	size_t min_nodes;
+	size_t min_nodes = GC_NODES_MIN;
 
-	/*
-	 * Since incremental GC would stop 100ms when front
-	 * side I/O comes, so when there are many btree nodes,
-	 * if GC only processes constant (100) nodes each time,
-	 * GC would last a long time, and the front side I/Os
-	 * would run out of the buckets (since no new bucket
-	 * can be allocated during GC), and be blocked again.
-	 * So GC should not process constant nodes, but varied
-	 * nodes according to the number of btree nodes, which
-	 * realized by dividing GC into constant(100) times,
-	 * so when there are many btree nodes, GC can process
-	 * more nodes each time, otherwise, GC will process less
-	 * nodes each time (but no less than MIN_GC_NODES)
-	 */
-	min_nodes = c->gc_stats.nodes / MAX_GC_TIMES;
-	if (min_nodes < MIN_GC_NODES)
-		min_nodes = MIN_GC_NODES;
+	if (atomic_read(&c->search_inflight) == 0) {
+		size_t n = c->gc_stats.nodes >> MAX_GC_TIMES_SHIFT;
+
+		if (min_nodes < n)
+			min_nodes = n;
+	}
 
 	return min_nodes;
 }
 
+static uint64_t btree_gc_sleep_ms(struct cache_set *c)
+{
+	uint64_t sleep_ms;
+
+	if (atomic_read(&c->bucket_wait_cnt) > 0)
+		sleep_ms = GC_SLEEP_MS_MIN;
+	else
+		sleep_ms = GC_SLEEP_MS;
+
+	return sleep_ms;
+}
+
 static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 			    struct closure *writes, struct gc_stat *gc)
@@ -1668,8 +1669,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 		memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1));
 		r->b = NULL;
 
-		if (atomic_read(&b->c->search_inflight) &&
-		    gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) {
+		if (gc->nodes >= (gc->nodes_pre + btree_gc_min_nodes(b->c))) {
 			gc->nodes_pre = gc->nodes;
 			ret = -EAGAIN;
 			break;
@@ -1846,8 +1846,8 @@ static void bch_btree_gc(struct cache_set *c)
 		cond_resched();
 
 		if (ret == -EAGAIN)
-			schedule_timeout_interruptible(msecs_to_jiffies
-						       (GC_SLEEP_MS));
+			schedule_timeout_interruptible(
+				msecs_to_jiffies(btree_gc_sleep_ms(c)));
 		else if (ret)
 			pr_warn("gc failed!\n");
 	} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));