mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
block: unify elevator tags and type xarrays into struct elv_change_ctx
Currently, the nr_hw_queues update path manages two disjoint xarrays — one for elevator tags and another for elevator type — both used during elevator switching. Maintaining these two parallel structures for the same purpose adds unnecessary complexity and potential for mismatched state.

This patch unifies both xarrays into a single structure, struct elv_change_ctx, which holds all per-queue elevator change context. A single xarray, named elv_tbl, now maps each queue (q->id) in a tagset to its corresponding elv_change_ctx entry, encapsulating the elevator tags, type and name references.

This unification simplifies the code, improves maintainability, and clarifies ownership of per-queue elevator state.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Yu Kuai <yukuai@fnnas.com>
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
@@ -427,11 +427,11 @@ void blk_mq_free_sched_tags(struct elevator_tags *et,
|
|||||||
kfree(et);
|
kfree(et);
|
||||||
}
|
}
|
||||||
|
|
||||||
void blk_mq_free_sched_tags_batch(struct xarray *et_table,
|
void blk_mq_free_sched_tags_batch(struct xarray *elv_tbl,
|
||||||
struct blk_mq_tag_set *set)
|
struct blk_mq_tag_set *set)
|
||||||
{
|
{
|
||||||
struct request_queue *q;
|
struct request_queue *q;
|
||||||
struct elevator_tags *et;
|
struct elv_change_ctx *ctx;
|
||||||
|
|
||||||
lockdep_assert_held_write(&set->update_nr_hwq_lock);
|
lockdep_assert_held_write(&set->update_nr_hwq_lock);
|
||||||
|
|
||||||
@@ -444,15 +444,49 @@ void blk_mq_free_sched_tags_batch(struct xarray *et_table,
|
|||||||
* concurrently.
|
* concurrently.
|
||||||
*/
|
*/
|
||||||
if (q->elevator) {
|
if (q->elevator) {
|
||||||
et = xa_load(et_table, q->id);
|
ctx = xa_load(elv_tbl, q->id);
|
||||||
if (unlikely(!et))
|
if (!ctx || !ctx->et) {
|
||||||
WARN_ON_ONCE(1);
|
WARN_ON_ONCE(1);
|
||||||
else
|
continue;
|
||||||
blk_mq_free_sched_tags(et, set);
|
}
|
||||||
|
blk_mq_free_sched_tags(ctx->et, set);
|
||||||
|
ctx->et = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Release every elv_change_ctx stored in @elv_tbl.
 *
 * Each entry found while walking the xarray is erased from it and then
 * kfree()d. Only the ctx object itself is freed here; ctx->et and ctx->type
 * are not looked at by this function. The xarray is left initialised; the
 * visible caller runs xa_destroy() on it afterwards.
 */
void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl)
|
||||||
|
{
|
||||||
|
unsigned long i;
|
||||||
|
struct elv_change_ctx *ctx;
|
||||||
|
|
||||||
|
xa_for_each(elv_tbl, i, ctx) {
|
||||||
|
xa_erase(elv_tbl, i);
|
||||||
|
kfree(ctx);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Allocate one zeroed elv_change_ctx for every queue on @set->tag_list and
 * store it in @elv_tbl at index q->id.
 *
 * The caller must hold set->update_nr_hwq_lock for write (asserted below).
 *
 * Returns 0 on success, -ENOMEM if an allocation or an xarray insertion
 * fails. On failure the entries inserted on earlier iterations are left in
 * @elv_tbl; the visible caller releases them through
 * blk_mq_free_sched_ctx_batch().
 */
int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl,
|
||||||
|
struct blk_mq_tag_set *set)
|
||||||
|
{
|
||||||
|
struct request_queue *q;
|
||||||
|
struct elv_change_ctx *ctx;
|
||||||
|
|
||||||
|
lockdep_assert_held_write(&set->update_nr_hwq_lock);
|
||||||
|
|
||||||
|
list_for_each_entry(q, &set->tag_list, tag_set_list) {
|
||||||
|
ctx = kzalloc(sizeof(struct elv_change_ctx), GFP_KERNEL);
|
||||||
|
if (!ctx)
|
||||||
|
/* No unwind here: earlier entries stay in elv_tbl for the caller. */
return -ENOMEM;
|
||||||
|
|
||||||
|
/* Any non-zero xa_insert() result is reported as -ENOMEM. */
if (xa_insert(elv_tbl, q->id, ctx, GFP_KERNEL)) {
|
||||||
|
/* The ctx never made it into the table, so free it directly. */
kfree(ctx);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
|
struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
|
||||||
unsigned int nr_hw_queues, unsigned int nr_requests)
|
unsigned int nr_hw_queues, unsigned int nr_requests)
|
||||||
{
|
{
|
||||||
@@ -497,12 +531,13 @@ out:
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
|
int blk_mq_alloc_sched_tags_batch(struct xarray *elv_tbl,
|
||||||
struct blk_mq_tag_set *set, unsigned int nr_hw_queues)
|
struct blk_mq_tag_set *set, unsigned int nr_hw_queues)
|
||||||
{
|
{
|
||||||
|
struct elv_change_ctx *ctx;
|
||||||
struct request_queue *q;
|
struct request_queue *q;
|
||||||
struct elevator_tags *et;
|
struct elevator_tags *et;
|
||||||
gfp_t gfp = GFP_NOIO | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
|
int ret = -ENOMEM;
|
||||||
|
|
||||||
lockdep_assert_held_write(&set->update_nr_hwq_lock);
|
lockdep_assert_held_write(&set->update_nr_hwq_lock);
|
||||||
|
|
||||||
@@ -515,26 +550,31 @@ int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
|
|||||||
* concurrently.
|
* concurrently.
|
||||||
*/
|
*/
|
||||||
if (q->elevator) {
|
if (q->elevator) {
|
||||||
et = blk_mq_alloc_sched_tags(set, nr_hw_queues,
|
ctx = xa_load(elv_tbl, q->id);
|
||||||
blk_mq_default_nr_requests(set));
|
if (WARN_ON_ONCE(!ctx)) {
|
||||||
if (!et)
|
ret = -ENOENT;
|
||||||
goto out_unwind;
|
goto out_unwind;
|
||||||
if (xa_insert(et_table, q->id, et, gfp))
|
}
|
||||||
goto out_free_tags;
|
|
||||||
|
ctx->et = blk_mq_alloc_sched_tags(set, nr_hw_queues,
|
||||||
|
blk_mq_default_nr_requests(set));
|
||||||
|
if (!ctx->et)
|
||||||
|
goto out_unwind;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
out_free_tags:
|
|
||||||
blk_mq_free_sched_tags(et, set);
|
|
||||||
out_unwind:
|
out_unwind:
|
||||||
list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) {
|
list_for_each_entry_continue_reverse(q, &set->tag_list, tag_set_list) {
|
||||||
if (q->elevator) {
|
if (q->elevator) {
|
||||||
et = xa_load(et_table, q->id);
|
ctx = xa_load(elv_tbl, q->id);
|
||||||
if (et)
|
if (ctx && ctx->et) {
|
||||||
blk_mq_free_sched_tags(et, set);
|
blk_mq_free_sched_tags(ctx->et, set);
|
||||||
|
ctx->et = NULL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return -ENOMEM;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* caller must have a reference to @e, will grab another one if successful */
|
/* caller must have a reference to @e, will grab another one if successful */
|
||||||
|
|||||||
@@ -27,6 +27,9 @@ struct elevator_tags *blk_mq_alloc_sched_tags(struct blk_mq_tag_set *set,
|
|||||||
unsigned int nr_hw_queues, unsigned int nr_requests);
|
unsigned int nr_hw_queues, unsigned int nr_requests);
|
||||||
int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
|
int blk_mq_alloc_sched_tags_batch(struct xarray *et_table,
|
||||||
struct blk_mq_tag_set *set, unsigned int nr_hw_queues);
|
struct blk_mq_tag_set *set, unsigned int nr_hw_queues);
|
||||||
|
int blk_mq_alloc_sched_ctx_batch(struct xarray *elv_tbl,
|
||||||
|
struct blk_mq_tag_set *set);
|
||||||
|
void blk_mq_free_sched_ctx_batch(struct xarray *elv_tbl);
|
||||||
void blk_mq_free_sched_tags(struct elevator_tags *et,
|
void blk_mq_free_sched_tags(struct elevator_tags *et,
|
||||||
struct blk_mq_tag_set *set);
|
struct blk_mq_tag_set *set);
|
||||||
void blk_mq_free_sched_tags_batch(struct xarray *et_table,
|
void blk_mq_free_sched_tags_batch(struct xarray *et_table,
|
||||||
|
|||||||
@@ -4989,27 +4989,28 @@ struct elevator_tags *blk_mq_update_nr_requests(struct request_queue *q,
|
|||||||
* Switch back to the elevator type stored in the xarray.
|
* Switch back to the elevator type stored in the xarray.
|
||||||
*/
|
*/
|
||||||
static void blk_mq_elv_switch_back(struct request_queue *q,
|
static void blk_mq_elv_switch_back(struct request_queue *q,
|
||||||
struct xarray *elv_tbl, struct xarray *et_tbl)
|
struct xarray *elv_tbl)
|
||||||
{
|
{
|
||||||
struct elevator_type *e = xa_load(elv_tbl, q->id);
|
struct elv_change_ctx *ctx = xa_load(elv_tbl, q->id);
|
||||||
struct elevator_tags *t = xa_load(et_tbl, q->id);
|
|
||||||
|
if (WARN_ON_ONCE(!ctx))
|
||||||
|
return;
|
||||||
|
|
||||||
/* The elv_update_nr_hw_queues unfreezes the queue. */
|
/* The elv_update_nr_hw_queues unfreezes the queue. */
|
||||||
elv_update_nr_hw_queues(q, e, t);
|
elv_update_nr_hw_queues(q, ctx);
|
||||||
|
|
||||||
/* Drop the reference acquired in blk_mq_elv_switch_none. */
|
/* Drop the reference acquired in blk_mq_elv_switch_none. */
|
||||||
if (e)
|
if (ctx->type)
|
||||||
elevator_put(e);
|
elevator_put(ctx->type);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Stores elevator type in xarray and set current elevator to none. It uses
|
* Stores elevator name and type in ctx and set current elevator to none.
|
||||||
* q->id as an index to store the elevator type into the xarray.
|
|
||||||
*/
|
*/
|
||||||
static int blk_mq_elv_switch_none(struct request_queue *q,
|
static int blk_mq_elv_switch_none(struct request_queue *q,
|
||||||
struct xarray *elv_tbl)
|
struct xarray *elv_tbl)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
struct elv_change_ctx *ctx;
|
||||||
|
|
||||||
lockdep_assert_held_write(&q->tag_set->update_nr_hwq_lock);
|
lockdep_assert_held_write(&q->tag_set->update_nr_hwq_lock);
|
||||||
|
|
||||||
@@ -5021,10 +5022,11 @@ static int blk_mq_elv_switch_none(struct request_queue *q,
|
|||||||
* can't run concurrently.
|
* can't run concurrently.
|
||||||
*/
|
*/
|
||||||
if (q->elevator) {
|
if (q->elevator) {
|
||||||
|
ctx = xa_load(elv_tbl, q->id);
|
||||||
|
if (WARN_ON_ONCE(!ctx))
|
||||||
|
return -ENOENT;
|
||||||
|
|
||||||
ret = xa_insert(elv_tbl, q->id, q->elevator->type, GFP_KERNEL);
|
ctx->name = q->elevator->type->elevator_name;
|
||||||
if (WARN_ON_ONCE(ret))
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Before we switch elevator to 'none', take a reference to
|
* Before we switch elevator to 'none', take a reference to
|
||||||
@@ -5035,9 +5037,14 @@ static int blk_mq_elv_switch_none(struct request_queue *q,
|
|||||||
*/
|
*/
|
||||||
__elevator_get(q->elevator->type);
|
__elevator_get(q->elevator->type);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Store elevator type so that we can release the reference
|
||||||
|
* taken above later.
|
||||||
|
*/
|
||||||
|
ctx->type = q->elevator->type;
|
||||||
elevator_set_none(q);
|
elevator_set_none(q);
|
||||||
}
|
}
|
||||||
return ret;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||||
@@ -5047,7 +5054,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
|||||||
int prev_nr_hw_queues = set->nr_hw_queues;
|
int prev_nr_hw_queues = set->nr_hw_queues;
|
||||||
unsigned int memflags;
|
unsigned int memflags;
|
||||||
int i;
|
int i;
|
||||||
struct xarray elv_tbl, et_tbl;
|
struct xarray elv_tbl;
|
||||||
bool queues_frozen = false;
|
bool queues_frozen = false;
|
||||||
|
|
||||||
lockdep_assert_held(&set->tag_list_lock);
|
lockdep_assert_held(&set->tag_list_lock);
|
||||||
@@ -5061,11 +5068,12 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
|||||||
|
|
||||||
memflags = memalloc_noio_save();
|
memflags = memalloc_noio_save();
|
||||||
|
|
||||||
xa_init(&et_tbl);
|
|
||||||
if (blk_mq_alloc_sched_tags_batch(&et_tbl, set, nr_hw_queues) < 0)
|
|
||||||
goto out_memalloc_restore;
|
|
||||||
|
|
||||||
xa_init(&elv_tbl);
|
xa_init(&elv_tbl);
|
||||||
|
if (blk_mq_alloc_sched_ctx_batch(&elv_tbl, set) < 0)
|
||||||
|
goto out_free_ctx;
|
||||||
|
|
||||||
|
if (blk_mq_alloc_sched_tags_batch(&elv_tbl, set, nr_hw_queues) < 0)
|
||||||
|
goto out_free_ctx;
|
||||||
|
|
||||||
list_for_each_entry(q, &set->tag_list, tag_set_list) {
|
list_for_each_entry(q, &set->tag_list, tag_set_list) {
|
||||||
blk_mq_debugfs_unregister_hctxs(q);
|
blk_mq_debugfs_unregister_hctxs(q);
|
||||||
@@ -5111,7 +5119,7 @@ switch_back:
|
|||||||
/* switch_back expects queue to be frozen */
|
/* switch_back expects queue to be frozen */
|
||||||
if (!queues_frozen)
|
if (!queues_frozen)
|
||||||
blk_mq_freeze_queue_nomemsave(q);
|
blk_mq_freeze_queue_nomemsave(q);
|
||||||
blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
|
blk_mq_elv_switch_back(q, &elv_tbl);
|
||||||
}
|
}
|
||||||
|
|
||||||
list_for_each_entry(q, &set->tag_list, tag_set_list) {
|
list_for_each_entry(q, &set->tag_list, tag_set_list) {
|
||||||
@@ -5122,9 +5130,9 @@ switch_back:
|
|||||||
blk_mq_add_hw_queues_cpuhp(q);
|
blk_mq_add_hw_queues_cpuhp(q);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
out_free_ctx:
|
||||||
|
blk_mq_free_sched_ctx_batch(&elv_tbl);
|
||||||
xa_destroy(&elv_tbl);
|
xa_destroy(&elv_tbl);
|
||||||
xa_destroy(&et_tbl);
|
|
||||||
out_memalloc_restore:
|
|
||||||
memalloc_noio_restore(memflags);
|
memalloc_noio_restore(memflags);
|
||||||
|
|
||||||
/* Free the excess tags when nr_hw_queues shrink. */
|
/* Free the excess tags when nr_hw_queues shrink. */
|
||||||
|
|||||||
@@ -11,8 +11,7 @@
|
|||||||
#include <xen/xen.h>
|
#include <xen/xen.h>
|
||||||
#include "blk-crypto-internal.h"
|
#include "blk-crypto-internal.h"
|
||||||
|
|
||||||
struct elevator_type;
|
struct elv_change_ctx;
|
||||||
struct elevator_tags;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Default upper limit for the software max_sectors limit used for regular I/Os.
|
* Default upper limit for the software max_sectors limit used for regular I/Os.
|
||||||
@@ -333,8 +332,8 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
|
|||||||
|
|
||||||
bool blk_insert_flush(struct request *rq);
|
bool blk_insert_flush(struct request *rq);
|
||||||
|
|
||||||
void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
|
void elv_update_nr_hw_queues(struct request_queue *q,
|
||||||
struct elevator_tags *t);
|
struct elv_change_ctx *ctx);
|
||||||
void elevator_set_default(struct request_queue *q);
|
void elevator_set_default(struct request_queue *q);
|
||||||
void elevator_set_none(struct request_queue *q);
|
void elevator_set_none(struct request_queue *q);
|
||||||
|
|
||||||
|
|||||||
@@ -45,19 +45,6 @@
|
|||||||
#include "blk-wbt.h"
|
#include "blk-wbt.h"
|
||||||
#include "blk-cgroup.h"
|
#include "blk-cgroup.h"
|
||||||
|
|
||||||
/* Holding context data for changing elevator */
|
|
||||||
struct elv_change_ctx {
|
|
||||||
const char *name;
|
|
||||||
bool no_uevent;
|
|
||||||
|
|
||||||
/* for unregistering old elevator */
|
|
||||||
struct elevator_queue *old;
|
|
||||||
/* for registering new elevator */
|
|
||||||
struct elevator_queue *new;
|
|
||||||
/* holds sched tags data */
|
|
||||||
struct elevator_tags *et;
|
|
||||||
};
|
|
||||||
|
|
||||||
static DEFINE_SPINLOCK(elv_list_lock);
|
static DEFINE_SPINLOCK(elv_list_lock);
|
||||||
static LIST_HEAD(elv_list);
|
static LIST_HEAD(elv_list);
|
||||||
|
|
||||||
@@ -706,32 +693,28 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
|
|||||||
* The I/O scheduler depends on the number of hardware queues, this forces a
|
* The I/O scheduler depends on the number of hardware queues, this forces a
|
||||||
* reattachment when nr_hw_queues changes.
|
* reattachment when nr_hw_queues changes.
|
||||||
*/
|
*/
|
||||||
void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
|
void elv_update_nr_hw_queues(struct request_queue *q,
|
||||||
struct elevator_tags *t)
|
struct elv_change_ctx *ctx)
|
||||||
{
|
{
|
||||||
struct blk_mq_tag_set *set = q->tag_set;
|
struct blk_mq_tag_set *set = q->tag_set;
|
||||||
struct elv_change_ctx ctx = {};
|
|
||||||
int ret = -ENODEV;
|
int ret = -ENODEV;
|
||||||
|
|
||||||
WARN_ON_ONCE(q->mq_freeze_depth == 0);
|
WARN_ON_ONCE(q->mq_freeze_depth == 0);
|
||||||
|
|
||||||
if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
|
if (ctx->type && !blk_queue_dying(q) && blk_queue_registered(q)) {
|
||||||
ctx.name = e->elevator_name;
|
|
||||||
ctx.et = t;
|
|
||||||
|
|
||||||
mutex_lock(&q->elevator_lock);
|
mutex_lock(&q->elevator_lock);
|
||||||
/* force to reattach elevator after nr_hw_queue is updated */
|
/* force to reattach elevator after nr_hw_queue is updated */
|
||||||
ret = elevator_switch(q, &ctx);
|
ret = elevator_switch(q, ctx);
|
||||||
mutex_unlock(&q->elevator_lock);
|
mutex_unlock(&q->elevator_lock);
|
||||||
}
|
}
|
||||||
blk_mq_unfreeze_queue_nomemrestore(q);
|
blk_mq_unfreeze_queue_nomemrestore(q);
|
||||||
if (!ret)
|
if (!ret)
|
||||||
WARN_ON_ONCE(elevator_change_done(q, &ctx));
|
WARN_ON_ONCE(elevator_change_done(q, ctx));
|
||||||
/*
|
/*
|
||||||
* Free sched tags if it's allocated but we couldn't switch elevator.
|
* Free sched tags if it's allocated but we couldn't switch elevator.
|
||||||
*/
|
*/
|
||||||
if (t && !ctx.new)
|
if (ctx->et && !ctx->new)
|
||||||
blk_mq_free_sched_tags(t, set);
|
blk_mq_free_sched_tags(ctx->et, set);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -32,6 +32,21 @@ struct elevator_tags {
|
|||||||
struct blk_mq_tags *tags[];
|
struct blk_mq_tags *tags[];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Per-queue context carried through an elevator change. In the
 * nr_hw_queues update path one instance per queue is kept in an xarray
 * indexed by q->id.
 */
|
||||||
|
struct elv_change_ctx {
|
||||||
|
/* elevator name; blk_mq_elv_switch_none() sets it from type->elevator_name */
const char *name;
|
||||||
|
/* NOTE(review): presumably suppresses the uevent for this change — confirm */
bool no_uevent;
|
||||||
|
|
||||||
|
/* for unregistering old elevator */
|
||||||
|
struct elevator_queue *old;
|
||||||
|
/* for registering new elevator */
|
||||||
|
struct elevator_queue *new;
|
||||||
|
/*
 * elevator type saved by blk_mq_elv_switch_none() after taking a
 * reference on it; blk_mq_elv_switch_back() drops that reference
 */
|
||||||
|
struct elevator_type *type;
|
||||||
|
/*
 * sched tags for this queue; filled in by
 * blk_mq_alloc_sched_tags_batch() and reset to NULL by the batch
 * free/unwind paths once the tags have been freed
 */
|
||||||
|
struct elevator_tags *et;
|
||||||
|
};
|
||||||
|
|
||||||
struct elevator_mq_ops {
|
struct elevator_mq_ops {
|
||||||
int (*init_sched)(struct request_queue *, struct elevator_queue *);
|
int (*init_sched)(struct request_queue *, struct elevator_queue *);
|
||||||
void (*exit_sched)(struct elevator_queue *);
|
void (*exit_sched)(struct elevator_queue *);
|
||||||
|
|||||||
Reference in New Issue
Block a user