net_sched: add Qdisc_read_mostly and Qdisc_write groups
It is possible to reorganize struct Qdisc so that the fast path no longer dirties two cache lines, but only a single one.
In the current layout, we change only four (or, for some Qdiscs, six) fields in the first cache line:
- q.spinlock
- q.qlen
- bstats.bytes
- bstats.packets
- some Qdiscs also change q.next/q.prev
In the second cache line, we change the following fields in the fast path:
- running
- state
- qstats.backlog
/* --- cacheline 2 boundary (128 bytes) --- */
struct sk_buff_head gso_skb __attribute__((__aligned__(64))); /* 0x80 0x18 */
struct qdisc_skb_head q; /* 0x98 0x18 */
struct gnet_stats_basic_sync bstats __attribute__((__aligned__(16))); /* 0xb0 0x10 */
/* --- cacheline 3 boundary (192 bytes) --- */
struct gnet_stats_queue qstats; /* 0xc0 0x14 */
bool running; /* 0xd4 0x1 */
/* XXX 3 bytes hole, try to pack */
unsigned long state; /* 0xd8 0x8 */
struct Qdisc * next_sched; /* 0xe0 0x8 */
struct sk_buff_head skb_bad_txq; /* 0xe8 0x18 */
/* --- cacheline 4 boundary (256 bytes) --- */
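(The struct layouts quoted in this message are pahole-style dumps. Assuming a vmlinux built with debug info, a comparable dump can be regenerated with "pahole -C Qdisc vmlinux".)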
Reorganize things to have a first cache line mostly read,
then a mostly written one.
This gives a ~3% performance increase under tx stress.
Note that there is an additional hole because @qstats now spans over a third cache line.
/* --- cacheline 2 boundary (128 bytes) --- */
__u8 __cacheline_group_begin__Qdisc_read_mostly[0] __attribute__((__aligned__(64))); /* 0x80 0 */
struct sk_buff_head gso_skb; /* 0x80 0x18 */
struct Qdisc * next_sched; /* 0x98 0x8 */
struct sk_buff_head skb_bad_txq; /* 0xa0 0x18 */
__u8 __cacheline_group_end__Qdisc_read_mostly[0]; /* 0xb8 0 */
/* XXX 8 bytes hole, try to pack */
/* --- cacheline 3 boundary (192 bytes) --- */
__u8 __cacheline_group_begin__Qdisc_write[0] __attribute__((__aligned__(64))); /* 0xc0 0 */
struct qdisc_skb_head q; /* 0xc0 0x18 */
unsigned long state; /* 0xd8 0x8 */
struct gnet_stats_basic_sync bstats __attribute__((__aligned__(16))); /* 0xe0 0x10 */
bool running; /* 0xf0 0x1 */
/* XXX 3 bytes hole, try to pack */
struct gnet_stats_queue qstats; /* 0xf4 0x14 */
/* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
__u8 __cacheline_group_end__Qdisc_write[0]; /* 0x108 0 */
/* XXX 56 bytes hole, try to pack */
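For readers unfamiliar with the cacheline group markers, here is a minimal userspace sketch of the same pattern (not kernel code: the struct, its field names and the 64-byte line size are assumptions for illustration, and the zero-length-array markers rely on GCC/Clang extensions). Zero-sized, alignment-forcing markers split the struct into a read-mostly group and a write-hot group, so the hot writes dirty a single cache line:

#include <stddef.h>
#include <stdint.h>

#define CL_SIZE 64  /* assumed cache line size for this sketch */

/* Zero-sized markers delimiting a named group, in the spirit of the
 * kernel's __cacheline_group_begin()/__cacheline_group_end() helpers. */
#define group_begin(g)  uint8_t __begin_##g[0] __attribute__((aligned(CL_SIZE)))
#define group_end(g)    uint8_t __end_##g[0]

struct example_qdisc {
        /* read-mostly in the fast path */
        group_begin(read_mostly);
        const void *ops;
        unsigned long flags;
        group_end(read_mostly);

        /* dirtied on every packet: give it its own cache line */
        group_begin(write_hot);
        unsigned long qlen;
        unsigned long bytes;
        group_end(write_hot);
};

/* Writing qlen/bytes must not dirty the read-mostly line, so the
 * write group has to start on a cache line boundary. */
_Static_assert(offsetof(struct example_qdisc, __begin_write_hot) % CL_SIZE == 0,
               "write group is not cache line aligned");

int main(void)
{
        return 0;
}

The kernel's __cacheline_group_begin()/__cacheline_group_end() helpers, visible as __cacheline_group_begin__Qdisc_read_mostly[0] etc. in the pahole output above, use the same zero-sized-marker idea.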
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251121083256.674562-8-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Commit: ad50d5a3fc
Parent: c5d34f4583
Committed by: Paolo Abeni
@@ -103,17 +103,24 @@ struct Qdisc {
        int                     pad;
        refcount_t              refcnt;
 
-       /*
-        * For performance sake on SMP, we put highly modified fields at the end
-        */
-       struct sk_buff_head     gso_skb ____cacheline_aligned_in_smp;
-       struct qdisc_skb_head   q;
-       struct gnet_stats_basic_sync bstats;
-       struct gnet_stats_queue qstats;
-       bool                    running; /* must be written under qdisc spinlock */
-       unsigned long           state;
-       struct Qdisc            *next_sched;
-       struct sk_buff_head     skb_bad_txq;
+       /* Cache line potentially dirtied in dequeue() or __netif_reschedule(). */
+       __cacheline_group_begin(Qdisc_read_mostly) ____cacheline_aligned;
+       struct sk_buff_head     gso_skb;
+       struct Qdisc            *next_sched;
+       struct sk_buff_head     skb_bad_txq;
+       __cacheline_group_end(Qdisc_read_mostly);
+
+       /* Fields dirtied in dequeue() fast path. */
+       __cacheline_group_begin(Qdisc_write) ____cacheline_aligned;
+       struct qdisc_skb_head   q;
+       unsigned long           state;
+       struct gnet_stats_basic_sync bstats;
+       bool                    running; /* must be written under qdisc spinlock */
+
+       /* Note : we only change qstats.backlog in fast path. */
+       struct gnet_stats_queue qstats;
+       __cacheline_group_end(Qdisc_write);
+
 
        atomic_long_t           defer_count ____cacheline_aligned_in_smp;
        struct llist_head       defer_list;
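Cacheline groups elsewhere in the kernel (for instance the net_device groups checked in net/core/dev.c) are usually paired with compile-time membership checks. As a hedged sketch, not part of this patch, equivalent checks for the new Qdisc_write group could look like the following, assuming the CACHELINE_ASSERT_GROUP_MEMBER() helper from include/linux/cache.h applies here in the same way:

#include <linux/cache.h>
#include <net/sch_generic.h>

/* Hypothetical helper, e.g. callable from an __init path in
 * net/sched/sch_generic.c: verify the fast-path fields really sit
 * inside the Qdisc_write group. */
static void __maybe_unused qdisc_struct_check(void)
{
        CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, q);
        CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, state);
        CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, bstats);
        CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, running);
        CACHELINE_ASSERT_GROUP_MEMBER(struct Qdisc, Qdisc_write, qstats);
}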