Mirror of https://github.com/torvalds/linux.git (synced 2025-12-07 20:06:24 +00:00)
ice: implement configurable header split for regular Rx
Add a second page_pool for header buffers to each Rx queue and the ability to toggle header split on/off via Ethtool (it defaults to off to match the current behaviour). Unlike idpf, none of the HW supported by ice requires any workarounds, and it correctly splits all types of packets as configured: after the L4 headers for TCP/UDP/SCTP, after the L3 headers for other IPv4/IPv6 frames, and after the Ethernet header otherwise (in case of tunneling, the same as above, but after the innermost headers). This doesn't affect the XSk path, as there are no benefits to having it there.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Tested-by: Alexander Nowlin <alexander.nowlin@intel.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
commit 8adfcfd6a2
parent 93f53db9f9
committed by Tony Nguyen
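With the ring parameter exposed (see the .supported_ring_params addition below), header split can be queried and toggled from userspace with standard ethtool: ethtool -g <iface> reports the current "TCP data split" state, and ethtool -G <iface> tcp-data-split on (or off) changes it. Here <iface> is a placeholder interface name, and this assumes an ethtool build new enough to understand the tcp-data-split ring parameter.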
@@ -351,6 +351,7 @@ struct ice_vsi {
         u16 num_q_vectors;
         /* tell if only dynamic irq allocation is allowed */
         bool irq_dyn_alloc;
+        bool hsplit:1;

         u16 vsi_num;    /* HW (absolute) index of this VSI */
         u16 idx;        /* software index in pf->vsi[] */

@@ -524,8 +524,29 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
         else
                 rlan_ctx.l2tsel = 1;

-        rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
-        rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+        if (ring->hdr_pp) {
+                rlan_ctx.hbuf = ring->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
+                rlan_ctx.dtype = ICE_RX_DTYPE_HEADER_SPLIT;
+
+                /*
+                 * If the frame is TCP/UDP/SCTP, it will be split by the
+                 * payload.
+                 * If not, but it's an IPv4/IPv6 frame, it will be split by
+                 * the IP header.
+                 * If not IP, it will be split by the Ethernet header.
+                 *
+                 * In any case, the header buffer will never be left empty.
+                 */
+                rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_SPLIT_L2 |
+                                    ICE_RLAN_RX_HSPLIT_0_SPLIT_IP |
+                                    ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP |
+                                    ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP;
+        } else {
+                rlan_ctx.hbuf = 0;
+                rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
+                rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
+        }
+
         rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;

         /* This controls whether VLAN is stripped from inner headers

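For context on what the split buys on the Rx completion side, here is a minimal, illustrative sketch in C (not code from this patch or from libeth; the helper name and parameters are invented for illustration): the small header buffer is copied into the skb linear area, while the untouched payload buffer is attached as a page fragment, so payload bytes are never memcpy'd.

/* Illustrative only: roughly what an Rx completion path can do with a
 * header-split frame. Helper and parameter names are invented for this
 * sketch and are not part of ice or libeth.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static struct sk_buff *rx_build_split_skb(struct napi_struct *napi,
                                          const void *hdr, u32 hdr_len,
                                          struct page *payload_page,
                                          u32 payload_off, u32 payload_len,
                                          u32 payload_truesize)
{
        struct sk_buff *skb;

        /* Small allocation: only the protocol headers go in the linear area */
        skb = napi_alloc_skb(napi, hdr_len);
        if (!skb)
                return NULL;

        skb_put_data(skb, hdr, hdr_len);

        /* The payload stays where HW wrote it and is attached as a frag */
        if (payload_len)
                skb_add_rx_frag(skb, 0, payload_page, payload_off,
                                payload_len, payload_truesize);

        return skb;
}
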
@@ -581,6 +602,53 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
         return 0;
 }

+static int ice_rxq_pp_create(struct ice_rx_ring *rq)
+{
+        struct libeth_fq fq = {
+                .count = rq->count,
+                .nid = NUMA_NO_NODE,
+                .hsplit = rq->vsi->hsplit,
+                .xdp = ice_is_xdp_ena_vsi(rq->vsi),
+                .buf_len = LIBIE_MAX_RX_BUF_LEN,
+        };
+        int err;
+
+        err = libeth_rx_fq_create(&fq, &rq->q_vector->napi);
+        if (err)
+                return err;
+
+        rq->pp = fq.pp;
+        rq->rx_fqes = fq.fqes;
+        rq->truesize = fq.truesize;
+        rq->rx_buf_len = fq.buf_len;
+
+        if (!fq.hsplit)
+                return 0;
+
+        fq = (struct libeth_fq){
+                .count = rq->count,
+                .type = LIBETH_FQE_HDR,
+                .nid = NUMA_NO_NODE,
+                .xdp = ice_is_xdp_ena_vsi(rq->vsi),
+        };
+
+        err = libeth_rx_fq_create(&fq, &rq->q_vector->napi);
+        if (err)
+                goto destroy;
+
+        rq->hdr_pp = fq.pp;
+        rq->hdr_fqes = fq.fqes;
+        rq->hdr_truesize = fq.truesize;
+        rq->rx_hdr_len = fq.buf_len;
+
+        return 0;
+
+destroy:
+        ice_rxq_pp_destroy(rq);
+
+        return err;
+}
+
 /**
  * ice_vsi_cfg_rxq - Configure an Rx queue
  * @ring: the ring being configured

@@ -589,12 +657,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
  */
 static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
 {
-        struct libeth_fq fq = {
-                .count = ring->count,
-                .nid = NUMA_NO_NODE,
-                .xdp = ice_is_xdp_ena_vsi(ring->vsi),
-                .buf_len = LIBIE_MAX_RX_BUF_LEN,
-        };
         struct device *dev = ice_pf_to_dev(ring->vsi->back);
         u32 num_bufs = ICE_DESC_UNUSED(ring);
         u32 rx_buf_len;

@@ -636,15 +698,10 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
                 dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
                          ring->q_index);
         } else {
-                err = libeth_rx_fq_create(&fq, &ring->q_vector->napi);
+                err = ice_rxq_pp_create(ring);
                 if (err)
                         return err;

-                ring->pp = fq.pp;
-                ring->rx_fqes = fq.fqes;
-                ring->truesize = fq.truesize;
-                ring->rx_buf_len = fq.buf_len;
-
                 if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
                         err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
                                                  ring->q_index,

@@ -699,9 +756,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
         return 0;

 err_destroy_fq:
-        libeth_rx_fq_destroy(&fq);
-        ring->rx_fqes = NULL;
-        ring->pp = NULL;
+        ice_rxq_pp_destroy(ring);

         return err;
 }

@@ -3151,6 +3151,10 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
         ring->rx_jumbo_max_pending = 0;
         ring->rx_mini_pending = 0;
         ring->rx_jumbo_pending = 0;
+
+        kernel_ring->tcp_data_split = vsi->hsplit ?
+                                      ETHTOOL_TCP_DATA_SPLIT_ENABLED :
+                                      ETHTOOL_TCP_DATA_SPLIT_DISABLED;
 }

 static int

@@ -3167,6 +3171,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
         int i, timeout = 50, err = 0;
         struct ice_hw *hw = &pf->hw;
         u16 new_rx_cnt, new_tx_cnt;
+        bool hsplit;

         if (ring->tx_pending > ICE_MAX_NUM_DESC_BY_MAC(hw) ||
             ring->tx_pending < ICE_MIN_NUM_DESC ||

@@ -3192,9 +3197,12 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
                 netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
                             new_rx_cnt);

+        hsplit = kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED;
+
         /* if nothing to do return success */
         if (new_tx_cnt == vsi->tx_rings[0]->count &&
-            new_rx_cnt == vsi->rx_rings[0]->count) {
+            new_rx_cnt == vsi->rx_rings[0]->count &&
+            hsplit == vsi->hsplit) {
                 netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
                 return 0;
         }

@@ -3224,6 +3232,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
                         vsi->xdp_rings[i]->count = new_tx_cnt;
                 vsi->num_tx_desc = (u16)new_tx_cnt;
                 vsi->num_rx_desc = (u16)new_rx_cnt;
+                vsi->hsplit = hsplit;
+
                 netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
                 goto done;
         }

@@ -3330,6 +3340,8 @@ rx_unwind:
         }

 process_link:
+        vsi->hsplit = hsplit;
+
         /* Bring interface down, copy in the new ring info, then restore the
          * interface. if VSI is up, bring it down and then back up
          */

@@ -4811,6 +4823,7 @@ static const struct ethtool_ops ice_ethtool_ops = {
                                      ETHTOOL_COALESCE_USE_ADAPTIVE |
                                      ETHTOOL_COALESCE_RX_USECS_HIGH,
         .supported_input_xfrm = RXH_XFRM_SYM_XOR,
+        .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT,
         .get_link_ksettings = ice_get_link_ksettings,
         .set_link_ksettings = ice_set_link_ksettings,
         .get_fec_stats = ice_get_fec_stats,

@@ -342,6 +342,9 @@ enum ice_flg64_bits {
 /* for ice_32byte_rx_flex_desc.pkt_length member */
 #define ICE_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */

+/* ice_32byte_rx_flex_desc::hdr_len_sph_flex_flags1 */
+#define ICE_RX_FLEX_DESC_HDR_LEN_M GENMASK(10, 0)
+
 enum ice_rx_flex_desc_status_error_0_bits {
         /* Note: These are predefined bit offsets */
         ICE_RX_FLEX_DESC_STATUS0_DD_S = 0,

@@ -508,16 +508,34 @@ err:
         return -ENOMEM;
 }

+void ice_rxq_pp_destroy(struct ice_rx_ring *rq)
+{
+        struct libeth_fq fq = {
+                .fqes = rq->rx_fqes,
+                .pp = rq->pp,
+        };
+
+        libeth_rx_fq_destroy(&fq);
+        rq->rx_fqes = NULL;
+        rq->pp = NULL;
+
+        if (!rq->hdr_pp)
+                return;
+
+        fq.fqes = rq->hdr_fqes;
+        fq.pp = rq->hdr_pp;
+
+        libeth_rx_fq_destroy(&fq);
+        rq->hdr_fqes = NULL;
+        rq->hdr_pp = NULL;
+}
+
 /**
  * ice_clean_rx_ring - Free Rx buffers
  * @rx_ring: ring to be cleaned
  */
 void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
 {
-        struct libeth_fq fq = {
-                .fqes = rx_ring->rx_fqes,
-                .pp = rx_ring->pp,
-        };
         u32 size;

         if (rx_ring->xsk_pool) {

@@ -533,9 +551,10 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)

         /* Free all the Rx ring sk_buffs */
         for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) {
                 const struct libeth_fqe *rx_fqes = &rx_ring->rx_fqes[i];
-                libeth_rx_recycle_slow(rx_ring->rx_fqes[i].netmem);
-
+                libeth_rx_recycle_slow(rx_fqes->netmem);
+                if (rx_ring->hdr_pp)
+                        libeth_rx_recycle_slow(rx_ring->hdr_fqes[i].netmem);

                 if (unlikely(++i == rx_ring->count))
                         i = 0;

@@ -547,12 +566,9 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
                 xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
         }

-        libeth_rx_fq_destroy(&fq);
-        rx_ring->rx_fqes = NULL;
-        rx_ring->pp = NULL;
+        ice_rxq_pp_destroy(rx_ring);

 rx_skip_free:

         /* Zero out the descriptor ring */
         size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
                      PAGE_SIZE);

@@ -806,6 +822,12 @@ void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 count)
  */
 bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
 {
+        const struct libeth_fq_fp hdr_fq = {
+                .pp = rx_ring->hdr_pp,
+                .fqes = rx_ring->hdr_fqes,
+                .truesize = rx_ring->hdr_truesize,
+                .count = rx_ring->count,
+        };
         const struct libeth_fq_fp fq = {
                 .pp = rx_ring->pp,
                 .fqes = rx_ring->rx_fqes,

@@ -836,6 +858,20 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
                  */
                 rx_desc->read.pkt_addr = cpu_to_le64(addr);

+                if (!hdr_fq.pp)
+                        goto next;
+
+                addr = libeth_rx_alloc(&hdr_fq, ntu);
+                if (addr == DMA_MAPPING_ERROR) {
+                        rx_ring->ring_stats->rx_stats.alloc_page_failed++;
+
+                        libeth_rx_recycle_slow(fq.fqes[ntu].netmem);
+                        break;
+                }
+
+                rx_desc->read.hdr_addr = cpu_to_le64(addr);
+
+next:
                 rx_desc++;
                 ntu++;
                 if (unlikely(ntu == rx_ring->count)) {

@@ -933,14 +969,16 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
                 unsigned int size;
                 u16 stat_err_bits;
                 u16 vlan_tci;
+                bool rxe;

                 /* get the Rx desc from Rx ring based on 'next_to_clean' */
                 rx_desc = ICE_RX_DESC(rx_ring, ntc);

-                /* status_error_len will always be zero for unused descriptors
-                 * because it's cleared in cleanup, and overlaps with hdr_addr
-                 * which is always zero because packet split isn't used, if the
-                 * hardware wrote DD then it will be non-zero
+                /*
+                 * The DD bit will always be zero for unused descriptors
+                 * because it's cleared in cleanup or when setting the DMA
+                 * address of the header buffer, which never uses the DD bit.
+                 * If the hardware wrote the descriptor, it will be non-zero.
                  */
                 stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
                 if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))

@@ -954,12 +992,27 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)

                 ice_trace(clean_rx_irq, rx_ring, rx_desc);

+                stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_HBO_S) |
+                                BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
+                rxe = ice_test_staterr(rx_desc->wb.status_error0,
+                                       stat_err_bits);
+
+                if (!rx_ring->hdr_pp)
+                        goto payload;
+
+                size = le16_get_bits(rx_desc->wb.hdr_len_sph_flex_flags1,
+                                     ICE_RX_FLEX_DESC_HDR_LEN_M);
+                if (unlikely(rxe))
+                        size = 0;
+
+                rx_buf = &rx_ring->hdr_fqes[ntc];
+                libeth_xdp_process_buff(xdp, rx_buf, size);
+                rx_buf->netmem = 0;
+
+payload:
                 size = le16_to_cpu(rx_desc->wb.pkt_len) &
                        ICE_RX_FLX_DESC_PKT_LEN_M;

-                stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
-                if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
-                                              stat_err_bits)))
+                if (unlikely(rxe))
                         size = 0;

                 /* retrieve a buffer from the ring */

@@ -255,6 +255,9 @@ struct ice_rx_ring {
         };

         /* CL2 - 2nd cacheline starts here */
+        struct libeth_fqe *hdr_fqes;
+        struct page_pool *hdr_pp;
+
         union {
                 struct libeth_xdp_buff_stash xdp;
                 struct libeth_xdp_buff *xsk;

@@ -273,6 +276,8 @@ struct ice_rx_ring {
         /* used in interrupt processing */
         u16 next_to_use;
         u16 next_to_clean;
+
+        u32 hdr_truesize;
         u32 truesize;

         /* stats structs */

@@ -284,6 +289,7 @@ struct ice_rx_ring {
         struct ice_tx_ring *xdp_ring;
         struct ice_rx_ring *next;    /* pointer to next ring in q_vector */
         struct xsk_buff_pool *xsk_pool;
+        u16 rx_hdr_len;
         u16 rx_buf_len;
         dma_addr_t dma;              /* physical address of ring */
         u8 dcb_tc;                   /* Traffic class of ring */

@@ -396,6 +402,7 @@ static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring)
 union ice_32b_rx_flex_desc;

 void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 num_descs);
+void ice_rxq_pp_destroy(struct ice_rx_ring *rq);
 bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count);
 netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
 u16