Merge branch '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2025-10-29 (ice, i40e, idpf, ixgbe, igbvf)

For ice:
Michal converts the driver to use the Page Pool and libeth APIs. The
conversion follows similar changes done for iavf and is intended to
simplify buffer management, improve maintainability, and increase code
reuse across Intel Ethernet drivers.

Additional details:
https://lore.kernel.org/20250925092253.1306476-1-michal.kubiak@intel.com
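For reference, a minimal sketch of the libeth fill-queue pattern the
conversion adopts. The struct and function names (my_rq, my_rq_pp_create,
my_refill_one) are illustrative, not the actual ice symbols; the libeth
calls mirror the ones visible in the diff below.

#include <linux/dma-mapping.h>
#include <linux/netdevice.h>
#include <net/libeth/rx.h>

struct my_rq {
	struct page_pool *pp;		/* filled in by libeth */
	struct libeth_fqe *fqes;	/* per-descriptor buffer info */
	u32 truesize;
	u32 count;
};

/* Create the page_pool and fill-queue element array for one Rx queue */
static int my_rq_pp_create(struct my_rq *rq, struct napi_struct *napi)
{
	struct libeth_fq fq = {
		.count	= rq->count,
		.nid	= NUMA_NO_NODE,
	};
	int err;

	err = libeth_rx_fq_create(&fq, napi);
	if (err)
		return err;

	rq->pp = fq.pp;
	rq->fqes = fq.fqes;
	rq->truesize = fq.truesize;

	return 0;
}

/* Refill one descriptor: libeth hands back a DMA address from the pool,
 * which is then written into the descriptor's packet address field.
 */
static int my_refill_one(struct my_rq *rq, u32 idx, __le64 *pkt_addr)
{
	const struct libeth_fq_fp fq = {
		.pp		= rq->pp,
		.fqes		= rq->fqes,
		.truesize	= rq->truesize,
		.count		= rq->count,
	};
	dma_addr_t addr;

	addr = libeth_rx_alloc(&fq, idx);
	if (addr == DMA_MAPPING_ERROR)
		return -ENOMEM;

	*pkt_addr = cpu_to_le64(addr);

	return 0;
}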

Alexander adds support for header split, configurable via ethtool.
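A rough sketch of the ethtool plumbing involved: the ops field and the
ETHTOOL_* constants are the real kernel API, while the hsplit flag and
function names are illustrative. With a recent ethtool, the feature is
toggled through the ringparam interface, e.g. "ethtool -G <dev>
tcp-data-split on".

#include <linux/ethtool.h>
#include <linux/netdevice.h>

/* Report the current header-split state; "hsplit" stands in for the
 * driver's per-VSI flag.
 */
static void my_get_ringparam(struct net_device *netdev,
			     struct ethtool_ringparam *ring,
			     struct kernel_ethtool_ringparam *kernel_ring,
			     struct netlink_ext_ack *extack)
{
	bool hsplit = true;	/* illustrative: read from driver state */

	kernel_ring->tcp_data_split = hsplit ?
				      ETHTOOL_TCP_DATA_SPLIT_ENABLED :
				      ETHTOOL_TCP_DATA_SPLIT_DISABLED;
}

/* In .set_ringparam the request arrives the same way:
 *	hsplit = kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED;
 * and the ops must advertise support for the knob.
 */
static const struct ethtool_ops my_ethtool_ops = {
	.supported_ring_params	= ETHTOOL_RING_USE_TCP_DATA_SPLIT,
	.get_ringparam		= my_get_ringparam,
};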

Grzegorz allows use of 100 Mbps speed on E825C SGMII devices.

For i40e:
Jay Vosburgh avoids sending a link state change to a VF that is already
in the requested state.
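In short, the requested IFLA_VF_LINK_STATE_* value is compared against
the VF's current one and the update is skipped when they match. A small
sketch of that check (mirroring the hunk further below; the link_forced
and link_up parameters correspond to the i40e per-VF fields):

#include <linux/if_link.h>

static bool my_vf_link_state_unchanged(bool link_forced, bool link_up,
					int link)
{
	int old_link;

	if (!link_forced)
		old_link = IFLA_VF_LINK_STATE_AUTO;
	else if (link_up)
		old_link = IFLA_VF_LINK_STATE_ENABLE;
	else
		old_link = IFLA_VF_LINK_STATE_DISABLE;

	return link == old_link;
}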

For idpf:
Sreedevi removes duplicated defines.

For ixgbe:
Alok Tiwari fixes some typos.

For igbvf:
Alok Tiwari fixes the output of a VLAN add warning message.

* '100GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  igbvf: fix misplaced newline in VLAN add warning message
  ixgbe: fix typos in ixgbe driver comments
  idpf: remove duplicate defines in IDPF_CAP_RSS
  i40e: avoid redundant VF link state updates
  ice: Allow 100M speed for E825C SGMII device
  ice: implement configurable header split for regular Rx
  ice: switch to Page Pool
  ice: drop page splitting and recycling
  ice: remove legacy Rx and construct SKB
====================

Link: https://patch.msgid.link/20251029231218.1277233-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Committed by Jakub Kicinski on 2025-10-30 17:24:56 -07:00
20 changed files with 365 additions and 972 deletions

View File

@@ -296,6 +296,7 @@ config ICE
depends on GNSS || GNSS = n
select AUXILIARY_BUS
select DIMLIB
select LIBETH_XDP
select LIBIE
select LIBIE_ADMINQ
select LIBIE_FWLOG

View File

@@ -4788,6 +4788,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
unsigned long q_map;
struct i40e_vf *vf;
int abs_vf_id;
int old_link;
int ret = 0;
int tmp;
@@ -4806,6 +4807,17 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
vf = &pf->vf[vf_id];
abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
/* skip VF link state change if requested state is already set */
if (!vf->link_forced)
old_link = IFLA_VF_LINK_STATE_AUTO;
else if (vf->link_up)
old_link = IFLA_VF_LINK_STATE_ENABLE;
else
old_link = IFLA_VF_LINK_STATE_DISABLE;
if (link == old_link)
goto error_out;
pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
pfe.severity = PF_EVENT_SEVERITY_INFO;

View File

@@ -351,6 +351,7 @@ struct ice_vsi {
u16 num_q_vectors;
/* tell if only dynamic irq allocation is allowed */
bool irq_dyn_alloc;
bool hsplit:1;
u16 vsi_num; /* HW (absolute) index of this VSI */
u16 idx; /* software index in pf->vsi[] */
@@ -374,6 +375,8 @@ struct ice_vsi {
spinlock_t arfs_lock; /* protects aRFS hash table and filter state */
atomic_t *arfs_last_fltr_id;
u16 max_frame;
struct ice_aqc_vsi_props info; /* VSI properties */
struct ice_vsi_vlan_info vlan_info; /* vlan config to be restored */
@@ -509,7 +512,6 @@ enum ice_pf_flags {
ICE_FLAG_MOD_POWER_UNSUPPORTED,
ICE_FLAG_PHY_FW_LOAD_FAILED,
ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */
ICE_FLAG_LEGACY_RX,
ICE_FLAG_VF_TRUE_PROMISC_ENA,
ICE_FLAG_MDD_AUTO_RESET_VF,
ICE_FLAG_VF_VLAN_PRUNING,

View File

@@ -2,6 +2,7 @@
/* Copyright (c) 2019, Intel Corporation. */
#include <net/xdp_sock_drv.h>
#include <linux/net/intel/libie/rx.h>
#include "ice_base.h"
#include "ice_lib.h"
#include "ice_dcb_lib.h"
@@ -461,19 +462,6 @@ u16 ice_calc_ts_ring_count(struct ice_tx_ring *tx_ring)
return tx_ring->count + max_fetch_desc;
}
/**
* ice_rx_offset - Return expected offset into page to access data
* @rx_ring: Ring we are requesting offset of
*
* Returns the offset value for ring into the data buffer.
*/
static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring)
{
if (ice_ring_uses_build_skb(rx_ring))
return ICE_SKB_PAD;
return 0;
}
/**
* ice_setup_rx_ctx - Configure a receive ring context
* @ring: The Rx ring to configure
@@ -536,8 +524,29 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
else
rlan_ctx.l2tsel = 1;
rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
if (ring->hdr_pp) {
rlan_ctx.hbuf = ring->rx_hdr_len >> ICE_RLAN_CTX_HBUF_S;
rlan_ctx.dtype = ICE_RX_DTYPE_HEADER_SPLIT;
/*
* If the frame is TCP/UDP/SCTP, it will be split by the
* payload.
* If not, but it's an IPv4/IPv6 frame, it will be split by
* the IP header.
* If not IP, it will be split by the Ethernet header.
*
* In any case, the header buffer will never be left empty.
*/
rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_SPLIT_L2 |
ICE_RLAN_RX_HSPLIT_0_SPLIT_IP |
ICE_RLAN_RX_HSPLIT_0_SPLIT_TCP_UDP |
ICE_RLAN_RX_HSPLIT_0_SPLIT_SCTP;
} else {
rlan_ctx.hbuf = 0;
rlan_ctx.dtype = ICE_RX_DTYPE_NO_SPLIT;
rlan_ctx.hsplit_0 = ICE_RLAN_RX_HSPLIT_0_NO_SPLIT;
}
rlan_ctx.hsplit_1 = ICE_RLAN_RX_HSPLIT_1_NO_SPLIT;
/* This controls whether VLAN is stripped from inner headers
@@ -549,7 +558,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
/* Max packet size for this queue - must not be set to a larger value
* than 5 x DBUF
*/
rlan_ctx.rxmax = min_t(u32, ring->max_frame,
rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
ICE_MAX_CHAINED_RX_BUFS * ring->rx_buf_len);
/* Rx queue threshold in units of 64 */
@@ -586,14 +595,6 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
if (vsi->type == ICE_VSI_VF)
return 0;
/* configure Rx buffer alignment */
if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
ice_clear_ring_build_skb_ena(ring);
else
ice_set_ring_build_skb_ena(ring);
ring->rx_offset = ice_rx_offset(ring);
/* init queue specific tail register */
ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
writel(0, ring->tail);
@@ -601,36 +602,51 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
return 0;
}
static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring)
static int ice_rxq_pp_create(struct ice_rx_ring *rq)
{
void *ctx_ptr = &ring->pkt_ctx;
struct xsk_cb_desc desc = {};
struct libeth_fq fq = {
.count = rq->count,
.nid = NUMA_NO_NODE,
.hsplit = rq->vsi->hsplit,
.xdp = ice_is_xdp_ena_vsi(rq->vsi),
.buf_len = LIBIE_MAX_RX_BUF_LEN,
};
int err;
XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
desc.src = &ctx_ptr;
desc.off = offsetof(struct ice_xdp_buff, pkt_ctx) -
sizeof(struct xdp_buff);
desc.bytes = sizeof(ctx_ptr);
xsk_pool_fill_cb(ring->xsk_pool, &desc);
}
err = libeth_rx_fq_create(&fq, &rq->q_vector->napi);
if (err)
return err;
/**
* ice_get_frame_sz - calculate xdp_buff::frame_sz
* @rx_ring: the ring being configured
*
* Return frame size based on underlying PAGE_SIZE
*/
static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring)
{
unsigned int frame_sz;
rq->pp = fq.pp;
rq->rx_fqes = fq.fqes;
rq->truesize = fq.truesize;
rq->rx_buf_len = fq.buf_len;
#if (PAGE_SIZE >= 8192)
frame_sz = rx_ring->rx_buf_len;
#else
frame_sz = ice_rx_pg_size(rx_ring) / 2;
#endif
if (!fq.hsplit)
return 0;
return frame_sz;
fq = (struct libeth_fq){
.count = rq->count,
.type = LIBETH_FQE_HDR,
.nid = NUMA_NO_NODE,
.xdp = ice_is_xdp_ena_vsi(rq->vsi),
};
err = libeth_rx_fq_create(&fq, &rq->q_vector->napi);
if (err)
goto destroy;
rq->hdr_pp = fq.pp;
rq->hdr_fqes = fq.fqes;
rq->hdr_truesize = fq.truesize;
rq->rx_hdr_len = fq.buf_len;
return 0;
destroy:
ice_rxq_pp_destroy(rq);
return err;
}
/**
@@ -642,7 +658,8 @@ static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring)
static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
{
struct device *dev = ice_pf_to_dev(ring->vsi->back);
u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
u32 num_bufs = ICE_DESC_UNUSED(ring);
u32 rx_buf_len;
int err;
if (ring->vsi->type == ICE_VSI_PF || ring->vsi->type == ICE_VSI_SF) {
@@ -656,15 +673,19 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
}
ice_rx_xsk_pool(ring);
err = ice_realloc_rx_xdp_bufs(ring, ring->xsk_pool);
if (err)
return err;
if (ring->xsk_pool) {
xdp_rxq_info_unreg(&ring->xdp_rxq);
ring->rx_buf_len =
rx_buf_len =
xsk_pool_get_rx_frame_size(ring->xsk_pool);
err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index,
ring->q_vector->napi.napi_id,
ring->rx_buf_len);
rx_buf_len);
if (err)
return err;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
@@ -673,36 +694,33 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
if (err)
return err;
xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
ice_xsk_pool_fill_cb(ring);
dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
ring->q_index);
} else {
err = ice_rxq_pp_create(ring);
if (err)
return err;
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq)) {
err = __xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index,
ring->q_vector->napi.napi_id,
ring->rx_buf_len);
if (err)
return err;
goto err_destroy_fq;
}
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
NULL);
if (err)
return err;
xdp_rxq_info_attach_page_pool(&ring->xdp_rxq,
ring->pp);
}
}
xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq);
ring->xdp.data = NULL;
ring->xdp_ext.pkt_ctx = &ring->pkt_ctx;
err = ice_setup_rx_ctx(ring);
if (err) {
dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
ring->q_index, err);
return err;
goto err_destroy_fq;
}
if (ring->xsk_pool) {
@@ -730,9 +748,17 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
if (ring->vsi->type == ICE_VSI_CTRL)
ice_init_ctrl_rx_descs(ring, num_bufs);
else
ice_alloc_rx_bufs(ring, num_bufs);
err = ice_alloc_rx_bufs(ring, num_bufs);
if (err)
goto err_destroy_fq;
return 0;
err_destroy_fq:
ice_rxq_pp_destroy(ring);
return err;
}
int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
@@ -753,18 +779,10 @@ int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx)
*/
static void ice_vsi_cfg_frame_size(struct ice_vsi *vsi, struct ice_rx_ring *ring)
{
if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
ring->max_frame = ICE_MAX_FRAME_LEGACY_RX;
ring->rx_buf_len = ICE_RXBUF_1664;
#if (PAGE_SIZE < 8192)
} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
(vsi->netdev->mtu <= ETH_DATA_LEN)) {
ring->max_frame = ICE_RXBUF_1536 - NET_IP_ALIGN;
ring->rx_buf_len = ICE_RXBUF_1536 - NET_IP_ALIGN;
#endif
if (!vsi->netdev) {
vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX;
} else {
ring->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
ring->rx_buf_len = ICE_RXBUF_3072;
vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
}
}

View File

@@ -3392,6 +3392,7 @@ bool ice_is_100m_speed_supported(struct ice_hw *hw)
case ICE_DEV_ID_E822L_SGMII:
case ICE_DEV_ID_E823L_1GBE:
case ICE_DEV_ID_E823C_SGMII:
case ICE_DEV_ID_E825C_SGMII:
return true;
default:
return false;

View File

@@ -10,6 +10,7 @@
#include "ice_lib.h"
#include "ice_dcb_lib.h"
#include <net/dcbnl.h>
#include <net/libeth/rx.h>
struct ice_stats {
char stat_string[ETH_GSTRING_LEN];
@@ -340,7 +341,6 @@ static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
ICE_FLAG_VF_TRUE_PROMISC_ENA),
ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF),
ICE_PRIV_FLAG("vf-vlan-pruning", ICE_FLAG_VF_VLAN_PRUNING),
ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
};
#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
@@ -1231,8 +1231,9 @@ static int ice_diag_send(struct ice_tx_ring *tx_ring, u8 *data, u16 size)
*/
static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring)
{
struct ice_rx_buf *rx_buf;
struct libeth_fqe *rx_buf;
int valid_frames, i;
struct page *page;
u8 *received_buf;
valid_frames = 0;
@@ -1247,8 +1248,10 @@ static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring)
cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
continue;
rx_buf = &rx_ring->rx_buf[i];
received_buf = page_address(rx_buf->page) + rx_buf->page_offset;
rx_buf = &rx_ring->rx_fqes[i];
page = __netmem_to_page(rx_buf->netmem);
received_buf = page_address(page) + rx_buf->offset +
page->pp->p.offset;
if (ice_lbtest_check_frame(received_buf))
valid_frames++;
@@ -1856,10 +1859,6 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
ice_nway_reset(netdev);
}
}
if (test_bit(ICE_FLAG_LEGACY_RX, change_flags)) {
/* down and up VSI so that changes of Rx cfg are reflected. */
ice_down_up(vsi);
}
/* don't allow modification of this flag when a single VF is in
* promiscuous mode because it's not supported
*/
@@ -3152,6 +3151,10 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
ring->rx_jumbo_max_pending = 0;
ring->rx_mini_pending = 0;
ring->rx_jumbo_pending = 0;
kernel_ring->tcp_data_split = vsi->hsplit ?
ETHTOOL_TCP_DATA_SPLIT_ENABLED :
ETHTOOL_TCP_DATA_SPLIT_DISABLED;
}
static int
@@ -3168,6 +3171,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
int i, timeout = 50, err = 0;
struct ice_hw *hw = &pf->hw;
u16 new_rx_cnt, new_tx_cnt;
bool hsplit;
if (ring->tx_pending > ICE_MAX_NUM_DESC_BY_MAC(hw) ||
ring->tx_pending < ICE_MIN_NUM_DESC ||
@@ -3193,9 +3197,12 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n",
new_rx_cnt);
hsplit = kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED;
/* if nothing to do return success */
if (new_tx_cnt == vsi->tx_rings[0]->count &&
new_rx_cnt == vsi->rx_rings[0]->count) {
new_rx_cnt == vsi->rx_rings[0]->count &&
hsplit == vsi->hsplit) {
netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n");
return 0;
}
@@ -3225,6 +3232,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
vsi->xdp_rings[i]->count = new_tx_cnt;
vsi->num_tx_desc = (u16)new_tx_cnt;
vsi->num_rx_desc = (u16)new_rx_cnt;
vsi->hsplit = hsplit;
netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
goto done;
}
@@ -3308,7 +3317,8 @@ process_rx:
rx_rings[i].count = new_rx_cnt;
rx_rings[i].cached_phctime = pf->ptp.cached_phc_time;
rx_rings[i].desc = NULL;
rx_rings[i].rx_buf = NULL;
rx_rings[i].xdp_buf = NULL;
/* this is to allow wr32 to have something to write to
* during early allocation of Rx buffers
*/
@@ -3317,10 +3327,6 @@ process_rx:
err = ice_setup_rx_ring(&rx_rings[i]);
if (err)
goto rx_unwind;
/* allocate Rx buffers */
err = ice_alloc_rx_bufs(&rx_rings[i],
ICE_RX_DESC_UNUSED(&rx_rings[i]));
rx_unwind:
if (err) {
while (i) {
@@ -3334,6 +3340,8 @@ rx_unwind:
}
process_link:
vsi->hsplit = hsplit;
/* Bring interface down, copy in the new ring info, then restore the
* interface. if VSI is up, bring it down and then back up
*/
@@ -4815,6 +4823,7 @@ static const struct ethtool_ops ice_ethtool_ops = {
ETHTOOL_COALESCE_USE_ADAPTIVE |
ETHTOOL_COALESCE_RX_USECS_HIGH,
.supported_input_xfrm = RXH_XFRM_SYM_XOR,
.supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT,
.get_link_ksettings = ice_get_link_ksettings,
.set_link_ksettings = ice_set_link_ksettings,
.get_fec_stats = ice_get_fec_stats,

View File

@@ -342,6 +342,9 @@ enum ice_flg64_bits {
/* for ice_32byte_rx_flex_desc.pkt_length member */
#define ICE_RX_FLX_DESC_PKT_LEN_M (0x3FFF) /* 14-bits */
/* ice_32byte_rx_flex_desc::hdr_len_sph_flex_flags1 */
#define ICE_RX_FLEX_DESC_HDR_LEN_M GENMASK(10, 0)
enum ice_rx_flex_desc_status_error_0_bits {
/* Note: These are predefined bit offsets */
ICE_RX_FLEX_DESC_STATUS0_DD_S = 0,

View File

@@ -1427,7 +1427,6 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
ring->reg_idx = vsi->rxq_map[i];
ring->vsi = vsi;
ring->netdev = vsi->netdev;
ring->dev = dev;
ring->count = vsi->num_rx_desc;
ring->cached_phctime = pf->ptp.cached_phc_time;

View File

@@ -37,6 +37,8 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation.";
#define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg"
MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_IMPORT_NS("LIBETH");
MODULE_IMPORT_NS("LIBETH_XDP");
MODULE_IMPORT_NS("LIBIE");
MODULE_IMPORT_NS("LIBIE_ADMINQ");
MODULE_IMPORT_NS("LIBIE_FWLOG");
@@ -2957,10 +2959,7 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
*/
static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
{
if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
return ICE_RXBUF_1664;
else
return ICE_RXBUF_3072;
return ICE_RXBUF_3072;
}
/**
@@ -3018,19 +3017,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
}
}
xdp_features_set_redirect_target(vsi->netdev, true);
/* reallocate Rx queues that are used for zero-copy */
xdp_ring_err = ice_realloc_zc_buf(vsi, true);
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed");
} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
xdp_features_clear_redirect_target(vsi->netdev);
xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL);
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
/* reallocate Rx queues that were used for zero-copy */
xdp_ring_err = ice_realloc_zc_buf(vsi, false);
if (xdp_ring_err)
NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
}
resume_if:
@@ -7864,12 +7855,6 @@ int ice_change_mtu(struct net_device *netdev, int new_mtu)
frame_size - ICE_ETH_PKT_HDR_PAD);
return -EINVAL;
}
} else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) {
if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) {
netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n",
ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD);
return -EINVAL;
}
}
/* if a reset is in progress, wait for some time for it to complete */

View File

@@ -7,6 +7,8 @@
#include <linux/netdevice.h>
#include <linux/prefetch.h>
#include <linux/bpf_trace.h>
#include <linux/net/intel/libie/rx.h>
#include <net/libeth/xdp.h>
#include <net/dsfield.h>
#include <net/mpls.h>
#include <net/xdp.h>
@@ -111,7 +113,7 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
static void
ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf)
{
if (dma_unmap_len(tx_buf, len))
if (tx_buf->type != ICE_TX_BUF_XDP_TX && dma_unmap_len(tx_buf, len))
dma_unmap_page(ring->dev,
dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len),
@@ -125,7 +127,7 @@ ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf)
dev_kfree_skb_any(tx_buf->skb);
break;
case ICE_TX_BUF_XDP_TX:
page_frag_free(tx_buf->raw_buf);
libeth_xdp_return_va(tx_buf->raw_buf, false);
break;
case ICE_TX_BUF_XDP_XMIT:
xdp_return_frame(tx_buf->xdpf);
@@ -506,61 +508,67 @@ err:
return -ENOMEM;
}
void ice_rxq_pp_destroy(struct ice_rx_ring *rq)
{
struct libeth_fq fq = {
.fqes = rq->rx_fqes,
.pp = rq->pp,
};
libeth_rx_fq_destroy(&fq);
rq->rx_fqes = NULL;
rq->pp = NULL;
if (!rq->hdr_pp)
return;
fq.fqes = rq->hdr_fqes;
fq.pp = rq->hdr_pp;
libeth_rx_fq_destroy(&fq);
rq->hdr_fqes = NULL;
rq->hdr_pp = NULL;
}
/**
* ice_clean_rx_ring - Free Rx buffers
* @rx_ring: ring to be cleaned
*/
void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
{
struct xdp_buff *xdp = &rx_ring->xdp;
struct device *dev = rx_ring->dev;
u32 size;
u16 i;
/* ring already cleared, nothing to do */
if (!rx_ring->rx_buf)
return;
if (rx_ring->xsk_pool) {
ice_xsk_clean_rx_ring(rx_ring);
goto rx_skip_free;
}
if (xdp->data) {
xdp_return_buff(xdp);
xdp->data = NULL;
}
/* ring already cleared, nothing to do */
if (!rx_ring->rx_fqes)
return;
libeth_xdp_return_stash(&rx_ring->xdp);
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) {
libeth_rx_recycle_slow(rx_ring->rx_fqes[i].netmem);
if (!rx_buf->page)
continue;
if (rx_ring->hdr_pp)
libeth_rx_recycle_slow(rx_ring->hdr_fqes[i].netmem);
/* Invalidate cache lines that may have been written to by
* device so that we avoid corrupting memory.
*/
dma_sync_single_range_for_cpu(dev, rx_buf->dma,
rx_buf->page_offset,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
/* free resources associated with mapping */
dma_unmap_page_attrs(dev, rx_buf->dma, ice_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
rx_buf->page = NULL;
rx_buf->page_offset = 0;
if (unlikely(++i == rx_ring->count))
i = 0;
}
rx_skip_free:
if (rx_ring->xsk_pool)
memset(rx_ring->xdp_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->xdp_buf)));
else
memset(rx_ring->rx_buf, 0, array_size(rx_ring->count, sizeof(*rx_ring->rx_buf)));
if (rx_ring->vsi->type == ICE_VSI_PF &&
xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) {
xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq);
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
}
ice_rxq_pp_destroy(rx_ring);
rx_skip_free:
/* Zero out the descriptor ring */
size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
PAGE_SIZE);
@@ -568,7 +576,6 @@ rx_skip_free:
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->first_desc = 0;
rx_ring->next_to_use = 0;
}
@@ -580,26 +587,20 @@ rx_skip_free:
*/
void ice_free_rx_ring(struct ice_rx_ring *rx_ring)
{
struct device *dev = ice_pf_to_dev(rx_ring->vsi->back);
u32 size;
ice_clean_rx_ring(rx_ring);
if (rx_ring->vsi->type == ICE_VSI_PF)
if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
WRITE_ONCE(rx_ring->xdp_prog, NULL);
if (rx_ring->xsk_pool) {
kfree(rx_ring->xdp_buf);
rx_ring->xdp_buf = NULL;
} else {
kfree(rx_ring->rx_buf);
rx_ring->rx_buf = NULL;
}
if (rx_ring->desc) {
size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
PAGE_SIZE);
dmam_free_coherent(rx_ring->dev, size,
rx_ring->desc, rx_ring->dma);
dmam_free_coherent(dev, size, rx_ring->desc, rx_ring->dma);
rx_ring->desc = NULL;
}
}
@@ -612,19 +613,9 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring)
*/
int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
struct device *dev = ice_pf_to_dev(rx_ring->vsi->back);
u32 size;
if (!dev)
return -ENOMEM;
/* warn if we are about to overwrite the pointer */
WARN_ON(rx_ring->rx_buf);
rx_ring->rx_buf =
kcalloc(rx_ring->count, sizeof(*rx_ring->rx_buf), GFP_KERNEL);
if (!rx_ring->rx_buf)
return -ENOMEM;
/* round up to nearest page */
size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc),
PAGE_SIZE);
@@ -633,22 +624,16 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
if (!rx_ring->desc) {
dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
size);
goto err;
return -ENOMEM;
}
rx_ring->next_to_use = 0;
rx_ring->next_to_clean = 0;
rx_ring->first_desc = 0;
if (ice_is_xdp_ena_vsi(rx_ring->vsi))
WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
return 0;
err:
kfree(rx_ring->rx_buf);
rx_ring->rx_buf = NULL;
return -ENOMEM;
}
/**
@@ -662,7 +647,7 @@ err:
* Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
*/
static u32
ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
ice_run_xdp(struct ice_rx_ring *rx_ring, struct libeth_xdp_buff *xdp,
struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
union ice_32b_rx_flex_desc *eop_desc)
{
@@ -672,23 +657,23 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
if (!xdp_prog)
goto exit;
ice_xdp_meta_set_desc(xdp, eop_desc);
xdp->desc = eop_desc;
act = bpf_prog_run_xdp(xdp_prog, xdp);
act = bpf_prog_run_xdp(xdp_prog, &xdp->base);
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_lock(&xdp_ring->tx_lock);
ret = __ice_xmit_xdp_ring(xdp, xdp_ring, false);
ret = __ice_xmit_xdp_ring(&xdp->base, xdp_ring, false);
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_unlock(&xdp_ring->tx_lock);
if (ret == ICE_XDP_CONSUMED)
goto out_failure;
break;
case XDP_REDIRECT:
if (xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))
if (xdp_do_redirect(rx_ring->netdev, &xdp->base, xdp_prog))
goto out_failure;
ret = ICE_XDP_REDIR;
break;
@@ -700,8 +685,10 @@ out_failure:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
libeth_xdp_return_buff(xdp);
ret = ICE_XDP_CONSUMED;
}
exit:
return ret;
}
@@ -789,53 +776,6 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
return nxmit;
}
/**
* ice_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
* @bi: rx_buf struct to modify
*
* Returns true if the page was successfully allocated or
* reused.
*/
static bool
ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
{
struct page *page = bi->page;
dma_addr_t dma;
/* since we are recycling buffers we should seldom need to alloc */
if (likely(page))
return true;
/* alloc new page for storage */
page = dev_alloc_pages(ice_rx_pg_order(rx_ring));
if (unlikely(!page)) {
rx_ring->ring_stats->rx_stats.alloc_page_failed++;
return false;
}
/* map page for use */
dma = dma_map_page_attrs(rx_ring->dev, page, 0, ice_rx_pg_size(rx_ring),
DMA_FROM_DEVICE, ICE_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
*/
if (dma_mapping_error(rx_ring->dev, dma)) {
__free_pages(page, ice_rx_pg_order(rx_ring));
rx_ring->ring_stats->rx_stats.alloc_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
bi->page_offset = rx_ring->rx_offset;
page_ref_add(page, USHRT_MAX - 1);
bi->pagecnt_bias = USHRT_MAX;
return true;
}
/**
* ice_init_ctrl_rx_descs - Initialize Rx descriptors for control vsi.
* @rx_ring: ring to init descriptors on
@@ -882,9 +822,20 @@ void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 count)
*/
bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
{
const struct libeth_fq_fp hdr_fq = {
.pp = rx_ring->hdr_pp,
.fqes = rx_ring->hdr_fqes,
.truesize = rx_ring->hdr_truesize,
.count = rx_ring->count,
};
const struct libeth_fq_fp fq = {
.pp = rx_ring->pp,
.fqes = rx_ring->rx_fqes,
.truesize = rx_ring->truesize,
.count = rx_ring->count,
};
union ice_32b_rx_flex_desc *rx_desc;
u16 ntu = rx_ring->next_to_use;
struct ice_rx_buf *bi;
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
@@ -892,30 +843,39 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
/* get the Rx descriptor and buffer based on next_to_use */
rx_desc = ICE_RX_DESC(rx_ring, ntu);
bi = &rx_ring->rx_buf[ntu];
do {
/* if we fail here, we have work remaining */
if (!ice_alloc_mapped_page(rx_ring, bi))
break;
dma_addr_t addr;
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
bi->page_offset,
rx_ring->rx_buf_len,
DMA_FROM_DEVICE);
addr = libeth_rx_alloc(&fq, ntu);
if (addr == DMA_MAPPING_ERROR) {
rx_ring->ring_stats->rx_stats.alloc_page_failed++;
break;
}
/* Refresh the desc even if buffer_addrs didn't change
* because each write-back erases this info.
*/
rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
rx_desc->read.pkt_addr = cpu_to_le64(addr);
if (!hdr_fq.pp)
goto next;
addr = libeth_rx_alloc(&hdr_fq, ntu);
if (addr == DMA_MAPPING_ERROR) {
rx_ring->ring_stats->rx_stats.alloc_page_failed++;
libeth_rx_recycle_slow(fq.fqes[ntu].netmem);
break;
}
rx_desc->read.hdr_addr = cpu_to_le64(addr);
next:
rx_desc++;
bi++;
ntu++;
if (unlikely(ntu == rx_ring->count)) {
rx_desc = ICE_RX_DESC(rx_ring, 0);
bi = rx_ring->rx_buf;
ntu = 0;
}
@@ -931,402 +891,6 @@ bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
return !!cleaned_count;
}
/**
* ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse
* @rx_buf: Rx buffer to adjust
* @size: Size of adjustment
*
* Update the offset within page so that Rx buf will be ready to be reused.
* For systems with PAGE_SIZE < 8192 this function will flip the page offset
* so the second half of page assigned to Rx buffer will be used, otherwise
* the offset is moved by "size" bytes
*/
static void
ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
{
#if (PAGE_SIZE < 8192)
/* flip page offset to other buffer */
rx_buf->page_offset ^= size;
#else
/* move offset up to the next cache line */
rx_buf->page_offset += size;
#endif
}
/**
* ice_can_reuse_rx_page - Determine if page can be reused for another Rx
* @rx_buf: buffer containing the page
*
* If page is reusable, we have a green light for calling ice_reuse_rx_page,
* which will assign the current buffer to the buffer that next_to_alloc is
* pointing to; otherwise, the DMA mapping needs to be destroyed and
* page freed
*/
static bool
ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
{
unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
struct page *page = rx_buf->page;
/* avoid re-using remote and pfmemalloc pages */
if (!dev_page_is_reusable(page))
return false;
/* if we are only owner of page we can reuse it */
if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1))
return false;
#if (PAGE_SIZE >= 8192)
#define ICE_LAST_OFFSET \
(SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072)
if (rx_buf->page_offset > ICE_LAST_OFFSET)
return false;
#endif /* PAGE_SIZE >= 8192) */
/* If we have drained the page fragment pool we need to update
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
if (unlikely(pagecnt_bias == 1)) {
page_ref_add(page, USHRT_MAX - 1);
rx_buf->pagecnt_bias = USHRT_MAX;
}
return true;
}
/**
* ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag
* @rx_ring: Rx descriptor ring to transact packets on
* @xdp: xdp buff to place the data into
* @rx_buf: buffer containing page to add
* @size: packet length from rx_desc
*
* This function will add the data contained in rx_buf->page to the xdp buf.
* It will just attach the page as a frag.
*/
static int
ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
struct ice_rx_buf *rx_buf, const unsigned int size)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
if (!size)
return 0;
if (!xdp_buff_has_frags(xdp)) {
sinfo->nr_frags = 0;
sinfo->xdp_frags_size = 0;
xdp_buff_set_frags_flag(xdp);
}
if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS))
return -ENOMEM;
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
rx_buf->page_offset, size);
sinfo->xdp_frags_size += size;
if (page_is_pfmemalloc(rx_buf->page))
xdp_buff_set_frag_pfmemalloc(xdp);
return 0;
}
/**
* ice_reuse_rx_page - page flip buffer and store it back on the ring
* @rx_ring: Rx descriptor ring to store buffers on
* @old_buf: donor buffer to have page reused
*
* Synchronizes page for reuse by the adapter
*/
static void
ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf)
{
u16 nta = rx_ring->next_to_alloc;
struct ice_rx_buf *new_buf;
new_buf = &rx_ring->rx_buf[nta];
/* update, and store next to alloc */
nta++;
rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
/* Transfer page from old buffer to new buffer.
* Move each member individually to avoid possible store
* forwarding stalls and unnecessary copy of skb.
*/
new_buf->dma = old_buf->dma;
new_buf->page = old_buf->page;
new_buf->page_offset = old_buf->page_offset;
new_buf->pagecnt_bias = old_buf->pagecnt_bias;
}
/**
* ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
* @rx_ring: Rx descriptor ring to transact packets on
* @size: size of buffer to add to skb
* @ntc: index of next to clean element
*
* This function will pull an Rx buffer from the ring and synchronize it
* for use by the CPU.
*/
static struct ice_rx_buf *
ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
const unsigned int ntc)
{
struct ice_rx_buf *rx_buf;
rx_buf = &rx_ring->rx_buf[ntc];
prefetchw(rx_buf->page);
if (!size)
return rx_buf;
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
rx_buf->page_offset, size,
DMA_FROM_DEVICE);
/* We have pulled a buffer for use, so decrement pagecnt_bias */
rx_buf->pagecnt_bias--;
return rx_buf;
}
/**
* ice_get_pgcnts - grab page_count() for gathered fragments
* @rx_ring: Rx descriptor ring to store the page counts on
* @ntc: the next to clean element (not included in this frame!)
*
* This function is intended to be called right before running XDP
* program so that the page recycling mechanism will be able to take
* a correct decision regarding underlying pages; this is done in such
* way as XDP program can change the refcount of page
*/
static void ice_get_pgcnts(struct ice_rx_ring *rx_ring, unsigned int ntc)
{
u32 idx = rx_ring->first_desc;
struct ice_rx_buf *rx_buf;
u32 cnt = rx_ring->count;
while (idx != ntc) {
rx_buf = &rx_ring->rx_buf[idx];
rx_buf->pgcnt = page_count(rx_buf->page);
if (++idx == cnt)
idx = 0;
}
}
/**
* ice_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @xdp: xdp_buff pointing to the data
*
* This function builds an skb around an existing XDP buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead. Driver has
* already combined frags (if any) to skb_shared_info.
*/
static struct sk_buff *
ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
u8 metasize = xdp->data - xdp->data_meta;
struct skb_shared_info *sinfo = NULL;
unsigned int nr_frags;
struct sk_buff *skb;
if (unlikely(xdp_buff_has_frags(xdp))) {
sinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = sinfo->nr_frags;
}
/* Prefetch first cache line of first page. If xdp->data_meta
* is unused, this points exactly as xdp->data, otherwise we
* likely have a consumer accessing first few bytes of meta
* data, and then actual data.
*/
net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
if (unlikely(!skb))
return NULL;
/* must to record Rx queue, otherwise OS features such as
* symmetric queue won't work
*/
skb_record_rx_queue(skb, rx_ring->q_index);
/* update pointers within the skb to store the data */
skb_reserve(skb, xdp->data - xdp->data_hard_start);
__skb_put(skb, xdp->data_end - xdp->data);
if (metasize)
skb_metadata_set(skb, metasize);
if (unlikely(xdp_buff_has_frags(xdp)))
xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_get_skb_flags(xdp));
return skb;
}
/**
* ice_construct_skb - Allocate skb and populate it
* @rx_ring: Rx descriptor ring to transact packets on
* @xdp: xdp_buff pointing to the data
*
* This function allocates an skb. It then populates it with the page
* data from the current receive descriptor, taking care to set up the
* skb correctly.
*/
static struct sk_buff *
ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
unsigned int size = xdp->data_end - xdp->data;
struct skb_shared_info *sinfo = NULL;
struct ice_rx_buf *rx_buf;
unsigned int nr_frags = 0;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
net_prefetch(xdp->data);
if (unlikely(xdp_buff_has_frags(xdp))) {
sinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = sinfo->nr_frags;
}
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE);
if (unlikely(!skb))
return NULL;
rx_buf = &rx_ring->rx_buf[rx_ring->first_desc];
skb_record_rx_queue(skb, rx_ring->q_index);
/* Determine available headroom for copy */
headlen = size;
if (headlen > ICE_RX_HDR_SIZE)
headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
sizeof(long)));
/* if we exhaust the linear part then add what is left as a frag */
size -= headlen;
if (size) {
/* besides adding here a partial frag, we are going to add
* frags from xdp_buff, make sure there is enough space for
* them
*/
if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) {
dev_kfree_skb(skb);
return NULL;
}
skb_add_rx_frag(skb, 0, rx_buf->page,
rx_buf->page_offset + headlen, size,
xdp->frame_sz);
} else {
/* buffer is unused, restore biased page count in Rx buffer;
* data was copied onto skb's linear part so there's no
* need for adjusting page offset and we can reuse this buffer
* as-is
*/
rx_buf->pagecnt_bias++;
}
if (unlikely(xdp_buff_has_frags(xdp))) {
struct skb_shared_info *skinfo = skb_shinfo(skb);
memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
sizeof(skb_frag_t) * nr_frags);
xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_get_skb_flags(xdp));
}
return skb;
}
/**
* ice_put_rx_buf - Clean up used buffer and either recycle or free
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
*
* This function will clean up the contents of the rx_buf. It will either
* recycle the buffer or unmap it and free the associated resources.
*/
static void
ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
if (!rx_buf)
return;
if (ice_can_reuse_rx_page(rx_buf)) {
/* hand second half of page back to the ring */
ice_reuse_rx_page(rx_ring, rx_buf);
} else {
/* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buf->dma,
ice_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
ICE_RX_DMA_ATTR);
__page_frag_cache_drain(rx_buf->page, rx_buf->pagecnt_bias);
}
/* clear contents of buffer_info */
rx_buf->page = NULL;
}
/**
* ice_put_rx_mbuf - ice_put_rx_buf() caller, for all buffers in frame
* @rx_ring: Rx ring with all the auxiliary data
* @xdp: XDP buffer carrying linear + frags part
* @ntc: the next to clean element (not included in this frame!)
* @verdict: return code from XDP program execution
*
* Called after XDP program is completed, or on error with verdict set to
* ICE_XDP_CONSUMED.
*
* Walk through buffers from first_desc to the end of the frame, releasing
* buffers and satisfying internal page recycle mechanism. The action depends
* on verdict from XDP program.
*/
static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
u32 ntc, u32 verdict)
{
u32 idx = rx_ring->first_desc;
u32 cnt = rx_ring->count;
struct ice_rx_buf *buf;
u32 xdp_frags = 0;
int i = 0;
if (unlikely(xdp_buff_has_frags(xdp)))
xdp_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags;
while (idx != ntc) {
buf = &rx_ring->rx_buf[idx];
if (++idx == cnt)
idx = 0;
/* An XDP program could release fragments from the end of the
* buffer. For these, we need to keep the pagecnt_bias as-is.
* To do this, only adjust pagecnt_bias for fragments up to
* the total remaining after the XDP program has run.
*/
if (verdict != ICE_XDP_CONSUMED)
ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
else if (i++ <= xdp_frags)
buf->pagecnt_bias++;
ice_put_rx_buf(rx_ring, buf);
}
xdp->data = NULL;
rx_ring->first_desc = ntc;
}
/**
* ice_clean_ctrl_rx_irq - Clean descriptors from flow director Rx ring
* @rx_ring: Rx descriptor ring for ctrl_vsi to transact packets on
@@ -1361,9 +925,8 @@ void ice_clean_ctrl_rx_irq(struct ice_rx_ring *rx_ring)
total_rx_pkts++;
}
rx_ring->first_desc = ntc;
rx_ring->next_to_clean = ntc;
ice_init_ctrl_rx_descs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring));
ice_init_ctrl_rx_descs(rx_ring, ICE_DESC_UNUSED(rx_ring));
}
/**
@@ -1381,16 +944,17 @@ void ice_clean_ctrl_rx_irq(struct ice_rx_ring *rx_ring)
static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
unsigned int offset = rx_ring->rx_offset;
struct xdp_buff *xdp = &rx_ring->xdp;
struct ice_tx_ring *xdp_ring = NULL;
struct bpf_prog *xdp_prog = NULL;
u32 ntc = rx_ring->next_to_clean;
LIBETH_XDP_ONSTACK_BUFF(xdp);
u32 cached_ntu, xdp_verdict;
u32 cnt = rx_ring->count;
u32 xdp_xmit = 0;
bool failure;
libeth_xdp_init_buff(xdp, &rx_ring->xdp, &rx_ring->xdp_rxq);
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
if (xdp_prog) {
xdp_ring = rx_ring->xdp_ring;
@@ -1400,19 +964,21 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
struct ice_rx_buf *rx_buf;
struct libeth_fqe *rx_buf;
struct sk_buff *skb;
unsigned int size;
u16 stat_err_bits;
u16 vlan_tci;
bool rxe;
/* get the Rx desc from Rx ring based on 'next_to_clean' */
rx_desc = ICE_RX_DESC(rx_ring, ntc);
/* status_error_len will always be zero for unused descriptors
* because it's cleared in cleanup, and overlaps with hdr_addr
* which is always zero because packet split isn't used, if the
* hardware wrote DD then it will be non-zero
/*
* The DD bit will always be zero for unused descriptors
* because it's cleared in cleanup or when setting the DMA
* address of the header buffer, which never uses the DD bit.
* If the hardware wrote the descriptor, it will be non-zero.
*/
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
if (!ice_test_staterr(rx_desc->wb.status_error0, stat_err_bits))
@@ -1426,71 +992,65 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
ice_trace(clean_rx_irq, rx_ring, rx_desc);
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_HBO_S) |
BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
rxe = ice_test_staterr(rx_desc->wb.status_error0,
stat_err_bits);
if (!rx_ring->hdr_pp)
goto payload;
size = le16_get_bits(rx_desc->wb.hdr_len_sph_flex_flags1,
ICE_RX_FLEX_DESC_HDR_LEN_M);
if (unlikely(rxe))
size = 0;
rx_buf = &rx_ring->hdr_fqes[ntc];
libeth_xdp_process_buff(xdp, rx_buf, size);
rx_buf->netmem = 0;
payload:
size = le16_to_cpu(rx_desc->wb.pkt_len) &
ICE_RX_FLX_DESC_PKT_LEN_M;
if (unlikely(rxe))
size = 0;
/* retrieve a buffer from the ring */
rx_buf = ice_get_rx_buf(rx_ring, size, ntc);
rx_buf = &rx_ring->rx_fqes[ntc];
libeth_xdp_process_buff(xdp, rx_buf, size);
/* Increment ntc before calls to ice_put_rx_mbuf() */
if (++ntc == cnt)
ntc = 0;
if (!xdp->data) {
void *hard_start;
hard_start = page_address(rx_buf->page) + rx_buf->page_offset -
offset;
xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
xdp_buff_clear_frags_flag(xdp);
} else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
ice_put_rx_mbuf(rx_ring, xdp, ntc, ICE_XDP_CONSUMED);
break;
}
/* skip if it is NOP desc */
if (ice_is_non_eop(rx_ring, rx_desc))
if (ice_is_non_eop(rx_ring, rx_desc) || unlikely(!xdp->data))
continue;
ice_get_pgcnts(rx_ring, ntc);
xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc);
if (xdp_verdict == ICE_XDP_PASS)
goto construct_skb;
total_rx_bytes += xdp_get_buff_len(xdp);
if (xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR))
xdp_xmit |= xdp_verdict;
total_rx_bytes += xdp_get_buff_len(&xdp->base);
total_rx_pkts++;
ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict);
xdp_xmit |= xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR);
xdp->data = NULL;
continue;
construct_skb:
if (likely(ice_ring_uses_build_skb(rx_ring)))
skb = ice_build_skb(rx_ring, xdp);
else
skb = ice_construct_skb(rx_ring, xdp);
skb = xdp_build_skb_from_buff(&xdp->base);
xdp->data = NULL;
/* exit if we failed to retrieve a buffer */
if (!skb) {
libeth_xdp_return_buff_slow(xdp);
rx_ring->ring_stats->rx_stats.alloc_buf_failed++;
xdp_verdict = ICE_XDP_CONSUMED;
}
ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict);
if (!skb)
break;
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
stat_err_bits))) {
dev_kfree_skb_any(skb);
continue;
}
vlan_tci = ice_get_vlan_tci(rx_desc);
/* pad the skb if needed, to make a valid ethernet frame */
if (eth_skb_pad(skb))
continue;
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
@@ -1507,11 +1067,13 @@ construct_skb:
rx_ring->next_to_clean = ntc;
/* return up to cleaned_count buffers to hardware */
failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring));
failure = ice_alloc_rx_bufs(rx_ring, ICE_DESC_UNUSED(rx_ring));
if (xdp_xmit)
ice_finalize_xdp_rx(xdp_ring, xdp_xmit, cached_ntu);
libeth_xdp_save_buff(&rx_ring->xdp, xdp);
if (rx_ring->ring_stats)
ice_update_rx_ring_stats(rx_ring, total_rx_pkts,
total_rx_bytes);

View File

@@ -4,6 +4,8 @@
#ifndef _ICE_TXRX_H_
#define _ICE_TXRX_H_
#include <net/libeth/types.h>
#include "ice_type.h"
#define ICE_DFLT_IRQ_WORK 256
@@ -27,72 +29,6 @@
#define ICE_MAX_TXQ_PER_TXQG 128
/* Attempt to maximize the headroom available for incoming frames. We use a 2K
* buffer for MTUs <= 1500 and need 1536/1534 to store the data for the frame.
* This leaves us with 512 bytes of room. From that we need to deduct the
* space needed for the shared info and the padding needed to IP align the
* frame.
*
* Note: For cache line sizes 256 or larger this value is going to end
* up negative. In these cases we should fall back to the legacy
* receive path.
*/
#if (PAGE_SIZE < 8192)
#define ICE_2K_TOO_SMALL_WITH_PADDING \
((unsigned int)(NET_SKB_PAD + ICE_RXBUF_1536) > \
SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
/**
* ice_compute_pad - compute the padding
* @rx_buf_len: buffer length
*
* Figure out the size of half page based on given buffer length and
* then subtract the skb_shared_info followed by subtraction of the
* actual buffer length; this in turn results in the actual space that
* is left for padding usage
*/
static inline int ice_compute_pad(int rx_buf_len)
{
int half_page_size;
half_page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2);
return SKB_WITH_OVERHEAD(half_page_size) - rx_buf_len;
}
/**
* ice_skb_pad - determine the padding that we can supply
*
* Figure out the right Rx buffer size and based on that calculate the
* padding
*/
static inline int ice_skb_pad(void)
{
int rx_buf_len;
/* If a 2K buffer cannot handle a standard Ethernet frame then
* optimize padding for a 3K buffer instead of a 1.5K buffer.
*
* For a 3K buffer we need to add enough padding to allow for
* tailroom due to NET_IP_ALIGN possibly shifting us out of
* cache-line alignment.
*/
if (ICE_2K_TOO_SMALL_WITH_PADDING)
rx_buf_len = ICE_RXBUF_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN);
else
rx_buf_len = ICE_RXBUF_1536;
/* if needed make room for NET_IP_ALIGN */
rx_buf_len -= NET_IP_ALIGN;
return ice_compute_pad(rx_buf_len);
}
#define ICE_SKB_PAD ice_skb_pad()
#else
#define ICE_2K_TOO_SMALL_WITH_PADDING false
#define ICE_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
#endif
/* We are assuming that the cache line is always 64 Bytes here for ice.
* In order to make sure that is a correct assumption there is a check in probe
* to print a warning if the read from GLPCI_CNF2 tells us that the cache line
@@ -112,10 +48,6 @@ static inline int ice_skb_pad(void)
(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
(R)->next_to_clean - (R)->next_to_use - 1)
#define ICE_RX_DESC_UNUSED(R) \
((((R)->first_desc > (R)->next_to_use) ? 0 : (R)->count) + \
(R)->first_desc - (R)->next_to_use - 1)
#define ICE_RING_QUARTER(R) ((R)->count >> 2)
#define ICE_TX_FLAGS_TSO BIT(0)
@@ -197,14 +129,6 @@ struct ice_tx_offload_params {
u8 header_len;
};
struct ice_rx_buf {
dma_addr_t dma;
struct page *page;
unsigned int page_offset;
unsigned int pgcnt;
unsigned int pagecnt_bias;
};
struct ice_q_stats {
u64 pkts;
u64 bytes;
@@ -262,15 +186,6 @@ struct ice_pkt_ctx {
__be16 vlan_proto;
};
struct ice_xdp_buff {
struct xdp_buff xdp_buff;
const union ice_32b_rx_flex_desc *eop_desc;
const struct ice_pkt_ctx *pkt_ctx;
};
/* Required for compatibility with xdp_buffs from xsk_pool */
static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0);
/* indices into GLINT_ITR registers */
#define ICE_RX_ITR ICE_IDX_ITR0
#define ICE_TX_ITR ICE_IDX_ITR1
@@ -323,7 +238,7 @@ struct ice_tstamp_ring {
struct ice_rx_ring {
/* CL1 - 1st cacheline starts here */
void *desc; /* Descriptor ring memory */
struct device *dev; /* Used for DMA mapping */
struct page_pool *pp;
struct net_device *netdev; /* netdev ring maps to */
struct ice_vsi *vsi; /* Backreference to associated VSI */
struct ice_q_vector *q_vector; /* Backreference to associated vector */
@@ -335,14 +250,19 @@ struct ice_rx_ring {
u16 next_to_alloc;
union {
struct ice_rx_buf *rx_buf;
struct libeth_fqe *rx_fqes;
struct xdp_buff **xdp_buf;
};
/* CL2 - 2nd cacheline starts here */
struct libeth_fqe *hdr_fqes;
struct page_pool *hdr_pp;
union {
struct ice_xdp_buff xdp_ext;
struct xdp_buff xdp;
struct libeth_xdp_buff_stash xdp;
struct libeth_xdp_buff *xsk;
};
/* CL3 - 3rd cacheline starts here */
union {
struct ice_pkt_ctx pkt_ctx;
@@ -352,12 +272,13 @@ struct ice_rx_ring {
};
};
struct bpf_prog *xdp_prog;
u16 rx_offset;
/* used in interrupt processing */
u16 next_to_use;
u16 next_to_clean;
u16 first_desc;
u32 hdr_truesize;
u32 truesize;
/* stats structs */
struct ice_ring_stats *ring_stats;
@@ -368,12 +289,11 @@ struct ice_rx_ring {
struct ice_tx_ring *xdp_ring;
struct ice_rx_ring *next; /* pointer to next ring in q_vector */
struct xsk_buff_pool *xsk_pool;
u16 max_frame;
u16 rx_hdr_len;
u16 rx_buf_len;
dma_addr_t dma; /* physical address of ring */
u8 dcb_tc; /* Traffic class of ring */
u8 ptp_rx;
#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1)
#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2)
#define ICE_RX_FLAGS_MULTIDEV BIT(3)
#define ICE_RX_FLAGS_RING_GCS BIT(4)
@@ -422,21 +342,6 @@ struct ice_tx_ring {
u16 quanta_prof_id;
} ____cacheline_internodealigned_in_smp;
static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring)
{
return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB);
}
static inline void ice_set_ring_build_skb_ena(struct ice_rx_ring *ring)
{
ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB;
}
static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring)
{
ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB;
}
static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring)
{
return !!ring->ch;
@@ -491,18 +396,13 @@ struct ice_coalesce_stored {
static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring)
{
#if (PAGE_SIZE < 8192)
if (ring->rx_buf_len > (PAGE_SIZE / 2))
return 1;
#endif
return 0;
}
#define ice_rx_pg_size(_ring) (PAGE_SIZE << ice_rx_pg_order(_ring))
union ice_32b_rx_flex_desc;
void ice_init_ctrl_rx_descs(struct ice_rx_ring *rx_ring, u32 num_descs);
void ice_rxq_pp_destroy(struct ice_rx_ring *rq);
bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count);
netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
u16

View File

@@ -3,6 +3,7 @@
#include <linux/filter.h>
#include <linux/net/intel/libie/rx.h>
#include <net/libeth/xdp.h>
#include "ice_txrx_lib.h"
#include "ice_eswitch.h"
@@ -230,9 +231,12 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
if (ice_is_port_repr_netdev(netdev))
ice_repr_inc_rx_stats(netdev, skb->len);
/* __skb_push() is needed because xdp_build_skb_from_buff()
* calls eth_type_trans()
*/
__skb_push(skb, ETH_HLEN);
skb->protocol = eth_type_trans(skb, netdev);
} else {
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}
ice_rx_csum(rx_ring, skb, rx_desc, ptype);
@@ -270,19 +274,18 @@ static void
ice_clean_xdp_tx_buf(struct device *dev, struct ice_tx_buf *tx_buf,
struct xdp_frame_bulk *bq)
{
dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
dma_unmap_len_set(tx_buf, len, 0);
switch (tx_buf->type) {
case ICE_TX_BUF_XDP_TX:
page_frag_free(tx_buf->raw_buf);
libeth_xdp_return_va(tx_buf->raw_buf, true);
break;
case ICE_TX_BUF_XDP_XMIT:
dma_unmap_single(dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
xdp_return_frame_bulk(tx_buf->xdpf, bq);
break;
}
dma_unmap_len_set(tx_buf, len, 0);
tx_buf->type = ICE_TX_BUF_EMPTY;
}
@@ -377,9 +380,11 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
struct ice_tx_buf *tx_buf;
u32 cnt = xdp_ring->count;
void *data = xdp->data;
struct page *page;
u32 nr_frags = 0;
u32 free_space;
u32 frag = 0;
u32 offset;
free_space = ICE_DESC_UNUSED(xdp_ring);
if (free_space < ICE_RING_QUARTER(xdp_ring))
@@ -399,24 +404,28 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
tx_head = &xdp_ring->tx_buf[ntu];
tx_buf = tx_head;
page = virt_to_page(data);
offset = offset_in_page(xdp->data);
for (;;) {
dma_addr_t dma;
dma = dma_map_single(dev, data, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
goto dma_unmap;
if (frame) {
dma = dma_map_single(dev, data, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
goto dma_unmap;
tx_buf->type = ICE_TX_BUF_FRAG;
} else {
dma = page_pool_get_dma_addr(page) + offset;
dma_sync_single_for_device(dev, dma, size, DMA_BIDIRECTIONAL);
tx_buf->type = ICE_TX_BUF_XDP_TX;
tx_buf->raw_buf = data;
}
/* record length, and DMA address */
dma_unmap_len_set(tx_buf, len, size);
dma_unmap_addr_set(tx_buf, dma, dma);
if (frame) {
tx_buf->type = ICE_TX_BUF_FRAG;
} else {
tx_buf->type = ICE_TX_BUF_XDP_TX;
tx_buf->raw_buf = data;
}
tx_desc->buf_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0);
@@ -430,6 +439,8 @@ int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring,
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_buf = &xdp_ring->tx_buf[ntu];
page = skb_frag_page(&sinfo->frags[frag]);
offset = skb_frag_off(&sinfo->frags[frag]);
data = skb_frag_address(&sinfo->frags[frag]);
size = skb_frag_size(&sinfo->frags[frag]);
frag++;
@@ -514,10 +525,13 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
*/
static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns)
{
const struct ice_xdp_buff *xdp_ext = (void *)ctx;
const struct libeth_xdp_buff *xdp_ext = (void *)ctx;
struct ice_rx_ring *rx_ring;
*ts_ns = ice_ptp_get_rx_hwts(xdp_ext->eop_desc,
xdp_ext->pkt_ctx);
rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq);
*ts_ns = ice_ptp_get_rx_hwts(xdp_ext->desc,
&rx_ring->pkt_ctx);
if (!*ts_ns)
return -ENODATA;
@@ -545,10 +559,10 @@ ice_xdp_rx_hash_type(const union ice_32b_rx_flex_desc *eop_desc)
static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
const struct ice_xdp_buff *xdp_ext = (void *)ctx;
const struct libeth_xdp_buff *xdp_ext = (void *)ctx;
*hash = ice_get_rx_hash(xdp_ext->eop_desc);
*rss_type = ice_xdp_rx_hash_type(xdp_ext->eop_desc);
*hash = ice_get_rx_hash(xdp_ext->desc);
*rss_type = ice_xdp_rx_hash_type(xdp_ext->desc);
if (!likely(*hash))
return -ENODATA;
@@ -567,13 +581,16 @@ static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
static int ice_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto,
u16 *vlan_tci)
{
const struct ice_xdp_buff *xdp_ext = (void *)ctx;
const struct libeth_xdp_buff *xdp_ext = (void *)ctx;
struct ice_rx_ring *rx_ring;
*vlan_proto = xdp_ext->pkt_ctx->vlan_proto;
rx_ring = libeth_xdp_buff_to_rq(xdp_ext, typeof(*rx_ring), xdp_rxq);
*vlan_proto = rx_ring->pkt_ctx.vlan_proto;
if (!*vlan_proto)
return -ENODATA;
*vlan_tci = ice_get_vlan_tci(xdp_ext->eop_desc);
*vlan_tci = ice_get_vlan_tci(xdp_ext->desc);
if (!*vlan_tci)
return -ENODATA;

View File

@@ -135,13 +135,4 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring,
void
ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tci);
static inline void
ice_xdp_meta_set_desc(struct xdp_buff *xdp,
union ice_32b_rx_flex_desc *eop_desc)
{
struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff,
xdp_buff);
xdp_ext->eop_desc = eop_desc;
}
#endif /* !_ICE_TXRX_LIB_H_ */

View File

@@ -3,6 +3,7 @@
#include <linux/bpf_trace.h>
#include <linux/unroll.h>
#include <net/libeth/xdp.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "ice.h"
@@ -169,50 +170,18 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
* If allocation was successful, substitute buffer with allocated one.
* Returns 0 on success, negative on failure
*/
static int
int
ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present)
{
size_t elem_size = pool_present ? sizeof(*rx_ring->xdp_buf) :
sizeof(*rx_ring->rx_buf);
void *sw_ring = kcalloc(rx_ring->count, elem_size, GFP_KERNEL);
if (!sw_ring)
return -ENOMEM;
if (pool_present) {
kfree(rx_ring->rx_buf);
rx_ring->rx_buf = NULL;
rx_ring->xdp_buf = sw_ring;
rx_ring->xdp_buf = kcalloc(rx_ring->count,
sizeof(*rx_ring->xdp_buf),
GFP_KERNEL);
if (!rx_ring->xdp_buf)
return -ENOMEM;
} else {
kfree(rx_ring->xdp_buf);
rx_ring->xdp_buf = NULL;
rx_ring->rx_buf = sw_ring;
}
return 0;
}
/**
* ice_realloc_zc_buf - reallocate XDP ZC queue pairs
* @vsi: Current VSI
* @zc: is zero copy set
*
* Reallocate buffer for rx_rings that might be used by XSK.
* XDP requires more memory, than rx_buf provides.
* Returns 0 on success, negative on failure
*/
int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
{
struct ice_rx_ring *rx_ring;
uint i;
ice_for_each_rxq(vsi, i) {
rx_ring = vsi->rx_rings[i];
if (!rx_ring->xsk_pool)
continue;
if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
return -ENOMEM;
}
return 0;
@@ -228,6 +197,7 @@ int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc)
*/
int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
{
struct ice_rx_ring *rx_ring = vsi->rx_rings[qid];
bool if_running, pool_present = !!pool;
int ret = 0, pool_failure = 0;
@@ -241,8 +211,6 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
ice_is_xdp_ena_vsi(vsi);
if (if_running) {
struct ice_rx_ring *rx_ring = vsi->rx_rings[qid];
ret = ice_qp_dis(vsi, qid);
if (ret) {
netdev_err(vsi->netdev, "ice_qp_dis error = %d\n", ret);
@@ -303,11 +271,6 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
rx_desc->read.pkt_addr = cpu_to_le64(dma);
rx_desc->wb.status_error0 = 0;
/* Put private info that changes on a per-packet basis
* into xdp_buff_xsk->cb.
*/
ice_xdp_meta_set_desc(*xdp, rx_desc);
rx_desc++;
xdp++;
}
@@ -392,69 +355,6 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring,
return __ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, leftover);
}
/**
* ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
* @rx_ring: Rx ring
* @xdp: Pointer to XDP buffer
*
* This function allocates a new skb from a zero-copy Rx buffer.
*
* Returns the skb on success, NULL on failure.
*/
static struct sk_buff *
ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
unsigned int totalsize = xdp->data_end - xdp->data_meta;
unsigned int metasize = xdp->data - xdp->data_meta;
struct skb_shared_info *sinfo = NULL;
struct sk_buff *skb;
u32 nr_frags = 0;
if (unlikely(xdp_buff_has_frags(xdp))) {
sinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = sinfo->nr_frags;
}
net_prefetch(xdp->data_meta);
skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
if (unlikely(!skb))
return NULL;
memcpy(__skb_put(skb, totalsize), xdp->data_meta,
ALIGN(totalsize, sizeof(long)));
if (metasize) {
skb_metadata_set(skb, metasize);
__skb_pull(skb, metasize);
}
if (likely(!xdp_buff_has_frags(xdp)))
goto out;
for (int i = 0; i < nr_frags; i++) {
struct skb_shared_info *skinfo = skb_shinfo(skb);
skb_frag_t *frag = &sinfo->frags[i];
struct page *page;
void *addr;
page = dev_alloc_page();
if (!page) {
dev_kfree_skb(skb);
return NULL;
}
addr = page_to_virt(page);
memcpy(addr, skb_frag_page(frag), skb_frag_size(frag));
__skb_fill_page_desc_noacc(skinfo, skinfo->nr_frags++,
addr, 0, skb_frag_size(frag));
}
out:
xsk_buff_free(xdp);
return skb;
}
/**
* ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
* @xdp_ring: XDP Tx ring
@@ -669,10 +569,10 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
struct xsk_buff_pool *xsk_pool,
int budget)
{
struct xdp_buff *first = (struct xdp_buff *)rx_ring->xsk;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u32 ntc = rx_ring->next_to_clean;
u32 ntu = rx_ring->next_to_use;
struct xdp_buff *first = NULL;
struct ice_tx_ring *xdp_ring;
unsigned int xdp_xmit = 0;
struct bpf_prog *xdp_prog;
@@ -686,9 +586,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
xdp_ring = rx_ring->xdp_ring;
if (ntc != rx_ring->first_desc)
first = *ice_xdp_buf(rx_ring, rx_ring->first_desc);
while (likely(total_rx_packets < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
unsigned int size, xdp_res = 0;
@@ -724,15 +621,17 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
first = xdp;
} else if (likely(size) && !xsk_buff_add_frag(first, xdp)) {
xsk_buff_free(first);
break;
first = NULL;
}
if (++ntc == cnt)
ntc = 0;
if (ice_is_non_eop(rx_ring, rx_desc))
if (ice_is_non_eop(rx_ring, rx_desc) || unlikely(!first))
continue;
((struct libeth_xdp_buff *)first)->desc = rx_desc;
xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring,
xsk_pool);
if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) {
@@ -740,7 +639,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
} else if (xdp_res == ICE_XDP_EXIT) {
failure = true;
first = NULL;
rx_ring->first_desc = ntc;
break;
} else if (xdp_res == ICE_XDP_CONSUMED) {
xsk_buff_free(first);
@@ -752,24 +650,20 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring,
total_rx_packets++;
first = NULL;
rx_ring->first_desc = ntc;
continue;
construct_skb:
/* XDP_PASS path */
skb = ice_construct_skb_zc(rx_ring, first);
skb = xdp_build_skb_from_zc(first);
if (!skb) {
xsk_buff_free(first);
first = NULL;
rx_ring->ring_stats->rx_stats.alloc_buf_failed++;
break;
continue;
}
first = NULL;
rx_ring->first_desc = ntc;
if (eth_skb_pad(skb)) {
skb = NULL;
continue;
}
total_rx_bytes += skb->len;
total_rx_packets++;
@@ -781,7 +675,9 @@ construct_skb:
}
rx_ring->next_to_clean = ntc;
entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring);
rx_ring->xsk = (struct libeth_xdp_buff *)first;
entries_to_alloc = ICE_DESC_UNUSED(rx_ring);
if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
failure |= !ice_alloc_rx_bufs_zc(rx_ring, xsk_pool,
entries_to_alloc);
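
With ice_construct_skb_zc() removed, the XDP_PASS path leans on the generic xdp_build_skb_from_zc() helper, which, as the hunk shows, releases the XSK buffer itself on success and leaves it to the caller only on failure. A minimal sketch of that ownership pattern outside of ice could look like the snippet below; zc_pass_one() and the napi_gro_receive() hand-off are illustrative assumptions, not the driver's actual receive path (ice still goes through ice_receive_skb()).

/* Sketch only: generic XDP_PASS handling for one zero-copy frame. */
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <net/xdp.h>
#include <net/xdp_sock_drv.h>

static void zc_pass_one(struct napi_struct *napi, struct xdp_buff *xdp)
{
	struct sk_buff *skb;

	skb = xdp_build_skb_from_zc(xdp);
	if (unlikely(!skb)) {
		/* Helper failed: the XSK buffer is still owned here, so
		 * drop it and (in a real driver) bump an alloc-fail stat.
		 */
		xsk_buff_free(xdp);
		return;
	}

	/* On success the helper has already consumed the XSK buffer. */
	if (eth_skb_pad(skb))	/* frees the skb on error */
		return;

	napi_gro_receive(napi, skb);
}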

View File

@@ -22,7 +22,7 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring);
void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring);
bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool);
int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc);
int ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present);
void ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
u16 qid);
void ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector,
@@ -77,8 +77,8 @@ static inline void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { }
static inline void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { }
static inline int
ice_realloc_zc_buf(struct ice_vsi __always_unused *vsi,
bool __always_unused zc)
ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring,
bool __always_unused pool_present)
{
return 0;
}
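
The stub above tracks the prototype change: the VSI-wide ice_realloc_zc_buf() wrapper is gone and the per-ring ice_realloc_rx_xdp_bufs() is exported in its place. For reference, a caller that still wanted the old whole-VSI behaviour could loop much as the removed wrapper did; the sketch below is reconstructed from the deleted code earlier in this diff, and the function name is hypothetical.

/* Hypothetical whole-VSI pass mirroring the removed ice_realloc_zc_buf(). */
#include "ice.h"
#include "ice_xsk.h"

static int example_realloc_vsi_xdp_bufs(struct ice_vsi *vsi, bool zc)
{
	unsigned int i;

	ice_for_each_rxq(vsi, i) {
		struct ice_rx_ring *rx_ring = vsi->rx_rings[i];

		/* Only rings that may be driven by an XSK pool need the
		 * larger xdp_buf array.
		 */
		if (!rx_ring->xsk_pool)
			continue;

		if (ice_realloc_rx_xdp_bufs(rx_ring, zc))
			return -ENOMEM;
	}

	return 0;
}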

View File

@@ -842,18 +842,17 @@ int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
(qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
qpi->rxq.databuffer_size < 1024))
goto error_param;
ring->rx_buf_len = qpi->rxq.databuffer_size;
if (qpi->rxq.max_pkt_size > max_frame_size ||
qpi->rxq.max_pkt_size < 64)
goto error_param;
ring->max_frame = qpi->rxq.max_pkt_size;
vsi->max_frame = qpi->rxq.max_pkt_size;
/* add space for the port VLAN since the VF driver is
* not expected to account for it in the MTU
* calculation
*/
if (ice_vf_is_port_vlan_ena(vf))
ring->max_frame += VLAN_HLEN;
vsi->max_frame += VLAN_HLEN;
if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",

View File

@@ -734,13 +734,11 @@ static inline bool idpf_is_rdma_cap_ena(struct idpf_adapter *adapter)
}
#define IDPF_CAP_RSS (\
VIRTCHNL2_FLOW_IPV4_TCP |\
VIRTCHNL2_FLOW_IPV4_TCP |\
VIRTCHNL2_FLOW_IPV4_UDP |\
VIRTCHNL2_FLOW_IPV4_SCTP |\
VIRTCHNL2_FLOW_IPV4_OTHER |\
VIRTCHNL2_FLOW_IPV6_TCP |\
VIRTCHNL2_FLOW_IPV6_TCP |\
VIRTCHNL2_FLOW_IPV6_UDP |\
VIRTCHNL2_FLOW_IPV6_SCTP |\
VIRTCHNL2_FLOW_IPV6_OTHER)

View File

@@ -1235,7 +1235,7 @@ static int igbvf_vlan_rx_add_vid(struct net_device *netdev,
spin_lock_bh(&hw->mbx_lock);
if (hw->mac.ops.set_vfta(hw, vid, true)) {
dev_warn(&adapter->pdev->dev, "Vlan id %d\n is not added", vid);
dev_warn(&adapter->pdev->dev, "Vlan id %d is not added\n", vid);
spin_unlock_bh(&hw->mbx_lock);
return -EINVAL;
}

View File

@@ -198,7 +198,7 @@ static int prot_autoc_read_82599(struct ixgbe_hw *hw, bool *locked,
* @hw: pointer to hardware structure
* @autoc: value to write to AUTOC
* @locked: bool to indicate whether the SW/FW lock was already taken by
* previous proc_autoc_read_82599.
* previous prot_autoc_read_82599.
*
* This part (82599) may need to hold the SW/FW lock around all writes to
* AUTOC. Likewise after a write we need to do a pipeline reset.
@@ -1622,7 +1622,7 @@ int ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw,
break;
}
/* store source and destination IP masks (big-enian) */
/* store source and destination IP masks (big-endian) */
IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M,
~input_mask->formatted.src_ip[0]);
IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M,

View File

@@ -318,7 +318,7 @@ static int ixgbe_xdp_queues(struct ixgbe_adapter *adapter)
* ixgbe_set_dcb_sriov_queues: Allocate queues for SR-IOV devices w/ DCB
* @adapter: board private structure to initialize
*
* When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues
* When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
* and VM pools where appropriate. Also assign queues based on DCB
* priorities and map accordingly.
*
@@ -492,7 +492,7 @@ static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
* ixgbe_set_sriov_queues - Allocate queues for SR-IOV devices
* @adapter: board private structure to initialize
*
* When SR-IOV (Single Root IO Virtualiztion) is enabled, allocate queues
* When SR-IOV (Single Root IO Virtualization) is enabled, allocate queues
* and VM pools where appropriate. If RSS is available, then also try and
* enable RSS and map accordingly.
*