mirror of
https://github.com/torvalds/linux.git
synced 2025-12-07 20:06:24 +00:00
Merge tag 'nf-next-25-11-28' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following batch contains Netfilter updates for net-next:
0) Add sanity check for maximum encapsulations in bridge vlan,
reported by the new AI robot.
1) Move the flowtable path discovery code to its own file, the
nft_flow_offload.c mixes the nf_tables evaluation with the path
discovery logic, just split this in two for clarity.
2) Consolidate flowtable xmit path by using dev_queue_xmit() and the
real device behind the layer 2 vlan/pppoe device. This allows the
encapsulation to be inlined. After this update, hw_ifidx can be removed
since both ifidx and hw_ifidx now point to the same device.
3) Support for IPIP encapsulation in the flowtable, extend selftest
to cover this new layer 3 offload, from Lorenzo Bianconi.
4) Push down the skb into the conncount API to fix duplicates in the
conncount list for packets with non-confirmed conntrack entries.
This is due to an optimization introduced in d265929930
("netfilter: nf_conncount: reduce unnecessary GC").
From Fernando Fernandez Mancera.
5) In conncount, disable BH when performing garbage collection
to consolidate existing behaviour in the conncount API, also
from Fernando.
6) A matching packet with a confirmed conntrack invokes GC if
conncount reaches the limit in an attempt to release slots.
This allows the existing extensions to be used for real conntrack
counting, not just limiting new connections, from Fernando.
7) Support for updating ct count objects in nf_tables, from Fernando.
8) Extend nft_flowtables.sh selftest to send IPv6 TCP traffic,
from Lorenzo Bianconi.
9) Fixes for UAPI kernel-doc documentation, from Randy Dunlap.
* tag 'nf-next-25-11-28' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
netfilter: nf_tables: improve UAPI kernel-doc comments
netfilter: ip6t_srh: fix UAPI kernel-doc comments format
selftests: netfilter: nft_flowtable.sh: Add the capability to send IPv6 TCP traffic
netfilter: nft_connlimit: add support to object update operation
netfilter: nft_connlimit: update the count if add was skipped
netfilter: nf_conncount: make nf_conncount_gc_list() to disable BH
netfilter: nf_conncount: rework API to use sk_buff directly
selftests: netfilter: nft_flowtable.sh: Add IPIP flowtable selftest
netfilter: flowtable: Add IPIP tx sw acceleration
netfilter: flowtable: Add IPIP rx sw acceleration
netfilter: flowtable: use tuple address to calculate next hop
netfilter: flowtable: remove hw_ifidx
netfilter: flowtable: inline pppoe encapsulation in xmit path
netfilter: flowtable: inline vlan encapsulation in xmit path
netfilter: flowtable: consolidate xmit path
netfilter: flowtable: move path discovery infrastructure to its own file
netfilter: flowtable: check for maximum number of encapsulations in bridge vlan
====================
Link: https://patch.msgid.link/20251128002345.29378-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
@@ -877,6 +877,7 @@ enum net_device_path_type {
|
||||
DEV_PATH_PPPOE,
|
||||
DEV_PATH_DSA,
|
||||
DEV_PATH_MTK_WDMA,
|
||||
DEV_PATH_TUN,
|
||||
};
|
||||
|
||||
struct net_device_path {
|
||||
@@ -888,6 +889,18 @@ struct net_device_path {
|
||||
__be16 proto;
|
||||
u8 h_dest[ETH_ALEN];
|
||||
} encap;
|
||||
struct {
|
||||
union {
|
||||
struct in_addr src_v4;
|
||||
struct in6_addr src_v6;
|
||||
};
|
||||
union {
|
||||
struct in_addr dst_v4;
|
||||
struct in6_addr dst_v6;
|
||||
};
|
||||
|
||||
u8 l3_proto;
|
||||
} tun;
|
||||
struct {
|
||||
enum {
|
||||
DEV_PATH_BR_VLAN_KEEP,
|
||||
|
||||
@@ -18,15 +18,14 @@ struct nf_conncount_list {
|
||||
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen);
|
||||
void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data);
|
||||
|
||||
unsigned int nf_conncount_count(struct net *net,
|
||||
struct nf_conncount_data *data,
|
||||
const u32 *key,
|
||||
const struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conntrack_zone *zone);
|
||||
unsigned int nf_conncount_count_skb(struct net *net,
|
||||
const struct sk_buff *skb,
|
||||
u16 l3num,
|
||||
struct nf_conncount_data *data,
|
||||
const u32 *key);
|
||||
|
||||
int nf_conncount_add(struct net *net, struct nf_conncount_list *list,
|
||||
const struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conntrack_zone *zone);
|
||||
int nf_conncount_add_skb(struct net *net, const struct sk_buff *skb,
|
||||
u16 l3num, struct nf_conncount_list *list);
|
||||
|
||||
void nf_conncount_list_init(struct nf_conncount_list *list);
|
||||
|
||||
|
||||
@@ -107,6 +107,19 @@ enum flow_offload_xmit_type {
|
||||
|
||||
#define NF_FLOW_TABLE_ENCAP_MAX 2
|
||||
|
||||
struct flow_offload_tunnel {
|
||||
union {
|
||||
struct in_addr src_v4;
|
||||
struct in6_addr src_v6;
|
||||
};
|
||||
union {
|
||||
struct in_addr dst_v4;
|
||||
struct in6_addr dst_v6;
|
||||
};
|
||||
|
||||
u8 l3_proto;
|
||||
};
|
||||
|
||||
struct flow_offload_tuple {
|
||||
union {
|
||||
struct in_addr src_v4;
|
||||
@@ -130,22 +143,25 @@ struct flow_offload_tuple {
|
||||
__be16 proto;
|
||||
} encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
|
||||
struct flow_offload_tunnel tun;
|
||||
|
||||
/* All members above are keys for lookups, see flow_offload_hash(). */
|
||||
struct { } __hash;
|
||||
|
||||
u8 dir:2,
|
||||
xmit_type:3,
|
||||
encap_num:2,
|
||||
tun_num:2,
|
||||
in_vlan_ingress:2;
|
||||
u16 mtu;
|
||||
union {
|
||||
struct {
|
||||
struct dst_entry *dst_cache;
|
||||
u32 ifidx;
|
||||
u32 dst_cookie;
|
||||
};
|
||||
struct {
|
||||
u32 ifidx;
|
||||
u32 hw_ifidx;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
} out;
|
||||
@@ -206,7 +222,9 @@ struct nf_flow_route {
|
||||
u16 id;
|
||||
__be16 proto;
|
||||
} encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
struct flow_offload_tunnel tun;
|
||||
u8 num_encaps:2,
|
||||
num_tuns:2,
|
||||
ingress_vlans:2;
|
||||
} in;
|
||||
struct {
|
||||
@@ -222,6 +240,12 @@ struct nf_flow_route {
|
||||
struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
|
||||
void flow_offload_free(struct flow_offload *flow);
|
||||
|
||||
struct nft_flowtable;
|
||||
struct nft_pktinfo;
|
||||
int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
|
||||
struct nf_flow_route *route, enum ip_conntrack_dir dir,
|
||||
struct nft_flowtable *ft);
|
||||
|
||||
static inline int
|
||||
nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
|
||||
flow_setup_cb_t *cb, void *cb_priv)
|
||||
|
||||
@@ -881,7 +881,7 @@ enum nft_exthdr_flags {
|
||||
* enum nft_exthdr_op - nf_tables match options
|
||||
*
|
||||
* @NFT_EXTHDR_OP_IPV6: match against ipv6 extension headers
|
||||
* @NFT_EXTHDR_OP_TCP: match against tcp options
|
||||
* @NFT_EXTHDR_OP_TCPOPT: match against tcp options
|
||||
* @NFT_EXTHDR_OP_IPV4: match against ipv4 options
|
||||
* @NFT_EXTHDR_OP_SCTP: match against sctp chunks
|
||||
* @NFT_EXTHDR_OP_DCCP: match against dccp otions
|
||||
@@ -1200,7 +1200,7 @@ enum nft_ct_attributes {
|
||||
#define NFTA_CT_MAX (__NFTA_CT_MAX - 1)
|
||||
|
||||
/**
|
||||
* enum nft_flow_attributes - ct offload expression attributes
|
||||
* enum nft_offload_attributes - ct offload expression attributes
|
||||
* @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING)
|
||||
*/
|
||||
enum nft_offload_attributes {
|
||||
@@ -1410,7 +1410,7 @@ enum nft_reject_types {
|
||||
};
|
||||
|
||||
/**
|
||||
* enum nft_reject_code - Generic reject codes for IPv4/IPv6
|
||||
* enum nft_reject_inet_code - Generic reject codes for IPv4/IPv6
|
||||
*
|
||||
* @NFT_REJECT_ICMPX_NO_ROUTE: no route to host / network unreachable
|
||||
* @NFT_REJECT_ICMPX_PORT_UNREACH: port unreachable
|
||||
@@ -1480,9 +1480,9 @@ enum nft_nat_attributes {
|
||||
/**
|
||||
* enum nft_tproxy_attributes - nf_tables tproxy expression netlink attributes
|
||||
*
|
||||
* NFTA_TPROXY_FAMILY: Target address family (NLA_U32: nft_registers)
|
||||
* NFTA_TPROXY_REG_ADDR: Target address register (NLA_U32: nft_registers)
|
||||
* NFTA_TPROXY_REG_PORT: Target port register (NLA_U32: nft_registers)
|
||||
* @NFTA_TPROXY_FAMILY: Target address family (NLA_U32: nft_registers)
|
||||
* @NFTA_TPROXY_REG_ADDR: Target address register (NLA_U32: nft_registers)
|
||||
* @NFTA_TPROXY_REG_PORT: Target port register (NLA_U32: nft_registers)
|
||||
*/
|
||||
enum nft_tproxy_attributes {
|
||||
NFTA_TPROXY_UNSPEC,
|
||||
@@ -1783,7 +1783,7 @@ enum nft_synproxy_attributes {
|
||||
#define NFTA_SYNPROXY_MAX (__NFTA_SYNPROXY_MAX - 1)
|
||||
|
||||
/**
|
||||
* enum nft_device_attributes - nf_tables device netlink attributes
|
||||
* enum nft_devices_attributes - nf_tables device netlink attributes
|
||||
*
|
||||
* @NFTA_DEVICE_NAME: name of this device (NLA_STRING)
|
||||
* @NFTA_DEVICE_PREFIX: device name prefix, a simple wildcard (NLA_STRING)
|
||||
|
||||
@@ -41,13 +41,13 @@
|
||||
|
||||
/**
|
||||
* struct ip6t_srh - SRH match options
|
||||
* @ next_hdr: Next header field of SRH
|
||||
* @ hdr_len: Extension header length field of SRH
|
||||
* @ segs_left: Segments left field of SRH
|
||||
* @ last_entry: Last entry field of SRH
|
||||
* @ tag: Tag field of SRH
|
||||
* @ mt_flags: match options
|
||||
* @ mt_invflags: Invert the sense of match options
|
||||
* @next_hdr: Next header field of SRH
|
||||
* @hdr_len: Extension header length field of SRH
|
||||
* @segs_left: Segments left field of SRH
|
||||
* @last_entry: Last entry field of SRH
|
||||
* @tag: Tag field of SRH
|
||||
* @mt_flags: match options
|
||||
* @mt_invflags: Invert the sense of match options
|
||||
*/
|
||||
|
||||
struct ip6t_srh {
|
||||
@@ -62,19 +62,19 @@ struct ip6t_srh {
|
||||
|
||||
/**
|
||||
* struct ip6t_srh1 - SRH match options (revision 1)
|
||||
* @ next_hdr: Next header field of SRH
|
||||
* @ hdr_len: Extension header length field of SRH
|
||||
* @ segs_left: Segments left field of SRH
|
||||
* @ last_entry: Last entry field of SRH
|
||||
* @ tag: Tag field of SRH
|
||||
* @ psid_addr: Address of previous SID in SRH SID list
|
||||
* @ nsid_addr: Address of NEXT SID in SRH SID list
|
||||
* @ lsid_addr: Address of LAST SID in SRH SID list
|
||||
* @ psid_msk: Mask of previous SID in SRH SID list
|
||||
* @ nsid_msk: Mask of next SID in SRH SID list
|
||||
* @ lsid_msk: MAsk of last SID in SRH SID list
|
||||
* @ mt_flags: match options
|
||||
* @ mt_invflags: Invert the sense of match options
|
||||
* @next_hdr: Next header field of SRH
|
||||
* @hdr_len: Extension header length field of SRH
|
||||
* @segs_left: Segments left field of SRH
|
||||
* @last_entry: Last entry field of SRH
|
||||
* @tag: Tag field of SRH
|
||||
* @psid_addr: Address of previous SID in SRH SID list
|
||||
* @nsid_addr: Address of NEXT SID in SRH SID list
|
||||
* @lsid_addr: Address of LAST SID in SRH SID list
|
||||
* @psid_msk: Mask of previous SID in SRH SID list
|
||||
* @nsid_msk: Mask of next SID in SRH SID list
|
||||
* @lsid_msk: MAsk of last SID in SRH SID list
|
||||
* @mt_flags: match options
|
||||
* @mt_invflags: Invert the sense of match options
|
||||
*/
|
||||
|
||||
struct ip6t_srh1 {
|
||||
|
||||
@@ -353,6 +353,30 @@ ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd)
|
||||
return ip_tunnel_ctl(dev, p, cmd);
|
||||
}
|
||||
|
||||
static int ipip_fill_forward_path(struct net_device_path_ctx *ctx,
|
||||
struct net_device_path *path)
|
||||
{
|
||||
struct ip_tunnel *tunnel = netdev_priv(ctx->dev);
|
||||
const struct iphdr *tiph = &tunnel->parms.iph;
|
||||
struct rtable *rt;
|
||||
|
||||
rt = ip_route_output(dev_net(ctx->dev), tiph->daddr, 0, 0, 0,
|
||||
RT_SCOPE_UNIVERSE);
|
||||
if (IS_ERR(rt))
|
||||
return PTR_ERR(rt);
|
||||
|
||||
path->type = DEV_PATH_TUN;
|
||||
path->tun.src_v4.s_addr = tiph->saddr;
|
||||
path->tun.dst_v4.s_addr = tiph->daddr;
|
||||
path->tun.l3_proto = IPPROTO_IPIP;
|
||||
path->dev = ctx->dev;
|
||||
|
||||
ctx->dev = rt->dst.dev;
|
||||
ip_rt_put(rt);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct net_device_ops ipip_netdev_ops = {
|
||||
.ndo_init = ipip_tunnel_init,
|
||||
.ndo_uninit = ip_tunnel_uninit,
|
||||
@@ -362,6 +386,7 @@ static const struct net_device_ops ipip_netdev_ops = {
|
||||
.ndo_get_stats64 = dev_get_tstats64,
|
||||
.ndo_get_iflink = ip_tunnel_get_iflink,
|
||||
.ndo_tunnel_ctl = ipip_tunnel_ctl,
|
||||
.ndo_fill_forward_path = ipip_fill_forward_path,
|
||||
};
|
||||
|
||||
#define IPIP_FEATURES (NETIF_F_SG | \
|
||||
|
||||
@@ -141,6 +141,7 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
|
||||
# flow table infrastructure
|
||||
obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
|
||||
nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
|
||||
nf_flow_table_path.o \
|
||||
nf_flow_table_offload.o nf_flow_table_xdp.o
|
||||
nf_flow_table-$(CONFIG_NF_FLOW_TABLE_PROCFS) += nf_flow_table_procfs.o
|
||||
ifeq ($(CONFIG_NF_FLOW_TABLE),m)
|
||||
|
||||
@@ -122,15 +122,65 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
|
||||
return ERR_PTR(-EAGAIN);
|
||||
}
|
||||
|
||||
static int __nf_conncount_add(struct net *net,
|
||||
struct nf_conncount_list *list,
|
||||
const struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conntrack_zone *zone)
|
||||
static bool get_ct_or_tuple_from_skb(struct net *net,
|
||||
const struct sk_buff *skb,
|
||||
u16 l3num,
|
||||
struct nf_conn **ct,
|
||||
struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conntrack_zone **zone,
|
||||
bool *refcounted)
|
||||
{
|
||||
const struct nf_conntrack_tuple_hash *h;
|
||||
enum ip_conntrack_info ctinfo;
|
||||
struct nf_conn *found_ct;
|
||||
|
||||
found_ct = nf_ct_get(skb, &ctinfo);
|
||||
if (found_ct && !nf_ct_is_template(found_ct)) {
|
||||
*tuple = found_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
|
||||
*zone = nf_ct_zone(found_ct);
|
||||
*ct = found_ct;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, net, tuple))
|
||||
return false;
|
||||
|
||||
if (found_ct)
|
||||
*zone = nf_ct_zone(found_ct);
|
||||
|
||||
h = nf_conntrack_find_get(net, *zone, tuple);
|
||||
if (!h)
|
||||
return true;
|
||||
|
||||
found_ct = nf_ct_tuplehash_to_ctrack(h);
|
||||
*refcounted = true;
|
||||
*ct = found_ct;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int __nf_conncount_add(struct net *net,
|
||||
const struct sk_buff *skb,
|
||||
u16 l3num,
|
||||
struct nf_conncount_list *list)
|
||||
{
|
||||
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
|
||||
const struct nf_conntrack_tuple_hash *found;
|
||||
struct nf_conncount_tuple *conn, *conn_n;
|
||||
struct nf_conntrack_tuple tuple;
|
||||
struct nf_conn *ct = NULL;
|
||||
struct nf_conn *found_ct;
|
||||
unsigned int collect = 0;
|
||||
bool refcounted = false;
|
||||
|
||||
if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted))
|
||||
return -ENOENT;
|
||||
|
||||
if (ct && nf_ct_is_confirmed(ct)) {
|
||||
if (refcounted)
|
||||
nf_ct_put(ct);
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
if ((u32)jiffies == list->last_gc)
|
||||
goto add_new_node;
|
||||
@@ -144,10 +194,10 @@ static int __nf_conncount_add(struct net *net,
|
||||
if (IS_ERR(found)) {
|
||||
/* Not found, but might be about to be confirmed */
|
||||
if (PTR_ERR(found) == -EAGAIN) {
|
||||
if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
|
||||
if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
|
||||
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
|
||||
nf_ct_zone_id(zone, zone->dir))
|
||||
return 0; /* already exists */
|
||||
goto out_put; /* already exists */
|
||||
} else {
|
||||
collect++;
|
||||
}
|
||||
@@ -156,7 +206,7 @@ static int __nf_conncount_add(struct net *net,
|
||||
|
||||
found_ct = nf_ct_tuplehash_to_ctrack(found);
|
||||
|
||||
if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
|
||||
if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
|
||||
nf_ct_zone_equal(found_ct, zone, zone->dir)) {
|
||||
/*
|
||||
* We should not see tuples twice unless someone hooks
|
||||
@@ -165,7 +215,7 @@ static int __nf_conncount_add(struct net *net,
|
||||
* Attempt to avoid a re-add in this case.
|
||||
*/
|
||||
nf_ct_put(found_ct);
|
||||
return 0;
|
||||
goto out_put;
|
||||
} else if (already_closed(found_ct)) {
|
||||
/*
|
||||
* we do not care about connections which are
|
||||
@@ -188,31 +238,35 @@ add_new_node:
|
||||
if (conn == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
conn->tuple = *tuple;
|
||||
conn->tuple = tuple;
|
||||
conn->zone = *zone;
|
||||
conn->cpu = raw_smp_processor_id();
|
||||
conn->jiffies32 = (u32)jiffies;
|
||||
list_add_tail(&conn->node, &list->head);
|
||||
list->count++;
|
||||
list->last_gc = (u32)jiffies;
|
||||
|
||||
out_put:
|
||||
if (refcounted)
|
||||
nf_ct_put(ct);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nf_conncount_add(struct net *net,
|
||||
struct nf_conncount_list *list,
|
||||
const struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conntrack_zone *zone)
|
||||
int nf_conncount_add_skb(struct net *net,
|
||||
const struct sk_buff *skb,
|
||||
u16 l3num,
|
||||
struct nf_conncount_list *list)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* check the saved connections */
|
||||
spin_lock_bh(&list->list_lock);
|
||||
ret = __nf_conncount_add(net, list, tuple, zone);
|
||||
ret = __nf_conncount_add(net, skb, l3num, list);
|
||||
spin_unlock_bh(&list->list_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_conncount_add);
|
||||
EXPORT_SYMBOL_GPL(nf_conncount_add_skb);
|
||||
|
||||
void nf_conncount_list_init(struct nf_conncount_list *list)
|
||||
{
|
||||
@@ -224,8 +278,8 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
|
||||
EXPORT_SYMBOL_GPL(nf_conncount_list_init);
|
||||
|
||||
/* Return true if the list is empty. Must be called with BH disabled. */
|
||||
bool nf_conncount_gc_list(struct net *net,
|
||||
struct nf_conncount_list *list)
|
||||
static bool __nf_conncount_gc_list(struct net *net,
|
||||
struct nf_conncount_list *list)
|
||||
{
|
||||
const struct nf_conntrack_tuple_hash *found;
|
||||
struct nf_conncount_tuple *conn, *conn_n;
|
||||
@@ -237,10 +291,6 @@ bool nf_conncount_gc_list(struct net *net,
|
||||
if ((u32)jiffies == READ_ONCE(list->last_gc))
|
||||
return false;
|
||||
|
||||
/* don't bother if other cpu is already doing GC */
|
||||
if (!spin_trylock(&list->list_lock))
|
||||
return false;
|
||||
|
||||
list_for_each_entry_safe(conn, conn_n, &list->head, node) {
|
||||
found = find_or_evict(net, list, conn);
|
||||
if (IS_ERR(found)) {
|
||||
@@ -269,7 +319,21 @@ bool nf_conncount_gc_list(struct net *net,
|
||||
if (!list->count)
|
||||
ret = true;
|
||||
list->last_gc = (u32)jiffies;
|
||||
spin_unlock(&list->list_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool nf_conncount_gc_list(struct net *net,
|
||||
struct nf_conncount_list *list)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
/* don't bother if other cpu is already doing GC */
|
||||
if (!spin_trylock_bh(&list->list_lock))
|
||||
return false;
|
||||
|
||||
ret = __nf_conncount_gc_list(net, list);
|
||||
spin_unlock_bh(&list->list_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -309,19 +373,22 @@ static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
|
||||
|
||||
static unsigned int
|
||||
insert_tree(struct net *net,
|
||||
const struct sk_buff *skb,
|
||||
u16 l3num,
|
||||
struct nf_conncount_data *data,
|
||||
struct rb_root *root,
|
||||
unsigned int hash,
|
||||
const u32 *key,
|
||||
const struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conntrack_zone *zone)
|
||||
const u32 *key)
|
||||
{
|
||||
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
|
||||
struct rb_node **rbnode, *parent;
|
||||
struct nf_conncount_rb *rbconn;
|
||||
struct nf_conncount_tuple *conn;
|
||||
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
|
||||
bool do_gc = true, refcounted = false;
|
||||
unsigned int count = 0, gc_count = 0;
|
||||
bool do_gc = true;
|
||||
struct rb_node **rbnode, *parent;
|
||||
struct nf_conntrack_tuple tuple;
|
||||
struct nf_conncount_tuple *conn;
|
||||
struct nf_conncount_rb *rbconn;
|
||||
struct nf_conn *ct = NULL;
|
||||
|
||||
spin_lock_bh(&nf_conncount_locks[hash]);
|
||||
restart:
|
||||
@@ -340,8 +407,8 @@ restart:
|
||||
} else {
|
||||
int ret;
|
||||
|
||||
ret = nf_conncount_add(net, &rbconn->list, tuple, zone);
|
||||
if (ret)
|
||||
ret = nf_conncount_add_skb(net, skb, l3num, &rbconn->list);
|
||||
if (ret && ret != -EEXIST)
|
||||
count = 0; /* hotdrop */
|
||||
else
|
||||
count = rbconn->list.count;
|
||||
@@ -364,30 +431,35 @@ restart:
|
||||
goto restart;
|
||||
}
|
||||
|
||||
/* expected case: match, insert new node */
|
||||
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
|
||||
if (rbconn == NULL)
|
||||
goto out_unlock;
|
||||
if (get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted)) {
|
||||
/* expected case: match, insert new node */
|
||||
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
|
||||
if (rbconn == NULL)
|
||||
goto out_unlock;
|
||||
|
||||
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
|
||||
if (conn == NULL) {
|
||||
kmem_cache_free(conncount_rb_cachep, rbconn);
|
||||
goto out_unlock;
|
||||
conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC);
|
||||
if (conn == NULL) {
|
||||
kmem_cache_free(conncount_rb_cachep, rbconn);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
conn->tuple = tuple;
|
||||
conn->zone = *zone;
|
||||
conn->cpu = raw_smp_processor_id();
|
||||
conn->jiffies32 = (u32)jiffies;
|
||||
memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
|
||||
|
||||
nf_conncount_list_init(&rbconn->list);
|
||||
list_add(&conn->node, &rbconn->list.head);
|
||||
count = 1;
|
||||
rbconn->list.count = count;
|
||||
|
||||
rb_link_node_rcu(&rbconn->node, parent, rbnode);
|
||||
rb_insert_color(&rbconn->node, root);
|
||||
|
||||
if (refcounted)
|
||||
nf_ct_put(ct);
|
||||
}
|
||||
|
||||
conn->tuple = *tuple;
|
||||
conn->zone = *zone;
|
||||
conn->cpu = raw_smp_processor_id();
|
||||
conn->jiffies32 = (u32)jiffies;
|
||||
memcpy(rbconn->key, key, sizeof(u32) * data->keylen);
|
||||
|
||||
nf_conncount_list_init(&rbconn->list);
|
||||
list_add(&conn->node, &rbconn->list.head);
|
||||
count = 1;
|
||||
rbconn->list.count = count;
|
||||
|
||||
rb_link_node_rcu(&rbconn->node, parent, rbnode);
|
||||
rb_insert_color(&rbconn->node, root);
|
||||
out_unlock:
|
||||
spin_unlock_bh(&nf_conncount_locks[hash]);
|
||||
return count;
|
||||
@@ -395,10 +467,10 @@ out_unlock:
|
||||
|
||||
static unsigned int
|
||||
count_tree(struct net *net,
|
||||
const struct sk_buff *skb,
|
||||
u16 l3num,
|
||||
struct nf_conncount_data *data,
|
||||
const u32 *key,
|
||||
const struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conntrack_zone *zone)
|
||||
const u32 *key)
|
||||
{
|
||||
struct rb_root *root;
|
||||
struct rb_node *parent;
|
||||
@@ -422,7 +494,7 @@ count_tree(struct net *net,
|
||||
} else {
|
||||
int ret;
|
||||
|
||||
if (!tuple) {
|
||||
if (!skb) {
|
||||
nf_conncount_gc_list(net, &rbconn->list);
|
||||
return rbconn->list.count;
|
||||
}
|
||||
@@ -437,19 +509,23 @@ count_tree(struct net *net,
|
||||
}
|
||||
|
||||
/* same source network -> be counted! */
|
||||
ret = __nf_conncount_add(net, &rbconn->list, tuple, zone);
|
||||
ret = __nf_conncount_add(net, skb, l3num, &rbconn->list);
|
||||
spin_unlock_bh(&rbconn->list.list_lock);
|
||||
if (ret)
|
||||
if (ret && ret != -EEXIST) {
|
||||
return 0; /* hotdrop */
|
||||
else
|
||||
} else {
|
||||
/* -EEXIST means add was skipped, update the list */
|
||||
if (ret == -EEXIST)
|
||||
nf_conncount_gc_list(net, &rbconn->list);
|
||||
return rbconn->list.count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!tuple)
|
||||
if (!skb)
|
||||
return 0;
|
||||
|
||||
return insert_tree(net, data, root, hash, key, tuple, zone);
|
||||
return insert_tree(net, skb, l3num, data, root, hash, key);
|
||||
}
|
||||
|
||||
static void tree_gc_worker(struct work_struct *work)
|
||||
@@ -511,18 +587,19 @@ next:
|
||||
}
|
||||
|
||||
/* Count and return number of conntrack entries in 'net' with particular 'key'.
|
||||
* If 'tuple' is not null, insert it into the accounting data structure.
|
||||
* Call with RCU read lock.
|
||||
* If 'skb' is not null, insert the corresponding tuple into the accounting
|
||||
* data structure. Call with RCU read lock.
|
||||
*/
|
||||
unsigned int nf_conncount_count(struct net *net,
|
||||
struct nf_conncount_data *data,
|
||||
const u32 *key,
|
||||
const struct nf_conntrack_tuple *tuple,
|
||||
const struct nf_conntrack_zone *zone)
|
||||
unsigned int nf_conncount_count_skb(struct net *net,
|
||||
const struct sk_buff *skb,
|
||||
u16 l3num,
|
||||
struct nf_conncount_data *data,
|
||||
const u32 *key)
|
||||
{
|
||||
return count_tree(net, data, key, tuple, zone);
|
||||
return count_tree(net, skb, l3num, data, key);
|
||||
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_conncount_count);
|
||||
EXPORT_SYMBOL_GPL(nf_conncount_count_skb);
|
||||
|
||||
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen)
|
||||
{
|
||||
|
||||
@@ -118,7 +118,10 @@ static int flow_offload_fill_route(struct flow_offload *flow,
|
||||
flow_tuple->in_vlan_ingress |= BIT(j);
|
||||
j++;
|
||||
}
|
||||
|
||||
flow_tuple->tun = route->tuple[dir].in.tun;
|
||||
flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
|
||||
flow_tuple->tun_num = route->tuple[dir].in.num_tuns;
|
||||
|
||||
switch (route->tuple[dir].xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
@@ -127,11 +130,11 @@ static int flow_offload_fill_route(struct flow_offload *flow,
|
||||
memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
|
||||
ETH_ALEN);
|
||||
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
|
||||
flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
|
||||
dst_release(dst);
|
||||
break;
|
||||
case FLOW_OFFLOAD_XMIT_XFRM:
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
flow_tuple->ifidx = route->tuple[dir].out.ifindex;
|
||||
flow_tuple->dst_cache = dst;
|
||||
flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
|
||||
break;
|
||||
|
||||
@@ -145,8 +145,11 @@ static bool ip_has_options(unsigned int thoff)
|
||||
static void nf_flow_tuple_encap(struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple)
|
||||
{
|
||||
__be16 inner_proto = skb->protocol;
|
||||
struct vlan_ethhdr *veth;
|
||||
struct pppoe_hdr *phdr;
|
||||
struct iphdr *iph;
|
||||
u16 offset = 0;
|
||||
int i = 0;
|
||||
|
||||
if (skb_vlan_tag_present(skb)) {
|
||||
@@ -159,13 +162,26 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
|
||||
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
|
||||
tuple->encap[i].proto = skb->protocol;
|
||||
inner_proto = veth->h_vlan_encapsulated_proto;
|
||||
offset += VLAN_HLEN;
|
||||
break;
|
||||
case htons(ETH_P_PPP_SES):
|
||||
phdr = (struct pppoe_hdr *)skb_network_header(skb);
|
||||
tuple->encap[i].id = ntohs(phdr->sid);
|
||||
tuple->encap[i].proto = skb->protocol;
|
||||
inner_proto = *((__be16 *)(phdr + 1));
|
||||
offset += PPPOE_SES_HLEN;
|
||||
break;
|
||||
}
|
||||
|
||||
if (inner_proto == htons(ETH_P_IP)) {
|
||||
iph = (struct iphdr *)(skb_network_header(skb) + offset);
|
||||
if (iph->protocol == IPPROTO_IPIP) {
|
||||
tuple->tun.dst_v4.s_addr = iph->daddr;
|
||||
tuple->tun.src_v4.s_addr = iph->saddr;
|
||||
tuple->tun.l3_proto = IPPROTO_IPIP;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct nf_flowtable_ctx {
|
||||
@@ -277,11 +293,46 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
|
||||
return NF_STOLEN;
|
||||
}
|
||||
|
||||
static bool nf_flow_ip4_tunnel_proto(struct sk_buff *skb, u32 *psize)
|
||||
{
|
||||
struct iphdr *iph;
|
||||
u16 size;
|
||||
|
||||
if (!pskb_may_pull(skb, sizeof(*iph) + *psize))
|
||||
return false;
|
||||
|
||||
iph = (struct iphdr *)(skb_network_header(skb) + *psize);
|
||||
size = iph->ihl << 2;
|
||||
|
||||
if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
|
||||
return false;
|
||||
|
||||
if (iph->ttl <= 1)
|
||||
return false;
|
||||
|
||||
if (iph->protocol == IPPROTO_IPIP)
|
||||
*psize += size;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void nf_flow_ip4_tunnel_pop(struct sk_buff *skb)
|
||||
{
|
||||
struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
|
||||
|
||||
if (iph->protocol != IPPROTO_IPIP)
|
||||
return;
|
||||
|
||||
skb_pull(skb, iph->ihl << 2);
|
||||
skb_reset_network_header(skb);
|
||||
}
|
||||
|
||||
static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
|
||||
u32 *offset)
|
||||
{
|
||||
__be16 inner_proto = skb->protocol;
|
||||
struct vlan_ethhdr *veth;
|
||||
__be16 inner_proto;
|
||||
bool ret = false;
|
||||
|
||||
switch (skb->protocol) {
|
||||
case htons(ETH_P_8021Q):
|
||||
@@ -291,19 +342,23 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
|
||||
veth = (struct vlan_ethhdr *)skb_mac_header(skb);
|
||||
if (veth->h_vlan_encapsulated_proto == proto) {
|
||||
*offset += VLAN_HLEN;
|
||||
return true;
|
||||
inner_proto = proto;
|
||||
ret = true;
|
||||
}
|
||||
break;
|
||||
case htons(ETH_P_PPP_SES):
|
||||
if (nf_flow_pppoe_proto(skb, &inner_proto) &&
|
||||
inner_proto == proto) {
|
||||
*offset += PPPOE_SES_HLEN;
|
||||
return true;
|
||||
ret = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
if (inner_proto == htons(ETH_P_IP))
|
||||
ret = nf_flow_ip4_tunnel_proto(skb, offset);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nf_flow_encap_pop(struct sk_buff *skb,
|
||||
@@ -331,21 +386,23 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (skb->protocol == htons(ETH_P_IP))
|
||||
nf_flow_ip4_tunnel_pop(skb);
|
||||
}
|
||||
|
||||
struct nf_flow_xmit {
|
||||
const void *dest;
|
||||
const void *source;
|
||||
struct net_device *outdev;
|
||||
};
|
||||
|
||||
static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
|
||||
const struct flow_offload_tuple_rhash *tuplehash,
|
||||
unsigned short type)
|
||||
struct nf_flow_xmit *xmit)
|
||||
{
|
||||
struct net_device *outdev;
|
||||
|
||||
outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
|
||||
if (!outdev)
|
||||
return NF_DROP;
|
||||
|
||||
skb->dev = outdev;
|
||||
dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
|
||||
tuplehash->tuple.out.h_source, skb->len);
|
||||
skb->dev = xmit->outdev;
|
||||
dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
|
||||
xmit->dest, xmit->source, skb->len);
|
||||
dev_queue_xmit(skb);
|
||||
|
||||
return NF_STOLEN;
|
||||
@@ -357,8 +414,7 @@ nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
|
||||
{
|
||||
struct flow_offload_tuple tuple = {};
|
||||
|
||||
if (skb->protocol != htons(ETH_P_IP) &&
|
||||
!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
|
||||
if (!nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
|
||||
return NULL;
|
||||
|
||||
if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
|
||||
@@ -381,6 +437,9 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
|
||||
mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
|
||||
if (flow->tuplehash[!dir].tuple.tun_num)
|
||||
mtu -= sizeof(*iph);
|
||||
|
||||
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
|
||||
return 0;
|
||||
|
||||
@@ -414,20 +473,139 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int nf_flow_pppoe_push(struct sk_buff *skb, u16 id)
|
||||
{
|
||||
int data_len = skb->len + sizeof(__be16);
|
||||
struct ppp_hdr {
|
||||
struct pppoe_hdr hdr;
|
||||
__be16 proto;
|
||||
} *ph;
|
||||
__be16 proto;
|
||||
|
||||
if (skb_cow_head(skb, PPPOE_SES_HLEN))
|
||||
return -1;
|
||||
|
||||
switch (skb->protocol) {
|
||||
case htons(ETH_P_IP):
|
||||
proto = htons(PPP_IP);
|
||||
break;
|
||||
case htons(ETH_P_IPV6):
|
||||
proto = htons(PPP_IPV6);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
||||
__skb_push(skb, PPPOE_SES_HLEN);
|
||||
skb_reset_network_header(skb);
|
||||
|
||||
ph = (struct ppp_hdr *)(skb->data);
|
||||
ph->hdr.ver = 1;
|
||||
ph->hdr.type = 1;
|
||||
ph->hdr.code = 0;
|
||||
ph->hdr.sid = htons(id);
|
||||
ph->hdr.length = htons(data_len);
|
||||
ph->proto = proto;
|
||||
skb->protocol = htons(ETH_P_PPP_SES);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nf_flow_tunnel_ipip_push(struct net *net, struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple,
|
||||
__be32 *ip_daddr)
|
||||
{
|
||||
struct iphdr *iph = (struct iphdr *)skb_network_header(skb);
|
||||
struct rtable *rt = dst_rtable(tuple->dst_cache);
|
||||
u8 tos = iph->tos, ttl = iph->ttl;
|
||||
__be16 frag_off = iph->frag_off;
|
||||
u32 headroom = sizeof(*iph);
|
||||
int err;
|
||||
|
||||
err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
|
||||
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
|
||||
err = skb_cow_head(skb, headroom);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
skb_scrub_packet(skb, true);
|
||||
skb_clear_hash_if_not_l4(skb);
|
||||
|
||||
/* Push down and install the IP header. */
|
||||
skb_push(skb, sizeof(*iph));
|
||||
skb_reset_network_header(skb);
|
||||
|
||||
iph = ip_hdr(skb);
|
||||
iph->version = 4;
|
||||
iph->ihl = sizeof(*iph) >> 2;
|
||||
iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : frag_off;
|
||||
iph->protocol = tuple->tun.l3_proto;
|
||||
iph->tos = tos;
|
||||
iph->daddr = tuple->tun.src_v4.s_addr;
|
||||
iph->saddr = tuple->tun.dst_v4.s_addr;
|
||||
iph->ttl = ttl;
|
||||
iph->tot_len = htons(skb->len);
|
||||
__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
|
||||
ip_send_check(iph);
|
||||
|
||||
*ip_daddr = tuple->tun.src_v4.s_addr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple,
|
||||
__be32 *ip_daddr)
|
||||
{
|
||||
if (tuple->tun_num)
|
||||
return nf_flow_tunnel_ipip_push(net, skb, tuple, ip_daddr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nf_flow_encap_push(struct sk_buff *skb,
|
||||
struct flow_offload_tuple *tuple)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < tuple->encap_num; i++) {
|
||||
switch (tuple->encap[i].proto) {
|
||||
case htons(ETH_P_8021Q):
|
||||
case htons(ETH_P_8021AD):
|
||||
if (skb_vlan_push(skb, tuple->encap[i].proto,
|
||||
tuple->encap[i].id) < 0)
|
||||
return -1;
|
||||
break;
|
||||
case htons(ETH_P_PPP_SES):
|
||||
if (nf_flow_pppoe_push(skb, tuple->encap[i].id) < 0)
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned int
|
||||
nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||
const struct nf_hook_state *state)
|
||||
{
|
||||
struct flow_offload_tuple_rhash *tuplehash;
|
||||
struct nf_flowtable *flow_table = priv;
|
||||
struct flow_offload_tuple *other_tuple;
|
||||
enum flow_offload_tuple_dir dir;
|
||||
struct nf_flowtable_ctx ctx = {
|
||||
.in = state->in,
|
||||
};
|
||||
struct nf_flow_xmit xmit = {};
|
||||
struct flow_offload *flow;
|
||||
struct net_device *outdev;
|
||||
struct neighbour *neigh;
|
||||
struct rtable *rt;
|
||||
__be32 nexthop;
|
||||
__be32 ip_daddr;
|
||||
int ret;
|
||||
|
||||
tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
|
||||
@@ -450,29 +628,46 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
|
||||
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
ip_daddr = other_tuple->src_v4.s_addr;
|
||||
|
||||
if (nf_flow_tunnel_v4_push(state->net, skb, other_tuple, &ip_daddr) < 0)
|
||||
return NF_DROP;
|
||||
|
||||
if (nf_flow_encap_push(skb, other_tuple) < 0)
|
||||
return NF_DROP;
|
||||
|
||||
switch (tuplehash->tuple.xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
rt = dst_rtable(tuplehash->tuple.dst_cache);
|
||||
outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
|
||||
xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
|
||||
if (!xmit.outdev) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_DROP;
|
||||
}
|
||||
neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, ip_daddr));
|
||||
if (IS_ERR(neigh)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_DROP;
|
||||
}
|
||||
xmit.dest = neigh->ha;
|
||||
skb_dst_set_noref(skb, &rt->dst);
|
||||
neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
|
||||
ret = NF_STOLEN;
|
||||
break;
|
||||
case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
|
||||
if (ret == NF_DROP)
|
||||
xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
|
||||
if (!xmit.outdev) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_DROP;
|
||||
}
|
||||
xmit.dest = tuplehash->tuple.out.h_dest;
|
||||
xmit.source = tuplehash->tuple.out.h_source;
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
ret = NF_DROP;
|
||||
break;
|
||||
return NF_DROP;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return nf_flow_queue_xmit(state->net, skb, &xmit);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
|
||||
|
||||
@@ -715,13 +910,15 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||
{
|
||||
struct flow_offload_tuple_rhash *tuplehash;
|
||||
struct nf_flowtable *flow_table = priv;
|
||||
struct flow_offload_tuple *other_tuple;
|
||||
enum flow_offload_tuple_dir dir;
|
||||
struct nf_flowtable_ctx ctx = {
|
||||
.in = state->in,
|
||||
};
|
||||
const struct in6_addr *nexthop;
|
||||
struct nf_flow_xmit xmit = {};
|
||||
struct in6_addr *ip6_daddr;
|
||||
struct flow_offload *flow;
|
||||
struct net_device *outdev;
|
||||
struct neighbour *neigh;
|
||||
struct rt6_info *rt;
|
||||
int ret;
|
||||
|
||||
@@ -745,28 +942,42 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
|
||||
|
||||
dir = tuplehash->tuple.dir;
|
||||
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
ip6_daddr = &other_tuple->src_v6;
|
||||
|
||||
if (nf_flow_encap_push(skb, other_tuple) < 0)
|
||||
return NF_DROP;
|
||||
|
||||
switch (tuplehash->tuple.xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
rt = dst_rt6_info(tuplehash->tuple.dst_cache);
|
||||
outdev = rt->dst.dev;
|
||||
skb->dev = outdev;
|
||||
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
|
||||
xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.ifidx);
|
||||
if (!xmit.outdev) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_DROP;
|
||||
}
|
||||
neigh = ip_neigh_gw6(rt->dst.dev, rt6_nexthop(rt, ip6_daddr));
|
||||
if (IS_ERR(neigh)) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_DROP;
|
||||
}
|
||||
xmit.dest = neigh->ha;
|
||||
skb_dst_set_noref(skb, &rt->dst);
|
||||
neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
|
||||
ret = NF_STOLEN;
|
||||
break;
|
||||
case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
|
||||
if (ret == NF_DROP)
|
||||
xmit.outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.out.ifidx);
|
||||
if (!xmit.outdev) {
|
||||
flow_offload_teardown(flow);
|
||||
return NF_DROP;
|
||||
}
|
||||
xmit.dest = tuplehash->tuple.out.h_dest;
|
||||
xmit.source = tuplehash->tuple.out.h_source;
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
ret = NF_DROP;
|
||||
break;
|
||||
return NF_DROP;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return nf_flow_queue_xmit(state->net, skb, &xmit);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
|
||||
|
||||
@@ -555,7 +555,7 @@ static void flow_offload_redirect(struct net *net,
|
||||
switch (this_tuple->xmit_type) {
|
||||
case FLOW_OFFLOAD_XMIT_DIRECT:
|
||||
this_tuple = &flow->tuplehash[dir].tuple;
|
||||
ifindex = this_tuple->out.hw_ifidx;
|
||||
ifindex = this_tuple->out.ifidx;
|
||||
break;
|
||||
case FLOW_OFFLOAD_XMIT_NEIGH:
|
||||
other_tuple = &flow->tuplehash[!dir].tuple;
|
||||
|
||||
330
net/netfilter/nf_flow_table_path.c
Normal file
330
net/netfilter/nf_flow_table_path.c
Normal file
@@ -0,0 +1,330 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/netlink.h>
|
||||
#include <linux/netfilter.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/netfilter/nf_conntrack_common.h>
|
||||
#include <linux/netfilter/nf_tables.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/inet_dscp.h>
|
||||
#include <net/netfilter/nf_tables.h>
|
||||
#include <net/netfilter/nf_tables_core.h>
|
||||
#include <net/netfilter/nf_conntrack_core.h>
|
||||
#include <net/netfilter/nf_conntrack_extend.h>
|
||||
#include <net/netfilter/nf_flow_table.h>
|
||||
|
||||
static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst)
|
||||
{
|
||||
if (dst_xfrm(dst))
|
||||
return FLOW_OFFLOAD_XMIT_XFRM;
|
||||
|
||||
return FLOW_OFFLOAD_XMIT_NEIGH;
|
||||
}
|
||||
|
||||
static void nft_default_forward_path(struct nf_flow_route *route,
|
||||
struct dst_entry *dst_cache,
|
||||
enum ip_conntrack_dir dir)
|
||||
{
|
||||
route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
|
||||
route->tuple[dir].dst = dst_cache;
|
||||
route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
|
||||
}
|
||||
|
||||
static bool nft_is_valid_ether_device(const struct net_device *dev)
|
||||
{
|
||||
if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
|
||||
dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
|
||||
const struct dst_entry *dst_cache,
|
||||
const struct nf_conn *ct,
|
||||
enum ip_conntrack_dir dir, u8 *ha,
|
||||
struct net_device_path_stack *stack)
|
||||
{
|
||||
const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
|
||||
struct net_device *dev = dst_cache->dev;
|
||||
struct neighbour *n;
|
||||
u8 nud_state;
|
||||
|
||||
if (!nft_is_valid_ether_device(dev))
|
||||
goto out;
|
||||
|
||||
n = dst_neigh_lookup(dst_cache, daddr);
|
||||
if (!n)
|
||||
return -1;
|
||||
|
||||
read_lock_bh(&n->lock);
|
||||
nud_state = n->nud_state;
|
||||
ether_addr_copy(ha, n->ha);
|
||||
read_unlock_bh(&n->lock);
|
||||
neigh_release(n);
|
||||
|
||||
if (!(nud_state & NUD_VALID))
|
||||
return -1;
|
||||
|
||||
out:
|
||||
return dev_fill_forward_path(dev, ha, stack);
|
||||
}
|
||||
|
||||
struct nft_forward_info {
|
||||
const struct net_device *indev;
|
||||
const struct net_device *outdev;
|
||||
struct id {
|
||||
__u16 id;
|
||||
__be16 proto;
|
||||
} encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
u8 num_encaps;
|
||||
struct flow_offload_tunnel tun;
|
||||
u8 num_tuns;
|
||||
u8 ingress_vlans;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
};
|
||||
|
||||
static void nft_dev_path_info(const struct net_device_path_stack *stack,
|
||||
struct nft_forward_info *info,
|
||||
unsigned char *ha, struct nf_flowtable *flowtable)
|
||||
{
|
||||
const struct net_device_path *path;
|
||||
int i;
|
||||
|
||||
memcpy(info->h_dest, ha, ETH_ALEN);
|
||||
|
||||
for (i = 0; i < stack->num_paths; i++) {
|
||||
path = &stack->path[i];
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
case DEV_PATH_DSA:
|
||||
case DEV_PATH_VLAN:
|
||||
case DEV_PATH_PPPOE:
|
||||
case DEV_PATH_TUN:
|
||||
info->indev = path->dev;
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
|
||||
if (path->type == DEV_PATH_ETHERNET)
|
||||
break;
|
||||
if (path->type == DEV_PATH_DSA) {
|
||||
i = stack->num_paths;
|
||||
break;
|
||||
}
|
||||
|
||||
/* DEV_PATH_VLAN, DEV_PATH_PPPOE and DEV_PATH_TUN */
|
||||
if (path->type == DEV_PATH_TUN) {
|
||||
if (info->num_tuns) {
|
||||
info->indev = NULL;
|
||||
break;
|
||||
}
|
||||
info->tun.src_v6 = path->tun.src_v6;
|
||||
info->tun.dst_v6 = path->tun.dst_v6;
|
||||
info->tun.l3_proto = path->tun.l3_proto;
|
||||
info->num_tuns++;
|
||||
} else {
|
||||
if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
||||
info->indev = NULL;
|
||||
break;
|
||||
}
|
||||
info->encap[info->num_encaps].id =
|
||||
path->encap.id;
|
||||
info->encap[info->num_encaps].proto =
|
||||
path->encap.proto;
|
||||
info->num_encaps++;
|
||||
}
|
||||
if (path->type == DEV_PATH_PPPOE)
|
||||
memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
|
||||
break;
|
||||
case DEV_PATH_BRIDGE:
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
|
||||
switch (path->bridge.vlan_mode) {
|
||||
case DEV_PATH_BR_VLAN_UNTAG_HW:
|
||||
info->ingress_vlans |= BIT(info->num_encaps - 1);
|
||||
break;
|
||||
case DEV_PATH_BR_VLAN_TAG:
|
||||
if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
||||
info->indev = NULL;
|
||||
break;
|
||||
}
|
||||
info->encap[info->num_encaps].id = path->bridge.vlan_id;
|
||||
info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
|
||||
info->num_encaps++;
|
||||
break;
|
||||
case DEV_PATH_BR_VLAN_UNTAG:
|
||||
if (WARN_ON_ONCE(info->num_encaps-- == 0)) {
|
||||
info->indev = NULL;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case DEV_PATH_BR_VLAN_KEEP:
|
||||
break;
|
||||
}
|
||||
info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
break;
|
||||
default:
|
||||
info->indev = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
info->outdev = info->indev;
|
||||
|
||||
if (nf_flowtable_hw_offload(flowtable) &&
|
||||
nft_is_valid_ether_device(info->indev))
|
||||
info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
}
|
||||
|
||||
static bool nft_flowtable_find_dev(const struct net_device *dev,
|
||||
struct nft_flowtable *ft)
|
||||
{
|
||||
struct nft_hook *hook;
|
||||
bool found = false;
|
||||
|
||||
list_for_each_entry_rcu(hook, &ft->hook_list, list) {
|
||||
if (!nft_hook_find_ops_rcu(hook, dev))
|
||||
continue;
|
||||
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
static int nft_flow_tunnel_update_route(const struct nft_pktinfo *pkt,
|
||||
struct flow_offload_tunnel *tun,
|
||||
struct nf_flow_route *route,
|
||||
enum ip_conntrack_dir dir)
|
||||
{
|
||||
struct dst_entry *cur_dst = route->tuple[dir].dst;
|
||||
struct dst_entry *tun_dst = NULL;
|
||||
struct flowi fl = {};
|
||||
|
||||
switch (nft_pf(pkt)) {
|
||||
case NFPROTO_IPV4:
|
||||
fl.u.ip4.daddr = tun->dst_v4.s_addr;
|
||||
fl.u.ip4.saddr = tun->src_v4.s_addr;
|
||||
fl.u.ip4.flowi4_iif = nft_in(pkt)->ifindex;
|
||||
fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
|
||||
fl.u.ip4.flowi4_mark = pkt->skb->mark;
|
||||
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
|
||||
break;
|
||||
case NFPROTO_IPV6:
|
||||
fl.u.ip6.daddr = tun->dst_v6;
|
||||
fl.u.ip6.saddr = tun->src_v6;
|
||||
fl.u.ip6.flowi6_iif = nft_in(pkt)->ifindex;
|
||||
fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
|
||||
fl.u.ip6.flowi6_mark = pkt->skb->mark;
|
||||
fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
|
||||
break;
|
||||
}
|
||||
|
||||
nf_route(nft_net(pkt), &tun_dst, &fl, false, nft_pf(pkt));
|
||||
if (!tun_dst)
|
||||
return -ENOENT;
|
||||
|
||||
route->tuple[dir].dst = tun_dst;
|
||||
dst_release(cur_dst);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
|
||||
struct nf_flow_route *route,
|
||||
const struct nf_conn *ct,
|
||||
enum ip_conntrack_dir dir,
|
||||
struct nft_flowtable *ft)
|
||||
{
|
||||
const struct dst_entry *dst = route->tuple[dir].dst;
|
||||
struct net_device_path_stack stack;
|
||||
struct nft_forward_info info = {};
|
||||
unsigned char ha[ETH_ALEN];
|
||||
int i;
|
||||
|
||||
if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
|
||||
nft_dev_path_info(&stack, &info, ha, &ft->data);
|
||||
|
||||
if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
|
||||
return;
|
||||
|
||||
route->tuple[!dir].in.ifindex = info.indev->ifindex;
|
||||
for (i = 0; i < info.num_encaps; i++) {
|
||||
route->tuple[!dir].in.encap[i].id = info.encap[i].id;
|
||||
route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
|
||||
}
|
||||
|
||||
if (info.num_tuns &&
|
||||
!nft_flow_tunnel_update_route(pkt, &info.tun, route, dir)) {
|
||||
route->tuple[!dir].in.tun.src_v6 = info.tun.dst_v6;
|
||||
route->tuple[!dir].in.tun.dst_v6 = info.tun.src_v6;
|
||||
route->tuple[!dir].in.tun.l3_proto = info.tun.l3_proto;
|
||||
route->tuple[!dir].in.num_tuns = info.num_tuns;
|
||||
}
|
||||
|
||||
route->tuple[!dir].in.num_encaps = info.num_encaps;
|
||||
route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
|
||||
route->tuple[dir].out.ifindex = info.outdev->ifindex;
|
||||
|
||||
if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
|
||||
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
||||
memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
|
||||
route->tuple[dir].xmit_type = info.xmit_type;
|
||||
}
|
||||
}
|
||||
|
||||
int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
|
||||
struct nf_flow_route *route, enum ip_conntrack_dir dir,
|
||||
struct nft_flowtable *ft)
|
||||
{
|
||||
struct dst_entry *this_dst = skb_dst(pkt->skb);
|
||||
struct dst_entry *other_dst = NULL;
|
||||
struct flowi fl;
|
||||
|
||||
memset(&fl, 0, sizeof(fl));
|
||||
switch (nft_pf(pkt)) {
|
||||
case NFPROTO_IPV4:
|
||||
fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
|
||||
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
|
||||
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
|
||||
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
|
||||
fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
|
||||
fl.u.ip4.flowi4_mark = pkt->skb->mark;
|
||||
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
|
||||
break;
|
||||
case NFPROTO_IPV6:
|
||||
fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
|
||||
fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6;
|
||||
fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
|
||||
fl.u.ip6.flowi6_iif = this_dst->dev->ifindex;
|
||||
fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
|
||||
fl.u.ip6.flowi6_mark = pkt->skb->mark;
|
||||
fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!dst_hold_safe(this_dst))
|
||||
return -ENOENT;
|
||||
|
||||
nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
|
||||
if (!other_dst) {
|
||||
dst_release(this_dst);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
nft_default_forward_path(route, this_dst, dir);
|
||||
nft_default_forward_path(route, other_dst, !dir);
|
||||
|
||||
if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
|
||||
nft_dev_forward_path(pkt, route, ct, dir, ft);
|
||||
if (route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
|
||||
nft_dev_forward_path(pkt, route, ct, !dir, ft);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nft_flow_route);
|
||||
@@ -24,33 +24,27 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
|
||||
const struct nft_pktinfo *pkt,
|
||||
const struct nft_set_ext *ext)
|
||||
{
|
||||
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
|
||||
const struct nf_conntrack_tuple *tuple_ptr;
|
||||
struct nf_conntrack_tuple tuple;
|
||||
enum ip_conntrack_info ctinfo;
|
||||
const struct nf_conn *ct;
|
||||
unsigned int count;
|
||||
int err;
|
||||
|
||||
tuple_ptr = &tuple;
|
||||
|
||||
ct = nf_ct_get(pkt->skb, &ctinfo);
|
||||
if (ct != NULL) {
|
||||
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
|
||||
zone = nf_ct_zone(ct);
|
||||
} else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
|
||||
nft_pf(pkt), nft_net(pkt), &tuple)) {
|
||||
regs->verdict.code = NF_DROP;
|
||||
return;
|
||||
}
|
||||
|
||||
if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
|
||||
regs->verdict.code = NF_DROP;
|
||||
return;
|
||||
err = nf_conncount_add_skb(nft_net(pkt), pkt->skb, nft_pf(pkt), priv->list);
|
||||
if (err) {
|
||||
if (err == -EEXIST) {
|
||||
/* Call gc to update the list count if any connection has
|
||||
* been closed already. This is useful for softlimit
|
||||
* connections like limiting bandwidth based on a number
|
||||
* of open connections.
|
||||
*/
|
||||
nf_conncount_gc_list(nft_net(pkt), priv->list);
|
||||
} else {
|
||||
regs->verdict.code = NF_DROP;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
count = READ_ONCE(priv->list->count);
|
||||
|
||||
if ((count > priv->limit) ^ priv->invert) {
|
||||
if ((count > READ_ONCE(priv->limit)) ^ READ_ONCE(priv->invert)) {
|
||||
regs->verdict.code = NFT_BREAK;
|
||||
return;
|
||||
}
|
||||
@@ -137,6 +131,16 @@ static int nft_connlimit_obj_init(const struct nft_ctx *ctx,
|
||||
return nft_connlimit_do_init(ctx, tb, priv);
|
||||
}
|
||||
|
||||
static void nft_connlimit_obj_update(struct nft_object *obj,
|
||||
struct nft_object *newobj)
|
||||
{
|
||||
struct nft_connlimit *newpriv = nft_obj_data(newobj);
|
||||
struct nft_connlimit *priv = nft_obj_data(obj);
|
||||
|
||||
WRITE_ONCE(priv->limit, newpriv->limit);
|
||||
WRITE_ONCE(priv->invert, newpriv->invert);
|
||||
}
|
||||
|
||||
static void nft_connlimit_obj_destroy(const struct nft_ctx *ctx,
|
||||
struct nft_object *obj)
|
||||
{
|
||||
@@ -166,6 +170,7 @@ static const struct nft_object_ops nft_connlimit_obj_ops = {
|
||||
.init = nft_connlimit_obj_init,
|
||||
.destroy = nft_connlimit_obj_destroy,
|
||||
.dump = nft_connlimit_obj_dump,
|
||||
.update = nft_connlimit_obj_update,
|
||||
};
|
||||
|
||||
static struct nft_object_type nft_connlimit_obj_type __read_mostly = {
|
||||
@@ -238,13 +243,8 @@ static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
|
||||
static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
|
||||
{
|
||||
struct nft_connlimit *priv = nft_expr_priv(expr);
|
||||
bool ret;
|
||||
|
||||
local_bh_disable();
|
||||
ret = nf_conncount_gc_list(net, priv->list);
|
||||
local_bh_enable();
|
||||
|
||||
return ret;
|
||||
return nf_conncount_gc_list(net, priv->list);
|
||||
}
|
||||
|
||||
static struct nft_expr_type nft_connlimit_type;
|
||||
|
||||
@@ -20,258 +20,6 @@ struct nft_flow_offload {
|
||||
struct nft_flowtable *flowtable;
|
||||
};
|
||||
|
||||
static enum flow_offload_xmit_type nft_xmit_type(struct dst_entry *dst)
|
||||
{
|
||||
if (dst_xfrm(dst))
|
||||
return FLOW_OFFLOAD_XMIT_XFRM;
|
||||
|
||||
return FLOW_OFFLOAD_XMIT_NEIGH;
|
||||
}
|
||||
|
||||
static void nft_default_forward_path(struct nf_flow_route *route,
|
||||
struct dst_entry *dst_cache,
|
||||
enum ip_conntrack_dir dir)
|
||||
{
|
||||
route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
|
||||
route->tuple[dir].dst = dst_cache;
|
||||
route->tuple[dir].xmit_type = nft_xmit_type(dst_cache);
|
||||
}
|
||||
|
||||
static bool nft_is_valid_ether_device(const struct net_device *dev)
|
||||
{
|
||||
if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
|
||||
dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
|
||||
const struct dst_entry *dst_cache,
|
||||
const struct nf_conn *ct,
|
||||
enum ip_conntrack_dir dir, u8 *ha,
|
||||
struct net_device_path_stack *stack)
|
||||
{
|
||||
const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
|
||||
struct net_device *dev = dst_cache->dev;
|
||||
struct neighbour *n;
|
||||
u8 nud_state;
|
||||
|
||||
if (!nft_is_valid_ether_device(dev))
|
||||
goto out;
|
||||
|
||||
n = dst_neigh_lookup(dst_cache, daddr);
|
||||
if (!n)
|
||||
return -1;
|
||||
|
||||
read_lock_bh(&n->lock);
|
||||
nud_state = n->nud_state;
|
||||
ether_addr_copy(ha, n->ha);
|
||||
read_unlock_bh(&n->lock);
|
||||
neigh_release(n);
|
||||
|
||||
if (!(nud_state & NUD_VALID))
|
||||
return -1;
|
||||
|
||||
out:
|
||||
return dev_fill_forward_path(dev, ha, stack);
|
||||
}
|
||||
|
||||
struct nft_forward_info {
|
||||
const struct net_device *indev;
|
||||
const struct net_device *outdev;
|
||||
const struct net_device *hw_outdev;
|
||||
struct id {
|
||||
__u16 id;
|
||||
__be16 proto;
|
||||
} encap[NF_FLOW_TABLE_ENCAP_MAX];
|
||||
u8 num_encaps;
|
||||
u8 ingress_vlans;
|
||||
u8 h_source[ETH_ALEN];
|
||||
u8 h_dest[ETH_ALEN];
|
||||
enum flow_offload_xmit_type xmit_type;
|
||||
};
|
||||
|
||||
static void nft_dev_path_info(const struct net_device_path_stack *stack,
|
||||
struct nft_forward_info *info,
|
||||
unsigned char *ha, struct nf_flowtable *flowtable)
|
||||
{
|
||||
const struct net_device_path *path;
|
||||
int i;
|
||||
|
||||
memcpy(info->h_dest, ha, ETH_ALEN);
|
||||
|
||||
for (i = 0; i < stack->num_paths; i++) {
|
||||
path = &stack->path[i];
|
||||
switch (path->type) {
|
||||
case DEV_PATH_ETHERNET:
|
||||
case DEV_PATH_DSA:
|
||||
case DEV_PATH_VLAN:
|
||||
case DEV_PATH_PPPOE:
|
||||
info->indev = path->dev;
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
|
||||
if (path->type == DEV_PATH_ETHERNET)
|
||||
break;
|
||||
if (path->type == DEV_PATH_DSA) {
|
||||
i = stack->num_paths;
|
||||
break;
|
||||
}
|
||||
|
||||
/* DEV_PATH_VLAN and DEV_PATH_PPPOE */
|
||||
if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
|
||||
info->indev = NULL;
|
||||
break;
|
||||
}
|
||||
if (!info->outdev)
|
||||
info->outdev = path->dev;
|
||||
info->encap[info->num_encaps].id = path->encap.id;
|
||||
info->encap[info->num_encaps].proto = path->encap.proto;
|
||||
info->num_encaps++;
|
||||
if (path->type == DEV_PATH_PPPOE)
|
||||
memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
|
||||
break;
|
||||
case DEV_PATH_BRIDGE:
|
||||
if (is_zero_ether_addr(info->h_source))
|
||||
memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
|
||||
|
||||
switch (path->bridge.vlan_mode) {
|
||||
case DEV_PATH_BR_VLAN_UNTAG_HW:
|
||||
info->ingress_vlans |= BIT(info->num_encaps - 1);
|
||||
break;
|
||||
case DEV_PATH_BR_VLAN_TAG:
|
||||
info->encap[info->num_encaps].id = path->bridge.vlan_id;
|
||||
info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
|
||||
info->num_encaps++;
|
||||
break;
|
||||
case DEV_PATH_BR_VLAN_UNTAG:
|
||||
info->num_encaps--;
|
||||
break;
|
||||
case DEV_PATH_BR_VLAN_KEEP:
|
||||
break;
|
||||
}
|
||||
info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
break;
|
||||
default:
|
||||
info->indev = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!info->outdev)
|
||||
info->outdev = info->indev;
|
||||
|
||||
info->hw_outdev = info->indev;
|
||||
|
||||
if (nf_flowtable_hw_offload(flowtable) &&
|
||||
nft_is_valid_ether_device(info->indev))
|
||||
info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
|
||||
}
|
||||
|
||||
static bool nft_flowtable_find_dev(const struct net_device *dev,
|
||||
struct nft_flowtable *ft)
|
||||
{
|
||||
struct nft_hook *hook;
|
||||
bool found = false;
|
||||
|
||||
list_for_each_entry_rcu(hook, &ft->hook_list, list) {
|
||||
if (!nft_hook_find_ops_rcu(hook, dev))
|
||||
continue;
|
||||
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
static void nft_dev_forward_path(struct nf_flow_route *route,
|
||||
const struct nf_conn *ct,
|
||||
enum ip_conntrack_dir dir,
|
||||
struct nft_flowtable *ft)
|
||||
{
|
||||
const struct dst_entry *dst = route->tuple[dir].dst;
|
||||
struct net_device_path_stack stack;
|
||||
struct nft_forward_info info = {};
|
||||
unsigned char ha[ETH_ALEN];
|
||||
int i;
|
||||
|
||||
if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
|
||||
nft_dev_path_info(&stack, &info, ha, &ft->data);
|
||||
|
||||
if (!info.indev || !nft_flowtable_find_dev(info.indev, ft))
|
||||
return;
|
||||
|
||||
route->tuple[!dir].in.ifindex = info.indev->ifindex;
|
||||
for (i = 0; i < info.num_encaps; i++) {
|
||||
route->tuple[!dir].in.encap[i].id = info.encap[i].id;
|
||||
route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
|
||||
}
|
||||
route->tuple[!dir].in.num_encaps = info.num_encaps;
|
||||
route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
|
||||
|
||||
if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
|
||||
memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
|
||||
memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
|
||||
route->tuple[dir].out.ifindex = info.outdev->ifindex;
|
||||
route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
|
||||
route->tuple[dir].xmit_type = info.xmit_type;
|
||||
}
|
||||
}
|
||||
|
||||
static int nft_flow_route(const struct nft_pktinfo *pkt,
|
||||
const struct nf_conn *ct,
|
||||
struct nf_flow_route *route,
|
||||
enum ip_conntrack_dir dir,
|
||||
struct nft_flowtable *ft)
|
||||
{
|
||||
struct dst_entry *this_dst = skb_dst(pkt->skb);
|
||||
struct dst_entry *other_dst = NULL;
|
||||
struct flowi fl;
|
||||
|
||||
memset(&fl, 0, sizeof(fl));
|
||||
switch (nft_pf(pkt)) {
|
||||
case NFPROTO_IPV4:
|
||||
fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
|
||||
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
|
||||
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
|
||||
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
|
||||
fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
|
||||
fl.u.ip4.flowi4_mark = pkt->skb->mark;
|
||||
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
|
||||
break;
|
||||
case NFPROTO_IPV6:
|
||||
fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
|
||||
fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.src.u3.in6;
|
||||
fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex;
|
||||
fl.u.ip6.flowi6_iif = this_dst->dev->ifindex;
|
||||
fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
|
||||
fl.u.ip6.flowi6_mark = pkt->skb->mark;
|
||||
fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!dst_hold_safe(this_dst))
|
||||
return -ENOENT;
|
||||
|
||||
nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
|
||||
if (!other_dst) {
|
||||
dst_release(this_dst);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
nft_default_forward_path(route, this_dst, dir);
|
||||
nft_default_forward_path(route, other_dst, !dir);
|
||||
|
||||
if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
|
||||
route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
|
||||
nft_dev_forward_path(route, ct, dir, ft);
|
||||
nft_dev_forward_path(route, ct, !dir, ft);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
|
||||
{
|
||||
if (skb_sec_path(skb))
|
||||
|
||||
@@ -31,8 +31,6 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
|
||||
{
|
||||
struct net *net = xt_net(par);
|
||||
const struct xt_connlimit_info *info = par->matchinfo;
|
||||
struct nf_conntrack_tuple tuple;
|
||||
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
|
||||
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
|
||||
enum ip_conntrack_info ctinfo;
|
||||
const struct nf_conn *ct;
|
||||
@@ -40,13 +38,8 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
|
||||
u32 key[5];
|
||||
|
||||
ct = nf_ct_get(skb, &ctinfo);
|
||||
if (ct != NULL) {
|
||||
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
|
||||
if (ct)
|
||||
zone = nf_ct_zone(ct);
|
||||
} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
|
||||
xt_family(par), net, &tuple)) {
|
||||
goto hotdrop;
|
||||
}
|
||||
|
||||
if (xt_family(par) == NFPROTO_IPV6) {
|
||||
const struct ipv6hdr *iph = ipv6_hdr(skb);
|
||||
@@ -69,10 +62,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
|
||||
key[1] = zone->id;
|
||||
}
|
||||
|
||||
connections = nf_conncount_count(net, info->data, key, tuple_ptr,
|
||||
zone);
|
||||
connections = nf_conncount_count_skb(net, skb, xt_family(par), info->data, key);
|
||||
if (connections == 0)
|
||||
/* kmalloc failed, drop it entirely */
|
||||
/* kmalloc failed or tuple couldn't be found, drop it entirely */
|
||||
goto hotdrop;
|
||||
|
||||
return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT);
|
||||
|
||||
@@ -928,8 +928,8 @@ static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone)
|
||||
}
|
||||
|
||||
static int ovs_ct_check_limit(struct net *net,
|
||||
const struct ovs_conntrack_info *info,
|
||||
const struct nf_conntrack_tuple *tuple)
|
||||
const struct sk_buff *skb,
|
||||
const struct ovs_conntrack_info *info)
|
||||
{
|
||||
struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
|
||||
const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
|
||||
@@ -942,8 +942,9 @@ static int ovs_ct_check_limit(struct net *net,
|
||||
if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED)
|
||||
return 0;
|
||||
|
||||
connections = nf_conncount_count(net, ct_limit_info->data,
|
||||
&conncount_key, tuple, &info->zone);
|
||||
connections = nf_conncount_count_skb(net, skb, info->family,
|
||||
ct_limit_info->data,
|
||||
&conncount_key);
|
||||
if (connections > per_zone_limit)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -972,8 +973,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
|
||||
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
|
||||
if (static_branch_unlikely(&ovs_ct_limit_enabled)) {
|
||||
if (!nf_ct_is_confirmed(ct)) {
|
||||
err = ovs_ct_check_limit(net, info,
|
||||
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
|
||||
err = ovs_ct_check_limit(net, skb, info);
|
||||
if (err) {
|
||||
net_warn_ratelimited("openvswitch: zone: %u "
|
||||
"exceeds conntrack limit\n",
|
||||
@@ -1770,8 +1770,8 @@ static int __ovs_ct_limit_get_zone_limit(struct net *net,
|
||||
zone_limit.limit = limit;
|
||||
nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0);
|
||||
|
||||
zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL,
|
||||
&ct_zone);
|
||||
zone_limit.count = nf_conncount_count_skb(net, NULL, 0, data,
|
||||
&conncount_key);
|
||||
return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
|
||||
}
|
||||
|
||||
|
||||
@@ -127,6 +127,8 @@ ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
|
||||
ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
|
||||
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
|
||||
|
||||
ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
|
||||
for i in 0 1; do
|
||||
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
|
||||
@@ -153,7 +155,9 @@ ip -net "$ns1" route add default via dead:1::1
|
||||
ip -net "$ns2" route add default via dead:2::1
|
||||
|
||||
ip -net "$nsr1" route add default via 192.168.10.2
|
||||
ip -6 -net "$nsr1" route add default via fee1:2::2
|
||||
ip -net "$nsr2" route add default via 192.168.10.1
|
||||
ip -6 -net "$nsr2" route add default via fee1:2::1
|
||||
|
||||
ip netns exec "$nsr1" nft -f - <<EOF
|
||||
table inet filter {
|
||||
@@ -352,8 +356,9 @@ test_tcp_forwarding_ip()
|
||||
local nsa=$1
|
||||
local nsb=$2
|
||||
local pmtu=$3
|
||||
local dstip=$4
|
||||
local dstport=$5
|
||||
local proto=$4
|
||||
local dstip=$5
|
||||
local dstport=$6
|
||||
local lret=0
|
||||
local socatc
|
||||
local socatl
|
||||
@@ -363,12 +368,14 @@ test_tcp_forwarding_ip()
|
||||
infile="$nsin_small"
|
||||
fi
|
||||
|
||||
timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
|
||||
timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -${proto} \
|
||||
TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
|
||||
lpid=$!
|
||||
|
||||
busywait 1000 listener_ready
|
||||
|
||||
timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
|
||||
timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -${proto} \
|
||||
TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
|
||||
socatc=$?
|
||||
|
||||
wait $lpid
|
||||
@@ -394,8 +401,11 @@ test_tcp_forwarding_ip()
|
||||
test_tcp_forwarding()
|
||||
{
|
||||
local pmtu="$3"
|
||||
local proto="$4"
|
||||
local dstip="$5"
|
||||
local dstport="$6"
|
||||
|
||||
test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
|
||||
test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
|
||||
|
||||
return $?
|
||||
}
|
||||
@@ -403,6 +413,9 @@ test_tcp_forwarding()
|
||||
test_tcp_forwarding_set_dscp()
|
||||
{
|
||||
local pmtu="$3"
|
||||
local proto="$4"
|
||||
local dstip="$5"
|
||||
local dstport="$6"
|
||||
|
||||
ip netns exec "$nsr1" nft -f - <<EOF
|
||||
table netdev dscpmangle {
|
||||
@@ -413,7 +426,7 @@ table netdev dscpmangle {
|
||||
}
|
||||
EOF
|
||||
if [ $? -eq 0 ]; then
|
||||
test_tcp_forwarding_ip "$1" "$2" "$3" 10.0.2.99 12345
|
||||
test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
|
||||
check_dscp "dscp_ingress" "$pmtu"
|
||||
|
||||
ip netns exec "$nsr1" nft delete table netdev dscpmangle
|
||||
@@ -430,7 +443,7 @@ table netdev dscpmangle {
|
||||
}
|
||||
EOF
|
||||
if [ $? -eq 0 ]; then
|
||||
test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
|
||||
test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
|
||||
check_dscp "dscp_egress" "$pmtu"
|
||||
|
||||
ip netns exec "$nsr1" nft delete table netdev dscpmangle
|
||||
@@ -441,7 +454,7 @@ fi
|
||||
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
|
||||
# counters should have seen packets (before and after ft offload kicks in).
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
|
||||
test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.0.2.99 12345
|
||||
test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
|
||||
check_dscp "dscp_fwd" "$pmtu"
|
||||
}
|
||||
|
||||
@@ -455,7 +468,7 @@ test_tcp_forwarding_nat()
|
||||
|
||||
[ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
|
||||
|
||||
test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 10.0.2.99 12345
|
||||
test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345
|
||||
lret=$?
|
||||
|
||||
if [ "$lret" -eq 0 ] ; then
|
||||
@@ -465,7 +478,7 @@ test_tcp_forwarding_nat()
|
||||
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
|
||||
fi
|
||||
|
||||
test_tcp_forwarding_ip "$1" "$2" "$pmtu" 10.6.6.6 1666
|
||||
test_tcp_forwarding_ip "$1" "$2" "$pmtu" 4 10.6.6.6 1666
|
||||
lret=$?
|
||||
if [ "$pmtu" -eq 1 ] ;then
|
||||
check_counters "flow offload for ns1/ns2 with dnat $what"
|
||||
@@ -487,7 +500,7 @@ make_file "$nsin_small" "$filesize_small"
|
||||
# Due to MTU mismatch in both directions, all packets (except small packets like pure
|
||||
# acks) have to be handled by normal forwarding path. Therefore, packet counters
|
||||
# are not checked.
|
||||
if test_tcp_forwarding "$ns1" "$ns2" 0; then
|
||||
if test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
|
||||
echo "PASS: flow offloaded for ns1/ns2"
|
||||
else
|
||||
echo "FAIL: flow offload for ns1/ns2:" 1>&2
|
||||
@@ -495,6 +508,14 @@ else
|
||||
ret=1
|
||||
fi
|
||||
|
||||
if test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then
|
||||
echo "PASS: IPv6 flow offloaded for ns1/ns2"
|
||||
else
|
||||
echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2
|
||||
ip netns exec "$nsr1" nft list ruleset
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# delete default route, i.e. ns2 won't be able to reach ns1 and
|
||||
# will depend on ns1 being masqueraded in nsr1.
|
||||
# expect ns1 has nsr1 address.
|
||||
@@ -520,7 +541,7 @@ table ip nat {
|
||||
EOF
|
||||
|
||||
check_dscp "dscp_none" "0"
|
||||
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
|
||||
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
|
||||
echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
|
||||
exit 0
|
||||
fi
|
||||
@@ -546,7 +567,7 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
|
||||
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
|
||||
ip netns exec "$ns2" nft reset counters table inet filter >/dev/null
|
||||
|
||||
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 ""; then
|
||||
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
|
||||
echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
|
||||
exit 0
|
||||
fi
|
||||
@@ -558,6 +579,73 @@ if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
|
||||
ip netns exec "$nsr1" nft list ruleset
|
||||
fi
|
||||
|
||||
# IPIP tunnel test:
|
||||
# Add IPIP tunnel interfaces and check flowtable acceleration.
|
||||
test_ipip() {
|
||||
if ! ip -net "$nsr1" link add name tun0 type ipip \
|
||||
local 192.168.10.1 remote 192.168.10.2 >/dev/null;then
|
||||
echo "SKIP: could not add ipip tunnel"
|
||||
[ "$ret" -eq 0 ] && ret=$ksft_skip
|
||||
return
|
||||
fi
|
||||
ip -net "$nsr1" link set tun0 up
|
||||
ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
|
||||
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
|
||||
|
||||
ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
|
||||
ip -net "$nsr2" link set tun0 up
|
||||
ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
|
||||
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
|
||||
|
||||
ip -net "$nsr1" route change default via 192.168.100.2
|
||||
ip -net "$nsr2" route change default via 192.168.100.1
|
||||
ip -net "$ns2" route add default via 10.0.2.1
|
||||
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward \
|
||||
'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
|
||||
|
||||
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
|
||||
echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2
|
||||
ip netns exec "$nsr1" nft list ruleset
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# Create vlan tagged devices for IPIP traffic.
|
||||
ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
|
||||
ip -net "$nsr1" link set veth1.10 up
|
||||
ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
|
||||
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
|
||||
ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
|
||||
ip -net "$nsr1" link set tun1 up
|
||||
ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
|
||||
ip -net "$nsr1" route change default via 192.168.200.2
|
||||
ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
|
||||
ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
|
||||
|
||||
ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
|
||||
ip -net "$nsr2" link set veth0.10 up
|
||||
ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
|
||||
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
|
||||
ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
|
||||
ip -net "$nsr2" link set tun1 up
|
||||
ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
|
||||
ip -net "$nsr2" route change default via 192.168.200.1
|
||||
ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
|
||||
|
||||
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
|
||||
echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
|
||||
ip netns exec "$nsr1" nft list ruleset
|
||||
ret=1
|
||||
fi
|
||||
|
||||
# Restore the previous configuration
|
||||
ip -net "$nsr1" route change default via 192.168.10.2
|
||||
ip -net "$nsr2" route change default via 192.168.10.1
|
||||
ip -net "$ns2" route del default via 10.0.2.1
|
||||
}
|
||||
|
||||
# Another test:
|
||||
# Add bridge interface br0 to Router1, with NAT enabled.
|
||||
test_bridge() {
|
||||
@@ -643,6 +731,8 @@ ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
|
||||
ip -net "$nsr1" link set up dev veth0
|
||||
}
|
||||
|
||||
test_ipip
|
||||
|
||||
test_bridge
|
||||
|
||||
KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
|
||||
@@ -683,7 +773,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
|
||||
ip -net "$ns2" route add default via 10.0.2.1
|
||||
ip -net "$ns2" route add default via dead:2::1
|
||||
|
||||
if test_tcp_forwarding "$ns1" "$ns2" 1; then
|
||||
if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
|
||||
check_counters "ipsec tunnel mode for ns1/ns2"
|
||||
else
|
||||
echo "FAIL: ipsec tunnel mode for ns1/ns2"
|
||||
@@ -691,6 +781,14 @@ else
|
||||
ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
|
||||
fi
|
||||
|
||||
if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
|
||||
check_counters "IPv6 ipsec tunnel mode for ns1/ns2"
|
||||
else
|
||||
echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2"
|
||||
ip netns exec "$nsr1" nft list ruleset 1>&2
|
||||
ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
|
||||
fi
|
||||
|
||||
if [ "$1" = "" ]; then
|
||||
low=1280
|
||||
mtu=$((65536 - low))
|
||||
|
||||
Reference in New Issue
Block a user