Mirror of https://github.com/torvalds/linux.git (synced 2025-12-07 20:06:24 +00:00)
Merge branch 'mptcp-memcg-accounting-for-passive-sockets-backlog-processing'
Matthieu Baerts says:
====================
mptcp: memcg accounting for passive sockets & backlog processing
This series is split in two: the first 4 patches are linked to memcg
accounting for passive sockets, and the rest introduce backlog
processing. They are sent together because the first part turned out to
be needed to get the second one fully working.
The second part includes RX path improvements built around backlog
processing. The main goals are improving RX performance _and_
increasing long-term maintainability.
- Patches 1-3: preparation work to ease the introduction of the next
patch.
- Patch 4: fix memcg accounting for passive sockets. Note that this is a
(non-urgent) fix, but it depends on material that is currently only in
net-next, e.g. commit 4a997d49d9 ("tcp: Save lock_sock() for memcg
in inet_csk_accept().").
- Patches 5-6: preparation of the stack for backlog processing, removing
assumptions that will not hold true any more after the backlog
introduction.
- Patches 7, 8, 10, 11, 12: more cleanups that make the backlog patch a
little less huge.
- Patch 9: a somewhat unrelated cleanup, included here so it is not
forgotten.
- Patches 13-14: the real work is done here. Patch 13 introduces the
helpers needed to manipulate the msk-level backlog, and the data
structure itself, without any functional change. Patch 14 finally
uses the backlog for RX skb processing. Note that MPTCP can't use
sk_backlog, as the MPTCP release callback can itself release and
re-acquire the msk-level spinlock, while core backlog processing works
under the assumption that such an event is not possible. A minimal
userspace sketch of this spooling pattern follows the list.
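As a rough userspace model of the spooling pattern used by the backlog patches
(illustrative only: struct and helper names such as spool_backlog() and
process_skbs() are made up here; the real logic lives in
mptcp_can_spool_backlog(), __mptcp_move_skbs() and mptcp_backlog_spooled() in
the diff below):

#include <pthread.h>
#include <stdio.h>

struct skb { int truesize; struct skb *next; };

struct msk {
    pthread_mutex_t data_lock;  /* models the msk-level data lock */
    struct skb *backlog;        /* private msk backlog, not sk_backlog */
    int backlog_len;
};

/* Under the data lock: detach the whole backlog in O(1). */
static struct skb *spool_backlog(struct msk *m)
{
    struct skb *list;

    pthread_mutex_lock(&m->data_lock);
    list = m->backlog;
    m->backlog = NULL;
    pthread_mutex_unlock(&m->data_lock);
    return list;
}

/* Outside the lock: process the detached skbs. The lock can be dropped and
 * re-acquired freely here, which is exactly what core sk_backlog processing
 * does not allow.
 */
static int process_skbs(struct skb *list)
{
    int moved = 0;

    for (; list; list = list->next) {
        printf("processing skb, truesize %d\n", list->truesize);
        moved += list->truesize;
    }
    return moved;
}

int main(void)
{
    struct skb s2 = { .truesize = 1500, .next = NULL };
    struct skb s1 = { .truesize = 2000, .next = &s2 };
    struct msk m = { .backlog = &s1, .backlog_len = 3500 };
    struct skb *spooled;
    int moved;

    pthread_mutex_init(&m.data_lock, NULL);
    spooled = spool_backlog(&m);
    moved = process_skbs(spooled);

    pthread_mutex_lock(&m.data_lock);
    m.backlog_len -= moved;     /* re-account under the lock */
    pthread_mutex_unlock(&m.data_lock);
    printf("moved %d bytes, backlog_len now %d\n", moved, m.backlog_len);
    return 0;
}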
A relevant point is memory accounting for skbs in the backlog. It is
somewhat "original" due to MPTCP constraints. Such skbs use space from
the incoming subflow receive buffer and do not explicitly use any
forward-allocated memory: we can't update the msk fwd mem while
enqueuing, nor do we want to acquire the ssk socket lock again while
processing the skbs. Instead the msk borrows memory from the subflow and
reserves it for the backlog; see patches 5 and 14 for the gory details.
A simplified model of this accounting follows this paragraph.
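A simplified, self-contained model of the lend/borrow accounting (the struct
fields and the lend_fwdmem()/borrow_fwdmem() helpers below are stand-ins
invented for illustration; the real helpers are mptcp_subflow_lend_fwdmem()
and mptcp_borrow_fwdmem() in the diff below, paired with skb_set_owner_r()):

#include <stdio.h>

#define PAGE_SZ 4096            /* stand-in for PAGE_SIZE */

struct sk_accounting {          /* stand-in for the sock memory fields */
    int forward_alloc;          /* forward-allocated (reserved) bytes */
    int rmem_alloc;             /* bytes charged to the receive queue */
};

struct subflow {
    struct sk_accounting sk;
    int lent_mem_frag;          /* sub-page remainder lent to the msk */
};

struct msk {
    struct sk_accounting sk;
    int backlog_len;            /* truesize of skbs parked in the backlog */
};

/* Enqueue time: the skb keeps consuming the subflow rcvbuf; the subflow only
 * records the sub-page fraction it has lent out.
 */
static void lend_fwdmem(struct subflow *sf, struct msk *m, int truesize)
{
    sf->lent_mem_frag = (sf->lent_mem_frag + truesize) & (PAGE_SZ - 1);
    m->backlog_len += truesize;
}

/* Dequeue time: release the subflow rmem charge, credit the msk forward
 * allocation, then charge the msk receive queue.
 */
static void borrow_fwdmem(struct subflow *sf, struct msk *m, int truesize)
{
    sf->sk.rmem_alloc -= truesize;
    m->sk.forward_alloc += truesize;
    m->backlog_len -= truesize;

    m->sk.forward_alloc -= truesize;
    m->sk.rmem_alloc += truesize;
}

int main(void)
{
    struct subflow sf = { .sk = { .rmem_alloc = 3000 } };
    struct msk m = { .backlog_len = 0 };
    int truesize = 3000;        /* one pretend skb of this truesize */

    lend_fwdmem(&sf, &m, truesize);
    printf("after lend: backlog_len=%d lent_frag=%d\n",
           m.backlog_len, sf.lent_mem_frag);

    borrow_fwdmem(&sf, &m, truesize);
    printf("after borrow: msk rmem=%d subflow rmem=%d backlog_len=%d\n",
           m.sk.rmem_alloc, sf.sk.rmem_alloc, m.backlog_len);
    return 0;
}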
====================
Link: https://patch.msgid.link/20251121-net-next-mptcp-memcg-backlog-imp-v1-0-1f34b6c1e0b1@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
@@ -1631,6 +1631,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
        sk_mem_reclaim(sk);
}

void __sk_charge(struct sock *sk, gfp_t gfp);

#if IS_ENABLED(CONFIG_PROVE_LOCKING) && IS_ENABLED(CONFIG_MODULES)
static inline void sk_owner_set(struct sock *sk, struct module *owner)
{
@@ -3448,6 +3448,24 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
}
EXPORT_SYMBOL(__sk_mem_reclaim);

void __sk_charge(struct sock *sk, gfp_t gfp)
{
    int amt;

    gfp |= __GFP_NOFAIL;
    if (mem_cgroup_from_sk(sk)) {
        /* The socket has not been accepted yet, no need
         * to look at newsk->sk_wmem_queued.
         */
        amt = sk_mem_pages(sk->sk_forward_alloc +
                   atomic_read(&sk->sk_rmem_alloc));
        if (amt)
            mem_cgroup_sk_charge(sk, amt, gfp);
    }

    kmem_cache_charge(sk, gfp);
}

int sk_set_peek_off(struct sock *sk, int val)
{
    WRITE_ONCE(sk->sk_peek_off, val);
@@ -756,23 +756,8 @@ EXPORT_SYMBOL(inet_stream_connect);
void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk)
{
    if (mem_cgroup_sockets_enabled) {
        gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;

        mem_cgroup_sk_alloc(newsk);

        if (mem_cgroup_from_sk(newsk)) {
            int amt;

            /* The socket has not been accepted yet, no need
             * to look at newsk->sk_wmem_queued.
             */
            amt = sk_mem_pages(newsk->sk_forward_alloc +
                       atomic_read(&newsk->sk_rmem_alloc));
            if (amt)
                mem_cgroup_sk_charge(newsk, amt, gfp);
        }

        kmem_cache_charge(newsk, gfp);
        __sk_charge(newsk, GFP_KERNEL);
    }

    sock_rps_record_flow(newsk);
@@ -32,7 +32,8 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
    /* dequeue the skb from sk receive queue */
    __skb_unlink(skb, &ssk->sk_receive_queue);
    skb_ext_reset(skb);
    skb_orphan(skb);

    mptcp_subflow_lend_fwdmem(subflow, skb);

    /* We copy the fastopen data, but that don't belong to the mptcp sequence
     * space, need to offset it in the subflow sequence, see mptcp_subflow_get_map_offset()

@@ -50,6 +51,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
    mptcp_data_lock(sk);
    DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));

    mptcp_borrow_fwdmem(sk, skb);
    skb_set_owner_r(skb, sk);
    __skb_queue_tail(&sk->sk_receive_queue, skb);
    mptcp_sk(sk)->bytes_received += skb->len;
@@ -71,7 +71,6 @@ static const struct snmp_mib mptcp_snmp_list[] = {
    SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX),
    SNMP_MIB_ITEM("MPRstTx", MPTCP_MIB_MPRSTTX),
    SNMP_MIB_ITEM("MPRstRx", MPTCP_MIB_MPRSTRX),
    SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED),
    SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE),
    SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER),
    SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED),

@@ -70,7 +70,6 @@ enum linux_mptcp_mib_field {
    MPTCP_MIB_MPFASTCLOSERX, /* Received a MP_FASTCLOSE */
    MPTCP_MIB_MPRSTTX, /* Transmit a MP_RST */
    MPTCP_MIB_MPRSTRX, /* Received a MP_RST */
    MPTCP_MIB_RCVPRUNED, /* Incoming packet dropped due to memory limit */
    MPTCP_MIB_SUBFLOWSTALE, /* Subflows entered 'stale' status */
    MPTCP_MIB_SUBFLOWRECOVER, /* Subflows returned to active status after being stale */
    MPTCP_MIB_SNDWNDSHARED, /* Subflow snd wnd is overridden by msk's one */
@@ -195,7 +195,8 @@ static void mptcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
    struct mptcp_sock *msk = mptcp_sk(sk);
    struct mptcp_info *info = _info;

    r->idiag_rqueue = sk_rmem_alloc_get(sk);
    r->idiag_rqueue = sk_rmem_alloc_get(sk) +
              READ_ONCE(mptcp_sk(sk)->backlog_len);
    r->idiag_wqueue = sk_wmem_alloc_get(sk);

    if (inet_sk_state_load(sk) == TCP_LISTEN) {
@@ -594,6 +594,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk)
void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
                 const struct mptcp_subflow_context *subflow)
{
    struct sock *sk = (struct sock *)msk;
    struct mptcp_pm_data *pm = &msk->pm;
    bool update_subflows;

@@ -617,7 +618,8 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
    /* Even if this subflow is not really established, tell the PM to try
     * to pick the next ones, if possible.
     */
    if (mptcp_pm_nl_check_work_pending(msk))
    if (mptcp_is_fully_established(sk) &&
        mptcp_pm_nl_check_work_pending(msk))
        mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

    spin_unlock_bh(&pm->lock);

@@ -337,6 +337,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
    struct mptcp_pm_local local;

    mptcp_mpc_endpoint_setup(msk);
    if (!mptcp_is_fully_established(sk))
        return;

    pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
         msk->pm.local_addr_used, endp_subflow_max,
@@ -358,7 +358,7 @@ end:
static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset,
               int copy_len)
{
    const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
    struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
    bool has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;

    /* the skb map_seq accounts for the skb offset:

@@ -383,11 +383,7 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb)
    struct mptcp_sock *msk = mptcp_sk(sk);
    struct sk_buff *tail;

    /* try to fetch required memory from subflow */
    if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
        MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
        goto drop;
    }
    mptcp_borrow_fwdmem(sk, skb);

    if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
        /* in sequence */

@@ -409,7 +405,6 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb)
     * will retransmit as needed, if needed.
     */
    MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DUPDATA);
drop:
    mptcp_drop(sk, skb);
    return false;
}
@@ -664,8 +659,50 @@ static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
    }
}

static void __mptcp_add_backlog(struct sock *sk,
                struct mptcp_subflow_context *subflow,
                struct sk_buff *skb)
{
    struct mptcp_sock *msk = mptcp_sk(sk);
    struct sk_buff *tail = NULL;
    struct sock *ssk = skb->sk;
    bool fragstolen;
    int delta;

    if (unlikely(sk->sk_state == TCP_CLOSE)) {
        kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
        return;
    }

    /* Try to coalesce with the last skb in our backlog */
    if (!list_empty(&msk->backlog_list))
        tail = list_last_entry(&msk->backlog_list, struct sk_buff, list);

    if (tail && MPTCP_SKB_CB(skb)->map_seq == MPTCP_SKB_CB(tail)->end_seq &&
        ssk == tail->sk &&
        __mptcp_try_coalesce(sk, tail, skb, &fragstolen, &delta)) {
        skb->truesize -= delta;
        kfree_skb_partial(skb, fragstolen);
        __mptcp_subflow_lend_fwdmem(subflow, delta);
        goto account;
    }

    list_add_tail(&skb->list, &msk->backlog_list);
    mptcp_subflow_lend_fwdmem(subflow, skb);
    delta = skb->truesize;

account:
    WRITE_ONCE(msk->backlog_len, msk->backlog_len + delta);

    /* Possibly not accept()ed yet, keep track of memory not CG
     * accounted, mptcp_graft_subflows() will handle it.
     */
    if (!mem_cgroup_from_sk(ssk))
        msk->backlog_unaccounted += delta;
}

static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
                       struct sock *ssk)
                       struct sock *ssk, bool own_msk)
{
    struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
    struct sock *sk = (struct sock *)msk;
@@ -681,9 +718,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
        struct sk_buff *skb;
        bool fin;

        if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
            break;

        /* try to move as much data as available */
        map_remaining = subflow->map_data_len -
                mptcp_subflow_get_map_offset(subflow);

@@ -710,8 +744,13 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
            size_t len = skb->len - offset;

            mptcp_init_skb(ssk, skb, offset, len);
            skb_orphan(skb);
            ret = __mptcp_move_skb(sk, skb) || ret;

            if (own_msk && sk_rmem_alloc_get(sk) < sk->sk_rcvbuf) {
                mptcp_subflow_lend_fwdmem(subflow, skb);
                ret |= __mptcp_move_skb(sk, skb);
            } else {
                __mptcp_add_backlog(sk, subflow, skb);
            }
            seq += len;

            if (unlikely(map_remaining < len)) {

@@ -830,7 +869,7 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
    struct sock *sk = (struct sock *)msk;
    bool moved;

    moved = __mptcp_move_skbs_from_subflow(msk, ssk);
    moved = __mptcp_move_skbs_from_subflow(msk, ssk, true);
    __mptcp_ofo_queue(msk);
    if (unlikely(ssk->sk_err))
        __mptcp_subflow_error_report(sk, ssk);
@@ -845,31 +884,26 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
    return moved;
}

static void __mptcp_data_ready(struct sock *sk, struct sock *ssk)
{
    struct mptcp_sock *msk = mptcp_sk(sk);

    /* Wake-up the reader only for in-sequence data */
    if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
        sk->sk_data_ready(sk);
}

void mptcp_data_ready(struct sock *sk, struct sock *ssk)
{
    struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
    struct mptcp_sock *msk = mptcp_sk(sk);

    /* The peer can send data while we are shutting down this
     * subflow at msk destruction time, but we must avoid enqueuing
     * subflow at subflow destruction time, but we must avoid enqueuing
     * more data to the msk receive queue
     */
    if (unlikely(subflow->disposable))
    if (unlikely(subflow->closing))
        return;

    mptcp_data_lock(sk);
    if (!sock_owned_by_user(sk))
        __mptcp_data_ready(sk, ssk);
    else
        __set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags);
    if (!sock_owned_by_user(sk)) {
        /* Wake-up the reader only for in-sequence data */
        if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
            sk->sk_data_ready(sk);
    } else {
        __mptcp_move_skbs_from_subflow(msk, ssk, false);
    }
    mptcp_data_unlock(sk);
}

@@ -895,12 +929,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
    mptcp_subflow_joined(msk, ssk);
    spin_unlock_bh(&msk->fallback_lock);

    /* attach to msk socket only after we are sure we will deal with it
     * at close time
     */
    if (sk->sk_socket && !ssk->sk_socket)
        mptcp_sock_graft(ssk, sk->sk_socket);

    mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
    mptcp_sockopt_sync_locked(msk, ssk);
    mptcp_stop_tout_timer(sk);
@@ -2114,60 +2142,80 @@ new_measure:
    msk->rcvq_space.time = mstamp;
}

static struct mptcp_subflow_context *
__mptcp_first_ready_from(struct mptcp_sock *msk,
             struct mptcp_subflow_context *subflow)
static bool __mptcp_move_skbs(struct sock *sk, struct list_head *skbs, u32 *delta)
{
    struct mptcp_subflow_context *start_subflow = subflow;

    while (!READ_ONCE(subflow->data_avail)) {
        subflow = mptcp_next_subflow(msk, subflow);
        if (subflow == start_subflow)
            return NULL;
    }
    return subflow;
}

static bool __mptcp_move_skbs(struct sock *sk)
{
    struct mptcp_subflow_context *subflow;
    struct sk_buff *skb = list_first_entry(skbs, struct sk_buff, list);
    struct mptcp_sock *msk = mptcp_sk(sk);
    bool ret = false;
    bool moved = false;

    if (list_empty(&msk->conn_list))
        return false;

    subflow = list_first_entry(&msk->conn_list,
                   struct mptcp_subflow_context, node);
    for (;;) {
        struct sock *ssk;
        bool slowpath;

        /*
         * As an optimization avoid traversing the subflows list
         * and ev. acquiring the subflow socket lock before baling out
         */
    *delta = 0;
    while (1) {
        /* If the msk recvbuf is full stop, don't drop */
        if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
            break;

        subflow = __mptcp_first_ready_from(msk, subflow);
        if (!subflow)
        prefetch(skb->next);
        list_del(&skb->list);
        *delta += skb->truesize;

        moved |= __mptcp_move_skb(sk, skb);
        if (list_empty(skbs))
            break;

        ssk = mptcp_subflow_tcp_sock(subflow);
        slowpath = lock_sock_fast(ssk);
        ret = __mptcp_move_skbs_from_subflow(msk, ssk) || ret;
        if (unlikely(ssk->sk_err))
            __mptcp_error_report(sk);
        unlock_sock_fast(ssk, slowpath);

        subflow = mptcp_next_subflow(msk, subflow);
        skb = list_first_entry(skbs, struct sk_buff, list);
    }

    __mptcp_ofo_queue(msk);
    if (ret)
    if (moved)
        mptcp_check_data_fin((struct sock *)msk);
    return ret;
    return moved;
}

static bool mptcp_can_spool_backlog(struct sock *sk, struct list_head *skbs)
{
    struct mptcp_sock *msk = mptcp_sk(sk);

    /* After CG initialization, subflows should never add skb before
     * gaining the CG themself.
     */
    DEBUG_NET_WARN_ON_ONCE(msk->backlog_unaccounted && sk->sk_socket &&
                   mem_cgroup_from_sk(sk));

    /* Don't spool the backlog if the rcvbuf is full. */
    if (list_empty(&msk->backlog_list) ||
        sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
        return false;

    INIT_LIST_HEAD(skbs);
    list_splice_init(&msk->backlog_list, skbs);
    return true;
}

static void mptcp_backlog_spooled(struct sock *sk, u32 moved,
                  struct list_head *skbs)
{
    struct mptcp_sock *msk = mptcp_sk(sk);

    WRITE_ONCE(msk->backlog_len, msk->backlog_len - moved);
    list_splice(skbs, &msk->backlog_list);
}

static bool mptcp_move_skbs(struct sock *sk)
{
    struct list_head skbs;
    bool enqueued = false;
    u32 moved;

    mptcp_data_lock(sk);
    while (mptcp_can_spool_backlog(sk, &skbs)) {
        mptcp_data_unlock(sk);
        enqueued |= __mptcp_move_skbs(sk, &skbs, &moved);

        mptcp_data_lock(sk);
        mptcp_backlog_spooled(sk, moved, &skbs);
    }
    mptcp_data_unlock(sk);
    return enqueued;
}

static unsigned int mptcp_inq_hint(const struct sock *sk)
@@ -2233,7 +2281,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,

        copied += bytes_read;

        if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk))
        if (!list_empty(&msk->backlog_list) && mptcp_move_skbs(sk))
            continue;

        /* only the MPTCP socket status is relevant here. The exit
@@ -2447,6 +2495,25 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
{
    struct mptcp_sock *msk = mptcp_sk(sk);
    bool dispose_it, need_push = false;
    int fwd_remaining;

    /* Do not pass RX data to the msk, even if the subflow socket is not
     * going to be freed (i.e. even for the first subflow on graceful
     * subflow close.
     */
    lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
    subflow->closing = 1;

    /* Borrow the fwd allocated page left-over; fwd memory for the subflow
     * could be negative at this point, but will be reach zero soon - when
     * the data allocated using such fragment will be freed.
     */
    if (subflow->lent_mem_frag) {
        fwd_remaining = PAGE_SIZE - subflow->lent_mem_frag;
        sk_forward_alloc_add(sk, fwd_remaining);
        sk_forward_alloc_add(ssk, -fwd_remaining);
        subflow->lent_mem_frag = 0;
    }

    /* If the first subflow moved to a close state before accept, e.g. due
     * to an incoming reset or listener shutdown, the subflow socket is

@@ -2458,7 +2525,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
        /* ensure later check in mptcp_worker() will dispose the msk */
        sock_set_flag(sk, SOCK_DEAD);
        mptcp_set_close_tout(sk, tcp_jiffies32 - (mptcp_close_timeout(sk) + 1));
        lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
        mptcp_subflow_drop_ctx(ssk);
        goto out_release;
    }

@@ -2467,8 +2533,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
    if (dispose_it)
        list_del(&subflow->node);

    lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);

    if (subflow->send_fastclose && ssk->sk_state != TCP_CLOSE)
        tcp_set_state(ssk, TCP_CLOSE);

@@ -2531,6 +2595,9 @@ out:
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
             struct mptcp_subflow_context *subflow)
{
    struct mptcp_sock *msk = mptcp_sk(sk);
    struct sk_buff *skb;

    /* The first subflow can already be closed and still in the list */
    if (subflow->close_event_done)
        return;

@@ -2540,6 +2607,17 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
    if (sk->sk_state == TCP_ESTABLISHED)
        mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);

    /* Remove any reference from the backlog to this ssk; backlog skbs consume
     * space in the msk receive queue, no need to touch sk->sk_rmem_alloc
     */
    list_for_each_entry(skb, &msk->backlog_list, list) {
        if (skb->sk != ssk)
            continue;

        atomic_sub(skb->truesize, &skb->sk->sk_rmem_alloc);
        skb->sk = NULL;
    }

    /* subflow aborted before reaching the fully_established status
     * attempt the creation of the next subflow
     */
@@ -2769,12 +2847,31 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
    unlock_sock_fast(ssk, slow);
}

static void mptcp_backlog_purge(struct sock *sk)
{
    struct mptcp_sock *msk = mptcp_sk(sk);
    struct sk_buff *tmp, *skb;
    LIST_HEAD(backlog);

    mptcp_data_lock(sk);
    list_splice_init(&msk->backlog_list, &backlog);
    msk->backlog_len = 0;
    mptcp_data_unlock(sk);

    list_for_each_entry_safe(skb, tmp, &backlog, list) {
        mptcp_borrow_fwdmem(sk, skb);
        kfree_skb_reason(skb, SKB_DROP_REASON_SOCKET_CLOSE);
    }
    sk_mem_reclaim(sk);
}

static void mptcp_do_fastclose(struct sock *sk)
{
    struct mptcp_subflow_context *subflow, *tmp;
    struct mptcp_sock *msk = mptcp_sk(sk);

    mptcp_set_state(sk, TCP_CLOSE);
    mptcp_backlog_purge(sk);

    /* Explicitly send the fastclose reset as need */
    if (__mptcp_check_fallback(msk))

@@ -2853,11 +2950,13 @@ static void __mptcp_init_sock(struct sock *sk)
    INIT_LIST_HEAD(&msk->conn_list);
    INIT_LIST_HEAD(&msk->join_list);
    INIT_LIST_HEAD(&msk->rtx_queue);
    INIT_LIST_HEAD(&msk->backlog_list);
    INIT_WORK(&msk->work, mptcp_worker);
    msk->out_of_order_queue = RB_ROOT;
    msk->first_pending = NULL;
    msk->timer_ival = TCP_RTO_MIN;
    msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
    msk->backlog_len = 0;

    WRITE_ONCE(msk->first, NULL);
    inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
@@ -3228,6 +3327,28 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
    inet_sk(msk)->inet_rcv_saddr = inet_sk(ssk)->inet_rcv_saddr;
}

static void mptcp_destroy_common(struct mptcp_sock *msk)
{
    struct mptcp_subflow_context *subflow, *tmp;
    struct sock *sk = (struct sock *)msk;

    __mptcp_clear_xmit(sk);
    mptcp_backlog_purge(sk);

    /* join list will be eventually flushed (with rst) at sock lock release time */
    mptcp_for_each_subflow_safe(msk, subflow, tmp)
        __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, 0);

    __skb_queue_purge(&sk->sk_receive_queue);
    skb_rbtree_purge(&msk->out_of_order_queue);

    /* move all the rx fwd alloc into the sk_mem_reclaim_final in
     * inet_sock_destruct() will dispose it
     */
    mptcp_token_destroy(msk);
    mptcp_pm_destroy(msk);
}

static int mptcp_disconnect(struct sock *sk, int flags)
{
    struct mptcp_sock *msk = mptcp_sk(sk);

@@ -3280,6 +3401,9 @@ static int mptcp_disconnect(struct sock *sk, int flags)
    msk->bytes_retrans = 0;
    msk->rcvspace_init = 0;

    /* for fallback's sake */
    WRITE_ONCE(msk->ack_seq, 0);

    WRITE_ONCE(sk->sk_shutdown, 0);
    sk_error_report(sk);
    return 0;

@@ -3430,27 +3554,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
    msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
}

void mptcp_destroy_common(struct mptcp_sock *msk)
{
    struct mptcp_subflow_context *subflow, *tmp;
    struct sock *sk = (struct sock *)msk;

    __mptcp_clear_xmit(sk);

    /* join list will be eventually flushed (with rst) at sock lock release time */
    mptcp_for_each_subflow_safe(msk, subflow, tmp)
        __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, 0);

    __skb_queue_purge(&sk->sk_receive_queue);
    skb_rbtree_purge(&msk->out_of_order_queue);

    /* move all the rx fwd alloc into the sk_mem_reclaim_final in
     * inet_sock_destruct() will dispose it
     */
    mptcp_token_destroy(msk);
    mptcp_pm_destroy(msk);
}

static void mptcp_destroy(struct sock *sk)
{
    struct mptcp_sock *msk = mptcp_sk(sk);
@@ -3479,8 +3582,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)

#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
                      BIT(MPTCP_RETRANSMIT) | \
                      BIT(MPTCP_FLUSH_JOIN_LIST) | \
                      BIT(MPTCP_DEQUEUE))
                      BIT(MPTCP_FLUSH_JOIN_LIST))

/* processes deferred events and flush wmem */
static void mptcp_release_cb(struct sock *sk)

@@ -3490,9 +3592,12 @@ static void mptcp_release_cb(struct sock *sk)

    for (;;) {
        unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
        struct list_head join_list;
        struct list_head join_list, skbs;
        bool spool_bl;
        u32 moved;

        if (!flags)
        spool_bl = mptcp_can_spool_backlog(sk, &skbs);
        if (!flags && !spool_bl)
            break;

        INIT_LIST_HEAD(&join_list);

@@ -3514,7 +3619,7 @@ static void mptcp_release_cb(struct sock *sk)
            __mptcp_push_pending(sk, 0);
        if (flags & BIT(MPTCP_RETRANSMIT))
            __mptcp_retrans(sk);
        if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk)) {
        if (spool_bl && __mptcp_move_skbs(sk, &skbs, &moved)) {
            /* notify ack seq update */
            mptcp_cleanup_rbuf(msk, 0);
            sk->sk_data_ready(sk);

@@ -3522,6 +3627,8 @@ static void mptcp_release_cb(struct sock *sk)

        cond_resched();
        spin_lock_bh(&sk->sk_lock.slock);
        if (spool_bl)
            mptcp_backlog_spooled(sk, moved, &skbs);
    }

    if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
@@ -3647,6 +3754,23 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent)
    write_unlock_bh(&sk->sk_callback_lock);
}

/* Can be called without holding the msk socket lock; use the callback lock
 * to avoid {READ_,WRITE_}ONCE annotations on sk_socket.
 */
static void mptcp_sock_check_graft(struct sock *sk, struct sock *ssk)
{
    struct socket *sock;

    write_lock_bh(&sk->sk_callback_lock);
    sock = sk->sk_socket;
    write_unlock_bh(&sk->sk_callback_lock);
    if (sock) {
        mptcp_sock_graft(ssk, sock);
        __mptcp_inherit_cgrp_data(sk, ssk);
        __mptcp_inherit_memcg(sk, ssk, GFP_ATOMIC);
    }
}

bool mptcp_finish_join(struct sock *ssk)
{
    struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);

@@ -3662,7 +3786,9 @@ bool mptcp_finish_join(struct sock *ssk)
        return false;
    }

    /* active subflow, already present inside the conn_list */
    /* Active subflow, already present inside the conn_list; is grafted
     * either by __mptcp_subflow_connect() or accept.
     */
    if (!list_empty(&subflow->node)) {
        spin_lock_bh(&msk->fallback_lock);
        if (!msk->allow_subflows) {

@@ -3689,11 +3815,17 @@ bool mptcp_finish_join(struct sock *ssk)
        if (ret) {
            sock_hold(ssk);
            list_add_tail(&subflow->node, &msk->conn_list);
            mptcp_sock_check_graft(parent, ssk);
        }
    } else {
        sock_hold(ssk);
        list_add_tail(&subflow->node, &msk->join_list);
        __set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->cb_flags);

        /* In case of later failures, __mptcp_flush_join_list() will
         * properly orphan the ssk via mptcp_close_ssk().
         */
        mptcp_sock_check_graft(parent, ssk);
    }
    mptcp_data_unlock(parent);

@@ -3754,7 +3886,7 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
        return -EINVAL;

    lock_sock(sk);
    if (__mptcp_move_skbs(sk))
    if (mptcp_move_skbs(sk))
        mptcp_cleanup_rbuf(msk, 0);
    *karg = mptcp_inq_hint(sk);
    release_sock(sk);
@@ -3954,6 +4086,69 @@ unlock:
    return err;
}

static void mptcp_graft_subflows(struct sock *sk)
{
    struct mptcp_subflow_context *subflow;
    struct mptcp_sock *msk = mptcp_sk(sk);

    if (mem_cgroup_sockets_enabled) {
        LIST_HEAD(join_list);

        /* Subflows joining after __inet_accept() will get the
         * mem CG properly initialized at mptcp_finish_join() time,
         * but subflows pending in join_list need explicit
         * initialization before flushing `backlog_unaccounted`
         * or MPTCP can later unexpectedly observe unaccounted memory.
         */
        mptcp_data_lock(sk);
        list_splice_init(&msk->join_list, &join_list);
        mptcp_data_unlock(sk);

        __mptcp_flush_join_list(sk, &join_list);
    }

    mptcp_for_each_subflow(msk, subflow) {
        struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

        lock_sock(ssk);

        /* Set ssk->sk_socket of accept()ed flows to mptcp socket.
         * This is needed so NOSPACE flag can be set from tcp stack.
         */
        if (!ssk->sk_socket)
            mptcp_sock_graft(ssk, sk->sk_socket);

        if (!mem_cgroup_sk_enabled(sk))
            goto unlock;

        __mptcp_inherit_cgrp_data(sk, ssk);
        __mptcp_inherit_memcg(sk, ssk, GFP_KERNEL);

unlock:
        release_sock(ssk);
    }

    if (mem_cgroup_sk_enabled(sk)) {
        gfp_t gfp = GFP_KERNEL | __GFP_NOFAIL;
        int amt;

        /* Account the backlog memory; prior accept() is aware of
         * fwd and rmem only.
         */
        mptcp_data_lock(sk);
        amt = sk_mem_pages(sk->sk_forward_alloc +
                   msk->backlog_unaccounted +
                   atomic_read(&sk->sk_rmem_alloc)) -
              sk_mem_pages(sk->sk_forward_alloc +
                   atomic_read(&sk->sk_rmem_alloc));
        msk->backlog_unaccounted = 0;
        mptcp_data_unlock(sk);

        if (amt)
            mem_cgroup_sk_charge(sk, amt, gfp);
    }
}

static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
                   struct proto_accept_arg *arg)
{

@@ -4001,26 +4196,17 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
        msk = mptcp_sk(newsk);
        msk->in_accept_queue = 0;

        /* set ssk->sk_socket of accept()ed flows to mptcp socket.
         * This is needed so NOSPACE flag can be set from tcp stack.
         */
        mptcp_for_each_subflow(msk, subflow) {
            struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

            if (!ssk->sk_socket)
                mptcp_sock_graft(ssk, newsock);
        }

        mptcp_graft_subflows(newsk);
        mptcp_rps_record_subflows(msk);

        /* Do late cleanup for the first subflow as necessary. Also
         * deal with bad peers not doing a complete shutdown.
         */
        if (unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) {
            __mptcp_close_ssk(newsk, msk->first,
                      mptcp_subflow_ctx(msk->first), 0);
            if (unlikely(list_is_singular(&msk->conn_list)))
                mptcp_set_state(newsk, TCP_CLOSE);
            mptcp_close_ssk(newsk, msk->first,
                    mptcp_subflow_ctx(msk->first));
        }
    } else {
tcpfallback:
@@ -124,7 +124,6 @@
#define MPTCP_FLUSH_JOIN_LIST 5
#define MPTCP_SYNC_STATE 6
#define MPTCP_SYNC_SNDBUF 7
#define MPTCP_DEQUEUE 8

struct mptcp_skb_cb {
    u64 map_seq;

@@ -357,6 +356,10 @@ struct mptcp_sock {
                 * allow_infinite_fallback and
                 * allow_join
                 */

    struct list_head backlog_list; /* protected by the data lock */
    u32 backlog_len;
    u32 backlog_unaccounted;
};

#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)

@@ -407,6 +410,7 @@ static inline int mptcp_space_from_win(const struct sock *sk, int win)
static inline int __mptcp_space(const struct sock *sk)
{
    return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
                    READ_ONCE(mptcp_sk(sk)->backlog_len) -
                    sk_rmem_alloc_get(sk));
}

@@ -536,16 +540,18 @@ struct mptcp_subflow_context {
        send_infinite_map : 1,
        remote_key_valid : 1, /* received the peer key from */
        disposable : 1, /* ctx can be free at ulp release time */
        closing : 1, /* must not pass rx data to msk anymore */
        stale : 1, /* unable to snd/rcv data, do not use for xmit */
        valid_csum_seen : 1, /* at least one csum validated */
        is_mptfo : 1, /* subflow is doing TFO */
        close_event_done : 1, /* has done the post-closed part */
        mpc_drop : 1, /* the MPC option has been dropped in a rtx */
        __unused : 9;
        __unused : 8;
    bool data_avail;
    bool scheduled;
    bool pm_listener; /* a listener managed by the kernel PM? */
    bool fully_established; /* path validated */
    u32 lent_mem_frag;
    u32 remote_nonce;
    u64 thmac;
    u32 local_nonce;
@@ -645,6 +651,42 @@ mptcp_send_active_reset_reason(struct sock *sk)
    tcp_send_active_reset(sk, GFP_ATOMIC, reason);
}

/* Made the fwd mem carried by the given skb available to the msk,
 * To be paired with a previous mptcp_subflow_lend_fwdmem() before freeing
 * the skb or setting the skb ownership.
 */
static inline void mptcp_borrow_fwdmem(struct sock *sk, struct sk_buff *skb)
{
    struct sock *ssk = skb->sk;

    /* The subflow just lend the skb fwd memory; if the subflow meanwhile
     * closed, mptcp_close_ssk() already released the ssk rcv memory.
     */
    DEBUG_NET_WARN_ON_ONCE(skb->destructor);
    sk_forward_alloc_add(sk, skb->truesize);
    if (!ssk)
        return;

    atomic_sub(skb->truesize, &ssk->sk_rmem_alloc);
    skb->sk = NULL;
}

static inline void
__mptcp_subflow_lend_fwdmem(struct mptcp_subflow_context *subflow, int size)
{
    int frag = (subflow->lent_mem_frag + size) & (PAGE_SIZE - 1);

    subflow->lent_mem_frag = frag;
}

static inline void
mptcp_subflow_lend_fwdmem(struct mptcp_subflow_context *subflow,
              struct sk_buff *skb)
{
    __mptcp_subflow_lend_fwdmem(subflow, skb->truesize);
    skb->destructor = NULL;
}

static inline u64
mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow)
{

@@ -707,6 +749,9 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
    return ret;
}

void __mptcp_inherit_memcg(struct sock *sk, struct sock *ssk, gfp_t gfp);
void __mptcp_inherit_cgrp_data(struct sock *sk, struct sock *ssk);

int mptcp_is_enabled(const struct net *net);
unsigned int mptcp_get_add_addr_timeout(const struct net *net);
int mptcp_is_checksum_enabled(const struct net *net);

@@ -977,8 +1022,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
    local_bh_enable();
}

void mptcp_destroy_common(struct mptcp_sock *msk);

#define MPTCP_TOKEN_MAX_RETRIES 4

void __init mptcp_token_init(void);
@@ -491,6 +491,9 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
    mptcp_crypto_key_sha(subflow->remote_key, NULL, &subflow->iasn);
    subflow->iasn++;

    /* for fallback's sake */
    subflow->map_seq = subflow->iasn;

    WRITE_ONCE(msk->remote_key, subflow->remote_key);
    WRITE_ONCE(msk->ack_seq, subflow->iasn);
    WRITE_ONCE(msk->can_ack, true);

@@ -1285,6 +1288,7 @@ static bool subflow_is_done(const struct sock *sk)
/* sched mptcp worker for subflow cleanup if no more data is pending */
static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
    const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
    struct sock *sk = (struct sock *)msk;

    if (likely(ssk->sk_state != TCP_CLOSE &&

@@ -1303,7 +1307,8 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
     */
    if (__mptcp_check_fallback(msk) && subflow_is_done(ssk) &&
        msk->first == ssk &&
        mptcp_update_rcv_data_fin(msk, READ_ONCE(msk->ack_seq), true))
        mptcp_update_rcv_data_fin(msk, subflow->map_seq +
                      subflow->map_data_len, true))
        mptcp_schedule_work(sk);
}

@@ -1433,9 +1438,12 @@ reset:

    skb = skb_peek(&ssk->sk_receive_queue);
    subflow->map_valid = 1;
    subflow->map_seq = READ_ONCE(msk->ack_seq);
    subflow->map_data_len = skb->len;
    subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
    subflow->map_seq = __mptcp_expand_seq(subflow->map_seq,
                          subflow->iasn +
                          TCP_SKB_CB(skb)->seq -
                          subflow->ssn_offset - 1);
    WRITE_ONCE(subflow->data_avail, true);
    return true;
}
@@ -1712,21 +1720,35 @@ err_out:
    return err;
}

static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
void __mptcp_inherit_memcg(struct sock *sk, struct sock *ssk, gfp_t gfp)
{
    /* Only if the msk has been accepted already (and not orphaned).*/
    if (!mem_cgroup_sockets_enabled || !sk->sk_socket)
        return;

    mem_cgroup_sk_inherit(sk, ssk);
    __sk_charge(ssk, gfp);
}

void __mptcp_inherit_cgrp_data(struct sock *sk, struct sock *ssk)
{
#ifdef CONFIG_SOCK_CGROUP_DATA
    struct sock_cgroup_data *parent_skcd = &parent->sk_cgrp_data,
                *child_skcd = &child->sk_cgrp_data;
    struct sock_cgroup_data *sk_cd = &sk->sk_cgrp_data,
                *ssk_cd = &ssk->sk_cgrp_data;

    /* only the additional subflows created by kworkers have to be modified */
    if (cgroup_id(sock_cgroup_ptr(parent_skcd)) !=
        cgroup_id(sock_cgroup_ptr(child_skcd))) {
        cgroup_sk_free(child_skcd);
        *child_skcd = *parent_skcd;
        cgroup_sk_clone(child_skcd);
    if (cgroup_id(sock_cgroup_ptr(sk_cd)) !=
        cgroup_id(sock_cgroup_ptr(ssk_cd))) {
        cgroup_sk_free(ssk_cd);
        *ssk_cd = *sk_cd;
        cgroup_sk_clone(sk_cd);
    }
#endif /* CONFIG_SOCK_CGROUP_DATA */
}

static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
{
    __mptcp_inherit_cgrp_data(parent, child);
    if (mem_cgroup_sockets_enabled)
        mem_cgroup_sk_inherit(parent, child);
}