Merge branch 'fix-poll-behaviour-for-tcp-based-tunnel-protocols'

Ralf Lici says:

====================
fix poll behaviour for TCP-based tunnel protocols

This patch series introduces a polling function for datagram-style
sockets that operates on custom skb queues, and updates ovpn (the
OpenVPN data-channel offload module) and espintcp (the TCP Encapsulation
of IKE and IPsec Packets implementation) to use it accordingly.

Protocols like the two above decapsulate packets received over TCP and
deliver userspace-bound data through a separate skb queue rather than
the standard sk_receive_queue. Previously, both relied on
datagram_poll(), which signals readiness based on sk_receive_queue and
can therefore report EPOLLIN for packets that will never reach
userspace, leading to misleading poll results and unnecessary recv
attempts from userspace.

Patch 1 introduces datagram_poll_queue(), a variant of datagram_poll()
that accepts an explicit receive queue. This builds on the approach
introduced in commit b50b058, which extended other skb-related
functions to support custom queues. Patches 2 and 3 update
espintcp_poll() and ovpn_tcp_poll(), respectively, to use the new
helper, ensuring readiness is signaled only when userspace data is
available.

Each patch is self-contained; the ovpn one additionally documents the
rationale for the change and enforces the expected socket lifecycle in
its poll callback.
====================
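To illustrate the helper added in patch 1 (a minimal sketch, not taken
from the series; my_tunnel_ctx, its user_queue field and
my_tunnel_poll() are hypothetical names), a protocol that queues
decapsulated, userspace-bound packets on its own list would poll that
list instead of sk_receive_queue:

	/* Hypothetical per-socket state holding packets destined for
	 * userspace after decapsulation.
	 */
	struct my_tunnel_ctx {
		struct sk_buff_head user_queue;
	};

	static __poll_t my_tunnel_poll(struct file *file, struct socket *sock,
				       poll_table *wait)
	{
		struct my_tunnel_ctx *ctx = sock->sk->sk_user_data;

		/* Readiness comes from the protocol's private queue rather
		 * than from sk_receive_queue, which may only hold TCP
		 * segments that userspace will never read from this socket.
		 */
		return datagram_poll_queue(file, sock, wait, &ctx->user_queue);
	}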

Link: https://patch.msgid.link/20251021100942.195010-1-ralf@mandelbit.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Committed by Paolo Abeni, 2025-10-23 15:46:10 +02:00
4 changed files with 60 additions and 19 deletions

diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c

@@ -560,16 +560,34 @@ static void ovpn_tcp_close(struct sock *sk, long timeout)
 static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock,
 			      poll_table *wait)
 {
-	__poll_t mask = datagram_poll(file, sock, wait);
+	struct sk_buff_head *queue = &sock->sk->sk_receive_queue;
 	struct ovpn_socket *ovpn_sock;
+	struct ovpn_peer *peer = NULL;
+	__poll_t mask;
 
 	rcu_read_lock();
 	ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
-	if (ovpn_sock && ovpn_sock->peer &&
-	    !skb_queue_empty(&ovpn_sock->peer->tcp.user_queue))
-		mask |= EPOLLIN | EPOLLRDNORM;
+
+	/* if we landed in this callback, we expect to have a
+	 * meaningful state. The ovpn_socket lifecycle would
+	 * prevent it otherwise.
+	 */
+	if (WARN(!ovpn_sock || !ovpn_sock->peer,
+		 "ovpn: null state in ovpn_tcp_poll!")) {
+		rcu_read_unlock();
+		return 0;
+	}
+
+	if (ovpn_peer_hold(ovpn_sock->peer)) {
+		peer = ovpn_sock->peer;
+		queue = &peer->tcp.user_queue;
+	}
 	rcu_read_unlock();
 
+	mask = datagram_poll_queue(file, sock, wait, queue);
+	if (peer)
+		ovpn_peer_put(peer);
+
 	return mask;
 }
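Note the lifecycle handling above: the queue pointer defaults to
sk_receive_queue and is switched to peer->tcp.user_queue only after a
peer reference has been taken, so the queue remains valid when
datagram_poll_queue() inspects it outside the RCU read-side section;
the reference is dropped once polling completes.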

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h

@@ -4204,6 +4204,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk,
 				    struct sk_buff_head *sk_queue,
 				    unsigned int flags, int *off, int *err);
 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err);
+__poll_t datagram_poll_queue(struct file *file, struct socket *sock,
+			     struct poll_table_struct *wait,
+			     struct sk_buff_head *rcv_queue);
 __poll_t datagram_poll(struct file *file, struct socket *sock,
 		       struct poll_table_struct *wait);
 int skb_copy_datagram_iter(const struct sk_buff *from, int offset,

diff --git a/net/core/datagram.c b/net/core/datagram.c

@@ -920,21 +920,22 @@ fault:
 EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
 
 /**
- *	datagram_poll - generic datagram poll
+ *	datagram_poll_queue - same as datagram_poll, but on a specific receive
+ *			      queue
  *	@file: file struct
  *	@sock: socket
  *	@wait: poll table
+ *	@rcv_queue: receive queue to poll
  *
- *	Datagram poll: Again totally generic. This also handles
- *	sequenced packet sockets providing the socket receive queue
- *	is only ever holding data ready to receive.
+ *	Performs polling on the given receive queue, handling shutdown, error,
+ *	and connection state. This is useful for protocols that deliver
+ *	userspace-bound packets through a custom queue instead of
+ *	sk->sk_receive_queue.
  *
- *	Note: when you *don't* use this routine for this protocol,
- *	and you use a different write policy from sock_writeable()
- *	then please supply your own write_space callback.
+ *	Return: poll bitmask indicating the socket's current state
  */
-__poll_t datagram_poll(struct file *file, struct socket *sock,
-		       poll_table *wait)
+__poll_t datagram_poll_queue(struct file *file, struct socket *sock,
+			     poll_table *wait, struct sk_buff_head *rcv_queue)
 {
 	struct sock *sk = sock->sk;
 	__poll_t mask;
@@ -956,7 +957,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
 		mask |= EPOLLHUP;
 
 	/* readable? */
-	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+	if (!skb_queue_empty_lockless(rcv_queue))
 		mask |= EPOLLIN | EPOLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
@@ -978,4 +979,27 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
 
 	return mask;
 }
+EXPORT_SYMBOL(datagram_poll_queue);
+
+/**
+ *	datagram_poll - generic datagram poll
+ *	@file: file struct
+ *	@sock: socket
+ *	@wait: poll table
+ *
+ *	Datagram poll: Again totally generic. This also handles
+ *	sequenced packet sockets providing the socket receive queue
+ *	is only ever holding data ready to receive.
+ *
+ *	Note: when you *don't* use this routine for this protocol,
+ *	and you use a different write policy from sock_writeable()
+ *	then please supply your own write_space callback.
+ *
+ *	Return: poll bitmask indicating the socket's current state
+ */
+__poll_t datagram_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+	return datagram_poll_queue(file, sock, wait,
+				   &sock->sk->sk_receive_queue);
+}
 EXPORT_SYMBOL(datagram_poll);
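datagram_poll() itself becomes a thin wrapper that passes
sk_receive_queue to datagram_poll_queue(), so existing callers keep
their current behaviour.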

diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c

@@ -555,14 +555,10 @@ static void espintcp_close(struct sock *sk, long timeout)
 static __poll_t espintcp_poll(struct file *file, struct socket *sock,
 			      poll_table *wait)
 {
-	__poll_t mask = datagram_poll(file, sock, wait);
 	struct sock *sk = sock->sk;
 	struct espintcp_ctx *ctx = espintcp_getctx(sk);
 
-	if (!skb_queue_empty(&ctx->ike_queue))
-		mask |= EPOLLIN | EPOLLRDNORM;
-
-	return mask;
+	return datagram_poll_queue(file, sock, wait, &ctx->ike_queue);
 }
 
 static void build_protos(struct proto *espintcp_prot,