sunrpc: allocate a separate bvec array for socket sends

svc_tcp_sendmsg() calls xdr_buf_to_bvec() with the second slot of
rq_bvec as the start, but doesn't reduce the array length by one, which
could lead to an array overrun. Also, rq_bvec is always rq_maxpages in
length, which can be too short in some cases, since the TCP record
marker consumes a slot.
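
To make the overrun arithmetic concrete, here is a small standalone sketch
(illustrative userspace C, not the kernel code; the maxpages value is
arbitrary):

#include <stdio.h>

int main(void)
{
        unsigned int maxpages = 259;    /* arbitrary stand-in for rq_maxpages */

        /* The TCP record marker sits in slot 0 and the payload is laid out
         * from slot 1 onward. If the payload may still occupy up to
         * maxpages entries, the highest slot touched is maxpages itself,
         * one past the last valid index of a maxpages-sized array.
         */
        unsigned int last_valid = maxpages - 1;
        unsigned int last_touched = 1 + (maxpages - 1);

        printf("last valid index:   %u\n", last_valid);
        printf("last index touched: %u\n", last_touched);

        /* Either the advertised capacity must shrink to maxpages - 1, or
         * the array must grow to maxpages + 1 slots; the patch below does
         * the latter.
         */
        return 0;
}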

Fix both problems by adding a separate bvec array to the svc_sock that
is specifically for sending. For TCP, make this array one slot longer
than rq_maxpages, to account for the record marker. For UDP, allocate
only as large an array as needed, since UDP payloads are limited to 64k.
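
Roughly, the new sizing works out as below (a standalone sketch; the page
size, the 64k figure, and the maxpages value are assumptions for
illustration, not the kernel's build-time constants):

#include <stdio.h>

#define ASSUMED_PAGE_SIZE       4096u   /* assumption: 4 KiB pages */
#define ASSUMED_UDP_PAYLOAD_MAX 65536u  /* assumption: the 64k UDP cap */

int main(void)
{
        unsigned int maxpages = 259;    /* illustrative per-request page count */

        /* TCP data sockets: one extra slot for the record marker. */
        unsigned int tcp_send_slots = maxpages + 1;

        /* UDP sockets: header page + payload pages + one slot in case the
         * payload is unaligned + tail page, independent of maxpages.
         */
        unsigned int udp_send_slots =
                1 + ASSUMED_UDP_PAYLOAD_MAX / ASSUMED_PAGE_SIZE + 1 + 1;

        printf("TCP send slots: %u\n", tcp_send_slots); /* 260 */
        printf("UDP send slots: %u\n", udp_send_slots); /* 19 */
        return 0;
}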

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: NeilBrown <neil@brown.name>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Author:    Jeff Layton <jlayton@kernel.org>
Date:      2025-10-13 09:54:53 -04:00
Committer: Chuck Lever
Commit:    6b3b697d65
Parent:    ebd3330d1c
2 changed files with 51 additions and 7 deletions

include/linux/sunrpc/svcsock.h

@@ -26,6 +26,9 @@ struct svc_sock {
 	void (*sk_odata)(struct sock *);
 	void (*sk_owspace)(struct sock *);
 
+	/* For sends (protected by xpt_mutex) */
+	struct bio_vec *sk_bvec;
+
 	/* private TCP part */
 	/* On-the-wire fragment header: */
 	__be32 sk_marker;

net/sunrpc/svcsock.c

@@ -68,6 +68,17 @@
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+/*
+ * For UDP:
+ * 1 for header page
+ * enough pages for RPCSVC_MAXPAYLOAD_UDP
+ * 1 in case payload is not aligned
+ * 1 for tail page
+ */
+enum {
+	SUNRPC_MAX_UDP_SENDPAGES = 1 + RPCSVC_MAXPAYLOAD_UDP / PAGE_SIZE + 1 + 1
+};
+
 /* To-do: to avoid tying up an nfsd thread while waiting for a
  * handshake request, the request could instead be deferred.
  */
@@ -740,14 +751,14 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
 	if (svc_xprt_is_dead(xprt))
 		goto out_notconn;
 
-	count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, xdr);
+	count = xdr_buf_to_bvec(svsk->sk_bvec, SUNRPC_MAX_UDP_SENDPAGES, xdr);
 
-	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
 		      count, rqstp->rq_res.len);
 	err = sock_sendmsg(svsk->sk_sock, &msg);
 	if (err == -ECONNREFUSED) {
 		/* ICMP error on earlier request. */
-		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
 			      count, rqstp->rq_res.len);
 		err = sock_sendmsg(svsk->sk_sock, &msg);
 	}
@@ -1236,19 +1247,19 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
 	int ret;
 
 	/* The stream record marker is copied into a temporary page
-	 * fragment buffer so that it can be included in rq_bvec.
+	 * fragment buffer so that it can be included in sk_bvec.
 	 */
 	buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
 			      GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 	memcpy(buf, &marker, sizeof(marker));
-	bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
+	bvec_set_virt(svsk->sk_bvec, buf, sizeof(marker));
 
-	count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages,
+	count = xdr_buf_to_bvec(svsk->sk_bvec + 1, rqstp->rq_maxpages,
 				&rqstp->rq_res);
 
-	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, svsk->sk_bvec,
 		      1 + count, sizeof(marker) + rqstp->rq_res.len);
 	ret = sock_sendmsg(svsk->sk_sock, &msg);
 	page_frag_free(buf);
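
A quick standalone check of the post-fix bound for the TCP path (the
maxpages value is again illustrative): the marker occupies slot 0 of an
array sized maxpages + 1, and xdr_buf_to_bvec() is bounded to maxpages
entries starting at slot 1, so the send never runs past the array.

#include <assert.h>

int main(void)
{
        unsigned int maxpages = 259;            /* illustrative rq_maxpages */
        unsigned int sk_bvec_slots = maxpages + 1;      /* TCP send array size */
        unsigned int count;

        /* xdr_buf_to_bvec() fills at most the capacity it is given, which
         * after the patch is maxpages entries for the payload part.
         */
        for (count = 0; count <= maxpages; count++)
                assert(1 + count <= sk_bvec_slots);
        return 0;
}
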
@@ -1393,6 +1404,20 @@ void svc_sock_update_bufs(struct svc_serv *serv)
 	spin_unlock_bh(&serv->sv_lock);
 }
 
+static int svc_sock_sendpages(struct svc_serv *serv, struct socket *sock, int flags)
+{
+	switch (sock->type) {
+	case SOCK_STREAM:
+		/* +1 for TCP record marker */
+		if (flags & SVC_SOCK_TEMPORARY)
+			return svc_serv_maxpages(serv) + 1;
+		return 0;
+	case SOCK_DGRAM:
+		return SUNRPC_MAX_UDP_SENDPAGES;
+	}
+	return -EINVAL;
+}
+
 /*
  * Initialize socket for RPC use and create svc_sock struct
  */
@@ -1403,12 +1428,26 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 	struct svc_sock *svsk;
 	struct sock *inet;
 	int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
+	int sendpages;
 	unsigned long pages;
 
+	sendpages = svc_sock_sendpages(serv, sock, flags);
+	if (sendpages < 0)
+		return ERR_PTR(sendpages);
+
 	pages = svc_serv_maxpages(serv);
 	svsk = kzalloc(struct_size(svsk, sk_pages, pages), GFP_KERNEL);
 	if (!svsk)
 		return ERR_PTR(-ENOMEM);
 
+	if (sendpages) {
+		svsk->sk_bvec = kcalloc(sendpages, sizeof(*svsk->sk_bvec), GFP_KERNEL);
+		if (!svsk->sk_bvec) {
+			kfree(svsk);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
 	svsk->sk_maxpages = pages;
 
 	inet = sock->sk;
@@ -1420,6 +1459,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 				     inet->sk_protocol,
 				     ntohs(inet_sk(inet)->inet_sport));
 		if (err < 0) {
+			kfree(svsk->sk_bvec);
 			kfree(svsk);
 			return ERR_PTR(err);
 		}
@@ -1637,5 +1677,6 @@ static void svc_sock_free(struct svc_xprt *xprt)
 		sock_release(sock);
 
 	page_frag_cache_drain(&svsk->sk_frag_cache);
+	kfree(svsk->sk_bvec);
 	kfree(svsk);
 }
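
As a closing note, the allocation/teardown shape the patch adds can be
sketched in standalone form (toy names such as toy_sock and toy_setup()
are hypothetical stand-ins, not kernel APIs):

#include <stdlib.h>

struct toy_bvec {
        void *base;
        unsigned int len;
};

struct toy_sock {
        struct toy_bvec *send_bvec;     /* plays the role of sk_bvec */
};

/* Size the send array by socket type at setup time; unwind on failure. */
static struct toy_sock *toy_setup(unsigned int nr_send_slots)
{
        struct toy_sock *sk = calloc(1, sizeof(*sk));

        if (!sk)
                return NULL;
        if (nr_send_slots) {
                sk->send_bvec = calloc(nr_send_slots, sizeof(*sk->send_bvec));
                if (!sk->send_bvec) {
                        free(sk);       /* mirrors the svc_setup_socket() error path */
                        return NULL;
                }
        }
        return sk;
}

/* The destructor frees the send array along with the socket struct. */
static void toy_free(struct toy_sock *sk)
{
        free(sk->send_bvec);            /* mirrors svc_sock_free() */
        free(sk);
}

int main(void)
{
        struct toy_sock *sk = toy_setup(19);    /* e.g. a UDP-sized array */

        if (sk)
                toy_free(sk);
        return 0;
}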