fuse: {io-uring} Handle SQEs - register commands

This adds basic support for ring SQEs (with opcode=IORING_OP_URING_CMD).
For now only FUSE_IO_URING_CMD_REGISTER is handled to register queue
entries.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
Reviewed-by: Pavel Begunkov <asml.silence@gmail.com> # io_uring
Reviewed-by: Luis Henriques <luis@igalia.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
This commit is contained in:
Bernd Schubert
2025-01-20 02:28:59 +01:00
committed by Miklos Szeredi
parent 7ccd86ba3a
commit 24fe962c86
7 changed files with 542 additions and 1 deletions

View File

@@ -63,3 +63,15 @@ config FUSE_PASSTHROUGH
to be performed directly on a backing file.
If you want to allow passthrough operations, answer Y.
config FUSE_IO_URING
bool "FUSE communication over io-uring"
default y
depends on FUSE_FS
depends on IO_URING
help
This allows sending FUSE requests over the io-uring interface and
also adds request core affinity.
If you want to allow fuse server/client communication through io-uring,
answer Y.

View File

@@ -15,5 +15,6 @@ fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
fuse-$(CONFIG_SYSCTL) += sysctl.o
fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
virtiofs-y := virtio_fs.o

326
fs/fuse/dev_uring.c Normal file
View File

@@ -0,0 +1,326 @@
// SPDX-License-Identifier: GPL-2.0
/*
* FUSE: Filesystem in Userspace
* Copyright (c) 2023-2024 DataDirect Networks.
*/
#include "fuse_i.h"
#include "dev_uring_i.h"
#include "fuse_dev_i.h"
#include <linux/fs.h>
#include <linux/io_uring/cmd.h>
/*
 * Global on/off switch for fuse-over-io-uring, exported as the module
 * parameter "enable_uring" (mode 0644). As a static bool without an
 * initializer it starts out false. fuse_uring_enabled() reports it and
 * fuse_uring_cmd() rejects every command with -EOPNOTSUPP while it is unset.
 */
static bool __read_mostly enable_uring;
module_param(enable_uring, bool, 0644);
MODULE_PARM_DESC(enable_uring,
"Enable userspace communication through io-uring");
/*
 * Number of iovec segments every register SQE has to describe:
 * iov[0] is the header buffer, iov[1] the payload buffer.
 */
#define FUSE_URING_IOV_SEGS 2 /* header and payload */
/*
 * Report whether fuse-over-io-uring is switched on.
 *
 * Returns the current value of the enable_uring module parameter.
 */
bool fuse_uring_enabled(void)
{
	return enable_uring;
}
void fuse_uring_destruct(struct fuse_conn *fc)
{
struct fuse_ring *ring = fc->ring;
int qid;
if (!ring)
return;
for (qid = 0; qid < ring->nr_queues; qid++) {
struct fuse_ring_queue *queue = ring->queues[qid];
if (!queue)
continue;
WARN_ON(!list_empty(&queue->ent_avail_queue));
WARN_ON(!list_empty(&queue->ent_commit_queue));
kfree(queue);
ring->queues[qid] = NULL;
}
kfree(ring->queues);
kfree(ring);
fc->ring = NULL;
}
/*
 * Basic ring setup for this connection based on the provided configuration
 *
 * Allocates the ring and its per-CPU queue pointer array (one slot per
 * possible CPU, all NULL initially) and installs the ring in fc->ring.
 *
 * The ring is completely initialized before it becomes visible through
 * fc->ring, because fc->ring is also read without holding fc->lock.
 *
 * Returns the ring that ends up installed in fc->ring - either the one
 * created here or, when another thread won the race, the existing one.
 * Returns NULL on allocation failure.
 */
static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
{
	struct fuse_ring *ring;
	size_t nr_queues = num_possible_cpus();
	struct fuse_ring *res = NULL;
	size_t max_payload_size;

	ring = kzalloc(sizeof(*fc->ring), GFP_KERNEL_ACCOUNT);
	if (!ring)
		return NULL;

	ring->queues = kcalloc(nr_queues, sizeof(struct fuse_ring_queue *),
			       GFP_KERNEL_ACCOUNT);
	if (!ring->queues)
		goto out_err;

	max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write);
	max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE);

	/* Fill in all fields while the ring is still private to this thread */
	ring->nr_queues = nr_queues;
	ring->fc = fc;
	ring->max_payload_sz = max_payload_size;

	spin_lock(&fc->lock);
	if (fc->ring) {
		/* race, another thread created the ring in the meantime */
		res = fc->ring;
		spin_unlock(&fc->lock);
		goto out_err;
	}

	/*
	 * Publish only now: the release store orders the field
	 * initialization above before the pointer becomes visible.
	 */
	smp_store_release(&fc->ring, ring);
	spin_unlock(&fc->lock);
	return ring;

out_err:
	kfree(ring->queues);
	kfree(ring);
	return res;
}
/*
 * Allocate and install the queue for @qid in @ring.
 *
 * The caller must have validated @qid against ring->nr_queues.
 *
 * Returns the queue stored in ring->queues[qid] - the new one, or the
 * already existing one if another thread installed it first - or NULL on
 * allocation failure.
 */
static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
						       int qid)
{
	struct fuse_conn *fc = ring->fc;
	struct fuse_ring_queue *queue;
	struct fuse_ring_queue *existing;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
	if (!queue)
		return NULL;

	queue->qid = qid;
	queue->ring = ring;
	spin_lock_init(&queue->lock);
	INIT_LIST_HEAD(&queue->ent_avail_queue);
	INIT_LIST_HEAD(&queue->ent_commit_queue);

	spin_lock(&fc->lock);
	/* Sample the slot while holding the lock instead of after unlock */
	existing = ring->queues[qid];
	if (existing) {
		spin_unlock(&fc->lock);
		kfree(queue);
		return existing;
	}

	/*
	 * write_once and lock as the caller mostly doesn't take the lock at all
	 */
	WRITE_ONCE(ring->queues[qid], queue);
	spin_unlock(&fc->lock);

	return queue;
}
/*
* Make a ring entry available for fuse_req assignment
*/
static void fuse_uring_ent_avail(struct fuse_ring_ent *ent,
struct fuse_ring_queue *queue)
{
WARN_ON_ONCE(!ent->cmd);
list_move(&ent->list, &queue->ent_avail_queue);
ent->state = FRRS_AVAILABLE;
}
/*
 * Final step of FUSE_IO_URING_CMD_REGISTER handling: attach @cmd to @ent
 * and put the entry on its queue's available list, all under the queue
 * lock.
 *
 * @issue_flags is accepted but not used by this function.
 */
static void fuse_uring_do_register(struct fuse_ring_ent *ent,
				   struct io_uring_cmd *cmd,
				   unsigned int issue_flags)
{
	struct fuse_ring_queue *q = ent->queue;

	spin_lock(&q->lock);

	ent->cmd = cmd;
	fuse_uring_ent_avail(ent, q);

	spin_unlock(&q->lock);
}
/*
 * sqe->addr is a ptr to an iovec array, iov[0] has the headers, iov[1]
 * the payload
 *
 * Copies the FUSE_URING_IOV_SEGS iovec descriptors referenced by the SQE
 * into @iov.
 *
 * Returns 0 on success, -EINVAL if sqe->len does not announce exactly
 * FUSE_URING_IOV_SEGS segments, or the negative error from import_iovec().
 */
static int fuse_uring_get_iovec_from_sqe(const struct io_uring_sqe *sqe,
					 struct iovec iov[FUSE_URING_IOV_SEGS])
{
	struct iovec __user *uiov = u64_to_user_ptr(READ_ONCE(sqe->addr));
	struct iov_iter iter;
	ssize_t ret;

	/* SQE fields are read exactly once, same as sqe->addr above */
	if (READ_ONCE(sqe->len) != FUSE_URING_IOV_SEGS)
		return -EINVAL;

	/*
	 * Direction for buffer access will actually be READ and WRITE,
	 * using write for the import should include READ access as well.
	 *
	 * nr_segs equals fast_segs here, so import_iovec() is expected to
	 * fill the caller's array instead of allocating a new one.
	 */
	ret = import_iovec(WRITE, uiov, FUSE_URING_IOV_SEGS,
			   FUSE_URING_IOV_SEGS, &iov, &iter);
	if (ret < 0)
		return ret;

	return 0;
}
static struct fuse_ring_ent *
fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
struct fuse_ring_queue *queue)
{
struct fuse_ring *ring = queue->ring;
struct fuse_ring_ent *ent;
size_t payload_size;
struct iovec iov[FUSE_URING_IOV_SEGS];
int err;
err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
if (err) {
pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
err);
return ERR_PTR(err);
}
err = -EINVAL;
if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
return ERR_PTR(err);
}
payload_size = iov[1].iov_len;
if (payload_size < ring->max_payload_sz) {
pr_info_ratelimited("Invalid req payload len %zu\n",
payload_size);
return ERR_PTR(err);
}
err = -ENOMEM;
ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
if (!ent)
return ERR_PTR(err);
INIT_LIST_HEAD(&ent->list);
ent->queue = queue;
ent->headers = iov[0].iov_base;
ent->payload = iov[1].iov_base;
return ent;
}
/*
 * Register header and payload buffer with the kernel and puts the
 * entry as "ready to get fuse requests" on the queue
 *
 * The ring and the queue addressed by the command's qid are created on
 * first use.
 *
 * Returns 0 on success, -ENOMEM if the ring or queue cannot be allocated,
 * -EINVAL for an out-of-range qid, or the error from creating the entry.
 */
static int fuse_uring_register(struct io_uring_cmd *cmd,
			       unsigned int issue_flags, struct fuse_conn *fc)
{
	const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
	/*
	 * fc->ring is read without fc->lock; the acquire load makes sure the
	 * ring fields used below are not read ahead of the pointer itself.
	 */
	struct fuse_ring *ring = smp_load_acquire(&fc->ring);
	struct fuse_ring_queue *queue;
	struct fuse_ring_ent *ent;
	unsigned int qid = READ_ONCE(cmd_req->qid);

	if (!ring) {
		ring = fuse_uring_create(fc);
		if (!ring)
			return -ENOMEM;
	}

	if (qid >= ring->nr_queues) {
		pr_info_ratelimited("fuse: Invalid ring qid %u\n", qid);
		return -EINVAL;
	}

	/* Lockless read of a slot that is written with WRITE_ONCE() */
	queue = READ_ONCE(ring->queues[qid]);
	if (!queue) {
		queue = fuse_uring_create_queue(ring, qid);
		if (!queue)
			return -ENOMEM;
	}

	/*
	 * The created queue above does not need to be destructed in
	 * case of entry errors below, will be done at ring destruction time.
	 */

	ent = fuse_uring_create_ring_ent(cmd, queue);
	if (IS_ERR(ent))
		return PTR_ERR(ent);

	fuse_uring_do_register(ent, cmd, issue_flags);

	return 0;
}
/*
 * io_uring entry point for commands sent to the fuse device with op code
 * IORING_OP_URING_CMD.
 *
 * Only FUSE_IO_URING_CMD_REGISTER is handled; every other cmd_op gets
 * -EINVAL.
 *
 * Returns -EIOCBQUEUED once the entry has been registered (presumably the
 * command is completed later - confirm against the io_uring cmd API),
 * otherwise a negative error:
 *   -EOPNOTSUPP    enable_uring is off
 *   -EINVAL        SQE is not 128 bytes, or unknown cmd_op
 *   -ENOTCONN      no fuse device behind the file, or not connected
 *   -ECONNABORTED  connection was aborted
 *   -EAGAIN        connection is not initialized yet
 *   or whatever fuse_uring_register() reports.
 */
int __maybe_unused fuse_uring_cmd(struct io_uring_cmd *cmd,
				  unsigned int issue_flags)
{
	const u32 opcode = cmd->cmd_op;
	struct fuse_conn *fc;
	struct fuse_dev *fud;
	int ret;

	if (!enable_uring) {
		pr_info_ratelimited("fuse-io-uring is disabled\n");
		return -EOPNOTSUPP;
	}

	/* This extra SQE size holds struct fuse_uring_cmd_req */
	if ((issue_flags & IO_URING_F_SQE128) == 0)
		return -EINVAL;

	fud = fuse_get_dev(cmd->file);
	if (fud == NULL) {
		pr_info_ratelimited("No fuse device found\n");
		return -ENOTCONN;
	}

	fc = fud->fc;
	if (fc->aborted)
		return -ECONNABORTED;
	if (!fc->connected)
		return -ENOTCONN;

	/*
	 * fuse_uring_register() needs the ring to be initialized,
	 * we need to know the max payload size
	 */
	if (!fc->initialized)
		return -EAGAIN;

	/* Registration is the only command supported so far */
	if (opcode != FUSE_IO_URING_CMD_REGISTER)
		return -EINVAL;

	ret = fuse_uring_register(cmd, issue_flags, fc);
	if (ret != 0) {
		pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n",
			     ret);
		return ret;
	}

	return -EIOCBQUEUED;
}

113
fs/fuse/dev_uring_i.h Normal file
View File

@@ -0,0 +1,113 @@
/* SPDX-License-Identifier: GPL-2.0
*
* FUSE: Filesystem in Userspace
* Copyright (c) 2023-2024 DataDirect Networks.
*/
#ifndef _FS_FUSE_DEV_URING_I_H
#define _FS_FUSE_DEV_URING_I_H
#include "fuse_i.h"
#ifdef CONFIG_FUSE_IO_URING
/*
 * State of a ring entry, stored in struct fuse_ring_ent::state.
 * Entries are allocated zeroed (kzalloc() in fuse_uring_create_ring_ent()),
 * so a fresh entry starts out as FRRS_INVALID.
 */
enum fuse_ring_req_state {
/* Zero value: no state has been assigned yet */
FRRS_INVALID = 0,
/* The ring entry was received from userspace and is being processed */
FRRS_COMMIT,
/* The ring entry is waiting for new fuse requests */
FRRS_AVAILABLE,
/* The ring entry is in or on the way to user space */
FRRS_USERSPACE,
};
/** A fuse ring entry, part of the ring queue */
struct fuse_ring_ent {
/*
 * userspace buffers recorded at registration time: the base addresses of
 * iov[0] (headers) and iov[1] (payload) from the iovec array the SQE
 * points to
 */
struct fuse_uring_req_header __user *headers;
void __user *payload;
/* the ring queue that owns the request */
struct fuse_ring_queue *queue;
/* fields below are protected by queue->lock */
/* io_uring command this entry was registered with */
struct io_uring_cmd *cmd;
/* links the entry into one of the owning queue's entry lists */
struct list_head list;
/* current state of the entry, see enum fuse_ring_req_state */
enum fuse_ring_req_state state;
/* presumably the fuse request currently assigned to this entry - TODO confirm */
struct fuse_req *fuse_req;
};
/*
 * One queue of a fuse ring. A queue is allocated on demand, the first time
 * a ring entry is registered for its qid.
 */
struct fuse_ring_queue {
/*
 * back pointer to the main fuse uring structure that holds this
 * queue
 */
struct fuse_ring *ring;
/* queue id, corresponds to the cpu core; index into ring->queues */
unsigned int qid;
/*
 * queue lock, taken when any value in the queue changes _and_ also
 * a ring entry state changes.
 */
spinlock_t lock;
/* available ring entries (struct fuse_ring_ent) */
struct list_head ent_avail_queue;
/*
 * entries in the process of being committed or in the process
 * to be sent to userspace
 */
struct list_head ent_commit_queue;
};
/**
 * Describes if uring is used for communication and holds all the data
 * needed for uring communication. One instance per fuse connection,
 * reachable through fuse_conn::ring.
 */
struct fuse_ring {
/* back pointer */
struct fuse_conn *fc;
/* number of ring queues, set to num_possible_cpus() at ring creation */
size_t nr_queues;
/*
 * maximum payload/arg size; the payload buffer of every registered entry
 * has to be at least this large
 */
size_t max_payload_sz;
/*
 * array of nr_queues queue pointers, indexed by qid; a slot stays NULL
 * until the queue for that qid has been created
 */
struct fuse_ring_queue **queues;
};
bool fuse_uring_enabled(void);
void fuse_uring_destruct(struct fuse_conn *fc);
int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
#else /* CONFIG_FUSE_IO_URING */
/*
 * No-op replacements used when CONFIG_FUSE_IO_URING is not set, so that
 * callers do not need their own #ifdefs.
 */
struct fuse_ring;

/* Without io-uring support there is no ring to set up */
static inline void fuse_uring_create(struct fuse_conn *fc)
{
}

/* Without io-uring support there is no ring to tear down */
static inline void fuse_uring_destruct(struct fuse_conn *fc)
{
}

/* fuse-over-io-uring can never be enabled in this configuration */
static inline bool fuse_uring_enabled(void)
{
	return false;
}
#endif /* CONFIG_FUSE_IO_URING */
#endif /* _FS_FUSE_DEV_URING_I_H */

View File

@@ -923,6 +923,11 @@ struct fuse_conn {
/** IDR for backing files ids */
struct idr backing_files_map;
#endif
#ifdef CONFIG_FUSE_IO_URING
/** uring connection information */
struct fuse_ring *ring;
#endif
};
/*

View File

@@ -7,6 +7,7 @@
*/
#include "fuse_i.h"
#include "dev_uring_i.h"
#include <linux/pagemap.h>
#include <linux/slab.h>
@@ -992,6 +993,8 @@ static void delayed_release(struct rcu_head *p)
{
struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu);
fuse_uring_destruct(fc);
put_user_ns(fc->user_ns);
fc->release(fc);
}
@@ -1446,6 +1449,13 @@ void fuse_send_init(struct fuse_mount *fm)
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
flags |= FUSE_PASSTHROUGH;
/*
* This is purely an informational flag for the fuse server. There is no
* need to check the reply - the server either sends IORING_OP_URING_CMD
* or it does not.
*/
if (fuse_uring_enabled())
flags |= FUSE_OVER_IO_URING;
ia->in.flags = flags;
ia->in.flags2 = flags >> 32;

View File

@@ -220,6 +220,15 @@
*
* 7.41
* - add FUSE_ALLOW_IDMAP
* 7.42
* - Add FUSE_OVER_IO_URING and all other io-uring related flags and data
* structures:
* - struct fuse_uring_ent_in_out
* - struct fuse_uring_req_header
* - struct fuse_uring_cmd_req
* - FUSE_URING_IN_OUT_HEADER_SZ
* - FUSE_URING_OP_IN_OUT_SZ
* - enum fuse_uring_cmd
*/
#ifndef _LINUX_FUSE_H
@@ -255,7 +264,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
#define FUSE_KERNEL_MINOR_VERSION 41
#define FUSE_KERNEL_MINOR_VERSION 42
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -425,6 +434,7 @@ struct fuse_file_lock {
* FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit
* of the request ID indicates resend requests
* FUSE_ALLOW_IDMAP: allow creation of idmapped mounts
* FUSE_OVER_IO_URING: Indicate that client supports io-uring
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -471,6 +481,7 @@ struct fuse_file_lock {
/* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */
#define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP
#define FUSE_ALLOW_IDMAP (1ULL << 40)
#define FUSE_OVER_IO_URING (1ULL << 41)
/**
* CUSE INIT request/reply flags
@@ -1206,4 +1217,67 @@ struct fuse_supp_groups {
uint32_t groups[];
};
/**
 * Sizes, in bytes, of the two fixed-size areas at the start of the ring
 * buffer header (struct fuse_uring_req_header): FUSE_URING_IN_OUT_HEADER_SZ
 * for in_out[] and FUSE_URING_OP_IN_OUT_SZ for op_in[].
 */
#define FUSE_URING_IN_OUT_HEADER_SZ 128
#define FUSE_URING_OP_IN_OUT_SZ 128
/*
 * Used as part of the fuse_uring_req_header (its ring_ent_in_out member).
 */
struct fuse_uring_ent_in_out {
/* NOTE(review): no flag values are defined in this view of the header */
uint64_t flags;
/*
 * commit ID to be used in a reply to a ring request (see also
 * struct fuse_uring_cmd_req)
 */
uint64_t commit_id;
/* size of user payload buffer */
uint32_t payload_sz;
/* explicit padding so that the following 64-bit field is 8-byte aligned */
uint32_t padding;
/* presumably kept for future extensions - TODO confirm */
uint64_t reserved;
};
/**
 * Header for all fuse-io-uring requests
 *
 * This is the layout of the header buffer (iov[0]) that is registered with
 * a ring entry; the kernel rejects header buffers smaller than this struct.
 */
struct fuse_uring_req_header {
/* struct fuse_in_header / struct fuse_out_header */
char in_out[FUSE_URING_IN_OUT_HEADER_SZ];
/* per op code header */
char op_in[FUSE_URING_OP_IN_OUT_SZ];
/* per ring entry data, see struct fuse_uring_ent_in_out */
struct fuse_uring_ent_in_out ring_ent_in_out;
};
/**
 * sqe commands to the kernel
 *
 * Passed as the cmd_op of an IORING_OP_URING_CMD SQE.
 */
enum fuse_uring_cmd {
/* zero is not a valid command */
FUSE_IO_URING_CMD_INVALID = 0,
/* register the request buffer and fetch a fuse request */
FUSE_IO_URING_CMD_REGISTER = 1,
/* commit fuse request result and fetch next request */
FUSE_IO_URING_CMD_COMMIT_AND_FETCH = 2,
};
/**
 * In the 80B command area of the SQE.
 *
 * The command area is only available with 128-byte SQEs; the kernel
 * rejects fuse commands that are not issued with IO_URING_F_SQE128.
 */
struct fuse_uring_cmd_req {
/* NOTE(review): no flag values are defined in this view of the header */
uint64_t flags;
/* entry identifier for commits */
uint64_t commit_id;
/* queue the command is for (queue index) */
uint16_t qid;
/* explicit padding up to a multiple of 8 bytes */
uint8_t padding[6];
};
#endif /* _LINUX_FUSE_H */