liveupdate: luo_session: add ioctls for file preservation

Introduce the userspace interface and internal logic required to manage
the lifecycle of file descriptors within a session.  Previously, a session
was merely a container; this change makes it a functional management unit.

The following capabilities are added:

A new set of ioctl commands is added; they operate on the file
descriptor returned by CREATE_SESSION. They allow userspace to:
- LIVEUPDATE_SESSION_PRESERVE_FD: Add a file descriptor to a session
  to be preserved across the live update.
- LIVEUPDATE_SESSION_RETRIEVE_FD: Retrieve a preserved file in the
  new kernel using its unique token.
- LIVEUPDATE_SESSION_FINISH: Signal that restoration of the session is
  complete, so the kernel can release the preserved resources.

The session's .release handler is enhanced to be state-aware.  When a
session's file descriptor is closed, it correctly unpreserves the session
based on its current state before freeing all associated file resources.

Link: https://lkml.kernel.org/r/20251125165850.3389713-8-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Tested-by: David Matlack <dmatlack@google.com>
Cc: Aleksander Lobakin <aleksander.lobakin@intel.com>
Cc: Alexander Graf <graf@amazon.com>
Cc: Alice Ryhl <aliceryhl@google.com>
Cc: Andriy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: anish kumar <yesanishhere@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chanwoo Choi <cw00.choi@samsung.com>
Cc: Chen Ridong <chenridong@huawei.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Daniel Wagner <wagi@kernel.org>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Jeffery <djeffery@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guixin Liu <kanie@linux.alibaba.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Matthew Maurer <mmaurer@google.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: MyungJoo Ham <myungjoo.ham@samsung.com>
Cc: Parav Pandit <parav@nvidia.com>
Cc: Pratyush Yadav <ptyadav@amazon.de>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Saeed Mahameed <saeedm@nvidia.com>
Cc: Samiullah Khawaja <skhawaja@google.com>
Cc: Song Liu <song@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Stuart Hayes <stuart.w.hayes@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: William Tu <witu@nvidia.com>
Cc: Yoann Congal <yoann.congal@smile.fr>
Cc: Zhu Yanjun <yanjun.zhu@linux.dev>
Cc: Zijun Hu <quic_zijuhu@quicinc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Pasha Tatashin
2025-11-25 11:58:37 -05:00
committed by Andrew Morton
parent 7c722a7f44
commit 16cec0d265
2 changed files with 288 additions and 2 deletions

View File

@@ -53,6 +53,14 @@ enum {
LIVEUPDATE_CMD_RETRIEVE_SESSION = 0x01,
};
/*
 * ioctl commands for session file descriptors.
 *
 * Command numbers start at LIVEUPDATE_CMD_SESSION_BASE (0x40) and are
 * contiguous; the first session command shares the base value.
 */
enum {
LIVEUPDATE_CMD_SESSION_BASE = 0x40,
LIVEUPDATE_CMD_SESSION_PRESERVE_FD = LIVEUPDATE_CMD_SESSION_BASE,
LIVEUPDATE_CMD_SESSION_RETRIEVE_FD = 0x41,
LIVEUPDATE_CMD_SESSION_FINISH = 0x42,
};
/**
* struct liveupdate_ioctl_create_session - ioctl(LIVEUPDATE_IOCTL_CREATE_SESSION)
* @size: Input; sizeof(struct liveupdate_ioctl_create_session)
@@ -110,4 +118,99 @@ struct liveupdate_ioctl_retrieve_session {
#define LIVEUPDATE_IOCTL_RETRIEVE_SESSION \
_IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_RETRIEVE_SESSION)
/* Session specific IOCTLs */
/**
* struct liveupdate_session_preserve_fd - ioctl(LIVEUPDATE_SESSION_PRESERVE_FD)
* @size: Input; sizeof(struct liveupdate_session_preserve_fd)
* @fd: Input; The user-space file descriptor to be preserved.
* @token: Input; An opaque, unique token for the preserved resource.
*
* Holds parameters for preserving a file descriptor.
*
* User sets the @fd field identifying the file descriptor to preserve
* (e.g., memfd, kvm, iommufd, VFIO). The kernel validates if this FD type
* and its dependencies are supported for preservation. If validation passes,
* the kernel marks the FD internally and *initiates the process* of preparing
* its state for saving. The actual snapshotting of the state typically occurs
* during the subsequent %LIVEUPDATE_IOCTL_PREPARE execution phase, though
* some finalization might occur during freeze.
* On successful validation and initiation, the user-supplied @token becomes
* the opaque identifier of the resource being preserved. The same token value
* is required for the subsequent %LIVEUPDATE_SESSION_RETRIEVE_FD call after
* the live update.
*
* Return: 0 on success (validation passed, preservation initiated), negative
* error code on failure (e.g., unsupported FD type, dependency issue,
* validation failed).
*/
struct liveupdate_session_preserve_fd {
__u32 size;
__s32 fd;
__aligned_u64 token;
};
/* Issued on a session file descriptor. */
#define LIVEUPDATE_SESSION_PRESERVE_FD \
_IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_PRESERVE_FD)
/**
* struct liveupdate_session_retrieve_fd - ioctl(LIVEUPDATE_SESSION_RETRIEVE_FD)
* @size: Input; sizeof(struct liveupdate_session_retrieve_fd)
* @fd: Output; The new file descriptor representing the fully restored
* kernel resource.
* @token: Input; The opaque token that was used to preserve the resource.
*
* Retrieve a previously preserved file descriptor.
*
* User sets the @token field to the value that was passed to a successful
* %LIVEUPDATE_SESSION_PRESERVE_FD call before the live update. On success,
* the kernel restores the state (saved during the PREPARE/FREEZE phases)
* associated with the token and populates the @fd field with a new file
* descriptor referencing the restored resource in the current (new) kernel.
* This operation must be performed *before* signaling completion via
* %LIVEUPDATE_SESSION_FINISH.
*
* Return: 0 on success, negative error code on failure (e.g., invalid token).
*/
struct liveupdate_session_retrieve_fd {
__u32 size;
__s32 fd;
__aligned_u64 token;
};
/* Issued on a session file descriptor. */
#define LIVEUPDATE_SESSION_RETRIEVE_FD \
_IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_RETRIEVE_FD)
/**
* struct liveupdate_session_finish - ioctl(LIVEUPDATE_SESSION_FINISH)
* @size: Input; sizeof(struct liveupdate_session_finish)
* @reserved: Input; Must be zero. Reserved for future use.
*
* Signals the completion of the restoration process for a retrieved session.
* This is the final operation that should be performed on a session file
* descriptor after a live update.
*
* This ioctl must be called once all required file descriptors for the session
* have been successfully retrieved (using %LIVEUPDATE_SESSION_RETRIEVE_FD) and
* are fully restored from the userspace and kernel perspective.
*
* Upon success, the kernel releases its ownership of the preserved resources
* associated with this session. This allows internal resources to be freed,
* typically by decrementing reference counts on the underlying preserved
* objects.
*
* If this operation fails, the resources remain preserved in memory. Userspace
* may attempt to call finish again. The resources will otherwise be reset
* during the next live update cycle.
*
* Return: 0 on success, negative error code on failure.
*/
struct liveupdate_session_finish {
__u32 size;
__u32 reserved;
};
/* Issued on a session file descriptor. */
#define LIVEUPDATE_SESSION_FINISH \
_IO(LIVEUPDATE_IOCTL_TYPE, LIVEUPDATE_CMD_SESSION_FINISH)
#endif /* _UAPI_LIVEUPDATE_H */

View File

@@ -125,6 +125,8 @@ static struct luo_session *luo_session_alloc(const char *name)
return ERR_PTR(-ENOMEM);
strscpy(session->name, name, sizeof(session->name));
INIT_LIST_HEAD(&session->file_set.files_list);
luo_file_set_init(&session->file_set);
INIT_LIST_HEAD(&session->list);
mutex_init(&session->mutex);
@@ -133,6 +135,7 @@ static struct luo_session *luo_session_alloc(const char *name)
/*
 * Tear down and free a session object: destroy its file set, destroy its
 * mutex, then kfree() the session itself. @session must not be used after
 * this returns.
 */
static void luo_session_free(struct luo_session *session)
{
luo_file_set_destroy(&session->file_set);
mutex_destroy(&session->mutex);
kfree(session);
}
@@ -177,16 +180,46 @@ static void luo_session_remove(struct luo_session_header *sh,
sh->count--;
}
/*
 * Run luo_file_finish() on the session's file set while holding
 * session->mutex.
 *
 * Return: whatever luo_file_finish() returns.
 */
static int luo_session_finish_one(struct luo_session *session)
{
guard(mutex)(&session->mutex);
return luo_file_finish(&session->file_set);
}
/*
 * Run luo_file_unfreeze() on the session's file set and the file-set part
 * of its serialized entry @ser, while holding session->mutex.
 */
static void luo_session_unfreeze_one(struct luo_session *session,
struct luo_session_ser *ser)
{
guard(mutex)(&session->mutex);
luo_file_unfreeze(&session->file_set, &ser->file_set_ser);
}
/*
 * Run luo_file_freeze() on the session's file set and the file-set part of
 * its serialized entry @ser, while holding session->mutex.
 *
 * Return: whatever luo_file_freeze() returns.
 */
static int luo_session_freeze_one(struct luo_session *session,
struct luo_session_ser *ser)
{
guard(mutex)(&session->mutex);
return luo_file_freeze(&session->file_set, &ser->file_set_ser);
}
/*
 * .release handler for a session file.
 *
 * For a retrieved session, luo_session_finish_one() is run first and the
 * incoming session header is selected; otherwise the session's files are
 * unpreserved under session->mutex and the outgoing header is selected.
 * The session is then removed from the selected header and freed.
 *
 * NOTE(review): when luo_session_finish_one() fails, the function returns
 * the error before luo_session_remove()/luo_session_free() are reached, so
 * the session object is neither unlinked nor freed on that path - confirm
 * this is intended.
 *
 * Return: 0 on success, or the error from luo_session_finish_one().
 */
static int luo_session_release(struct inode *inodep, struct file *filep)
{
struct luo_session *session = filep->private_data;
struct luo_session_header *sh;
/* If retrieved is set, it means this session is from incoming list */
if (session->retrieved)
if (session->retrieved) {
int err = luo_session_finish_one(session);
if (err) {
pr_warn("Unable to finish session [%s] on release\n",
session->name);
return err;
}
sh = &luo_session_global.incoming;
else
} else {
scoped_guard(mutex, &session->mutex)
luo_file_unpreserve_files(&session->file_set);
sh = &luo_session_global.outgoing;
}
luo_session_remove(sh, session);
luo_session_free(session);
@@ -194,9 +227,140 @@ static int luo_session_release(struct inode *inodep, struct file *filep)
return 0;
}
/*
 * Handler for LIVEUPDATE_SESSION_PRESERVE_FD.
 *
 * With session->mutex held, hands the user's (token, fd) pair to
 * luo_preserve_file() and then sends the response back to userspace.
 * A failure to respond after a successful preserve is logged and its
 * error code is returned.
 *
 * Return: 0 on success, otherwise the error from luo_preserve_file() or
 * luo_ucmd_respond().
 */
static int luo_session_preserve_fd(struct luo_session *session,
				   struct luo_ucmd *ucmd)
{
	struct liveupdate_session_preserve_fd *req = ucmd->cmd;
	int ret;

	guard(mutex)(&session->mutex);

	ret = luo_preserve_file(&session->file_set, req->token, req->fd);
	if (ret)
		return ret;

	ret = luo_ucmd_respond(ucmd, sizeof(*req));
	if (!ret)
		return 0;

	pr_warn("The file was successfully preserved, but response to user failed\n");
	return ret;
}
/*
 * Handler for LIVEUPDATE_SESSION_RETRIEVE_FD.
 *
 * Reserves an O_CLOEXEC descriptor number, looks up the file for the user's
 * token under session->mutex, reports the descriptor number to userspace
 * and only then installs the file into the descriptor table. Every failure
 * after the reservation gives the descriptor number back; a failure after
 * the lookup also drops the file reference.
 *
 * Return: 0 on success, otherwise the error from get_unused_fd_flags(),
 * luo_retrieve_file() or luo_ucmd_respond().
 */
static int luo_session_retrieve_fd(struct luo_session *session,
				   struct luo_ucmd *ucmd)
{
	struct liveupdate_session_retrieve_fd *req = ucmd->cmd;
	struct file *restored;
	int fd, ret;

	fd = get_unused_fd_flags(O_CLOEXEC);
	/* Stored in the command buffer so the response carries it. */
	req->fd = fd;
	if (fd < 0)
		return fd;

	guard(mutex)(&session->mutex);

	ret = luo_retrieve_file(&session->file_set, req->token, &restored);
	if (ret < 0) {
		put_unused_fd(fd);
		return ret;
	}

	ret = luo_ucmd_respond(ucmd, sizeof(*req));
	if (ret) {
		fput(restored);
		put_unused_fd(fd);
		return ret;
	}

	/* Install last, once nothing else can fail. */
	fd_install(fd, restored);
	return 0;
}
static int luo_session_finish(struct luo_session *session,
struct luo_ucmd *ucmd)
{
struct liveupdate_session_finish *argp = ucmd->cmd;
int err = luo_session_finish_one(session);
if (err)
return err;
return luo_ucmd_respond(ucmd, sizeof(*argp));
}
/*
 * Kernel-side buffer that holds the argument structure of any session
 * ioctl. IOCTL_OP() checks at build time that each command's structure
 * fits into this union.
 */
union ucmd_buffer {
struct liveupdate_session_finish finish;
struct liveupdate_session_preserve_fd preserve;
struct liveupdate_session_retrieve_fd retrieve;
};
/*
 * Descriptor of one session ioctl command.
 *
 * size:      sizeof() the kernel's argument structure for the command.
 * min_size:  smallest user-supplied size accepted; covers the structure up
 *            to and including its last mandatory field.
 * ioctl_num: full ioctl number, compared against the cmd passed by the
 *            caller.
 * execute:   handler invoked with the session and the copied-in command.
 */
struct luo_ioctl_op {
unsigned int size;
unsigned int min_size;
unsigned int ioctl_num;
int (*execute)(struct luo_session *session, struct luo_ucmd *ucmd);
};
/*
 * Build one luo_session_ioctl_ops[] entry, placed at index
 * (_IOC_NR(_ioctl) - LIVEUPDATE_CMD_SESSION_BASE).
 *
 * _ioctl:  the full ioctl number.
 * _fn:     handler stored in .execute.
 * _struct: argument structure type; the build fails if it is larger than
 *          union ucmd_buffer.
 * _last:   last mandatory field of _struct; .min_size ends right after it.
 */
#define IOCTL_OP(_ioctl, _fn, _struct, _last) \
[_IOC_NR(_ioctl) - LIVEUPDATE_CMD_SESSION_BASE] = { \
.size = sizeof(_struct) + \
BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \
sizeof(_struct)), \
.min_size = offsetofend(_struct, _last), \
.ioctl_num = _ioctl, \
.execute = _fn, \
}
/*
 * Dispatch table for session ioctls, indexed by command number relative to
 * LIVEUPDATE_CMD_SESSION_BASE. The entries use designated indices, so their
 * order in this initializer does not determine their position.
 */
static const struct luo_ioctl_op luo_session_ioctl_ops[] = {
IOCTL_OP(LIVEUPDATE_SESSION_FINISH, luo_session_finish,
struct liveupdate_session_finish, reserved),
IOCTL_OP(LIVEUPDATE_SESSION_PRESERVE_FD, luo_session_preserve_fd,
struct liveupdate_session_preserve_fd, token),
IOCTL_OP(LIVEUPDATE_SESSION_RETRIEVE_FD, luo_session_retrieve_fd,
struct liveupdate_session_retrieve_fd, token),
};
/*
 * .unlocked_ioctl handler for session files.
 *
 * Maps the command number onto luo_session_ioctl_ops[], reads the
 * caller-declared structure size from the first u32 of the user buffer,
 * checks it against the command's minimum size, copies the argument
 * structure into a local buffer and calls the command's handler.
 *
 * Return: the handler's result on dispatch; -EINVAL for a command number
 * outside the table or a user size below the minimum; -ENOIOCTLCMD when
 * the full ioctl number does not match the table entry; otherwise the
 * error from get_user() or copy_struct_from_user().
 */
static long luo_session_ioctl(struct file *filep, unsigned int cmd,
			      unsigned long arg)
{
	struct luo_session *session = filep->private_data;
	unsigned int nr = _IOC_NR(cmd);
	const struct luo_ioctl_op *op;
	struct luo_ucmd ucmd = {};
	union ucmd_buffer buf;
	unsigned int idx;
	int ret;

	if (nr < LIVEUPDATE_CMD_SESSION_BASE)
		return -EINVAL;

	idx = nr - LIVEUPDATE_CMD_SESSION_BASE;
	if (idx >= ARRAY_SIZE(luo_session_ioctl_ops))
		return -EINVAL;

	/* Every argument structure starts with its own size as a u32. */
	ucmd.ubuffer = (void __user *)arg;
	ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
	if (ret)
		return ret;

	op = &luo_session_ioctl_ops[idx];
	if (op->ioctl_num != cmd)
		return -ENOIOCTLCMD;

	if (ucmd.user_size < op->min_size)
		return -EINVAL;

	ucmd.cmd = &buf;
	ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
				    ucmd.user_size);
	if (ret)
		return ret;

	return op->execute(session, &ucmd);
}
/*
 * File operations of a session file: closing it runs luo_session_release(),
 * and ioctls on it are dispatched by luo_session_ioctl().
 */
static const struct file_operations luo_session_fops = {
.owner = THIS_MODULE,
.release = luo_session_release,
.unlocked_ioctl = luo_session_ioctl,
};
/* Create a "struct file" for session */
@@ -392,6 +556,11 @@ int luo_session_deserialize(void)
luo_session_free(session);
return err;
}
scoped_guard(mutex, &session->mutex) {
luo_file_deserialize(&session->file_set,
&sh->ser[i].file_set_ser);
}
}
kho_restore_free(sh->header_ser);
@@ -406,9 +575,14 @@ int luo_session_serialize(void)
struct luo_session_header *sh = &luo_session_global.outgoing;
struct luo_session *session;
int i = 0;
int err;
guard(rwsem_write)(&sh->rwsem);
list_for_each_entry(session, &sh->list, list) {
err = luo_session_freeze_one(session, &sh->ser[i]);
if (err)
goto err_undo;
strscpy(sh->ser[i].name, session->name,
sizeof(sh->ser[i].name));
i++;
@@ -416,6 +590,15 @@ int luo_session_serialize(void)
sh->header_ser->count = sh->count;
return 0;
err_undo:
list_for_each_entry_continue_reverse(session, &sh->list, list) {
i--;
luo_session_unfreeze_one(session, &sh->ser[i]);
memset(sh->ser[i].name, 0, sizeof(sh->ser[i].name));
}
return err;
}
/**