accel/ivpu: Add support for user-managed preemption buffer

Allow user mode drivers to manage preemption buffers, enabling
memory savings by sharing a single buffer across multiple
command queues within the same memory context.

Introduce DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE to report the required
preemption buffer size as specified by the firmware.

The preemption buffer is now passed from user space as an entry
in the BO list of DRM_IVPU_CMDQ_SUBMIT. The buffer must be
non-mappable and large enough to hold preemption data.

For backward compatibility, the kernel will allocate an internal
preemption buffer if user space does not provide one.

User space can provide only a single preemption buffer, which keeps
the ioctl interface and parameter validation simple. A separate
secondary preemption buffer is only needed to save address space
below 4 GB on 37xx, and only when preemption buffers are not shared.

Signed-off-by: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
Reviewed-by: Lizhi Hou <lizhi.hou@amd.com>
Signed-off-by: Karol Wachowski <karol.wachowski@linux.intel.com>
Link: https://lore.kernel.org/r/20250915103437.830086-1-karol.wachowski@linux.intel.com
---
 7 files changed, 141 insertions(+), 44 deletions(-)

--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c

@@ -200,6 +200,9 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
 	case DRM_IVPU_PARAM_CAPABILITIES:
 		args->value = ivpu_is_capable(vdev, args->index);
 		break;
+	case DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE:
+		args->value = ivpu_fw_preempt_buf_size(vdev);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
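
For reference, user space can query the new parameter as in the sketch
below. This is a hypothetical snippet, not part of the patch: it assumes
fd is an open VPU device node (e.g. /dev/accel/accel0) and uses only the
uapi added in include/uapi/drm/ivpu_accel.h at the end of this patch.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/ivpu_accel.h>

/* Hypothetical helper: query the firmware-required preemption buffer
 * size. A value of 0 means mid-inference preemption is not supported,
 * in which case user space should not pass a preemption buffer. */
static int query_preempt_buf_size(int fd, uint64_t *size)
{
        struct drm_ivpu_param param;

        memset(&param, 0, sizeof(param));
        param.param = DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE;

        if (ioctl(fd, DRM_IOCTL_IVPU_GET_PARAM, &param))
                return -1;

        *size = param.value;
        return 0;
}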

--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c

@@ -26,6 +26,8 @@
 #define FW_RUNTIME_MIN_ADDR		(FW_GLOBAL_MEM_START)
 #define FW_RUNTIME_MAX_ADDR		(FW_GLOBAL_MEM_END - FW_SHARED_MEM_SIZE)
 #define FW_FILE_IMAGE_OFFSET		(VPU_FW_HEADER_SIZE + FW_VERSION_HEADER_SIZE)
+#define FW_PREEMPT_BUF_MIN_SIZE		SZ_4K
+#define FW_PREEMPT_BUF_MAX_SIZE		SZ_32M
 #define WATCHDOG_MSS_REDIRECT		32
 #define WATCHDOG_NCE_REDIRECT		33
@@ -151,6 +153,47 @@ ivpu_fw_sched_mode_select(struct ivpu_device *vdev, const struct vpu_firmware_he
 	return VPU_SCHEDULING_MODE_HW;
 }
 
+static void
+ivpu_preemption_config_parse(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr)
+{
+	struct ivpu_fw_info *fw = vdev->fw;
+	u32 primary_preempt_buf_size, secondary_preempt_buf_size;
+
+	if (fw_hdr->preemption_buffer_1_max_size)
+		primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size;
+	else
+		primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
+
+	if (fw_hdr->preemption_buffer_2_max_size)
+		secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size;
+	else
+		secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
+
+	ivpu_dbg(vdev, FW_BOOT, "Preemption buffer size, primary: %u, secondary: %u\n",
+		 primary_preempt_buf_size, secondary_preempt_buf_size);
+
+	if (primary_preempt_buf_size < FW_PREEMPT_BUF_MIN_SIZE ||
+	    secondary_preempt_buf_size < FW_PREEMPT_BUF_MIN_SIZE) {
+		ivpu_warn(vdev, "Preemption buffers size too small\n");
+		return;
+	}
+
+	if (primary_preempt_buf_size > FW_PREEMPT_BUF_MAX_SIZE ||
+	    secondary_preempt_buf_size > FW_PREEMPT_BUF_MAX_SIZE) {
+		ivpu_warn(vdev, "Preemption buffers size too big\n");
+		return;
+	}
+
+	if (fw->sched_mode != VPU_SCHEDULING_MODE_HW)
+		return;
+
+	if (ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE)
+		return;
+
+	vdev->fw->primary_preempt_buf_size = ALIGN(primary_preempt_buf_size, PAGE_SIZE);
+	vdev->fw->secondary_preempt_buf_size = ALIGN(secondary_preempt_buf_size, PAGE_SIZE);
+}
+
 static int ivpu_fw_parse(struct ivpu_device *vdev)
 {
 	struct ivpu_fw_info *fw = vdev->fw;
@@ -235,17 +278,9 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
 	fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
 	ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
 
-	if (fw_hdr->preemption_buffer_1_max_size)
-		fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size;
-	else
-		fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
-
-	if (fw_hdr->preemption_buffer_2_max_size)
-		fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size;
-	else
-		fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
-
-	ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n",
-		 fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size);
+	ivpu_preemption_config_parse(vdev, fw_hdr);
+
+	ivpu_dbg(vdev, FW_BOOT, "Mid-inference preemption %s supported\n",
+		 ivpu_fw_preempt_buf_size(vdev) ? "is" : "is not");
 
 	if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address,
 								 fw_hdr->ro_section_size,

--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h

@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #ifndef __IVPU_FW_H__
@@ -52,4 +52,9 @@ static inline bool ivpu_fw_is_cold_boot(struct ivpu_device *vdev)
 	return vdev->fw->entry_point == vdev->fw->cold_boot_entry_point;
 }
 
+static inline u32 ivpu_fw_preempt_buf_size(struct ivpu_device *vdev)
+{
+	return vdev->fw->primary_preempt_buf_size + vdev->fw->secondary_preempt_buf_size;
+}
+
 #endif /* __IVPU_FW_H__ */

--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h

@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #ifndef __IVPU_GEM_H__
 #define __IVPU_GEM_H__
@@ -96,4 +96,9 @@ static inline u32 cpu_to_vpu_addr(struct ivpu_bo *bo, void *cpu_addr)
 	return bo->vpu_addr + (cpu_addr - ivpu_bo_vaddr(bo));
 }
 
+static inline bool ivpu_bo_is_mappable(struct ivpu_bo *bo)
+{
+	return bo->flags & DRM_IVPU_BO_MAPPABLE;
+}
+
 #endif /* __IVPU_GEM_H__ */

--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c

@@ -34,22 +34,20 @@ static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
 static int ivpu_preemption_buffers_create(struct ivpu_device *vdev,
 					  struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
 {
-	u64 primary_size = ALIGN(vdev->fw->primary_preempt_buf_size, PAGE_SIZE);
-	u64 secondary_size = ALIGN(vdev->fw->secondary_preempt_buf_size, PAGE_SIZE);
-
-	if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW ||
-	    ivpu_test_mode & IVPU_TEST_MODE_MIP_DISABLE)
+	if (ivpu_fw_preempt_buf_size(vdev) == 0)
 		return 0;
 
 	cmdq->primary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.user,
-						   primary_size, DRM_IVPU_BO_WC);
+						   vdev->fw->primary_preempt_buf_size,
+						   DRM_IVPU_BO_WC);
 	if (!cmdq->primary_preempt_buf) {
 		ivpu_err(vdev, "Failed to create primary preemption buffer\n");
 		return -ENOMEM;
 	}
 
 	cmdq->secondary_preempt_buf = ivpu_bo_create(vdev, &file_priv->ctx, &vdev->hw->ranges.dma,
-						     secondary_size, DRM_IVPU_BO_WC);
+						     vdev->fw->secondary_preempt_buf_size,
+						     DRM_IVPU_BO_WC);
 	if (!cmdq->secondary_preempt_buf) {
 		ivpu_err(vdev, "Failed to create secondary preemption buffer\n");
 		goto err_free_primary;
@@ -66,20 +64,39 @@ err_free_primary:
 static void ivpu_preemption_buffers_free(struct ivpu_device *vdev,
 					 struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq)
 {
-	if (vdev->fw->sched_mode != VPU_SCHEDULING_MODE_HW)
-		return;
-
 	if (cmdq->primary_preempt_buf)
 		ivpu_bo_free(cmdq->primary_preempt_buf);
 	if (cmdq->secondary_preempt_buf)
 		ivpu_bo_free(cmdq->secondary_preempt_buf);
 }
 
+static int ivpu_preemption_job_init(struct ivpu_device *vdev, struct ivpu_file_priv *file_priv,
+				    struct ivpu_cmdq *cmdq, struct ivpu_job *job)
+{
+	int ret;
+
+	/* Use preemption buffer provided by the user space */
+	if (job->primary_preempt_buf)
+		return 0;
+
+	if (!cmdq->primary_preempt_buf) {
+		/* Allocate per command queue preemption buffers */
+		ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
+		if (ret)
+			return ret;
+	}
+
+	/* Use preemption buffers allocated by the kernel */
+	job->primary_preempt_buf = cmdq->primary_preempt_buf;
+	job->secondary_preempt_buf = cmdq->secondary_preempt_buf;
+
+	return 0;
+}
+
 static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
 {
 	struct ivpu_device *vdev = file_priv->vdev;
 	struct ivpu_cmdq *cmdq;
 	int ret;
 
 	cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL);
 	if (!cmdq)
@@ -89,10 +106,6 @@ static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv)
 	if (!cmdq->mem)
 		goto err_free_cmdq;
 
-	ret = ivpu_preemption_buffers_create(vdev, file_priv, cmdq);
-	if (ret)
-		ivpu_warn(vdev, "Failed to allocate preemption buffers, preemption limited\n");
-
 	return cmdq;
 
 err_free_cmdq:
@@ -429,17 +442,14 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
 	if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
 		entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
 
 	if (vdev->fw->sched_mode == VPU_SCHEDULING_MODE_HW) {
-		if (cmdq->primary_preempt_buf) {
-			entry->primary_preempt_buf_addr = cmdq->primary_preempt_buf->vpu_addr;
-			entry->primary_preempt_buf_size = ivpu_bo_size(cmdq->primary_preempt_buf);
-		}
+		if (job->primary_preempt_buf) {
+			entry->primary_preempt_buf_addr = job->primary_preempt_buf->vpu_addr;
+			entry->primary_preempt_buf_size = ivpu_bo_size(job->primary_preempt_buf);
+		}
 
-		if (cmdq->secondary_preempt_buf) {
-			entry->secondary_preempt_buf_addr = cmdq->secondary_preempt_buf->vpu_addr;
-			entry->secondary_preempt_buf_size =
-				ivpu_bo_size(cmdq->secondary_preempt_buf);
-		}
+		if (job->secondary_preempt_buf) {
+			entry->secondary_preempt_buf_addr = job->secondary_preempt_buf->vpu_addr;
+			entry->secondary_preempt_buf_size = ivpu_bo_size(job->secondary_preempt_buf);
+		}
 	}
 
 	wmb(); /* Ensure that tail is updated after filling entry */
@@ -663,6 +673,13 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
 		goto err_unlock;
 	}
 
+	ret = ivpu_preemption_job_init(vdev, file_priv, cmdq, job);
+	if (ret) {
+		ivpu_err(vdev, "Failed to initialize preemption buffers for job %d: %d\n",
+			 job->job_id, ret);
+		goto err_unlock;
+	}
+
 	job->cmdq_id = cmdq->id;
 
 	is_first_job = xa_empty(&vdev->submitted_jobs_xa);
@@ -716,7 +733,7 @@ err_unlock:
 static int
 ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles,
-				u32 buf_count, u32 commands_offset)
+				u32 buf_count, u32 commands_offset, u32 preempt_buffer_index)
 {
 	struct ivpu_file_priv *file_priv = job->file_priv;
 	struct ivpu_device *vdev = file_priv->vdev;
@@ -752,6 +769,20 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
 	job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset;
 
+	if (preempt_buffer_index) {
+		struct ivpu_bo *preempt_bo = job->bos[preempt_buffer_index];
+
+		if (ivpu_bo_size(preempt_bo) < ivpu_fw_preempt_buf_size(vdev)) {
+			ivpu_warn(vdev, "Preemption buffer is too small\n");
+			return -EINVAL;
+		}
+		if (ivpu_bo_is_mappable(preempt_bo)) {
+			ivpu_warn(vdev, "Preemption buffer cannot be mappable\n");
+			return -EINVAL;
+		}
+		job->primary_preempt_buf = preempt_bo;
+	}
+
 	ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count,
 					&acquire_ctx);
 	if (ret) {
@@ -782,7 +813,7 @@ unlock_reservations:
 static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv, u32 cmdq_id,
 		       u32 buffer_count, u32 engine, void __user *buffers_ptr, u32 cmds_offset,
-		       u8 priority)
+		       u32 preempt_buffer_index, u8 priority)
 {
 	struct ivpu_device *vdev = file_priv->vdev;
 	struct ivpu_job *job;
@@ -814,7 +845,8 @@ static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv,
 		goto err_exit_dev;
 	}
 
-	ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset);
+	ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, buffer_count, cmds_offset,
+					      preempt_buffer_index);
 	if (ret) {
 		ivpu_err(vdev, "Failed to prepare job: %d\n", ret);
 		goto err_destroy_job;
@@ -868,7 +900,7 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 	priority = ivpu_job_to_jsm_priority(args->priority);
 
 	return ivpu_submit(file, file_priv, 0, args->buffer_count, args->engine,
-			   (void __user *)args->buffers_ptr, args->commands_offset, priority);
+			   (void __user *)args->buffers_ptr, args->commands_offset, 0, priority);
 }
 
 int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -885,6 +917,9 @@ int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *
 	if (args->buffer_count == 0 || args->buffer_count > JOB_MAX_BUFFER_COUNT)
 		return -EINVAL;
 
+	if (args->preempt_buffer_index >= args->buffer_count)
+		return -EINVAL;
+
 	if (!IS_ALIGNED(args->commands_offset, 8))
 		return -EINVAL;
@@ -895,7 +930,8 @@ int ivpu_cmdq_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *
 		return -EBADFD;
 
 	return ivpu_submit(file, file_priv, args->cmdq_id, args->buffer_count, VPU_ENGINE_COMPUTE,
-			   (void __user *)args->buffers_ptr, args->commands_offset, 0);
+			   (void __user *)args->buffers_ptr, args->commands_offset,
+			   args->preempt_buffer_index, 0);
 }
 
 int ivpu_cmdq_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
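
The validation above requires a buffer of at least ivpu_fw_preempt_buf_size()
bytes that was created without DRM_IVPU_BO_MAPPABLE. A hypothetical
user-space allocation satisfying both checks, so a single BO can be shared
by every command queue in the context, could look like this (helper name
and flag choice are illustrative, continuing the sketch above):

/* Hypothetical helper: allocate one non-mappable BO sized to the value
 * reported by DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE; the same handle can
 * then be passed on submits to any command queue in this context. */
static int alloc_preempt_bo(int fd, uint64_t size, uint32_t *handle)
{
        struct drm_ivpu_bo_create create;

        memset(&create, 0, sizeof(create));
        create.size = size;             /* >= reported preemption buffer size */
        create.flags = DRM_IVPU_BO_WC;  /* anything but DRM_IVPU_BO_MAPPABLE */

        if (ioctl(fd, DRM_IOCTL_IVPU_BO_CREATE, &create))
                return -1;

        *handle = create.handle;
        return 0;
}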

--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h

@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2024 Intel Corporation
+ * Copyright (C) 2020-2025 Intel Corporation
  */
 
 #ifndef __IVPU_JOB_H__
@@ -55,6 +55,8 @@ struct ivpu_job {
 	u32 job_id;
 	u32 engine_idx;
 	size_t bo_count;
+	struct ivpu_bo *primary_preempt_buf;
+	struct ivpu_bo *secondary_preempt_buf;
 	struct ivpu_bo *bos[] __counted_by(bo_count);
 };

--- a/include/uapi/drm/ivpu_accel.h
+++ b/include/uapi/drm/ivpu_accel.h

@@ -90,6 +90,7 @@ extern "C" {
 #define DRM_IVPU_PARAM_TILE_CONFIG	       11
 #define DRM_IVPU_PARAM_SKU		       12
 #define DRM_IVPU_PARAM_CAPABILITIES	       13
+#define DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE     14
 
 #define DRM_IVPU_PLATFORM_TYPE_SILICON	       0
@@ -176,6 +177,9 @@ struct drm_ivpu_param {
 	 *
 	 * %DRM_IVPU_PARAM_CAPABILITIES:
 	 * Supported capabilities (read-only)
+	 *
+	 * %DRM_IVPU_PARAM_PREEMPT_BUFFER_SIZE:
+	 * Size of the preemption buffer (read-only)
 	 */
 	__u32 param;
@@ -371,6 +375,13 @@ struct drm_ivpu_cmdq_submit {
 	 * to be executed. The offset has to be 8-byte aligned.
 	 */
 	__u32 commands_offset;
+	/**
+	 * @preempt_buffer_index:
+	 *
+	 * Index of the preemption buffer in the buffers_ptr array.
+	 */
+	__u32 preempt_buffer_index;
+	__u32 reserved;
 };
/* drm_ivpu_bo_wait job status codes */
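
To tie the uapi together, a hypothetical submit using the new field could
look as follows; it assumes cmdq_id came from DRM_IOCTL_IVPU_CMDQ_CREATE
and the two handles came from DRM_IOCTL_IVPU_BO_CREATE. Index 0 is the
command buffer, so a preempt_buffer_index of 0 (the default) means no
user buffer and keeps the kernel-allocated fallback:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/ivpu_accel.h>

/* Hypothetical example: the preemption BO is an ordinary entry in the
 * handle list; preempt_buffer_index selects it. */
static int submit_with_preempt_bo(int fd, uint32_t cmdq_id,
                                  uint32_t cmd_bo, uint32_t preempt_bo)
{
        uint32_t handles[2] = { cmd_bo, preempt_bo };
        struct drm_ivpu_cmdq_submit submit;

        memset(&submit, 0, sizeof(submit));
        submit.cmdq_id = cmdq_id;
        submit.buffers_ptr = (uint64_t)(uintptr_t)handles;
        submit.buffer_count = 2;
        submit.commands_offset = 0;      /* must be 8-byte aligned */
        submit.preempt_buffer_index = 1; /* handles[1] is the preemption BO */

        return ioctl(fd, DRM_IOCTL_IVPU_CMDQ_SUBMIT, &submit);
}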