drm/i915: Refactor shmem_pwrite() to use kiocb and write_iter

Refactors shmem_pwrite() to replace the ->write_begin/end logic
with a write_iter-based implementation using kiocb and iov_iter.

While kernel_write() was considered, it caused about 50% performance
regression. vfs_write() is not exported for kernel use. Therefore,
file->f_op->write_iter() is called directly with a synchronously
initialized kiocb to preserve performance and remove write_begin
usage.

Performance results use gem_pwrite on Intel CPU i7-10700
(average of 10 runs):

- ./gem_pwrite --run-subtest bench -s 16384
  Before: 0.205s, After: 0.214s

- ./gem_pwrite --run-subtest bench -s 524288
  Before: 6.1021s, After: 4.8047s

Part of a series refactoring address_space_operations write_begin and
write_end callbacks to use struct kiocb for passing write context and
flags.

Signed-off-by: Taotao Chen <chentaotao@didiglobal.com>
Link: https://lore.kernel.org/20250716093559.217344-3-chentaotao@didiglobal.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Taotao Chen
2025-07-16 09:36:04 +00:00
committed by Christian Brauner
parent e7b840fd49
commit 048832a3f4

View File

@@ -6,6 +6,7 @@
#include <linux/pagevec.h>
#include <linux/shmem_fs.h>
#include <linux/swap.h>
#include <linux/uio.h>
#include <drm/drm_cache.h>
@@ -400,12 +401,12 @@ static int
shmem_pwrite(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_pwrite *arg)
{
struct address_space *mapping = obj->base.filp->f_mapping;
const struct address_space_operations *aops = mapping->a_ops;
char __user *user_data = u64_to_user_ptr(arg->data_ptr);
u64 remain;
loff_t pos;
unsigned int pg;
struct file *file = obj->base.filp;
struct kiocb kiocb;
struct iov_iter iter;
ssize_t written;
u64 size = arg->size;
/* Caller already validated user args */
GEM_BUG_ON(!access_ok(user_data, arg->size));
@@ -428,63 +429,24 @@ shmem_pwrite(struct drm_i915_gem_object *obj,
if (obj->mm.madv != I915_MADV_WILLNEED)
return -EFAULT;
/*
* Before the pages are instantiated the object is treated as being
* in the CPU domain. The pages will be clflushed as required before
* use, and we can freely write into the pages directly. If userspace
* races pwrite with any other operation; corruption will ensue -
* that is userspace's prerogative!
*/
if (size > MAX_RW_COUNT)
return -EFBIG;
remain = arg->size;
pos = arg->offset;
pg = offset_in_page(pos);
if (!file->f_op->write_iter)
return -EINVAL;
do {
unsigned int len, unwritten;
struct folio *folio;
void *data, *vaddr;
int err;
char __maybe_unused c;
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = arg->offset;
iov_iter_ubuf(&iter, ITER_SOURCE, (void __user *)user_data, size);
len = PAGE_SIZE - pg;
if (len > remain)
len = remain;
written = file->f_op->write_iter(&kiocb, &iter);
BUG_ON(written == -EIOCBQUEUED);
/* Prefault the user page to reduce potential recursion */
err = __get_user(c, user_data);
if (err)
return err;
if (written != size)
return -EIO;
err = __get_user(c, user_data + len - 1);
if (err)
return err;
err = aops->write_begin(obj->base.filp, mapping, pos, len,
&folio, &data);
if (err < 0)
return err;
vaddr = kmap_local_folio(folio, offset_in_folio(folio, pos));
pagefault_disable();
unwritten = __copy_from_user_inatomic(vaddr, user_data, len);
pagefault_enable();
kunmap_local(vaddr);
err = aops->write_end(obj->base.filp, mapping, pos, len,
len - unwritten, folio, data);
if (err < 0)
return err;
/* We don't handle -EFAULT, leave it to the caller to check */
if (unwritten)
return -ENODEV;
remain -= len;
user_data += len;
pos += len;
pg = 0;
} while (remain);
if (written < 0)
return written;
return 0;
}