The vma_modify_*() family of functions each either perform splits, a merge, or
no changes at all in preparation for the requested modification to occur.

When doing so for a VMA flags change, we currently don't account for any flags
which may remain (for instance, VM_SOFTDIRTY) despite the requested change in
the case that a merge succeeded. This is made more important by subsequent
patches which will introduce the concept of sticky VMA flags, which rely on
this behaviour.

This patch fixes this by passing the VMA flags parameter as a pointer,
updating it accordingly on merge, and updating callers to accommodate this.

Additionally, while we are here, we add kdocs for each of the vma_modify_*()
functions, as the fact that the requested modification is not performed is
confusing, so it is useful to make this abundantly clear.

We also update the VMA userland tests to account for this change.

Link: https://lkml.kernel.org/r/23b5b549b0eaefb2922625626e58c2a352f3e93c.1763460113.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: David Hildenbrand (Red Hat) <david@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
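To make the new convention concrete, here is a minimal sketch of a caller
under the updated signature (identifiers as used in mseal_apply() in the file
below; the pointer-based vm_flags argument is the changed part):

	vm_flags_t vm_flags = vma->vm_flags | VM_SEALED;

	/*
	 * May split or merge in preparation; the flag change itself is
	 * NOT applied here. On a successful merge, vm_flags is updated
	 * through the pointer with any flags the merge retained
	 * (e.g. VM_SOFTDIRTY).
	 */
	vma = vma_modify_flags(&vmi, prev, vma, curr_start, curr_end,
			&vm_flags);
	if (IS_ERR(vma))
		return PTR_ERR(vma);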
// SPDX-License-Identifier: GPL-2.0
/*
 *  Implement mseal() syscall.
 *
 *  Copyright (c) 2023,2024 Google, Inc.
 *
 *  Author: Jeff Xu <jeffxu@chromium.org>
 */

#include <linux/mempolicy.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include "internal.h"

/*
 * mseal() disallows an input range which contains unmapped ranges (VMA
 * holes).
 *
 * It disallows unmapped regions from start to end whether they exist at the
 * start, in the middle, or at the end of the range, or any combination
 * thereof.
 *
 * This is because after sealing a range, there's nothing to stop memory
 * mapping of ranges in the remaining gaps later, meaning that the user might
 * then wrongly consider the entirety of the mseal()'d range to be sealed when
 * it in fact isn't.
 */

/*
 * Does the [start, end) range contain any unmapped memory?
 *
 * We ensure that:
 * - start is part of a valid VMA.
 * - end is part of a valid VMA.
 * - no gap (unallocated memory) exists between start and end.
 */
static bool range_contains_unmapped(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma;
	unsigned long prev_end = start;
	VMA_ITERATOR(vmi, mm, start);

	for_each_vma_range(vmi, vma, end) {
		if (vma->vm_start > prev_end)
			return true;

		prev_end = vma->vm_end;
	}

	return prev_end < end;
}
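
/*
 * Worked example (illustrative): given VMAs [0x1000, 0x3000) and
 * [0x4000, 0x6000), a query over [0x1000, 0x6000) sets prev_end to
 * 0x3000 after the first VMA, then sees vm_start (0x4000) > prev_end
 * and returns true: the hole at [0x3000, 0x4000) is detected. The
 * final prev_end < end check catches a hole at the tail of the range.
 */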

static int mseal_apply(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma, *prev;
	unsigned long curr_start = start;
	VMA_ITERATOR(vmi, mm, start);

	/* We know there are no gaps so this will be non-NULL. */
	vma = vma_iter_load(&vmi);

	prev = vma_prev(&vmi);
	if (start > vma->vm_start)
		prev = vma;

	for_each_vma_range(vmi, vma, end) {
		const unsigned long curr_end = MIN(vma->vm_end, end);

		if (!(vma->vm_flags & VM_SEALED)) {
			vm_flags_t vm_flags = vma->vm_flags | VM_SEALED;

			vma = vma_modify_flags(&vmi, prev, vma, curr_start,
					curr_end, &vm_flags);
			if (IS_ERR(vma))
				return PTR_ERR(vma);
			vm_flags_set(vma, VM_SEALED);
		}

		prev = vma;
		curr_start = curr_end;
	}

	return 0;
}
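
/*
 * Note (illustrative): if [start, end) begins or ends part-way through
 * an unsealed VMA, vma_modify_flags() splits it so that VM_SEALED is
 * set on the [curr_start, curr_end) portion only; where the result can
 * merge with prev, the merge happens there as well, with vm_flags
 * updated through the pointer as described in the commit message above.
 */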

/*
 * mseal(2) seals the VM's metadata from selected syscalls.
 *
 * addr/len: VM address range.
 *
 * The address range given by addr/len must meet:
 * - start (addr) must be in a valid VMA.
 * - end (addr + len) must be in a valid VMA.
 * - no gap (unallocated memory) between start and end.
 * - start (addr) must be page aligned.
 *
 * len: len will be page aligned implicitly.
 *
 * The following VMA operations are blocked after sealing:
 * 1> Unmapping, moving to another location, and shrinking the size via
 *    munmap() and mremap(); these can leave an empty space which can
 *    then be filled by a VMA with a new set of attributes.
 * 2> Moving or expanding a different VMA into the current location via
 *    mremap().
 * 3> Modifying a VMA via mmap(MAP_FIXED).
 * 4> Size expansion via mremap(). This does not appear to pose any
 *    specific risks to sealed VMAs but is included anyway because the
 *    use case is unclear. In any case, users can rely on merging to
 *    expand a sealed VMA.
 * 5> mprotect() and pkey_mprotect().
 * 6> Some destructive madvise() behaviours (e.g. MADV_DONTNEED) for
 *    anonymous memory, when users don't have write permission to the
 *    memory. These behaviours can alter region contents by discarding
 *    pages, effectively a memset(0) for anonymous memory.
 *
 * flags: reserved, must be 0.
 *
 * Return values:
 *  zero: success.
 *  -EINVAL:
 *   invalid input flags;
 *   start address is not page aligned;
 *   the address range (start + len) overflows.
 *  -ENOMEM:
 *   addr is not a valid address (not allocated);
 *   end (start + len) is not a valid address;
 *   a gap (unallocated memory) exists between start and end.
 *  -EPERM:
 *   sealing is not supported on 32-bit architectures.
 *
 * Note:
 *  users can call mseal(2) multiple times; sealing already-sealed
 *  memory is a no-op (no error).
 *
 *  unseal() is not supported.
 */
int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
{
	size_t len;
	int ret = 0;
	unsigned long end;
	struct mm_struct *mm = current->mm;

	/* Verify flags not set. */
	if (flags)
		return -EINVAL;

	start = untagged_addr(start);
	if (!PAGE_ALIGNED(start))
		return -EINVAL;

	len = PAGE_ALIGN(len_in);
	/* Check whether len was rounded up from a small -ve value to zero. */
	if (len_in && !len)
		return -EINVAL;

	end = start + len;
	if (end < start)
		return -EINVAL;

	if (end == start)
		return 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	if (range_contains_unmapped(mm, start, end)) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Second pass: this should succeed unless vma_modify_flags() fails,
	 * e.g. due to a merge/split error or the process reaching the
	 * maximum supported VMA count, but such cases should be rare.
	 */
	ret = mseal_apply(mm, start, end);

out:
	mmap_write_unlock(mm);
	return ret;
}
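
/*
 * Illustrative examples (assuming 4 KiB pages):
 *   mseal(addr, 1, 0)        rounds len up to 4096 and seals one page;
 *   mseal(addr, 0, 0)        returns 0 without doing anything;
 *   mseal(addr + 1, len, 0)  fails with -EINVAL (start not aligned);
 *   a range containing a hole fails with -ENOMEM, leaving every VMA in
 *   the range unsealed (the gap check runs before mseal_apply()).
 */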

SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long,
		flags)
{
	return do_mseal(start, len, flags);
}
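
For reference, a minimal userland sketch of exercising this syscall (not part
of this file; assumes a Linux 6.10+ kernel, and falls back to the raw syscall
number if the headers do not define SYS_mseal):

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SYS_mseal
#define SYS_mseal 462	/* mseal(2) syscall number since Linux 6.10 */
#endif

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	void *p = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	/* Seal both pages; the flags argument must be 0. */
	if (syscall(SYS_mseal, p, (size_t)(2 * page), 0))
		perror("mseal");

	/* The mapping is now sealed: mprotect() fails with EPERM. */
	if (mprotect(p, page, PROT_READ) && errno == EPERM)
		printf("sealed as expected\n");

	return 0;
}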