KVM: selftests: Add guest_memfd tests for mmap and NUMA policy support

Add tests for NUMA memory policy binding and NUMA aware allocation in
guest_memfd. This extends the existing selftests by adding proper
validation for:
  - KVM GMEM set_policy() and get_policy() vm_ops functionality using
    mbind() and get_mempolicy()
  - NUMA policy application before and after memory allocation

Run the NUMA mbind() test with and without INIT_SHARED, as KVM should allow
doing mbind(), madvise(), etc. on guest-private memory, e.g. so that
userspace can set NUMA policy for CoCo VMs.

Run the NUMA allocation test only for INIT_SHARED, i.e. only if the host
can fault-in memory (via direct access, madvise(), etc.), as move_pages()
returns -ENOENT if the page hasn't been faulted in (it walks the host page
tables to find the associated folio).

[sean: don't skip entire test when running on non-NUMA system, test mbind()
       with private memory, provide more info in assert messages]

Signed-off-by: Shivank Garg <shivankg@amd.com>
Tested-by: Ashish Kalra <ashish.kalra@amd.com>
Link: https://lore.kernel.org/r/20251016172853.52451-12-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
This commit is contained in:
Shivank Garg
2025-10-16 10:28:52 -07:00
committed by Sean Christopherson
parent e698e89b3e
commit 38ccc50ac0

View File

@@ -19,6 +19,7 @@
#include <sys/stat.h>
#include "kvm_util.h"
#include "numaif.h"
#include "test_util.h"
#include "ucall_common.h"
@@ -75,6 +76,101 @@ static void test_mmap_supported(int fd, size_t total_size)
kvm_munmap(mem, total_size);
}
/*
 * Verify that NUMA memory policies can be applied to, and read back from, a
 * shared mapping of a guest_memfd file using mbind() and get_mempolicy().
 *
 * @fd:         guest_memfd file descriptor to map
 * @total_size: size in bytes of the mapping to create
 *
 * The test is silently skipped when is_multi_numa_node_system() reports
 * false.  The policies are applied to the first four pages of the mapping,
 * so total_size is presumably at least 4 * page_size -- confirm with caller.
 */
static void test_mbind(int fd, size_t total_size)
{
	const unsigned long nodemask_0 = 1; /* nid: 0 */
	unsigned long nodemask = 0;
	/*
	 * Advertise the full bit width of the single-word nodemask buffer
	 * rather than a hard-coded 8.  get_mempolicy() fails with EINVAL when
	 * maxnode is smaller than the number of node IDs supported by the
	 * system, so a fixed 8 spuriously fails on hosts with more possible
	 * nodes than that.
	 */
	const unsigned long maxnode = 8 * sizeof(nodemask);
	int policy;
	char *mem;
	int ret;

	if (!is_multi_numa_node_system())
		return;

	mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	/* Test MPOL_INTERLEAVE policy on the first two pages */
	kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
	kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
	TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
		    "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
		    MPOL_INTERLEAVE, nodemask_0, policy, nodemask);

	/* Test basic MPOL_BIND policy on the following two pages */
	kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
	kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
	TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
		    "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
		    MPOL_BIND, nodemask_0, policy, nodemask);

	/* Test MPOL_DEFAULT policy: reset the whole range, expect an empty mask */
	kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
	kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
	TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
		    "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
		    MPOL_DEFAULT, policy, nodemask);

	/*
	 * Test with invalid policy.  Call mbind() directly, not the kvm_mbind()
	 * wrapper, because failure is the expected outcome here.
	 */
	ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
	TEST_ASSERT(ret == -1 && errno == EINVAL,
		    "mbind with invalid policy should fail with EINVAL");

	kvm_munmap(mem, total_size);
}
/*
 * Check that pages of a guest_memfd mapping are allocated on the NUMA node
 * selected through mbind(), both for the initial allocation and after the
 * backing pages are hole-punched and the policy is changed.
 *
 * @fd:         guest_memfd file descriptor to map
 * @total_size: size in bytes of the mapping; the checks cover exactly four
 *              pages, so this is presumably 4 * page_size -- confirm with caller
 *
 * Does nothing when is_multi_numa_node_system() reports false.
 */
static void test_numa_allocation(int fd, size_t total_size)
{
	enum { NR_TEST_PAGES = 4 };

	unsigned long mask_node0 = 1UL << 0; /* Node 0 */
	unsigned long mask_node1 = 1UL << 1; /* Node 1 */
	unsigned long max_nodes = 8;
	void *page_addrs[NR_TEST_PAGES];
	int node_of[NR_TEST_PAGES];
	char *base;
	int idx;

	if (!is_multi_numa_node_system())
		return;

	base = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);

	/* Record the address of each page under test. */
	for (idx = 0; idx < NR_TEST_PAGES; idx++)
		page_addrs[idx] = &base[page_size * idx];

	/*
	 * Policy applied after allocation: write to the first page, bind it
	 * to node 0, then drop its backing storage again.
	 */
	memset(base, 0xaa, page_size);
	kvm_mbind(page_addrs[0], page_size, MPOL_BIND, &mask_node0, max_nodes, 0);
	kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);

	/*
	 * Policy applied before allocation: pages 0-1 go to node 1 and
	 * pages 2-3 go to node 0, then the whole mapping is written.
	 */
	kvm_mbind(page_addrs[0], page_size * 2, MPOL_BIND, &mask_node1, max_nodes, 0);
	kvm_mbind(page_addrs[2], page_size * 2, MPOL_BIND, &mask_node0, max_nodes, 0);
	memset(base, 0xaa, total_size);

	/* Query where each page ended up and compare against the policy. */
	kvm_move_pages(0, NR_TEST_PAGES, page_addrs, NULL, node_of, 0);
	TEST_ASSERT(node_of[0] == 1, "Expected page 0 on node 1, got it on node %d", node_of[0]);
	TEST_ASSERT(node_of[1] == 1, "Expected page 1 on node 1, got it on node %d", node_of[1]);
	TEST_ASSERT(node_of[2] == 0, "Expected page 2 on node 0, got it on node %d", node_of[2]);
	TEST_ASSERT(node_of[3] == 0, "Expected page 3 on node 0, got it on node %d", node_of[3]);

	/* Release the backing storage of the entire mapping. */
	kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);

	/* Swap the node assignment of the two halves and allocate again. */
	kvm_mbind(page_addrs[0], page_size * 2, MPOL_BIND, &mask_node0, max_nodes, 0);
	kvm_mbind(page_addrs[2], page_size * 2, MPOL_BIND, &mask_node1, max_nodes, 0);
	memset(base, 0xaa, total_size);

	kvm_move_pages(0, NR_TEST_PAGES, page_addrs, NULL, node_of, 0);
	TEST_ASSERT(node_of[0] == 0, "Expected page 0 on node 0, got it on node %d", node_of[0]);
	TEST_ASSERT(node_of[1] == 0, "Expected page 1 on node 0, got it on node %d", node_of[1]);
	TEST_ASSERT(node_of[2] == 1, "Expected page 2 on node 1, got it on node %d", node_of[2]);
	TEST_ASSERT(node_of[3] == 1, "Expected page 3 on node 1, got it on node %d", node_of[3]);

	kvm_munmap(base, total_size);
}
static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
{
const char val = 0xaa;
@@ -273,11 +369,13 @@ static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
gmem_test(mmap_supported, vm, flags);
gmem_test(fault_overflow, vm, flags);
gmem_test(numa_allocation, vm, flags);
} else {
gmem_test(fault_private, vm, flags);
}
gmem_test(mmap_cow, vm, flags);
gmem_test(mbind, vm, flags);
} else {
gmem_test(mmap_not_supported, vm, flags);
}