Merge branch 'kvm-arm64/nv-xnx-haf' into kvmarm/next

* kvm-arm64/nv-xnx-haf: (22 commits)
  : Support for FEAT_XNX and FEAT_HAF in nested
  :
  : Add support for a couple of MMU-related features that weren't
  : implemented in KVM's software page table walkers:
  :
  :  - FEAT_XNX: Allows the hypervisor to describe execute permissions
  :    separately for EL0 and EL1 (a decode sketch follows the diffstat)
  :
  :  - FEAT_HAF: Hardware update of the Access Flag, which in the context
  :    of nested virtualization means software walkers must also set the
  :    Access Flag.
  :
  : The series also adds some basic support for testing KVM's emulation of
  : the AT instruction, including the implementation detail that AT sets the
  : Access Flag in KVM.
  KVM: arm64: at: Update AF on software walk only if VM has FEAT_HAFDBS
  KVM: arm64: at: Use correct HA bit in TCR_EL2 when regime is EL2
  KVM: arm64: Document KVM_PGTABLE_PROT_{UX,PX}
  KVM: arm64: Fix spelling mistake "Unexpeced" -> "Unexpected"
  KVM: arm64: Add break to default case in kvm_pgtable_stage2_pte_prot()
  KVM: arm64: Add endian casting to kvm_swap_s[12]_desc()
  KVM: arm64: Fix compilation when CONFIG_ARM64_USE_LSE_ATOMICS=n
  KVM: arm64: selftests: Add test for AT emulation
  KVM: arm64: nv: Expose hardware access flag management to NV guests
  KVM: arm64: nv: Implement HW access flag management in stage-2 SW PTW
  KVM: arm64: Implement HW access flag management in stage-1 SW PTW
  KVM: arm64: Propagate PTW errors up to AT emulation
  KVM: arm64: Add helper for swapping guest descriptor
  KVM: arm64: nv: Use pgtable definitions in stage-2 walk
  KVM: arm64: Handle endianness in read helper for emulated PTW
  KVM: arm64: nv: Stop passing vCPU through void ptr in S2 PTW
  KVM: arm64: Call helper for reading descriptors directly
  KVM: arm64: nv: Advertise support for FEAT_XNX
  KVM: arm64: Teach ptdump about FEAT_XNX permissions
  KVM: arm64: nv: Forward FEAT_XNX permissions to the shadow stage-2
  ...

Signed-off-by: Oliver Upton <oupton@kernel.org>
Oliver Upton
2025-12-01 00:47:41 -08:00
16 changed files with 607 additions and 95 deletions
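
For reference, a minimal standalone sketch of the stage-2 XN[54:53] decode that
FEAT_XNX introduces, mirroring the kvm_s2_trans_exec_el0()/kvm_s2_trans_exec_el1()
helpers added in the diff below. The names used here (s2_xn, s2_exec_el0,
s2_exec_el1, S2_XN_MASK) are illustrative only and not part of the series; the key
point is that without FEAT_XNX only XN[1] (bit 54) is defined, so bit 53 is masked
off before decoding.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stage-2 descriptor XN field: bits [54:53] with FEAT_XNX, bit [54] without */
#define S2_XN_SHIFT	53
#define S2_XN_MASK	(UINT64_C(3) << S2_XN_SHIFT)

static uint8_t s2_xn(uint64_t desc, bool has_xnx)
{
	uint8_t xn = (desc & S2_XN_MASK) >> S2_XN_SHIFT;

	/* Without FEAT_XNX only XN[1] (bit 54) is defined; ignore bit 53 */
	if (!has_xnx)
		xn &= 0b10;

	return xn;
}

/* 0b00: EL1 and EL0 executable, 0b11: EL1 only */
static bool s2_exec_el1(uint64_t desc, bool has_xnx)
{
	uint8_t xn = s2_xn(desc, has_xnx);

	return xn == 0b00 || xn == 0b11;
}

/* 0b00: EL1 and EL0 executable, 0b01: EL0 only */
static bool s2_exec_el0(uint64_t desc, bool has_xnx)
{
	uint8_t xn = s2_xn(desc, has_xnx);

	return xn == 0b00 || xn == 0b01;
}

int main(void)
{
	for (unsigned int xn = 0; xn < 4; xn++) {
		uint64_t desc = (uint64_t)xn << S2_XN_SHIFT;

		printf("XN=0b%u%u: EL1 exec %d, EL0 exec %d\n",
		       xn >> 1, xn & 1,
		       s2_exec_el1(desc, true), s2_exec_el0(desc, true));
	}
	return 0;
}

The four encodings printed by main() form the same table that stage2_set_xn_attr()
builds and kvm_pgtable_stage2_pte_prot() decodes further down in the diff.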

View File

@@ -111,6 +111,7 @@
#define TCR_EL2_DS (1UL << 32)
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_HPD (1 << 24)
#define TCR_EL2_HA (1 << 21)
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)

View File

@@ -246,9 +246,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);

View File

@@ -120,9 +120,42 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
return trans->writable;
}
static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
static inline bool kvm_has_xnx(struct kvm *kvm)
{
return !(trans->desc & BIT(54));
return cpus_have_final_cap(ARM64_HAS_XNX) &&
kvm_has_feat(kvm, ID_AA64MMFR1_EL1, XNX, IMP);
}
static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *trans)
{
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
if (!kvm_has_xnx(kvm))
xn &= 0b10;
switch (xn) {
case 0b00:
case 0b01:
return true;
default:
return false;
}
}
static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *trans)
{
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
if (!kvm_has_xnx(kvm))
xn &= 0b10;
switch (xn) {
case 0b00:
case 0b11:
return true;
default:
return false;
}
}
extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -320,6 +353,7 @@ struct s1_walk_info {
bool be;
bool s2;
bool pa52bit;
bool ha;
};
struct s1_walk_result {
@@ -370,4 +404,6 @@ void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
(FIX_VNCR - __c); \
})
int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new);
#endif /* __ARM64_KVM_NESTED_H */

View File

@@ -89,7 +89,7 @@ typedef u64 kvm_pte_t;
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53)
#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
@@ -240,7 +240,9 @@ enum kvm_pgtable_stage2_flags {
/**
* enum kvm_pgtable_prot - Page-table permissions and attributes.
* @KVM_PGTABLE_PROT_X: Execute permission.
* @KVM_PGTABLE_PROT_UX: Unprivileged execute permission.
* @KVM_PGTABLE_PROT_PX: Privileged execute permission.
* @KVM_PGTABLE_PROT_X: Privileged and unprivileged execute permission.
* @KVM_PGTABLE_PROT_W: Write permission.
* @KVM_PGTABLE_PROT_R: Read permission.
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
@@ -251,12 +253,15 @@ enum kvm_pgtable_stage2_flags {
* @KVM_PGTABLE_PROT_SW3: Software bit 3.
*/
enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_X = BIT(0),
KVM_PGTABLE_PROT_W = BIT(1),
KVM_PGTABLE_PROT_R = BIT(2),
KVM_PGTABLE_PROT_PX = BIT(0),
KVM_PGTABLE_PROT_UX = BIT(1),
KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX |
KVM_PGTABLE_PROT_UX,
KVM_PGTABLE_PROT_W = BIT(2),
KVM_PGTABLE_PROT_R = BIT(3),
KVM_PGTABLE_PROT_DEVICE = BIT(3),
KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
KVM_PGTABLE_PROT_DEVICE = BIT(4),
KVM_PGTABLE_PROT_NORMAL_NC = BIT(5),
KVM_PGTABLE_PROT_SW0 = BIT(55),
KVM_PGTABLE_PROT_SW1 = BIT(56),

View File

@@ -3140,6 +3140,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_GICV5_LEGACY,
.matches = test_has_gicv5_legacy,
},
{
.desc = "XNX",
.capability = ARM64_HAS_XNX,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP)
},
{},
};

View File

@@ -346,6 +346,11 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
wi->ha = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF);
wi->ha &= (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_HA, tcr) :
FIELD_GET(TCR_HA, tcr));
return 0;
addrsz:
@@ -362,10 +367,42 @@ transfault:
return -EFAULT;
}
static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
struct s1_walk_info *wi)
{
u64 val;
int r;
r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
if (r)
return r;
if (wi->be)
*desc = be64_to_cpu((__force __be64)val);
else
*desc = le64_to_cpu((__force __le64)val);
return 0;
}
static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
struct s1_walk_info *wi)
{
if (wi->be) {
old = (__force u64)cpu_to_be64(old);
new = (__force u64)cpu_to_be64(new);
} else {
old = (__force u64)cpu_to_le64(old);
new = (__force u64)cpu_to_le64(new);
}
return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
}
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
u64 va_top, va_bottom, baddr, desc;
u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
int level, stride, ret;
level = wi->sl;
@@ -375,7 +412,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
va_top = get_ia_size(wi) - 1;
while (1) {
u64 index, ipa;
u64 index;
va_bottom = (3 - level) * stride + wi->pgshift;
index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
@@ -414,16 +451,13 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return ret;
}
ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi);
if (ret) {
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
return ret;
}
if (wi->be)
desc = be64_to_cpu((__force __be64)desc);
else
desc = le64_to_cpu((__force __le64)desc);
new_desc = desc;
/* Invalid descriptor */
if (!(desc & BIT(0)))
@@ -477,6 +511,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
goto addrsz;
if (wi->ha)
new_desc |= PTE_AF;
if (new_desc != desc) {
ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
if (ret)
return ret;
desc = new_desc;
}
if (!(desc & PTE_AF)) {
fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
return -EACCES;
@@ -1221,7 +1266,7 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu,
wr->pr &= !pan;
}
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
{
struct s1_walk_result wr = {};
struct s1_walk_info wi = {};
@@ -1246,6 +1291,11 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
/*
* Race to update a descriptor -- restart the walk.
*/
if (ret == -EAGAIN)
return ret;
if (ret)
goto compute_par;
@@ -1279,7 +1329,8 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
compute_par:
return compute_par_s1(vcpu, &wi, &wr);
*par = compute_par_s1(vcpu, &wi, &wr);
return 0;
}
/*
@@ -1407,9 +1458,10 @@ static bool par_check_s1_access_fault(u64 par)
!(par & SYS_PAR_EL1_S));
}
void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
int ret;
/*
* If PAR_EL1 reports that AT failed on a S1 permission or access
@@ -1421,15 +1473,20 @@ void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
*/
if ((par & SYS_PAR_EL1_F) &&
!par_check_s1_perm_fault(par) &&
!par_check_s1_access_fault(par))
par = handle_at_slow(vcpu, op, vaddr);
!par_check_s1_access_fault(par)) {
ret = handle_at_slow(vcpu, op, vaddr, &par);
if (ret)
return ret;
}
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
return 0;
}
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par;
int ret;
/*
* We've trapped, so everything is live on the CPU. As we will be
@@ -1476,13 +1533,17 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
}
/* We failed the translation, let's replay it in slow motion */
if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
par = handle_at_slow(vcpu, op, vaddr);
if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
ret = handle_at_slow(vcpu, op, vaddr, &par);
if (ret)
return ret;
}
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
return 0;
}
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
struct kvm_s2_trans out = {};
u64 ipa, par;
@@ -1509,13 +1570,13 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
break;
default:
WARN_ON_ONCE(1);
return;
return 0;
}
__kvm_at_s1e01(vcpu, op, vaddr);
par = vcpu_read_sys_reg(vcpu, PAR_EL1);
if (par & SYS_PAR_EL1_F)
return;
return 0;
/*
* If we only have a single stage of translation (EL2&0), exit
@@ -1523,14 +1584,14 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
*/
if (compute_translation_regime(vcpu, op) == TR_EL20 ||
!(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
return;
return 0;
/* Do the stage-2 translation */
ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
out.esr = 0;
ret = kvm_walk_nested_s2(vcpu, ipa, &out);
if (ret < 0)
return;
return ret;
/* Check the access permission */
if (!out.esr &&
@@ -1539,6 +1600,7 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
par = compute_par_s12(vcpu, par, &out);
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
return 0;
}
/*
@@ -1637,3 +1699,97 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
return ret;
}
}
#ifdef CONFIG_ARM64_LSE_ATOMICS
static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
{
u64 tmp = old;
int ret = 0;
uaccess_enable_privileged();
asm volatile(__LSE_PREAMBLE
"1: cas %[old], %[new], %[addr]\n"
"2:\n"
_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
: [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
: [new] "r" (new)
: "memory");
uaccess_disable_privileged();
if (ret)
return ret;
if (tmp != old)
return -EAGAIN;
return ret;
}
#else
static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
{
return -EINVAL;
}
#endif
static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
{
int ret = 1;
u64 tmp;
uaccess_enable_privileged();
asm volatile("prfm pstl1strm, %[addr]\n"
"1: ldxr %[tmp], %[addr]\n"
"sub %[tmp], %[tmp], %[old]\n"
"cbnz %[tmp], 3f\n"
"2: stlxr %w[ret], %[new], %[addr]\n"
"3:\n"
_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret])
_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret])
: [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp)
: [old] "r" (old), [new] "r" (new)
: "memory");
uaccess_disable_privileged();
/* STLXR didn't update the descriptor, or the compare failed */
if (ret == 1)
return -EAGAIN;
return ret;
}
int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
{
struct kvm_memory_slot *slot;
unsigned long hva;
u64 __user *ptep;
bool writable;
int offset;
gfn_t gfn;
int r;
lockdep_assert(srcu_read_lock_held(&kvm->srcu));
gfn = ipa >> PAGE_SHIFT;
offset = offset_in_page(ipa);
slot = gfn_to_memslot(kvm, gfn);
hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
if (kvm_is_error_hva(hva))
return -EINVAL;
if (!writable)
return -EPERM;
ptep = (u64 __user *)(hva + offset);
if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
r = __lse_swap_desc(ptep, old, new);
else
r = __llsc_swap_desc(ptep, old, new);
if (r < 0)
return r;
mark_page_dirty_in_slot(kvm, slot, gfn);
return 0;
}

View File

@@ -661,11 +661,37 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr)
{
bool px, ux;
u8 xn;
px = prot & KVM_PGTABLE_PROT_PX;
ux = prot & KVM_PGTABLE_PROT_UX;
if (!cpus_have_final_cap(ARM64_HAS_XNX) && px != ux)
return -EINVAL;
if (px && ux)
xn = 0b00;
else if (!px && ux)
xn = 0b01;
else if (!px && !ux)
xn = 0b10;
else
xn = 0b11;
*attr &= ~KVM_PTE_LEAF_ATTR_HI_S2_XN;
*attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, xn);
return 0;
}
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
kvm_pte_t *ptep)
{
kvm_pte_t attr;
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
int r;
switch (prot & (KVM_PGTABLE_PROT_DEVICE |
KVM_PGTABLE_PROT_NORMAL_NC)) {
@@ -685,8 +711,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
attr = KVM_S2_MEMATTR(pgt, NORMAL);
}
if (!(prot & KVM_PGTABLE_PROT_X))
attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
r = stage2_set_xn_attr(prot, &attr);
if (r)
return r;
if (prot & KVM_PGTABLE_PROT_R)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
@@ -715,8 +742,20 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
prot |= KVM_PGTABLE_PROT_R;
if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
prot |= KVM_PGTABLE_PROT_W;
if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
prot |= KVM_PGTABLE_PROT_X;
switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) {
case 0b00:
prot |= KVM_PGTABLE_PROT_PX | KVM_PGTABLE_PROT_UX;
break;
case 0b01:
prot |= KVM_PGTABLE_PROT_UX;
break;
case 0b11:
prot |= KVM_PGTABLE_PROT_PX;
break;
default:
break;
}
return prot;
}
@@ -1290,9 +1329,9 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags)
{
int ret;
kvm_pte_t xn = 0, set = 0, clr = 0;
s8 level;
kvm_pte_t set = 0, clr = 0;
int ret;
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
return -EINVAL;
@@ -1303,8 +1342,12 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
if (prot & KVM_PGTABLE_PROT_W)
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
if (prot & KVM_PGTABLE_PROT_X)
clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
ret = stage2_set_xn_attr(prot, &xn);
if (ret)
return ret;
set |= xn & KVM_PTE_LEAF_ATTR_HI_S2_XN;
clr |= ~xn & KVM_PTE_LEAF_ATTR_HI_S2_XN;
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags);
if (!ret || ret == -EAGAIN)

View File

@@ -1553,6 +1553,16 @@ static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
*prot |= kvm_encode_nested_level(nested);
}
static void adjust_nested_exec_perms(struct kvm *kvm,
struct kvm_s2_trans *nested,
enum kvm_pgtable_prot *prot)
{
if (!kvm_s2_trans_exec_el0(kvm, nested))
*prot &= ~KVM_PGTABLE_PROT_UX;
if (!kvm_s2_trans_exec_el1(kvm, nested))
*prot &= ~KVM_PGTABLE_PROT_PX;
}
#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED)
static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
@@ -1604,11 +1614,12 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (writable)
prot |= KVM_PGTABLE_PROT_W;
if (exec_fault ||
(cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
(!nested || kvm_s2_trans_executable(nested))))
if (exec_fault || cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
prot |= KVM_PGTABLE_PROT_X;
if (nested)
adjust_nested_exec_perms(kvm, nested, &prot);
kvm_fault_lock(kvm);
if (mmu_invalidate_retry(kvm, mmu_seq)) {
ret = -EAGAIN;
@@ -1883,11 +1894,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
else
prot |= KVM_PGTABLE_PROT_DEVICE;
} else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
(!nested || kvm_s2_trans_executable(nested))) {
} else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) {
prot |= KVM_PGTABLE_PROT_X;
}
if (nested)
adjust_nested_exec_perms(kvm, nested, &prot);
/*
* Under the premise of getting a FSC_PERM fault, we just need to relax
* permissions only if vma_pagesize equals fault_granule. Otherwise,
@@ -2097,6 +2110,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
u32 esr;
ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
if (ret == -EAGAIN) {
ret = 1;
goto out_unlock;
}
if (ret) {
esr = kvm_s2_trans_esr(&nested_trans);
kvm_inject_s2_fault(vcpu, esr);

View File

@@ -124,14 +124,13 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
}
struct s2_walk_info {
int (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
void *data;
u64 baddr;
unsigned int max_oa_bits;
unsigned int pgshift;
unsigned int sl;
unsigned int t0sz;
bool be;
u64 baddr;
unsigned int max_oa_bits;
unsigned int pgshift;
unsigned int sl;
unsigned int t0sz;
bool be;
bool ha;
};
static u32 compute_fsc(int level, u32 fsc)
@@ -199,6 +198,42 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
return 0;
}
static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc,
struct s2_walk_info *wi)
{
u64 val;
int r;
r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
if (r)
return r;
/*
* Handle reversed descriptors if endianness differs between the
* host and the guest hypervisor.
*/
if (wi->be)
*desc = be64_to_cpu((__force __be64)val);
else
*desc = le64_to_cpu((__force __le64)val);
return 0;
}
static int swap_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 old, u64 new,
struct s2_walk_info *wi)
{
if (wi->be) {
old = (__force u64)cpu_to_be64(old);
new = (__force u64)cpu_to_be64(new);
} else {
old = (__force u64)cpu_to_le64(old);
new = (__force u64)cpu_to_le64(new);
}
return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
}
/*
* This is essentially a C-version of the pseudo code from the ARM ARM
* AArch64.TranslationTableWalk function. I strongly recommend looking at
@@ -206,13 +241,13 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
*
* Must be called with the kvm->srcu read lock held
*/
static int walk_nested_s2_pgd(phys_addr_t ipa,
static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
struct s2_walk_info *wi, struct kvm_s2_trans *out)
{
int first_block_level, level, stride, input_size, base_lower_bound;
phys_addr_t base_addr;
unsigned int addr_top, addr_bottom;
u64 desc; /* page table entry */
u64 desc, new_desc; /* page table entry */
int ret;
phys_addr_t paddr;
@@ -257,28 +292,30 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
>> (addr_bottom - 3);
paddr = base_addr | index;
ret = wi->read_desc(paddr, &desc, wi->data);
ret = read_guest_s2_desc(vcpu, paddr, &desc, wi);
if (ret < 0)
return ret;
/*
* Handle reversedescriptors if endianness differs between the
* host and the guest hypervisor.
*/
if (wi->be)
desc = be64_to_cpu((__force __be64)desc);
else
desc = le64_to_cpu((__force __le64)desc);
new_desc = desc;
/* Check for valid descriptor at this point */
if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
if (!(desc & KVM_PTE_VALID)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
out->desc = desc;
return 1;
}
/* We're at the final level or block translation level */
if ((desc & 3) == 1 || level == 3)
if (FIELD_GET(KVM_PTE_TYPE, desc) == KVM_PTE_TYPE_BLOCK) {
if (level < 3)
break;
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
out->desc = desc;
return 1;
}
/* We're at the final level */
if (level == 3)
break;
if (check_output_size(wi, desc)) {
@@ -305,7 +342,18 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
return 1;
}
if (!(desc & BIT(10))) {
if (wi->ha)
new_desc |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
if (new_desc != desc) {
ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi);
if (ret)
return ret;
desc = new_desc;
}
if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
out->desc = desc;
return 1;
@@ -318,20 +366,13 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
(ipa & GENMASK_ULL(addr_bottom - 1, 0));
out->output = paddr;
out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
out->readable = desc & (0b01 << 6);
out->writable = desc & (0b10 << 6);
out->readable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
out->writable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
out->level = level;
out->desc = desc;
return 0;
}
static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
{
struct kvm_vcpu *vcpu = data;
return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
}
static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
{
wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
@@ -350,6 +391,8 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
/* Global limit for now, should eventually be per-VM */
wi->max_oa_bits = min(get_kvm_ipa_limit(),
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
wi->ha = vtcr & VTCR_EL2_HA;
}
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -364,15 +407,13 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
if (!vcpu_has_nv(vcpu))
return 0;
wi.read_desc = read_guest_s2_desc;
wi.data = vcpu;
wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
vtcr_to_walk_info(vtcr, &wi);
wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
ret = walk_nested_s2_pgd(gipa, &wi, result);
ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result);
if (ret)
result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC);
@@ -788,7 +829,10 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
return 0;
if (kvm_vcpu_trap_is_iabt(vcpu)) {
forward_fault = !kvm_s2_trans_executable(trans);
if (vcpu_mode_priv(vcpu))
forward_fault = !kvm_s2_trans_exec_el1(vcpu->kvm, trans);
else
forward_fault = !kvm_s2_trans_exec_el0(vcpu->kvm, trans);
} else {
bool write_fault = kvm_is_write_fault(vcpu);
@@ -1555,12 +1599,13 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
case SYS_ID_AA64MMFR1_EL1:
val &= ~(ID_AA64MMFR1_EL1_CMOW |
ID_AA64MMFR1_EL1_nTLBPA |
ID_AA64MMFR1_EL1_ETS |
ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_HAFDBS);
ID_AA64MMFR1_EL1_ETS);
/* FEAT_E2H0 implies no VHE */
if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
val &= ~ID_AA64MMFR1_EL1_VH;
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR1_EL1, HAFDBS, AF);
break;
case SYS_ID_AA64MMFR2_EL1:

View File

@@ -31,27 +31,46 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = {
.val = PTE_VALID,
.set = " ",
.clear = "F",
}, {
},
{
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.set = "R",
.clear = " ",
}, {
},
{
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.set = "W",
.clear = " ",
}, {
},
{
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.val = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.set = "NX",
.clear = "x ",
}, {
.val = 0b00UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
.set = "px ux ",
},
{
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.val = 0b01UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
.set = "PXNux ",
},
{
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.val = 0b10UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
.set = "PXNUXN",
},
{
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.val = 0b11UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
.set = "px UXN",
},
{
.mask = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.val = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.set = "AF",
.clear = " ",
}, {
},
{
.mask = PMD_TYPE_MASK,
.val = PMD_TYPE_SECT,
.set = "BLK",

View File

@@ -3782,7 +3782,8 @@ static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
__kvm_at_s1e01(vcpu, op, p->regval);
if (__kvm_at_s1e01(vcpu, op, p->regval))
return false;
return true;
}
@@ -3799,7 +3800,8 @@ static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
}
__kvm_at_s1e2(vcpu, op, p->regval);
if (__kvm_at_s1e2(vcpu, op, p->regval))
return false;
return true;
}
@@ -3809,7 +3811,8 @@ static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
__kvm_at_s12(vcpu, op, p->regval);
if (__kvm_at_s12(vcpu, op, p->regval))
return false;
return true;
}

View File

@@ -65,6 +65,7 @@ HAS_TLB_RANGE
HAS_VA52
HAS_VIRT_HOST_EXTN
HAS_WFXT
HAS_XNX
HAFT
HW_DBM
KVM_HVHE

View File

@@ -156,6 +156,7 @@ TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/at
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
TEST_GEN_PROGS_arm64 += arm64/hello_el2
TEST_GEN_PROGS_arm64 += arm64/host_sve

View File

@@ -0,0 +1,166 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes.
*/
#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"
#include "ucall.h"
#include <asm/sysreg.h>
#define TEST_ADDR 0x80000000
enum {
CLEAR_ACCESS_FLAG,
TEST_ACCESS_FLAG,
};
static u64 *ptep_hva;
#define copy_el2_to_el1(reg) \
write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12)
/* Yes, this is an ugly hack */
#define __at(op, addr) write_sysreg_s(addr, op)
#define test_at_insn(op, expect_fault) \
do { \
u64 par, fsc; \
bool fault; \
\
GUEST_SYNC(CLEAR_ACCESS_FLAG); \
\
__at(OP_AT_##op, TEST_ADDR); \
isb(); \
par = read_sysreg(par_el1); \
\
fault = par & SYS_PAR_EL1_F; \
fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \
\
__GUEST_ASSERT((expect_fault) == fault, \
"AT "#op": %sexpected fault (par: %lx)1", \
(expect_fault) ? "" : "un", par); \
if ((expect_fault)) { \
__GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \
"AT "#op": expected access flag fault (par: %lx)", \
par); \
} else { \
GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \
GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \
GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \
GUEST_SYNC(TEST_ACCESS_FLAG); \
} \
} while (0)
static void test_at(bool expect_fault)
{
test_at_insn(S1E2R, expect_fault);
test_at_insn(S1E2W, expect_fault);
/* Reuse the stage-1 MMU context from EL2 at EL1 */
copy_el2_to_el1(SCTLR);
copy_el2_to_el1(MAIR);
copy_el2_to_el1(TCR);
copy_el2_to_el1(TTBR0);
copy_el2_to_el1(TTBR1);
/* Disable stage-2 translation and enter a non-host context */
write_sysreg(0, vtcr_el2);
write_sysreg(0, vttbr_el2);
sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0);
isb();
test_at_insn(S1E1R, expect_fault);
test_at_insn(S1E1W, expect_fault);
}
static void guest_code(void)
{
sysreg_clear_set(tcr_el1, TCR_HA, 0);
isb();
test_at(true);
if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1)))
GUEST_DONE();
/*
* KVM's software PTW makes the implementation choice that the AT
* instruction sets the access flag.
*/
sysreg_clear_set(tcr_el1, 0, TCR_HA);
isb();
test_at(false);
GUEST_DONE();
}
static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
{
switch (uc->args[1]) {
case CLEAR_ACCESS_FLAG:
/*
* Delete + reinstall the memslot to invalidate stage-2
* mappings of the stage-1 page tables, forcing KVM to
* use the 'slow' AT emulation path.
*
* This and clearing the access flag from host userspace
* ensures that the access flag cannot be set speculatively
* and is reliably cleared at the time of the AT instruction.
*/
clear_bit(__ffs(PTE_AF), ptep_hva);
vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]);
break;
case TEST_ACCESS_FLAG:
TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva),
"Expected access flag to be set (desc: %lu)", *ptep_hva);
break;
default:
TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]);
}
}
static void run_test(struct kvm_vcpu *vcpu)
{
struct ucall uc;
while (true) {
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_DONE:
return;
case UCALL_SYNC:
handle_sync(vcpu, &uc);
continue;
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
return;
default:
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
}
}
}
int main(void)
{
struct kvm_vcpu_init init;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
vm = vm_create(1);
kvm_get_default_vcpu_target(vm, &init);
init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
kvm_arch_vm_finalize_vcpus(vm);
virt_map(vm, TEST_ADDR, TEST_ADDR, 1);
ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3);
run_test(vcpu);
kvm_vm_free(vm);
return 0;
}

View File

@@ -715,6 +715,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
#endif
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);

View File

@@ -1201,6 +1201,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
ret, errno, slot, flags);
}
void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
{
struct userspace_mem_region *region = memslot2region(vm, slot);
struct kvm_userspace_memory_region2 tmp = region->region;
tmp.memory_size = 0;
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
}
/*
* VM Memory Region Move
*