KVM/arm64 updates for 6.19

 - Support for userspace handling of synchronous external aborts (SEAs),
   allowing the VMM to potentially handle the abort in a non-fatal
   manner.

 - Large rework of the VGIC's list register handling with the goal of
   supporting more active/pending IRQs than available list registers in
   hardware. In addition, the VGIC now supports EOImode==1 style
   deactivations for IRQs, which may occur on a different vCPU than the
   one that acked the IRQ.

 - Support for FEAT_XNX (user / privileged execute permissions) and
   FEAT_HAF (hardware update to the Access Flag) in the software page
   table walkers and shadow MMU.

 - Allow page table destruction to reschedule, fixing long need_resched
   latencies observed when destroying a large VM.

 - Minor fixes to KVM and selftests
Committed by Paolo Bonzini on 2025-12-02 18:36:26 +01:00
55 changed files with 2586 additions and 542 deletions


@@ -7286,6 +7286,41 @@ exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case,
it will enter with output fields already valid; in the common case, the
``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``.
Userspace need not do anything if it does not wish to support a TDVMCALL.
::
/* KVM_EXIT_ARM_SEA */
struct {
#define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0)
__u64 flags;
__u64 esr;
__u64 gva;
__u64 gpa;
} arm_sea;
Used on arm64 systems. When the VM capability ``KVM_CAP_ARM_SEA_TO_USER`` is
enabled, KVM exits to userspace if a guest access causes a synchronous
external abort (SEA) and the host APEI fails to handle the SEA.
``esr`` is set to a sanitized value of ESR_EL2 from the exception taken to KVM,
consisting of the following fields:
- ``ESR_EL2.EC``
- ``ESR_EL2.IL``
- ``ESR_EL2.FnV``
- ``ESR_EL2.EA``
- ``ESR_EL2.CM``
- ``ESR_EL2.WNR``
- ``ESR_EL2.FSC``
- ``ESR_EL2.SET`` (when FEAT_RAS is implemented for the VM)
``gva`` is set to the value of FAR_EL2 from the exception taken to KVM when
``ESR_EL2.FnV == 0``. Otherwise, the value of ``gva`` is unknown.
``gpa`` is set to the faulting IPA from the exception taken to KVM when
the ``KVM_EXIT_ARM_SEA_FLAG_GPA_VALID`` flag is set. Otherwise, the value of
``gpa`` is unknown.
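For illustration, a VMM run loop could decode this exit roughly as follows.
This is a minimal sketch only: it assumes a uapi ``<linux/kvm.h>`` that already
carries the ``KVM_EXIT_ARM_SEA`` definitions above, and the helper name
``handle_arm_sea_exit()`` is purely illustrative::
#include <stdbool.h>
#include <stdio.h>
#include <linux/kvm.h>

/* Sketch: decode a KVM_EXIT_ARM_SEA exit from the vCPU's mmap'd kvm_run. */
static bool handle_arm_sea_exit(struct kvm_run *run)
{
    __u64 esr = run->arm_sea.esr;

    fprintf(stderr, "guest SEA: esr=0x%llx", (unsigned long long)esr);

    /* ESR_EL2.FnV is ISS bit 10; gva is only meaningful when it is clear. */
    if (!(esr & (1ULL << 10)))
        fprintf(stderr, " gva=0x%llx", (unsigned long long)run->arm_sea.gva);

    if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID)
        fprintf(stderr, " gpa=0x%llx", (unsigned long long)run->arm_sea.gpa);

    fprintf(stderr, "\n");

    /*
     * Policy is left to the VMM: retire the access, inject an external
     * abort back into the guest, or treat the fault as fatal.
     */
    return true;
}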
::
/* Fix the size of the union. */
@@ -8703,6 +8738,18 @@ This capability indicate to the userspace whether a PFNMAP memory region
can be safely mapped as cacheable. This relies on the presence of
force write back (FWB) feature support on the hardware.
7.45 KVM_CAP_ARM_SEA_TO_USER
----------------------------
:Architecture: arm64
:Target: VM
:Parameters: none
:Returns: 0 on success, -EINVAL if unsupported.
When this capability is enabled, KVM may exit to userspace for SEAs taken to
EL2 resulting from a guest access. See ``KVM_EXIT_ARM_SEA`` for more
information.
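A minimal sketch of enabling the capability from userspace, assuming an
already created VM file descriptor (the helper name is illustrative)::
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: ask KVM to report unhandled guest SEAs via KVM_EXIT_ARM_SEA. */
static int enable_sea_to_user(int vm_fd)
{
    struct kvm_enable_cap cap = {
        .cap = KVM_CAP_ARM_SEA_TO_USER,
    };

    return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
If the ioctl fails with ``EINVAL``, the kernel does not support the capability
and KVM keeps its existing behaviour of injecting an SError into the guest.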
8. Other capabilities.
======================


@@ -111,6 +111,7 @@
#define TCR_EL2_DS (1UL << 32)
#define TCR_EL2_RES1 ((1U << 31) | (1 << 23))
#define TCR_EL2_HPD (1 << 24)
#define TCR_EL2_HA (1 << 21)
#define TCR_EL2_TBI (1 << 20)
#define TCR_EL2_PS_SHIFT 16
#define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT)


@@ -79,7 +79,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
@@ -246,9 +246,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);


@@ -54,6 +54,7 @@
#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9)
#define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10)
#define KVM_REQ_VGIC_PROCESS_UPDATE KVM_ARCH_REQ(11)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
@@ -350,6 +351,8 @@ struct kvm_arch {
#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9
/* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */
#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10
/* Unhandled SEAs are taken to userspace */
#define KVM_ARCH_FLAG_EXIT_SEA 11
unsigned long flags;
/* VM-wide vCPU feature set */


@@ -77,12 +77,13 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
u64 __gic_v3_get_lr(unsigned int lr);
void __gic_v3_set_lr(u64 val, int lr);
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);


@@ -120,9 +120,42 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
return trans->writable;
}
static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
static inline bool kvm_has_xnx(struct kvm *kvm)
{
return !(trans->desc & BIT(54));
return cpus_have_final_cap(ARM64_HAS_XNX) &&
kvm_has_feat(kvm, ID_AA64MMFR1_EL1, XNX, IMP);
}
static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *trans)
{
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
if (!kvm_has_xnx(kvm))
xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
switch (xn) {
case 0b00:
case 0b01:
return true;
default:
return false;
}
}
static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *trans)
{
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
if (!kvm_has_xnx(kvm))
xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
switch (xn) {
case 0b00:
case 0b11:
return true;
default:
return false;
}
}
extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -320,6 +353,7 @@ struct s1_walk_info {
bool be;
bool s2;
bool pa52bit;
bool ha;
};
struct s1_walk_result {
@@ -370,4 +404,6 @@ void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
(FIX_VNCR - __c); \
})
int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new);
#endif /* __ARM64_KVM_NESTED_H */


@@ -89,7 +89,7 @@ typedef u64 kvm_pte_t;
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
#define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53)
#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
@@ -240,7 +240,9 @@ enum kvm_pgtable_stage2_flags {
/**
* enum kvm_pgtable_prot - Page-table permissions and attributes.
* @KVM_PGTABLE_PROT_X: Execute permission.
* @KVM_PGTABLE_PROT_UX: Unprivileged execute permission.
* @KVM_PGTABLE_PROT_PX: Privileged execute permission.
* @KVM_PGTABLE_PROT_X: Privileged and unprivileged execute permission.
* @KVM_PGTABLE_PROT_W: Write permission.
* @KVM_PGTABLE_PROT_R: Read permission.
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
@@ -251,12 +253,15 @@ enum kvm_pgtable_stage2_flags {
* @KVM_PGTABLE_PROT_SW3: Software bit 3.
*/
enum kvm_pgtable_prot {
KVM_PGTABLE_PROT_X = BIT(0),
KVM_PGTABLE_PROT_W = BIT(1),
KVM_PGTABLE_PROT_R = BIT(2),
KVM_PGTABLE_PROT_PX = BIT(0),
KVM_PGTABLE_PROT_UX = BIT(1),
KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX |
KVM_PGTABLE_PROT_UX,
KVM_PGTABLE_PROT_W = BIT(2),
KVM_PGTABLE_PROT_R = BIT(3),
KVM_PGTABLE_PROT_DEVICE = BIT(3),
KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
KVM_PGTABLE_PROT_DEVICE = BIT(4),
KVM_PGTABLE_PROT_NORMAL_NC = BIT(5),
KVM_PGTABLE_PROT_SW0 = BIT(55),
KVM_PGTABLE_PROT_SW1 = BIT(56),
@@ -355,6 +360,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
return pteref;
}
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
{
return pteref;
}
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
/*
@@ -384,6 +394,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
}
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
{
return rcu_dereference_raw(pteref);
}
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
{
if (walker->flags & KVM_PGTABLE_WALK_SHARED)
@@ -551,6 +566,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2
*/
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
* kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address at which to place the mapping.
* @size: Size of the mapping.
*
* The page-table is assumed to be unreachable by any hardware walkers prior
* to freeing and therefore no TLB invalidation is performed.
*/
void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
u64 addr, u64 size);
/**
* kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
*
* It is assumed that the rest of the page-table is freed before this operation.
*/
void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
/**
* kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
* @mm_ops: Memory management callbacks.


@@ -180,7 +180,9 @@ struct pkvm_mapping {
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
struct kvm_pgtable_mm_ops *mm_ops);
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
u64 addr, u64 size);
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
enum kvm_pgtable_prot prot, void *mc,
enum kvm_pgtable_walk_flags flags);


@@ -40,8 +40,13 @@
*/
#define HVC_FINALISE_EL2 3
/*
* HVC_GET_ICH_VTR_EL2 - Retrieve the ICH_VTR_EL2 value
*/
#define HVC_GET_ICH_VTR_EL2 4
/* Max number of HYP stub hypercalls */
#define HVC_STUB_HCALL_NR 4
#define HVC_STUB_HCALL_NR 5
/* Error returned when an invalid stub number is passed into x0 */
#define HVC_STUB_ERR 0xbadca11


@@ -2304,6 +2304,49 @@ static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry
}
#endif
static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
int scope)
{
static const struct midr_range has_vgic_v3[] = {
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
{},
};
struct arm_smccc_res res = {};
BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF);
BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY);
if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) &&
!is_midr_in_range_list(has_vgic_v3))
return false;
if (!is_hyp_mode_available())
return false;
if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY))
return true;
if (is_kernel_in_hyp_mode())
res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
else
arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res);
if (res.a0 == HVC_STUB_ERR)
return false;
return res.a1 & ICH_VTR_EL2_TDS;
}
#ifdef CONFIG_ARM64_BTI
static void bti_enable(const struct arm64_cpu_capabilities *__unused)
{
@@ -2815,6 +2858,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = has_gic_prio_relaxed_sync,
},
#endif
{
/*
* Depends on having GICv3
*/
.desc = "ICV_DIR_EL1 trapping",
.capability = ARM64_HAS_ICH_HCR_EL2_TDIR,
.type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
.matches = can_trap_icv_dir_el1,
},
#ifdef CONFIG_ARM64_E0PD
{
.desc = "E0PD",
@@ -3089,6 +3141,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_GICV5_LEGACY,
.matches = test_has_gicv5_legacy,
},
{
.desc = "XNX",
.capability = ARM64_HAS_XNX,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP)
},
{},
};


@@ -54,6 +54,11 @@ SYM_CODE_START_LOCAL(elx_sync)
1: cmp x0, #HVC_FINALISE_EL2
b.eq __finalise_el2
cmp x0, #HVC_GET_ICH_VTR_EL2
b.ne 2f
mrs_s x1, SYS_ICH_VTR_EL2
b 9f
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
mov x0, x2


@@ -91,6 +91,7 @@ KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
KVM_NVHE_ALIAS(alt_cb_patch_nops);
KVM_NVHE_ALIAS(kvm_compute_ich_hcr_trap_bits);
/* Global kernel state accessed by nVHE hyp code. */
KVM_NVHE_ALIAS(kvm_vgic_global_state);


@@ -132,6 +132,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
}
mutex_unlock(&kvm->lock);
break;
case KVM_CAP_ARM_SEA_TO_USER:
r = 0;
set_bit(KVM_ARCH_FLAG_EXIT_SEA, &kvm->arch.flags);
break;
default:
break;
}
@@ -327,6 +331,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IRQFD_RESAMPLE:
case KVM_CAP_COUNTER_OFFSET:
case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
case KVM_CAP_ARM_SEA_TO_USER:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -440,7 +445,7 @@ struct kvm *kvm_arch_alloc_vm(void)
if (!has_vhe())
return kzalloc(sz, GFP_KERNEL_ACCOUNT);
return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO);
return kvzalloc(sz, GFP_KERNEL_ACCOUNT);
}
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
@@ -659,8 +664,7 @@ nommu:
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
if (is_protected_kvm_enabled()) {
kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
&vcpu->arch.vgic_cpu.vgic_v3);
kvm_call_hyp(__vgic_v3_save_aprs, &vcpu->arch.vgic_cpu.vgic_v3);
kvm_call_hyp_nvhe(__pkvm_vcpu_put);
}
@@ -1042,6 +1046,10 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
*/
kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
/* Process interrupts deactivated through a trap */
if (kvm_check_request(KVM_REQ_VGIC_PROCESS_UPDATE, vcpu))
kvm_vgic_process_async_update(vcpu);
if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
kvm_update_stolen_time(vcpu);


@@ -346,6 +346,11 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
wi->ha = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF);
wi->ha &= (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_HA, tcr) :
FIELD_GET(TCR_HA, tcr));
return 0;
addrsz:
@@ -362,10 +367,42 @@ transfault:
return -EFAULT;
}
static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
struct s1_walk_info *wi)
{
u64 val;
int r;
r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
if (r)
return r;
if (wi->be)
*desc = be64_to_cpu((__force __be64)val);
else
*desc = le64_to_cpu((__force __le64)val);
return 0;
}
static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
struct s1_walk_info *wi)
{
if (wi->be) {
old = (__force u64)cpu_to_be64(old);
new = (__force u64)cpu_to_be64(new);
} else {
old = (__force u64)cpu_to_le64(old);
new = (__force u64)cpu_to_le64(new);
}
return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
}
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va)
{
u64 va_top, va_bottom, baddr, desc;
u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
int level, stride, ret;
level = wi->sl;
@@ -375,7 +412,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
va_top = get_ia_size(wi) - 1;
while (1) {
u64 index, ipa;
u64 index;
va_bottom = (3 - level) * stride + wi->pgshift;
index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
@@ -414,16 +451,13 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return ret;
}
ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi);
if (ret) {
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
return ret;
}
if (wi->be)
desc = be64_to_cpu((__force __be64)desc);
else
desc = le64_to_cpu((__force __le64)desc);
new_desc = desc;
/* Invalid descriptor */
if (!(desc & BIT(0)))
@@ -477,6 +511,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
goto addrsz;
if (wi->ha)
new_desc |= PTE_AF;
if (new_desc != desc) {
ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
if (ret)
return ret;
desc = new_desc;
}
if (!(desc & PTE_AF)) {
fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
return -EACCES;
@@ -1221,7 +1266,7 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu,
wr->pr &= !pan;
}
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
{
struct s1_walk_result wr = {};
struct s1_walk_info wi = {};
@@ -1246,6 +1291,11 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
/*
* Race to update a descriptor -- restart the walk.
*/
if (ret == -EAGAIN)
return ret;
if (ret)
goto compute_par;
@@ -1279,7 +1329,8 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
compute_par:
return compute_par_s1(vcpu, &wi, &wr);
*par = compute_par_s1(vcpu, &wi, &wr);
return 0;
}
/*
@@ -1407,9 +1458,10 @@ static bool par_check_s1_access_fault(u64 par)
!(par & SYS_PAR_EL1_S));
}
void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
int ret;
/*
* If PAR_EL1 reports that AT failed on a S1 permission or access
@@ -1421,15 +1473,20 @@ void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
*/
if ((par & SYS_PAR_EL1_F) &&
!par_check_s1_perm_fault(par) &&
!par_check_s1_access_fault(par))
par = handle_at_slow(vcpu, op, vaddr);
!par_check_s1_access_fault(par)) {
ret = handle_at_slow(vcpu, op, vaddr, &par);
if (ret)
return ret;
}
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
return 0;
}
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
u64 par;
int ret;
/*
* We've trapped, so everything is live on the CPU. As we will be
@@ -1476,13 +1533,17 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
}
/* We failed the translation, let's replay it in slow motion */
if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
par = handle_at_slow(vcpu, op, vaddr);
if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
ret = handle_at_slow(vcpu, op, vaddr, &par);
if (ret)
return ret;
}
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
return 0;
}
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
struct kvm_s2_trans out = {};
u64 ipa, par;
@@ -1509,13 +1570,13 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
break;
default:
WARN_ON_ONCE(1);
return;
return 0;
}
__kvm_at_s1e01(vcpu, op, vaddr);
par = vcpu_read_sys_reg(vcpu, PAR_EL1);
if (par & SYS_PAR_EL1_F)
return;
return 0;
/*
* If we only have a single stage of translation (EL2&0), exit
@@ -1523,14 +1584,14 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
*/
if (compute_translation_regime(vcpu, op) == TR_EL20 ||
!(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
return;
return 0;
/* Do the stage-2 translation */
ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
out.esr = 0;
ret = kvm_walk_nested_s2(vcpu, ipa, &out);
if (ret < 0)
return;
return ret;
/* Check the access permission */
if (!out.esr &&
@@ -1539,6 +1600,7 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
par = compute_par_s12(vcpu, par, &out);
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
return 0;
}
/*
@@ -1637,3 +1699,97 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
return ret;
}
}
#ifdef CONFIG_ARM64_LSE_ATOMICS
static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
{
u64 tmp = old;
int ret = 0;
uaccess_enable_privileged();
asm volatile(__LSE_PREAMBLE
"1: cas %[old], %[new], %[addr]\n"
"2:\n"
_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
: [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
: [new] "r" (new)
: "memory");
uaccess_disable_privileged();
if (ret)
return ret;
if (tmp != old)
return -EAGAIN;
return ret;
}
#else
static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
{
return -EINVAL;
}
#endif
static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
{
int ret = 1;
u64 tmp;
uaccess_enable_privileged();
asm volatile("prfm pstl1strm, %[addr]\n"
"1: ldxr %[tmp], %[addr]\n"
"sub %[tmp], %[tmp], %[old]\n"
"cbnz %[tmp], 3f\n"
"2: stlxr %w[ret], %[new], %[addr]\n"
"3:\n"
_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret])
_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret])
: [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp)
: [old] "r" (old), [new] "r" (new)
: "memory");
uaccess_disable_privileged();
/* STLXR didn't update the descriptor, or the compare failed */
if (ret == 1)
return -EAGAIN;
return ret;
}
int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
{
struct kvm_memory_slot *slot;
unsigned long hva;
u64 __user *ptep;
bool writable;
int offset;
gfn_t gfn;
int r;
lockdep_assert(srcu_read_lock_held(&kvm->srcu));
gfn = ipa >> PAGE_SHIFT;
offset = offset_in_page(ipa);
slot = gfn_to_memslot(kvm, gfn);
hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
if (kvm_is_error_hva(hva))
return -EINVAL;
if (!writable)
return -EPERM;
ptep = (u64 __user *)hva + offset;
if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
r = __lse_swap_desc(ptep, old, new);
else
r = __llsc_swap_desc(ptep, old, new);
if (r < 0)
return r;
mark_page_dirty_in_slot(kvm, slot, gfn);
return 0;
}


@@ -157,6 +157,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
host_cpu_if->vgic_vmcr = hyp_cpu_if->vgic_vmcr;
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
}
@@ -464,11 +465,11 @@ static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
__vgic_v3_init_lrs();
}
static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
__vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if));
__vgic_v3_save_aprs(kern_hyp_va(cpu_if));
}
static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
@@ -616,7 +617,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
HANDLE_FUNC(__vgic_v3_save_aprs),
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__pkvm_reserve_vm),
HANDLE_FUNC(__pkvm_unreserve_vm),


@@ -337,6 +337,9 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
/* CTR_EL0 is always under host control, even for protected VMs. */
hyp_vm->kvm.arch.ctr_el0 = host_kvm->arch.ctr_el0;
/* Preserve the vgic model so that GICv3 emulation works */
hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model;
if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);


@@ -444,6 +444,8 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
/* Scalable Vector Registers are restricted. */
HOST_HANDLED(SYS_ICC_PMR_EL1),
RAZ_WI(SYS_ERRIDR_EL1),
RAZ_WI(SYS_ERRSELR_EL1),
RAZ_WI(SYS_ERXFR_EL1),
@@ -457,9 +459,12 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
/* Limited Ordering Regions Registers are restricted. */
HOST_HANDLED(SYS_ICC_DIR_EL1),
HOST_HANDLED(SYS_ICC_RPR_EL1),
HOST_HANDLED(SYS_ICC_SGI1R_EL1),
HOST_HANDLED(SYS_ICC_ASGI1R_EL1),
HOST_HANDLED(SYS_ICC_SGI0R_EL1),
HOST_HANDLED(SYS_ICC_CTLR_EL1),
{ SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, },
HOST_HANDLED(SYS_CCSIDR_EL1),


@@ -661,11 +661,37 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr)
{
bool px, ux;
u8 xn;
px = prot & KVM_PGTABLE_PROT_PX;
ux = prot & KVM_PGTABLE_PROT_UX;
if (!cpus_have_final_cap(ARM64_HAS_XNX) && px != ux)
return -EINVAL;
if (px && ux)
xn = 0b00;
else if (!px && ux)
xn = 0b01;
else if (!px && !ux)
xn = 0b10;
else
xn = 0b11;
*attr &= ~KVM_PTE_LEAF_ATTR_HI_S2_XN;
*attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, xn);
return 0;
}
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
kvm_pte_t *ptep)
{
kvm_pte_t attr;
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
int r;
switch (prot & (KVM_PGTABLE_PROT_DEVICE |
KVM_PGTABLE_PROT_NORMAL_NC)) {
@@ -685,8 +711,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
attr = KVM_S2_MEMATTR(pgt, NORMAL);
}
if (!(prot & KVM_PGTABLE_PROT_X))
attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
r = stage2_set_xn_attr(prot, &attr);
if (r)
return r;
if (prot & KVM_PGTABLE_PROT_R)
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
@@ -715,8 +742,20 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
prot |= KVM_PGTABLE_PROT_R;
if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
prot |= KVM_PGTABLE_PROT_W;
if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
prot |= KVM_PGTABLE_PROT_X;
switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) {
case 0b00:
prot |= KVM_PGTABLE_PROT_PX | KVM_PGTABLE_PROT_UX;
break;
case 0b01:
prot |= KVM_PGTABLE_PROT_UX;
break;
case 0b11:
prot |= KVM_PGTABLE_PROT_PX;
break;
default:
break;
}
return prot;
}
@@ -1290,9 +1329,9 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags)
{
int ret;
kvm_pte_t xn = 0, set = 0, clr = 0;
s8 level;
kvm_pte_t set = 0, clr = 0;
int ret;
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
return -EINVAL;
@@ -1303,8 +1342,12 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
if (prot & KVM_PGTABLE_PROT_W)
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
if (prot & KVM_PGTABLE_PROT_X)
clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
ret = stage2_set_xn_attr(prot, &xn);
if (ret)
return ret;
set |= xn & KVM_PTE_LEAF_ATTR_HI_S2_XN;
clr |= ~xn & KVM_PTE_LEAF_ATTR_HI_S2_XN;
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags);
if (!ret || ret == -EAGAIN)
@@ -1535,37 +1578,80 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
}
static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
static int stage2_free_leaf(const struct kvm_pgtable_visit_ctx *ctx)
{
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
if (!stage2_pte_is_counted(ctx->old))
return 0;
mm_ops->put_page(ctx->ptep);
if (kvm_pte_table(ctx->old, ctx->level))
mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
return 0;
}
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
static int stage2_free_table_post(const struct kvm_pgtable_visit_ctx *ctx)
{
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops);
if (mm_ops->page_count(childp) != 1)
return 0;
/*
* Drop references and clear the now stale PTE to avoid rewalking the
* freed page table.
*/
mm_ops->put_page(ctx->ptep);
mm_ops->put_page(childp);
kvm_clear_pte(ctx->ptep);
return 0;
}
static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
if (!stage2_pte_is_counted(ctx->old))
return 0;
switch (visit) {
case KVM_PGTABLE_WALK_LEAF:
return stage2_free_leaf(ctx);
case KVM_PGTABLE_WALK_TABLE_POST:
return stage2_free_table_post(ctx);
default:
return -EINVAL;
}
}
void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
u64 addr, u64 size)
{
size_t pgd_sz;
struct kvm_pgtable_walker walker = {
.cb = stage2_free_walker,
.flags = KVM_PGTABLE_WALK_LEAF |
KVM_PGTABLE_WALK_TABLE_POST,
};
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
}
void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
{
size_t pgd_sz;
pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
/*
* Since the pgtable is unlinked at this point, and not shared with
* other walkers, safely dereference pgd with kvm_dereference_pteref_raw()
*/
pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz);
pgt->pgd = NULL;
}
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
{
kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits));
kvm_pgtable_stage2_destroy_pgd(pgt);
}
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
{
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;


@@ -63,6 +63,10 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
return -1;
}
/* Handle deactivation as a normal exit */
if ((fault_ipa - vgic->vgic_cpu_base) >= GIC_CPU_DEACTIVATE)
return 0;
rd = kvm_vcpu_dabt_get_rd(vcpu);
addr = kvm_vgic_global_state.vcpu_hyp_va;
addr += fault_ipa - vgic->vgic_cpu_base;


@@ -14,6 +14,8 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include "../../vgic/vgic.h"
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5))
@@ -58,7 +60,7 @@ u64 __gic_v3_get_lr(unsigned int lr)
unreachable();
}
static void __gic_v3_set_lr(u64 val, int lr)
void __gic_v3_set_lr(u64 val, int lr)
{
switch (lr & 0xf) {
case 0:
@@ -196,6 +198,11 @@ static u32 __vgic_v3_read_ap1rn(int n)
return val;
}
static u64 compute_ich_hcr(struct vgic_v3_cpu_if *cpu_if)
{
return cpu_if->vgic_hcr | vgic_ich_hcr_trap_bits();
}
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
{
u64 used_lrs = cpu_if->used_lrs;
@@ -212,14 +219,12 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
}
}
if (used_lrs || cpu_if->its_vpe.its_vm) {
if (used_lrs) {
int i;
u32 elrsr;
elrsr = read_gicreg(ICH_ELRSR_EL2);
write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EL2_En, ICH_HCR_EL2);
for (i = 0; i < used_lrs; i++) {
if (elrsr & (1 << i))
cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
@@ -229,6 +234,23 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
__gic_v3_set_lr(0, i);
}
}
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
if (cpu_if->vgic_hcr & ICH_HCR_EL2_LRENPIE) {
u64 val = read_gicreg(ICH_HCR_EL2);
cpu_if->vgic_hcr &= ~ICH_HCR_EL2_EOIcount;
cpu_if->vgic_hcr |= val & ICH_HCR_EL2_EOIcount;
}
write_gicreg(0, ICH_HCR_EL2);
/*
* Hack alert: On NV, this results in a trap so that the above write
* actually takes effect... No synchronisation is necessary, as we
* only care about the effects when this traps.
*/
read_gicreg(ICH_MISR_EL2);
}
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
@@ -236,12 +258,10 @@ void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
u64 used_lrs = cpu_if->used_lrs;
int i;
if (used_lrs || cpu_if->its_vpe.its_vm) {
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
write_gicreg(compute_ich_hcr(cpu_if), ICH_HCR_EL2);
for (i = 0; i < used_lrs; i++)
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
}
for (i = 0; i < used_lrs; i++)
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
/*
* Ensure that writes to the LRs, and on non-VHE systems ensure that
@@ -307,24 +327,20 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
}
/*
* If we need to trap system registers, we must write
* ICH_HCR_EL2 anyway, even if no interrupts are being
* injected. Note that this also applies if we don't expect
* any system register access (no vgic at all).
* If we need to trap system registers, we must write ICH_HCR_EL2
* anyway, even if no interrupts are being injected. Note that this
* also applies if we don't expect any system register access (no
* vgic at all). In any case, no need to provide MI configuration.
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
write_gicreg(vgic_ich_hcr_trap_bits() | ICH_HCR_EL2_En, ICH_HCR_EL2);
}
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
if (!cpu_if->vgic_sre) {
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
}
/* Only restore SRE if the host implements the GICv2 interface */
if (static_branch_unlikely(&vgic_v3_has_v2_compat)) {
val = read_gicreg(ICC_SRE_EL2);
@@ -346,7 +362,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(0, ICH_HCR_EL2);
}
static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -507,13 +523,6 @@ static void __vgic_v3_write_vmcr(u32 vmcr)
write_gicreg(vmcr, ICH_VMCR_EL2);
}
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
{
__vgic_v3_save_aprs(cpu_if);
if (cpu_if->vgic_sre)
cpu_if->vgic_vmcr = __vgic_v3_read_vmcr();
}
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
{
__vgic_v3_compat_mode_enable();
@@ -790,7 +799,7 @@ static void __vgic_v3_bump_eoicount(void)
write_gicreg(hcr, ICH_HCR_EL2);
}
static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
static int ___vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
u32 vid = vcpu_get_reg(vcpu, rt);
u64 lr_val;
@@ -798,19 +807,25 @@ static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
/* EOImode == 0, nothing to be done here */
if (!(vmcr & ICH_VMCR_EOIM_MASK))
return;
return 1;
/* No deactivate to be performed on an LPI */
if (vid >= VGIC_MIN_LPI)
return;
return 1;
lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
if (lr == -1) {
__vgic_v3_bump_eoicount();
return;
if (lr != -1) {
__vgic_v3_clear_active_lr(lr, lr_val);
return 1;
}
__vgic_v3_clear_active_lr(lr, lr_val);
return 0;
}
static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
{
if (!___vgic_v3_write_dir(vcpu, vmcr, rt))
__vgic_v3_bump_eoicount();
}
static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
@@ -1245,6 +1260,21 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
case SYS_ICC_DIR_EL1:
if (unlikely(is_read))
return 0;
/*
* Full exit if required to handle overflow deactivation,
* unless we can emulate it in the LRs (likely the majority
* of the cases).
*/
if (vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr & ICH_HCR_EL2_TDIR) {
int ret;
ret = ___vgic_v3_write_dir(vcpu, __vgic_v3_read_vmcr(),
kvm_vcpu_sys_get_rt(vcpu));
if (ret)
__kvm_skip_instr(vcpu);
return ret;
}
fn = __vgic_v3_write_dir;
break;
case SYS_ICC_RPR_EL1:


@@ -904,6 +904,38 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
return 0;
}
/*
* Assume that @pgt is valid and unlinked from the KVM MMU to free the
* page-table without taking the kvm_mmu_lock and without performing any
* TLB invalidations.
*
* Also, the range of addresses can be large enough to cause need_resched
* warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke
* cond_resched() periodically to prevent hogging the CPU for a long time
* and schedule something else, if required.
*/
static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr,
phys_addr_t end)
{
u64 next;
do {
next = stage2_range_addr_end(addr, end);
KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr,
next - addr);
if (next != end)
cond_resched();
} while (addr = next, addr != end);
}
static void kvm_stage2_destroy(struct kvm_pgtable *pgt)
{
unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr);
stage2_destroy_range(pgt, 0, BIT(ia_bits));
KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt);
}
/**
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
* @kvm: The pointer to the KVM structure
@@ -980,7 +1012,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
return 0;
out_destroy_pgtable:
KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
kvm_stage2_destroy(pgt);
out_free_pgtable:
kfree(pgt);
return err;
@@ -1081,7 +1113,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
write_unlock(&kvm->mmu_lock);
if (pgt) {
KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
kvm_stage2_destroy(pgt);
kfree(pgt);
}
}
@@ -1521,6 +1553,16 @@ static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
*prot |= kvm_encode_nested_level(nested);
}
static void adjust_nested_exec_perms(struct kvm *kvm,
struct kvm_s2_trans *nested,
enum kvm_pgtable_prot *prot)
{
if (!kvm_s2_trans_exec_el0(kvm, nested))
*prot &= ~KVM_PGTABLE_PROT_UX;
if (!kvm_s2_trans_exec_el1(kvm, nested))
*prot &= ~KVM_PGTABLE_PROT_PX;
}
#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED)
static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
@@ -1572,11 +1614,12 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (writable)
prot |= KVM_PGTABLE_PROT_W;
if (exec_fault ||
(cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
(!nested || kvm_s2_trans_executable(nested))))
if (exec_fault || cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
prot |= KVM_PGTABLE_PROT_X;
if (nested)
adjust_nested_exec_perms(kvm, nested, &prot);
kvm_fault_lock(kvm);
if (mmu_invalidate_retry(kvm, mmu_seq)) {
ret = -EAGAIN;
@@ -1851,11 +1894,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
else
prot |= KVM_PGTABLE_PROT_DEVICE;
} else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
(!nested || kvm_s2_trans_executable(nested))) {
} else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) {
prot |= KVM_PGTABLE_PROT_X;
}
if (nested)
adjust_nested_exec_perms(kvm, nested, &prot);
/*
* Under the premise of getting a FSC_PERM fault, we just need to relax
* permissions only if vma_pagesize equals fault_granule. Otherwise,
@@ -1899,8 +1944,48 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
read_unlock(&vcpu->kvm->mmu_lock);
}
/*
* Returns true if the SEA should be handled locally within KVM if the abort
* is caused by a kernel memory allocation (e.g. stage-2 table memory).
*/
static bool host_owns_sea(struct kvm_vcpu *vcpu, u64 esr)
{
/*
* Without FEAT_RAS HCR_EL2.TEA is RES0, meaning any external abort
* taken from a guest EL to EL2 is due to a host-imposed access (e.g.
* stage-2 PTW).
*/
if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
return true;
/* KVM owns the VNCR when the vCPU isn't in a nested context. */
if (is_hyp_ctxt(vcpu) && !kvm_vcpu_trap_is_iabt(vcpu) && (esr & ESR_ELx_VNCR))
return true;
/*
* Determining if an external abort during a table walk happened at
* stage-2 is only possible when S1PTW is set. Otherwise, since KVM
* sets HCR_EL2.TEA, SEAs due to a stage-1 walk (i.e. accessing the
* PA of the stage-1 descriptor) can reach here and are reported
* with a TTW ESR value.
*/
return (esr_fsc_is_sea_ttw(esr) && (esr & ESR_ELx_S1PTW));
}
int kvm_handle_guest_sea(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_run *run = vcpu->run;
u64 esr = kvm_vcpu_get_esr(vcpu);
u64 esr_mask = ESR_ELx_EC_MASK |
ESR_ELx_IL |
ESR_ELx_FnV |
ESR_ELx_EA |
ESR_ELx_CM |
ESR_ELx_WNR |
ESR_ELx_FSC;
u64 ipa;
/*
* Give APEI the opportunity to claim the abort before handling it
* within KVM. apei_claim_sea() expects to be called with IRQs enabled.
@@ -1909,7 +1994,33 @@ int kvm_handle_guest_sea(struct kvm_vcpu *vcpu)
if (apei_claim_sea(NULL) == 0)
return 1;
return kvm_inject_serror(vcpu);
if (host_owns_sea(vcpu, esr) ||
!test_bit(KVM_ARCH_FLAG_EXIT_SEA, &vcpu->kvm->arch.flags))
return kvm_inject_serror(vcpu);
/* ESR_ELx.SET is RES0 when FEAT_RAS isn't implemented. */
if (kvm_has_ras(kvm))
esr_mask |= ESR_ELx_SET_MASK;
/*
* Exit to userspace, and provide faulting guest virtual and physical
* addresses in case userspace wants to emulate SEA to guest by
* writing to FAR_ELx and HPFAR_ELx registers.
*/
memset(&run->arm_sea, 0, sizeof(run->arm_sea));
run->exit_reason = KVM_EXIT_ARM_SEA;
run->arm_sea.esr = esr & esr_mask;
if (!(esr & ESR_ELx_FnV))
run->arm_sea.gva = kvm_vcpu_get_hfar(vcpu);
ipa = kvm_vcpu_get_fault_ipa(vcpu);
if (ipa != INVALID_GPA) {
run->arm_sea.flags |= KVM_EXIT_ARM_SEA_FLAG_GPA_VALID;
run->arm_sea.gpa = ipa;
}
return 0;
}
/**
@@ -1999,6 +2110,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
u32 esr;
ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
if (ret == -EAGAIN) {
ret = 1;
goto out_unlock;
}
if (ret) {
esr = kvm_s2_trans_esr(&nested_trans);
kvm_inject_s2_fault(vcpu, esr);


@@ -124,14 +124,13 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
}
struct s2_walk_info {
int (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
void *data;
u64 baddr;
unsigned int max_oa_bits;
unsigned int pgshift;
unsigned int sl;
unsigned int t0sz;
bool be;
u64 baddr;
unsigned int max_oa_bits;
unsigned int pgshift;
unsigned int sl;
unsigned int t0sz;
bool be;
bool ha;
};
static u32 compute_fsc(int level, u32 fsc)
@@ -199,6 +198,42 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
return 0;
}
static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc,
struct s2_walk_info *wi)
{
u64 val;
int r;
r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
if (r)
return r;
/*
* Handle reversed descriptors if endianness differs between the
* host and the guest hypervisor.
*/
if (wi->be)
*desc = be64_to_cpu((__force __be64)val);
else
*desc = le64_to_cpu((__force __le64)val);
return 0;
}
static int swap_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 old, u64 new,
struct s2_walk_info *wi)
{
if (wi->be) {
old = (__force u64)cpu_to_be64(old);
new = (__force u64)cpu_to_be64(new);
} else {
old = (__force u64)cpu_to_le64(old);
new = (__force u64)cpu_to_le64(new);
}
return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
}
/*
* This is essentially a C-version of the pseudo code from the ARM ARM
* AArch64.TranslationTableWalk function. I strongly recommend looking at
@@ -206,13 +241,13 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
*
* Must be called with the kvm->srcu read lock held
*/
static int walk_nested_s2_pgd(phys_addr_t ipa,
static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
struct s2_walk_info *wi, struct kvm_s2_trans *out)
{
int first_block_level, level, stride, input_size, base_lower_bound;
phys_addr_t base_addr;
unsigned int addr_top, addr_bottom;
u64 desc; /* page table entry */
u64 desc, new_desc; /* page table entry */
int ret;
phys_addr_t paddr;
@@ -257,28 +292,30 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
>> (addr_bottom - 3);
paddr = base_addr | index;
ret = wi->read_desc(paddr, &desc, wi->data);
ret = read_guest_s2_desc(vcpu, paddr, &desc, wi);
if (ret < 0)
return ret;
/*
* Handle reversedescriptors if endianness differs between the
* host and the guest hypervisor.
*/
if (wi->be)
desc = be64_to_cpu((__force __be64)desc);
else
desc = le64_to_cpu((__force __le64)desc);
new_desc = desc;
/* Check for valid descriptor at this point */
if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
if (!(desc & KVM_PTE_VALID)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
out->desc = desc;
return 1;
}
/* We're at the final level or block translation level */
if ((desc & 3) == 1 || level == 3)
if (FIELD_GET(KVM_PTE_TYPE, desc) == KVM_PTE_TYPE_BLOCK) {
if (level < 3)
break;
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
out->desc = desc;
return 1;
}
/* We're at the final level */
if (level == 3)
break;
if (check_output_size(wi, desc)) {
@@ -305,7 +342,18 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
return 1;
}
if (!(desc & BIT(10))) {
if (wi->ha)
new_desc |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
if (new_desc != desc) {
ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi);
if (ret)
return ret;
desc = new_desc;
}
if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) {
out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
out->desc = desc;
return 1;
@@ -318,20 +366,13 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
(ipa & GENMASK_ULL(addr_bottom - 1, 0));
out->output = paddr;
out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
out->readable = desc & (0b01 << 6);
out->writable = desc & (0b10 << 6);
out->readable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
out->writable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
out->level = level;
out->desc = desc;
return 0;
}
static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
{
struct kvm_vcpu *vcpu = data;
return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
}
static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
{
wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
@@ -350,6 +391,8 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
/* Global limit for now, should eventually be per-VM */
wi->max_oa_bits = min(get_kvm_ipa_limit(),
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
wi->ha = vtcr & VTCR_EL2_HA;
}
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
@@ -364,15 +407,13 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
if (!vcpu_has_nv(vcpu))
return 0;
wi.read_desc = read_guest_s2_desc;
wi.data = vcpu;
wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
vtcr_to_walk_info(vtcr, &wi);
wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
ret = walk_nested_s2_pgd(gipa, &wi, result);
ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result);
if (ret)
result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC);
@@ -788,7 +829,10 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
return 0;
if (kvm_vcpu_trap_is_iabt(vcpu)) {
forward_fault = !kvm_s2_trans_executable(trans);
if (vcpu_mode_priv(vcpu))
forward_fault = !kvm_s2_trans_exec_el1(vcpu->kvm, trans);
else
forward_fault = !kvm_s2_trans_exec_el0(vcpu->kvm, trans);
} else {
bool write_fault = kvm_is_write_fault(vcpu);
@@ -1555,12 +1599,13 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
case SYS_ID_AA64MMFR1_EL1:
val &= ~(ID_AA64MMFR1_EL1_CMOW |
ID_AA64MMFR1_EL1_nTLBPA |
ID_AA64MMFR1_EL1_ETS |
ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_HAFDBS);
ID_AA64MMFR1_EL1_ETS);
/* FEAT_E2H0 implies no VHE */
if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
val &= ~ID_AA64MMFR1_EL1_VH;
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR1_EL1, HAFDBS, AF);
break;
case SYS_ID_AA64MMFR2_EL1:


@@ -344,9 +344,16 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
return 0;
}
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
u64 addr, u64 size)
{
__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
__pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
}
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
{
/* Expected to be called after all pKVM mappings have been released. */
WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
}
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,


@@ -31,27 +31,46 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = {
.val = PTE_VALID,
.set = " ",
.clear = "F",
}, {
},
{
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
.set = "R",
.clear = " ",
}, {
},
{
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
.set = "W",
.clear = " ",
}, {
},
{
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.val = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.set = "NX",
.clear = "x ",
}, {
.val = 0b00UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
.set = "px ux ",
},
{
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.val = 0b01UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
.set = "PXNux ",
},
{
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.val = 0b10UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
.set = "PXNUXN",
},
{
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
.val = 0b11UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
.set = "px UXN",
},
{
.mask = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.val = KVM_PTE_LEAF_ATTR_LO_S2_AF,
.set = "AF",
.clear = " ",
}, {
},
{
.mask = PMD_TYPE_MASK,
.val = PMD_TYPE_SECT,
.set = "BLK",


@@ -666,6 +666,21 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
return true;
}
static bool access_gic_dir(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!kvm_has_gicv3(vcpu->kvm))
return undef_access(vcpu, p, r);
if (!p->is_write)
return undef_access(vcpu, p, r);
vgic_v3_deactivate(vcpu, p->regval);
return true;
}
static bool trap_raz_wi(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -3373,7 +3388,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
{ SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
{ SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir },
{ SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
{ SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
@@ -3770,7 +3785,8 @@ static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
__kvm_at_s1e01(vcpu, op, p->regval);
if (__kvm_at_s1e01(vcpu, op, p->regval))
return false;
return true;
}
@@ -3787,7 +3803,8 @@ static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return false;
}
__kvm_at_s1e2(vcpu, op, p->regval);
if (__kvm_at_s1e2(vcpu, op, p->regval))
return false;
return true;
}
@@ -3797,7 +3814,8 @@ static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
__kvm_at_s12(vcpu, op, p->regval);
if (__kvm_at_s12(vcpu, op, p->regval))
return false;
return true;
}
@@ -4498,7 +4516,7 @@ static const struct sys_reg_desc cp15_regs[] = {
{ CP15_SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir },
{ CP15_SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_IAR1_EL1), undef_access },
{ CP15_SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },


@@ -198,6 +198,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
int i;
dist->active_spis = (atomic_t)ATOMIC_INIT(0);
dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
if (!dist->spis)
return -ENOMEM;
@@ -363,12 +364,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
return ret;
}
static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
static void kvm_vgic_vcpu_reset(struct kvm_vcpu *vcpu)
{
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_enable(vcpu);
vgic_v2_reset(vcpu);
else
vgic_v3_enable(vcpu);
vgic_v3_reset(vcpu);
}
/*
@@ -415,7 +416,7 @@ int vgic_init(struct kvm *kvm)
}
kvm_for_each_vcpu(idx, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
kvm_vgic_vcpu_reset(vcpu);
ret = kvm_vgic_setup_default_irq_routing(kvm);
if (ret)


@@ -359,6 +359,16 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
vgic_set_vmcr(vcpu, &vmcr);
}
static void vgic_mmio_write_dir(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
{
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_deactivate(vcpu, val);
else
vgic_v3_deactivate(vcpu, val);
}
static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len)
{
@@ -482,6 +492,10 @@ static const struct vgic_register_region vgic_v2_cpu_registers[] = {
REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT,
vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_CPU_DEACTIVATE,
vgic_mmio_read_raz, vgic_mmio_write_dir,
vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi,
4, VGIC_ACCESS_32bit),
};
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
@@ -494,6 +508,16 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
return SZ_4K;
}
unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev)
{
dev->regions = vgic_v2_cpu_registers;
dev->nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
return KVM_VGIC_V2_CPU_SIZE;
}
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
{
const struct vgic_register_region *region;


@@ -213,6 +213,7 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
const u32 val);
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev);
unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev);


@@ -9,6 +9,7 @@
#include <kvm/arm_vgic.h>
#include <asm/kvm_mmu.h>
#include "vgic-mmio.h"
#include "vgic.h"
static inline void vgic_v2_write_lr(int lr, u32 val)
@@ -26,11 +27,24 @@ void vgic_v2_init_lrs(void)
vgic_v2_write_lr(i, 0);
}
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu,
struct ap_list_summary *als)
{
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
cpuif->vgic_hcr |= GICH_HCR_UIE;
cpuif->vgic_hcr = GICH_HCR_EN;
if (irqs_pending_outside_lrs(als))
cpuif->vgic_hcr |= GICH_HCR_NPIE;
if (irqs_active_outside_lrs(als))
cpuif->vgic_hcr |= GICH_HCR_LRENPIE;
if (irqs_outside_lrs(als))
cpuif->vgic_hcr |= GICH_HCR_UIE;
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP0_MASK) ?
GICH_HCR_VGrp0DIE : GICH_HCR_VGrp0EIE;
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP1_MASK) ?
GICH_HCR_VGrp1DIE : GICH_HCR_VGrp1EIE;
}
static bool lr_signals_eoi_mi(u32 lr_val)
@@ -39,43 +53,23 @@ static bool lr_signals_eoi_mi(u32 lr_val)
!(lr_val & GICH_LR_HW);
}
/*
* transfer the content of the LRs back into the corresponding ap_list:
* - active bit is transferred as is
* - pending bit is
* - transferred as is in case of edge sensitive IRQs
* - set to the line-level (resample time) for level sensitive IRQs
*/
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
static void vgic_v2_fold_lr(struct kvm_vcpu *vcpu, u32 val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
int lr;
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
bool deactivated;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
/* Extract the source vCPU id from the LR */
cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val) & 7;
cpuif->vgic_hcr &= ~GICH_HCR_UIE;
/* Notify fds when the guest EOI'ed a level-triggered SPI */
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
kvm_notify_acked_irq(vcpu->kvm, 0,
intid - VGIC_NR_PRIVATE_IRQS);
for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) {
u32 val = cpuif->vgic_lr[lr];
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
struct vgic_irq *irq;
bool deactivated;
/* Extract the source vCPU id from the LR */
cpuid = val & GICH_LR_PHYSID_CPUID;
cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
cpuid &= 7;
/* Notify fds when the guest EOI'ed a level-triggered SPI */
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
kvm_notify_acked_irq(vcpu->kvm, 0,
intid - VGIC_NR_PRIVATE_IRQS);
irq = vgic_get_vcpu_irq(vcpu, intid);
raw_spin_lock(&irq->irq_lock);
irq = vgic_get_vcpu_irq(vcpu, intid);
scoped_guard(raw_spinlock, &irq->irq_lock) {
/* Always preserve the active bit, note deactivation */
deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT);
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
@@ -101,29 +95,139 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
/* Handle resampling for mapped interrupts if required */
vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT);
raw_spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
irq->on_lr = false;
}
vgic_put_irq(vcpu->kvm, irq);
}
static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq);
/*
* transfer the content of the LRs back into the corresponding ap_list:
* - active bit is transferred as is
* - pending bit is
* - transferred as is in case of edge sensitive IRQs
* - set to the line-level (resample time) for level sensitive IRQs
*/
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
u32 eoicount = FIELD_GET(GICH_HCR_EOICOUNT, cpuif->vgic_hcr);
struct vgic_irq *irq;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
for (int lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++)
vgic_v2_fold_lr(vcpu, cpuif->vgic_lr[lr]);
/* See the GICv3 equivalent for the EOIcount handling rationale */
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
u32 lr;
if (!eoicount) {
break;
} else {
guard(raw_spinlock)(&irq->irq_lock);
if (!(likely(vgic_target_oracle(irq) == vcpu) &&
irq->active))
continue;
lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT;
}
if (lr & GICH_LR_HW)
writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr),
kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE);
vgic_v2_fold_lr(vcpu, lr);
eoicount--;
}
cpuif->used_lrs = 0;
}
/*
* Populates the particular LR with the state of a given IRQ:
* - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
* - for a level sensitive IRQ the pending state value is unchanged;
* it is dictated directly by the input level
*
* If @irq describes an SGI with multiple sources, we choose the
* lowest-numbered source VCPU and clear that bit in the source bitmap.
*
* The irq_lock must be held by the caller.
*/
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
struct kvm_vcpu *target_vcpu = NULL;
bool mmio = false;
struct vgic_irq *irq;
unsigned long flags;
u64 lr = 0;
u8 cpuid;
/* Snapshot CPUID, and remove it from the INTID */
cpuid = FIELD_GET(GENMASK_ULL(12, 10), val);
val &= ~GENMASK_ULL(12, 10);
/* We only deal with DIR when EOIMode==1 */
if (!(cpuif->vgic_vmcr & GICH_VMCR_EOI_MODE_MASK))
return;
/* Make sure we're in the same context as LR handling */
local_irq_save(flags);
irq = vgic_get_vcpu_irq(vcpu, val);
if (WARN_ON_ONCE(!irq))
goto out;
/* See the corresponding v3 code for the rationale */
scoped_guard(raw_spinlock, &irq->irq_lock) {
target_vcpu = irq->vcpu;
/* Not on any ap_list? */
if (!target_vcpu)
goto put;
/*
* Urgh. We're deactivating something that we cannot
* observe yet... Big hammer time.
*/
if (irq->on_lr) {
mmio = true;
goto put;
}
/* SGI: check that the cpuid matches */
if (val < VGIC_NR_SGIS && irq->active_source != cpuid) {
target_vcpu = NULL;
goto put;
}
/* (with a Dalek voice) DEACTIVATE!!!! */
lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT;
}
if (lr & GICH_LR_HW)
writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr),
kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE);
vgic_v2_fold_lr(vcpu, lr);
put:
vgic_put_irq(vcpu->kvm, irq);
out:
local_irq_restore(flags);
if (mmio)
vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32));
/* Force the ap_list to be pruned */
if (target_vcpu)
kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu);
}
static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
u32 val = irq->intid;
bool allow_pending = true;
WARN_ON(irq->on_lr);
if (irq->active) {
val |= GICH_LR_ACTIVE_BIT;
if (vgic_irq_is_sgi(irq->intid))
@@ -163,22 +267,52 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (allow_pending && irq_is_pending(irq)) {
val |= GICH_LR_PENDING_BIT;
if (vgic_irq_is_sgi(irq->intid)) {
u32 src = ffs(irq->source);
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
irq->intid))
return 0;
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
if (irq->source & ~BIT(src - 1))
val |= GICH_LR_EOI;
}
}
/* The GICv2 LR only holds five bits of priority. */
val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
return val;
}
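
As an aside on the five-bit truncation above: only priority bits [7:3] survive into a GICv2 LR. A standalone sketch, not part of the patch (the shift of 23 is assumed to match GICH_LR_PRIORITY_SHIFT):

/* Illustration only: GICv2 LRs keep the top five bits of an 8-bit priority. */
#include <stdio.h>

#define DEMO_LR_PRIORITY_SHIFT  23      /* assumed value of GICH_LR_PRIORITY_SHIFT */

int main(void)
{
        unsigned int priority = 0xd0;           /* 8-bit guest-visible priority */
        unsigned int field = priority >> 3;     /* 5-bit LR field: 0x1a */

        printf("LR priority field %#x, LR bits %#x\n",
               field, field << DEMO_LR_PRIORITY_SHIFT);
        return 0;
}
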
/*
* Populates the particular LR with the state of a given IRQ:
* - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
* - for a level sensitive IRQ the pending state value is unchanged;
* it is dictated directly by the input level
*
* If @irq describes an SGI with multiple sources, we choose the
* lowest-numbered source VCPU and clear that bit in the source bitmap.
*
* The irq_lock must be held by the caller.
*/
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
u32 val = vgic_v2_compute_lr(vcpu, irq);
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
if (val & GICH_LR_PENDING_BIT) {
if (irq->config == VGIC_CONFIG_EDGE)
irq->pending_latch = false;
if (vgic_irq_is_sgi(irq->intid)) {
u32 src = ffs(irq->source);
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
irq->intid))
return;
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source) {
irq->source &= ~BIT(src - 1);
if (irq->source)
irq->pending_latch = true;
val |= GICH_LR_EOI;
}
}
}
@@ -194,7 +328,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
/* The GICv2 LR only holds five bits of priority. */
val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
irq->on_lr = true;
}
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr)
@@ -257,7 +391,7 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
}
void vgic_v2_enable(struct kvm_vcpu *vcpu)
void vgic_v2_reset(struct kvm_vcpu *vcpu)
{
/*
* By forcing VMCR to zero, the GIC will restore the binary
@@ -265,9 +399,6 @@ void vgic_v2_enable(struct kvm_vcpu *vcpu)
* anyway.
*/
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
/* Get the show on the road... */
vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
}
/* check for overlapping regions and for regions crossing the end of memory */
@@ -289,6 +420,7 @@ static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base)
int vgic_v2_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
unsigned int len;
int ret = 0;
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
@@ -312,10 +444,20 @@ int vgic_v2_map_resources(struct kvm *kvm)
return ret;
}
len = vgic_v2_init_cpuif_iodev(&dist->cpuif_iodev);
dist->cpuif_iodev.base_addr = dist->vgic_cpu_base;
dist->cpuif_iodev.iodev_type = IODEV_CPUIF;
dist->cpuif_iodev.redist_vcpu = NULL;
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist->vgic_cpu_base,
len, &dist->cpuif_iodev.dev);
if (ret)
return ret;
if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
kvm_vgic_global_state.vcpu_base,
KVM_VGIC_V2_CPU_SIZE, true);
KVM_VGIC_V2_CPU_SIZE - SZ_4K, true);
if (ret) {
kvm_err("Unable to remap VGIC CPU to VCPU\n");
return ret;
@@ -385,6 +527,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
kvm_vgic_global_state.can_emulate_gicv2 = true;
kvm_vgic_global_state.vcpu_base = info->vcpu.start;
kvm_vgic_global_state.gicc_base = info->gicc_base;
kvm_vgic_global_state.type = VGIC_V2;
kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
@@ -423,16 +566,26 @@ static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
void vgic_v2_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
void __iomem *base = kvm_vgic_global_state.vctrl_base;
u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
if (!base)
return;
if (used_lrs) {
cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
if (used_lrs)
save_lrs(vcpu, base);
writel_relaxed(0, base + GICH_HCR);
if (cpu_if->vgic_hcr & GICH_HCR_LRENPIE) {
u32 val = readl_relaxed(base + GICH_HCR);
cpu_if->vgic_hcr &= ~GICH_HCR_EOICOUNT;
cpu_if->vgic_hcr |= val & GICH_HCR_EOICOUNT;
}
writel_relaxed(0, base + GICH_HCR);
}
void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
@@ -445,13 +598,10 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
if (!base)
return;
if (used_lrs) {
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
for (i = 0; i < used_lrs; i++) {
writel_relaxed(cpu_if->vgic_lr[i],
base + GICH_LR0 + (i * 4));
}
}
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
for (i = 0; i < used_lrs; i++)
writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
}
void vgic_v2_load(struct kvm_vcpu *vcpu)
@@ -468,6 +618,5 @@ void vgic_v2_put(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
}

View File

@@ -70,13 +70,14 @@ static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
* - on L2 put: perform the inverse transformation, so that the result of L2
* running becomes visible to L1 in the VNCR-accessible registers.
*
* - there is nothing to do on L2 entry, as everything will have happened
* on load. However, this is the point where we detect that an interrupt
* targeting L1 and prepare the grand switcheroo.
* - there is nothing to do on L2 entry apart from enabling the vgic, as
* everything will have happened on load. However, this is the point where
* we detect an interrupt targeting L1 and prepare the grand
* switcheroo.
*
* - on L2 exit: emulate the HW bit, and deactivate corresponding the L1
* interrupt. The L0 active state will be cleared by the HW if the L1
* interrupt was itself backed by a HW interrupt.
* - on L2 exit: resync the LRs and VMCR, emulate the HW bit, and deactivate
* the corresponding L1 interrupt. The L0 active state will be cleared by
* the HW if the L1 interrupt was itself backed by a HW interrupt.
*
* Maintenance Interrupt (MI) management:
*
@@ -93,8 +94,10 @@ static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
*
* - because most of the ICH_*_EL2 registers live in the VNCR page, the
* quality of emulation is poor: L1 can setup the vgic so that an MI would
* immediately fire, and not observe anything until the next exit. Trying
* to read ICH_MISR_EL2 would do the trick, for example.
* immediately fire, and not observe anything until the next exit.
* Similarly, a pending MI is not immediately disabled by clearing
* ICH_HCR_EL2.En. Trying to read ICH_MISR_EL2 would do the trick, for
* example.
*
* System register emulation:
*
@@ -265,16 +268,37 @@ static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
s_cpu_if->used_lrs = hweight16(shadow_if->lr_map);
}
void vgic_v3_flush_nested(struct kvm_vcpu *vcpu)
{
u64 val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
write_sysreg_s(val | vgic_ich_hcr_trap_bits(), SYS_ICH_HCR_EL2);
}
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
{
struct shadow_if *shadow_if = get_shadow_if();
int i;
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
struct vgic_irq *irq;
u64 val, host_lr, lr;
if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
host_lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
/* Propagate the new LR state */
lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
val = lr & ~ICH_LR_STATE;
val |= host_lr & ICH_LR_STATE;
__vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
/*
* Deactivation of a HW interrupt: the LR must have the HW
* bit set, have been in a non-invalid state before the run,
* and now be in an invalid state. If any of that doesn't
* hold, we're done with this LR.
*/
if (!((lr & ICH_LR_HW) && (lr & ICH_LR_STATE) &&
!(host_lr & ICH_LR_STATE)))
continue;
/*
@@ -282,35 +306,27 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
* need to emulate the HW effect between the guest hypervisor
* and the nested guest.
*/
irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
continue;
lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
if (!(lr & ICH_LR_STATE))
irq->active = false;
vgic_put_irq(vcpu->kvm, irq);
vgic_v3_deactivate(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
}
/* We need these to be synchronised to generate the MI */
__vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, read_sysreg_s(SYS_ICH_VMCR_EL2));
__vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, &=, ~ICH_HCR_EL2_EOIcount);
__vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, |=, read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_EOIcount);
write_sysreg_s(0, SYS_ICH_HCR_EL2);
isb();
vgic_v3_nested_update_mi(vcpu);
}
static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu,
struct vgic_v3_cpu_if *s_cpu_if)
{
struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 val = 0;
int i;
/*
* If we're on a system with a broken vgic that requires
* trapping, propagate the trapping requirements.
*
* Ah, the smell of rotten fruits...
*/
if (static_branch_unlikely(&vgic_v3_cpuif_trap))
val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR);
s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val;
s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
s_cpu_if->vgic_sre = host_if->vgic_sre;
@@ -334,7 +350,8 @@ void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
__vgic_v3_restore_vmcr_aprs(cpu_if);
__vgic_v3_activate_traps(cpu_if);
__vgic_v3_restore_state(cpu_if);
for (int i = 0; i < cpu_if->used_lrs; i++)
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
/*
* Propagate the number of used LRs for the benefit of the HYP
@@ -347,36 +364,19 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
{
struct shadow_if *shadow_if = get_shadow_if();
struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
u64 val;
int i;
__vgic_v3_save_vmcr_aprs(s_cpu_if);
__vgic_v3_deactivate_traps(s_cpu_if);
__vgic_v3_save_state(s_cpu_if);
/*
* Translate the shadow state HW fields back to the virtual ones
* before copying the shadow struct back to the nested one.
*/
val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
val &= ~ICH_HCR_EL2_EOIcount_MASK;
val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK);
__vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val);
__vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr);
__vgic_v3_save_aprs(s_cpu_if);
for (i = 0; i < 4; i++) {
__vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]);
__vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]);
}
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
for (i = 0; i < s_cpu_if->used_lrs; i++)
__gic_v3_set_lr(0, i);
val &= ~ICH_LR_STATE;
val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;
__vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
}
__vgic_v3_deactivate_traps(s_cpu_if);
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
}

View File

@@ -12,6 +12,7 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_asm.h>
#include "vgic-mmio.h"
#include "vgic.h"
static bool group0_trap;
@@ -20,11 +21,48 @@ static bool common_trap;
static bool dir_trap;
static bool gicv4_enable;
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu,
struct ap_list_summary *als)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;
if (!irqchip_in_kernel(vcpu->kvm))
return;
cpuif->vgic_hcr = ICH_HCR_EL2_En;
if (irqs_pending_outside_lrs(als))
cpuif->vgic_hcr |= ICH_HCR_EL2_NPIE;
if (irqs_active_outside_lrs(als))
cpuif->vgic_hcr |= ICH_HCR_EL2_LRENPIE;
if (irqs_outside_lrs(als))
cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;
if (!als->nr_sgi)
cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount;
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG0_MASK) ?
ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE;
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG1_MASK) ?
ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE;
/*
* Dealing with EOImode=1 is a massive source of headache. Not
* only do we need to track that we have active interrupts
* outside of the LRs and force DIR to be trapped, we also
* need to deal with SPIs that can be deactivated on another
* CPU.
*
* On systems that do not implement TDIR, force the bit in the
* shadow state anyway to avoid IPI-ing on these poor sods.
*
* Note that we set the trap irrespective of EOIMode, as that
* can change behind our back without any warning...
*/
if (!cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) ||
irqs_active_outside_lrs(als) ||
atomic_read(&vcpu->kvm->arch.vgic.active_spis))
cpuif->vgic_hcr |= ICH_HCR_EL2_TDIR;
}
static bool lr_signals_eoi_mi(u64 lr_val)
@@ -33,84 +71,238 @@ static bool lr_signals_eoi_mi(u64 lr_val)
!(lr_val & ICH_LR_HW);
}
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
static void vgic_v3_fold_lr(struct kvm_vcpu *vcpu, u64 val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 model = vcpu->kvm->arch.vgic.vgic_model;
int lr;
struct vgic_irq *irq;
bool is_v2_sgi = false;
bool deactivated;
u32 intid;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
intid = val & ICH_LR_VIRTUAL_ID_MASK;
} else {
intid = val & GICH_LR_VIRTUALID;
is_v2_sgi = vgic_irq_is_sgi(intid);
}
cpuif->vgic_hcr &= ~ICH_HCR_EL2_UIE;
irq = vgic_get_vcpu_irq(vcpu, intid);
if (!irq) /* An LPI could have been unmapped. */
return;
for (lr = 0; lr < cpuif->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
u32 intid, cpuid;
struct vgic_irq *irq;
bool is_v2_sgi = false;
bool deactivated;
cpuid = val & GICH_LR_PHYSID_CPUID;
cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
if (model == KVM_DEV_TYPE_ARM_VGIC_V3) {
intid = val & ICH_LR_VIRTUAL_ID_MASK;
} else {
intid = val & GICH_LR_VIRTUALID;
is_v2_sgi = vgic_irq_is_sgi(intid);
}
/* Notify fds when the guest EOI'ed a level-triggered IRQ */
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
kvm_notify_acked_irq(vcpu->kvm, 0,
intid - VGIC_NR_PRIVATE_IRQS);
irq = vgic_get_vcpu_irq(vcpu, intid);
if (!irq) /* An LPI could have been unmapped. */
continue;
raw_spin_lock(&irq->irq_lock);
/* Always preserve the active bit, note deactivation */
scoped_guard(raw_spinlock, &irq->irq_lock) {
/* Always preserve the active bit for !LPIs, note deactivation */
if (irq->intid >= VGIC_MIN_LPI)
val &= ~ICH_LR_ACTIVE_BIT;
deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
if (irq->active && is_v2_sgi)
irq->active_source = cpuid;
/* Edge is the only case where we preserve the pending bit */
if (irq->config == VGIC_CONFIG_EDGE &&
(val & ICH_LR_PENDING_BIT)) {
(val & ICH_LR_PENDING_BIT))
irq->pending_latch = true;
if (is_v2_sgi)
irq->source |= (1 << cpuid);
}
/*
* Clear soft pending state when level irqs have been acked.
*/
if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
irq->pending_latch = false;
if (is_v2_sgi) {
u8 cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val);
if (irq->active)
irq->active_source = cpuid;
if (val & ICH_LR_PENDING_BIT)
irq->source |= BIT(cpuid);
}
/* Handle resampling for mapped interrupts if required */
vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);
raw_spin_unlock(&irq->irq_lock);
vgic_put_irq(vcpu->kvm, irq);
irq->on_lr = false;
}
/* Notify fds when the guest EOI'ed a level-triggered SPI, and drop the refcount */
if (deactivated && lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) {
kvm_notify_acked_irq(vcpu->kvm, 0,
intid - VGIC_NR_PRIVATE_IRQS);
atomic_dec_if_positive(&vcpu->kvm->arch.vgic.active_spis);
}
vgic_put_irq(vcpu->kvm, irq);
}
static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq);
static void vgic_v3_deactivate_phys(u32 intid)
{
if (cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY))
gic_insn(intid | FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, 1), CDDI);
else
gic_write_dir(intid);
}
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr);
struct vgic_irq *irq;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
for (int lr = 0; lr < cpuif->used_lrs; lr++)
vgic_v3_fold_lr(vcpu, cpuif->vgic_lr[lr]);
/*
* EOIMode=0: use EOIcount to emulate deactivation. We are
* guaranteed to deactivate in reverse order of the activation, so
* just pick one active interrupt after the other in the ap_list,
* and replay the deactivation as if the CPU was doing it. We also
* rely on priority drop to have taken place, and the list to be
* sorted by priority.
*/
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
u64 lr;
/*
* I would have loved to write this using a scoped_guard(),
* but using 'continue' here is a total train wreck.
*/
if (!eoicount) {
break;
} else {
guard(raw_spinlock)(&irq->irq_lock);
if (!(likely(vgic_target_oracle(irq) == vcpu) &&
irq->active))
continue;
lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
}
if (lr & ICH_LR_HW)
vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
vgic_v3_fold_lr(vcpu, lr);
eoicount--;
}
cpuif->used_lrs = 0;
}
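
To make the EOIcount replay concrete, here is a minimal standalone sketch (plain userspace C, not part of the patch; all names are invented): with the ap_list sorted by priority, each leftover EOI recorded in ICH_HCR_EL2.EOIcount deactivates the next active interrupt targeting this vCPU, as if it had been resident in an LR.

/* Illustration only: replaying EOIcount over a pre-sorted ap_list. */
#include <stdbool.h>
#include <stdio.h>

struct demo_irq {
        int intid;
        bool active;
        bool targets_this_vcpu;         /* stand-in for vgic_target_oracle() == vcpu */
};

int main(void)
{
        struct demo_irq ap_list[] = {
                { 32, true, true  },
                { 40, true, false },    /* migrated elsewhere: skipped */
                { 48, true, true  },
                { 56, true, true  },    /* not reached: EOIcount exhausted */
        };
        unsigned int eoicount = 2;      /* as read back from ICH_HCR_EL2 */

        for (int i = 0; i < 4 && eoicount; i++) {
                if (!ap_list[i].active || !ap_list[i].targets_this_vcpu)
                        continue;
                ap_list[i].active = false;      /* emulate the guest's deactivation */
                eoicount--;
                printf("deactivated INTID %d\n", ap_list[i].intid);
        }
        return 0;
}
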
void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
u32 model = vcpu->kvm->arch.vgic.vgic_model;
struct kvm_vcpu *target_vcpu = NULL;
bool mmio = false, is_v2_sgi;
struct vgic_irq *irq;
unsigned long flags;
u64 lr = 0;
u8 cpuid;
/* Snapshot CPUID, and remove it from the INTID */
cpuid = FIELD_GET(GENMASK_ULL(12, 10), val);
val &= ~GENMASK_ULL(12, 10);
is_v2_sgi = (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
val < VGIC_NR_SGIS);
/*
* We only deal with DIR when EOIMode==1, and only for SGI,
* PPI or SPI.
*/
if (!(cpuif->vgic_vmcr & ICH_VMCR_EOIM_MASK) ||
val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)
return;
/* Make sure we're in the same context as LR handling */
local_irq_save(flags);
irq = vgic_get_vcpu_irq(vcpu, val);
if (WARN_ON_ONCE(!irq))
goto out;
/*
* EOIMode=1: we must rely on traps to handle deactivation of
* overflowing interrupts, as there is no ordering guarantee and
* EOIcount isn't being incremented. Priority drop will have taken
* place, as ICV_EOIxR_EL1 only affects the APRs and not the LRs.
*
* Three possibilities:
*
* - The irq is not queued on any CPU, and there is nothing to
* do,
*
* - Or the irq is in an LR, meaning that its state is not
* directly observable. Treat it bluntly by making it as if
* this was a write to GICD_ICACTIVER, which will force an
* exit on all vcpus. If it hurts, don't do that.
*
* - Or the irq is active, but not in an LR, and we can
* directly deactivate it by building a pseudo-LR, folding it,
* and queuing a request to prune the resulting ap_list.
*
* Special care must be taken to match the source CPUID when
* deactivating a GICv2 SGI.
*/
scoped_guard(raw_spinlock, &irq->irq_lock) {
target_vcpu = irq->vcpu;
/* Not on any ap_list? */
if (!target_vcpu)
goto put;
/*
* Urgh. We're deactivating something that we cannot
* observe yet... Big hammer time.
*/
if (irq->on_lr) {
mmio = true;
goto put;
}
/* GICv2 SGI: check that the cpuid matches */
if (is_v2_sgi && irq->active_source != cpuid) {
target_vcpu = NULL;
goto put;
}
/* (with a Dalek voice) DEACTIVATE!!!! */
lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
}
if (lr & ICH_LR_HW)
vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
vgic_v3_fold_lr(vcpu, lr);
put:
vgic_put_irq(vcpu->kvm, irq);
out:
local_irq_restore(flags);
if (mmio)
vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32));
/* Force the ap_list to be pruned */
if (target_vcpu)
kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu);
}
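
The big-hammer fallback above reuses the GICD_ICACTIVER handler, so the only arithmetic is mapping an INTID to a register offset and bit. A standalone sketch of that (val / 32) * 4 and BIT(val % 32) computation, for illustration only:

/* Illustration only: which ICACTIVER register/bit covers a given INTID. */
#include <stdio.h>

int main(void)
{
        unsigned int intid = 75;                /* an example SPI */
        unsigned int offset = (intid / 32) * 4; /* 32 INTIDs per 32-bit register */
        unsigned int bit = intid % 32;          /* bit within that register */

        printf("GICD_ICACTIVER offset %#x, bit %u\n", offset, bit);    /* 0x8, 11 */
        return 0;
}
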
/* Requires the irq to be locked already */
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
u32 model = vcpu->kvm->arch.vgic.vgic_model;
u64 val = irq->intid;
bool allow_pending = true, is_v2_sgi;
WARN_ON(irq->on_lr);
is_v2_sgi = (vgic_irq_is_sgi(irq->intid) &&
model == KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -150,6 +342,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (allow_pending && irq_is_pending(irq)) {
val |= ICH_LR_PENDING_BIT;
if (is_v2_sgi) {
u32 src = ffs(irq->source);
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
irq->intid))
return 0;
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
if (irq->source & ~BIT(src - 1))
val |= ICH_LR_EOI;
}
}
if (irq->group)
val |= ICH_LR_GROUP;
val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
return val;
}
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
{
u32 model = vcpu->kvm->arch.vgic.vgic_model;
u64 val = vgic_v3_compute_lr(vcpu, irq);
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
if (val & ICH_LR_PENDING_BIT) {
if (irq->config == VGIC_CONFIG_EDGE)
irq->pending_latch = false;
@@ -157,16 +378,9 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
u32 src = ffs(irq->source);
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
irq->intid))
return;
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source) {
irq->source &= ~BIT(src - 1);
if (irq->source)
irq->pending_latch = true;
val |= ICH_LR_EOI;
}
}
}
@@ -179,12 +393,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
irq->line_level = false;
if (irq->group)
val |= ICH_LR_GROUP;
val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
irq->on_lr = true;
}
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
@@ -258,7 +467,7 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \
GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable))
void vgic_v3_enable(struct kvm_vcpu *vcpu)
void vgic_v3_reset(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -288,9 +497,6 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
kvm_vgic_global_state.ich_vtr_el2);
vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits,
kvm_vgic_global_state.ich_vtr_el2) + 1;
/* Get the show on the road... */
vgic_v3->vgic_hcr = ICH_HCR_EL2_En;
}
void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
@@ -302,20 +508,9 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
/* Hide GICv3 sysreg if necessary */
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 ||
!irqchip_in_kernel(vcpu->kvm)) {
!irqchip_in_kernel(vcpu->kvm))
vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
ICH_HCR_EL2_TC);
return;
}
if (group0_trap)
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL0;
if (group1_trap)
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL1;
if (common_trap)
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TC;
if (dir_trap)
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TDIR;
}
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
@@ -636,8 +831,53 @@ static const struct midr_range broken_seis[] = {
static bool vgic_v3_broken_seis(void)
{
return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_SEIS) &&
is_midr_in_range_list(broken_seis));
return (is_kernel_in_hyp_mode() &&
is_midr_in_range_list(broken_seis) &&
(read_sysreg_s(SYS_ICH_VTR_EL2) & ICH_VTR_EL2_SEIS));
}
void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr,
int nr_inst)
{
u32 insn, oinsn, rd;
u64 hcr = 0;
if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true;
group1_trap = true;
}
if (vgic_v3_broken_seis()) {
/* We know that these machines have ICH_HCR_EL2.TDIR */
group0_trap = true;
group1_trap = true;
dir_trap = true;
}
if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR))
common_trap = true;
if (group0_trap)
hcr |= ICH_HCR_EL2_TALL0;
if (group1_trap)
hcr |= ICH_HCR_EL2_TALL1;
if (common_trap)
hcr |= ICH_HCR_EL2_TC;
if (dir_trap)
hcr |= ICH_HCR_EL2_TDIR;
/* Compute target register */
oinsn = le32_to_cpu(*origptr);
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
/* movz rd, #(val & 0xffff) */
insn = aarch64_insn_gen_movewide(rd,
(u16)hcr,
0,
AARCH64_INSN_VARIANT_64BIT,
AARCH64_INSN_MOVEWIDE_ZERO);
*updptr = cpu_to_le32(insn);
}
/**
@@ -651,6 +891,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
{
u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
bool has_v2;
u64 traps;
int ret;
has_v2 = ich_vtr_el2 >> 63;
@@ -709,29 +950,18 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (has_v2)
static_branch_enable(&vgic_v3_has_v2_compat);
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
group0_trap = true;
group1_trap = true;
}
if (vgic_v3_broken_seis()) {
kvm_info("GICv3 with broken locally generated SEI\n");
kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS;
group0_trap = true;
group1_trap = true;
if (ich_vtr_el2 & ICH_VTR_EL2_TDS)
dir_trap = true;
else
common_trap = true;
}
if (group0_trap || group1_trap || common_trap | dir_trap) {
traps = vgic_ich_hcr_trap_bits();
if (traps) {
kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
group0_trap ? "G0" : "",
group1_trap ? "G1" : "",
common_trap ? "C" : "",
dir_trap ? "D" : "");
(traps & ICH_HCR_EL2_TALL0) ? "G0" : "",
(traps & ICH_HCR_EL2_TALL1) ? "G1" : "",
(traps & ICH_HCR_EL2_TC) ? "C" : "",
(traps & ICH_HCR_EL2_TDIR) ? "D" : "");
static_branch_enable(&vgic_v3_cpuif_trap);
}
@@ -771,7 +1001,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
}
if (likely(!is_protected_kvm_enabled()))
kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
WARN_ON(vgic_v4_put(vcpu));
if (has_vhe())

View File

@@ -163,6 +163,7 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i);
struct irq_desc *desc;
unsigned long flags;
bool pending;
int ret;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
@@ -173,9 +174,11 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
irq->hw = false;
ret = irq_get_irqchip_state(irq->host_irq,
IRQCHIP_STATE_PENDING,
&irq->pending_latch);
&pending);
WARN_ON(ret);
irq->pending_latch = pending;
desc = irq_to_desc(irq->host_irq);
irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
unlock:

View File

@@ -244,7 +244,7 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
*
* Requires the IRQ lock to be held.
*/
static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
{
lockdep_assert_held(&irq->irq_lock);
@@ -272,17 +272,20 @@ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
return NULL;
}
struct vgic_sort_info {
struct kvm_vcpu *vcpu;
struct vgic_vmcr vmcr;
};
/*
* The order of items in the ap_lists defines how we'll pack things in LRs as
* well, the first items in the list being the first things populated in the
* LRs.
*
* A hard rule is that active interrupts can never be pushed out of the LRs
* (and therefore take priority) since we cannot reliably trap on deactivation
* of IRQs and therefore they have to be present in the LRs.
*
* Pending, non-active interrupts must be placed at the head of the list.
* Otherwise things should be sorted by the priority field and the GIC
* hardware support will take care of preemption of priority groups etc.
* Interrupts that are not deliverable should be at the end of the list.
*
* Return negative if "a" sorts before "b", 0 to preserve order, and positive
* to sort "b" before "a".
@@ -292,6 +295,8 @@ static int vgic_irq_cmp(void *priv, const struct list_head *a,
{
struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
struct vgic_sort_info *info = priv;
struct kvm_vcpu *vcpu = info->vcpu;
bool penda, pendb;
int ret;
@@ -305,21 +310,32 @@ static int vgic_irq_cmp(void *priv, const struct list_head *a,
raw_spin_lock(&irqa->irq_lock);
raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
if (irqa->active || irqb->active) {
ret = (int)irqb->active - (int)irqa->active;
/* Undeliverable interrupts should be last */
ret = (int)(vgic_target_oracle(irqb) == vcpu) - (int)(vgic_target_oracle(irqa) == vcpu);
if (ret)
goto out;
}
penda = irqa->enabled && irq_is_pending(irqa);
pendb = irqb->enabled && irq_is_pending(irqb);
if (!penda || !pendb) {
ret = (int)pendb - (int)penda;
/* Same thing for interrupts targeting a disabled group */
ret = (int)(irqb->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
ret -= (int)(irqa->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
if (ret)
goto out;
}
/* Both pending and enabled, sort by priority */
ret = irqa->priority - irqb->priority;
penda = irqa->enabled && irq_is_pending(irqa) && !irqa->active;
pendb = irqb->enabled && irq_is_pending(irqb) && !irqb->active;
ret = (int)pendb - (int)penda;
if (ret)
goto out;
/* Both pending and enabled, sort by priority (lower number first) */
ret = (int)irqa->priority - (int)irqb->priority;
if (ret)
goto out;
/* Finally, HW bit active interrupts have priority over non-HW ones */
ret = (int)irqb->hw - (int)irqa->hw;
out:
raw_spin_unlock(&irqb->irq_lock);
raw_spin_unlock(&irqa->irq_lock);
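
In effect the new comparator sorts on the tuple (deliverable, group enabled, pending-and-not-active, priority, HW). A standalone qsort() sketch of the same ordering, purely for illustration (none of these names exist in the kernel):

/* Illustration only: the ap_list ordering rules applied to a plain array. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_irq {
        bool deliverable;       /* would the target oracle pick this vCPU? */
        bool group_enabled;     /* grpen0/grpen1 for the IRQ's group */
        bool pending;           /* enabled, pending and not active */
        int  priority;          /* lower value is more urgent */
        bool hw;                /* backed by a HW interrupt */
};

static int demo_irq_cmp(const void *pa, const void *pb)
{
        const struct demo_irq *a = pa, *b = pb;
        int ret;

        /* Undeliverable interrupts and disabled groups sort last */
        if ((ret = (int)b->deliverable - (int)a->deliverable))
                return ret;
        if ((ret = (int)b->group_enabled - (int)a->group_enabled))
                return ret;
        /* Pending-but-not-active first, then ascending priority, then HW */
        if ((ret = (int)b->pending - (int)a->pending))
                return ret;
        if ((ret = a->priority - b->priority))
                return ret;
        return (int)b->hw - (int)a->hw;
}

int main(void)
{
        struct demo_irq irqs[] = {
                { true,  true, false, 0xa0, true  },    /* active, HW-backed */
                { true,  true, true,  0x80, false },    /* pending, prio 0x80 */
                { false, true, true,  0x00, false },    /* not deliverable here */
                { true,  true, true,  0x40, false },    /* pending, prio 0x40 */
        };

        qsort(irqs, sizeof(irqs) / sizeof(irqs[0]), sizeof(irqs[0]), demo_irq_cmp);
        for (unsigned int i = 0; i < sizeof(irqs) / sizeof(irqs[0]); i++)
                printf("prio %#x pending %d deliverable %d\n",
                       irqs[i].priority, irqs[i].pending, irqs[i].deliverable);
        return 0;
}
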
@@ -330,10 +346,12 @@ out:
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_sort_info info = { .vcpu = vcpu, };
lockdep_assert_held(&vgic_cpu->ap_list_lock);
list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
vgic_get_vmcr(vcpu, &info.vmcr);
list_sort(&info, &vgic_cpu->ap_list_head, vgic_irq_cmp);
}
/*
@@ -356,6 +374,20 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne
return false;
}
static bool vgic_model_needs_bcst_kick(struct kvm *kvm)
{
/*
* A GICv3 (or GICv3-like) system exposing a GICv3 to the guest
* needs a broadcast kick to set TDIR globally.
*
* For systems that do not have TDIR (ARM's own v8.0 CPUs), the
* shadow TDIR bit is always set, and so is the register's TC bit,
* so no need to kick the CPUs.
*/
return (cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) &&
kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3);
}
/*
* Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
* Do the queuing if necessary, taking the right locks in the right order.
@@ -368,6 +400,7 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
unsigned long flags) __releases(&irq->irq_lock)
{
struct kvm_vcpu *vcpu;
bool bcast;
lockdep_assert_held(&irq->irq_lock);
@@ -442,11 +475,20 @@ retry:
list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
irq->vcpu = vcpu;
/* A new SPI may result in deactivation trapping on all vcpus */
bcast = (vgic_model_needs_bcst_kick(vcpu->kvm) &&
vgic_valid_spi(vcpu->kvm, irq->intid) &&
atomic_fetch_inc(&vcpu->kvm->arch.vgic.active_spis) == 0);
raw_spin_unlock(&irq->irq_lock);
raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
if (!bcast) {
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
} else {
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_IRQ_PENDING);
}
return true;
}
@@ -798,98 +840,148 @@ static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
vgic_v3_clear_lr(vcpu, lr);
}
static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
{
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_set_underflow(vcpu);
else
vgic_v3_set_underflow(vcpu);
}
/* Requires the ap_list_lock to be held. */
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
bool *multi_sgi)
static void summarize_ap_list(struct kvm_vcpu *vcpu,
struct ap_list_summary *als)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_irq *irq;
int count = 0;
*multi_sgi = false;
lockdep_assert_held(&vgic_cpu->ap_list_lock);
*als = (typeof(*als)){};
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
int w;
guard(raw_spinlock)(&irq->irq_lock);
raw_spin_lock(&irq->irq_lock);
/* GICv2 SGIs can count for more than one... */
w = vgic_irq_get_lr_count(irq);
raw_spin_unlock(&irq->irq_lock);
if (unlikely(vgic_target_oracle(irq) != vcpu))
continue;
count += w;
*multi_sgi |= (w > 1);
if (!irq->active)
als->nr_pend++;
else
als->nr_act++;
if (irq->intid < VGIC_NR_SGIS)
als->nr_sgi++;
}
return count;
}
/* Requires the VCPU's ap_list_lock to be held. */
/*
* Dealing with LR overflow is close to black magic -- dress accordingly.
*
* We have to present an almost infinite number of interrupts through a very
* limited number of registers. Therefore crucial decisions must be made to
* ensure we feed the most relevant interrupts into the LRs, and yet have
* some facilities to let the guest interact with those that are not there.
*
* All considerations below are in the context of interrupts targeting a
* single vcpu with non-idle state (either pending, active, or both),
* colloquially called the ap_list:
*
* - Pending interrupts must have priority over active interrupts. This also
* excludes pending+active interrupts. This ensures that a guest can
* perform priority drops on any number of interrupts, and yet be
* presented the next pending one.
*
* - Deactivation of interrupts outside of the LRs must be tracked using
* the EOIcount-driven maintenance interrupt, and sometimes by
* trapping the DIR register.
*
* - For EOImode=0, a non-zero EOIcount means walking the ap_list past the
* point that made it into the LRs, and deactivating interrupts that would
* have made it onto the LRs if we had the space.
*
* - The MI-generation bits must be used to try and force an exit when the
* guest has done enough changes to the LRs that we want to reevaluate the
* situation:
*
* - if the total number of pending interrupts exceeds the number of
* LRs, NPIE must be set in order to exit once no pending interrupts
* are present in the LRs, allowing us to populate the next batch.
*
* - if there are active interrupts outside of the LRs, then LRENPIE
* must be set so that we exit on deactivation of one of these, and
* work out which one is to be deactivated. Note that this is not
* enough to deal with EOImode=1, see below.
*
* - if the overall number of interrupts exceeds the number of LRs,
* then UIE must be set to allow refilling of the LRs once the
* majority of them has been processed.
*
* - as usual, MI triggers are only an optimisation, since we cannot
* rely on the MI being delivered in a timely manner...
*
* - EOImode=1 creates some additional problems:
*
* - deactivation can happen in any order, and we cannot rely on
* EOImode=0's coupling of priority-drop and deactivation which
* imposes strict reverse Ack order. This means that DIR must
* trap if we have active interrupts outside of the LRs.
*
* - deactivation of SPIs can occur on any CPU, while the SPI is only
* present in the ap_list of the CPU that actually ack-ed it. In that
* case, EOIcount doesn't provide enough information, and we must
* resort to trapping DIR even if we don't overflow the LRs. Bonus
* point for not trapping DIR when no SPIs are pending or active in
* the whole VM.
*
* - LPIs do not suffer the same problem as SPIs on deactivation, as we
* have to essentially discard the active state, see below.
*
* - Virtual LPIs have an active state (surprise!), which gets removed on
* priority drop (EOI). However, EOIcount doesn't get bumped when the LPI
* is not present in the LR (surprise again!). Special care must therefore
* be taken to remove the active state from any activated LPI when exiting
* from the guest. This is in a way no different from what happens on the
* physical side. We still rely on the running priority to have been
* removed from the APRs, irrespective of the LPI being present in the LRs
* or not.
*
* - Virtual SGIs directly injected via GICv4.1 must not affect EOIcount, as
* they are not managed in SW and don't have a true active state. So only
* set vSGIEOICount when no SGIs are in the ap_list.
*
* - GICv2 SGIs with multiple sources are injected one source at a time, as
* if they were made pending sequentially. This may mean that we don't
* always present the HPPI if other interrupts with lower priority are
* pending in the LRs. Big deal.
*/
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct ap_list_summary als;
struct vgic_irq *irq;
int count;
bool multi_sgi;
u8 prio = 0xff;
int i = 0;
int count = 0;
lockdep_assert_held(&vgic_cpu->ap_list_lock);
count = compute_ap_list_depth(vcpu, &multi_sgi);
if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
summarize_ap_list(vcpu, &als);
if (irqs_outside_lrs(&als))
vgic_sort_ap_list(vcpu);
count = 0;
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
raw_spin_lock(&irq->irq_lock);
scoped_guard(raw_spinlock, &irq->irq_lock) {
if (likely(vgic_target_oracle(irq) == vcpu)) {
vgic_populate_lr(vcpu, irq, count++);
}
}
/*
* If we have multi-SGIs in the pipeline, we need to
* guarantee that they are all seen before any IRQ of
* lower priority. In that case, we need to filter out
* these interrupts by exiting early. This is easy as
* the AP list has been sorted already.
*/
if (multi_sgi && irq->priority > prio) {
raw_spin_unlock(&irq->irq_lock);
if (count == kvm_vgic_global_state.nr_lr)
break;
}
if (likely(vgic_target_oracle(irq) == vcpu)) {
vgic_populate_lr(vcpu, irq, count++);
if (irq->source)
prio = irq->priority;
}
raw_spin_unlock(&irq->irq_lock);
if (count == kvm_vgic_global_state.nr_lr) {
if (!list_is_last(&irq->ap_list,
&vgic_cpu->ap_list_head))
vgic_set_underflow(vcpu);
break;
}
}
/* Nuke remaining LRs */
for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
for (int i = count ; i < kvm_vgic_global_state.nr_lr; i++)
vgic_clear_lr(vcpu, i);
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
else
vgic_v2_configure_hcr(vcpu, &als);
} else {
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
vgic_v3_configure_hcr(vcpu, &als);
}
}
static inline bool can_access_vgic_from_kernel(void)
@@ -913,8 +1005,6 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu)
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
int used_lrs;
/* If nesting, emulate the HW effect from L0 to L1 */
if (vgic_state_is_nested(vcpu)) {
vgic_v3_sync_nested(vcpu);
@@ -924,23 +1014,24 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
if (vcpu_has_nv(vcpu))
vgic_v3_nested_update_mi(vcpu);
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
if (can_access_vgic_from_kernel())
vgic_save_state(vcpu);
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
else
used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
if (used_lrs)
vgic_fold_lr_state(vcpu);
vgic_fold_lr_state(vcpu);
vgic_prune_ap_list(vcpu);
}
/* Sync interrupts that were deactivated through a DIR trap */
void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu)
{
unsigned long flags;
/* Make sure we're in the same context as LR handling */
local_irq_save(flags);
vgic_prune_ap_list(vcpu);
local_irq_restore(flags);
}
static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
{
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
@@ -965,8 +1056,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
* abort the entry procedure and inject the exception at the
* beginning of the run loop.
*
* - Otherwise, do exactly *NOTHING*. The guest state is
* already loaded, and we can carry on with running it.
* - Otherwise, do exactly *NOTHING* apart from enabling the virtual
* CPU interface. The guest state is already loaded, and we can
* carry on with running it.
*
* If we have NV, but are not in a nested state, compute the
* maintenance interrupt state, as it may fire.
@@ -975,35 +1067,17 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
if (kvm_vgic_vcpu_pending_irq(vcpu))
kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
vgic_v3_flush_nested(vcpu);
return;
}
if (vcpu_has_nv(vcpu))
vgic_v3_nested_update_mi(vcpu);
/*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
* taking any lock. There is a potential race with someone injecting
* interrupts to the VCPU, but it is a benign race as the VCPU will
* either observe the new interrupt before or after doing this check,
* and introducing additional synchronization mechanism doesn't change
* this.
*
* Note that we still need to go through the whole thing if anything
* can be directly injected (GICv4).
*/
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
!vgic_supports_direct_irqs(vcpu->kvm))
return;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock)
vgic_flush_lr_state(vcpu);
raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
}
if (can_access_vgic_from_kernel())
vgic_restore_state(vcpu);

View File

@@ -164,6 +164,22 @@ static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa,
return ret;
}
void kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
static inline u64 vgic_ich_hcr_trap_bits(void)
{
u64 hcr;
/* All the traps are in the bottom 16bits */
asm volatile(ALTERNATIVE_CB("movz %0, #0\n",
ARM64_ALWAYS_SYSTEM,
kvm_compute_ich_hcr_trap_bits)
: "=r" (hcr));
return hcr;
}
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
@@ -220,6 +236,21 @@ struct its_ite {
u32 event_id;
};
struct ap_list_summary {
unsigned int nr_pend; /* purely pending, not active */
unsigned int nr_act; /* active, or active+pending */
unsigned int nr_sgi; /* any SGI */
};
#define irqs_outside_lrs(s) \
(((s)->nr_pend + (s)->nr_act) > kvm_vgic_global_state.nr_lr)
#define irqs_pending_outside_lrs(s) \
((s)->nr_pend > kvm_vgic_global_state.nr_lr)
#define irqs_active_outside_lrs(s) \
((s)->nr_act && irqs_outside_lrs(s))
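
A quick standalone sketch of how these thresholds behave, assuming four LRs (not part of the patch): with six pending and two active interrupts, all three conditions fire, which is what drives the UIE/NPIE/LRENPIE selection in the configure_hcr helpers.

/* Illustration only: ap_list summary vs. a hypothetical nr_lr of 4. */
#include <stdio.h>

struct ap_list_summary { unsigned int nr_pend, nr_act, nr_sgi; };

static const unsigned int nr_lr = 4;    /* assumed number of list registers */

#define irqs_outside_lrs(s)             (((s)->nr_pend + (s)->nr_act) > nr_lr)
#define irqs_pending_outside_lrs(s)     ((s)->nr_pend > nr_lr)
#define irqs_active_outside_lrs(s)      ((s)->nr_act && irqs_outside_lrs(s))

int main(void)
{
        struct ap_list_summary als = { .nr_pend = 6, .nr_act = 2, .nr_sgi = 0 };

        printf("UIE=%d NPIE=%d LRENPIE=%d\n",
               irqs_outside_lrs(&als),          /* refill once the LRs are mostly drained */
               irqs_pending_outside_lrs(&als),  /* exit when no pending IRQs remain in LRs */
               irqs_active_outside_lrs(&als));  /* exit on out-of-LR deactivation */
        return 0;
}
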
int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
struct vgic_reg_attr *reg_attr);
int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
@@ -230,6 +261,7 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid);
struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq);
bool vgic_get_phys_line_level(struct vgic_irq *irq);
void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
@@ -245,8 +277,9 @@ int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val);
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als);
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
@@ -254,7 +287,7 @@ int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v2_enable(struct kvm_vcpu *vcpu);
void vgic_v2_reset(struct kvm_vcpu *vcpu);
int vgic_v2_probe(const struct gic_kvm_info *info);
int vgic_v2_map_resources(struct kvm *kvm);
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
@@ -286,10 +319,11 @@ static inline void vgic_get_irq_ref(struct vgic_irq *irq)
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val);
void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als);
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_enable(struct kvm_vcpu *vcpu);
void vgic_v3_reset(struct kvm_vcpu *vcpu);
int vgic_v3_probe(const struct gic_kvm_info *info);
int vgic_v3_map_resources(struct kvm *kvm);
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
@@ -412,6 +446,7 @@ static inline bool kvm_has_gicv3(struct kvm *kvm)
return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
}
void vgic_v3_flush_nested(struct kvm_vcpu *vcpu);
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu);
void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
void vgic_v3_put_nested(struct kvm_vcpu *vcpu);

View File

@@ -40,6 +40,7 @@ HAS_GICV5_CPUIF
HAS_GICV5_LEGACY
HAS_GIC_PRIO_MASKING
HAS_GIC_PRIO_RELAXED_SYNC
HAS_ICH_HCR_EL2_TDIR
HAS_HCR_NV1
HAS_HCX
HAS_LDAPR
@@ -64,6 +65,7 @@ HAS_TLB_RANGE
HAS_VA52
HAS_VIRT_HOST_EXTN
HAS_WFXT
HAS_XNX
HAFT
HW_DBM
KVM_HVHE

View File

@@ -411,12 +411,15 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
if (is_kernel_in_hyp_mode() &&
(read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) &&
read_sysreg_s(SYS_ICH_MISR_EL2) != 0) {
u64 val;
generic_handle_domain_irq(aic_irqc->hw_domain,
AIC_FIQ_HWIRQ(AIC_VGIC_MI));
if (unlikely((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) &&
read_sysreg_s(SYS_ICH_MISR_EL2))) {
pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n");
(val = read_sysreg_s(SYS_ICH_MISR_EL2)))) {
pr_err_ratelimited("vGIC IRQ fired and not handled by KVM (MISR=%llx), disabling.\n",
val);
sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0);
}
}

View File

@@ -1459,6 +1459,8 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
if (ret)
return;
gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base;
if (static_branch_likely(&supports_deactivate_key))
vgic_set_kvm_info(&gic_v2_kvm_info);
}
@@ -1620,6 +1622,7 @@ static void __init gic_acpi_setup_kvm_info(void)
return;
gic_v2_kvm_info.maint_irq = irq;
gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base;
vgic_set_kvm_info(&gic_v2_kvm_info);
}

View File

@@ -59,6 +59,9 @@ struct vgic_global {
/* virtual control interface mapping, HYP VA */
void __iomem *vctrl_hyp;
/* Physical CPU interface, kernel VA */
void __iomem *gicc_base;
/* Number of implemented list registers */
int nr_lr;
@@ -120,6 +123,7 @@ struct irq_ops {
struct vgic_irq {
raw_spinlock_t irq_lock; /* Protects the content of the struct */
u32 intid; /* Guest visible INTID */
struct rcu_head rcu;
struct list_head ap_list;
@@ -134,17 +138,18 @@ struct vgic_irq {
* affinity reg (v3).
*/
u32 intid; /* Guest visible INTID */
bool line_level; /* Level only */
bool pending_latch; /* The pending latch state used to calculate
* the pending state for both level
* and edge triggered IRQs. */
bool active;
bool pending_release; /* Used for LPIs only, unreferenced IRQ
bool pending_release:1; /* Used for LPIs only, unreferenced IRQ
* pending a release */
bool enabled;
bool hw; /* Tied to HW IRQ */
bool pending_latch:1; /* The pending latch state used to calculate
* the pending state for both level
* and edge triggered IRQs. */
enum vgic_irq_config config:1; /* Level or edge */
bool line_level:1; /* Level only */
bool enabled:1;
bool active:1;
bool hw:1; /* Tied to HW IRQ */
bool on_lr:1; /* Present in a CPU LR */
refcount_t refcount; /* Used for LPIs */
u32 hwintid; /* HW INTID number */
unsigned int host_irq; /* linux irq corresponding to hwintid */
@@ -156,7 +161,6 @@ struct vgic_irq {
u8 active_source; /* GICv2 SGIs only */
u8 priority;
u8 group; /* 0 == group 0, 1 == group 1 */
enum vgic_irq_config config; /* Level or edge */
struct irq_ops *ops;
@@ -259,6 +263,9 @@ struct vgic_dist {
/* The GIC maintenance IRQ for nested hypervisors. */
u32 mi_intid;
/* Track the number of in-flight active SPIs */
atomic_t active_spis;
/* base addresses in guest physical address space: */
gpa_t vgic_dist_base; /* distributor */
union {
@@ -280,6 +287,7 @@ struct vgic_dist {
struct vgic_irq *spis;
struct vgic_io_device dist_iodev;
struct vgic_io_device cpuif_iodev;
bool has_its;
bool table_write_in_progress;
@@ -417,6 +425,7 @@ bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu);
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1);

View File

@@ -86,7 +86,13 @@
#define GICH_HCR_EN (1 << 0)
#define GICH_HCR_UIE (1 << 1)
#define GICH_HCR_LRENPIE (1 << 2)
#define GICH_HCR_NPIE (1 << 3)
#define GICH_HCR_VGrp0EIE (1 << 4)
#define GICH_HCR_VGrp0DIE (1 << 5)
#define GICH_HCR_VGrp1EIE (1 << 6)
#define GICH_HCR_VGrp1DIE (1 << 7)
#define GICH_HCR_EOICOUNT GENMASK(31, 27)
#define GICH_LR_VIRTUALID (0x3ff << 0)
#define GICH_LR_PHYSID_CPUID_SHIFT (10)

View File

@@ -24,6 +24,8 @@ struct gic_kvm_info {
enum gic_type type;
/* Virtual CPU interface */
struct resource vcpu;
/* GICv2 GICC VA */
void __iomem *gicc_base;
/* Interrupt number */
unsigned int maint_irq;
/* No interrupt mask, no need to use the above field */

View File

@@ -179,6 +179,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_LOONGARCH_IOCSR 38
#define KVM_EXIT_MEMORY_FAULT 39
#define KVM_EXIT_TDX 40
#define KVM_EXIT_ARM_SEA 41
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -473,6 +474,14 @@ struct kvm_run {
} setup_event_notify;
};
} tdx;
/* KVM_EXIT_ARM_SEA */
struct {
#define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0)
__u64 flags;
__u64 esr;
__u64 gva;
__u64 gpa;
} arm_sea;
/* Fix the size of the union. */
char padding[256];
};
@@ -963,6 +972,7 @@ struct kvm_enable_cap {
#define KVM_CAP_RISCV_MP_STATE_RESET 242
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
#define KVM_CAP_GUEST_MEMFD_FLAGS 244
#define KVM_CAP_ARM_SEA_TO_USER 245
struct kvm_irq_routing_irqchip {
__u32 irqchip;

View File

@@ -141,6 +141,8 @@
#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
#define ESR_ELx_AR_SHIFT (14)
#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
#define ESR_ELx_VNCR_SHIFT (13)
#define ESR_ELx_VNCR (UL(1) << ESR_ELx_VNCR_SHIFT)
#define ESR_ELx_CM_SHIFT (8)
#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)

View File

@@ -158,6 +158,7 @@ TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/at
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
TEST_GEN_PROGS_arm64 += arm64/hello_el2
TEST_GEN_PROGS_arm64 += arm64/host_sve
@@ -165,6 +166,7 @@ TEST_GEN_PROGS_arm64 += arm64/hypercalls
TEST_GEN_PROGS_arm64 += arm64/external_aborts
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
TEST_GEN_PROGS_arm64 += arm64/sea_to_user
TEST_GEN_PROGS_arm64 += arm64/set_id_regs
TEST_GEN_PROGS_arm64 += arm64/smccc_filter
TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config

View File

@@ -0,0 +1,166 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes.
*/
#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"
#include "ucall.h"
#include <asm/sysreg.h>
#define TEST_ADDR 0x80000000
enum {
CLEAR_ACCESS_FLAG,
TEST_ACCESS_FLAG,
};
static u64 *ptep_hva;
#define copy_el2_to_el1(reg) \
write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12)
/* Yes, this is an ugly hack */
#define __at(op, addr) write_sysreg_s(addr, op)
#define test_at_insn(op, expect_fault) \
do { \
u64 par, fsc; \
bool fault; \
\
GUEST_SYNC(CLEAR_ACCESS_FLAG); \
\
__at(OP_AT_##op, TEST_ADDR); \
isb(); \
par = read_sysreg(par_el1); \
\
fault = par & SYS_PAR_EL1_F; \
fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \
\
__GUEST_ASSERT((expect_fault) == fault, \
"AT "#op": %sexpected fault (par: %lx)1", \
(expect_fault) ? "" : "un", par); \
if ((expect_fault)) { \
__GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \
"AT "#op": expected access flag fault (par: %lx)", \
par); \
} else { \
GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \
GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \
GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \
GUEST_SYNC(TEST_ACCESS_FLAG); \
} \
} while (0)
static void test_at(bool expect_fault)
{
test_at_insn(S1E2R, expect_fault);
test_at_insn(S1E2W, expect_fault);
/* Reuse the stage-1 MMU context from EL2 at EL1 */
copy_el2_to_el1(SCTLR);
copy_el2_to_el1(MAIR);
copy_el2_to_el1(TCR);
copy_el2_to_el1(TTBR0);
copy_el2_to_el1(TTBR1);
/* Disable stage-2 translation and enter a non-host context */
write_sysreg(0, vtcr_el2);
write_sysreg(0, vttbr_el2);
sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0);
isb();
test_at_insn(S1E1R, expect_fault);
test_at_insn(S1E1W, expect_fault);
}
static void guest_code(void)
{
sysreg_clear_set(tcr_el1, TCR_HA, 0);
isb();
test_at(true);
if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1)))
GUEST_DONE();
/*
* KVM's software PTW makes the implementation choice that the AT
* instruction sets the access flag.
*/
sysreg_clear_set(tcr_el1, 0, TCR_HA);
isb();
test_at(false);
GUEST_DONE();
}
static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
{
switch (uc->args[1]) {
case CLEAR_ACCESS_FLAG:
/*
* Delete + reinstall the memslot to invalidate stage-2
* mappings of the stage-1 page tables, forcing KVM to
* use the 'slow' AT emulation path.
*
* This and clearing the access flag from host userspace
* ensures that the access flag cannot be set speculatively
* and is reliably cleared at the time of the AT instruction.
*/
clear_bit(__ffs(PTE_AF), ptep_hva);
vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]);
break;
case TEST_ACCESS_FLAG:
TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva),
"Expected access flag to be set (desc: %lu)", *ptep_hva);
break;
default:
TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]);
}
}
static void run_test(struct kvm_vcpu *vcpu)
{
struct ucall uc;
while (true) {
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_DONE:
return;
case UCALL_SYNC:
handle_sync(vcpu, &uc);
continue;
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
return;
default:
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
}
}
}
int main(void)
{
struct kvm_vcpu_init init;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
vm = vm_create(1);
kvm_get_default_vcpu_target(vm, &init);
init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
kvm_arch_vm_finalize_vcpus(vm);
virt_map(vm, TEST_ADDR, TEST_ADDR, 1);
ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3);
run_test(vcpu);
kvm_vm_free(vm);
return 0;
}

View File

@@ -0,0 +1,331 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Test that KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails
* to handle a SEA and userspace has opted in to KVM_CAP_ARM_SEA_TO_USER.
*
* After reaching userspace with the expected arm_sea info, also test
* injecting a synchronous external data abort back into the guest.
*
* This test uses EINJ to generate a REAL synchronous external data abort by
* consuming a recoverable uncorrectable memory error. The device under test
* must therefore support EINJ, including the notrigger feature, in both
* firmware and the host kernel; otherwise the test is skipped. The platform
* under test's APEI must also be unable to claim the SEA; otherwise the test
* is likewise skipped.
*/
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#include "guest_modes.h"
#define PAGE_PRESENT (1ULL << 63)
#define PAGE_PHYSICAL 0x007fffffffffffffULL
#define PAGE_ADDR_MASK (~(0xfffULL))
/* Group ISV and ISS[23:14]. */
#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \
(ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \
(ESR_ELx_SF) | (ESR_ELx_AR))
#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type"
#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1"
#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2"
#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags"
#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger"
#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject"
/* Memory Uncorrectable non-fatal. */
#define ERROR_TYPE_MEMORY_UER 0x10
/* Memory address and mask valid (param1 and param2). */
#define MASK_MEMORY_UER 0b10
/* Guest virtual address region = [2G, 3G). */
#define START_GVA 0x80000000UL
#define VM_MEM_SIZE 0x40000000UL
/* Note: EINJ_OFFSET must be < VM_MEM_SIZE. */
#define EINJ_OFFSET 0x01234badUL
#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET))
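/* GPA, HVA and HPA of the poisoned location, and whether KVM reported FAR as invalid (FnV) */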
static vm_paddr_t einj_gpa;
static void *einj_hva;
static uint64_t einj_hpa;
static bool far_invalid;
static uint64_t translate_to_host_paddr(unsigned long vaddr)
{
uint64_t pinfo;
int64_t offset = vaddr / getpagesize() * sizeof(pinfo);
int fd;
uint64_t page_addr;
uint64_t paddr;
fd = open("/proc/self/pagemap", O_RDONLY);
if (fd < 0)
ksft_exit_fail_perror("Failed to open /proc/self/pagemap");
if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
close(fd);
ksft_exit_fail_perror("Failed to read /proc/self/pagemap");
}
close(fd);
if ((pinfo & PAGE_PRESENT) == 0)
ksft_exit_fail_perror("Page not present");
page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT;
paddr = page_addr + (vaddr & (getpagesize() - 1));
return paddr;
}
static void write_einj_entry(const char *einj_path, uint64_t val)
{
char cmd[256] = {0};
FILE *cmdfile = NULL;
sprintf(cmd, "echo %#lx > %s", val, einj_path);
cmdfile = popen(cmd, "r");
if (pclose(cmdfile) == 0)
ksft_print_msg("echo %#lx > %s - done\n", val, einj_path);
else
ksft_exit_fail_perror("Failed to write EINJ entry");
}
static void inject_uer(uint64_t paddr)
{
if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1)
ksft_test_result_skip("EINJ table not available in firmware");
if (access(EINJ_ETYPE, R_OK | W_OK) == -1)
ksft_test_result_skip("EINJ module probably not loaded?");
write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER);
write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER);
write_einj_entry(EINJ_ADDR, paddr);
write_einj_entry(EINJ_MASK, ~0x0UL);
write_einj_entry(EINJ_NOTRIGGER, 1);
write_einj_entry(EINJ_DOIT, 1);
}
/*
* When host APEI successfully claims the SEA caused by guest_code, the
* kernel sends a SIGBUS with BUS_MCEERR_AR to the test thread.
*
* We set up this SIGBUS handler to skip the test for that case.
*/
static void sigbus_signal_handler(int sig, siginfo_t *si, void *v)
{
ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig);
ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n",
si->si_signo, si->si_errno, si->si_code, si->si_addr);
if (si->si_code == BUS_MCEERR_AR)
ksft_test_result_skip("SEA is claimed by host APEI\n");
else
ksft_test_result_fail("Exit with signal unhandled\n");
exit(0);
}
static void setup_sigbus_handler(void)
{
struct sigaction act;
memset(&act, 0, sizeof(act));
sigemptyset(&act.sa_mask);
act.sa_sigaction = sigbus_signal_handler;
act.sa_flags = SA_SIGINFO;
TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0,
"Failed to setup SIGBUS handler");
}
static void guest_code(void)
{
uint64_t guest_data;
/* Consuming the error will cause a SEA. */
guest_data = *(uint64_t *)EINJ_GVA;
GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n",
EINJ_GVA, guest_data);
}
static void expect_sea_handler(struct ex_regs *regs)
{
u64 esr = read_sysreg(esr_el1);
u64 far = read_sysreg(far_el1);
bool expect_far_invalid = far_invalid;
GUEST_PRINTF("Handling Guest SEA\n");
GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far);
GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
if (expect_far_invalid) {
GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV);
GUEST_PRINTF("Guest observed garbage value in FAR\n");
} else {
GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0);
GUEST_ASSERT_EQ(far, EINJ_GVA);
}
GUEST_DONE();
}
static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_events events = {};
events.exception.ext_dabt_pending = true;
vcpu_events_set(vcpu, &events);
}
static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct ucall uc;
bool guest_done = false;
struct kvm_run *run = vcpu->run;
u64 esr;
/* Resume the vCPU after error injection to consume the error. */
vcpu_run(vcpu);
ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n",
exit_reason_str(run->exit_reason));
ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n",
run->arm_sea.esr, run->arm_sea.flags);
ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n",
run->arm_sea.gva, run->arm_sea.gpa);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA);
esr = run->arm_sea.esr;
TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW);
TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0);
TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0);
TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0);
if (!(esr & ESR_ELx_FnV)) {
ksft_print_msg("Expect gva to match given FnV bit is 0\n");
TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA);
}
if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) {
ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n");
TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK);
}
far_invalid = esr & ESR_ELx_FnV;
/* Inject a SEA into the guest and expect it to be handled by its SEA handler. */
vcpu_inject_sea(vcpu);
/* Expect the guest to reach GUEST_DONE gracefully. */
do {
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_PRINTF:
ksft_print_msg("From guest: %s", uc.buffer);
break;
case UCALL_DONE:
ksft_print_msg("Guest done gracefully!\n");
guest_done = 1;
break;
case UCALL_ABORT:
ksft_print_msg("Guest aborted!\n");
guest_done = 1;
REPORT_GUEST_ASSERT(uc);
break;
default:
TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
}
} while (!guest_done);
}
static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu)
{
size_t backing_page_size;
size_t guest_page_size;
size_t alignment;
uint64_t num_guest_pages;
vm_paddr_t start_gpa;
enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB;
struct kvm_vm *vm;
backing_page_size = get_backing_src_pagesz(src_type);
guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
alignment = max(backing_page_size, guest_page_size);
num_guest_pages = VM_MEM_SIZE / guest_page_size;
vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(*vcpu);
vm_install_sync_handler(vm,
/*vector=*/VECTOR_SYNC_CURRENT,
/*ec=*/ESR_ELx_EC_DABT_CUR,
/*handler=*/expect_sea_handler);
start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size;
start_gpa = align_down(start_gpa, alignment);
vm_userspace_mem_region_add(
/*vm=*/vm,
/*src_type=*/src_type,
/*guest_paddr=*/start_gpa,
/*slot=*/1,
/*npages=*/num_guest_pages,
/*flags=*/0);
virt_map(vm, START_GVA, start_gpa, num_guest_pages);
ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n",
num_guest_pages, START_GVA, start_gpa);
return vm;
}
static void vm_inject_memory_uer(struct kvm_vm *vm)
{
uint64_t guest_data;
einj_gpa = addr_gva2gpa(vm, EINJ_GVA);
einj_hva = addr_gva2hva(vm, EINJ_GVA);
/* Populate certain data before injecting UER. */
*(uint64_t *)einj_hva = 0xBAADCAFE;
guest_data = *(uint64_t *)einj_hva;
ksft_print_msg("Before EINJect: data=%#lx\n",
guest_data);
einj_hpa = translate_to_host_paddr((unsigned long)einj_hva);
ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n",
EINJ_GVA, einj_gpa, einj_hva, einj_hpa);
inject_uer(einj_hpa);
ksft_print_msg("Memory UER EINJected\n");
}
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER));
setup_sigbus_handler();
vm = vm_create_with_sea_handler(&vcpu);
vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0);
vm_inject_memory_uer(vm);
run_vm(vm, vcpu);
kvm_vm_free(vm);
return 0;
}

View File

@@ -29,6 +29,7 @@ struct test_args {
bool level_sensitive; /* 1 is level, 0 is edge */
int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
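/* Scratch word used for cross-vCPU handshaking (see guest_code_asym_dir) */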
uint32_t shared_data;
};
/*
@@ -205,7 +206,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
do { \
uint32_t _intid; \
_intid = gic_get_and_ack_irq(); \
GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
GUEST_ASSERT(_intid == IAR_SPURIOUS); \
} while (0)
#define CAT_HELPER(a, b) a ## b
@@ -359,8 +360,9 @@ static uint32_t wait_for_and_activate_irq(void)
* interrupts for the whole test.
*/
static void test_inject_preemption(struct test_args *args,
uint32_t first_intid, int num,
kvm_inject_cmd cmd)
uint32_t first_intid, int num,
const unsigned long *exclude,
kvm_inject_cmd cmd)
{
uint32_t intid, prio, step = KVM_PRIO_STEPS;
int i;
@@ -379,6 +381,10 @@ static void test_inject_preemption(struct test_args *args,
for (i = 0; i < num; i++) {
uint32_t tmp;
intid = i + first_intid;
if (exclude && test_bit(i, exclude))
continue;
KVM_INJECT(cmd, intid);
/* Each successive IRQ will preempt the previous one. */
tmp = wait_for_and_activate_irq();
@@ -390,15 +396,33 @@ static void test_inject_preemption(struct test_args *args,
/* finish handling the IRQs starting with the highest priority one. */
for (i = 0; i < num; i++) {
intid = num - i - 1 + first_intid;
if (exclude && test_bit(intid - first_intid, exclude))
continue;
gic_set_eoi(intid);
if (args->eoi_split)
gic_set_dir(intid);
}
if (args->eoi_split) {
for (i = 0; i < num; i++) {
intid = i + first_intid;
if (exclude && test_bit(i, exclude))
continue;
if (args->eoi_split)
gic_set_dir(intid);
}
}
local_irq_enable();
for (i = 0; i < num; i++)
for (i = 0; i < num; i++) {
if (exclude && test_bit(i, exclude))
continue;
GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
}
GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
GUEST_ASSERT_IAR_EMPTY();
@@ -436,33 +460,32 @@ static void test_injection_failure(struct test_args *args,
static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
{
/*
* Test up to 4 levels of preemption. The reason is that KVM doesn't
* currently implement the ability to have more than the number-of-LRs
* number of concurrently active IRQs. The number of LRs implemented is
* IMPLEMENTATION DEFINED, however, it seems that most implement 4.
*/
/* Timer PPIs cannot be injected from userspace */
static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) |
BIT(30 - MIN_PPI) |
BIT(28 - MIN_PPI) |
BIT(26 - MIN_PPI));
if (f->sgi)
test_inject_preemption(args, MIN_SGI, 4, f->cmd);
test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd);
if (f->ppi)
test_inject_preemption(args, MIN_PPI, 4, f->cmd);
test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd);
if (f->spi)
test_inject_preemption(args, MIN_SPI, 4, f->cmd);
test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd);
}
static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
{
/* Test up to 4 active IRQs. Same reason as in test_preemption. */
if (f->sgi)
guest_restore_active(args, MIN_SGI, 4, f->cmd);
guest_restore_active(args, MIN_SGI, 16, f->cmd);
if (f->ppi)
guest_restore_active(args, MIN_PPI, 4, f->cmd);
guest_restore_active(args, MIN_PPI, 16, f->cmd);
if (f->spi)
guest_restore_active(args, MIN_SPI, 4, f->cmd);
guest_restore_active(args, MIN_SPI, 31, f->cmd);
}
static void guest_code(struct test_args *args)
@@ -473,12 +496,12 @@ static void guest_code(struct test_args *args)
gic_init(GIC_V3, 1);
for (i = 0; i < nr_irqs; i++)
gic_irq_enable(i);
for (i = MIN_SPI; i < nr_irqs; i++)
gic_irq_set_config(i, !level_sensitive);
for (i = 0; i < nr_irqs; i++)
gic_irq_enable(i);
gic_set_eoi_split(args->eoi_split);
reset_priorities(args);
@@ -779,6 +802,221 @@ done:
kvm_vm_free(vm);
}
static void guest_code_asym_dir(struct test_args *args, int cpuid)
{
gic_init(GIC_V3, 2);
gic_set_eoi_split(1);
gic_set_priority_mask(CPU_PRIO_MASK);
if (cpuid == 0) {
uint32_t intid;
local_irq_disable();
gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO);
gic_irq_enable(MIN_SPI);
gic_irq_set_pending(MIN_SPI);
intid = wait_for_and_activate_irq();
GUEST_ASSERT_EQ(intid, MIN_SPI);
gic_set_eoi(intid);
isb();
WRITE_ONCE(args->shared_data, MIN_SPI);
dsb(ishst);
do {
dsb(ishld);
} while (READ_ONCE(args->shared_data) == MIN_SPI);
GUEST_ASSERT(!gic_irq_get_active(MIN_SPI));
} else {
do {
dsb(ishld);
} while (READ_ONCE(args->shared_data) != MIN_SPI);
gic_set_dir(MIN_SPI);
isb();
WRITE_ONCE(args->shared_data, 0);
dsb(ishst);
}
GUEST_DONE();
}
static void guest_code_group_en(struct test_args *args, int cpuid)
{
uint32_t intid;
gic_init(GIC_V3, 2);
gic_set_eoi_split(0);
gic_set_priority_mask(CPU_PRIO_MASK);
/* SGI0 is G0, which is disabled */
gic_irq_set_group(0, 0);
/* Configure all SGIs with decreasing priority */
for (intid = 0; intid < MIN_PPI; intid++) {
gic_set_priority(intid, (intid + 1) * 8);
gic_irq_enable(intid);
gic_irq_set_pending(intid);
}
/* Ack and EOI all G1 interrupts */
for (int i = 1; i < MIN_PPI; i++) {
intid = wait_for_and_activate_irq();
GUEST_ASSERT(intid < MIN_PPI);
gic_set_eoi(intid);
isb();
}
/*
* Check that SGI0 is still pending, inactive, and that we cannot
* ack anything.
*/
GUEST_ASSERT(gic_irq_get_pending(0));
GUEST_ASSERT(!gic_irq_get_active(0));
GUEST_ASSERT_IAR_EMPTY();
GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS);
/* Open the G0 gates, and verify we can ack SGI0 */
write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1);
isb();
do {
intid = read_sysreg_s(SYS_ICC_IAR0_EL1);
} while (intid == IAR_SPURIOUS);
GUEST_ASSERT(intid == 0);
GUEST_DONE();
}
static void guest_code_timer_spi(struct test_args *args, int cpuid)
{
uint32_t intid;
u64 val;
gic_init(GIC_V3, 2);
gic_set_eoi_split(1);
gic_set_priority_mask(CPU_PRIO_MASK);
/* Add a pending SPI so that KVM starts trapping DIR */
gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO);
gic_irq_set_pending(MIN_SPI + cpuid);
/* Configure the timer with a higher priority, make it pending */
gic_set_priority(27, IRQ_DEFAULT_PRIO - 8);
isb();
val = read_sysreg(cntvct_el0);
write_sysreg(val, cntv_cval_el0);
write_sysreg(1, cntv_ctl_el0);
isb();
GUEST_ASSERT(gic_irq_get_pending(27));
/* Enable both interrupts */
gic_irq_enable(MIN_SPI + cpuid);
gic_irq_enable(27);
/* The timer must fire */
intid = wait_for_and_activate_irq();
GUEST_ASSERT(intid == 27);
/* Check that we can deassert it */
write_sysreg(0, cntv_ctl_el0);
isb();
GUEST_ASSERT(!gic_irq_get_pending(27));
/*
* Priority drop, deactivation -- we expect that the host
* deactivation will have been effective
*/
gic_set_eoi(27);
gic_set_dir(27);
GUEST_ASSERT(!gic_irq_get_active(27));
/* Do it one more time */
isb();
val = read_sysreg(cntvct_el0);
write_sysreg(val, cntv_cval_el0);
write_sysreg(1, cntv_ctl_el0);
isb();
GUEST_ASSERT(gic_irq_get_pending(27));
/* The timer must fire again */
intid = wait_for_and_activate_irq();
GUEST_ASSERT(intid == 27);
GUEST_DONE();
}
static void *test_vcpu_run(void *arg)
{
struct kvm_vcpu *vcpu = arg;
struct ucall uc;
while (1) {
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
return NULL;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
}
return NULL;
}
static void test_vgic_two_cpus(void *gcode)
{
pthread_t thr[2];
struct kvm_vcpu *vcpus[2];
struct test_args args = {};
struct kvm_vm *vm;
vm_vaddr_t args_gva;
int gic_fd, ret;
vm = vm_create_with_vcpus(2, gcode, vcpus);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vcpus[0]);
vcpu_init_descriptor_tables(vcpus[1]);
/* Setup the guest args page (so it gets the args). */
args_gva = vm_vaddr_alloc_page(vm);
memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
vcpu_args_set(vcpus[0], 2, args_gva, 0);
vcpu_args_set(vcpus[1], 2, args_gva, 1);
gic_fd = vgic_v3_setup(vm, 2, 64);
ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]);
if (ret)
TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret);
ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]);
if (ret)
TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret);
pthread_join(thr[0], NULL);
pthread_join(thr[1], NULL);
close(gic_fd);
kvm_vm_free(vm);
}
static void help(const char *name)
{
printf(
@@ -835,6 +1073,9 @@ int main(int argc, char **argv)
test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
test_vgic_two_cpus(guest_code_asym_dir);
test_vgic_two_cpus(guest_code_group_en);
test_vgic_two_cpus(guest_code_timer_spi);
} else {
test_vgic(nr_irqs, level_sensitive, eoi_split);
}

View File

@@ -118,6 +118,10 @@ static void guest_setup_gic(void)
guest_setup_its_mappings();
guest_invalidate_all_rdists();
/* SYNC to ensure ITS setup is complete */
for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
}
static void guest_code(size_t nr_lpis)

View File

@@ -57,6 +57,7 @@ void gic_irq_set_pending(unsigned int intid);
void gic_irq_clear_pending(unsigned int intid);
bool gic_irq_get_pending(unsigned int intid);
void gic_irq_set_config(unsigned int intid, bool is_edge);
void gic_irq_set_group(unsigned int intid, bool group);
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
vm_paddr_t pend_table);

View File

@@ -15,5 +15,6 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
u32 collection_id, u32 intid);
void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id);
#endif // __SELFTESTS_GIC_V3_ITS_H__

View File

@@ -688,6 +688,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
#endif
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);

View File

@@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_irq_set_config(intid, is_edge);
}
void gic_irq_set_group(unsigned int intid, bool group)
{
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_irq_set_group(intid, group);
}

View File

@@ -25,6 +25,7 @@ struct gic_common_ops {
void (*gic_irq_clear_pending)(uint32_t intid);
bool (*gic_irq_get_pending)(uint32_t intid);
void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
void (*gic_irq_set_group)(uint32_t intid, bool group);
};
extern const struct gic_common_ops gicv3_ops;

View File

@@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base)
}
}
static void gicv3_set_group(uint32_t intid, bool grp)
{
uint32_t cpu_or_dist;
uint32_t val;
cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid();
val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4);
if (grp)
val |= BIT(intid % 32);
else
val &= ~BIT(intid % 32);
gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val);
}
static void gicv3_cpu_init(unsigned int cpu)
{
volatile void *sgi_base;
unsigned int i;
volatile void *redist_base_cpu;
u64 typer;
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
redist_base_cpu = gicr_base_cpu(cpu);
sgi_base = sgi_base_from_redist(redist_base_cpu);
/* Verify assumption that GICR_TYPER.Processor_number == cpu */
typer = readq_relaxed(redist_base_cpu + GICR_TYPER);
GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu);
gicv3_enable_redist(redist_base_cpu);
/*
@@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu)
/* Set a default priority threshold */
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
/* Disable Group-0 interrupts */
write_sysreg_s(0, SYS_ICC_IGRPEN0_EL1);
/* Enable non-secure Group-1 interrupts */
write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
}
@@ -400,6 +421,7 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_clear_pending = gicv3_irq_clear_pending,
.gic_irq_get_pending = gicv3_irq_get_pending,
.gic_irq_set_config = gicv3_irq_set_config,
.gic_irq_set_group = gicv3_set_group,
};
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,

View File

@@ -253,3 +253,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
its_send_cmd(cmdq_base, &cmd);
}
void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id)
{
struct its_cmd_block cmd = {};
its_encode_cmd(&cmd, GITS_CMD_SYNC);
its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
its_send_cmd(cmdq_base, &cmd);
}

View File

@@ -1184,6 +1184,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
ret, errno, slot, flags);
}
void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
{
struct userspace_mem_region *region = memslot2region(vm, slot);
struct kvm_userspace_memory_region2 tmp = region->region;
tmp.memory_size = 0;
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
}
/*
* VM Memory Region Move
*
@@ -2005,6 +2015,7 @@ static struct exit_reason {
KVM_EXIT_STRING(NOTIFY),
KVM_EXIT_STRING(LOONGARCH_IOCSR),
KVM_EXIT_STRING(MEMORY_FAULT),
KVM_EXIT_STRING(ARM_SEA),
};
/*