Mirror of https://github.com/torvalds/linux.git
Merge tag 'kvmarm-6.19' of https://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 6.19

- Support for userspace handling of synchronous external aborts (SEAs),
  allowing the VMM to potentially handle the abort in a non-fatal manner.

- Large rework of the VGIC's list register handling with the goal of
  supporting more active/pending IRQs than available list registers in
  hardware. In addition, the VGIC now supports EOImode==1 style
  deactivations for IRQs, which may occur on a different vCPU than the one
  that acked the IRQ.

- Support for FEAT_XNX (user / privileged execute permissions) and
  FEAT_HAF (hardware update to the Access Flag) in the software page
  table walkers and shadow MMU.

- Allow page table destruction to reschedule, fixing long need_resched
  latencies observed when destroying a large VM.

- Minor fixes to KVM and selftests
@@ -7286,6 +7286,41 @@ exit, even without calls to ``KVM_ENABLE_CAP`` or similar. In this case,
it will enter with output fields already valid; in the common case, the
``unknown.ret`` field of the union will be ``TDVMCALL_STATUS_SUBFUNC_UNSUPPORTED``.
Userspace need not do anything if it does not wish to support a TDVMCALL.

::

  /* KVM_EXIT_ARM_SEA */
  struct {
  #define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID	(1ULL << 0)
    __u64 flags;
    __u64 esr;
    __u64 gva;
    __u64 gpa;
  } arm_sea;

Used on arm64 systems. When the VM capability ``KVM_CAP_ARM_SEA_TO_USER`` is
enabled, KVM exits to userspace if a guest access causes a synchronous
external abort (SEA) and the host APEI fails to handle the SEA.

``esr`` is set to a sanitized value of ESR_EL2 from the exception taken to KVM,
consisting of the following fields:

- ``ESR_EL2.EC``
- ``ESR_EL2.IL``
- ``ESR_EL2.FnV``
- ``ESR_EL2.EA``
- ``ESR_EL2.CM``
- ``ESR_EL2.WNR``
- ``ESR_EL2.FSC``
- ``ESR_EL2.SET`` (when FEAT_RAS is implemented for the VM)

``gva`` is set to the value of FAR_EL2 from the exception taken to KVM when
``ESR_EL2.FnV == 0``. Otherwise, the value of ``gva`` is unknown.

``gpa`` is set to the faulting IPA from the exception taken to KVM when
the ``KVM_EXIT_ARM_SEA_FLAG_GPA_VALID`` flag is set. Otherwise, the value of
``gpa`` is unknown.

::

  /* Fix the size of the union. */
@@ -8703,6 +8738,18 @@ This capability indicate to the userspace whether a PFNMAP memory region
can be safely mapped as cacheable. This relies on the presence of
force write back (FWB) feature support on the hardware.

7.45 KVM_CAP_ARM_SEA_TO_USER
----------------------------

:Architecture: arm64
:Target: VM
:Parameters: none
:Returns: 0 on success, -EINVAL if unsupported.

When this capability is enabled, KVM may exit to userspace for SEAs taken to
EL2 resulting from a guest access. See ``KVM_EXIT_ARM_SEA`` for more
information.
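The following is an illustrative sketch only, not part of the patch: it shows
how a VMM might enable the capability and react to the new exit reason. The
helper names are hypothetical, the ``0x3f`` mask is simply the architectural
``ESR_ELx.FSC`` field (bits [5:0]), and a real VMM would implement an actual
RAS/injection policy rather than printing. ::

  #include <stdio.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Enable SEA exits on the VM file descriptor (VM-scoped capability). */
  static int enable_sea_exits(int vm_fd)
  {
          struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_SEA_TO_USER };

          return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
  }

  /* Called from the vCPU loop when run->exit_reason == KVM_EXIT_ARM_SEA. */
  static void handle_arm_sea(struct kvm_run *run)
  {
          unsigned long long esr = run->arm_sea.esr;

          printf("SEA: esr=0x%llx fsc=0x%llx\n", esr, esr & 0x3f);

          if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID)
                  printf("     faulting IPA=0x%llx\n",
                         (unsigned long long)run->arm_sea.gpa);

          /*
           * Policy is up to the VMM: e.g. record the error and inject a
           * virtual SEA or SError back into the guest, or stop the VM.
           */
  }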

8. Other capabilities.
======================

@@ -111,6 +111,7 @@
#define TCR_EL2_DS		(1UL << 32)
#define TCR_EL2_RES1		((1U << 31) | (1 << 23))
#define TCR_EL2_HPD		(1 << 24)
#define TCR_EL2_HA		(1 << 21)
#define TCR_EL2_TBI		(1 << 20)
#define TCR_EL2_PS_SHIFT	16
#define TCR_EL2_PS_MASK		(7 << TCR_EL2_PS_SHIFT)

@@ -79,7 +79,7 @@ enum __kvm_host_smccc_func {
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
|
||||
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
|
||||
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
|
||||
__KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
|
||||
@@ -246,9 +246,9 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
|
||||
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
|
||||
|
||||
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
|
||||
extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
|
||||
extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
|
||||
extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
|
||||
extern int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
|
||||
extern int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
|
||||
extern int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
|
||||
|
||||
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
||||
@@ -54,6 +54,7 @@
|
||||
#define KVM_REQ_NESTED_S2_UNMAP KVM_ARCH_REQ(8)
|
||||
#define KVM_REQ_GUEST_HYP_IRQ_PENDING KVM_ARCH_REQ(9)
|
||||
#define KVM_REQ_MAP_L1_VNCR_EL2 KVM_ARCH_REQ(10)
|
||||
#define KVM_REQ_VGIC_PROCESS_UPDATE KVM_ARCH_REQ(11)
|
||||
|
||||
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
|
||||
KVM_DIRTY_LOG_INITIALLY_SET)
|
||||
@@ -350,6 +351,8 @@ struct kvm_arch {
|
||||
#define KVM_ARCH_FLAG_GUEST_HAS_SVE 9
|
||||
/* MIDR_EL1, REVIDR_EL1, and AIDR_EL1 are writable from userspace */
|
||||
#define KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS 10
|
||||
/* Unhandled SEAs are taken to userspace */
|
||||
#define KVM_ARCH_FLAG_EXIT_SEA 11
|
||||
unsigned long flags;
|
||||
|
||||
/* VM-wide vCPU feature set */
|
||||
|
||||
@@ -77,12 +77,13 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
|
||||
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
|
||||
|
||||
u64 __gic_v3_get_lr(unsigned int lr);
|
||||
void __gic_v3_set_lr(u64 val, int lr);
|
||||
|
||||
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
|
||||
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
|
||||
void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
|
||||
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if);
|
||||
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
|
||||
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if);
|
||||
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if);
|
||||
int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
||||
@@ -120,9 +120,42 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
|
||||
return trans->writable;
|
||||
}
|
||||
|
||||
static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
|
||||
static inline bool kvm_has_xnx(struct kvm *kvm)
|
||||
{
|
||||
return !(trans->desc & BIT(54));
|
||||
return cpus_have_final_cap(ARM64_HAS_XNX) &&
|
||||
kvm_has_feat(kvm, ID_AA64MMFR1_EL1, XNX, IMP);
|
||||
}
|
||||
|
||||
static inline bool kvm_s2_trans_exec_el0(struct kvm *kvm, struct kvm_s2_trans *trans)
|
||||
{
|
||||
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
|
||||
|
||||
if (!kvm_has_xnx(kvm))
|
||||
xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
|
||||
|
||||
switch (xn) {
|
||||
case 0b00:
|
||||
case 0b01:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool kvm_s2_trans_exec_el1(struct kvm *kvm, struct kvm_s2_trans *trans)
|
||||
{
|
||||
u8 xn = FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, trans->desc);
|
||||
|
||||
if (!kvm_has_xnx(kvm))
|
||||
xn &= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, 0b10);
|
||||
|
||||
switch (xn) {
|
||||
case 0b00:
|
||||
case 0b11:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
|
||||
@@ -320,6 +353,7 @@ struct s1_walk_info {
|
||||
bool be;
|
||||
bool s2;
|
||||
bool pa52bit;
|
||||
bool ha;
|
||||
};
|
||||
|
||||
struct s1_walk_result {
|
||||
@@ -370,4 +404,6 @@ void kvm_handle_s1e2_tlbi(struct kvm_vcpu *vcpu, u32 inst, u64 val);
|
||||
(FIX_VNCR - __c); \
|
||||
})
|
||||
|
||||
int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new);
|
||||
|
||||
#endif /* __ARM64_KVM_NESTED_H */
|
||||
|
||||
@@ -89,7 +89,7 @@ typedef u64 kvm_pte_t;
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54)
|
||||
#define KVM_PTE_LEAF_ATTR_HI_S2_XN GENMASK(54, 53)
|
||||
|
||||
#define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50)
|
||||
|
||||
@@ -240,7 +240,9 @@ enum kvm_pgtable_stage2_flags {
|
||||
|
||||
/**
|
||||
* enum kvm_pgtable_prot - Page-table permissions and attributes.
|
||||
* @KVM_PGTABLE_PROT_X: Execute permission.
|
||||
* @KVM_PGTABLE_PROT_UX: Unprivileged execute permission.
|
||||
* @KVM_PGTABLE_PROT_PX: Privileged execute permission.
|
||||
* @KVM_PGTABLE_PROT_X: Privileged and unprivileged execute permission.
|
||||
* @KVM_PGTABLE_PROT_W: Write permission.
|
||||
* @KVM_PGTABLE_PROT_R: Read permission.
|
||||
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
|
||||
@@ -251,12 +253,15 @@ enum kvm_pgtable_stage2_flags {
|
||||
* @KVM_PGTABLE_PROT_SW3: Software bit 3.
|
||||
*/
|
||||
enum kvm_pgtable_prot {
|
||||
KVM_PGTABLE_PROT_X = BIT(0),
|
||||
KVM_PGTABLE_PROT_W = BIT(1),
|
||||
KVM_PGTABLE_PROT_R = BIT(2),
|
||||
KVM_PGTABLE_PROT_PX = BIT(0),
|
||||
KVM_PGTABLE_PROT_UX = BIT(1),
|
||||
KVM_PGTABLE_PROT_X = KVM_PGTABLE_PROT_PX |
|
||||
KVM_PGTABLE_PROT_UX,
|
||||
KVM_PGTABLE_PROT_W = BIT(2),
|
||||
KVM_PGTABLE_PROT_R = BIT(3),
|
||||
|
||||
KVM_PGTABLE_PROT_DEVICE = BIT(3),
|
||||
KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
|
||||
KVM_PGTABLE_PROT_DEVICE = BIT(4),
|
||||
KVM_PGTABLE_PROT_NORMAL_NC = BIT(5),
|
||||
|
||||
KVM_PGTABLE_PROT_SW0 = BIT(55),
|
||||
KVM_PGTABLE_PROT_SW1 = BIT(56),
|
||||
@@ -355,6 +360,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
|
||||
return pteref;
|
||||
}
|
||||
|
||||
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
|
||||
{
|
||||
return pteref;
|
||||
}
|
||||
|
||||
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
|
||||
{
|
||||
/*
|
||||
@@ -384,6 +394,11 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
|
||||
return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
|
||||
}
|
||||
|
||||
static inline kvm_pte_t *kvm_dereference_pteref_raw(kvm_pteref_t pteref)
|
||||
{
|
||||
return rcu_dereference_raw(pteref);
|
||||
}
|
||||
|
||||
static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
|
||||
{
|
||||
if (walker->flags & KVM_PGTABLE_WALK_SHARED)
|
||||
@@ -551,6 +566,26 @@ static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2
 */
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);

/**
 * kvm_pgtable_stage2_destroy_range() - Destroy the unlinked range of addresses.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr:	Intermediate physical address at which to begin destruction.
 * @size:	Size of the range to destroy.
 *
 * The page-table is assumed to be unreachable by any hardware walkers prior
 * to freeing and therefore no TLB invalidation is performed.
 */
void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
				      u64 addr, u64 size);

/**
 * kvm_pgtable_stage2_destroy_pgd() - Destroy the PGD of guest stage-2 page-table.
 * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
 *
 * It is assumed that the rest of the page-table is freed before this operation.
 */
void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
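As a usage note (illustrative only, and intentionally mirroring the
stage2_destroy_range()/kvm_stage2_destroy() helpers added to mmu.c later in
this diff): the intended pattern is to tear the table down in chunks so the
caller can reschedule between them, and only then free the PGD. The 1GiB chunk
size below is an arbitrary assumption of the sketch.

static void example_stage2_teardown(struct kvm_pgtable *pgt)
{
	u64 addr = 0, end = BIT(pgt->ia_bits), next;

	do {
		/* Arbitrary chunk size, purely for illustration. */
		next = min(end, addr + SZ_1G);
		kvm_pgtable_stage2_destroy_range(pgt, addr, next - addr);
		if (next != end)
			cond_resched();
	} while (addr = next, addr != end);

	kvm_pgtable_stage2_destroy_pgd(pgt);
}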

/**
 * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
 * @mm_ops:	Memory management callbacks.

@@ -180,7 +180,9 @@ struct pkvm_mapping {
|
||||
|
||||
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
|
||||
struct kvm_pgtable_mm_ops *mm_ops);
|
||||
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
|
||||
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
|
||||
u64 addr, u64 size);
|
||||
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt);
|
||||
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
|
||||
enum kvm_pgtable_prot prot, void *mc,
|
||||
enum kvm_pgtable_walk_flags flags);
|
||||
|
||||
@@ -40,8 +40,13 @@
|
||||
*/
|
||||
#define HVC_FINALISE_EL2 3
|
||||
|
||||
/*
|
||||
* HVC_GET_ICH_VTR_EL2 - Retrieve the ICH_VTR_EL2 value
|
||||
*/
|
||||
#define HVC_GET_ICH_VTR_EL2 4
|
||||
|
||||
/* Max number of HYP stub hypercalls */
|
||||
#define HVC_STUB_HCALL_NR 4
|
||||
#define HVC_STUB_HCALL_NR 5
|
||||
|
||||
/* Error returned when an invalid stub number is passed into x0 */
|
||||
#define HVC_STUB_ERR 0xbadca11
|
||||
|
||||
@@ -2304,6 +2304,49 @@ static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool can_trap_icv_dir_el1(const struct arm64_cpu_capabilities *entry,
|
||||
int scope)
|
||||
{
|
||||
static const struct midr_range has_vgic_v3[] = {
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_PRO),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX),
|
||||
{},
|
||||
};
|
||||
struct arm_smccc_res res = {};
|
||||
|
||||
BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV3_CPUIF);
|
||||
BUILD_BUG_ON(ARM64_HAS_ICH_HCR_EL2_TDIR <= ARM64_HAS_GICV5_LEGACY);
|
||||
if (!this_cpu_has_cap(ARM64_HAS_GICV3_CPUIF) &&
|
||||
!is_midr_in_range_list(has_vgic_v3))
|
||||
return false;
|
||||
|
||||
if (!is_hyp_mode_available())
|
||||
return false;
|
||||
|
||||
if (this_cpu_has_cap(ARM64_HAS_GICV5_LEGACY))
|
||||
return true;
|
||||
|
||||
if (is_kernel_in_hyp_mode())
|
||||
res.a1 = read_sysreg_s(SYS_ICH_VTR_EL2);
|
||||
else
|
||||
arm_smccc_1_1_hvc(HVC_GET_ICH_VTR_EL2, &res);
|
||||
|
||||
if (res.a0 == HVC_STUB_ERR)
|
||||
return false;
|
||||
|
||||
return res.a1 & ICH_VTR_EL2_TDS;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM64_BTI
|
||||
static void bti_enable(const struct arm64_cpu_capabilities *__unused)
|
||||
{
|
||||
@@ -2815,6 +2858,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
.matches = has_gic_prio_relaxed_sync,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
/*
|
||||
* Depends on having GICv3
|
||||
*/
|
||||
.desc = "ICV_DIR_EL1 trapping",
|
||||
.capability = ARM64_HAS_ICH_HCR_EL2_TDIR,
|
||||
.type = ARM64_CPUCAP_EARLY_LOCAL_CPU_FEATURE,
|
||||
.matches = can_trap_icv_dir_el1,
|
||||
},
|
||||
#ifdef CONFIG_ARM64_E0PD
|
||||
{
|
||||
.desc = "E0PD",
|
||||
@@ -3089,6 +3141,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
.capability = ARM64_HAS_GICV5_LEGACY,
|
||||
.matches = test_has_gicv5_legacy,
|
||||
},
|
||||
{
|
||||
.desc = "XNX",
|
||||
.capability = ARM64_HAS_XNX,
|
||||
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
|
||||
.matches = has_cpuid_feature,
|
||||
ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, XNX, IMP)
|
||||
},
|
||||
{},
|
||||
};
|
||||
|
||||
|
||||
@@ -54,6 +54,11 @@ SYM_CODE_START_LOCAL(elx_sync)
|
||||
1: cmp x0, #HVC_FINALISE_EL2
|
||||
b.eq __finalise_el2
|
||||
|
||||
cmp x0, #HVC_GET_ICH_VTR_EL2
|
||||
b.ne 2f
|
||||
mrs_s x1, SYS_ICH_VTR_EL2
|
||||
b 9f
|
||||
|
||||
2: cmp x0, #HVC_SOFT_RESTART
|
||||
b.ne 3f
|
||||
mov x0, x2
|
||||
|
||||
@@ -91,6 +91,7 @@ KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable);
|
||||
KVM_NVHE_ALIAS(spectre_bhb_patch_wa3);
|
||||
KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb);
|
||||
KVM_NVHE_ALIAS(alt_cb_patch_nops);
|
||||
KVM_NVHE_ALIAS(kvm_compute_ich_hcr_trap_bits);
|
||||
|
||||
/* Global kernel state accessed by nVHE hyp code. */
|
||||
KVM_NVHE_ALIAS(kvm_vgic_global_state);
|
||||
|
||||
@@ -132,6 +132,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
break;
|
||||
case KVM_CAP_ARM_SEA_TO_USER:
|
||||
r = 0;
|
||||
set_bit(KVM_ARCH_FLAG_EXIT_SEA, &kvm->arch.flags);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -327,6 +331,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
|
||||
case KVM_CAP_IRQFD_RESAMPLE:
|
||||
case KVM_CAP_COUNTER_OFFSET:
|
||||
case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
|
||||
case KVM_CAP_ARM_SEA_TO_USER:
|
||||
r = 1;
|
||||
break;
|
||||
case KVM_CAP_SET_GUEST_DEBUG2:
|
||||
@@ -440,7 +445,7 @@ struct kvm *kvm_arch_alloc_vm(void)
|
||||
if (!has_vhe())
|
||||
return kzalloc(sz, GFP_KERNEL_ACCOUNT);
|
||||
|
||||
return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO);
|
||||
return kvzalloc(sz, GFP_KERNEL_ACCOUNT);
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
|
||||
@@ -659,8 +664,7 @@ nommu:
|
||||
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (is_protected_kvm_enabled()) {
|
||||
kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
|
||||
&vcpu->arch.vgic_cpu.vgic_v3);
|
||||
kvm_call_hyp(__vgic_v3_save_aprs, &vcpu->arch.vgic_cpu.vgic_v3);
|
||||
kvm_call_hyp_nvhe(__pkvm_vcpu_put);
|
||||
}
|
||||
|
||||
@@ -1042,6 +1046,10 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
|
||||
*/
|
||||
kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
|
||||
|
||||
/* Process interrupts deactivated through a trap */
|
||||
if (kvm_check_request(KVM_REQ_VGIC_PROCESS_UPDATE, vcpu))
|
||||
kvm_vgic_process_async_update(vcpu);
|
||||
|
||||
if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
|
||||
kvm_update_stolen_time(vcpu);
|
||||
|
||||
|
||||
@@ -346,6 +346,11 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
|
||||
wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x);
|
||||
|
||||
wi->ha = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HAFDBS, AF);
|
||||
wi->ha &= (wi->regime == TR_EL2 ?
|
||||
FIELD_GET(TCR_EL2_HA, tcr) :
|
||||
FIELD_GET(TCR_HA, tcr));
|
||||
|
||||
return 0;
|
||||
|
||||
addrsz:
|
||||
@@ -362,10 +367,42 @@ transfault:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
static int kvm_read_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 *desc,
|
||||
struct s1_walk_info *wi)
|
||||
{
|
||||
u64 val;
|
||||
int r;
|
||||
|
||||
r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (wi->be)
|
||||
*desc = be64_to_cpu((__force __be64)val);
|
||||
else
|
||||
*desc = le64_to_cpu((__force __le64)val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int kvm_swap_s1_desc(struct kvm_vcpu *vcpu, u64 pa, u64 old, u64 new,
|
||||
struct s1_walk_info *wi)
|
||||
{
|
||||
if (wi->be) {
|
||||
old = (__force u64)cpu_to_be64(old);
|
||||
new = (__force u64)cpu_to_be64(new);
|
||||
} else {
|
||||
old = (__force u64)cpu_to_le64(old);
|
||||
new = (__force u64)cpu_to_le64(new);
|
||||
}
|
||||
|
||||
return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
|
||||
}
|
||||
|
||||
static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
struct s1_walk_result *wr, u64 va)
|
||||
{
|
||||
u64 va_top, va_bottom, baddr, desc;
|
||||
u64 va_top, va_bottom, baddr, desc, new_desc, ipa;
|
||||
int level, stride, ret;
|
||||
|
||||
level = wi->sl;
|
||||
@@ -375,7 +412,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
va_top = get_ia_size(wi) - 1;
|
||||
|
||||
while (1) {
|
||||
u64 index, ipa;
|
||||
u64 index;
|
||||
|
||||
va_bottom = (3 - level) * stride + wi->pgshift;
|
||||
index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3);
|
||||
@@ -414,16 +451,13 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
|
||||
ret = kvm_read_s1_desc(vcpu, ipa, &desc, wi);
|
||||
if (ret) {
|
||||
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (wi->be)
|
||||
desc = be64_to_cpu((__force __be64)desc);
|
||||
else
|
||||
desc = le64_to_cpu((__force __le64)desc);
|
||||
new_desc = desc;
|
||||
|
||||
/* Invalid descriptor */
|
||||
if (!(desc & BIT(0)))
|
||||
@@ -477,6 +511,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
|
||||
if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
|
||||
goto addrsz;
|
||||
|
||||
if (wi->ha)
|
||||
new_desc |= PTE_AF;
|
||||
|
||||
if (new_desc != desc) {
|
||||
ret = kvm_swap_s1_desc(vcpu, ipa, desc, new_desc, wi);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
desc = new_desc;
|
||||
}
|
||||
|
||||
if (!(desc & PTE_AF)) {
|
||||
fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false);
|
||||
return -EACCES;
|
||||
@@ -1221,7 +1266,7 @@ static void compute_s1_permissions(struct kvm_vcpu *vcpu,
|
||||
wr->pr &= !pan;
|
||||
}
|
||||
|
||||
static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
static int handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr, u64 *par)
|
||||
{
|
||||
struct s1_walk_result wr = {};
|
||||
struct s1_walk_info wi = {};
|
||||
@@ -1246,6 +1291,11 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
|
||||
srcu_read_unlock(&vcpu->kvm->srcu, idx);
|
||||
|
||||
/*
|
||||
* Race to update a descriptor -- restart the walk.
|
||||
*/
|
||||
if (ret == -EAGAIN)
|
||||
return ret;
|
||||
if (ret)
|
||||
goto compute_par;
|
||||
|
||||
@@ -1279,7 +1329,8 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
|
||||
|
||||
compute_par:
|
||||
return compute_par_s1(vcpu, &wi, &wr);
|
||||
*par = compute_par_s1(vcpu, &wi, &wr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1407,9 +1458,10 @@ static bool par_check_s1_access_fault(u64 par)
|
||||
!(par & SYS_PAR_EL1_S));
|
||||
}
|
||||
|
||||
void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
int __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
{
|
||||
u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr);
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* If PAR_EL1 reports that AT failed on a S1 permission or access
|
||||
@@ -1421,15 +1473,20 @@ void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
*/
|
||||
if ((par & SYS_PAR_EL1_F) &&
|
||||
!par_check_s1_perm_fault(par) &&
|
||||
!par_check_s1_access_fault(par))
|
||||
par = handle_at_slow(vcpu, op, vaddr);
|
||||
!par_check_s1_access_fault(par)) {
|
||||
ret = handle_at_slow(vcpu, op, vaddr, &par);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
int __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
{
|
||||
u64 par;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* We've trapped, so everything is live on the CPU. As we will be
|
||||
@@ -1476,13 +1533,17 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
}
|
||||
|
||||
/* We failed the translation, let's replay it in slow motion */
|
||||
if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par))
|
||||
par = handle_at_slow(vcpu, op, vaddr);
|
||||
if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) {
|
||||
ret = handle_at_slow(vcpu, op, vaddr, &par);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
int __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
{
|
||||
struct kvm_s2_trans out = {};
|
||||
u64 ipa, par;
|
||||
@@ -1509,13 +1570,13 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
__kvm_at_s1e01(vcpu, op, vaddr);
|
||||
par = vcpu_read_sys_reg(vcpu, PAR_EL1);
|
||||
if (par & SYS_PAR_EL1_F)
|
||||
return;
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If we only have a single stage of translation (EL2&0), exit
|
||||
@@ -1523,14 +1584,14 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
*/
|
||||
if (compute_translation_regime(vcpu, op) == TR_EL20 ||
|
||||
!(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
/* Do the stage-2 translation */
|
||||
ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
|
||||
out.esr = 0;
|
||||
ret = kvm_walk_nested_s2(vcpu, ipa, &out);
|
||||
if (ret < 0)
|
||||
return;
|
||||
return ret;
|
||||
|
||||
/* Check the access permission */
|
||||
if (!out.esr &&
|
||||
@@ -1539,6 +1600,7 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
|
||||
|
||||
par = compute_par_s12(vcpu, par, &out);
|
||||
vcpu_write_sys_reg(vcpu, par, PAR_EL1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1637,3 +1699,97 @@ int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM64_LSE_ATOMICS
|
||||
static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
|
||||
{
|
||||
u64 tmp = old;
|
||||
int ret = 0;
|
||||
|
||||
uaccess_enable_privileged();
|
||||
|
||||
asm volatile(__LSE_PREAMBLE
|
||||
"1: cas %[old], %[new], %[addr]\n"
|
||||
"2:\n"
|
||||
_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w[ret])
|
||||
: [old] "+r" (old), [addr] "+Q" (*ptep), [ret] "+r" (ret)
|
||||
: [new] "r" (new)
|
||||
: "memory");
|
||||
|
||||
uaccess_disable_privileged();
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
if (tmp != old)
|
||||
return -EAGAIN;
|
||||
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static int __lse_swap_desc(u64 __user *ptep, u64 old, u64 new)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int __llsc_swap_desc(u64 __user *ptep, u64 old, u64 new)
|
||||
{
|
||||
int ret = 1;
|
||||
u64 tmp;
|
||||
|
||||
uaccess_enable_privileged();
|
||||
|
||||
asm volatile("prfm pstl1strm, %[addr]\n"
|
||||
"1: ldxr %[tmp], %[addr]\n"
|
||||
"sub %[tmp], %[tmp], %[old]\n"
|
||||
"cbnz %[tmp], 3f\n"
|
||||
"2: stlxr %w[ret], %[new], %[addr]\n"
|
||||
"3:\n"
|
||||
_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w[ret])
|
||||
_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w[ret])
|
||||
: [ret] "+r" (ret), [addr] "+Q" (*ptep), [tmp] "=&r" (tmp)
|
||||
: [old] "r" (old), [new] "r" (new)
|
||||
: "memory");
|
||||
|
||||
uaccess_disable_privileged();
|
||||
|
||||
/* STLXR didn't update the descriptor, or the compare failed */
|
||||
if (ret == 1)
|
||||
return -EAGAIN;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __kvm_at_swap_desc(struct kvm *kvm, gpa_t ipa, u64 old, u64 new)
|
||||
{
|
||||
struct kvm_memory_slot *slot;
|
||||
unsigned long hva;
|
||||
u64 __user *ptep;
|
||||
bool writable;
|
||||
int offset;
|
||||
gfn_t gfn;
|
||||
int r;
|
||||
|
||||
lockdep_assert(srcu_read_lock_held(&kvm->srcu));
|
||||
|
||||
gfn = ipa >> PAGE_SHIFT;
|
||||
offset = offset_in_page(ipa);
|
||||
slot = gfn_to_memslot(kvm, gfn);
|
||||
hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
|
||||
if (kvm_is_error_hva(hva))
|
||||
return -EINVAL;
|
||||
if (!writable)
|
||||
return -EPERM;
|
||||
|
||||
ptep = (u64 __user *)hva + offset;
|
||||
if (cpus_have_final_cap(ARM64_HAS_LSE_ATOMICS))
|
||||
r = __lse_swap_desc(ptep, old, new);
|
||||
else
|
||||
r = __llsc_swap_desc(ptep, old, new);
|
||||
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
||||
mark_page_dirty_in_slot(kvm, slot, gfn);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -157,6 +157,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
|
||||
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
|
||||
|
||||
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
|
||||
host_cpu_if->vgic_vmcr = hyp_cpu_if->vgic_vmcr;
|
||||
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
|
||||
host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
|
||||
}
|
||||
@@ -464,11 +465,11 @@ static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
|
||||
__vgic_v3_init_lrs();
|
||||
}
|
||||
|
||||
static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
|
||||
static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
|
||||
{
|
||||
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
|
||||
|
||||
__vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if));
|
||||
__vgic_v3_save_aprs(kern_hyp_va(cpu_if));
|
||||
}
|
||||
|
||||
static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
|
||||
@@ -616,7 +617,7 @@ static const hcall_t host_hcall[] = {
|
||||
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
|
||||
HANDLE_FUNC(__kvm_flush_cpu_context),
|
||||
HANDLE_FUNC(__kvm_timer_set_cntvoff),
|
||||
HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
|
||||
HANDLE_FUNC(__vgic_v3_save_aprs),
|
||||
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
|
||||
HANDLE_FUNC(__pkvm_reserve_vm),
|
||||
HANDLE_FUNC(__pkvm_unreserve_vm),
|
||||
|
||||
@@ -337,6 +337,9 @@ static void pkvm_init_features_from_host(struct pkvm_hyp_vm *hyp_vm, const struc
|
||||
/* CTR_EL0 is always under host control, even for protected VMs. */
|
||||
hyp_vm->kvm.arch.ctr_el0 = host_kvm->arch.ctr_el0;
|
||||
|
||||
/* Preserve the vgic model so that GICv3 emulation works */
|
||||
hyp_vm->kvm.arch.vgic.vgic_model = host_kvm->arch.vgic.vgic_model;
|
||||
|
||||
if (test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &host_kvm->arch.flags))
|
||||
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
|
||||
|
||||
|
||||
@@ -444,6 +444,8 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
|
||||
|
||||
/* Scalable Vector Registers are restricted. */
|
||||
|
||||
HOST_HANDLED(SYS_ICC_PMR_EL1),
|
||||
|
||||
RAZ_WI(SYS_ERRIDR_EL1),
|
||||
RAZ_WI(SYS_ERRSELR_EL1),
|
||||
RAZ_WI(SYS_ERXFR_EL1),
|
||||
@@ -457,9 +459,12 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = {
|
||||
|
||||
/* Limited Ordering Regions Registers are restricted. */
|
||||
|
||||
HOST_HANDLED(SYS_ICC_DIR_EL1),
|
||||
HOST_HANDLED(SYS_ICC_RPR_EL1),
|
||||
HOST_HANDLED(SYS_ICC_SGI1R_EL1),
|
||||
HOST_HANDLED(SYS_ICC_ASGI1R_EL1),
|
||||
HOST_HANDLED(SYS_ICC_SGI0R_EL1),
|
||||
HOST_HANDLED(SYS_ICC_CTLR_EL1),
|
||||
{ SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, },
|
||||
|
||||
HOST_HANDLED(SYS_CCSIDR_EL1),
|
||||
|
||||
@@ -661,11 +661,37 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,

#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))

static int stage2_set_xn_attr(enum kvm_pgtable_prot prot, kvm_pte_t *attr)
{
	bool px, ux;
	u8 xn;

	px = prot & KVM_PGTABLE_PROT_PX;
	ux = prot & KVM_PGTABLE_PROT_UX;

	if (!cpus_have_final_cap(ARM64_HAS_XNX) && px != ux)
		return -EINVAL;

	if (px && ux)
		xn = 0b00;
	else if (!px && ux)
		xn = 0b01;
	else if (!px && !ux)
		xn = 0b10;
	else
		xn = 0b11;

	*attr &= ~KVM_PTE_LEAF_ATTR_HI_S2_XN;
	*attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_HI_S2_XN, xn);
	return 0;
}
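For reference, the two-bit stage-2 XN encoding produced here (and decoded by
kvm_s2_trans_exec_el0()/kvm_s2_trans_exec_el1() earlier in this diff) can be
exercised with the small standalone sketch below. The table is derived from
the switch statements in this series rather than quoted from the Arm ARM, so
treat it as illustrative.

/*
 * Standalone illustration of the FEAT_XNX stage-2 XN[54:53] encoding as
 * used by this series:
 *   0b00 - executable at EL1 and EL0
 *   0b01 - executable at EL0 only
 *   0b10 - not executable at either exception level
 *   0b11 - executable at EL1 only
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned int encode_xn(bool px, bool ux)
{
	if (px && ux)
		return 0;		/* 0b00 */
	if (!px && ux)
		return 1;		/* 0b01 */
	if (!px && !ux)
		return 2;		/* 0b10 */
	return 3;			/* 0b11: px && !ux */
}

int main(void)
{
	for (int px = 0; px <= 1; px++)
		for (int ux = 0; ux <= 1; ux++)
			printf("PX=%d UX=%d -> XN=%u\n", px, ux,
			       encode_xn(px, ux));
	return 0;
}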

static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
				kvm_pte_t *ptep)
{
	kvm_pte_t attr;
	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
	int r;

	switch (prot & (KVM_PGTABLE_PROT_DEVICE |
			KVM_PGTABLE_PROT_NORMAL_NC)) {
@@ -685,8 +711,9 @@ static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot p
|
||||
attr = KVM_S2_MEMATTR(pgt, NORMAL);
|
||||
}
|
||||
|
||||
if (!(prot & KVM_PGTABLE_PROT_X))
|
||||
attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
|
||||
r = stage2_set_xn_attr(prot, &attr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (prot & KVM_PGTABLE_PROT_R)
|
||||
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
|
||||
@@ -715,8 +742,20 @@ enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte)
|
||||
prot |= KVM_PGTABLE_PROT_R;
|
||||
if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W)
|
||||
prot |= KVM_PGTABLE_PROT_W;
|
||||
if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN))
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
|
||||
switch (FIELD_GET(KVM_PTE_LEAF_ATTR_HI_S2_XN, pte)) {
|
||||
case 0b00:
|
||||
prot |= KVM_PGTABLE_PROT_PX | KVM_PGTABLE_PROT_UX;
|
||||
break;
|
||||
case 0b01:
|
||||
prot |= KVM_PGTABLE_PROT_UX;
|
||||
break;
|
||||
case 0b11:
|
||||
prot |= KVM_PGTABLE_PROT_PX;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return prot;
|
||||
}
|
||||
@@ -1290,9 +1329,9 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
|
||||
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
|
||||
enum kvm_pgtable_prot prot, enum kvm_pgtable_walk_flags flags)
|
||||
{
|
||||
int ret;
|
||||
kvm_pte_t xn = 0, set = 0, clr = 0;
|
||||
s8 level;
|
||||
kvm_pte_t set = 0, clr = 0;
|
||||
int ret;
|
||||
|
||||
if (prot & KVM_PTE_LEAF_ATTR_HI_SW)
|
||||
return -EINVAL;
|
||||
@@ -1303,8 +1342,12 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
|
||||
if (prot & KVM_PGTABLE_PROT_W)
|
||||
set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
|
||||
|
||||
if (prot & KVM_PGTABLE_PROT_X)
|
||||
clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
|
||||
ret = stage2_set_xn_attr(prot, &xn);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
set |= xn & KVM_PTE_LEAF_ATTR_HI_S2_XN;
|
||||
clr |= ~xn & KVM_PTE_LEAF_ATTR_HI_S2_XN;
|
||||
|
||||
ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, flags);
|
||||
if (!ret || ret == -EAGAIN)
|
||||
@@ -1535,37 +1578,80 @@ size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
|
||||
return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
enum kvm_pgtable_walk_flags visit)
|
||||
static int stage2_free_leaf(const struct kvm_pgtable_visit_ctx *ctx)
|
||||
{
|
||||
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
|
||||
|
||||
if (!stage2_pte_is_counted(ctx->old))
|
||||
return 0;
|
||||
|
||||
mm_ops->put_page(ctx->ptep);
|
||||
|
||||
if (kvm_pte_table(ctx->old, ctx->level))
|
||||
mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
|
||||
static int stage2_free_table_post(const struct kvm_pgtable_visit_ctx *ctx)
|
||||
{
|
||||
struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
|
||||
kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops);
|
||||
|
||||
if (mm_ops->page_count(childp) != 1)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Drop references and clear the now stale PTE to avoid rewalking the
|
||||
* freed page table.
|
||||
*/
|
||||
mm_ops->put_page(ctx->ptep);
|
||||
mm_ops->put_page(childp);
|
||||
kvm_clear_pte(ctx->ptep);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
|
||||
enum kvm_pgtable_walk_flags visit)
|
||||
{
|
||||
if (!stage2_pte_is_counted(ctx->old))
|
||||
return 0;
|
||||
|
||||
switch (visit) {
|
||||
case KVM_PGTABLE_WALK_LEAF:
|
||||
return stage2_free_leaf(ctx);
|
||||
case KVM_PGTABLE_WALK_TABLE_POST:
|
||||
return stage2_free_table_post(ctx);
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
void kvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
|
||||
u64 addr, u64 size)
|
||||
{
|
||||
size_t pgd_sz;
|
||||
struct kvm_pgtable_walker walker = {
|
||||
.cb = stage2_free_walker,
|
||||
.flags = KVM_PGTABLE_WALK_LEAF |
|
||||
KVM_PGTABLE_WALK_TABLE_POST,
|
||||
};
|
||||
|
||||
WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
|
||||
WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
|
||||
}
|
||||
|
||||
void kvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
|
||||
{
|
||||
size_t pgd_sz;
|
||||
|
||||
pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
|
||||
pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
|
||||
|
||||
/*
 * Since the pgtable is unlinked at this point, and not shared with
 * other walkers, safely dereference pgd with kvm_dereference_pteref_raw().
 */
|
||||
pgt->mm_ops->free_pages_exact(kvm_dereference_pteref_raw(pgt->pgd), pgd_sz);
|
||||
pgt->pgd = NULL;
|
||||
}
|
||||
|
||||
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
|
||||
{
|
||||
kvm_pgtable_stage2_destroy_range(pgt, 0, BIT(pgt->ia_bits));
|
||||
kvm_pgtable_stage2_destroy_pgd(pgt);
|
||||
}
|
||||
|
||||
void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
|
||||
{
|
||||
kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
|
||||
|
||||
@@ -63,6 +63,10 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Handle deactivation as a normal exit */
|
||||
if ((fault_ipa - vgic->vgic_cpu_base) >= GIC_CPU_DEACTIVATE)
|
||||
return 0;
|
||||
|
||||
rd = kvm_vcpu_dabt_get_rd(vcpu);
|
||||
addr = kvm_vgic_global_state.vcpu_hyp_va;
|
||||
addr += fault_ipa - vgic->vgic_cpu_base;
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
#include <asm/kvm_hyp.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
#include "../../vgic/vgic.h"
|
||||
|
||||
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
|
||||
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
|
||||
#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5))
|
||||
@@ -58,7 +60,7 @@ u64 __gic_v3_get_lr(unsigned int lr)
|
||||
unreachable();
|
||||
}
|
||||
|
||||
static void __gic_v3_set_lr(u64 val, int lr)
|
||||
void __gic_v3_set_lr(u64 val, int lr)
|
||||
{
|
||||
switch (lr & 0xf) {
|
||||
case 0:
|
||||
@@ -196,6 +198,11 @@ static u32 __vgic_v3_read_ap1rn(int n)
|
||||
return val;
|
||||
}
|
||||
|
||||
static u64 compute_ich_hcr(struct vgic_v3_cpu_if *cpu_if)
|
||||
{
|
||||
return cpu_if->vgic_hcr | vgic_ich_hcr_trap_bits();
|
||||
}
|
||||
|
||||
void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
|
||||
{
|
||||
u64 used_lrs = cpu_if->used_lrs;
|
||||
@@ -212,14 +219,12 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
|
||||
}
|
||||
}
|
||||
|
||||
if (used_lrs || cpu_if->its_vpe.its_vm) {
|
||||
if (used_lrs) {
|
||||
int i;
|
||||
u32 elrsr;
|
||||
|
||||
elrsr = read_gicreg(ICH_ELRSR_EL2);
|
||||
|
||||
write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EL2_En, ICH_HCR_EL2);
|
||||
|
||||
for (i = 0; i < used_lrs; i++) {
|
||||
if (elrsr & (1 << i))
|
||||
cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
|
||||
@@ -229,6 +234,23 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
|
||||
__gic_v3_set_lr(0, i);
|
||||
}
|
||||
}
|
||||
|
||||
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
|
||||
|
||||
if (cpu_if->vgic_hcr & ICH_HCR_EL2_LRENPIE) {
|
||||
u64 val = read_gicreg(ICH_HCR_EL2);
|
||||
cpu_if->vgic_hcr &= ~ICH_HCR_EL2_EOIcount;
|
||||
cpu_if->vgic_hcr |= val & ICH_HCR_EL2_EOIcount;
|
||||
}
|
||||
|
||||
write_gicreg(0, ICH_HCR_EL2);
|
||||
|
||||
/*
|
||||
* Hack alert: On NV, this results in a trap so that the above write
|
||||
* actually takes effect... No synchronisation is necessary, as we
|
||||
* only care about the effects when this traps.
|
||||
*/
|
||||
read_gicreg(ICH_MISR_EL2);
|
||||
}
|
||||
|
||||
void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
|
||||
@@ -236,12 +258,10 @@ void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if)
|
||||
u64 used_lrs = cpu_if->used_lrs;
|
||||
int i;
|
||||
|
||||
if (used_lrs || cpu_if->its_vpe.its_vm) {
|
||||
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
|
||||
write_gicreg(compute_ich_hcr(cpu_if), ICH_HCR_EL2);
|
||||
|
||||
for (i = 0; i < used_lrs; i++)
|
||||
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
|
||||
}
|
||||
for (i = 0; i < used_lrs; i++)
|
||||
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
|
||||
|
||||
/*
|
||||
* Ensure that writes to the LRs, and on non-VHE systems ensure that
|
||||
@@ -307,24 +327,20 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
|
||||
}
|
||||
|
||||
/*
|
||||
* If we need to trap system registers, we must write
|
||||
* ICH_HCR_EL2 anyway, even if no interrupts are being
|
||||
* injected. Note that this also applies if we don't expect
|
||||
* any system register access (no vgic at all).
|
||||
* If we need to trap system registers, we must write ICH_HCR_EL2
|
||||
* anyway, even if no interrupts are being injected. Note that this
|
||||
* also applies if we don't expect any system register access (no
|
||||
* vgic at all). In any case, no need to provide MI configuration.
|
||||
*/
|
||||
if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
|
||||
cpu_if->its_vpe.its_vm || !cpu_if->vgic_sre)
|
||||
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
|
||||
write_gicreg(vgic_ich_hcr_trap_bits() | ICH_HCR_EL2_En, ICH_HCR_EL2);
|
||||
}
|
||||
|
||||
void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
|
||||
{
|
||||
u64 val;
|
||||
|
||||
if (!cpu_if->vgic_sre) {
|
||||
cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
|
||||
}
|
||||
|
||||
/* Only restore SRE if the host implements the GICv2 interface */
|
||||
if (static_branch_unlikely(&vgic_v3_has_v2_compat)) {
|
||||
val = read_gicreg(ICC_SRE_EL2);
|
||||
@@ -346,7 +362,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
|
||||
write_gicreg(0, ICH_HCR_EL2);
|
||||
}
|
||||
|
||||
static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
|
||||
void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
|
||||
{
|
||||
u64 val;
|
||||
u32 nr_pre_bits;
|
||||
@@ -507,13 +523,6 @@ static void __vgic_v3_write_vmcr(u32 vmcr)
|
||||
write_gicreg(vmcr, ICH_VMCR_EL2);
|
||||
}
|
||||
|
||||
void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
|
||||
{
|
||||
__vgic_v3_save_aprs(cpu_if);
|
||||
if (cpu_if->vgic_sre)
|
||||
cpu_if->vgic_vmcr = __vgic_v3_read_vmcr();
|
||||
}
|
||||
|
||||
void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
|
||||
{
|
||||
__vgic_v3_compat_mode_enable();
|
||||
@@ -790,7 +799,7 @@ static void __vgic_v3_bump_eoicount(void)
|
||||
write_gicreg(hcr, ICH_HCR_EL2);
|
||||
}
|
||||
|
||||
static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
|
||||
static int ___vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
|
||||
{
|
||||
u32 vid = vcpu_get_reg(vcpu, rt);
|
||||
u64 lr_val;
|
||||
@@ -798,19 +807,25 @@ static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
|
||||
|
||||
/* EOImode == 0, nothing to be done here */
|
||||
if (!(vmcr & ICH_VMCR_EOIM_MASK))
|
||||
return;
|
||||
return 1;
|
||||
|
||||
/* No deactivate to be performed on an LPI */
|
||||
if (vid >= VGIC_MIN_LPI)
|
||||
return;
|
||||
return 1;
|
||||
|
||||
lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
|
||||
if (lr == -1) {
|
||||
__vgic_v3_bump_eoicount();
|
||||
return;
|
||||
if (lr != -1) {
|
||||
__vgic_v3_clear_active_lr(lr, lr_val);
|
||||
return 1;
|
||||
}
|
||||
|
||||
__vgic_v3_clear_active_lr(lr, lr_val);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __vgic_v3_write_dir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
|
||||
{
|
||||
if (!___vgic_v3_write_dir(vcpu, vmcr, rt))
|
||||
__vgic_v3_bump_eoicount();
|
||||
}
|
||||
|
||||
static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
|
||||
@@ -1245,6 +1260,21 @@ int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu)
|
||||
case SYS_ICC_DIR_EL1:
|
||||
if (unlikely(is_read))
|
||||
return 0;
|
||||
/*
|
||||
* Full exit if required to handle overflow deactivation,
|
||||
* unless we can emulate it in the LRs (likely the majority
|
||||
* of the cases).
|
||||
*/
|
||||
if (vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr & ICH_HCR_EL2_TDIR) {
|
||||
int ret;
|
||||
|
||||
ret = ___vgic_v3_write_dir(vcpu, __vgic_v3_read_vmcr(),
|
||||
kvm_vcpu_sys_get_rt(vcpu));
|
||||
if (ret)
|
||||
__kvm_skip_instr(vcpu);
|
||||
|
||||
return ret;
|
||||
}
|
||||
fn = __vgic_v3_write_dir;
|
||||
break;
|
||||
case SYS_ICC_RPR_EL1:
|
||||
|
||||
@@ -904,6 +904,38 @@ static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Assume that @pgt is valid and unlinked from the KVM MMU to free the
|
||||
* page-table without taking the kvm_mmu_lock and without performing any
|
||||
* TLB invalidations.
|
||||
*
|
||||
 * Also, the range of addresses can be large enough to cause need_resched
 * warnings, for instance on CONFIG_PREEMPT_NONE kernels. Hence, invoke
 * cond_resched() periodically to avoid hogging the CPU for too long and
 * to let something else be scheduled, if required.
|
||||
*/
|
||||
static void stage2_destroy_range(struct kvm_pgtable *pgt, phys_addr_t addr,
|
||||
phys_addr_t end)
|
||||
{
|
||||
u64 next;
|
||||
|
||||
do {
|
||||
next = stage2_range_addr_end(addr, end);
|
||||
KVM_PGT_FN(kvm_pgtable_stage2_destroy_range)(pgt, addr,
|
||||
next - addr);
|
||||
if (next != end)
|
||||
cond_resched();
|
||||
} while (addr = next, addr != end);
|
||||
}
|
||||
|
||||
static void kvm_stage2_destroy(struct kvm_pgtable *pgt)
|
||||
{
|
||||
unsigned int ia_bits = VTCR_EL2_IPA(pgt->mmu->vtcr);
|
||||
|
||||
stage2_destroy_range(pgt, 0, BIT(ia_bits));
|
||||
KVM_PGT_FN(kvm_pgtable_stage2_destroy_pgd)(pgt);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
|
||||
* @kvm: The pointer to the KVM structure
|
||||
@@ -980,7 +1012,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
|
||||
return 0;
|
||||
|
||||
out_destroy_pgtable:
|
||||
KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
|
||||
kvm_stage2_destroy(pgt);
|
||||
out_free_pgtable:
|
||||
kfree(pgt);
|
||||
return err;
|
||||
@@ -1081,7 +1113,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
|
||||
write_unlock(&kvm->mmu_lock);
|
||||
|
||||
if (pgt) {
|
||||
KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
|
||||
kvm_stage2_destroy(pgt);
|
||||
kfree(pgt);
|
||||
}
|
||||
}
|
||||
@@ -1521,6 +1553,16 @@ static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
|
||||
*prot |= kvm_encode_nested_level(nested);
|
||||
}
|
||||
|
||||
static void adjust_nested_exec_perms(struct kvm *kvm,
|
||||
struct kvm_s2_trans *nested,
|
||||
enum kvm_pgtable_prot *prot)
|
||||
{
|
||||
if (!kvm_s2_trans_exec_el0(kvm, nested))
|
||||
*prot &= ~KVM_PGTABLE_PROT_UX;
|
||||
if (!kvm_s2_trans_exec_el1(kvm, nested))
|
||||
*prot &= ~KVM_PGTABLE_PROT_PX;
|
||||
}
|
||||
|
||||
#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED)
|
||||
|
||||
static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
@@ -1572,11 +1614,12 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
if (writable)
|
||||
prot |= KVM_PGTABLE_PROT_W;
|
||||
|
||||
if (exec_fault ||
|
||||
(cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
|
||||
(!nested || kvm_s2_trans_executable(nested))))
|
||||
if (exec_fault || cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
|
||||
if (nested)
|
||||
adjust_nested_exec_perms(kvm, nested, &prot);
|
||||
|
||||
kvm_fault_lock(kvm);
|
||||
if (mmu_invalidate_retry(kvm, mmu_seq)) {
|
||||
ret = -EAGAIN;
|
||||
@@ -1851,11 +1894,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
|
||||
else
|
||||
prot |= KVM_PGTABLE_PROT_DEVICE;
|
||||
} else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
|
||||
(!nested || kvm_s2_trans_executable(nested))) {
|
||||
} else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) {
|
||||
prot |= KVM_PGTABLE_PROT_X;
|
||||
}
|
||||
|
||||
if (nested)
|
||||
adjust_nested_exec_perms(kvm, nested, &prot);
|
||||
|
||||
/*
|
||||
* Under the premise of getting a FSC_PERM fault, we just need to relax
|
||||
* permissions only if vma_pagesize equals fault_granule. Otherwise,
|
||||
@@ -1899,8 +1944,48 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
|
||||
read_unlock(&vcpu->kvm->mmu_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
 * Returns true if the SEA should be handled locally within KVM, i.e. when the
 * abort is caused by a kernel memory allocation (e.g. stage-2 table memory).
|
||||
*/
|
||||
static bool host_owns_sea(struct kvm_vcpu *vcpu, u64 esr)
|
||||
{
|
||||
/*
|
||||
* Without FEAT_RAS HCR_EL2.TEA is RES0, meaning any external abort
|
||||
* taken from a guest EL to EL2 is due to a host-imposed access (e.g.
|
||||
* stage-2 PTW).
|
||||
*/
|
||||
if (!cpus_have_final_cap(ARM64_HAS_RAS_EXTN))
|
||||
return true;
|
||||
|
||||
/* KVM owns the VNCR when the vCPU isn't in a nested context. */
|
||||
if (is_hyp_ctxt(vcpu) && !kvm_vcpu_trap_is_iabt(vcpu) && (esr & ESR_ELx_VNCR))
|
||||
return true;
|
||||
|
||||
/*
 * Determining whether an external abort during a table walk happened
 * at stage-2 is only possible when S1PTW is set. Otherwise, since KVM
 * sets HCR_EL2.TEA, SEAs due to a stage-1 walk (i.e. accessing the
 * PA of the stage-1 descriptor) can reach here and are reported
 * with a TTW ESR value.
 */
|
||||
return (esr_fsc_is_sea_ttw(esr) && (esr & ESR_ELx_S1PTW));
|
||||
}
|
||||
|
||||
int kvm_handle_guest_sea(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct kvm_run *run = vcpu->run;
|
||||
u64 esr = kvm_vcpu_get_esr(vcpu);
|
||||
u64 esr_mask = ESR_ELx_EC_MASK |
|
||||
ESR_ELx_IL |
|
||||
ESR_ELx_FnV |
|
||||
ESR_ELx_EA |
|
||||
ESR_ELx_CM |
|
||||
ESR_ELx_WNR |
|
||||
ESR_ELx_FSC;
|
||||
u64 ipa;
|
||||
|
||||
/*
|
||||
* Give APEI the opportunity to claim the abort before handling it
|
||||
* within KVM. apei_claim_sea() expects to be called with IRQs enabled.
|
||||
@@ -1909,7 +1994,33 @@ int kvm_handle_guest_sea(struct kvm_vcpu *vcpu)
|
||||
if (apei_claim_sea(NULL) == 0)
|
||||
return 1;
|
||||
|
||||
return kvm_inject_serror(vcpu);
|
||||
if (host_owns_sea(vcpu, esr) ||
|
||||
!test_bit(KVM_ARCH_FLAG_EXIT_SEA, &vcpu->kvm->arch.flags))
|
||||
return kvm_inject_serror(vcpu);
|
||||
|
||||
/* ESR_ELx.SET is RES0 when FEAT_RAS isn't implemented. */
|
||||
if (kvm_has_ras(kvm))
|
||||
esr_mask |= ESR_ELx_SET_MASK;
|
||||
|
||||
/*
|
||||
* Exit to userspace, and provide faulting guest virtual and physical
|
||||
* addresses in case userspace wants to emulate SEA to guest by
|
||||
* writing to FAR_ELx and HPFAR_ELx registers.
|
||||
*/
|
||||
memset(&run->arm_sea, 0, sizeof(run->arm_sea));
|
||||
run->exit_reason = KVM_EXIT_ARM_SEA;
|
||||
run->arm_sea.esr = esr & esr_mask;
|
||||
|
||||
if (!(esr & ESR_ELx_FnV))
|
||||
run->arm_sea.gva = kvm_vcpu_get_hfar(vcpu);
|
||||
|
||||
ipa = kvm_vcpu_get_fault_ipa(vcpu);
|
||||
if (ipa != INVALID_GPA) {
|
||||
run->arm_sea.flags |= KVM_EXIT_ARM_SEA_FLAG_GPA_VALID;
|
||||
run->arm_sea.gpa = ipa;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1999,6 +2110,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
u32 esr;
|
||||
|
||||
ret = kvm_walk_nested_s2(vcpu, fault_ipa, &nested_trans);
|
||||
if (ret == -EAGAIN) {
|
||||
ret = 1;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
esr = kvm_s2_trans_esr(&nested_trans);
|
||||
kvm_inject_s2_fault(vcpu, esr);
|
||||
|
||||
@@ -124,14 +124,13 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
struct s2_walk_info {
|
||||
int (*read_desc)(phys_addr_t pa, u64 *desc, void *data);
|
||||
void *data;
|
||||
u64 baddr;
|
||||
unsigned int max_oa_bits;
|
||||
unsigned int pgshift;
|
||||
unsigned int sl;
|
||||
unsigned int t0sz;
|
||||
bool be;
|
||||
u64 baddr;
|
||||
unsigned int max_oa_bits;
|
||||
unsigned int pgshift;
|
||||
unsigned int sl;
|
||||
unsigned int t0sz;
|
||||
bool be;
|
||||
bool ha;
|
||||
};
|
||||
|
||||
static u32 compute_fsc(int level, u32 fsc)
|
||||
@@ -199,6 +198,42 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int read_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 *desc,
|
||||
struct s2_walk_info *wi)
|
||||
{
|
||||
u64 val;
|
||||
int r;
|
||||
|
||||
r = kvm_read_guest(vcpu->kvm, pa, &val, sizeof(val));
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/*
 * Handle reversed descriptors if endianness differs between the
 * host and the guest hypervisor.
 */
|
||||
if (wi->be)
|
||||
*desc = be64_to_cpu((__force __be64)val);
|
||||
else
|
||||
*desc = le64_to_cpu((__force __le64)val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int swap_guest_s2_desc(struct kvm_vcpu *vcpu, phys_addr_t pa, u64 old, u64 new,
|
||||
struct s2_walk_info *wi)
|
||||
{
|
||||
if (wi->be) {
|
||||
old = (__force u64)cpu_to_be64(old);
|
||||
new = (__force u64)cpu_to_be64(new);
|
||||
} else {
|
||||
old = (__force u64)cpu_to_le64(old);
|
||||
new = (__force u64)cpu_to_le64(new);
|
||||
}
|
||||
|
||||
return __kvm_at_swap_desc(vcpu->kvm, pa, old, new);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is essentially a C-version of the pseudo code from the ARM ARM
|
||||
* AArch64.TranslationTableWalk function. I strongly recommend looking at
|
||||
@@ -206,13 +241,13 @@ static int check_output_size(struct s2_walk_info *wi, phys_addr_t output)
|
||||
*
|
||||
* Must be called with the kvm->srcu read lock held
|
||||
*/
|
||||
static int walk_nested_s2_pgd(phys_addr_t ipa,
|
||||
static int walk_nested_s2_pgd(struct kvm_vcpu *vcpu, phys_addr_t ipa,
|
||||
struct s2_walk_info *wi, struct kvm_s2_trans *out)
|
||||
{
|
||||
int first_block_level, level, stride, input_size, base_lower_bound;
|
||||
phys_addr_t base_addr;
|
||||
unsigned int addr_top, addr_bottom;
|
||||
u64 desc; /* page table entry */
|
||||
u64 desc, new_desc; /* page table entry */
|
||||
int ret;
|
||||
phys_addr_t paddr;
|
||||
|
||||
@@ -257,28 +292,30 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
|
||||
>> (addr_bottom - 3);
|
||||
|
||||
paddr = base_addr | index;
|
||||
ret = wi->read_desc(paddr, &desc, wi->data);
|
||||
ret = read_guest_s2_desc(vcpu, paddr, &desc, wi);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Handle reversedescriptors if endianness differs between the
|
||||
* host and the guest hypervisor.
|
||||
*/
|
||||
if (wi->be)
|
||||
desc = be64_to_cpu((__force __be64)desc);
|
||||
else
|
||||
desc = le64_to_cpu((__force __le64)desc);
|
||||
new_desc = desc;
|
||||
|
||||
/* Check for valid descriptor at this point */
|
||||
if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) {
|
||||
if (!(desc & KVM_PTE_VALID)) {
|
||||
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
|
||||
out->desc = desc;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* We're at the final level or block translation level */
|
||||
if ((desc & 3) == 1 || level == 3)
|
||||
if (FIELD_GET(KVM_PTE_TYPE, desc) == KVM_PTE_TYPE_BLOCK) {
|
||||
if (level < 3)
|
||||
break;
|
||||
|
||||
out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT);
|
||||
out->desc = desc;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* We're at the final level */
|
||||
if (level == 3)
|
||||
break;
|
||||
|
||||
if (check_output_size(wi, desc)) {
|
||||
@@ -305,7 +342,18 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!(desc & BIT(10))) {
|
||||
if (wi->ha)
|
||||
new_desc |= KVM_PTE_LEAF_ATTR_LO_S2_AF;
|
||||
|
||||
if (new_desc != desc) {
|
||||
ret = swap_guest_s2_desc(vcpu, paddr, desc, new_desc, wi);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
desc = new_desc;
|
||||
}
|
||||
|
||||
if (!(desc & KVM_PTE_LEAF_ATTR_LO_S2_AF)) {
|
||||
out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS);
|
||||
out->desc = desc;
|
||||
return 1;
|
||||
@@ -318,20 +366,13 @@ static int walk_nested_s2_pgd(phys_addr_t ipa,
|
||||
(ipa & GENMASK_ULL(addr_bottom - 1, 0));
|
||||
out->output = paddr;
|
||||
out->block_size = 1UL << ((3 - level) * stride + wi->pgshift);
|
||||
out->readable = desc & (0b01 << 6);
|
||||
out->writable = desc & (0b10 << 6);
|
||||
out->readable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;
|
||||
out->writable = desc & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
|
||||
out->level = level;
|
||||
out->desc = desc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int read_guest_s2_desc(phys_addr_t pa, u64 *desc, void *data)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = data;
|
||||
|
||||
return kvm_read_guest(vcpu->kvm, pa, desc, sizeof(*desc));
|
||||
}
|
||||
|
||||
static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
|
||||
{
|
||||
wi->t0sz = vtcr & TCR_EL2_T0SZ_MASK;
|
||||
@@ -350,6 +391,8 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
|
||||
/* Global limit for now, should eventually be per-VM */
|
||||
wi->max_oa_bits = min(get_kvm_ipa_limit(),
|
||||
ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
|
||||
|
||||
wi->ha = vtcr & VTCR_EL2_HA;
|
||||
}
|
||||
|
||||
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
|
||||
@@ -364,15 +407,13 @@ int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
|
||||
if (!vcpu_has_nv(vcpu))
|
||||
return 0;
|
||||
|
||||
wi.read_desc = read_guest_s2_desc;
|
||||
wi.data = vcpu;
|
||||
wi.baddr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
|
||||
|
||||
vtcr_to_walk_info(vtcr, &wi);
|
||||
|
||||
wi.be = vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_EE;
|
||||
|
||||
ret = walk_nested_s2_pgd(gipa, &wi, result);
|
||||
ret = walk_nested_s2_pgd(vcpu, gipa, &wi, result);
|
||||
if (ret)
|
||||
result->esr |= (kvm_vcpu_get_esr(vcpu) & ~ESR_ELx_FSC);
|
||||
|
||||
@@ -788,7 +829,10 @@ int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu, struct kvm_s2_trans *trans)
|
||||
return 0;
|
||||
|
||||
if (kvm_vcpu_trap_is_iabt(vcpu)) {
|
||||
forward_fault = !kvm_s2_trans_executable(trans);
|
||||
if (vcpu_mode_priv(vcpu))
|
||||
forward_fault = !kvm_s2_trans_exec_el1(vcpu->kvm, trans);
|
||||
else
|
||||
forward_fault = !kvm_s2_trans_exec_el0(vcpu->kvm, trans);
|
||||
} else {
|
||||
bool write_fault = kvm_is_write_fault(vcpu);
|
||||
|
||||
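The kvm_s2_handle_perm_fault() change makes the instruction-abort forwarding decision depend on the privilege level the nested guest was running at, which is what FEAT_XNX adds to the shadow stage-2. A condensed sketch of that rule (struct and field names are illustrative):

#include <stdbool.h>

struct s2_trans_perms {
	bool exec_el1;	/* privileged execute allowed by the guest's stage-2 */
	bool exec_el0;	/* unprivileged execute allowed */
	bool writable;
};

/* Should an instruction abort be forwarded to the guest hypervisor? */
static bool forward_iabt(const struct s2_trans_perms *p, bool vcpu_is_priv)
{
	return vcpu_is_priv ? !p->exec_el1 : !p->exec_el0;
}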
@@ -1555,12 +1599,13 @@ u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
|
||||
case SYS_ID_AA64MMFR1_EL1:
|
||||
val &= ~(ID_AA64MMFR1_EL1_CMOW |
|
||||
ID_AA64MMFR1_EL1_nTLBPA |
|
||||
ID_AA64MMFR1_EL1_ETS |
|
||||
ID_AA64MMFR1_EL1_XNX |
|
||||
ID_AA64MMFR1_EL1_HAFDBS);
|
||||
ID_AA64MMFR1_EL1_ETS);
|
||||
|
||||
/* FEAT_E2H0 implies no VHE */
|
||||
if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
|
||||
val &= ~ID_AA64MMFR1_EL1_VH;
|
||||
|
||||
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR1_EL1, HAFDBS, AF);
|
||||
break;
|
||||
|
||||
case SYS_ID_AA64MMFR2_EL1:
|
||||
|
||||
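The new ID_REG_LIMIT_FIELD_ENUM() line caps ID_AA64MMFR1_EL1.HAFDBS at the "AF" value: hardware Access Flag updates are exposed to the NV guest, dirty-state tracking is not. A standalone sketch of that clamp, assuming the usual bits[3:0] placement of the field (helper name illustrative):

#include <stdint.h>

#define HAFDBS_SHIFT	0	/* ID_AA64MMFR1_EL1.HAFDBS, bits [3:0] */
#define HAFDBS_MASK	(0xfULL << HAFDBS_SHIFT)
#define HAFDBS_AF	1	/* hardware update of the Access Flag only */

static uint64_t limit_hafdbs_to_af(uint64_t mmfr1)
{
	uint64_t field = (mmfr1 & HAFDBS_MASK) >> HAFDBS_SHIFT;

	if (field > HAFDBS_AF)
		field = HAFDBS_AF;
	return (mmfr1 & ~HAFDBS_MASK) | (field << HAFDBS_SHIFT);
}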
@@ -344,9 +344,16 @@ static int __pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 start, u64 e
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
|
||||
void pkvm_pgtable_stage2_destroy_range(struct kvm_pgtable *pgt,
|
||||
u64 addr, u64 size)
|
||||
{
|
||||
__pkvm_pgtable_stage2_unmap(pgt, 0, ~(0ULL));
|
||||
__pkvm_pgtable_stage2_unmap(pgt, addr, addr + size);
|
||||
}
|
||||
|
||||
void pkvm_pgtable_stage2_destroy_pgd(struct kvm_pgtable *pgt)
|
||||
{
|
||||
/* Expected to be called after all pKVM mappings have been released. */
|
||||
WARN_ON_ONCE(!RB_EMPTY_ROOT(&pgt->pkvm_mappings.rb_root));
|
||||
}
|
||||
|
||||
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
|
||||
|
||||
@@ -31,27 +31,46 @@ static const struct ptdump_prot_bits stage2_pte_bits[] = {
|
||||
.val = PTE_VALID,
|
||||
.set = " ",
|
||||
.clear = "F",
|
||||
}, {
|
||||
},
|
||||
{
|
||||
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
|
||||
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R,
|
||||
.set = "R",
|
||||
.clear = " ",
|
||||
}, {
|
||||
},
|
||||
{
|
||||
.mask = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
|
||||
.val = KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
|
||||
.set = "W",
|
||||
.clear = " ",
|
||||
}, {
|
||||
},
|
||||
{
|
||||
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
|
||||
.val = KVM_PTE_LEAF_ATTR_HI_S2_XN,
|
||||
.set = "NX",
|
||||
.clear = "x ",
|
||||
}, {
|
||||
.val = 0b00UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
|
||||
.set = "px ux ",
|
||||
},
|
||||
{
|
||||
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
|
||||
.val = 0b01UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
|
||||
.set = "PXNux ",
|
||||
},
|
||||
{
|
||||
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
|
||||
.val = 0b10UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
|
||||
.set = "PXNUXN",
|
||||
},
|
||||
{
|
||||
.mask = KVM_PTE_LEAF_ATTR_HI_S2_XN,
|
||||
.val = 0b11UL << __bf_shf(KVM_PTE_LEAF_ATTR_HI_S2_XN),
|
||||
.set = "px UXN",
|
||||
},
|
||||
{
|
||||
.mask = KVM_PTE_LEAF_ATTR_LO_S2_AF,
|
||||
.val = KVM_PTE_LEAF_ATTR_LO_S2_AF,
|
||||
.set = "AF",
|
||||
.clear = " ",
|
||||
}, {
|
||||
},
|
||||
{
|
||||
.mask = PMD_TYPE_MASK,
|
||||
.val = PMD_TYPE_SECT,
|
||||
.set = "BLK",
|
||||
|
||||
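The four XN[1:0] strings added above ("px ux", "PXNux", "PXNUXN", "px UXN") correspond to the split privileged/unprivileged execute permissions of FEAT_XNX. A small standalone decoder, assuming XN[1:0] sits in descriptor bits [54:53]:

#include <stdbool.h>
#include <stdint.h>

#define S2_XN_SHIFT	53	/* stage-2 XN[1:0], descriptor bits [54:53] */

struct s2_exec_perms {
	bool el1_exec;	/* privileged execute */
	bool el0_exec;	/* user execute */
};

static struct s2_exec_perms decode_s2_xn(uint64_t desc)
{
	switch ((desc >> S2_XN_SHIFT) & 3) {
	case 0:  return (struct s2_exec_perms){ .el1_exec = true,  .el0_exec = true  };
	case 1:  return (struct s2_exec_perms){ .el1_exec = false, .el0_exec = true  };
	case 2:  return (struct s2_exec_perms){ .el1_exec = false, .el0_exec = false };
	default: return (struct s2_exec_perms){ .el1_exec = true,  .el0_exec = false };
	}
}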
@@ -666,6 +666,21 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool access_gic_dir(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
{
|
||||
if (!kvm_has_gicv3(vcpu->kvm))
|
||||
return undef_access(vcpu, p, r);
|
||||
|
||||
if (!p->is_write)
|
||||
return undef_access(vcpu, p, r);
|
||||
|
||||
vgic_v3_deactivate(vcpu, p->regval);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool trap_raz_wi(struct kvm_vcpu *vcpu,
|
||||
struct sys_reg_params *p,
|
||||
const struct sys_reg_desc *r)
|
||||
@@ -3373,7 +3388,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
|
||||
{ SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir },
|
||||
{ SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
|
||||
{ SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
|
||||
{ SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
|
||||
@@ -3770,7 +3785,8 @@ static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
{
|
||||
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
|
||||
|
||||
__kvm_at_s1e01(vcpu, op, p->regval);
|
||||
if (__kvm_at_s1e01(vcpu, op, p->regval))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -3787,7 +3803,8 @@ static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
return false;
|
||||
}
|
||||
|
||||
__kvm_at_s1e2(vcpu, op, p->regval);
|
||||
if (__kvm_at_s1e2(vcpu, op, p->regval))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -3797,7 +3814,8 @@ static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
||||
{
|
||||
u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
|
||||
|
||||
__kvm_at_s12(vcpu, op, p->regval);
|
||||
if (__kvm_at_s12(vcpu, op, p->regval))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -4498,7 +4516,7 @@ static const struct sys_reg_desc cp15_regs[] = {
|
||||
{ CP15_SYS_DESC(SYS_ICC_AP1R1_EL1), undef_access },
|
||||
{ CP15_SYS_DESC(SYS_ICC_AP1R2_EL1), undef_access },
|
||||
{ CP15_SYS_DESC(SYS_ICC_AP1R3_EL1), undef_access },
|
||||
{ CP15_SYS_DESC(SYS_ICC_DIR_EL1), undef_access },
|
||||
{ CP15_SYS_DESC(SYS_ICC_DIR_EL1), access_gic_dir },
|
||||
{ CP15_SYS_DESC(SYS_ICC_RPR_EL1), undef_access },
|
||||
{ CP15_SYS_DESC(SYS_ICC_IAR1_EL1), undef_access },
|
||||
{ CP15_SYS_DESC(SYS_ICC_EOIR1_EL1), undef_access },
|
||||
|
||||
@@ -198,6 +198,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
|
||||
struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
|
||||
int i;
|
||||
|
||||
dist->active_spis = (atomic_t)ATOMIC_INIT(0);
|
||||
dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT);
|
||||
if (!dist->spis)
|
||||
return -ENOMEM;
|
||||
@@ -363,12 +364,12 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
|
||||
static void kvm_vgic_vcpu_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_vgic_global_state.type == VGIC_V2)
|
||||
vgic_v2_enable(vcpu);
|
||||
vgic_v2_reset(vcpu);
|
||||
else
|
||||
vgic_v3_enable(vcpu);
|
||||
vgic_v3_reset(vcpu);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -415,7 +416,7 @@ int vgic_init(struct kvm *kvm)
|
||||
}
|
||||
|
||||
kvm_for_each_vcpu(idx, vcpu, kvm)
|
||||
kvm_vgic_vcpu_enable(vcpu);
|
||||
kvm_vgic_vcpu_reset(vcpu);
|
||||
|
||||
ret = kvm_vgic_setup_default_irq_routing(kvm);
|
||||
if (ret)
|
||||
|
||||
@@ -359,6 +359,16 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
}
|
||||
|
||||
static void vgic_mmio_write_dir(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len,
|
||||
unsigned long val)
|
||||
{
|
||||
if (kvm_vgic_global_state.type == VGIC_V2)
|
||||
vgic_v2_deactivate(vcpu, val);
|
||||
else
|
||||
vgic_v3_deactivate(vcpu, val);
|
||||
}
|
||||
|
||||
static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu,
|
||||
gpa_t addr, unsigned int len)
|
||||
{
|
||||
@@ -482,6 +492,10 @@ static const struct vgic_register_region vgic_v2_cpu_registers[] = {
|
||||
REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT,
|
||||
vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
|
||||
VGIC_ACCESS_32bit),
|
||||
REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_CPU_DEACTIVATE,
|
||||
vgic_mmio_read_raz, vgic_mmio_write_dir,
|
||||
vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi,
|
||||
4, VGIC_ACCESS_32bit),
|
||||
};
|
||||
|
||||
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
|
||||
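The new GIC_CPU_DEACTIVATE region gives the guest a trapped GICC_DIR for EOImode==1 completion. The deactivation handlers in this series pull the interrupt ID and, for SGIs, the source CPU out of the written value; the assumed GICv2 register layout is:

#include <stdint.h>

struct gicc_dir_write {
	uint32_t intid;		/* bits [9:0]: interrupt ID */
	uint8_t  cpuid;		/* bits [12:10]: source CPU, SGIs only */
};

static struct gicc_dir_write decode_gicc_dir(uint32_t val)
{
	return (struct gicc_dir_write){
		.intid = val & 0x3ff,
		.cpuid = (val >> 10) & 0x7,
	};
}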
@@ -494,6 +508,16 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
|
||||
return SZ_4K;
|
||||
}
|
||||
|
||||
unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev)
|
||||
{
|
||||
dev->regions = vgic_v2_cpu_registers;
|
||||
dev->nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
|
||||
|
||||
kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
|
||||
|
||||
return KVM_VGIC_V2_CPU_SIZE;
|
||||
}
|
||||
|
||||
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
|
||||
{
|
||||
const struct vgic_register_region *region;
|
||||
|
||||
@@ -213,6 +213,7 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
|
||||
const u32 val);
|
||||
|
||||
unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
|
||||
unsigned int vgic_v2_init_cpuif_iodev(struct vgic_io_device *dev);
|
||||
|
||||
unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev);
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <kvm/arm_vgic.h>
|
||||
#include <asm/kvm_mmu.h>
|
||||
|
||||
#include "vgic-mmio.h"
|
||||
#include "vgic.h"
|
||||
|
||||
static inline void vgic_v2_write_lr(int lr, u32 val)
|
||||
@@ -26,11 +27,24 @@ void vgic_v2_init_lrs(void)
|
||||
vgic_v2_write_lr(i, 0);
|
||||
}
|
||||
|
||||
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
|
||||
void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu,
|
||||
struct ap_list_summary *als)
|
||||
{
|
||||
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
|
||||
|
||||
cpuif->vgic_hcr |= GICH_HCR_UIE;
|
||||
cpuif->vgic_hcr = GICH_HCR_EN;
|
||||
|
||||
if (irqs_pending_outside_lrs(als))
|
||||
cpuif->vgic_hcr |= GICH_HCR_NPIE;
|
||||
if (irqs_active_outside_lrs(als))
|
||||
cpuif->vgic_hcr |= GICH_HCR_LRENPIE;
|
||||
if (irqs_outside_lrs(als))
|
||||
cpuif->vgic_hcr |= GICH_HCR_UIE;
|
||||
|
||||
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP0_MASK) ?
|
||||
GICH_HCR_VGrp0DIE : GICH_HCR_VGrp0EIE;
|
||||
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & GICH_VMCR_ENABLE_GRP1_MASK) ?
|
||||
GICH_HCR_VGrp1DIE : GICH_HCR_VGrp1EIE;
|
||||
}
|
||||
|
||||
static bool lr_signals_eoi_mi(u32 lr_val)
|
||||
@@ -39,43 +53,23 @@ static bool lr_signals_eoi_mi(u32 lr_val)
|
||||
!(lr_val & GICH_LR_HW);
|
||||
}
|
||||
|
||||
/*
|
||||
* transfer the content of the LRs back into the corresponding ap_list:
|
||||
* - active bit is transferred as is
|
||||
* - pending bit is
|
||||
* - transferred as is in case of edge sensitive IRQs
|
||||
* - set to the line-level (resample time) for level sensitive IRQs
|
||||
*/
|
||||
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
static void vgic_v2_fold_lr(struct kvm_vcpu *vcpu, u32 val)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
|
||||
int lr;
|
||||
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
|
||||
struct vgic_irq *irq;
|
||||
bool deactivated;
|
||||
|
||||
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
|
||||
/* Extract the source vCPU id from the LR */
|
||||
cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val) & 7;
|
||||
|
||||
cpuif->vgic_hcr &= ~GICH_HCR_UIE;
|
||||
/* Notify fds when the guest EOI'ed a level-triggered SPI */
|
||||
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
|
||||
kvm_notify_acked_irq(vcpu->kvm, 0,
|
||||
intid - VGIC_NR_PRIVATE_IRQS);
|
||||
|
||||
for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) {
|
||||
u32 val = cpuif->vgic_lr[lr];
|
||||
u32 cpuid, intid = val & GICH_LR_VIRTUALID;
|
||||
struct vgic_irq *irq;
|
||||
bool deactivated;
|
||||
|
||||
/* Extract the source vCPU id from the LR */
|
||||
cpuid = val & GICH_LR_PHYSID_CPUID;
|
||||
cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
|
||||
cpuid &= 7;
|
||||
|
||||
/* Notify fds when the guest EOI'ed a level-triggered SPI */
|
||||
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
|
||||
kvm_notify_acked_irq(vcpu->kvm, 0,
|
||||
intid - VGIC_NR_PRIVATE_IRQS);
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
|
||||
raw_spin_lock(&irq->irq_lock);
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
|
||||
scoped_guard(raw_spinlock, &irq->irq_lock) {
|
||||
/* Always preserve the active bit, note deactivation */
|
||||
deactivated = irq->active && !(val & GICH_LR_ACTIVE_BIT);
|
||||
irq->active = !!(val & GICH_LR_ACTIVE_BIT);
|
||||
@@ -101,29 +95,139 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
/* Handle resampling for mapped interrupts if required */
|
||||
vgic_irq_handle_resampling(irq, deactivated, val & GICH_LR_PENDING_BIT);
|
||||
|
||||
raw_spin_unlock(&irq->irq_lock);
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
irq->on_lr = false;
|
||||
}
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq);
|
||||
|
||||
/*
|
||||
* transfer the content of the LRs back into the corresponding ap_list:
|
||||
* - active bit is transferred as is
|
||||
* - pending bit is
|
||||
* - transferred as is in case of edge sensitive IRQs
|
||||
* - set to the line-level (resample time) for level sensitive IRQs
|
||||
*/
|
||||
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
|
||||
u32 eoicount = FIELD_GET(GICH_HCR_EOICOUNT, cpuif->vgic_hcr);
|
||||
struct vgic_irq *irq;
|
||||
|
||||
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
|
||||
|
||||
for (int lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++)
|
||||
vgic_v2_fold_lr(vcpu, cpuif->vgic_lr[lr]);
|
||||
|
||||
/* See the GICv3 equivalent for the EOIcount handling rationale */
|
||||
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
|
||||
u32 lr;
|
||||
|
||||
if (!eoicount) {
|
||||
break;
|
||||
} else {
|
||||
guard(raw_spinlock)(&irq->irq_lock);
|
||||
|
||||
if (!(likely(vgic_target_oracle(irq) == vcpu) &&
|
||||
irq->active))
|
||||
continue;
|
||||
|
||||
lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT;
|
||||
}
|
||||
|
||||
if (lr & GICH_LR_HW)
|
||||
writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr),
|
||||
kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE);
|
||||
vgic_v2_fold_lr(vcpu, lr);
|
||||
eoicount--;
|
||||
}
|
||||
|
||||
cpuif->used_lrs = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Populates the particular LR with the state of a given IRQ:
|
||||
* - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
|
||||
* - for a level sensitive IRQ the pending state value is unchanged;
|
||||
* it is dictated directly by the input level
|
||||
*
|
||||
* If @irq describes an SGI with multiple sources, we choose the
|
||||
* lowest-numbered source VCPU and clear that bit in the source bitmap.
|
||||
*
|
||||
* The irq_lock must be held by the caller.
|
||||
*/
|
||||
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
|
||||
struct kvm_vcpu *target_vcpu = NULL;
|
||||
bool mmio = false;
|
||||
struct vgic_irq *irq;
|
||||
unsigned long flags;
|
||||
u64 lr = 0;
|
||||
u8 cpuid;
|
||||
|
||||
/* Snapshot CPUID, and remove it from the INTID */
|
||||
cpuid = FIELD_GET(GENMASK_ULL(12, 10), val);
|
||||
val &= ~GENMASK_ULL(12, 10);
|
||||
|
||||
/* We only deal with DIR when EOIMode==1 */
|
||||
if (!(cpuif->vgic_vmcr & GICH_VMCR_EOI_MODE_MASK))
|
||||
return;
|
||||
|
||||
/* Make sure we're in the same context as LR handling */
|
||||
local_irq_save(flags);
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, val);
|
||||
if (WARN_ON_ONCE(!irq))
|
||||
goto out;
|
||||
|
||||
/* See the corresponding v3 code for the rationale */
|
||||
scoped_guard(raw_spinlock, &irq->irq_lock) {
|
||||
target_vcpu = irq->vcpu;
|
||||
|
||||
/* Not on any ap_list? */
|
||||
if (!target_vcpu)
|
||||
goto put;
|
||||
|
||||
/*
|
||||
* Urgh. We're deactivating something that we cannot
|
||||
* observe yet... Big hammer time.
|
||||
*/
|
||||
if (irq->on_lr) {
|
||||
mmio = true;
|
||||
goto put;
|
||||
}
|
||||
|
||||
/* SGI: check that the cpuid matches */
|
||||
if (val < VGIC_NR_SGIS && irq->active_source != cpuid) {
|
||||
target_vcpu = NULL;
|
||||
goto put;
|
||||
}
|
||||
|
||||
/* (with a Dalek voice) DEACTIVATE!!!! */
|
||||
lr = vgic_v2_compute_lr(vcpu, irq) & ~GICH_LR_ACTIVE_BIT;
|
||||
}
|
||||
|
||||
if (lr & GICH_LR_HW)
|
||||
writel_relaxed(FIELD_GET(GICH_LR_PHYSID_CPUID, lr),
|
||||
kvm_vgic_global_state.gicc_base + GIC_CPU_DEACTIVATE);
|
||||
|
||||
vgic_v2_fold_lr(vcpu, lr);
|
||||
|
||||
put:
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (mmio)
|
||||
vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32));
|
||||
|
||||
/* Force the ap_list to be pruned */
|
||||
if (target_vcpu)
|
||||
kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu);
|
||||
}
|
||||
|
||||
static u32 vgic_v2_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
|
||||
{
|
||||
u32 val = irq->intid;
|
||||
bool allow_pending = true;
|
||||
|
||||
WARN_ON(irq->on_lr);
|
||||
|
||||
if (irq->active) {
|
||||
val |= GICH_LR_ACTIVE_BIT;
|
||||
if (vgic_irq_is_sgi(irq->intid))
|
||||
@@ -163,22 +267,52 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
if (allow_pending && irq_is_pending(irq)) {
|
||||
val |= GICH_LR_PENDING_BIT;
|
||||
|
||||
if (vgic_irq_is_sgi(irq->intid)) {
|
||||
u32 src = ffs(irq->source);
|
||||
|
||||
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
|
||||
irq->intid))
|
||||
return 0;
|
||||
|
||||
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
|
||||
if (irq->source & ~BIT(src - 1))
|
||||
val |= GICH_LR_EOI;
|
||||
}
|
||||
}
|
||||
|
||||
/* The GICv2 LR only holds five bits of priority. */
|
||||
val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
|
||||
* Populates the particular LR with the state of a given IRQ:
|
||||
* - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
|
||||
* - for a level sensitive IRQ the pending state value is unchanged;
|
||||
* it is dictated directly by the input level
|
||||
*
|
||||
* If @irq describes an SGI with multiple sources, we choose the
|
||||
* lowest-numbered source VCPU and clear that bit in the source bitmap.
|
||||
*
|
||||
* The irq_lock must be held by the caller.
|
||||
*/
|
||||
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
{
|
||||
u32 val = vgic_v2_compute_lr(vcpu, irq);
|
||||
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
|
||||
|
||||
if (val & GICH_LR_PENDING_BIT) {
|
||||
if (irq->config == VGIC_CONFIG_EDGE)
|
||||
irq->pending_latch = false;
|
||||
|
||||
if (vgic_irq_is_sgi(irq->intid)) {
|
||||
u32 src = ffs(irq->source);
|
||||
|
||||
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
|
||||
irq->intid))
|
||||
return;
|
||||
|
||||
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
|
||||
irq->source &= ~(1 << (src - 1));
|
||||
if (irq->source) {
|
||||
irq->source &= ~BIT(src - 1);
|
||||
if (irq->source)
|
||||
irq->pending_latch = true;
|
||||
val |= GICH_LR_EOI;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -194,7 +328,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
/* The GICv2 LR only holds five bits of priority. */
|
||||
val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
|
||||
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
|
||||
irq->on_lr = true;
|
||||
}
|
||||
|
||||
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr)
|
||||
@@ -257,7 +391,7 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
|
||||
GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
|
||||
}
|
||||
|
||||
void vgic_v2_enable(struct kvm_vcpu *vcpu)
|
||||
void vgic_v2_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* By forcing VMCR to zero, the GIC will restore the binary
|
||||
@@ -265,9 +399,6 @@ void vgic_v2_enable(struct kvm_vcpu *vcpu)
|
||||
* anyway.
|
||||
*/
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
|
||||
|
||||
/* Get the show on the road... */
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
|
||||
}
|
||||
|
||||
/* check for overlapping regions and for regions crossing the end of memory */
|
||||
@@ -289,6 +420,7 @@ static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base)
|
||||
int vgic_v2_map_resources(struct kvm *kvm)
|
||||
{
|
||||
struct vgic_dist *dist = &kvm->arch.vgic;
|
||||
unsigned int len;
|
||||
int ret = 0;
|
||||
|
||||
if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
|
||||
@@ -312,10 +444,20 @@ int vgic_v2_map_resources(struct kvm *kvm)
|
||||
return ret;
|
||||
}
|
||||
|
||||
len = vgic_v2_init_cpuif_iodev(&dist->cpuif_iodev);
|
||||
dist->cpuif_iodev.base_addr = dist->vgic_cpu_base;
|
||||
dist->cpuif_iodev.iodev_type = IODEV_CPUIF;
|
||||
dist->cpuif_iodev.redist_vcpu = NULL;
|
||||
|
||||
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist->vgic_cpu_base,
|
||||
len, &dist->cpuif_iodev.dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) {
|
||||
ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
|
||||
kvm_vgic_global_state.vcpu_base,
|
||||
KVM_VGIC_V2_CPU_SIZE, true);
|
||||
KVM_VGIC_V2_CPU_SIZE - SZ_4K, true);
|
||||
if (ret) {
|
||||
kvm_err("Unable to remap VGIC CPU to VCPU\n");
|
||||
return ret;
|
||||
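One reading of the KVM_VGIC_V2_CPU_SIZE - SZ_4K change above: only the first 4K of the virtual CPU interface remains mapped straight through to the hardware GICV region, while the second 4K, which holds GICC_DIR at offset 0x1000, keeps faulting so the deactivation write can be emulated. A sketch of that assumed split:

#include <stdbool.h>
#include <stdint.h>

#define GICC_PASSTHROUGH_SIZE	0x1000	/* first page maps to hardware GICV */
#define GICC_DIR_OFFSET		0x1000	/* GIC_CPU_DEACTIVATE, second page */

/* Does a CPU-interface offset hit the passthrough page or the trapped one? */
static bool gicc_offset_is_trapped(uint64_t offset)
{
	return offset >= GICC_PASSTHROUGH_SIZE;
}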
@@ -385,6 +527,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info)
|
||||
|
||||
kvm_vgic_global_state.can_emulate_gicv2 = true;
|
||||
kvm_vgic_global_state.vcpu_base = info->vcpu.start;
|
||||
kvm_vgic_global_state.gicc_base = info->gicc_base;
|
||||
kvm_vgic_global_state.type = VGIC_V2;
|
||||
kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
|
||||
|
||||
@@ -423,16 +566,26 @@ static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
|
||||
|
||||
void vgic_v2_save_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
|
||||
void __iomem *base = kvm_vgic_global_state.vctrl_base;
|
||||
u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
|
||||
|
||||
if (!base)
|
||||
return;
|
||||
|
||||
if (used_lrs) {
|
||||
cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
|
||||
|
||||
if (used_lrs)
|
||||
save_lrs(vcpu, base);
|
||||
writel_relaxed(0, base + GICH_HCR);
|
||||
|
||||
if (cpu_if->vgic_hcr & GICH_HCR_LRENPIE) {
|
||||
u32 val = readl_relaxed(base + GICH_HCR);
|
||||
|
||||
cpu_if->vgic_hcr &= ~GICH_HCR_EOICOUNT;
|
||||
cpu_if->vgic_hcr |= val & GICH_HCR_EOICOUNT;
|
||||
}
|
||||
|
||||
writel_relaxed(0, base + GICH_HCR);
|
||||
}
|
||||
|
||||
void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
|
||||
@@ -445,13 +598,10 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu)
|
||||
if (!base)
|
||||
return;
|
||||
|
||||
if (used_lrs) {
|
||||
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
|
||||
for (i = 0; i < used_lrs; i++) {
|
||||
writel_relaxed(cpu_if->vgic_lr[i],
|
||||
base + GICH_LR0 + (i * 4));
|
||||
}
|
||||
}
|
||||
writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
|
||||
|
||||
for (i = 0; i < used_lrs; i++)
|
||||
writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4));
|
||||
}
|
||||
|
||||
void vgic_v2_load(struct kvm_vcpu *vcpu)
|
||||
@@ -468,6 +618,5 @@ void vgic_v2_put(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
|
||||
|
||||
cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
|
||||
cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
|
||||
}
|
||||
|
||||
@@ -70,13 +70,14 @@ static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
|
||||
* - on L2 put: perform the inverse transformation, so that the result of L2
|
||||
* running becomes visible to L1 in the VNCR-accessible registers.
|
||||
*
|
||||
* - there is nothing to do on L2 entry, as everything will have happened
|
||||
* on load. However, this is the point where we detect that an interrupt
|
||||
* targeting L1 and prepare the grand switcheroo.
|
||||
* - there is nothing to do on L2 entry apart from enabling the vgic, as
|
||||
* everything will have happened on load. However, this is the point where
|
||||
* we detect that an interrupt is targeting L1 and prepare the grand
|
||||
* switcheroo.
|
||||
*
|
||||
* - on L2 exit: emulate the HW bit, and deactivate corresponding the L1
|
||||
* interrupt. The L0 active state will be cleared by the HW if the L1
|
||||
* interrupt was itself backed by a HW interrupt.
|
||||
* - on L2 exit: resync the LRs and VMCR, emulate the HW bit, and deactivate
|
||||
* the corresponding L1 interrupt. The L0 active state will be cleared by
|
||||
* the HW if the L1 interrupt was itself backed by a HW interrupt.
|
||||
*
|
||||
* Maintenance Interrupt (MI) management:
|
||||
*
|
||||
@@ -93,8 +94,10 @@ static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
|
||||
*
|
||||
* - because most of the ICH_*_EL2 registers live in the VNCR page, the
|
||||
* quality of emulation is poor: L1 can setup the vgic so that an MI would
|
||||
* immediately fire, and not observe anything until the next exit. Trying
|
||||
* to read ICH_MISR_EL2 would do the trick, for example.
|
||||
* immediately fire, and not observe anything until the next exit.
|
||||
* Similarly, a pending MI is not immediately disabled by clearing
|
||||
* ICH_HCR_EL2.En. Trying to read ICH_MISR_EL2 would do the trick, for
|
||||
* example.
|
||||
*
|
||||
* System register emulation:
|
||||
*
|
||||
@@ -265,16 +268,37 @@ static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
|
||||
s_cpu_if->used_lrs = hweight16(shadow_if->lr_map);
|
||||
}
|
||||
|
||||
void vgic_v3_flush_nested(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u64 val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
|
||||
|
||||
write_sysreg_s(val | vgic_ich_hcr_trap_bits(), SYS_ICH_HCR_EL2);
|
||||
}
|
||||
|
||||
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct shadow_if *shadow_if = get_shadow_if();
|
||||
int i;
|
||||
|
||||
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
|
||||
u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
|
||||
struct vgic_irq *irq;
|
||||
u64 val, host_lr, lr;
|
||||
|
||||
if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
|
||||
host_lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
|
||||
|
||||
/* Propagate the new LR state */
|
||||
lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
|
||||
val = lr & ~ICH_LR_STATE;
|
||||
val |= host_lr & ICH_LR_STATE;
|
||||
__vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
|
||||
|
||||
/*
|
||||
* Deactivation of a HW interrupt: the LR must have the HW
|
||||
* bit set, have been in a non-invalid state before the run,
|
||||
* and now be in an invalid state. If any of that doesn't
|
||||
* hold, we're done with this LR.
|
||||
*/
|
||||
if (!((lr & ICH_LR_HW) && (lr & ICH_LR_STATE) &&
|
||||
!(host_lr & ICH_LR_STATE)))
|
||||
continue;
|
||||
|
||||
/*
|
||||
@@ -282,35 +306,27 @@ void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
|
||||
* need to emulate the HW effect between the guest hypervisor
|
||||
* and the nested guest.
|
||||
*/
|
||||
irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
|
||||
if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
|
||||
continue;
|
||||
|
||||
lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
|
||||
if (!(lr & ICH_LR_STATE))
|
||||
irq->active = false;
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
vgic_v3_deactivate(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
|
||||
}
|
||||
|
||||
/* We need these to be synchronised to generate the MI */
|
||||
__vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, read_sysreg_s(SYS_ICH_VMCR_EL2));
|
||||
__vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, &=, ~ICH_HCR_EL2_EOIcount);
|
||||
__vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, |=, read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_EOIcount);
|
||||
|
||||
write_sysreg_s(0, SYS_ICH_HCR_EL2);
|
||||
isb();
|
||||
|
||||
vgic_v3_nested_update_mi(vcpu);
|
||||
}
|
||||
|
||||
static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu,
|
||||
struct vgic_v3_cpu_if *s_cpu_if)
|
||||
{
|
||||
struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
u64 val = 0;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* If we're on a system with a broken vgic that requires
|
||||
* trapping, propagate the trapping requirements.
|
||||
*
|
||||
* Ah, the smell of rotten fruits...
|
||||
*/
|
||||
if (static_branch_unlikely(&vgic_v3_cpuif_trap))
|
||||
val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
|
||||
ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR);
|
||||
s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val;
|
||||
s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
|
||||
s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
|
||||
s_cpu_if->vgic_sre = host_if->vgic_sre;
|
||||
|
||||
@@ -334,7 +350,8 @@ void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
|
||||
__vgic_v3_restore_vmcr_aprs(cpu_if);
|
||||
__vgic_v3_activate_traps(cpu_if);
|
||||
|
||||
__vgic_v3_restore_state(cpu_if);
|
||||
for (int i = 0; i < cpu_if->used_lrs; i++)
|
||||
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
|
||||
|
||||
/*
|
||||
* Propagate the number of used LRs for the benefit of the HYP
|
||||
@@ -347,36 +364,19 @@ void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct shadow_if *shadow_if = get_shadow_if();
|
||||
struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
|
||||
u64 val;
|
||||
int i;
|
||||
|
||||
__vgic_v3_save_vmcr_aprs(s_cpu_if);
|
||||
__vgic_v3_deactivate_traps(s_cpu_if);
|
||||
__vgic_v3_save_state(s_cpu_if);
|
||||
|
||||
/*
|
||||
* Translate the shadow state HW fields back to the virtual ones
|
||||
* before copying the shadow struct back to the nested one.
|
||||
*/
|
||||
val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
|
||||
val &= ~ICH_HCR_EL2_EOIcount_MASK;
|
||||
val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK);
|
||||
__vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val);
|
||||
__vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr);
|
||||
__vgic_v3_save_aprs(s_cpu_if);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
__vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]);
|
||||
__vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]);
|
||||
}
|
||||
|
||||
for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
|
||||
val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
|
||||
for (i = 0; i < s_cpu_if->used_lrs; i++)
|
||||
__gic_v3_set_lr(0, i);
|
||||
|
||||
val &= ~ICH_LR_STATE;
|
||||
val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;
|
||||
|
||||
__vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
|
||||
}
|
||||
__vgic_v3_deactivate_traps(s_cpu_if);
|
||||
|
||||
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <asm/kvm_mmu.h>
|
||||
#include <asm/kvm_asm.h>
|
||||
|
||||
#include "vgic-mmio.h"
|
||||
#include "vgic.h"
|
||||
|
||||
static bool group0_trap;
|
||||
@@ -20,11 +21,48 @@ static bool common_trap;
|
||||
static bool dir_trap;
|
||||
static bool gicv4_enable;
|
||||
|
||||
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
|
||||
void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu,
|
||||
struct ap_list_summary *als)
|
||||
{
|
||||
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;
|
||||
if (!irqchip_in_kernel(vcpu->kvm))
|
||||
return;
|
||||
|
||||
cpuif->vgic_hcr = ICH_HCR_EL2_En;
|
||||
|
||||
if (irqs_pending_outside_lrs(als))
|
||||
cpuif->vgic_hcr |= ICH_HCR_EL2_NPIE;
|
||||
if (irqs_active_outside_lrs(als))
|
||||
cpuif->vgic_hcr |= ICH_HCR_EL2_LRENPIE;
|
||||
if (irqs_outside_lrs(als))
|
||||
cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;
|
||||
|
||||
if (!als->nr_sgi)
|
||||
cpuif->vgic_hcr |= ICH_HCR_EL2_vSGIEOICount;
|
||||
|
||||
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG0_MASK) ?
|
||||
ICH_HCR_EL2_VGrp0DIE : ICH_HCR_EL2_VGrp0EIE;
|
||||
cpuif->vgic_hcr |= (cpuif->vgic_vmcr & ICH_VMCR_ENG1_MASK) ?
|
||||
ICH_HCR_EL2_VGrp1DIE : ICH_HCR_EL2_VGrp1EIE;
|
||||
|
||||
/*
|
||||
* Dealing with EOImode=1 is a massive source of headache. Not
|
||||
* only do we need to track that we have active interrupts
|
||||
* outside of the LRs and force DIR to be trapped, we also
|
||||
* need to deal with SPIs that can be deactivated on another
|
||||
* CPU.
|
||||
*
|
||||
* On systems that do not implement TDIR, force the bit in the
|
||||
* shadow state anyway to avoid IPI-ing on these poor sods.
|
||||
*
|
||||
* Note that we set the trap irrespective of EOIMode, as that
|
||||
* can change behind our back without any warning...
|
||||
*/
|
||||
if (!cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) ||
|
||||
irqs_active_outside_lrs(als) ||
|
||||
atomic_read(&vcpu->kvm->arch.vgic.active_spis))
|
||||
cpuif->vgic_hcr |= ICH_HCR_EL2_TDIR;
|
||||
}
|
||||
|
||||
static bool lr_signals_eoi_mi(u64 lr_val)
|
||||
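The EOImode==1 headache described above comes from the architectural split between priority drop and deactivation. A user-space model of that split, purely for illustration (sizes and names are arbitrary):

#include <stdbool.h>
#include <stdint.h>

struct cpu_iface_model {
	uint8_t rpr_stack[32];		/* active priorities, most recent last */
	unsigned int rpr_depth;
	bool active[1024];		/* active state per INTID */
};

static void ack(struct cpu_iface_model *c, uint32_t intid, uint8_t prio)
{
	if (c->rpr_depth < 32)
		c->rpr_stack[c->rpr_depth++] = prio;
	if (intid < 1024)
		c->active[intid] = true;
}

static void eoi(struct cpu_iface_model *c)		/* priority drop only */
{
	if (c->rpr_depth)
		c->rpr_depth--;
}

static void dir(struct cpu_iface_model *c, uint32_t intid)	/* deactivation */
{
	if (intid < 1024)
		c->active[intid] = false;	/* may run on another CPU */
}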
@@ -33,84 +71,238 @@ static bool lr_signals_eoi_mi(u64 lr_val)
|
||||
!(lr_val & ICH_LR_HW);
|
||||
}
|
||||
|
||||
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
static void vgic_v3_fold_lr(struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
|
||||
u32 model = vcpu->kvm->arch.vgic.vgic_model;
|
||||
int lr;
|
||||
struct vgic_irq *irq;
|
||||
bool is_v2_sgi = false;
|
||||
bool deactivated;
|
||||
u32 intid;
|
||||
|
||||
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
|
||||
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
||||
intid = val & ICH_LR_VIRTUAL_ID_MASK;
|
||||
} else {
|
||||
intid = val & GICH_LR_VIRTUALID;
|
||||
is_v2_sgi = vgic_irq_is_sgi(intid);
|
||||
}
|
||||
|
||||
cpuif->vgic_hcr &= ~ICH_HCR_EL2_UIE;
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
if (!irq) /* An LPI could have been unmapped. */
|
||||
return;
|
||||
|
||||
for (lr = 0; lr < cpuif->used_lrs; lr++) {
|
||||
u64 val = cpuif->vgic_lr[lr];
|
||||
u32 intid, cpuid;
|
||||
struct vgic_irq *irq;
|
||||
bool is_v2_sgi = false;
|
||||
bool deactivated;
|
||||
|
||||
cpuid = val & GICH_LR_PHYSID_CPUID;
|
||||
cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
|
||||
|
||||
if (model == KVM_DEV_TYPE_ARM_VGIC_V3) {
|
||||
intid = val & ICH_LR_VIRTUAL_ID_MASK;
|
||||
} else {
|
||||
intid = val & GICH_LR_VIRTUALID;
|
||||
is_v2_sgi = vgic_irq_is_sgi(intid);
|
||||
}
|
||||
|
||||
/* Notify fds when the guest EOI'ed a level-triggered IRQ */
|
||||
if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid))
|
||||
kvm_notify_acked_irq(vcpu->kvm, 0,
|
||||
intid - VGIC_NR_PRIVATE_IRQS);
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, intid);
|
||||
if (!irq) /* An LPI could have been unmapped. */
|
||||
continue;
|
||||
|
||||
raw_spin_lock(&irq->irq_lock);
|
||||
|
||||
/* Always preserve the active bit, note deactivation */
|
||||
scoped_guard(raw_spinlock, &irq->irq_lock) {
|
||||
/* Always preserve the active bit for !LPIs, note deactivation */
|
||||
if (irq->intid >= VGIC_MIN_LPI)
|
||||
val &= ~ICH_LR_ACTIVE_BIT;
|
||||
deactivated = irq->active && !(val & ICH_LR_ACTIVE_BIT);
|
||||
irq->active = !!(val & ICH_LR_ACTIVE_BIT);
|
||||
|
||||
if (irq->active && is_v2_sgi)
|
||||
irq->active_source = cpuid;
|
||||
|
||||
/* Edge is the only case where we preserve the pending bit */
|
||||
if (irq->config == VGIC_CONFIG_EDGE &&
|
||||
(val & ICH_LR_PENDING_BIT)) {
|
||||
(val & ICH_LR_PENDING_BIT))
|
||||
irq->pending_latch = true;
|
||||
|
||||
if (is_v2_sgi)
|
||||
irq->source |= (1 << cpuid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear soft pending state when level irqs have been acked.
|
||||
*/
|
||||
if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE))
|
||||
irq->pending_latch = false;
|
||||
|
||||
if (is_v2_sgi) {
|
||||
u8 cpuid = FIELD_GET(GICH_LR_PHYSID_CPUID, val);
|
||||
|
||||
if (irq->active)
|
||||
irq->active_source = cpuid;
|
||||
|
||||
if (val & ICH_LR_PENDING_BIT)
|
||||
irq->source |= BIT(cpuid);
|
||||
}
|
||||
|
||||
/* Handle resampling for mapped interrupts if required */
|
||||
vgic_irq_handle_resampling(irq, deactivated, val & ICH_LR_PENDING_BIT);
|
||||
|
||||
raw_spin_unlock(&irq->irq_lock);
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
irq->on_lr = false;
|
||||
}
|
||||
|
||||
/* Notify fds when the guest EOI'ed a level-triggered SPI, and drop the refcount */
|
||||
if (deactivated && lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) {
|
||||
kvm_notify_acked_irq(vcpu->kvm, 0,
|
||||
intid - VGIC_NR_PRIVATE_IRQS);
|
||||
atomic_dec_if_positive(&vcpu->kvm->arch.vgic.active_spis);
|
||||
}
|
||||
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
}
|
||||
|
||||
static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq);
|
||||
|
||||
static void vgic_v3_deactivate_phys(u32 intid)
|
||||
{
|
||||
if (cpus_have_final_cap(ARM64_HAS_GICV5_LEGACY))
|
||||
gic_insn(intid | FIELD_PREP(GICV5_GIC_CDDI_TYPE_MASK, 1), CDDI);
|
||||
else
|
||||
gic_write_dir(intid);
|
||||
}
|
||||
|
||||
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
|
||||
u32 eoicount = FIELD_GET(ICH_HCR_EL2_EOIcount, cpuif->vgic_hcr);
|
||||
struct vgic_irq *irq;
|
||||
|
||||
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
|
||||
|
||||
for (int lr = 0; lr < cpuif->used_lrs; lr++)
|
||||
vgic_v3_fold_lr(vcpu, cpuif->vgic_lr[lr]);
|
||||
|
||||
/*
|
||||
* EOIMode=0: use EOIcount to emulate deactivation. We are
|
||||
* guaranteed to deactivate in reverse order of the activation, so
|
||||
* just pick one active interrupt after the other in the ap_list,
|
||||
* and replay the deactivation as if the CPU was doing it. We also
|
||||
* rely on priority drop to have taken place, and the list to be
|
||||
* sorted by priority.
|
||||
*/
|
||||
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
|
||||
u64 lr;
|
||||
|
||||
/*
|
||||
* I would have loved to write this using a scoped_guard(),
|
||||
* but using 'continue' here is a total train wreck.
|
||||
*/
|
||||
if (!eoicount) {
|
||||
break;
|
||||
} else {
|
||||
guard(raw_spinlock)(&irq->irq_lock);
|
||||
|
||||
if (!(likely(vgic_target_oracle(irq) == vcpu) &&
|
||||
irq->active))
|
||||
continue;
|
||||
|
||||
lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
|
||||
}
|
||||
|
||||
if (lr & ICH_LR_HW)
|
||||
vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
|
||||
|
||||
vgic_v3_fold_lr(vcpu, lr);
|
||||
eoicount--;
|
||||
}
|
||||
|
||||
cpuif->used_lrs = 0;
|
||||
}
|
||||
|
||||
void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
|
||||
u32 model = vcpu->kvm->arch.vgic.vgic_model;
|
||||
struct kvm_vcpu *target_vcpu = NULL;
|
||||
bool mmio = false, is_v2_sgi;
|
||||
struct vgic_irq *irq;
|
||||
unsigned long flags;
|
||||
u64 lr = 0;
|
||||
u8 cpuid;
|
||||
|
||||
/* Snapshot CPUID, and remove it from the INTID */
|
||||
cpuid = FIELD_GET(GENMASK_ULL(12, 10), val);
|
||||
val &= ~GENMASK_ULL(12, 10);
|
||||
|
||||
is_v2_sgi = (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
|
||||
val < VGIC_NR_SGIS);
|
||||
|
||||
/*
|
||||
* We only deal with DIR when EOIMode==1, and only for SGI,
|
||||
* PPI or SPI.
|
||||
*/
|
||||
if (!(cpuif->vgic_vmcr & ICH_VMCR_EOIM_MASK) ||
|
||||
val >= vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)
|
||||
return;
|
||||
|
||||
/* Make sure we're in the same context as LR handling */
|
||||
local_irq_save(flags);
|
||||
|
||||
irq = vgic_get_vcpu_irq(vcpu, val);
|
||||
if (WARN_ON_ONCE(!irq))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* EOIMode=1: we must rely on traps to handle the deactivation of
|
||||
* overflowing interrupts, as there is no ordering guarantee and
|
||||
* EOIcount isn't being incremented. Priority drop will have taken
|
||||
* place, as ICV_EOIxR_EL1 only affects the APRs and not the LRs.
|
||||
*
|
||||
* Three possibilities:
|
||||
*
|
||||
* - The irq is not queued on any CPU, and there is nothing to
|
||||
* do,
|
||||
*
|
||||
* - Or the irq is in an LR, meaning that its state is not
|
||||
* directly observable. Treat it bluntly by making it as if
|
||||
* this was a write to GICD_ICACTIVER, which will force an
|
||||
* exit on all vcpus. If it hurts, don't do that.
|
||||
*
|
||||
* - Or the irq is active, but not in an LR, and we can
|
||||
* directly deactivate it by building a pseudo-LR, fold it,
|
||||
* and queue a request to prune the resulting ap_list,
|
||||
*
|
||||
* Special care must be taken to match the source CPUID when
|
||||
* deactivating a GICv2 SGI.
|
||||
*/
|
||||
scoped_guard(raw_spinlock, &irq->irq_lock) {
|
||||
target_vcpu = irq->vcpu;
|
||||
|
||||
/* Not on any ap_list? */
|
||||
if (!target_vcpu)
|
||||
goto put;
|
||||
|
||||
/*
|
||||
* Urgh. We're deactivating something that we cannot
|
||||
* observe yet... Big hammer time.
|
||||
*/
|
||||
if (irq->on_lr) {
|
||||
mmio = true;
|
||||
goto put;
|
||||
}
|
||||
|
||||
/* GICv2 SGI: check that the cpuid matches */
|
||||
if (is_v2_sgi && irq->active_source != cpuid) {
|
||||
target_vcpu = NULL;
|
||||
goto put;
|
||||
}
|
||||
|
||||
/* (with a Dalek voice) DEACTIVATE!!!! */
|
||||
lr = vgic_v3_compute_lr(vcpu, irq) & ~ICH_LR_ACTIVE_BIT;
|
||||
}
|
||||
|
||||
if (lr & ICH_LR_HW)
|
||||
vgic_v3_deactivate_phys(FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
|
||||
|
||||
vgic_v3_fold_lr(vcpu, lr);
|
||||
|
||||
put:
|
||||
vgic_put_irq(vcpu->kvm, irq);
|
||||
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (mmio)
|
||||
vgic_mmio_write_cactive(vcpu, (val / 32) * 4, 4, BIT(val % 32));
|
||||
|
||||
/* Force the ap_list to be pruned */
|
||||
if (target_vcpu)
|
||||
kvm_make_request(KVM_REQ_VGIC_PROCESS_UPDATE, target_vcpu);
|
||||
}
|
||||
|
||||
/* Requires the irq to be locked already */
|
||||
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
static u64 vgic_v3_compute_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
|
||||
{
|
||||
u32 model = vcpu->kvm->arch.vgic.vgic_model;
|
||||
u64 val = irq->intid;
|
||||
bool allow_pending = true, is_v2_sgi;
|
||||
|
||||
WARN_ON(irq->on_lr);
|
||||
|
||||
is_v2_sgi = (vgic_irq_is_sgi(irq->intid) &&
|
||||
model == KVM_DEV_TYPE_ARM_VGIC_V2);
|
||||
|
||||
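The trapped-DIR path above distinguishes three cases, as the "Three possibilities" comment explains. Restated as a standalone decision helper (types and names are illustrative, not from the patch):

#include <stdbool.h>

enum dir_action {
	DIR_NOTHING,		/* IRQ not queued on any vcpu, or SGI source mismatch */
	DIR_BIG_HAMMER,		/* IRQ resident in an LR: emulate a GICD_ICACTIVER write */
	DIR_FOLD_PSEUDO_LR,	/* active outside the LRs: deactivate directly */
};

struct trapped_irq_state {
	bool queued;		/* irq->vcpu != NULL */
	bool on_lr;		/* currently resident in a list register */
	bool is_v2_sgi;
	bool source_matches;	/* GICv2 SGI: CPUID in DIR matches the active source */
};

static enum dir_action classify_dir_write(const struct trapped_irq_state *s)
{
	if (!s->queued)
		return DIR_NOTHING;
	if (s->on_lr)
		return DIR_BIG_HAMMER;
	if (s->is_v2_sgi && !s->source_matches)
		return DIR_NOTHING;
	return DIR_FOLD_PSEUDO_LR;
}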
@@ -150,6 +342,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
if (allow_pending && irq_is_pending(irq)) {
|
||||
val |= ICH_LR_PENDING_BIT;
|
||||
|
||||
if (is_v2_sgi) {
|
||||
u32 src = ffs(irq->source);
|
||||
|
||||
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
|
||||
irq->intid))
|
||||
return 0;
|
||||
|
||||
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
|
||||
if (irq->source & ~BIT(src - 1))
|
||||
val |= ICH_LR_EOI;
|
||||
}
|
||||
}
|
||||
|
||||
if (irq->group)
|
||||
val |= ICH_LR_GROUP;
|
||||
|
||||
val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
{
|
||||
u32 model = vcpu->kvm->arch.vgic.vgic_model;
|
||||
u64 val = vgic_v3_compute_lr(vcpu, irq);
|
||||
|
||||
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
|
||||
|
||||
if (val & ICH_LR_PENDING_BIT) {
|
||||
if (irq->config == VGIC_CONFIG_EDGE)
|
||||
irq->pending_latch = false;
|
||||
|
||||
@@ -157,16 +378,9 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
|
||||
u32 src = ffs(irq->source);
|
||||
|
||||
if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
|
||||
irq->intid))
|
||||
return;
|
||||
|
||||
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
|
||||
irq->source &= ~(1 << (src - 1));
|
||||
if (irq->source) {
|
||||
irq->source &= ~BIT(src - 1);
|
||||
if (irq->source)
|
||||
irq->pending_latch = true;
|
||||
val |= ICH_LR_EOI;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -179,12 +393,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
|
||||
if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
|
||||
irq->line_level = false;
|
||||
|
||||
if (irq->group)
|
||||
val |= ICH_LR_GROUP;
|
||||
|
||||
val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
|
||||
|
||||
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
|
||||
irq->on_lr = true;
|
||||
}
|
||||
|
||||
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
|
||||
@@ -258,7 +467,7 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
|
||||
GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \
|
||||
GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable))
|
||||
|
||||
void vgic_v3_enable(struct kvm_vcpu *vcpu)
|
||||
void vgic_v3_reset(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
|
||||
|
||||
@@ -288,9 +497,6 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
|
||||
kvm_vgic_global_state.ich_vtr_el2);
|
||||
vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits,
|
||||
kvm_vgic_global_state.ich_vtr_el2) + 1;
|
||||
|
||||
/* Get the show on the road... */
|
||||
vgic_v3->vgic_hcr = ICH_HCR_EL2_En;
|
||||
}
|
||||
|
||||
void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
|
||||
@@ -302,20 +508,9 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
|
||||
|
||||
/* Hide GICv3 sysreg if necessary */
|
||||
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 ||
|
||||
!irqchip_in_kernel(vcpu->kvm)) {
|
||||
!irqchip_in_kernel(vcpu->kvm))
|
||||
vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
|
||||
ICH_HCR_EL2_TC);
|
||||
return;
|
||||
}
|
||||
|
||||
if (group0_trap)
|
||||
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL0;
|
||||
if (group1_trap)
|
||||
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL1;
|
||||
if (common_trap)
|
||||
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TC;
|
||||
if (dir_trap)
|
||||
vgic_v3->vgic_hcr |= ICH_HCR_EL2_TDIR;
|
||||
}
|
||||
|
||||
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
|
||||
@@ -636,8 +831,53 @@ static const struct midr_range broken_seis[] = {
|
||||
|
||||
static bool vgic_v3_broken_seis(void)
|
||||
{
|
||||
return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_SEIS) &&
|
||||
is_midr_in_range_list(broken_seis));
|
||||
return (is_kernel_in_hyp_mode() &&
|
||||
is_midr_in_range_list(broken_seis) &&
|
||||
(read_sysreg_s(SYS_ICH_VTR_EL2) & ICH_VTR_EL2_SEIS));
|
||||
}
|
||||
|
||||
void noinstr kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
|
||||
__le32 *origptr, __le32 *updptr,
|
||||
int nr_inst)
|
||||
{
|
||||
u32 insn, oinsn, rd;
|
||||
u64 hcr = 0;
|
||||
|
||||
if (cpus_have_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
|
||||
group0_trap = true;
|
||||
group1_trap = true;
|
||||
}
|
||||
|
||||
if (vgic_v3_broken_seis()) {
|
||||
/* We know that these machines have ICH_HCR_EL2.TDIR */
|
||||
group0_trap = true;
|
||||
group1_trap = true;
|
||||
dir_trap = true;
|
||||
}
|
||||
|
||||
if (!cpus_have_cap(ARM64_HAS_ICH_HCR_EL2_TDIR))
|
||||
common_trap = true;
|
||||
|
||||
if (group0_trap)
|
||||
hcr |= ICH_HCR_EL2_TALL0;
|
||||
if (group1_trap)
|
||||
hcr |= ICH_HCR_EL2_TALL1;
|
||||
if (common_trap)
|
||||
hcr |= ICH_HCR_EL2_TC;
|
||||
if (dir_trap)
|
||||
hcr |= ICH_HCR_EL2_TDIR;
|
||||
|
||||
/* Compute target register */
|
||||
oinsn = le32_to_cpu(*origptr);
|
||||
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn);
|
||||
|
||||
/* movz rd, #(val & 0xffff) */
|
||||
insn = aarch64_insn_gen_movewide(rd,
|
||||
(u16)hcr,
|
||||
0,
|
||||
AARCH64_INSN_VARIANT_64BIT,
|
||||
AARCH64_INSN_MOVEWIDE_ZERO);
|
||||
*updptr = cpu_to_le32(insn);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -651,6 +891,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
|
||||
{
|
||||
u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
|
||||
bool has_v2;
|
||||
u64 traps;
|
||||
int ret;
|
||||
|
||||
has_v2 = ich_vtr_el2 >> 63;
|
||||
@@ -709,29 +950,18 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
|
||||
if (has_v2)
|
||||
static_branch_enable(&vgic_v3_has_v2_compat);
|
||||
|
||||
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_30115)) {
|
||||
group0_trap = true;
|
||||
group1_trap = true;
|
||||
}
|
||||
|
||||
if (vgic_v3_broken_seis()) {
|
||||
kvm_info("GICv3 with broken locally generated SEI\n");
|
||||
|
||||
kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS;
|
||||
group0_trap = true;
|
||||
group1_trap = true;
|
||||
if (ich_vtr_el2 & ICH_VTR_EL2_TDS)
|
||||
dir_trap = true;
|
||||
else
|
||||
common_trap = true;
|
||||
}
|
||||
|
||||
if (group0_trap || group1_trap || common_trap | dir_trap) {
|
||||
traps = vgic_ich_hcr_trap_bits();
|
||||
if (traps) {
|
||||
kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n",
|
||||
group0_trap ? "G0" : "",
|
||||
group1_trap ? "G1" : "",
|
||||
common_trap ? "C" : "",
|
||||
dir_trap ? "D" : "");
|
||||
(traps & ICH_HCR_EL2_TALL0) ? "G0" : "",
|
||||
(traps & ICH_HCR_EL2_TALL1) ? "G1" : "",
|
||||
(traps & ICH_HCR_EL2_TC) ? "C" : "",
|
||||
(traps & ICH_HCR_EL2_TDIR) ? "D" : "");
|
||||
static_branch_enable(&vgic_v3_cpuif_trap);
|
||||
}
|
||||
|
||||
@@ -771,7 +1001,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
|
||||
}
|
||||
|
||||
if (likely(!is_protected_kvm_enabled()))
|
||||
kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
|
||||
kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
|
||||
WARN_ON(vgic_v4_put(vcpu));
|
||||
|
||||
if (has_vhe())
|
||||
|
||||
@@ -163,6 +163,7 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
|
||||
struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i);
|
||||
struct irq_desc *desc;
|
||||
unsigned long flags;
|
||||
bool pending;
|
||||
int ret;
|
||||
|
||||
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
||||
@@ -173,9 +174,11 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu)
|
||||
irq->hw = false;
|
||||
ret = irq_get_irqchip_state(irq->host_irq,
|
||||
IRQCHIP_STATE_PENDING,
|
||||
&irq->pending_latch);
|
||||
&pending);
|
||||
WARN_ON(ret);
|
||||
|
||||
irq->pending_latch = pending;
|
||||
|
||||
desc = irq_to_desc(irq->host_irq);
|
||||
irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
|
||||
unlock:
|
||||
|
||||
@@ -244,7 +244,7 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active)
|
||||
*
|
||||
* Requires the IRQ lock to be held.
|
||||
*/
|
||||
static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
|
||||
struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
|
||||
{
|
||||
lockdep_assert_held(&irq->irq_lock);
|
||||
|
||||
@@ -272,17 +272,20 @@ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct vgic_sort_info {
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct vgic_vmcr vmcr;
|
||||
};
|
||||
|
||||
/*
|
||||
* The order of items in the ap_lists defines how we'll pack things in LRs as
|
||||
* well, the first items in the list being the first things populated in the
|
||||
* LRs.
|
||||
*
|
||||
* A hard rule is that active interrupts can never be pushed out of the LRs
|
||||
* (and therefore take priority) since we cannot reliably trap on deactivation
|
||||
* of IRQs and therefore they have to be present in the LRs.
|
||||
*
|
||||
* Pending, non-active interrupts must be placed at the head of the list.
|
||||
* Otherwise things should be sorted by the priority field and the GIC
|
||||
* hardware support will take care of preemption of priority groups etc.
|
||||
* Interrupts that are not deliverable should be at the end of the list.
|
||||
*
|
||||
* Return negative if "a" sorts before "b", 0 to preserve order, and positive
|
||||
* to sort "b" before "a".
|
||||
@@ -292,6 +295,8 @@ static int vgic_irq_cmp(void *priv, const struct list_head *a,
|
||||
{
|
||||
struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
|
||||
struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
|
||||
struct vgic_sort_info *info = priv;
|
||||
struct kvm_vcpu *vcpu = info->vcpu;
|
||||
bool penda, pendb;
|
||||
int ret;
|
||||
|
||||
@@ -305,21 +310,32 @@ static int vgic_irq_cmp(void *priv, const struct list_head *a,
|
||||
raw_spin_lock(&irqa->irq_lock);
|
||||
raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
|
||||
|
||||
if (irqa->active || irqb->active) {
|
||||
ret = (int)irqb->active - (int)irqa->active;
|
||||
/* Undeliverable interrupts should be last */
|
||||
ret = (int)(vgic_target_oracle(irqb) == vcpu) - (int)(vgic_target_oracle(irqa) == vcpu);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
penda = irqa->enabled && irq_is_pending(irqa);
|
||||
pendb = irqb->enabled && irq_is_pending(irqb);
|
||||
|
||||
if (!penda || !pendb) {
|
||||
ret = (int)pendb - (int)penda;
|
||||
/* Same thing for interrupts targeting a disabled group */
|
||||
ret = (int)(irqb->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
|
||||
ret -= (int)(irqa->group ? info->vmcr.grpen1 : info->vmcr.grpen0);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Both pending and enabled, sort by priority */
|
||||
ret = irqa->priority - irqb->priority;
|
||||
penda = irqa->enabled && irq_is_pending(irqa) && !irqa->active;
|
||||
pendb = irqb->enabled && irq_is_pending(irqb) && !irqb->active;
|
||||
|
||||
ret = (int)pendb - (int)penda;
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* Both pending and enabled, sort by priority (lower number first) */
|
||||
ret = (int)irqa->priority - (int)irqb->priority;
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* Finally, HW bit active interrupts have priority over non-HW ones */
|
||||
ret = (int)irqb->hw - (int)irqa->hw;
|
||||
|
||||
out:
|
||||
raw_spin_unlock(&irqb->irq_lock);
|
||||
raw_spin_unlock(&irqa->irq_lock);
|
||||
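The comparator above now orders the ap_list by deliverability, group enable, pending state, priority, then the HW bit. The same keys, restated as a qsort-style comparator over plain snapshots (field names illustrative):

#include <stdbool.h>
#include <stdlib.h>

struct irq_snap {
	bool deliverable;	/* vgic_target_oracle() picked this vcpu */
	bool group_enabled;	/* grpen0/grpen1 for the IRQ's group */
	bool pend_not_active;	/* enabled && pending && !active */
	unsigned char priority;	/* lower value == higher priority */
	bool hw;		/* backed by a HW interrupt */
};

static int irq_snap_cmp(const void *pa, const void *pb)
{
	const struct irq_snap *a = pa, *b = pb;

	if (a->deliverable != b->deliverable)
		return b->deliverable - a->deliverable;
	if (a->group_enabled != b->group_enabled)
		return b->group_enabled - a->group_enabled;
	if (a->pend_not_active != b->pend_not_active)
		return b->pend_not_active - a->pend_not_active;
	if (a->priority != b->priority)
		return a->priority - b->priority;
	return b->hw - a->hw;
}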
@@ -330,10 +346,12 @@ out:
|
||||
static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_sort_info info = { .vcpu = vcpu, };
|
||||
|
||||
lockdep_assert_held(&vgic_cpu->ap_list_lock);
|
||||
|
||||
list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
|
||||
vgic_get_vmcr(vcpu, &info.vmcr);
|
||||
list_sort(&info, &vgic_cpu->ap_list_head, vgic_irq_cmp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -356,6 +374,20 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool vgic_model_needs_bcst_kick(struct kvm *kvm)
|
||||
{
|
||||
/*
|
||||
* A GICv3 (or GICv3-like) system exposing a GICv3 to the guest
|
||||
* needs a broadcast kick to set TDIR globally.
|
||||
*
|
||||
* For systems that do not have TDIR (ARM's own v8.0 CPUs), the
|
||||
* shadow TDIR bit is always set, and so is the register's TC bit,
|
||||
* so no need to kick the CPUs.
|
||||
*/
|
||||
return (cpus_have_final_cap(ARM64_HAS_ICH_HCR_EL2_TDIR) &&
|
||||
kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
|
||||
* Do the queuing if necessary, taking the right locks in the right order.
|
||||
@@ -368,6 +400,7 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
|
||||
unsigned long flags) __releases(&irq->irq_lock)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
bool bcast;
|
||||
|
||||
lockdep_assert_held(&irq->irq_lock);
|
||||
|
||||
@@ -442,11 +475,20 @@ retry:
|
||||
list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
|
||||
irq->vcpu = vcpu;
|
||||
|
||||
/* A new SPI may result in deactivation trapping on all vcpus */
|
||||
bcast = (vgic_model_needs_bcst_kick(vcpu->kvm) &&
|
||||
vgic_valid_spi(vcpu->kvm, irq->intid) &&
|
||||
atomic_fetch_inc(&vcpu->kvm->arch.vgic.active_spis) == 0);
|
||||
|
||||
raw_spin_unlock(&irq->irq_lock);
|
||||
raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags);
|
||||
|
||||
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
if (!bcast) {
|
||||
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
|
||||
kvm_vcpu_kick(vcpu);
|
||||
} else {
|
||||
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_IRQ_PENDING);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -798,98 +840,148 @@ static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
|
||||
vgic_v3_clear_lr(vcpu, lr);
|
||||
}
|
||||
|
||||
static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_vgic_global_state.type == VGIC_V2)
|
||||
vgic_v2_set_underflow(vcpu);
|
||||
else
|
||||
vgic_v3_set_underflow(vcpu);
|
||||
}
|
||||
|
||||
/* Requires the ap_list_lock to be held. */
|
||||
static int compute_ap_list_depth(struct kvm_vcpu *vcpu,
|
||||
bool *multi_sgi)
|
||||
static void summarize_ap_list(struct kvm_vcpu *vcpu,
|
||||
struct ap_list_summary *als)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct vgic_irq *irq;
|
||||
int count = 0;
|
||||
|
||||
*multi_sgi = false;
|
||||
|
||||
lockdep_assert_held(&vgic_cpu->ap_list_lock);
|
||||
|
||||
*als = (typeof(*als)){};
|
||||
|
||||
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
|
||||
int w;
|
||||
guard(raw_spinlock)(&irq->irq_lock);
|
||||
|
||||
raw_spin_lock(&irq->irq_lock);
|
||||
/* GICv2 SGIs can count for more than one... */
|
||||
w = vgic_irq_get_lr_count(irq);
|
||||
raw_spin_unlock(&irq->irq_lock);
|
||||
if (unlikely(vgic_target_oracle(irq) != vcpu))
|
||||
continue;
|
||||
|
||||
count += w;
|
||||
*multi_sgi |= (w > 1);
|
||||
if (!irq->active)
|
||||
als->nr_pend++;
|
||||
else
|
||||
als->nr_act++;
|
||||
|
||||
if (irq->intid < VGIC_NR_SGIS)
|
||||
als->nr_sgi++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/* Requires the VCPU's ap_list_lock to be held. */
|
||||
/*
* Dealing with LR overflow is close to black magic -- dress accordingly.
*
* We have to present an almost infinite number of interrupts through a very
* limited number of registers. Therefore crucial decisions must be made to
* ensure we feed the most relevant interrupts into the LRs, and yet have
* some facilities to let the guest interact with those that are not there.
*
* All considerations below are in the context of interrupts targeting a
* single vcpu with non-idle state (either pending, active, or both),
* colloquially called the ap_list:
*
* - Pending interrupts must have priority over active interrupts. This also
*   excludes pending+active interrupts. This ensures that a guest can
*   perform priority drops on any number of interrupts, and yet be
*   presented the next pending one.
*
* - Deactivation of interrupts outside of the LRs must be tracked, either
*   via the EOIcount-driven maintenance interrupt or, in some cases, by
*   trapping the DIR register.
*
* - For EOImode=0, a non-zero EOIcount means walking the ap_list past the
*   point that made it into the LRs, and deactivating interrupts that would
*   have made it onto the LRs if we had the space.
*
* - The MI-generation bits must be used to try and force an exit when the
*   guest has done enough changes to the LRs that we want to reevaluate the
*   situation:
*
*   - if the total number of pending interrupts exceeds the number of
*     LRs, NPIE must be set in order to exit once no pending interrupts
*     are present in the LRs, allowing us to populate the next batch.
*
*   - if there are active interrupts outside of the LRs, then LRENPIE
*     must be set so that we exit on deactivation of one of these, and
*     work out which one is to be deactivated. Note that this is not
*     enough to deal with EOImode=1, see below.
*
*   - if the overall number of interrupts exceeds the number of LRs,
*     then UIE must be set to allow refilling of the LRs once the
*     majority of them have been processed.
*
*   - as usual, MI triggers are only an optimisation, since we cannot
*     rely on the MI being delivered in a timely manner...
*
* - EOImode=1 creates some additional problems:
*
*   - deactivation can happen in any order, and we cannot rely on
*     EOImode=0's coupling of priority-drop and deactivation which
*     imposes strict reverse Ack order. This means that DIR must
*     trap if we have active interrupts outside of the LRs.
*
*   - deactivation of SPIs can occur on any CPU, while the SPI is only
*     present in the ap_list of the CPU that actually acked it. In that
*     case, EOIcount doesn't provide enough information, and we must
*     resort to trapping DIR even if we don't overflow the LRs. Bonus
*     points for not trapping DIR when no SPIs are pending or active in
*     the whole VM.
*
*   - LPIs do not suffer the same problem as SPIs on deactivation, as we
*     have to essentially discard the active state, see below.
*
* - Virtual LPIs have an active state (surprise!), which gets removed on
*   priority drop (EOI). However, EOIcount doesn't get bumped when the LPI
*   is not present in the LR (surprise again!). Special care must therefore
*   be taken to remove the active state from any activated LPI when exiting
*   from the guest. This is in a way no different from what happens on the
*   physical side. We still rely on the running priority to have been
*   removed from the APRs, irrespective of the LPI being present in the LRs
*   or not.
*
* - Virtual SGIs directly injected via GICv4.1 must not affect EOIcount, as
*   they are not managed in SW and don't have a true active state. So only
*   set vSGIEOICount when no SGIs are in the ap_list.
*
* - GICv2 SGIs with multiple sources are injected one source at a time, as
*   if they were made pending sequentially. This may mean that we don't
*   always present the HPPI if other interrupts with lower priority are
*   pending in the LRs. Big deal.
*/
|
||||
static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
|
||||
struct ap_list_summary als;
|
||||
struct vgic_irq *irq;
|
||||
int count;
|
||||
bool multi_sgi;
|
||||
u8 prio = 0xff;
|
||||
int i = 0;
|
||||
int count = 0;
|
||||
|
||||
lockdep_assert_held(&vgic_cpu->ap_list_lock);
|
||||
|
||||
count = compute_ap_list_depth(vcpu, &multi_sgi);
|
||||
if (count > kvm_vgic_global_state.nr_lr || multi_sgi)
|
||||
summarize_ap_list(vcpu, &als);
|
||||
|
||||
if (irqs_outside_lrs(&als))
|
||||
vgic_sort_ap_list(vcpu);
|
||||
|
||||
count = 0;
|
||||
|
||||
list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
|
||||
raw_spin_lock(&irq->irq_lock);
|
||||
scoped_guard(raw_spinlock, &irq->irq_lock) {
|
||||
if (likely(vgic_target_oracle(irq) == vcpu)) {
|
||||
vgic_populate_lr(vcpu, irq, count++);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have multi-SGIs in the pipeline, we need to
|
||||
* guarantee that they are all seen before any IRQ of
|
||||
* lower priority. In that case, we need to filter out
|
||||
* these interrupts by exiting early. This is easy as
|
||||
* the AP list has been sorted already.
|
||||
*/
|
||||
if (multi_sgi && irq->priority > prio) {
|
||||
raw_spin_unlock(&irq->irq_lock);
|
||||
if (count == kvm_vgic_global_state.nr_lr)
|
||||
break;
|
||||
}
|
||||
|
||||
if (likely(vgic_target_oracle(irq) == vcpu)) {
|
||||
vgic_populate_lr(vcpu, irq, count++);
|
||||
|
||||
if (irq->source)
|
||||
prio = irq->priority;
|
||||
}
|
||||
|
||||
raw_spin_unlock(&irq->irq_lock);
|
||||
|
||||
if (count == kvm_vgic_global_state.nr_lr) {
|
||||
if (!list_is_last(&irq->ap_list,
|
||||
&vgic_cpu->ap_list_head))
|
||||
vgic_set_underflow(vcpu);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Nuke remaining LRs */
|
||||
for (i = count ; i < kvm_vgic_global_state.nr_lr; i++)
|
||||
for (int i = count ; i < kvm_vgic_global_state.nr_lr; i++)
|
||||
vgic_clear_lr(vcpu, i);
|
||||
|
||||
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
|
||||
vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count;
|
||||
else
|
||||
vgic_v2_configure_hcr(vcpu, &als);
|
||||
} else {
|
||||
vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count;
|
||||
vgic_v3_configure_hcr(vcpu, &als);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool can_access_vgic_from_kernel(void)
|
||||
@@ -913,8 +1005,6 @@ static inline void vgic_save_state(struct kvm_vcpu *vcpu)
|
||||
/* Sync back the hardware VGIC state into our emulation after a guest's run. */
|
||||
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
int used_lrs;
|
||||
|
||||
/* If nesting, emulate the HW effect from L0 to L1 */
|
||||
if (vgic_state_is_nested(vcpu)) {
|
||||
vgic_v3_sync_nested(vcpu);
|
||||
@@ -924,23 +1014,24 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
|
||||
if (vcpu_has_nv(vcpu))
|
||||
vgic_v3_nested_update_mi(vcpu);
|
||||
|
||||
/* An empty ap_list_head implies used_lrs == 0 */
|
||||
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
|
||||
return;
|
||||
|
||||
if (can_access_vgic_from_kernel())
|
||||
vgic_save_state(vcpu);
|
||||
|
||||
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs;
|
||||
else
|
||||
used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs;
|
||||
|
||||
if (used_lrs)
|
||||
vgic_fold_lr_state(vcpu);
|
||||
vgic_fold_lr_state(vcpu);
|
||||
vgic_prune_ap_list(vcpu);
|
||||
}
|
||||
|
||||
/* Sync interrupts that were deactivated through a DIR trap */
|
||||
void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
/* Make sure we're in the same context as LR handling */
|
||||
local_irq_save(flags);
|
||||
vgic_prune_ap_list(vcpu);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif))
|
||||
@@ -965,8 +1056,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
|
||||
* abort the entry procedure and inject the exception at the
|
||||
* beginning of the run loop.
|
||||
*
|
||||
* - Otherwise, do exactly *NOTHING*. The guest state is
|
||||
* already loaded, and we can carry on with running it.
|
||||
* - Otherwise, do exactly *NOTHING* apart from enabling the virtual
|
||||
* CPU interface. The guest state is already loaded, and we can
|
||||
* carry on with running it.
|
||||
*
|
||||
* If we have NV, but are not in a nested state, compute the
|
||||
* maintenance interrupt state, as it may fire.
|
||||
@@ -975,35 +1067,17 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
|
||||
if (kvm_vgic_vcpu_pending_irq(vcpu))
|
||||
kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
|
||||
|
||||
vgic_v3_flush_nested(vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
if (vcpu_has_nv(vcpu))
|
||||
vgic_v3_nested_update_mi(vcpu);
|
||||
|
||||
/*
|
||||
* If there are no virtual interrupts active or pending for this
|
||||
* VCPU, then there is no work to do and we can bail out without
|
||||
* taking any lock. There is a potential race with someone injecting
|
||||
* interrupts to the VCPU, but it is a benign race as the VCPU will
|
||||
* either observe the new interrupt before or after doing this check,
|
||||
* and introducing additional synchronization mechanism doesn't change
|
||||
* this.
|
||||
*
|
||||
* Note that we still need to go through the whole thing if anything
|
||||
* can be directly injected (GICv4).
|
||||
*/
|
||||
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
|
||||
!vgic_supports_direct_irqs(vcpu->kvm))
|
||||
return;
|
||||
|
||||
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
|
||||
|
||||
if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) {
|
||||
raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
|
||||
scoped_guard(raw_spinlock, &vcpu->arch.vgic_cpu.ap_list_lock)
|
||||
vgic_flush_lr_state(vcpu);
|
||||
raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
|
||||
}
|
||||
|
||||
if (can_access_vgic_from_kernel())
|
||||
vgic_restore_state(vcpu);
|
||||
|
||||
@@ -164,6 +164,22 @@ static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvm_compute_ich_hcr_trap_bits(struct alt_instr *alt,
|
||||
__le32 *origptr, __le32 *updptr, int nr_inst);
|
||||
|
||||
static inline u64 vgic_ich_hcr_trap_bits(void)
|
||||
{
|
||||
u64 hcr;
|
||||
|
||||
/* All the traps are in the bottom 16bits */
|
||||
asm volatile(ALTERNATIVE_CB("movz %0, #0\n",
|
||||
ARM64_ALWAYS_SYSTEM,
|
||||
kvm_compute_ich_hcr_trap_bits)
|
||||
: "=r" (hcr));
|
||||
|
||||
return hcr;
|
||||
}
|
||||
|
||||
/*
|
||||
* This struct provides an intermediate representation of the fields contained
|
||||
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
|
||||
@@ -220,6 +236,21 @@ struct its_ite {
|
||||
u32 event_id;
|
||||
};
|
||||
|
||||
struct ap_list_summary {
|
||||
unsigned int nr_pend; /* purely pending, not active */
|
||||
unsigned int nr_act; /* active, or active+pending */
|
||||
unsigned int nr_sgi; /* any SGI */
|
||||
};
|
||||
|
||||
#define irqs_outside_lrs(s) \
|
||||
(((s)->nr_pend + (s)->nr_act) > kvm_vgic_global_state.nr_lr)
|
||||
|
||||
#define irqs_pending_outside_lrs(s) \
|
||||
((s)->nr_pend > kvm_vgic_global_state.nr_lr)
|
||||
|
||||
#define irqs_active_outside_lrs(s) \
|
||||
((s)->nr_act && irqs_outside_lrs(s))
|
||||
|
||||
int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
|
||||
struct vgic_reg_attr *reg_attr);
|
||||
int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
|
||||
@@ -230,6 +261,7 @@ vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
|
||||
struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid);
|
||||
struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid);
|
||||
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
|
||||
struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq);
|
||||
bool vgic_get_phys_line_level(struct vgic_irq *irq);
|
||||
void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending);
|
||||
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
|
||||
@@ -245,8 +277,9 @@ int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr,
|
||||
|
||||
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
|
||||
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
|
||||
void vgic_v2_deactivate(struct kvm_vcpu *vcpu, u32 val);
|
||||
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
|
||||
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
|
||||
void vgic_v2_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als);
|
||||
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
|
||||
int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
|
||||
int offset, u32 *val);
|
||||
@@ -254,7 +287,7 @@ int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
|
||||
int offset, u32 *val);
|
||||
void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_v2_enable(struct kvm_vcpu *vcpu);
|
||||
void vgic_v2_reset(struct kvm_vcpu *vcpu);
|
||||
int vgic_v2_probe(const struct gic_kvm_info *info);
|
||||
int vgic_v2_map_resources(struct kvm *kvm);
|
||||
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
|
||||
@@ -286,10 +319,11 @@ static inline void vgic_get_irq_ref(struct vgic_irq *irq)
|
||||
void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
|
||||
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
|
||||
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_deactivate(struct kvm_vcpu *vcpu, u64 val);
|
||||
void vgic_v3_configure_hcr(struct kvm_vcpu *vcpu, struct ap_list_summary *als);
|
||||
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
|
||||
void vgic_v3_enable(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_reset(struct kvm_vcpu *vcpu);
|
||||
int vgic_v3_probe(const struct gic_kvm_info *info);
|
||||
int vgic_v3_map_resources(struct kvm *kvm);
|
||||
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq);
|
||||
@@ -412,6 +446,7 @@ static inline bool kvm_has_gicv3(struct kvm *kvm)
|
||||
return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
|
||||
}
|
||||
|
||||
void vgic_v3_flush_nested(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_sync_nested(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
|
||||
void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
|
||||
|
||||
@@ -40,6 +40,7 @@ HAS_GICV5_CPUIF
|
||||
HAS_GICV5_LEGACY
|
||||
HAS_GIC_PRIO_MASKING
|
||||
HAS_GIC_PRIO_RELAXED_SYNC
|
||||
HAS_ICH_HCR_EL2_TDIR
|
||||
HAS_HCR_NV1
|
||||
HAS_HCX
|
||||
HAS_LDAPR
|
||||
@@ -64,6 +65,7 @@ HAS_TLB_RANGE
|
||||
HAS_VA52
|
||||
HAS_VIRT_HOST_EXTN
|
||||
HAS_WFXT
|
||||
HAS_XNX
|
||||
HAFT
|
||||
HW_DBM
|
||||
KVM_HVHE
|
||||
|
||||
@@ -411,12 +411,15 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
|
||||
if (is_kernel_in_hyp_mode() &&
|
||||
(read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) &&
|
||||
read_sysreg_s(SYS_ICH_MISR_EL2) != 0) {
|
||||
u64 val;
|
||||
|
||||
generic_handle_domain_irq(aic_irqc->hw_domain,
|
||||
AIC_FIQ_HWIRQ(AIC_VGIC_MI));
|
||||
|
||||
if (unlikely((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) &&
|
||||
read_sysreg_s(SYS_ICH_MISR_EL2))) {
|
||||
pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n");
|
||||
(val = read_sysreg_s(SYS_ICH_MISR_EL2)))) {
|
||||
pr_err_ratelimited("vGIC IRQ fired and not handled by KVM (MISR=%llx), disabling.\n",
|
||||
val);
|
||||
sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1459,6 +1459,8 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base;
|
||||
|
||||
if (static_branch_likely(&supports_deactivate_key))
|
||||
vgic_set_kvm_info(&gic_v2_kvm_info);
|
||||
}
|
||||
@@ -1620,6 +1622,7 @@ static void __init gic_acpi_setup_kvm_info(void)
|
||||
return;
|
||||
|
||||
gic_v2_kvm_info.maint_irq = irq;
|
||||
gic_v2_kvm_info.gicc_base = gic_data[0].cpu_base.common_base;
|
||||
|
||||
vgic_set_kvm_info(&gic_v2_kvm_info);
|
||||
}
|
||||
|
||||
@@ -59,6 +59,9 @@ struct vgic_global {
|
||||
/* virtual control interface mapping, HYP VA */
|
||||
void __iomem *vctrl_hyp;
|
||||
|
||||
/* Physical CPU interface, kernel VA */
|
||||
void __iomem *gicc_base;
|
||||
|
||||
/* Number of implemented list registers */
|
||||
int nr_lr;
|
||||
|
||||
@@ -120,6 +123,7 @@ struct irq_ops {
|
||||
|
||||
struct vgic_irq {
|
||||
raw_spinlock_t irq_lock; /* Protects the content of the struct */
|
||||
u32 intid; /* Guest visible INTID */
|
||||
struct rcu_head rcu;
|
||||
struct list_head ap_list;
|
||||
|
||||
@@ -134,17 +138,18 @@ struct vgic_irq {
|
||||
* affinity reg (v3).
|
||||
*/
|
||||
|
||||
u32 intid; /* Guest visible INTID */
|
||||
bool line_level; /* Level only */
|
||||
bool pending_latch; /* The pending latch state used to calculate
|
||||
* the pending state for both level
|
||||
* and edge triggered IRQs. */
|
||||
bool active;
|
||||
bool pending_release; /* Used for LPIs only, unreferenced IRQ
|
||||
bool pending_release:1; /* Used for LPIs only, unreferenced IRQ
|
||||
* pending a release */
|
||||
|
||||
bool enabled;
|
||||
bool hw; /* Tied to HW IRQ */
|
||||
bool pending_latch:1; /* The pending latch state used to calculate
|
||||
* the pending state for both level
|
||||
* and edge triggered IRQs. */
|
||||
enum vgic_irq_config config:1; /* Level or edge */
|
||||
bool line_level:1; /* Level only */
|
||||
bool enabled:1;
|
||||
bool active:1;
|
||||
bool hw:1; /* Tied to HW IRQ */
|
||||
bool on_lr:1; /* Present in a CPU LR */
|
||||
refcount_t refcount; /* Used for LPIs */
|
||||
u32 hwintid; /* HW INTID number */
|
||||
unsigned int host_irq; /* linux irq corresponding to hwintid */
|
||||
@@ -156,7 +161,6 @@ struct vgic_irq {
|
||||
u8 active_source; /* GICv2 SGIs only */
|
||||
u8 priority;
|
||||
u8 group; /* 0 == group 0, 1 == group 1 */
|
||||
enum vgic_irq_config config; /* Level or edge */
|
||||
|
||||
struct irq_ops *ops;
|
||||
|
||||
@@ -259,6 +263,9 @@ struct vgic_dist {
|
||||
/* The GIC maintenance IRQ for nested hypervisors. */
|
||||
u32 mi_intid;
|
||||
|
||||
/* Track the number of in-flight active SPIs */
|
||||
atomic_t active_spis;
|
||||
|
||||
/* base addresses in guest physical address space: */
|
||||
gpa_t vgic_dist_base; /* distributor */
|
||||
union {
|
||||
@@ -280,6 +287,7 @@ struct vgic_dist {
|
||||
struct vgic_irq *spis;
|
||||
|
||||
struct vgic_io_device dist_iodev;
|
||||
struct vgic_io_device cpuif_iodev;
|
||||
|
||||
bool has_its;
|
||||
bool table_write_in_progress;
|
||||
@@ -417,6 +425,7 @@ bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu);
|
||||
void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
|
||||
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
|
||||
void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
|
||||
void kvm_vgic_process_async_update(struct kvm_vcpu *vcpu);
|
||||
|
||||
void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1);
|
||||
|
||||
|
||||
@@ -86,7 +86,13 @@
|
||||
|
||||
#define GICH_HCR_EN (1 << 0)
|
||||
#define GICH_HCR_UIE (1 << 1)
|
||||
#define GICH_HCR_LRENPIE (1 << 2)
|
||||
#define GICH_HCR_NPIE (1 << 3)
|
||||
#define GICH_HCR_VGrp0EIE (1 << 4)
|
||||
#define GICH_HCR_VGrp0DIE (1 << 5)
|
||||
#define GICH_HCR_VGrp1EIE (1 << 6)
|
||||
#define GICH_HCR_VGrp1DIE (1 << 7)
|
||||
#define GICH_HCR_EOICOUNT GENMASK(31, 27)
|
||||
|
||||
#define GICH_LR_VIRTUALID (0x3ff << 0)
|
||||
#define GICH_LR_PHYSID_CPUID_SHIFT (10)
|
||||
|
||||
@@ -24,6 +24,8 @@ struct gic_kvm_info {
|
||||
enum gic_type type;
|
||||
/* Virtual CPU interface */
|
||||
struct resource vcpu;
|
||||
/* GICv2 GICC VA */
|
||||
void __iomem *gicc_base;
|
||||
/* Interrupt number */
|
||||
unsigned int maint_irq;
|
||||
/* No interrupt mask, no need to use the above field */
|
||||
|
||||
@@ -179,6 +179,7 @@ struct kvm_xen_exit {
|
||||
#define KVM_EXIT_LOONGARCH_IOCSR 38
|
||||
#define KVM_EXIT_MEMORY_FAULT 39
|
||||
#define KVM_EXIT_TDX 40
|
||||
#define KVM_EXIT_ARM_SEA 41
|
||||
|
||||
/* For KVM_EXIT_INTERNAL_ERROR */
|
||||
/* Emulate instruction failed. */
|
||||
@@ -473,6 +474,14 @@ struct kvm_run {
|
||||
} setup_event_notify;
|
||||
};
|
||||
} tdx;
|
||||
/* KVM_EXIT_ARM_SEA */
|
||||
struct {
|
||||
#define KVM_EXIT_ARM_SEA_FLAG_GPA_VALID (1ULL << 0)
|
||||
__u64 flags;
|
||||
__u64 esr;
|
||||
__u64 gva;
|
||||
__u64 gpa;
|
||||
} arm_sea;
|
||||
/* Fix the size of the union. */
|
||||
char padding[256];
|
||||
};
|
||||
@@ -963,6 +972,7 @@ struct kvm_enable_cap {
|
||||
#define KVM_CAP_RISCV_MP_STATE_RESET 242
|
||||
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
|
||||
#define KVM_CAP_GUEST_MEMFD_FLAGS 244
|
||||
#define KVM_CAP_ARM_SEA_TO_USER 245
|
||||
|
||||
struct kvm_irq_routing_irqchip {
|
||||
__u32 irqchip;
|
||||
|
||||
@@ -141,6 +141,8 @@
|
||||
#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
|
||||
#define ESR_ELx_AR_SHIFT (14)
|
||||
#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
|
||||
#define ESR_ELx_VNCR_SHIFT (13)
|
||||
#define ESR_ELx_VNCR (UL(1) << ESR_ELx_VNCR_SHIFT)
|
||||
#define ESR_ELx_CM_SHIFT (8)
|
||||
#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
|
||||
|
||||
|
||||
@@ -158,6 +158,7 @@ TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
|
||||
TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
|
||||
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
|
||||
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
|
||||
TEST_GEN_PROGS_arm64 += arm64/at
|
||||
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
|
||||
TEST_GEN_PROGS_arm64 += arm64/hello_el2
|
||||
TEST_GEN_PROGS_arm64 += arm64/host_sve
|
||||
@@ -165,6 +166,7 @@ TEST_GEN_PROGS_arm64 += arm64/hypercalls
|
||||
TEST_GEN_PROGS_arm64 += arm64/external_aborts
|
||||
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
|
||||
TEST_GEN_PROGS_arm64 += arm64/psci_test
|
||||
TEST_GEN_PROGS_arm64 += arm64/sea_to_user
|
||||
TEST_GEN_PROGS_arm64 += arm64/set_id_regs
|
||||
TEST_GEN_PROGS_arm64 += arm64/smccc_filter
|
||||
TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
|
||||
|
||||
tools/testing/selftests/kvm/arm64/at.c (new file, 166 lines)
@@ -0,0 +1,166 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* at - Test for KVM's AT emulation in the EL2&0 and EL1&0 translation regimes.
|
||||
*/
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "test_util.h"
|
||||
#include "ucall.h"
|
||||
|
||||
#include <asm/sysreg.h>
|
||||
|
||||
#define TEST_ADDR 0x80000000
|
||||
|
||||
enum {
|
||||
CLEAR_ACCESS_FLAG,
|
||||
TEST_ACCESS_FLAG,
|
||||
};
|
||||
|
||||
static u64 *ptep_hva;
|
||||
|
||||
#define copy_el2_to_el1(reg) \
|
||||
write_sysreg_s(read_sysreg_s(SYS_##reg##_EL1), SYS_##reg##_EL12)
|
||||
|
||||
/* Yes, this is an ugly hack */
|
||||
#define __at(op, addr) write_sysreg_s(addr, op)
|
||||
|
||||
#define test_at_insn(op, expect_fault) \
|
||||
do { \
|
||||
u64 par, fsc; \
|
||||
bool fault; \
|
||||
\
|
||||
GUEST_SYNC(CLEAR_ACCESS_FLAG); \
|
||||
\
|
||||
__at(OP_AT_##op, TEST_ADDR); \
|
||||
isb(); \
|
||||
par = read_sysreg(par_el1); \
|
||||
\
|
||||
fault = par & SYS_PAR_EL1_F; \
|
||||
fsc = FIELD_GET(SYS_PAR_EL1_FST, par); \
|
||||
\
|
||||
__GUEST_ASSERT((expect_fault) == fault, \
"AT "#op": %sexpected fault (par: %lx)", \
(expect_fault) ? "" : "un", par); \
|
||||
if ((expect_fault)) { \
|
||||
__GUEST_ASSERT(fsc == ESR_ELx_FSC_ACCESS_L(3), \
|
||||
"AT "#op": expected access flag fault (par: %lx)", \
|
||||
par); \
|
||||
} else { \
|
||||
GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_ATTR, par), MAIR_ATTR_NORMAL); \
|
||||
GUEST_ASSERT_EQ(FIELD_GET(SYS_PAR_EL1_SH, par), PTE_SHARED >> 8); \
|
||||
GUEST_ASSERT_EQ(par & SYS_PAR_EL1_PA, TEST_ADDR); \
|
||||
GUEST_SYNC(TEST_ACCESS_FLAG); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
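/* Run the AT S1E2{R,W} checks at EL2, then mirror the EL2 stage-1 context into EL1, leave the host context and run the AT S1E1{R,W} checks. */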
static void test_at(bool expect_fault)
|
||||
{
|
||||
test_at_insn(S1E2R, expect_fault);
|
||||
test_at_insn(S1E2W, expect_fault);
|
||||
|
||||
/* Reuse the stage-1 MMU context from EL2 at EL1 */
|
||||
copy_el2_to_el1(SCTLR);
|
||||
copy_el2_to_el1(MAIR);
|
||||
copy_el2_to_el1(TCR);
|
||||
copy_el2_to_el1(TTBR0);
|
||||
copy_el2_to_el1(TTBR1);
|
||||
|
||||
/* Disable stage-2 translation and enter a non-host context */
|
||||
write_sysreg(0, vtcr_el2);
|
||||
write_sysreg(0, vttbr_el2);
|
||||
sysreg_clear_set(hcr_el2, HCR_EL2_TGE | HCR_EL2_VM, 0);
|
||||
isb();
|
||||
|
||||
test_at_insn(S1E1R, expect_fault);
|
||||
test_at_insn(S1E1W, expect_fault);
|
||||
}
|
||||
|
||||
static void guest_code(void)
|
||||
{
|
||||
sysreg_clear_set(tcr_el1, TCR_HA, 0);
|
||||
isb();
|
||||
|
||||
test_at(true);
|
||||
|
||||
if (!SYS_FIELD_GET(ID_AA64MMFR1_EL1, HAFDBS, read_sysreg(id_aa64mmfr1_el1)))
|
||||
GUEST_DONE();
|
||||
|
||||
/*
|
||||
* KVM's software PTW makes the implementation choice that the AT
|
||||
* instruction sets the access flag.
|
||||
*/
|
||||
sysreg_clear_set(tcr_el1, 0, TCR_HA);
|
||||
isb();
|
||||
test_at(false);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
|
||||
{
|
||||
switch (uc->args[1]) {
|
||||
case CLEAR_ACCESS_FLAG:
|
||||
/*
|
||||
* Delete + reinstall the memslot to invalidate stage-2
|
||||
* mappings of the stage-1 page tables, forcing KVM to
|
||||
* use the 'slow' AT emulation path.
|
||||
*
|
||||
* This and clearing the access flag from host userspace
|
||||
* ensures that the access flag cannot be set speculatively
|
||||
* and is reliably cleared at the time of the AT instruction.
|
||||
*/
|
||||
clear_bit(__ffs(PTE_AF), ptep_hva);
|
||||
vm_mem_region_reload(vcpu->vm, vcpu->vm->memslots[MEM_REGION_PT]);
|
||||
break;
|
||||
case TEST_ACCESS_FLAG:
|
||||
TEST_ASSERT(test_bit(__ffs(PTE_AF), ptep_hva),
|
||||
"Expected access flag to be set (desc: %lu)", *ptep_hva);
|
||||
break;
|
||||
default:
|
||||
TEST_FAIL("Unexpected SYNC arg: %lu", uc->args[1]);
|
||||
}
|
||||
}
|
||||
|
||||
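/* Run the vCPU, servicing GUEST_SYNC requests, until the guest reports completion or an assertion failure. */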
static void run_test(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct ucall uc;
|
||||
|
||||
while (true) {
|
||||
vcpu_run(vcpu);
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_DONE:
|
||||
return;
|
||||
case UCALL_SYNC:
|
||||
handle_sync(vcpu, &uc);
|
||||
continue;
|
||||
case UCALL_ABORT:
|
||||
REPORT_GUEST_ASSERT(uc);
|
||||
return;
|
||||
default:
|
||||
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
struct kvm_vcpu_init init;
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_vm *vm;
|
||||
|
||||
TEST_REQUIRE(kvm_check_cap(KVM_CAP_ARM_EL2));
|
||||
|
||||
vm = vm_create(1);
|
||||
|
||||
kvm_get_default_vcpu_target(vm, &init);
|
||||
init.features[0] |= BIT(KVM_ARM_VCPU_HAS_EL2);
|
||||
vcpu = aarch64_vcpu_add(vm, 0, &init, guest_code);
|
||||
kvm_arch_vm_finalize_vcpus(vm);
|
||||
|
||||
virt_map(vm, TEST_ADDR, TEST_ADDR, 1);
|
||||
ptep_hva = virt_get_pte_hva_at_level(vm, TEST_ADDR, 3);
|
||||
run_test(vcpu);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
return 0;
|
||||
}
|
||||
tools/testing/selftests/kvm/arm64/sea_to_user.c (new file, 331 lines)
@@ -0,0 +1,331 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
* Test that KVM returns to userspace with KVM_EXIT_ARM_SEA if host APEI fails
* to handle an SEA and userspace has opted in to KVM_CAP_ARM_SEA_TO_USER.
*
* After reaching userspace with the expected arm_sea info, also test userspace
* injecting a synchronous external data abort into the guest.
*
* This test utilizes EINJ to generate a REAL synchronous external data
* abort by consuming a recoverable uncorrectable memory error. Therefore
* the device under test must support EINJ in both firmware and host kernel,
* including the notrigger feature; otherwise the test will be skipped.
* APEI on the platform under test must also be unable to claim the SEA;
* otherwise the test will likewise be skipped.
*/
|
||||
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
#include "guest_modes.h"
|
||||
|
||||
#define PAGE_PRESENT (1ULL << 63)
|
||||
#define PAGE_PHYSICAL 0x007fffffffffffffULL
|
||||
#define PAGE_ADDR_MASK (~(0xfffULL))
|
||||
|
||||
/* Group ISV and ISS[23:14]. */
|
||||
#define ESR_ELx_INST_SYNDROME ((ESR_ELx_ISV) | (ESR_ELx_SAS) | \
|
||||
(ESR_ELx_SSE) | (ESR_ELx_SRT_MASK) | \
|
||||
(ESR_ELx_SF) | (ESR_ELx_AR))
|
||||
|
||||
#define EINJ_ETYPE "/sys/kernel/debug/apei/einj/error_type"
|
||||
#define EINJ_ADDR "/sys/kernel/debug/apei/einj/param1"
|
||||
#define EINJ_MASK "/sys/kernel/debug/apei/einj/param2"
|
||||
#define EINJ_FLAGS "/sys/kernel/debug/apei/einj/flags"
|
||||
#define EINJ_NOTRIGGER "/sys/kernel/debug/apei/einj/notrigger"
|
||||
#define EINJ_DOIT "/sys/kernel/debug/apei/einj/error_inject"
|
||||
/* Memory Uncorrectable non-fatal. */
|
||||
#define ERROR_TYPE_MEMORY_UER 0x10
|
||||
/* Memory address and mask valid (param1 and param2). */
|
||||
#define MASK_MEMORY_UER 0b10
|
||||
|
||||
/* Guest virtual address region = [2G, 3G). */
|
||||
#define START_GVA 0x80000000UL
|
||||
#define VM_MEM_SIZE 0x40000000UL
|
||||
/* Note: EINJ_OFFSET must be < VM_MEM_SIZE. */
|
||||
#define EINJ_OFFSET 0x01234badUL
|
||||
#define EINJ_GVA ((START_GVA) + (EINJ_OFFSET))
|
||||
|
||||
static vm_paddr_t einj_gpa;
|
||||
static void *einj_hva;
|
||||
static uint64_t einj_hpa;
|
||||
static bool far_invalid;
|
||||
|
||||
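/* Translate a host virtual address into a host physical address using /proc/self/pagemap. */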
static uint64_t translate_to_host_paddr(unsigned long vaddr)
|
||||
{
|
||||
uint64_t pinfo;
|
||||
int64_t offset = vaddr / getpagesize() * sizeof(pinfo);
|
||||
int fd;
|
||||
uint64_t page_addr;
|
||||
uint64_t paddr;
|
||||
|
||||
fd = open("/proc/self/pagemap", O_RDONLY);
|
||||
if (fd < 0)
|
||||
ksft_exit_fail_perror("Failed to open /proc/self/pagemap");
|
||||
if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
|
||||
close(fd);
|
||||
ksft_exit_fail_perror("Failed to read /proc/self/pagemap");
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
||||
if ((pinfo & PAGE_PRESENT) == 0)
|
||||
ksft_exit_fail_perror("Page not present");
|
||||
|
||||
page_addr = (pinfo & PAGE_PHYSICAL) << MIN_PAGE_SHIFT;
|
||||
paddr = page_addr + (vaddr & (getpagesize() - 1));
|
||||
return paddr;
|
||||
}
|
||||
|
||||
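/* Write a value to an EINJ debugfs file via the shell, failing the test if the write does not succeed. */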
static void write_einj_entry(const char *einj_path, uint64_t val)
|
||||
{
|
||||
char cmd[256] = {0};
|
||||
FILE *cmdfile = NULL;
|
||||
|
||||
sprintf(cmd, "echo %#lx > %s", val, einj_path);
|
||||
cmdfile = popen(cmd, "r");
|
||||
|
||||
if (pclose(cmdfile) == 0)
|
||||
ksft_print_msg("echo %#lx > %s - done\n", val, einj_path);
|
||||
else
|
||||
ksft_exit_fail_perror("Failed to write EINJ entry");
|
||||
}
|
||||
|
||||
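/* Plant a recoverable uncorrectable memory error at the given host physical address via EINJ in notrigger mode, skipping the test if EINJ is unavailable. */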
static void inject_uer(uint64_t paddr)
|
||||
{
|
||||
if (access("/sys/firmware/acpi/tables/EINJ", R_OK) == -1)
|
||||
ksft_test_result_skip("EINJ table not available in firmware");
|
||||
|
||||
if (access(EINJ_ETYPE, R_OK | W_OK) == -1)
|
||||
ksft_test_result_skip("EINJ module probably not loaded?");
|
||||
|
||||
write_einj_entry(EINJ_ETYPE, ERROR_TYPE_MEMORY_UER);
|
||||
write_einj_entry(EINJ_FLAGS, MASK_MEMORY_UER);
|
||||
write_einj_entry(EINJ_ADDR, paddr);
|
||||
write_einj_entry(EINJ_MASK, ~0x0UL);
|
||||
write_einj_entry(EINJ_NOTRIGGER, 1);
|
||||
write_einj_entry(EINJ_DOIT, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* When host APEI successfully claims the SEA caused by guest_code, the kernel
* will send a SIGBUS signal with BUS_MCEERR_AR to the test thread.
|
||||
*
|
||||
* We set up this SIGBUS handler to skip the test for that case.
|
||||
*/
|
||||
static void sigbus_signal_handler(int sig, siginfo_t *si, void *v)
|
||||
{
|
||||
ksft_print_msg("SIGBUS (%d) received, dumping siginfo...\n", sig);
|
||||
ksft_print_msg("si_signo=%d, si_errno=%d, si_code=%d, si_addr=%p\n",
|
||||
si->si_signo, si->si_errno, si->si_code, si->si_addr);
|
||||
if (si->si_code == BUS_MCEERR_AR)
|
||||
ksft_test_result_skip("SEA is claimed by host APEI\n");
|
||||
else
|
||||
ksft_test_result_fail("Exit with signal unhandled\n");
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
static void setup_sigbus_handler(void)
|
||||
{
|
||||
struct sigaction act;
|
||||
|
||||
memset(&act, 0, sizeof(act));
|
||||
sigemptyset(&act.sa_mask);
|
||||
act.sa_sigaction = sigbus_signal_handler;
|
||||
act.sa_flags = SA_SIGINFO;
|
||||
TEST_ASSERT(sigaction(SIGBUS, &act, NULL) == 0,
|
||||
"Failed to setup SIGBUS handler");
|
||||
}
|
||||
|
||||
static void guest_code(void)
|
||||
{
|
||||
uint64_t guest_data;
|
||||
|
||||
/* Consuming the error will cause an SEA. */
|
||||
guest_data = *(uint64_t *)EINJ_GVA;
|
||||
|
||||
GUEST_FAIL("Poison not protected by SEA: gva=%#lx, guest_data=%#lx\n",
|
||||
EINJ_GVA, guest_data);
|
||||
}
|
||||
|
||||
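/* Guest SEA handler: check the ESR/FAR of the abort that userspace injected, then report completion. */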
static void expect_sea_handler(struct ex_regs *regs)
|
||||
{
|
||||
u64 esr = read_sysreg(esr_el1);
|
||||
u64 far = read_sysreg(far_el1);
|
||||
bool expect_far_invalid = far_invalid;
|
||||
|
||||
GUEST_PRINTF("Handling Guest SEA\n");
|
||||
GUEST_PRINTF("ESR_EL1=%#lx, FAR_EL1=%#lx\n", esr, far);
|
||||
|
||||
GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
|
||||
GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
|
||||
|
||||
if (expect_far_invalid) {
|
||||
GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, ESR_ELx_FnV);
|
||||
GUEST_PRINTF("Guest observed garbage value in FAR\n");
|
||||
} else {
|
||||
GUEST_ASSERT_EQ(esr & ESR_ELx_FnV, 0);
|
||||
GUEST_ASSERT_EQ(far, EINJ_GVA);
|
||||
}
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
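/* Make an external data abort pending for the vCPU via KVM_SET_VCPU_EVENTS. */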
static void vcpu_inject_sea(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvm_vcpu_events events = {};
|
||||
|
||||
events.exception.ext_dabt_pending = true;
|
||||
vcpu_events_set(vcpu, &events);
|
||||
}
|
||||
|
||||
static void run_vm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct ucall uc;
|
||||
bool guest_done = false;
|
||||
struct kvm_run *run = vcpu->run;
|
||||
u64 esr;
|
||||
|
||||
/* Resume the vCPU after error injection to consume the error. */
|
||||
vcpu_run(vcpu);
|
||||
|
||||
ksft_print_msg("Dump kvm_run info about KVM_EXIT_%s\n",
|
||||
exit_reason_str(run->exit_reason));
|
||||
ksft_print_msg("kvm_run.arm_sea: esr=%#llx, flags=%#llx\n",
|
||||
run->arm_sea.esr, run->arm_sea.flags);
|
||||
ksft_print_msg("kvm_run.arm_sea: gva=%#llx, gpa=%#llx\n",
|
||||
run->arm_sea.gva, run->arm_sea.gpa);
|
||||
|
||||
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_SEA);
|
||||
|
||||
esr = run->arm_sea.esr;
|
||||
TEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_LOW);
|
||||
TEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT);
|
||||
TEST_ASSERT_EQ(ESR_ELx_ISS2(esr), 0);
|
||||
TEST_ASSERT_EQ((esr & ESR_ELx_INST_SYNDROME), 0);
|
||||
TEST_ASSERT_EQ(esr & ESR_ELx_VNCR, 0);
|
||||
|
||||
if (!(esr & ESR_ELx_FnV)) {
|
||||
ksft_print_msg("Expect gva to match given FnV bit is 0\n");
|
||||
TEST_ASSERT_EQ(run->arm_sea.gva, EINJ_GVA);
|
||||
}
|
||||
|
||||
if (run->arm_sea.flags & KVM_EXIT_ARM_SEA_FLAG_GPA_VALID) {
|
||||
ksft_print_msg("Expect gpa to match given KVM_EXIT_ARM_SEA_FLAG_GPA_VALID is set\n");
|
||||
TEST_ASSERT_EQ(run->arm_sea.gpa, einj_gpa & PAGE_ADDR_MASK);
|
||||
}
|
||||
|
||||
far_invalid = esr & ESR_ELx_FnV;
|
||||
|
||||
/* Inject an SEA into the guest and expect it to be handled by the guest's SEA handler. */
|
||||
vcpu_inject_sea(vcpu);
|
||||
|
||||
/* Expect the guest to reach GUEST_DONE gracefully. */
|
||||
do {
|
||||
vcpu_run(vcpu);
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_PRINTF:
|
||||
ksft_print_msg("From guest: %s", uc.buffer);
|
||||
break;
|
||||
case UCALL_DONE:
|
||||
ksft_print_msg("Guest done gracefully!\n");
|
||||
guest_done = 1;
|
||||
break;
|
||||
case UCALL_ABORT:
|
||||
ksft_print_msg("Guest aborted!\n");
|
||||
guest_done = 1;
|
||||
REPORT_GUEST_ASSERT(uc);
|
||||
break;
|
||||
default:
|
||||
TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd);
|
||||
}
|
||||
} while (!guest_done);
|
||||
}
|
||||
|
||||
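/* Create a single-vCPU VM backed by 1GiB hugetlb pages, install the guest SEA handler, and map the test region at START_GVA. */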
static struct kvm_vm *vm_create_with_sea_handler(struct kvm_vcpu **vcpu)
|
||||
{
|
||||
size_t backing_page_size;
|
||||
size_t guest_page_size;
|
||||
size_t alignment;
|
||||
uint64_t num_guest_pages;
|
||||
vm_paddr_t start_gpa;
|
||||
enum vm_mem_backing_src_type src_type = VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB;
|
||||
struct kvm_vm *vm;
|
||||
|
||||
backing_page_size = get_backing_src_pagesz(src_type);
|
||||
guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
|
||||
alignment = max(backing_page_size, guest_page_size);
|
||||
num_guest_pages = VM_MEM_SIZE / guest_page_size;
|
||||
|
||||
vm = __vm_create_with_one_vcpu(vcpu, num_guest_pages, guest_code);
|
||||
vm_init_descriptor_tables(vm);
|
||||
vcpu_init_descriptor_tables(*vcpu);
|
||||
|
||||
vm_install_sync_handler(vm,
|
||||
/*vector=*/VECTOR_SYNC_CURRENT,
|
||||
/*ec=*/ESR_ELx_EC_DABT_CUR,
|
||||
/*handler=*/expect_sea_handler);
|
||||
|
||||
start_gpa = (vm->max_gfn - num_guest_pages) * guest_page_size;
|
||||
start_gpa = align_down(start_gpa, alignment);
|
||||
|
||||
vm_userspace_mem_region_add(
|
||||
/*vm=*/vm,
|
||||
/*src_type=*/src_type,
|
||||
/*guest_paddr=*/start_gpa,
|
||||
/*slot=*/1,
|
||||
/*npages=*/num_guest_pages,
|
||||
/*flags=*/0);
|
||||
|
||||
virt_map(vm, START_GVA, start_gpa, num_guest_pages);
|
||||
|
||||
ksft_print_msg("Mapped %#lx pages: gva=%#lx to gpa=%#lx\n",
|
||||
num_guest_pages, START_GVA, start_gpa);
|
||||
return vm;
|
||||
}
|
||||
|
||||
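/* Resolve EINJ_GVA to a host physical address, seed it with data, and inject an uncorrectable memory error there via EINJ. */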
static void vm_inject_memory_uer(struct kvm_vm *vm)
|
||||
{
|
||||
uint64_t guest_data;
|
||||
|
||||
einj_gpa = addr_gva2gpa(vm, EINJ_GVA);
|
||||
einj_hva = addr_gva2hva(vm, EINJ_GVA);
|
||||
|
||||
/* Populate certain data before injecting UER. */
|
||||
*(uint64_t *)einj_hva = 0xBAADCAFE;
|
||||
guest_data = *(uint64_t *)einj_hva;
|
||||
ksft_print_msg("Before EINJect: data=%#lx\n",
|
||||
guest_data);
|
||||
|
||||
einj_hpa = translate_to_host_paddr((unsigned long)einj_hva);
|
||||
|
||||
ksft_print_msg("EINJ_GVA=%#lx, einj_gpa=%#lx, einj_hva=%p, einj_hpa=%#lx\n",
|
||||
EINJ_GVA, einj_gpa, einj_hva, einj_hpa);
|
||||
|
||||
inject_uer(einj_hpa);
|
||||
ksft_print_msg("Memory UER EINJected\n");
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_vcpu *vcpu;
|
||||
|
||||
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SEA_TO_USER));
|
||||
|
||||
setup_sigbus_handler();
|
||||
|
||||
vm = vm_create_with_sea_handler(&vcpu);
|
||||
vm_enable_cap(vm, KVM_CAP_ARM_SEA_TO_USER, 0);
|
||||
vm_inject_memory_uer(vm);
|
||||
run_vm(vm, vcpu);
|
||||
kvm_vm_free(vm);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -29,6 +29,7 @@ struct test_args {
|
||||
bool level_sensitive; /* 1 is level, 0 is edge */
|
||||
int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
|
||||
bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
|
||||
uint32_t shared_data;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -205,7 +206,7 @@ static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
|
||||
do { \
|
||||
uint32_t _intid; \
|
||||
_intid = gic_get_and_ack_irq(); \
|
||||
GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
|
||||
GUEST_ASSERT(_intid == IAR_SPURIOUS); \
|
||||
} while (0)
|
||||
|
||||
#define CAT_HELPER(a, b) a ## b
|
||||
@@ -359,8 +360,9 @@ static uint32_t wait_for_and_activate_irq(void)
|
||||
* interrupts for the whole test.
|
||||
*/
|
||||
static void test_inject_preemption(struct test_args *args,
|
||||
uint32_t first_intid, int num,
|
||||
kvm_inject_cmd cmd)
|
||||
uint32_t first_intid, int num,
|
||||
const unsigned long *exclude,
|
||||
kvm_inject_cmd cmd)
|
||||
{
|
||||
uint32_t intid, prio, step = KVM_PRIO_STEPS;
|
||||
int i;
|
||||
@@ -379,6 +381,10 @@ static void test_inject_preemption(struct test_args *args,
|
||||
for (i = 0; i < num; i++) {
|
||||
uint32_t tmp;
|
||||
intid = i + first_intid;
|
||||
|
||||
if (exclude && test_bit(i, exclude))
|
||||
continue;
|
||||
|
||||
KVM_INJECT(cmd, intid);
|
||||
/* Each successive IRQ will preempt the previous one. */
|
||||
tmp = wait_for_and_activate_irq();
|
||||
@@ -390,15 +396,33 @@ static void test_inject_preemption(struct test_args *args,
|
||||
/* finish handling the IRQs starting with the highest priority one. */
|
||||
for (i = 0; i < num; i++) {
|
||||
intid = num - i - 1 + first_intid;
|
||||
|
||||
if (exclude && test_bit(intid - first_intid, exclude))
|
||||
continue;
|
||||
|
||||
gic_set_eoi(intid);
|
||||
if (args->eoi_split)
|
||||
gic_set_dir(intid);
|
||||
}
|
||||
|
||||
if (args->eoi_split) {
|
||||
for (i = 0; i < num; i++) {
|
||||
intid = i + first_intid;
|
||||
|
||||
if (exclude && test_bit(i, exclude))
|
||||
continue;
|
||||
|
||||
if (args->eoi_split)
|
||||
gic_set_dir(intid);
|
||||
}
|
||||
}
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
for (i = 0; i < num; i++)
|
||||
for (i = 0; i < num; i++) {
|
||||
if (exclude && test_bit(i, exclude))
|
||||
continue;
|
||||
|
||||
GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
|
||||
}
|
||||
GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
|
||||
GUEST_ASSERT_IAR_EMPTY();
|
||||
|
||||
@@ -436,33 +460,32 @@ static void test_injection_failure(struct test_args *args,
|
||||
|
||||
static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
|
||||
{
|
||||
/*
|
||||
* Test up to 4 levels of preemption. The reason is that KVM doesn't
|
||||
* currently implement the ability to have more than the number-of-LRs
|
||||
* number of concurrently active IRQs. The number of LRs implemented is
|
||||
* IMPLEMENTATION DEFINED, however, it seems that most implement 4.
|
||||
*/
|
||||
/* Timer PPIs cannot be injected from userspace */
|
||||
static const unsigned long ppi_exclude = (BIT(27 - MIN_PPI) |
|
||||
BIT(30 - MIN_PPI) |
|
||||
BIT(28 - MIN_PPI) |
|
||||
BIT(26 - MIN_PPI));
|
||||
|
||||
if (f->sgi)
|
||||
test_inject_preemption(args, MIN_SGI, 4, f->cmd);
|
||||
test_inject_preemption(args, MIN_SGI, 16, NULL, f->cmd);
|
||||
|
||||
if (f->ppi)
|
||||
test_inject_preemption(args, MIN_PPI, 4, f->cmd);
|
||||
test_inject_preemption(args, MIN_PPI, 16, &ppi_exclude, f->cmd);
|
||||
|
||||
if (f->spi)
|
||||
test_inject_preemption(args, MIN_SPI, 4, f->cmd);
|
||||
test_inject_preemption(args, MIN_SPI, 31, NULL, f->cmd);
|
||||
}
|
||||
|
||||
static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
|
||||
{
|
||||
/* Test up to 4 active IRQs. Same reason as in test_preemption. */
|
||||
if (f->sgi)
|
||||
guest_restore_active(args, MIN_SGI, 4, f->cmd);
|
||||
guest_restore_active(args, MIN_SGI, 16, f->cmd);
|
||||
|
||||
if (f->ppi)
|
||||
guest_restore_active(args, MIN_PPI, 4, f->cmd);
|
||||
guest_restore_active(args, MIN_PPI, 16, f->cmd);
|
||||
|
||||
if (f->spi)
|
||||
guest_restore_active(args, MIN_SPI, 4, f->cmd);
|
||||
guest_restore_active(args, MIN_SPI, 31, f->cmd);
|
||||
}
|
||||
|
||||
static void guest_code(struct test_args *args)
|
||||
@@ -473,12 +496,12 @@ static void guest_code(struct test_args *args)
|
||||
|
||||
gic_init(GIC_V3, 1);
|
||||
|
||||
for (i = 0; i < nr_irqs; i++)
|
||||
gic_irq_enable(i);
|
||||
|
||||
for (i = MIN_SPI; i < nr_irqs; i++)
|
||||
gic_irq_set_config(i, !level_sensitive);
|
||||
|
||||
for (i = 0; i < nr_irqs; i++)
|
||||
gic_irq_enable(i);
|
||||
|
||||
gic_set_eoi_split(args->eoi_split);
|
||||
|
||||
reset_priorities(args);
|
||||
@@ -779,6 +802,221 @@ done:
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
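/* EOImode==1: vCPU0 acks and priority-drops an SPI, vCPU1 then deactivates it via DIR from the other CPU, and vCPU0 checks the active state is gone. */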
static void guest_code_asym_dir(struct test_args *args, int cpuid)
|
||||
{
|
||||
gic_init(GIC_V3, 2);
|
||||
|
||||
gic_set_eoi_split(1);
|
||||
gic_set_priority_mask(CPU_PRIO_MASK);
|
||||
|
||||
if (cpuid == 0) {
|
||||
uint32_t intid;
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
gic_set_priority(MIN_PPI, IRQ_DEFAULT_PRIO);
|
||||
gic_irq_enable(MIN_SPI);
|
||||
gic_irq_set_pending(MIN_SPI);
|
||||
|
||||
intid = wait_for_and_activate_irq();
|
||||
GUEST_ASSERT_EQ(intid, MIN_SPI);
|
||||
|
||||
gic_set_eoi(intid);
|
||||
isb();
|
||||
|
||||
WRITE_ONCE(args->shared_data, MIN_SPI);
|
||||
dsb(ishst);
|
||||
|
||||
do {
|
||||
dsb(ishld);
|
||||
} while (READ_ONCE(args->shared_data) == MIN_SPI);
|
||||
GUEST_ASSERT(!gic_irq_get_active(MIN_SPI));
|
||||
} else {
|
||||
do {
|
||||
dsb(ishld);
|
||||
} while (READ_ONCE(args->shared_data) != MIN_SPI);
|
||||
|
||||
gic_set_dir(MIN_SPI);
|
||||
isb();
|
||||
|
||||
WRITE_ONCE(args->shared_data, 0);
|
||||
dsb(ishst);
|
||||
}
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
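/* Make SGI0 a Group-0 interrupt: with IGRPEN0 clear it must stay pending and unackable; once Group 0 is enabled it must be delivered. */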
static void guest_code_group_en(struct test_args *args, int cpuid)
|
||||
{
|
||||
uint32_t intid;
|
||||
|
||||
gic_init(GIC_V3, 2);
|
||||
|
||||
gic_set_eoi_split(0);
|
||||
gic_set_priority_mask(CPU_PRIO_MASK);
|
||||
/* SGI0 is G0, which is disabled */
|
||||
gic_irq_set_group(0, 0);
|
||||
|
||||
/* Configure all SGIs with decreasing priority */
|
||||
for (intid = 0; intid < MIN_PPI; intid++) {
|
||||
gic_set_priority(intid, (intid + 1) * 8);
|
||||
gic_irq_enable(intid);
|
||||
gic_irq_set_pending(intid);
|
||||
}
|
||||
|
||||
/* Ack and EOI all G1 interrupts */
|
||||
for (int i = 1; i < MIN_PPI; i++) {
|
||||
intid = wait_for_and_activate_irq();
|
||||
|
||||
GUEST_ASSERT(intid < MIN_PPI);
|
||||
gic_set_eoi(intid);
|
||||
isb();
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that SGI0 is still pending, inactive, and that we cannot
|
||||
* ack anything.
|
||||
*/
|
||||
GUEST_ASSERT(gic_irq_get_pending(0));
|
||||
GUEST_ASSERT(!gic_irq_get_active(0));
|
||||
GUEST_ASSERT_IAR_EMPTY();
|
||||
GUEST_ASSERT(read_sysreg_s(SYS_ICC_IAR0_EL1) == IAR_SPURIOUS);
|
||||
|
||||
/* Open the G0 gates, and verify we can ack SGI0 */
|
||||
write_sysreg_s(1, SYS_ICC_IGRPEN0_EL1);
|
||||
isb();
|
||||
|
||||
do {
|
||||
intid = read_sysreg_s(SYS_ICC_IAR0_EL1);
|
||||
} while (intid == IAR_SPURIOUS);
|
||||
|
||||
GUEST_ASSERT(intid == 0);
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
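/* With a pending SPI keeping DIR trapped, check that the virtual timer PPI (INTID 27) fires, can be deasserted, and deactivates cleanly, twice in a row. */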
static void guest_code_timer_spi(struct test_args *args, int cpuid)
|
||||
{
|
||||
uint32_t intid;
|
||||
u64 val;
|
||||
|
||||
gic_init(GIC_V3, 2);
|
||||
|
||||
gic_set_eoi_split(1);
|
||||
gic_set_priority_mask(CPU_PRIO_MASK);
|
||||
|
||||
/* Add a pending SPI so that KVM starts trapping DIR */
|
||||
gic_set_priority(MIN_SPI + cpuid, IRQ_DEFAULT_PRIO);
|
||||
gic_irq_set_pending(MIN_SPI + cpuid);
|
||||
|
||||
/* Configure the timer with a higher priority, make it pending */
|
||||
gic_set_priority(27, IRQ_DEFAULT_PRIO - 8);
|
||||
|
||||
isb();
|
||||
val = read_sysreg(cntvct_el0);
|
||||
write_sysreg(val, cntv_cval_el0);
|
||||
write_sysreg(1, cntv_ctl_el0);
|
||||
isb();
|
||||
|
||||
GUEST_ASSERT(gic_irq_get_pending(27));
|
||||
|
||||
/* Enable both interrupts */
|
||||
gic_irq_enable(MIN_SPI + cpuid);
|
||||
gic_irq_enable(27);
|
||||
|
||||
/* The timer must fire */
|
||||
intid = wait_for_and_activate_irq();
|
||||
GUEST_ASSERT(intid == 27);
|
||||
|
||||
/* Check that we can deassert it */
|
||||
write_sysreg(0, cntv_ctl_el0);
|
||||
isb();
|
||||
|
||||
GUEST_ASSERT(!gic_irq_get_pending(27));
|
||||
|
||||
/*
|
||||
* Priority drop, deactivation -- we expect that the host
|
||||
* deactivation will have been effective
|
||||
*/
|
||||
gic_set_eoi(27);
|
||||
gic_set_dir(27);
|
||||
|
||||
GUEST_ASSERT(!gic_irq_get_active(27));
|
||||
|
||||
/* Do it one more time */
|
||||
isb();
|
||||
val = read_sysreg(cntvct_el0);
|
||||
write_sysreg(val, cntv_cval_el0);
|
||||
write_sysreg(1, cntv_ctl_el0);
|
||||
isb();
|
||||
|
||||
GUEST_ASSERT(gic_irq_get_pending(27));
|
||||
|
||||
/* The timer must fire again */
|
||||
intid = wait_for_and_activate_irq();
|
||||
GUEST_ASSERT(intid == 27);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
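/* Per-vCPU host thread: run the vCPU until the guest reports UCALL_DONE. */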
static void *test_vcpu_run(void *arg)
|
||||
{
|
||||
struct kvm_vcpu *vcpu = arg;
|
||||
struct ucall uc;
|
||||
|
||||
while (1) {
|
||||
vcpu_run(vcpu);
|
||||
|
||||
switch (get_ucall(vcpu, &uc)) {
|
||||
case UCALL_ABORT:
|
||||
REPORT_GUEST_ASSERT(uc);
|
||||
break;
|
||||
case UCALL_DONE:
|
||||
return NULL;
|
||||
default:
|
||||
TEST_FAIL("Unknown ucall %lu", uc.cmd);
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
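/* Run the given guest function on two vCPUs, one host thread each, sharing an args page and a vGICv3. */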
static void test_vgic_two_cpus(void *gcode)
|
||||
{
|
||||
pthread_t thr[2];
|
||||
struct kvm_vcpu *vcpus[2];
|
||||
struct test_args args = {};
|
||||
struct kvm_vm *vm;
|
||||
vm_vaddr_t args_gva;
|
||||
int gic_fd, ret;
|
||||
|
||||
vm = vm_create_with_vcpus(2, gcode, vcpus);
|
||||
|
||||
vm_init_descriptor_tables(vm);
|
||||
vcpu_init_descriptor_tables(vcpus[0]);
|
||||
vcpu_init_descriptor_tables(vcpus[1]);
|
||||
|
||||
/* Setup the guest args page (so it gets the args). */
|
||||
args_gva = vm_vaddr_alloc_page(vm);
|
||||
memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
|
||||
vcpu_args_set(vcpus[0], 2, args_gva, 0);
|
||||
vcpu_args_set(vcpus[1], 2, args_gva, 1);
|
||||
|
||||
gic_fd = vgic_v3_setup(vm, 2, 64);
|
||||
|
||||
ret = pthread_create(&thr[0], NULL, test_vcpu_run, vcpus[0]);
|
||||
if (ret)
|
||||
TEST_FAIL("Can't create thread for vcpu 0 (%d)\n", ret);
|
||||
ret = pthread_create(&thr[1], NULL, test_vcpu_run, vcpus[1]);
|
||||
if (ret)
|
||||
TEST_FAIL("Can't create thread for vcpu 1 (%d)\n", ret);
|
||||
|
||||
pthread_join(thr[0], NULL);
|
||||
pthread_join(thr[1], NULL);
|
||||
|
||||
close(gic_fd);
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
static void help(const char *name)
|
||||
{
|
||||
printf(
|
||||
@@ -835,6 +1073,9 @@ int main(int argc, char **argv)
|
||||
test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
|
||||
test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
|
||||
test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
|
||||
test_vgic_two_cpus(guest_code_asym_dir);
|
||||
test_vgic_two_cpus(guest_code_group_en);
|
||||
test_vgic_two_cpus(guest_code_timer_spi);
|
||||
} else {
|
||||
test_vgic(nr_irqs, level_sensitive, eoi_split);
|
||||
}
|
||||
|
||||
@@ -118,6 +118,10 @@ static void guest_setup_gic(void)
|
||||
|
||||
guest_setup_its_mappings();
|
||||
guest_invalidate_all_rdists();
|
||||
|
||||
/* SYNC to ensure ITS setup is complete */
|
||||
for (cpuid = 0; cpuid < test_data.nr_cpus; cpuid++)
|
||||
its_send_sync_cmd(test_data.cmdq_base_va, cpuid);
|
||||
}
|
||||
|
||||
static void guest_code(size_t nr_lpis)
|
||||
|
||||
@@ -57,6 +57,7 @@ void gic_irq_set_pending(unsigned int intid);
|
||||
void gic_irq_clear_pending(unsigned int intid);
|
||||
bool gic_irq_get_pending(unsigned int intid);
|
||||
void gic_irq_set_config(unsigned int intid, bool is_edge);
|
||||
void gic_irq_set_group(unsigned int intid, bool group);
|
||||
|
||||
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
|
||||
vm_paddr_t pend_table);
|
||||
|
||||
@@ -15,5 +15,6 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val
|
||||
void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
|
||||
u32 collection_id, u32 intid);
|
||||
void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
|
||||
void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id);
|
||||
|
||||
#endif // __SELFTESTS_GIC_V3_ITS_H__
|
||||
|
||||
@@ -688,6 +688,7 @@ static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
|
||||
#endif
|
||||
|
||||
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
|
||||
void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot);
|
||||
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
|
||||
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
|
||||
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
|
||||
|
||||
@@ -155,3 +155,9 @@ void gic_irq_set_config(unsigned int intid, bool is_edge)
|
||||
GUEST_ASSERT(gic_common_ops);
|
||||
gic_common_ops->gic_irq_set_config(intid, is_edge);
|
||||
}
|
||||
|
||||
void gic_irq_set_group(unsigned int intid, bool group)
|
||||
{
|
||||
GUEST_ASSERT(gic_common_ops);
|
||||
gic_common_ops->gic_irq_set_group(intid, group);
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ struct gic_common_ops {
|
||||
void (*gic_irq_clear_pending)(uint32_t intid);
|
||||
bool (*gic_irq_get_pending)(uint32_t intid);
|
||||
void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
|
||||
void (*gic_irq_set_group)(uint32_t intid, bool group);
|
||||
};
|
||||
|
||||
extern const struct gic_common_ops gicv3_ops;
|
||||
|
||||
@@ -293,17 +293,36 @@ static void gicv3_enable_redist(volatile void *redist_base)
|
||||
}
|
||||
}
|
||||
|
||||
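/* Set or clear the IGROUPR bit for an INTID, via the distributor for SPIs and the calling vCPU's redistributor otherwise. */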
static void gicv3_set_group(uint32_t intid, bool grp)
|
||||
{
|
||||
uint32_t cpu_or_dist;
|
||||
uint32_t val;
|
||||
|
||||
cpu_or_dist = (get_intid_range(intid) == SPI_RANGE) ? DIST_BIT : guest_get_vcpuid();
|
||||
val = gicv3_reg_readl(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4);
|
||||
if (grp)
|
||||
val |= BIT(intid % 32);
|
||||
else
|
||||
val &= ~BIT(intid % 32);
|
||||
gicv3_reg_writel(cpu_or_dist, GICD_IGROUPR + (intid / 32) * 4, val);
|
||||
}
|
||||
|
||||
static void gicv3_cpu_init(unsigned int cpu)
|
||||
{
|
||||
volatile void *sgi_base;
|
||||
unsigned int i;
|
||||
volatile void *redist_base_cpu;
|
||||
u64 typer;
|
||||
|
||||
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
|
||||
|
||||
redist_base_cpu = gicr_base_cpu(cpu);
|
||||
sgi_base = sgi_base_from_redist(redist_base_cpu);
|
||||
|
||||
/* Verify assumption that GICR_TYPER.Processor_number == cpu */
|
||||
typer = readq_relaxed(redist_base_cpu + GICR_TYPER);
|
||||
GUEST_ASSERT_EQ(GICR_TYPER_CPU_NUMBER(typer), cpu);
|
||||
|
||||
gicv3_enable_redist(redist_base_cpu);
|
||||
|
||||
/*
|
||||
@@ -328,6 +347,8 @@ static void gicv3_cpu_init(unsigned int cpu)
|
||||
/* Set a default priority threshold */
|
||||
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
|
||||
|
||||
/* Disable Group-0 interrupts */
|
||||
write_sysreg_s(ICC_IGRPEN0_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
|
||||
/* Enable non-secure Group-1 interrupts */
|
||||
write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
|
||||
}
|
||||
@@ -400,6 +421,7 @@ const struct gic_common_ops gicv3_ops = {
|
||||
.gic_irq_clear_pending = gicv3_irq_clear_pending,
|
||||
.gic_irq_get_pending = gicv3_irq_get_pending,
|
||||
.gic_irq_set_config = gicv3_irq_set_config,
|
||||
.gic_irq_set_group = gicv3_set_group,
|
||||
};
|
||||
|
||||
void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
|
||||
|
||||
@@ -253,3 +253,13 @@ void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
|
||||
|
||||
its_send_cmd(cmdq_base, &cmd);
|
||||
}
|
||||
|
||||
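/* Issue an ITS SYNC command targeting the given vCPU's redistributor. */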
void its_send_sync_cmd(void *cmdq_base, u32 vcpu_id)
|
||||
{
|
||||
struct its_cmd_block cmd = {};
|
||||
|
||||
its_encode_cmd(&cmd, GITS_CMD_SYNC);
|
||||
its_encode_target(&cmd, procnum_to_rdbase(vcpu_id));
|
||||
|
||||
its_send_cmd(cmdq_base, &cmd);
|
||||
}
|
||||
|
||||
@@ -1184,6 +1184,16 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
|
||||
ret, errno, slot, flags);
|
||||
}
|
||||
|
||||
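/* Delete and re-register a memslot with identical parameters, forcing KVM to drop and rebuild any mappings backed by it. */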
void vm_mem_region_reload(struct kvm_vm *vm, uint32_t slot)
|
||||
{
|
||||
struct userspace_mem_region *region = memslot2region(vm, slot);
|
||||
struct kvm_userspace_memory_region2 tmp = region->region;
|
||||
|
||||
tmp.memory_size = 0;
|
||||
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &tmp);
|
||||
vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region);
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Memory Region Move
|
||||
*
|
||||
@@ -2005,6 +2015,7 @@ static struct exit_reason {
|
||||
KVM_EXIT_STRING(NOTIFY),
|
||||
KVM_EXIT_STRING(LOONGARCH_IOCSR),
|
||||
KVM_EXIT_STRING(MEMORY_FAULT),
|
||||
KVM_EXIT_STRING(ARM_SEA),
|
||||
};
|
||||
|
||||
/*