From 250be9d61cf8898b1eea140fa31fe7713c49e989 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Mon, 19 Feb 2018 09:38:07 -0600 Subject: KVM: arm/arm64: No need to zero CNTVOFF in kvm_timer_vcpu_put() for VHE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In AArch64/AArch32, the virtual counter uses a fixed virtual offset of zero in the following situations as per ARMv8 specifications: 1) HCR_EL2.E2H is 1, and CNTVCT_EL0/CNTVCT are read from EL2. 2) HCR_EL2.{E2H, TGE} is {1, 1}, and either: — CNTVCT_EL0 is read from Non-secure EL0 or EL2. — CNTVCT is read from Non-secure EL0. So, no need to zero CNTVOFF_EL2/CNTVOFF for VHE case. Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Shanker Donthineni Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 70268c0bec79..86eca32474cc 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -541,9 +541,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * The kernel may decide to run userspace after calling vcpu_put, so * we reset cntvoff to 0 to ensure a consistent read between user * accesses to the virtual counter and kernel access to the physical - * counter. + * counter of non-VHE case. For VHE, the virtual counter uses a fixed + * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. */ - set_cntvoff(0); + if (!has_vhe()) + set_cntvoff(0); } /* -- cgit v1.2.3 From 62b06f8f429cd233e4e2e7bbd21081ad60c9018f Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 6 Mar 2018 09:21:06 +0000 Subject: KVM: arm/arm64: vgic: Add missing irq_lock to vgic_mmio_read_pending Our irq_is_pending() helper function accesses multiple members of the vgic_irq struct, so we need to hold the lock when calling it. Add that requirement as a comment to the definition and take the lock around the call in vgic_mmio_read_pending(), where we were missing it before. Fixes: 96b298000db4 ("KVM: arm/arm64: vgic-new: Add PENDING registers handlers") Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio.c | 3 +++ virt/kvm/arm/vgic/vgic.h | 1 + 2 files changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 83d82bd7dc4e..dbe99d635c80 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -113,9 +113,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + unsigned long flags; + spin_lock_irqsave(&irq->irq_lock, flags); if (irq_is_pending(irq)) value |= (1U << i); + spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); } diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 12c37b89f7a3..5b11859a1a1e 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -96,6 +96,7 @@ /* we only support 64 kB translation table page size */ #define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) +/* Requires the irq_lock to be held by the caller. */ static inline bool irq_is_pending(struct vgic_irq *irq) { if (irq->config == VGIC_CONFIG_EDGE) -- cgit v1.2.3 From e21a4f3a930cda6e4902cb5b3213365e5ff3ce7c Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 27 Feb 2018 12:33:50 +0100 Subject: KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls than KVM_RUN Calling vcpu_load() registers preempt notifiers for this vcpu and calls kvm_arch_vcpu_load(). The latter will soon be doing a lot of heavy lifting on arm/arm64 and will try to do things such as enabling the virtual timer and setting us up to handle interrupts from the timer hardware. Loading state onto hardware registers and enabling hardware to signal interrupts can be problematic when we're not actually about to run the VCPU, because it makes it difficult to establish the right context when handling interrupts from the timer, and it makes the register access code difficult to reason about. Luckily, now when we call vcpu_load in each ioctl implementation, we can simply remove the call from the non-KVM_RUN vcpu ioctls, and our kvm_arch_vcpu_load() is only used for loading vcpu content to the physical CPU when we're actually going to run the vcpu. Cc: stable@vger.kernel.org Fixes: 9b062471e52a ("KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl") Reviewed-by: Julien Grall Reviewed-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 86941f6181bb..53572304843b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -384,14 +384,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - vcpu_load(vcpu); - if (vcpu->arch.power_off) mp_state->mp_state = KVM_MP_STATE_STOPPED; else mp_state->mp_state = KVM_MP_STATE_RUNNABLE; - vcpu_put(vcpu); return 0; } @@ -400,8 +397,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; - vcpu_load(vcpu); - switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: vcpu->arch.power_off = false; @@ -413,7 +408,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, ret = -EINVAL; } - vcpu_put(vcpu); return ret; } @@ -1036,8 +1030,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_device_attr attr; long r; - vcpu_load(vcpu); - switch (ioctl) { case KVM_ARM_VCPU_INIT: { struct kvm_vcpu_init init; @@ -1114,7 +1106,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; } - vcpu_put(vcpu); return r; } -- cgit v1.2.3 From 413aa807ae39fed7e387c175d2d0ae9fcf6c0c9d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 5 Mar 2018 11:36:38 +0100 Subject: KVM: arm/arm64: Reset mapped IRQs on VM reset We currently don't allow resetting mapped IRQs from userspace, because their state is controlled by the hardware. But we do need to reset the state when the VM is reset, so we provide a function for the 'owner' of the mapped interrupt to reset the interrupt state. Currently only the timer uses mapped interrupts, so we call this function from the timer reset logic. Cc: stable@vger.kernel.org Fixes: 4c60e360d6df ("KVM: arm/arm64: Provide a get_input_level for the arch timer") Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 4 ++++ virt/kvm/arm/vgic/vgic.c | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 70f4c30918eb..3945021510a9 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -581,6 +581,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); @@ -594,6 +595,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) ptimer->cnt_ctl = 0; kvm_timer_update_state(vcpu); + if (timer->enabled && irqchip_in_kernel(vcpu->kvm)) + kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq); + return 0; } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index c7c5ef190afa..0001858a2c23 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -495,6 +495,32 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, return ret; } +/** + * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ + * @vcpu: The VCPU pointer + * @vintid: The INTID of the interrupt + * + * Reset the active and pending states of a mapped interrupt. Kernel + * subsystems injecting mapped interrupts should reset their interrupt lines + * when we are doing a reset of the VM. + */ +void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + + if (!irq->hw) + goto out; + + spin_lock_irqsave(&irq->irq_lock, flags); + irq->active = false; + irq->pending_latch = false; + irq->line_level = false; + spin_unlock_irqrestore(&irq->irq_lock, flags); +out: + vgic_put_irq(vcpu->kvm, irq); +} + int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) { struct vgic_irq *irq; -- cgit v1.2.3 From 76600428c3677659e3c3633bb4f2ea302220a275 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 2 Mar 2018 08:16:30 +0000 Subject: KVM: arm/arm64: Reduce verbosity of KVM init log On my GICv3 system, the following is printed to the kernel log at boot: kvm [1]: 8-bit VMID kvm [1]: IDMAP page: d20e35000 kvm [1]: HYP VA range: 800000000000:ffffffffffff kvm [1]: vgic-v2@2c020000 kvm [1]: GIC system register CPU interface enabled kvm [1]: vgic interrupt IRQ1 kvm [1]: virtual timer IRQ4 kvm [1]: Hyp mode initialized successfully The KVM IDMAP is a mapping of a statically allocated kernel structure, and so printing its physical address leaks the physical placement of the kernel when physical KASLR in effect. So change the kvm_info() to kvm_debug() to remove it from the log output. While at it, trim the output a bit more: IRQ numbers can be found in /proc/interrupts, and the HYP VA and vgic-v2 lines are not highly informational either. Cc: Acked-by: Will Deacon Acked-by: Christoffer Dall Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 2 +- virt/kvm/arm/mmu.c | 6 +++--- virt/kvm/arm/vgic/vgic-v2.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 3945021510a9..282389eb204f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -771,7 +771,7 @@ int kvm_timer_hyp_init(bool has_gic) static_branch_enable(&has_gic_active_state); } - kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); + kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, "kvm/arm/timer:starting", kvm_timer_starting_cpu, diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ec62d1cccab7..b960acdd0c05 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1810,9 +1810,9 @@ int kvm_mmu_init(void) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - kvm_info("IDMAP page: %lx\n", hyp_idmap_start); - kvm_info("HYP VA range: %lx:%lx\n", - kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); + kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); + kvm_debug("HYP VA range: %lx:%lx\n", + kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && hyp_idmap_start < kern_hyp_va(~0UL) && diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index c32d7b93ffd1..e9d840a75e7b 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -410,7 +410,7 @@ int vgic_v2_probe(const struct gic_kvm_info *info) kvm_vgic_global_state.type = VGIC_V2; kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; - kvm_info("vgic-v2@%llx\n", info->vctrl.start); + kvm_debug("vgic-v2@%llx\n", info->vctrl.start); return 0; out: -- cgit v1.2.3 From 16ca6a607d84bef0129698d8d808f501afd08d43 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Mar 2018 21:48:01 +0000 Subject: KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid The vgic code is trying to be clever when injecting GICv2 SGIs, and will happily populate LRs with the same interrupt number if they come from multiple vcpus (after all, they are distinct interrupt sources). Unfortunately, this is against the letter of the architecture, and the GICv2 architecture spec says "Each valid interrupt stored in the List registers must have a unique VirtualID for that virtual CPU interface.". GICv3 has similar (although slightly ambiguous) restrictions. This results in guests locking up when using GICv2-on-GICv3, for example. The obvious fix is to stop trying so hard, and inject a single vcpu per SGI per guest entry. After all, pending SGIs with multiple source vcpus are pretty rare, and are mostly seen in scenario where the physical CPUs are severely overcomitted. But as we now only inject a single instance of a multi-source SGI per vcpu entry, we may delay those interrupts for longer than strictly necessary, and run the risk of injecting lower priority interrupts in the meantime. In order to address this, we adopt a three stage strategy: - If we encounter a multi-source SGI in the AP list while computing its depth, we force the list to be sorted - When populating the LRs, we prevent the injection of any interrupt of lower priority than that of the first multi-source SGI we've injected. - Finally, the injection of a multi-source SGI triggers the request of a maintenance interrupt when there will be no pending interrupt in the LRs (HCR_NPIE). At the point where the last pending interrupt in the LRs switches from Pending to Active, the maintenance interrupt will be delivered, allowing us to add the remaining SGIs using the same process. Cc: stable@vger.kernel.org Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework") Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-v2.c | 9 ++++++- virt/kvm/arm/vgic/vgic-v3.c | 9 ++++++- virt/kvm/arm/vgic/vgic.c | 61 ++++++++++++++++++++++++++++++++++----------- virt/kvm/arm/vgic/vgic.h | 2 ++ 4 files changed, 65 insertions(+), 16 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index e9d840a75e7b..29556f71b691 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -37,6 +37,13 @@ void vgic_v2_init_lrs(void) vgic_v2_write_lr(i, 0); } +void vgic_v2_set_npie(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; + + cpuif->vgic_hcr |= GICH_HCR_NPIE; +} + void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; @@ -64,7 +71,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) int lr; unsigned long flags; - cpuif->vgic_hcr &= ~GICH_HCR_UIE; + cpuif->vgic_hcr &= ~(GICH_HCR_UIE | GICH_HCR_NPIE); for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u32 val = cpuif->vgic_lr[lr]; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 6b329414e57a..0ff2006f3781 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -26,6 +26,13 @@ static bool group1_trap; static bool common_trap; static bool gicv4_enable; +void vgic_v3_set_npie(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; + + cpuif->vgic_hcr |= ICH_HCR_NPIE; +} + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; @@ -47,7 +54,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) int lr; unsigned long flags; - cpuif->vgic_hcr &= ~ICH_HCR_UIE; + cpuif->vgic_hcr &= ~(ICH_HCR_UIE | ICH_HCR_NPIE); for (lr = 0; lr < vgic_cpu->used_lrs; lr++) { u64 val = cpuif->vgic_lr[lr]; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 0001858a2c23..8201899126f6 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -710,22 +710,37 @@ static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) vgic_v3_set_underflow(vcpu); } +static inline void vgic_set_npie(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_npie(vcpu); + else + vgic_v3_set_npie(vcpu); +} + /* Requires the ap_list_lock to be held. */ -static int compute_ap_list_depth(struct kvm_vcpu *vcpu) +static int compute_ap_list_depth(struct kvm_vcpu *vcpu, + bool *multi_sgi) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq; int count = 0; + *multi_sgi = false; + DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); /* GICv2 SGIs can count for more than one... */ - if (vgic_irq_is_sgi(irq->intid) && irq->source) - count += hweight8(irq->source); - else + if (vgic_irq_is_sgi(irq->intid) && irq->source) { + int w = hweight8(irq->source); + + count += w; + *multi_sgi |= (w > 1); + } else { count++; + } spin_unlock(&irq->irq_lock); } return count; @@ -736,28 +751,43 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_irq *irq; - int count = 0; + int count; + bool npie = false; + bool multi_sgi; + u8 prio = 0xff; DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); - if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) + count = compute_ap_list_depth(vcpu, &multi_sgi); + if (count > kvm_vgic_global_state.nr_lr || multi_sgi) vgic_sort_ap_list(vcpu); + count = 0; + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { spin_lock(&irq->irq_lock); - if (unlikely(vgic_target_oracle(irq) != vcpu)) - goto next; - /* - * If we get an SGI with multiple sources, try to get - * them in all at once. + * If we have multi-SGIs in the pipeline, we need to + * guarantee that they are all seen before any IRQ of + * lower priority. In that case, we need to filter out + * these interrupts by exiting early. This is easy as + * the AP list has been sorted already. */ - do { + if (multi_sgi && irq->priority > prio) { + spin_unlock(&irq->irq_lock); + break; + } + + if (likely(vgic_target_oracle(irq) == vcpu)) { vgic_populate_lr(vcpu, irq, count++); - } while (irq->source && count < kvm_vgic_global_state.nr_lr); -next: + if (irq->source) { + npie = true; + prio = irq->priority; + } + } + spin_unlock(&irq->irq_lock); if (count == kvm_vgic_global_state.nr_lr) { @@ -768,6 +798,9 @@ next: } } + if (npie) + vgic_set_npie(vcpu); + vcpu->arch.vgic_cpu.used_lrs = count; /* Nuke remaining LRs */ diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 5b11859a1a1e..f5b8519e5546 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -160,6 +160,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); +void vgic_v2_set_npie(struct kvm_vcpu *vcpu); int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, int offset, u32 *val); @@ -189,6 +190,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); +void vgic_v3_set_npie(struct kvm_vcpu *vcpu); void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); void vgic_v3_enable(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 27e91ad1e746e341ca2312f29bccb9736be7b476 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 6 Mar 2018 21:44:37 +0000 Subject: kvm: arm/arm64: vgic-v3: Tighten synchronization for guests using v2 on v3 On guest exit, and when using GICv2 on GICv3, we use a dsb(st) to force synchronization between the memory-mapped guest view and the system-register view that the hypervisor uses. This is incorrect, as the spec calls out the need for "a DSB whose required access type is both loads and stores with any Shareability attribute", while we're only synchronizing stores. We also lack an isb after the dsb to ensure that the latter has actually been executed before we start reading stuff from the sysregs. The fix is pretty easy: turn dsb(st) into dsb(sy), and slap an isb() just after. Cc: stable@vger.kernel.org Fixes: f68d2b1b73cc ("arm64: KVM: Implement vgic-v3 save/restore") Acked-by: Christoffer Dall Reviewed-by: Andre Przywara Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v3-sr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index f5c3d6d7019e..b89ce5432214 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -215,7 +215,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) * are now visible to the system register interface. */ if (!cpu_if->vgic_sre) { - dsb(st); + dsb(sy); + isb(); cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); } -- cgit v1.2.3 From 7a364bd5db1cfba50f3bde115db0dadc31b0169b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 25 Nov 2017 20:25:00 +0100 Subject: KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls than KVM_RUN Calling vcpu_load() registers preempt notifiers for this vcpu and calls kvm_arch_vcpu_load(). The latter will soon be doing a lot of heavy lifting on arm/arm64 and will try to do things such as enabling the virtual timer and setting us up to handle interrupts from the timer hardware. Loading state onto hardware registers and enabling hardware to signal interrupts can be problematic when we're not actually about to run the VCPU, because it makes it difficult to establish the right context when handling interrupts from the timer, and it makes the register access code difficult to reason about. Luckily, now when we call vcpu_load in each ioctl implementation, we can simply remove the call from the non-KVM_RUN vcpu ioctls, and our kvm_arch_vcpu_load() is only used for loading vcpu content to the physical CPU when we're actually going to run the vcpu. Reviewed-by: Julien Grall Reviewed-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 86941f6181bb..53572304843b 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -384,14 +384,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - vcpu_load(vcpu); - if (vcpu->arch.power_off) mp_state->mp_state = KVM_MP_STATE_STOPPED; else mp_state->mp_state = KVM_MP_STATE_RUNNABLE; - vcpu_put(vcpu); return 0; } @@ -400,8 +397,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; - vcpu_load(vcpu); - switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: vcpu->arch.power_off = false; @@ -413,7 +408,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, ret = -EINVAL; } - vcpu_put(vcpu); return ret; } @@ -1036,8 +1030,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_device_attr attr; long r; - vcpu_load(vcpu); - switch (ioctl) { case KVM_ARM_VCPU_INIT: { struct kvm_vcpu_init init; @@ -1114,7 +1106,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; } - vcpu_put(vcpu); return r; } -- cgit v1.2.3 From 829a58635497d7cc668133ac17daca9b78652661 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 29 Nov 2017 16:37:53 +0100 Subject: KVM: arm/arm64: Move vcpu_load call after kvm_vcpu_first_run_init Moving the call to vcpu_load() in kvm_arch_vcpu_ioctl_run() to after we've called kvm_vcpu_first_run_init() simplifies some of the vgic and there is also no need to do vcpu_load() for things such as handling the immediate_exit flag. Reviewed-by: Julien Grall Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 4 ---- virt/kvm/arm/arm.c | 22 ++++++++-------------- virt/kvm/arm/vgic/vgic-init.c | 11 ----------- 3 files changed, 8 insertions(+), 29 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 86eca32474cc..7dd8d9677dc5 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -842,11 +842,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return ret; no_vgic: - preempt_disable(); timer->enabled = 1; - kvm_timer_vcpu_load(vcpu); - preempt_enable(); - return 0; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 53572304843b..932e61858c55 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -632,27 +632,22 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; - vcpu_load(vcpu); - ret = kvm_vcpu_first_run_init(vcpu); if (ret) - goto out; + return ret; if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu, vcpu->run); if (ret) - goto out; - if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) { - ret = 0; - goto out; - } - + return ret; + if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) + return 0; } - if (run->immediate_exit) { - ret = -EINTR; - goto out; - } + if (run->immediate_exit) + return -EINTR; + + vcpu_load(vcpu); kvm_sigset_activate(vcpu); @@ -811,7 +806,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); -out: vcpu_put(vcpu); return ret; } diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 743ca5cb05ef..3e8209a07585 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -302,17 +302,6 @@ int vgic_init(struct kvm *kvm) dist->initialized = true; - /* - * If we're initializing GICv2 on-demand when first running the VCPU - * then we need to load the VGIC state onto the CPU. We can detect - * this easily by checking if we are in between vcpu_load and vcpu_put - * when we just initialized the VGIC. - */ - preempt_disable(); - vcpu = kvm_arm_get_running_vcpu(); - if (vcpu) - kvm_vgic_load(vcpu); - preempt_enable(); out: return ret; } -- cgit v1.2.3 From 3df59d8dd3c2526b33d51af9e6f66e61262de71b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 3 Aug 2017 12:09:05 +0200 Subject: KVM: arm/arm64: Get rid of vcpu->arch.irq_lines We currently have a separate read-modify-write of the HCR_EL2 on entry to the guest for the sole purpose of setting the VF and VI bits, if set. Since this is most rarely the case (only when using userspace IRQ chip and interrupts are in flight), let's get rid of this operation and instead modify the bits in the vcpu->arch.hcr[_el2] directly when needed. Acked-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Julien Thierry Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 11 ++++++----- virt/kvm/arm/mmu.c | 6 +++--- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 932e61858c55..49d13510e9c2 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -420,7 +420,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v)) + bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) && !v->arch.power_off && !v->arch.pause); } @@ -814,18 +815,18 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) { int bit_index; bool set; - unsigned long *ptr; + unsigned long *hcr; if (number == KVM_ARM_IRQ_CPU_IRQ) bit_index = __ffs(HCR_VI); else /* KVM_ARM_IRQ_CPU_FIQ */ bit_index = __ffs(HCR_VF); - ptr = (unsigned long *)&vcpu->arch.irq_lines; + hcr = vcpu_hcr(vcpu); if (level) - set = test_and_set_bit(bit_index, ptr); + set = test_and_set_bit(bit_index, hcr); else - set = test_and_clear_bit(bit_index, ptr); + set = test_and_clear_bit(bit_index, hcr); /* * If we didn't change anything, no need to wake up or kick other CPUs diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ec62d1cccab7..9ebff8e530f9 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -2035,7 +2035,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, */ void kvm_set_way_flush(struct kvm_vcpu *vcpu) { - unsigned long hcr = vcpu_get_hcr(vcpu); + unsigned long hcr = *vcpu_hcr(vcpu); /* * If this is the first time we do a S/W operation @@ -2050,7 +2050,7 @@ void kvm_set_way_flush(struct kvm_vcpu *vcpu) trace_kvm_set_way_flush(*vcpu_pc(vcpu), vcpu_has_cache_enabled(vcpu)); stage2_flush_vm(vcpu->kvm); - vcpu_set_hcr(vcpu, hcr | HCR_TVM); + *vcpu_hcr(vcpu) = hcr | HCR_TVM; } } @@ -2068,7 +2068,7 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) /* Caches are now on, stop trapping VM ops (until a S/W op) */ if (now_enabled) - vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); + *vcpu_hcr(vcpu) &= ~HCR_TVM; trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); } -- cgit v1.2.3 From bc192ceec37108bf6c04a5c5795fcea5f940b0de Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 10 Oct 2017 10:21:18 +0200 Subject: KVM: arm/arm64: Add kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs As we are about to move a bunch of save/restore logic for VHE kernels to the load and put functions, we need some infrastructure to do this. Reviewed-by: Andrew Jones Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 49d13510e9c2..2062d9357971 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -362,10 +362,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_arm_set_running_vcpu(vcpu); kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); + kvm_vcpu_load_sysregs(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_vcpu_put_sysregs(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); -- cgit v1.2.3 From 3f5c90b890acfa7ad0b817a67cfc5eaaf0e21f7d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 3 Oct 2017 14:02:12 +0200 Subject: KVM: arm64: Introduce VHE-specific kvm_vcpu_run So far this is mostly (see below) a copy of the legacy non-VHE switch function, but we will start reworking these functions in separate directions to work on VHE and non-VHE in the most optimal way in later patches. The only difference after this patch between the VHE and non-VHE run functions is that we omit the branch-predictor variant-2 hardening for QC Falkor CPUs, because this workaround is specific to a series of non-VHE ARMv8.0 CPUs. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 2062d9357971..09dbee56ed8f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -733,13 +733,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); - if (has_vhe()) - kvm_arm_vhe_guest_enter(); - - ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); - if (has_vhe()) + if (has_vhe()) { + kvm_arm_vhe_guest_enter(); + ret = kvm_vcpu_run_vhe(vcpu); kvm_arm_vhe_guest_exit(); + } else { + ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); + } + vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; /* -- cgit v1.2.3 From 04fef057003c207ff4d9f22d2127aee2f9abecd0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 5 Aug 2017 22:51:35 +0200 Subject: KVM: arm64: Remove noop calls to timer save/restore from VHE switch The VHE switch function calls __timer_enable_traps and __timer_disable_traps which don't do anything on VHE systems. Therefore, simply remove these calls from the VHE switch function and make the functions non-conditional as they are now only called from the non-VHE switch path. Acked-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/timer-sr.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index f24404b3c8df..77754a62eb0c 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high) write_sysreg(cntvoff, cntvoff_el2); } +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) { - /* - * We don't need to do this for VHE since the host kernel runs in EL2 - * with HCR_EL2.TGE ==1, which makes those bits have no impact. - */ - if (!has_vhe()) { - u64 val; + u64 val; - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); - } + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); } +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) { - if (!has_vhe()) { - u64 val; + u64 val; - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); - } + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); } -- cgit v1.2.3 From 8d404c4c246137531f94dfee352f350d59d0e5a7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 16 Mar 2016 15:38:53 +0100 Subject: KVM: arm64: Rewrite system register accessors to read/write functions Currently we access the system registers array via the vcpu_sys_reg() macro. However, we are about to change the behavior to some times modify the register file directly, so let's change this to two primitives: * Accessor macros vcpu_write_sys_reg() and vcpu_read_sys_reg() * Direct array access macro __vcpu_sys_reg() The accessor macros should be used in places where the code needs to access the currently loaded VCPU's state as observed by the guest. For example, when trapping on cache related registers, a write to a system register should go directly to the VCPU version of the register. The direct array access macro can be used in places where the VCPU is known to never be running (for example userspace access) or for registers which are never context switched (for example all the PMU system registers). This rewrites all users of vcpu_sys_regs to one of the macros described above. No functional change. Acked-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/pmu.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 8a9c42366db7..1c5b76c46e26 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -37,7 +37,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - counter = vcpu_sys_reg(vcpu, reg); + counter = __vcpu_sys_reg(vcpu, reg); /* The real counter value is equal to the value of counter register plus * the value perf event counts. @@ -61,7 +61,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); + __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); } /** @@ -78,7 +78,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - vcpu_sys_reg(vcpu, reg) = counter; + __vcpu_sys_reg(vcpu, reg) = counter; perf_event_disable(pmc->perf_event); perf_event_release_kernel(pmc->perf_event); pmc->perf_event = NULL; @@ -125,7 +125,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) { - u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; + u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; val &= ARMV8_PMU_PMCR_N_MASK; if (val == 0) @@ -147,7 +147,7 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; - if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { @@ -193,10 +193,10 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; - if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { - reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); - reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); + if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { + reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); reg &= kvm_pmu_valid_counter_mask(vcpu); } @@ -295,7 +295,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); int idx = pmc->idx; - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); @@ -316,19 +316,19 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) if (val == 0) return; - enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { if (!(val & BIT(i))) continue; - type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) & ARMV8_PMU_EVTYPE_EVENT; if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) && (enable & BIT(i))) { - reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; reg = lower_32_bits(reg); - vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; if (!reg) - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } @@ -348,7 +348,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) mask = kvm_pmu_valid_counter_mask(vcpu); if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter(vcpu, - vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); } else { kvm_pmu_disable_counter(vcpu, mask); } @@ -369,8 +369,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) { - return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && - (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); + return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); } /** -- cgit v1.2.3 From 00536ec476601a60d976eebf6aeb9633d4fb37d9 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 27 Dec 2017 20:01:52 +0100 Subject: KVM: arm/arm64: Prepare to handle deferred save/restore of SPSR_EL1 SPSR_EL1 is not used by a VHE host kernel and can be deferred, but we need to rework the accesses to this register to access the latest value depending on whether or not guest system registers are loaded on the CPU or only reside in memory. The handling of accessing the various banked SPSRs for 32-bit VMs is a bit clunky, but this will be improved in following patches which will first prepare and subsequently implement deferred save/restore of the 32-bit registers, including the 32-bit SPSRs. Reviewed-by: Marc Zyngier Reviewed-by: Andrew Jones Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/aarch32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 8bc479fa37e6..efc84cbe8277 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -178,7 +178,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) *vcpu_cpsr(vcpu) = cpsr; /* Note: These now point to the banked copies */ - *vcpu_spsr(vcpu) = new_spsr_value; + vcpu_write_spsr(vcpu, new_spsr_value); *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ -- cgit v1.2.3 From bb5ed7035918d265189e2623d71c8f458713d3e9 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 5 Oct 2017 00:02:41 +0200 Subject: KVM: arm/arm64: Get rid of vgic_elrsr There is really no need to store the vgic_elrsr on the VGIC data structures as the only need we have for the elrsr is to figure out if an LR is inactive when we save the VGIC state upon returning from the guest. We can might as well store this in a temporary local variable. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v2-sr.c | 28 +++++++--------------------- virt/kvm/arm/hyp/vgic-v3-sr.c | 6 +++--- virt/kvm/arm/vgic/vgic-v2.c | 1 - virt/kvm/arm/vgic/vgic-v3.c | 1 - 4 files changed, 10 insertions(+), 26 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index 4fe6e797e8b3..a91b0d2b9249 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -23,29 +23,19 @@ #include #include -static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; - u32 elrsr0, elrsr1; - - elrsr0 = readl_relaxed(base + GICH_ELRSR0); - if (unlikely(nr_lr > 32)) - elrsr1 = readl_relaxed(base + GICH_ELRSR1); - else - elrsr1 = 0; - - cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; -} - static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int i; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 elrsr; + int i; + + elrsr = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(used_lrs > 32)) + elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; for (i = 0; i < used_lrs; i++) { - if (cpu_if->vgic_elrsr & (1UL << i)) + if (elrsr & (1UL << i)) cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; else cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); @@ -68,13 +58,9 @@ void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) if (used_lrs) { cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); - - save_elrsr(vcpu, base); save_lrs(vcpu, base); - writel_relaxed(0, base + GICH_HCR); } else { - cpu_if->vgic_elrsr = ~0UL; cpu_if->vgic_apr = 0; } } diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index f5c3d6d7019e..9abf2f3c12b5 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -222,15 +222,16 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) if (used_lrs) { int i; u32 nr_pre_bits; + u32 elrsr; - cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); + elrsr = read_gicreg(ICH_ELSR_EL2); write_gicreg(0, ICH_HCR_EL2); val = read_gicreg(ICH_VTR_EL2); nr_pre_bits = vtr_to_nr_pre_bits(val); for (i = 0; i < used_lrs; i++) { - if (cpu_if->vgic_elrsr & (1 << i)) + if (elrsr & (1 << i)) cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; else cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); @@ -262,7 +263,6 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) cpu_if->its_vpe.its_vm) write_gicreg(0, ICH_HCR_EL2); - cpu_if->vgic_elrsr = 0xffff; cpu_if->vgic_ap0r[0] = 0; cpu_if->vgic_ap0r[1] = 0; cpu_if->vgic_ap0r[2] = 0; diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index c32d7b93ffd1..bb305d49cfdd 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -265,7 +265,6 @@ void vgic_v2_enable(struct kvm_vcpu *vcpu) * anyway. */ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; /* Get the show on the road... */ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 6b329414e57a..b76e21f3e6bd 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -267,7 +267,6 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) * anyway. */ vgic_v3->vgic_vmcr = 0; - vgic_v3->vgic_elrsr = ~0; /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible -- cgit v1.2.3 From 75174ba6ca9dcfddda88aa420da4d7aa2b279fdf Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 22 Dec 2016 20:39:10 +0100 Subject: KVM: arm/arm64: Handle VGICv2 save/restore from the main VGIC code We can program the GICv2 hypervisor control interface logic directly from the core vgic code and can instead do the save/restore directly from the flush/sync functions, which can lead to a number of future optimizations. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v2-sr.c | 65 ------------------------------------------- virt/kvm/arm/vgic/vgic-v2.c | 63 +++++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic/vgic.c | 19 ++++++++++++- virt/kvm/arm/vgic/vgic.h | 3 ++ 4 files changed, 84 insertions(+), 66 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index a91b0d2b9249..0bbafdfd4adb 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -23,71 +23,6 @@ #include #include -static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - u64 elrsr; - int i; - - elrsr = readl_relaxed(base + GICH_ELRSR0); - if (unlikely(used_lrs > 32)) - elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; - - for (i = 0; i < used_lrs; i++) { - if (elrsr & (1UL << i)) - cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; - else - cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); - - writel_relaxed(0, base + GICH_LR0 + (i * 4)); - } -} - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - - if (!base) - return; - - if (used_lrs) { - cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); - save_lrs(vcpu, base); - writel_relaxed(0, base + GICH_HCR); - } else { - cpu_if->vgic_apr = 0; - } -} - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - int i; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - - if (!base) - return; - - if (used_lrs) { - writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); - writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); - for (i = 0; i < used_lrs; i++) { - writel_relaxed(cpu_if->vgic_lr[i], - base + GICH_LR0 + (i * 4)); - } - } -} - #ifdef CONFIG_ARM64 /* * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index bb305d49cfdd..1e5f3eb6973d 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -421,6 +421,69 @@ out: return ret; } +static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 elrsr; + int i; + + elrsr = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(used_lrs > 32)) + elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; + + for (i = 0; i < used_lrs; i++) { + if (elrsr & (1UL << i)) + cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; + else + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); + + writel_relaxed(0, base + GICH_LR0 + (i * 4)); + } +} + +void vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + void __iomem *base = vgic->vctrl_base; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + + if (!base) + return; + + if (used_lrs) { + cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); + save_lrs(vcpu, base); + writel_relaxed(0, base + GICH_HCR); + } else { + cpu_if->vgic_apr = 0; + } +} + +void vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + void __iomem *base = vgic->vctrl_base; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + int i; + + if (!base) + return; + + if (used_lrs) { + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); + for (i = 0; i < used_lrs; i++) { + writel_relaxed(cpu_if->vgic_lr[i], + base + GICH_LR0 + (i * 4)); + } + } +} + void vgic_v2_load(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index c7c5ef190afa..12e2a28f437e 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -749,11 +749,19 @@ next: vgic_clear_lr(vcpu, count); } +static inline void vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_save_state(vcpu); +} + /* Sync back the hardware VGIC state into our emulation after a guest's run. */ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + vgic_save_state(vcpu); + WARN_ON(vgic_v4_sync_hwstate(vcpu)); /* An empty ap_list_head implies used_lrs == 0 */ @@ -765,6 +773,12 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) vgic_prune_ap_list(vcpu); } +static inline void vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_restore_state(vcpu); +} + /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { @@ -780,13 +794,16 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * this. */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) - return; + goto out; DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); vgic_flush_lr_state(vcpu); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + +out: + vgic_restore_state(vcpu); } void kvm_vgic_load(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 12c37b89f7a3..89b9547fba27 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -176,6 +176,9 @@ void vgic_v2_init_lrs(void); void vgic_v2_load(struct kvm_vcpu *vcpu); void vgic_v2_put(struct kvm_vcpu *vcpu); +void vgic_v2_save_state(struct kvm_vcpu *vcpu); +void vgic_v2_restore_state(struct kvm_vcpu *vcpu); + static inline void vgic_get_irq_kref(struct vgic_irq *irq) { if (irq->intid < VGIC_MIN_LPI) -- cgit v1.2.3 From 8a43a2b34b7dc59dc6df5fa0a3b8540918bc4c58 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 4 Oct 2017 23:25:24 +0200 Subject: KVM: arm/arm64: Move arm64-only vgic-v2-sr.c file to arm64 The vgic-v2-sr.c file now only contains the logic to replay unaligned accesses to the virtual CPU interface on 16K and 64K page systems, which is only relevant on 64-bit platforms. Therefore move this file to the arm64 KVM tree, remove the compile directive from the 32-bit side makefile, and remove the ifdef in the C file. Since this file also no longer saves/restores anything, rename the file to vgic-v2-cpuif-proxy.c to more accurately describe the logic in this file. Reviewed-by: Andre Przywara Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v2-sr.c | 80 ------------------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 virt/kvm/arm/hyp/vgic-v2-sr.c (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c deleted file mode 100644 index 0bbafdfd4adb..000000000000 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_ARM64 -/* - * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the - * guest. - * - * @vcpu: the offending vcpu - * - * Returns: - * 1: GICV access successfully performed - * 0: Not a GICV access - * -1: Illegal GICV access - */ -int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_dist *vgic = &kvm->arch.vgic; - phys_addr_t fault_ipa; - void __iomem *addr; - int rd; - - /* Build the full address */ - fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); - - /* If not for GICV, move on */ - if (fault_ipa < vgic->vgic_cpu_base || - fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE)) - return 0; - - /* Reject anything but a 32bit access */ - if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) - return -1; - - /* Not aligned? Don't bother */ - if (fault_ipa & 3) - return -1; - - rd = kvm_vcpu_dabt_get_rd(vcpu); - addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va); - addr += fault_ipa - vgic->vgic_cpu_base; - - if (kvm_vcpu_dabt_iswrite(vcpu)) { - u32 data = vcpu_data_guest_to_host(vcpu, - vcpu_get_reg(vcpu, rd), - sizeof(u32)); - writel_relaxed(data, addr); - } else { - u32 data = readl_relaxed(addr); - vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, - sizeof(u32))); - } - - return 1; -} -#endif -- cgit v1.2.3 From 771621b0e2f80629948e0dc627d18b6730778c52 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 4 Oct 2017 23:42:32 +0200 Subject: KVM: arm/arm64: Handle VGICv3 save/restore from the main VGIC code on VHE Just like we can program the GICv2 hypervisor control interface directly from the core vgic code, we can do the same for the GICv3 hypervisor control interface on VHE systems. We do this by simply calling the save/restore functions when we have VHE and we can then get rid of the save/restore function calls from the VHE world switch function. One caveat is that we now write GICv3 system register state before the potential early exit path in the run loop, and because we sync back state in the early exit path, we have to ensure that we read a consistent GIC state from the sync path, even though we have never actually run the guest with the newly written GIC state. We solve this by inserting an ISB in the early exit path. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/arm.c | 1 + virt/kvm/arm/vgic/vgic.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 09dbee56ed8f..dba629c5f8ac 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -717,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; + isb(); /* Ensure work in x_flush_hwstate is committed */ kvm_pmu_sync_hwstate(vcpu); if (static_branch_unlikely(&userspace_irqchip_in_use)) kvm_timer_sync_hwstate(vcpu); diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 12e2a28f437e..eaab4a616ecf 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "vgic.h" @@ -749,10 +750,22 @@ next: vgic_clear_lr(vcpu, count); } +static inline bool can_access_vgic_from_kernel(void) +{ + /* + * GICv2 can always be accessed from the kernel because it is + * memory-mapped, and VHE systems can access GICv3 EL2 system + * registers. + */ + return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); +} + static inline void vgic_save_state(struct kvm_vcpu *vcpu) { if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_save_state(vcpu); + else + __vgic_v3_save_state(vcpu); } /* Sync back the hardware VGIC state into our emulation after a guest's run. */ @@ -760,7 +773,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - vgic_save_state(vcpu); + if (can_access_vgic_from_kernel()) + vgic_save_state(vcpu); WARN_ON(vgic_v4_sync_hwstate(vcpu)); @@ -777,6 +791,8 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu) { if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) vgic_v2_restore_state(vcpu); + else + __vgic_v3_restore_state(vcpu); } /* Flush our emulation state into the GIC hardware before entering the guest. */ @@ -803,7 +819,8 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); out: - vgic_restore_state(vcpu); + if (can_access_vgic_from_kernel()) + vgic_restore_state(vcpu); } void kvm_vgic_load(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 923a2e30e5745a8f94f953f7aacaafd3d551e12d Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 5 Oct 2017 00:18:07 +0200 Subject: KVM: arm/arm64: Move VGIC APR save/restore to vgic put/load The APRs can only have bits set when the guest acknowledges an interrupt in the LR and can only have a bit cleared when the guest EOIs an interrupt in the LR. Therefore, if we have no LRs with any pending/active interrupts, the APR cannot change value and there is no need to clear it on every exit from the VM (hint: it will have already been cleared when we exited the guest the last time with the LRs all EOIed). The only case we need to take care of is when we migrate the VCPU away from a CPU or migrate a new VCPU onto a CPU, or when we return to userspace to capture the state of the VCPU for migration. To make sure this works, factor out the APR save/restore functionality into separate functions called from the VCPU (and by extension VGIC) put/load hooks. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v3-sr.c | 124 +++++++++++++++++++++++------------------- virt/kvm/arm/vgic/vgic-v2.c | 7 +-- virt/kvm/arm/vgic/vgic-v3.c | 5 ++ 3 files changed, 74 insertions(+), 62 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 9abf2f3c12b5..437d7af08683 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -21,6 +21,7 @@ #include #include +#include #define vtr_to_max_lr_idx(v) ((v) & 0xf) #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) @@ -221,14 +222,11 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) if (used_lrs) { int i; - u32 nr_pre_bits; u32 elrsr; elrsr = read_gicreg(ICH_ELSR_EL2); write_gicreg(0, ICH_HCR_EL2); - val = read_gicreg(ICH_VTR_EL2); - nr_pre_bits = vtr_to_nr_pre_bits(val); for (i = 0; i < used_lrs; i++) { if (elrsr & (1 << i)) @@ -238,39 +236,10 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) __gic_v3_set_lr(0, i); } - - switch (nr_pre_bits) { - case 7: - cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); - cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); - case 6: - cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); - default: - cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); - } - - switch (nr_pre_bits) { - case 7: - cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); - cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); - case 6: - cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); - default: - cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); - } } else { if (static_branch_unlikely(&vgic_v3_cpuif_trap) || cpu_if->its_vpe.its_vm) write_gicreg(0, ICH_HCR_EL2); - - cpu_if->vgic_ap0r[0] = 0; - cpu_if->vgic_ap0r[1] = 0; - cpu_if->vgic_ap0r[2] = 0; - cpu_if->vgic_ap0r[3] = 0; - cpu_if->vgic_ap1r[0] = 0; - cpu_if->vgic_ap1r[1] = 0; - cpu_if->vgic_ap1r[2] = 0; - cpu_if->vgic_ap1r[3] = 0; } val = read_gicreg(ICC_SRE_EL2); @@ -287,8 +256,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - u64 val; - u32 nr_pre_bits; int i; /* @@ -306,32 +273,9 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); } - val = read_gicreg(ICH_VTR_EL2); - nr_pre_bits = vtr_to_nr_pre_bits(val); - if (used_lrs) { write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); - switch (nr_pre_bits) { - case 7: - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); - case 6: - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); - default: - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); - } - - switch (nr_pre_bits) { - case 7: - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); - case 6: - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); - default: - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); - } - for (i = 0; i < used_lrs; i++) __gic_v3_set_lr(cpu_if->vgic_lr[i], i); } else { @@ -364,6 +308,72 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) ICC_SRE_EL2); } +void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if; + u64 val; + u32 nr_pre_bits; + + vcpu = kern_hyp_va(vcpu); + cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); + cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); + case 6: + cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); + default: + cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); + } + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); + cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); + case 6: + cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); + default: + cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); + } +} + +void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if; + u64 val; + u32 nr_pre_bits; + + vcpu = kern_hyp_va(vcpu); + cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); + case 6: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); + default: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); + } + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); + case 6: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); + default: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); + } +} + void __hyp_text __vgic_v3_init_lrs(void) { int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 1e5f3eb6973d..ca7cfee9f353 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -446,7 +446,6 @@ void vgic_v2_save_state(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; struct vgic_dist *vgic = &kvm->arch.vgic; - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; void __iomem *base = vgic->vctrl_base; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; @@ -454,11 +453,8 @@ void vgic_v2_save_state(struct kvm_vcpu *vcpu) return; if (used_lrs) { - cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); save_lrs(vcpu, base); writel_relaxed(0, base + GICH_HCR); - } else { - cpu_if->vgic_apr = 0; } } @@ -476,7 +472,6 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu) if (used_lrs) { writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); - writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); for (i = 0; i < used_lrs; i++) { writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); @@ -490,6 +485,7 @@ void vgic_v2_load(struct kvm_vcpu *vcpu) struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, vgic->vctrl_base + GICH_APR); } void vgic_v2_put(struct kvm_vcpu *vcpu) @@ -498,4 +494,5 @@ void vgic_v2_put(struct kvm_vcpu *vcpu) struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR); + cpu_if->vgic_apr = readl_relaxed(vgic->vctrl_base + GICH_APR); } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index b76e21f3e6bd..4bafcd1e6bb8 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -587,6 +588,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) */ if (likely(cpu_if->vgic_sre)) kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); + + kvm_call_hyp(__vgic_v3_restore_aprs, vcpu); } void vgic_v3_put(struct kvm_vcpu *vcpu) @@ -595,4 +598,6 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) if (likely(cpu_if->vgic_sre)) cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); + + kvm_call_hyp(__vgic_v3_save_aprs, vcpu); } -- cgit v1.2.3 From 2d0e63e030babe19c94b4453ef4b272c0aacd75a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 5 Oct 2017 17:19:19 +0200 Subject: KVM: arm/arm64: Avoid VGICv3 save/restore on VHE with no IRQs We can finally get completely rid of any calls to the VGICv3 save/restore functions when the AP lists are empty on VHE systems. This requires carefully factoring out trap configuration from saving and restoring state, and carefully choosing what to do on the VHE and non-VHE path. One of the challenges is that we cannot save/restore the VMCR lazily because we can only write the VMCR when ICC_SRE_EL1.SRE is cleared when emulating a GICv2-on-GICv3, since otherwise all Group-0 interrupts end up being delivered as FIQ. To solve this problem, and still provide fast performance in the fast path of exiting a VM when no interrupts are pending (which also optimized the latency for actually delivering virtual interrupts coming from physical interrupts), we orchestrate a dance of only doing the activate/deactivate traps in vgic load/put for VHE systems (which can have ICC_SRE_EL1.SRE cleared when running in the host), and doing the configuration on every round-trip on non-VHE systems. Signed-off-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v3-sr.c | 120 +++++++++++++++++++++++++++--------------- virt/kvm/arm/vgic/vgic-v3.c | 6 +++ virt/kvm/arm/vgic/vgic.c | 9 ++-- 3 files changed, 87 insertions(+), 48 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 437d7af08683..b13cbd41dbc3 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -209,15 +209,15 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - u64 val; /* * Make sure stores to the GIC via the memory mapped interface - * are now visible to the system register interface. + * are now visible to the system register interface when reading the + * LRs, and when reading back the VMCR on non-VHE systems. */ - if (!cpu_if->vgic_sre) { - dsb(st); - cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) + dsb(st); } if (used_lrs) { @@ -226,7 +226,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) elrsr = read_gicreg(ICH_ELSR_EL2); - write_gicreg(0, ICH_HCR_EL2); + write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); for (i = 0; i < used_lrs; i++) { if (elrsr & (1 << i)) @@ -236,19 +236,6 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) __gic_v3_set_lr(0, i); } - } else { - if (static_branch_unlikely(&vgic_v3_cpuif_trap) || - cpu_if->its_vpe.its_vm) - write_gicreg(0, ICH_HCR_EL2); - } - - val = read_gicreg(ICC_SRE_EL2); - write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); - - if (!cpu_if->vgic_sre) { - /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ - isb(); - write_gicreg(1, ICC_SRE_EL1); } } @@ -258,6 +245,31 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; int i; + if (used_lrs) { + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + + for (i = 0; i < used_lrs; i++) + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); + } + + /* + * Ensure that writes to the LRs, and on non-VHE systems ensure that + * the write to the VMCR in __vgic_v3_activate_traps(), will have + * reached the (re)distributors. This ensure the guest will read the + * correct values from the memory-mapped interface. + */ + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) { + isb(); + dsb(sy); + } + } +} + +void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + /* * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a * Group0 interrupt (as generated in GICv2 mode) to be @@ -265,47 +277,69 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) * consequences. So we must make sure that ICC_SRE_EL1 has * been actually programmed with the value we want before * starting to mess with the rest of the GIC, and VMCR_EL2 in - * particular. + * particular. This logic must be called before + * __vgic_v3_restore_state(). */ if (!cpu_if->vgic_sre) { write_gicreg(0, ICC_SRE_EL1); isb(); write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); - } - if (used_lrs) { - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); - for (i = 0; i < used_lrs; i++) - __gic_v3_set_lr(cpu_if->vgic_lr[i], i); - } else { - /* - * If we need to trap system registers, we must write - * ICH_HCR_EL2 anyway, even if no interrupts are being - * injected. Same thing if GICv4 is used, as VLPI - * delivery is gated by ICH_HCR_EL2.En. - */ - if (static_branch_unlikely(&vgic_v3_cpuif_trap) || - cpu_if->its_vpe.its_vm) - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + if (has_vhe()) { + /* + * Ensure that the write to the VMCR will have reached + * the (re)distributors. This ensure the guest will + * read the correct values from the memory-mapped + * interface. + */ + isb(); + dsb(sy); + } } /* - * Ensures that the above will have reached the - * (re)distributors. This ensure the guest will read the - * correct values from the memory-mapped interface. + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. */ + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + + /* + * If we need to trap system registers, we must write + * ICH_HCR_EL2 anyway, even if no interrupts are being + * injected, + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); +} + +void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + if (!cpu_if->vgic_sre) { + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + } + + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + + if (!cpu_if->vgic_sre) { + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ isb(); - dsb(sy); + write_gicreg(1, ICC_SRE_EL1); } /* - * Prevent the guest from touching the GIC system registers if - * SRE isn't enabled for GICv3 emulation. + * If we were trapping system registers, we enabled the VGIC even if + * no interrupts were being injected, and we disable it again here. */ - write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, - ICC_SRE_EL2); + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(0, ICH_HCR_EL2); } void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 4bafcd1e6bb8..4200657694f0 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -590,6 +590,9 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); kvm_call_hyp(__vgic_v3_restore_aprs, vcpu); + + if (has_vhe()) + __vgic_v3_activate_traps(vcpu); } void vgic_v3_put(struct kvm_vcpu *vcpu) @@ -600,4 +603,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); kvm_call_hyp(__vgic_v3_save_aprs, vcpu); + + if (has_vhe()) + __vgic_v3_deactivate_traps(vcpu); } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index eaab4a616ecf..4c4b011685b4 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -773,15 +773,15 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - if (can_access_vgic_from_kernel()) - vgic_save_state(vcpu); - WARN_ON(vgic_v4_sync_hwstate(vcpu)); /* An empty ap_list_head implies used_lrs == 0 */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; + if (can_access_vgic_from_kernel()) + vgic_save_state(vcpu); + if (vgic_cpu->used_lrs) vgic_fold_lr_state(vcpu); vgic_prune_ap_list(vcpu); @@ -810,7 +810,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * this. */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) - goto out; + return; DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); @@ -818,7 +818,6 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) vgic_flush_lr_state(vcpu); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); -out: if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); } -- cgit v1.2.3 From b4ef04995d33a1ecfec3bdfb7776c1ac36dbe87d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 3 Dec 2017 20:04:51 +0000 Subject: KVM: arm/arm64: Demote HYP VA range display to being a debug feature Displaying the HYP VA information is slightly counterproductive when using VA randomization. Turn it into a debug feature only, and adjust the last displayed value to reflect the top of RAM instead of ~0. Acked-by: Christoffer Dall Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 9ebff8e530f9..3eaf46828caa 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1810,9 +1810,10 @@ int kvm_mmu_init(void) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - kvm_info("IDMAP page: %lx\n", hyp_idmap_start); - kvm_info("HYP VA range: %lx:%lx\n", - kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); + kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); + kvm_debug("HYP VA range: %lx:%lx\n", + kern_hyp_va(PAGE_OFFSET), + kern_hyp_va((unsigned long)high_memory - 1)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && hyp_idmap_start < kern_hyp_va(~0UL) && -- cgit v1.2.3 From 807a378425d27d377fdf181d1dba91539bac9294 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Dec 2017 16:26:09 +0000 Subject: KVM: arm/arm64: Move ioremap calls to create_hyp_io_mappings Both HYP io mappings call ioremap, followed by create_hyp_io_mappings. Let's move the ioremap call into create_hyp_io_mappings itself, which simplifies the code a bit and allows for further refactoring. Reviewed-by: Christoffer Dall Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 24 ++++++++++++++---------- virt/kvm/arm/vgic/vgic-v2.c | 31 ++++++++----------------------- 2 files changed, 22 insertions(+), 33 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 3eaf46828caa..2bc32bdf932a 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -709,26 +709,30 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot) } /** - * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode - * @from: The kernel start VA of the range - * @to: The kernel end VA of the range (exclusive) + * create_hyp_io_mappings - Map IO into both kernel and HYP * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @kaddr: Kernel VA for this mapping * * The resulting HYP VA is the same as the kernel VA, modulo * HYP_PAGE_OFFSET. */ -int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr) { - unsigned long start = kern_hyp_va((unsigned long)from); - unsigned long end = kern_hyp_va((unsigned long)to); + unsigned long start, end; - if (is_kernel_in_hyp_mode()) + *kaddr = ioremap(phys_addr, size); + if (!*kaddr) + return -ENOMEM; + + if (is_kernel_in_hyp_mode()) { return 0; + } - /* Check for a valid kernel IO mapping */ - if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) - return -EINVAL; + start = kern_hyp_va((unsigned long)*kaddr); + end = kern_hyp_va((unsigned long)*kaddr + size); return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index ca7cfee9f353..66532f2f0e40 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -360,16 +360,10 @@ int vgic_v2_probe(const struct gic_kvm_info *info) if (!PAGE_ALIGNED(info->vcpu.start) || !PAGE_ALIGNED(resource_size(&info->vcpu))) { kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); - kvm_vgic_global_state.vcpu_base_va = ioremap(info->vcpu.start, - resource_size(&info->vcpu)); - if (!kvm_vgic_global_state.vcpu_base_va) { - kvm_err("Cannot ioremap GICV\n"); - return -ENOMEM; - } - ret = create_hyp_io_mappings(kvm_vgic_global_state.vcpu_base_va, - kvm_vgic_global_state.vcpu_base_va + resource_size(&info->vcpu), - info->vcpu.start); + ret = create_hyp_io_mappings(info->vcpu.start, + resource_size(&info->vcpu), + &kvm_vgic_global_state.vcpu_base_va); if (ret) { kvm_err("Cannot map GICV into hyp\n"); goto out; @@ -378,26 +372,17 @@ int vgic_v2_probe(const struct gic_kvm_info *info) static_branch_enable(&vgic_v2_cpuif_trap); } - kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start, - resource_size(&info->vctrl)); - if (!kvm_vgic_global_state.vctrl_base) { - kvm_err("Cannot ioremap GICH\n"); - ret = -ENOMEM; + ret = create_hyp_io_mappings(info->vctrl.start, + resource_size(&info->vctrl), + &kvm_vgic_global_state.vctrl_base); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); goto out; } vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; - ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, - kvm_vgic_global_state.vctrl_base + - resource_size(&info->vctrl), - info->vctrl.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out; - } - ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); if (ret) { kvm_err("Cannot register GICv2 KVM device\n"); -- cgit v1.2.3 From 1bb32a44aea1fe73c6f84e466a45ae559ef74559 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Dec 2017 16:43:23 +0000 Subject: KVM: arm/arm64: Keep GICv2 HYP VAs in kvm_vgic_global_state As we're about to change the way we map devices at HYP, we need to move away from kern_hyp_va on an IO address. One way of achieving this is to store the VAs in kvm_vgic_global_state, and use that directly from the HYP code. This requires a small change to create_hyp_io_mappings so that it can also return a HYP VA. We take this opportunity to nuke the vctrl_base field in the emulated distributor, as it is not used anymore. Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 20 +++++++++++++++----- virt/kvm/arm/vgic/vgic-init.c | 6 ------ virt/kvm/arm/vgic/vgic-v2.c | 26 ++++++++++++-------------- 3 files changed, 27 insertions(+), 25 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 2bc32bdf932a..52b0ee31ebee 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -713,28 +713,38 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot) * @phys_addr: The physical start address which gets mapped * @size: Size of the region being mapped * @kaddr: Kernel VA for this mapping - * - * The resulting HYP VA is the same as the kernel VA, modulo - * HYP_PAGE_OFFSET. + * @haddr: HYP VA for this mapping */ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, - void __iomem **kaddr) + void __iomem **kaddr, + void __iomem **haddr) { unsigned long start, end; + int ret; *kaddr = ioremap(phys_addr, size); if (!*kaddr) return -ENOMEM; if (is_kernel_in_hyp_mode()) { + *haddr = *kaddr; return 0; } start = kern_hyp_va((unsigned long)*kaddr); end = kern_hyp_va((unsigned long)*kaddr + size); - return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, + ret = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); + + if (ret) { + iounmap(*kaddr); + *kaddr = NULL; + return ret; + } + + *haddr = (void __iomem *)start; + return 0; } /** diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 3e8209a07585..68378fe17a0e 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -166,12 +166,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; - /* - * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init) - * it is stored in distributor struct for asm save/restore purpose - */ - kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 66532f2f0e40..96e144f9d93d 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -363,7 +363,8 @@ int vgic_v2_probe(const struct gic_kvm_info *info) ret = create_hyp_io_mappings(info->vcpu.start, resource_size(&info->vcpu), - &kvm_vgic_global_state.vcpu_base_va); + &kvm_vgic_global_state.vcpu_base_va, + &kvm_vgic_global_state.vcpu_hyp_va); if (ret) { kvm_err("Cannot map GICV into hyp\n"); goto out; @@ -374,7 +375,8 @@ int vgic_v2_probe(const struct gic_kvm_info *info) ret = create_hyp_io_mappings(info->vctrl.start, resource_size(&info->vctrl), - &kvm_vgic_global_state.vctrl_base); + &kvm_vgic_global_state.vctrl_base, + &kvm_vgic_global_state.vctrl_hyp); if (ret) { kvm_err("Cannot map VCTRL into hyp\n"); goto out; @@ -429,9 +431,7 @@ static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) void vgic_v2_save_state(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = vgic->vctrl_base; + void __iomem *base = kvm_vgic_global_state.vctrl_base; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; if (!base) @@ -445,10 +445,8 @@ void vgic_v2_save_state(struct kvm_vcpu *vcpu) void vgic_v2_restore_state(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *vgic = &kvm->arch.vgic; struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - void __iomem *base = vgic->vctrl_base; + void __iomem *base = kvm_vgic_global_state.vctrl_base; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; int i; @@ -467,17 +465,17 @@ void vgic_v2_restore_state(struct kvm_vcpu *vcpu) void vgic_v2_load(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; - writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR); - writel_relaxed(cpu_if->vgic_apr, vgic->vctrl_base + GICH_APR); + writel_relaxed(cpu_if->vgic_vmcr, + kvm_vgic_global_state.vctrl_base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, + kvm_vgic_global_state.vctrl_base + GICH_APR); } void vgic_v2_put(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; - cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR); - cpu_if->vgic_apr = readl_relaxed(vgic->vctrl_base + GICH_APR); + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); + cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); } -- cgit v1.2.3 From 46fef158f10d255dfe47cf3a0ab5f0965b66b1da Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 12 Mar 2018 14:25:10 +0000 Subject: KVM: arm/arm64: Fix idmap size and alignment Although the idmap section of KVM can only be at most 4kB and must be aligned on a 4kB boundary, the rest of the code expects it to be page aligned. Things get messy when tearing down the HYP page tables when PAGE_SIZE is 64K, and the idmap section isn't 64K aligned. Let's fix this by computing aligned boundaries that the HYP code will use. Acked-by: Catalin Marinas Reported-by: James Morse Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 52b0ee31ebee..c41d03a8af93 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1815,7 +1815,9 @@ int kvm_mmu_init(void) int err; hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE); hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); /* -- cgit v1.2.3 From 3ddd45565373604d4f49cb0496fc0168e3863c1f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 Mar 2018 15:17:33 +0000 Subject: KVM: arm64: Fix HYP idmap unmap when using 52bit PA Unmapping the idmap range using 52bit PA is quite broken, as we don't take into account the right number of PGD entries, and rely on PTRS_PER_PGD. The result is that pgd_index() truncates the address, and we end-up in the weed. Let's introduce a new unmap_hyp_idmap_range() that knows about this, together with a kvm_pgd_index() helper, which hides a bit of the complexity of the issue. Fixes: 98732d1b189b ("KVM: arm/arm64: fix HYP ID map extension to 52 bits") Reported-by: James Morse Reviewed-by: Catalin Marinas Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index c41d03a8af93..f7fe724c14f4 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -479,7 +479,13 @@ static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) clear_hyp_pgd_entry(pgd); } -static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd) +{ + return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1); +} + +static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd, + phys_addr_t start, u64 size) { pgd_t *pgd; phys_addr_t addr = start, end = start + size; @@ -489,7 +495,7 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) * We don't unmap anything from HYP, except at the hyp tear down. * Hence, we don't have to invalidate the TLBs here. */ - pgd = pgdp + pgd_index(addr); + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); do { next = pgd_addr_end(addr, end); if (!pgd_none(*pgd)) @@ -497,6 +503,16 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) } while (pgd++, addr = next, addr != end); } +static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size); +} + +static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size); +} + /** * free_hyp_pgds - free Hyp-mode page tables * @@ -512,13 +528,13 @@ void free_hyp_pgds(void) mutex_lock(&kvm_hyp_pgd_mutex); if (boot_hyp_pgd) { - unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_hyp_idmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) { - unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_hyp_idmap_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), (uintptr_t)high_memory - PAGE_OFFSET); unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START), @@ -634,7 +650,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd, addr = start & PAGE_MASK; end = PAGE_ALIGN(end); do { - pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1)); + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); if (pgd_none(*pgd)) { pud = pud_alloc_one(NULL, addr); -- cgit v1.2.3 From e3f019b37b580c3b954419212da26ac5db412a08 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Dec 2017 17:04:38 +0000 Subject: KVM: arm/arm64: Move HYP IO VAs to the "idmap" range We so far mapped our HYP IO (which is essentially the GICv2 control registers) using the same method as for memory. It recently appeared that is a bit unsafe: We compute the HYP VA using the kern_hyp_va helper, but that helper is only designed to deal with kernel VAs coming from the linear map, and not from the vmalloc region... This could in turn cause some bad aliasing between the two, amplified by the upcoming VA randomisation. A solution is to come up with our very own basic VA allocator for MMIO. Since half of the HYP address space only contains a single page (the idmap), we have plenty to borrow from. Let's use the idmap as a base, and allocate downwards from it. GICv2 now lives on the other side of the great VA barrier. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 73 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 14 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index f7fe724c14f4..d8ea68b78e9c 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -43,6 +43,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +static unsigned long io_map_base; + #define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) @@ -518,27 +520,35 @@ static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size) * * Assumes hyp_pgd is a page table used strictly in Hyp-mode and * therefore contains either mappings in the kernel memory area (above - * PAGE_OFFSET), or device mappings in the vmalloc range (from - * VMALLOC_START to VMALLOC_END). + * PAGE_OFFSET), or device mappings in the idmap range. * - * boot_hyp_pgd should only map two pages for the init code. + * boot_hyp_pgd should only map the idmap range, and is only used in + * the extended idmap case. */ void free_hyp_pgds(void) { + pgd_t *id_pgd; + mutex_lock(&kvm_hyp_pgd_mutex); + id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd; + + if (id_pgd) { + /* In case we never called hyp_mmu_init() */ + if (!io_map_base) + io_map_base = hyp_idmap_start; + unmap_hyp_idmap_range(id_pgd, io_map_base, + hyp_idmap_start + PAGE_SIZE - io_map_base); + } + if (boot_hyp_pgd) { - unmap_hyp_idmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) { - unmap_hyp_idmap_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), (uintptr_t)high_memory - PAGE_OFFSET); - unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START), - VMALLOC_END - VMALLOC_START); free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; @@ -735,8 +745,9 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, void __iomem **haddr) { - unsigned long start, end; - int ret; + pgd_t *pgd = hyp_pgd; + unsigned long base; + int ret = 0; *kaddr = ioremap(phys_addr, size); if (!*kaddr) @@ -747,19 +758,52 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, return 0; } + mutex_lock(&kvm_hyp_pgd_mutex); - start = kern_hyp_va((unsigned long)*kaddr); - end = kern_hyp_va((unsigned long)*kaddr + size); - ret = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, - __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); + /* + * This assumes that we we have enough space below the idmap + * page to allocate our VAs. If not, the check below will + * kick. A potential alternative would be to detect that + * overflow and switch to an allocation above the idmap. + * + * The allocated size is always a multiple of PAGE_SIZE. + */ + size = PAGE_ALIGN(size + offset_in_page(phys_addr)); + base = io_map_base - size; + /* + * Verify that BIT(VA_BITS - 1) hasn't been flipped by + * allocating the new area, as it would indicate we've + * overflowed the idmap/IO address range. + */ + if ((base ^ io_map_base) & BIT(VA_BITS - 1)) + ret = -ENOMEM; + else + io_map_base = base; + + mutex_unlock(&kvm_hyp_pgd_mutex); + + if (ret) + goto out; + + if (__kvm_cpu_uses_extended_idmap()) + pgd = boot_hyp_pgd; + + ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), + base, base + size, + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); + if (ret) + goto out; + + *haddr = (void __iomem *)base + offset_in_page(phys_addr); + +out: if (ret) { iounmap(*kaddr); *kaddr = NULL; return ret; } - *haddr = (void __iomem *)start; return 0; } @@ -1892,6 +1936,7 @@ int kvm_mmu_init(void) goto out; } + io_map_base = hyp_idmap_start; return 0; out: free_hyp_pgds(); -- cgit v1.2.3 From ed57cac83e05f2e93567e4b5c57ee58a1bf8a582 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 3 Dec 2017 18:22:49 +0000 Subject: arm64: KVM: Introduce EL2 VA randomisation The main idea behind randomising the EL2 VA is that we usually have a few spare bits between the most significant bit of the VA mask and the most significant bit of the linear mapping. Those bits could be a bunch of zeroes, and could be useful to move things around a bit. Of course, the more memory you have, the less randomisation you get... Alternatively, these bits could be the result of KASLR, in which case they are already random. But it would be nice to have a *different* randomization, just to make the job of a potential attacker a bit more difficult. Inserting these random bits is a bit involved. We don't have a spare register (short of rewriting all the kern_hyp_va call sites), and the immediate we want to insert is too random to be used with the ORR instruction. The best option I could come up with is the following sequence: and x0, x0, #va_mask ror x0, x0, #first_random_bit add x0, x0, #(random & 0xfff) add x0, x0, #(random >> 12), lsl #12 ror x0, x0, #(63 - first_random_bit) making it a fairly long sequence, but one that a decent CPU should be able to execute without breaking a sweat. It is of course NOPed out on VHE. The last 4 instructions can also be turned into NOPs if it appears that there is no free bits to use. Acked-by: Catalin Marinas Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index d8ea68b78e9c..394f41a65732 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1892,7 +1892,7 @@ int kvm_mmu_init(void) kern_hyp_va((unsigned long)high_memory - 1)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && - hyp_idmap_start < kern_hyp_va(~0UL) && + hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) && hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { /* * The idmap page is intersecting with the VA space, -- cgit v1.2.3 From dc2e4633ff39c9fb5ef3a5443721f0e3499b36ed Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Feb 2018 11:00:29 +0000 Subject: arm/arm64: KVM: Introduce EL2-specific executable mappings Until now, all EL2 executable mappings were derived from their EL1 VA. Since we want to decouple the vectors mapping from the rest of the hypervisor, we need to be able to map some text somewhere else. The "idmap" region (for lack of a better name) is ideally suited for this, as we have a huge range that hardly has anything in it. Let's extend the IO allocator to also deal with executable mappings, thus providing the required feature. Acked-by: Catalin Marinas Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 80 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 21 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 394f41a65732..7f6a944db23d 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -734,30 +734,13 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot) return 0; } -/** - * create_hyp_io_mappings - Map IO into both kernel and HYP - * @phys_addr: The physical start address which gets mapped - * @size: Size of the region being mapped - * @kaddr: Kernel VA for this mapping - * @haddr: HYP VA for this mapping - */ -int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, - void __iomem **kaddr, - void __iomem **haddr) +static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, + unsigned long *haddr, pgprot_t prot) { pgd_t *pgd = hyp_pgd; unsigned long base; int ret = 0; - *kaddr = ioremap(phys_addr, size); - if (!*kaddr) - return -ENOMEM; - - if (is_kernel_in_hyp_mode()) { - *haddr = *kaddr; - return 0; - } - mutex_lock(&kvm_hyp_pgd_mutex); /* @@ -791,19 +774,74 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), base, base + size, - __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); + __phys_to_pfn(phys_addr), prot); if (ret) goto out; - *haddr = (void __iomem *)base + offset_in_page(phys_addr); + *haddr = base + offset_in_page(phys_addr); out: + return ret; +} + +/** + * create_hyp_io_mappings - Map IO into both kernel and HYP + * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @kaddr: Kernel VA for this mapping + * @haddr: HYP VA for this mapping + */ +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr, + void __iomem **haddr) +{ + unsigned long addr; + int ret; + + *kaddr = ioremap(phys_addr, size); + if (!*kaddr) + return -ENOMEM; + + if (is_kernel_in_hyp_mode()) { + *haddr = *kaddr; + return 0; + } + + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_DEVICE); if (ret) { iounmap(*kaddr); *kaddr = NULL; + *haddr = NULL; + return ret; + } + + *haddr = (void __iomem *)addr; + return 0; +} + +/** + * create_hyp_exec_mappings - Map an executable range into HYP + * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @haddr: HYP VA for this mapping + */ +int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, + void **haddr) +{ + unsigned long addr; + int ret; + + BUG_ON(is_kernel_in_hyp_mode()); + + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_EXEC); + if (ret) { + *haddr = NULL; return ret; } + *haddr = (void *)addr; return 0; } -- cgit v1.2.3 From 67b5b673ad4d469186edddecfe59909a9fd4b643 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Mar 2018 14:59:40 +0000 Subject: KVM: arm/arm64: vgic: Disallow Active+Pending for level interrupts It was recently reported that VFIO mediated devices, and anything that VFIO exposes as level interrupts, do no strictly follow the expected logic of such interrupts as it only lowers the input line when the guest has EOId the interrupt at the GIC level, rather than when it Acked the interrupt at the device level. THe GIC's Active+Pending state is fundamentally incompatible with this behaviour, as it prevents KVM from observing the EOI, and in turn results in VFIO never dropping the line. This results in an interrupt storm in the guest, which it really never expected. As we cannot really change VFIO to follow the strict rules of level signalling, let's forbid the A+P state altogether, as it is in the end only an optimization. It ensures that we will transition via an invalid state, which we can use to notify VFIO of the EOI. Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Shunyong Yang Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-v2.c | 54 +++++++++++++++++++++++++-------------------- virt/kvm/arm/vgic/vgic-v3.c | 54 +++++++++++++++++++++++++-------------------- 2 files changed, 60 insertions(+), 48 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 8327ea932aea..45aa433f018f 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -105,12 +105,9 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) /* * Clear soft pending state when level irqs have been acked. - * Always regenerate the pending state. */ - if (irq->config == VGIC_CONFIG_LEVEL) { - if (!(val & GICH_LR_PENDING_BIT)) - irq->pending_latch = false; - } + if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) + irq->pending_latch = false; /* * Level-triggered mapped IRQs are special because we only @@ -153,8 +150,35 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { u32 val = irq->intid; + bool allow_pending = true; + + if (irq->active) + val |= GICH_LR_ACTIVE_BIT; + + if (irq->hw) { + val |= GICH_LR_HW; + val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= GICH_LR_EOI; - if (irq_is_pending(irq)) { + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { val |= GICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) @@ -171,24 +195,6 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->active) - val |= GICH_LR_ACTIVE_BIT; - - if (irq->hw) { - val |= GICH_LR_HW; - val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; - /* - * Never set pending+active on a HW interrupt, as the - * pending state is kept at the physical distributor - * level. - */ - if (irq->active && irq_is_pending(irq)) - val &= ~GICH_LR_PENDING_BIT; - } else { - if (irq->config == VGIC_CONFIG_LEVEL) - val |= GICH_LR_EOI; - } - /* * Level-triggered mapped IRQs are special because we only observe * rising edges as input to the VGIC. We therefore lower the line diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index e17f61a434fa..8195f52ae6f0 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -97,12 +97,9 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) /* * Clear soft pending state when level irqs have been acked. - * Always regenerate the pending state. */ - if (irq->config == VGIC_CONFIG_LEVEL) { - if (!(val & ICH_LR_PENDING_BIT)) - irq->pending_latch = false; - } + if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) + irq->pending_latch = false; /* * Level-triggered mapped IRQs are special because we only @@ -136,8 +133,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { u32 model = vcpu->kvm->arch.vgic.vgic_model; u64 val = irq->intid; + bool allow_pending = true; + + if (irq->active) + val |= ICH_LR_ACTIVE_BIT; + + if (irq->hw) { + val |= ICH_LR_HW; + val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= ICH_LR_EOI; - if (irq_is_pending(irq)) { + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { val |= ICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) @@ -155,24 +179,6 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->active) - val |= ICH_LR_ACTIVE_BIT; - - if (irq->hw) { - val |= ICH_LR_HW; - val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; - /* - * Never set pending+active on a HW interrupt, as the - * pending state is kept at the physical distributor - * level. - */ - if (irq->active && irq_is_pending(irq)) - val &= ~ICH_LR_PENDING_BIT; - } else { - if (irq->config == VGIC_CONFIG_LEVEL) - val |= ICH_LR_EOI; - } - /* * Level-triggered mapped IRQs are special because we only observe * rising edges as input to the VGIC. We therefore lower the line -- cgit v1.2.3 From 7d8b44c54e0c7c8f688e3a07f17e6083f849f01f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Mar 2018 14:57:09 +0000 Subject: KVM: arm/arm64: vgic-its: Fix potential overrun in vgic_copy_lpi_list vgic_copy_lpi_list() parses the LPI list and picks LPIs targeting a given vcpu. We allocate the array containing the intids before taking the lpi_list_lock, which means we can have an array size that is not equal to the number of LPIs. This is particularly obvious when looking at the path coming from vgic_enable_lpis, which is not a command, and thus can run in parallel with commands: vcpu 0: vcpu 1: vgic_enable_lpis its_sync_lpi_pending_table vgic_copy_lpi_list intids = kmalloc_array(irq_count) MAPI(lpi targeting vcpu 0) list_for_each_entry(lpi_list_head) intids[i++] = irq->intid; At that stage, we will happily overrun the intids array. Boo. An easy fix is is to break once the array is full. The MAPI command will update the config anyway, and we won't miss a thing. We also make sure that lpi_list_count is read exactly once, so that further updates of that value will not affect the array bound check. Cc: stable@vger.kernel.org Fixes: ccb1d791ab9e ("KVM: arm64: vgic-its: Fix pending table sync") Reviewed-by: Andre Przywara Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-its.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 465095355666..a8f07243aa9f 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -316,21 +316,24 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_irq *irq; u32 *intids; - int irq_count = dist->lpi_list_count, i = 0; + int irq_count, i = 0; /* - * We use the current value of the list length, which may change - * after the kmalloc. We don't care, because the guest shouldn't - * change anything while the command handling is still running, - * and in the worst case we would miss a new IRQ, which one wouldn't - * expect to be covered by this command anyway. + * There is an obvious race between allocating the array and LPIs + * being mapped/unmapped. If we ended up here as a result of a + * command, we're safe (locks are held, preventing another + * command). If coming from another path (such as enabling LPIs), + * we must be careful not to overrun the array. */ + irq_count = READ_ONCE(dist->lpi_list_count); intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); if (!intids) return -ENOMEM; spin_lock(&dist->lpi_list_lock); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (i == irq_count) + break; /* We don't need to "get" the IRQ, as we hold the list lock. */ if (irq->target_vcpu != vcpu) continue; -- cgit v1.2.3