diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 5 | ||||
-rw-r--r-- | arch/arm64/include/asm/mmu.h | 11 | ||||
-rw-r--r-- | arch/arm64/kernel/cpu_errata.c | 2 | ||||
-rw-r--r-- | arch/arm64/kernel/image-vars.h | 4 | ||||
-rw-r--r-- | arch/arm64/kernel/pi/map_kernel.c | 25 | ||||
-rw-r--r-- | arch/arm64/kvm/arm.c | 5 | ||||
-rw-r--r-- | arch/riscv/include/asm/cacheflush.h | 15 | ||||
-rw-r--r-- | arch/riscv/kernel/probes/uprobes.c | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 6 | ||||
-rw-r--r-- | arch/x86/kvm/svm/avic.c | 68 | ||||
-rw-r--r-- | arch/x86/kvm/trace.h | 13 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/posted_intr.c | 28 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 28 |
13 files changed, 124 insertions, 96 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e98cfe7855a6..08ba91e6fb03 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1588,4 +1588,9 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val); #define kvm_has_s1poe(k) \ (kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP)) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 30a29e88994b..6e8aa8e72601 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -94,17 +94,6 @@ static inline bool kaslr_requires_kpti(void) return false; } - /* - * Systems affected by Cavium erratum 24756 are incompatible - * with KPTI. - */ - if (IS_ENABLED(CONFIG_CAVIUM_ERRATUM_27456)) { - extern const struct midr_range cavium_erratum_27456_cpus[]; - - if (is_midr_in_range_list(cavium_erratum_27456_cpus)) - return false; - } - return true; } diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b55f5f705750..6b0ad5070d3e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -335,7 +335,7 @@ static const struct midr_range cavium_erratum_23154_cpus[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 -const struct midr_range cavium_erratum_27456_cpus[] = { +static const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), /* Cavium ThunderX, T81 pass 1.0 */ diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 5e3c4b58f279..2004b4f41ade 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -47,10 +47,6 @@ PROVIDE(__pi_id_aa64smfr0_override = id_aa64smfr0_override); PROVIDE(__pi_id_aa64zfr0_override = id_aa64zfr0_override); PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override); PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings); -#ifdef CONFIG_CAVIUM_ERRATUM_27456 -PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus); -PROVIDE(__pi_is_midr_in_range_list = is_midr_in_range_list); -#endif PROVIDE(__pi__ctype = _ctype); PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed); diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e57b043f324b..c6650cfe706c 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -207,6 +207,29 @@ static void __init map_fdt(u64 fdt) dsb(ishst); } +/* + * PI version of the Cavium Eratum 27456 detection, which makes it + * impossible to use non-global mappings. + */ +static bool __init ng_mappings_allowed(void) +{ + static const struct midr_range cavium_erratum_27456_cpus[] __initconst = { + /* Cavium ThunderX, T88 pass 1.x - 2.1 */ + MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1), + /* Cavium ThunderX, T81 pass 1.0 */ + MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), + {}, + }; + + for (const struct midr_range *r = cavium_erratum_27456_cpus; r->model; r++) { + if (midr_is_cpu_model_range(read_cpuid_id(), r->model, + r->rv_min, r->rv_max)) + return false; + } + + return true; +} + asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) { static char const chosen_str[] __initconst = "/chosen"; @@ -246,7 +269,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt) u64 kaslr_seed = kaslr_early_init(fdt, chosen); if (kaslr_seed && kaslr_requires_kpti()) - arm64_use_ng_mappings = true; + arm64_use_ng_mappings = ng_mappings_allowed(); kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1); } diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 68fec8c95fee..19ca57def629 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2743,11 +2743,6 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) return irqchip_in_kernel(kvm); } -bool kvm_arch_has_irq_bypass(void) -{ - return true; -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 8de73f91bfa3..b59ffeb668d6 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -34,11 +34,6 @@ static inline void flush_dcache_page(struct page *page) flush_dcache_folio(page_folio(page)); } -/* - * RISC-V doesn't have an instruction to flush parts of the instruction cache, - * so instead we just flush the whole thing. - */ -#define flush_icache_range(start, end) flush_icache_all() #define flush_icache_user_page(vma, pg, addr, len) \ do { \ if (vma->vm_flags & VM_EXEC) \ @@ -78,6 +73,16 @@ void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. + */ +#define flush_icache_range flush_icache_range +static inline void flush_icache_range(unsigned long start, unsigned long end) +{ + flush_icache_all(); +} + extern unsigned int riscv_cbom_block_size; extern unsigned int riscv_cboz_block_size; void riscv_init_cbo_blocksizes(void); diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 4b3dc8beaf77..cc15f7ca6cc1 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -167,6 +167,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, /* Initialize the slot */ void *kaddr = kmap_atomic(page); void *dst = kaddr + (vaddr & ~PAGE_MASK); + unsigned long start = (unsigned long)dst; memcpy(dst, src, len); @@ -176,13 +177,6 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, *(uprobe_opcode_t *)dst = __BUG_INSN_32; } + flush_icache_range(start, start + len); kunmap_atomic(kaddr); - - /* - * We probably need flush_icache_user_page() but it needs vma. - * This should work on most of architectures by default. If - * architecture needs to do something different it can define - * its own version of the function. - */ - flush_dcache_page(page); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3bdae454a959..7bc174a1f1cb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -35,6 +35,7 @@ #include <asm/mtrr.h> #include <asm/msr-index.h> #include <asm/asm.h> +#include <asm/irq_remapping.h> #include <asm/kvm_page_track.h> #include <asm/kvm_vcpu_regs.h> #include <asm/reboot.h> @@ -2423,4 +2424,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); */ #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) +static inline bool kvm_arch_has_irq_bypass(void) +{ + return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); +} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 65fd245a9953..7338879d1c0c 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -796,12 +796,15 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) struct amd_svm_iommu_ir *ir; u64 entry; + if (WARN_ON_ONCE(!pi->ir_data)) + return -EINVAL; + /** * In some cases, the existing irte is updated and re-set, * so we need to check here if it's already been * added * to the ir_list. */ - if (pi->ir_data && (pi->prev_ga_tag != 0)) { + if (pi->prev_ga_tag) { struct kvm *kvm = svm->vcpu.kvm; u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag); struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id); @@ -820,7 +823,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) * Allocating new amd_iommu_pi_data, which will get * add to the per-vcpu ir_list. */ - ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT); + ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_ATOMIC | __GFP_ACCOUNT); if (!ir) { ret = -ENOMEM; goto out; @@ -896,10 +899,10 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; int idx, ret = 0; - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP)) + if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass()) return 0; pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n", @@ -933,6 +936,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_vcpu_apicv_active(&svm->vcpu)) { struct amd_iommu_pi_data pi; + enable_remapped_mode = false; + /* Try to enable guest_mode in IRTE */ pi.base = __sme_set(page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK); @@ -951,33 +956,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, */ if (!ret && pi.is_guest_mode) svm_ir_list_add(svm, &pi); - } else { - /* Use legacy mode in IRTE */ - struct amd_iommu_pi_data pi; - - /** - * Here, pi is used to: - * - Tell IOMMU to use legacy mode for this interrupt. - * - Retrieve ga_tag of prior interrupt remapping data. - */ - pi.prev_ga_tag = 0; - pi.is_guest_mode = false; - ret = irq_set_vcpu_affinity(host_irq, &pi); - - /** - * Check if the posted interrupt was previously - * setup with the guest_mode by checking if the ga_tag - * was cached. If so, we need to clean up the per-vcpu - * ir_list. - */ - if (!ret && pi.prev_ga_tag) { - int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu_by_id(kvm, id); - if (vcpu) - svm_ir_list_del(to_svm(vcpu), &pi); - } } if (!ret && svm) { @@ -993,6 +971,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } ret = 0; + if (enable_remapped_mode) { + /* Use legacy mode in IRTE */ + struct amd_iommu_pi_data pi; + + /** + * Here, pi is used to: + * - Tell IOMMU to use legacy mode for this interrupt. + * - Retrieve ga_tag of prior interrupt remapping data. + */ + pi.prev_ga_tag = 0; + pi.is_guest_mode = false; + ret = irq_set_vcpu_affinity(host_irq, &pi); + + /** + * Check if the posted interrupt was previously + * setup with the guest_mode by checking if the ga_tag + * was cached. If so, we need to clean up the per-vcpu + * ir_list. + */ + if (!ret && pi.prev_ga_tag) { + int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag); + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu_by_id(kvm, id); + if (vcpu) + svm_ir_list_del(to_svm(vcpu), &pi); + } + } out: srcu_read_unlock(&kvm->irq_srcu, idx); return ret; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ccda95e53f62..ba736cbb0587 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -11,6 +11,13 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm +#ifdef CREATE_TRACE_POINTS +#define tracing_kvm_rip_read(vcpu) ({ \ + typeof(vcpu) __vcpu = vcpu; \ + __vcpu->arch.guest_state_protected ? 0 : kvm_rip_read(__vcpu); \ + }) +#endif + /* * Tracepoint for guest mode entry. */ @@ -28,7 +35,7 @@ TRACE_EVENT(kvm_entry, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->rip = kvm_rip_read(vcpu); + __entry->rip = tracing_kvm_rip_read(vcpu); __entry->immediate_exit = force_immediate_exit; kvm_x86_call(get_entry_info)(vcpu, &__entry->intr_info, @@ -319,7 +326,7 @@ TRACE_EVENT(name, \ ), \ \ TP_fast_assign( \ - __entry->guest_rip = kvm_rip_read(vcpu); \ + __entry->guest_rip = tracing_kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ __entry->requests = READ_ONCE(vcpu->requests); \ @@ -423,7 +430,7 @@ TRACE_EVENT(kvm_page_fault, TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; - __entry->guest_rip = kvm_rip_read(vcpu); + __entry->guest_rip = tracing_kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 51116fe69a50..d70e5b90087d 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -297,6 +297,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; + bool enable_remapped_mode = true; struct kvm_lapic_irq irq; struct kvm_vcpu *vcpu; struct vcpu_data vcpu_info; @@ -335,21 +336,8 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, kvm_set_msi_irq(kvm, e, &irq); if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) || - !kvm_irq_is_postable(&irq)) { - /* - * Make sure the IRTE is in remapped mode if - * we don't handle it in posted mode. - */ - ret = irq_set_vcpu_affinity(host_irq, NULL); - if (ret < 0) { - printk(KERN_INFO - "failed to back to remapped mode, irq: %u\n", - host_irq); - goto out; - } - + !kvm_irq_is_postable(&irq)) continue; - } vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu)); vcpu_info.vector = irq.vector; @@ -357,11 +345,12 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi, vcpu_info.vector, vcpu_info.pi_desc_addr, set); - if (set) - ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); - else - ret = irq_set_vcpu_affinity(host_irq, NULL); + if (!set) + continue; + enable_remapped_mode = false; + + ret = irq_set_vcpu_affinity(host_irq, &vcpu_info); if (ret < 0) { printk(KERN_INFO "%s: failed to update PI IRTE\n", __func__); @@ -369,6 +358,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq, } } + if (enable_remapped_mode) + ret = irq_set_vcpu_affinity(host_irq, NULL); + ret = 0; out: srcu_read_unlock(&kvm->irq_srcu, idx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3712dde0bf9d..df5b99ea1f18 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11098,7 +11098,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* * Profile KVM exit RIPs: */ - if (unlikely(prof_on == KVM_PROFILING)) { + if (unlikely(prof_on == KVM_PROFILING && + !vcpu->arch.guest_state_protected)) { unsigned long rip = kvm_rip_read(vcpu); profile_hit(KVM_PROFILING, (void *)rip); } @@ -13556,25 +13557,27 @@ bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) } EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); -bool kvm_arch_has_irq_bypass(void) -{ - return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP); -} - int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, struct irq_bypass_producer *prod) { struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; int ret; - irqfd->producer = prod; kvm_arch_start_assignment(irqfd->kvm); + + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = prod; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 1); if (ret) kvm_arch_end_assignment(irqfd->kvm); + spin_unlock_irq(&kvm->irqfds.lock); + + return ret; } @@ -13584,9 +13587,9 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, int ret; struct kvm_kernel_irqfd *irqfd = container_of(cons, struct kvm_kernel_irqfd, consumer); + struct kvm *kvm = irqfd->kvm; WARN_ON(irqfd->producer != prod); - irqfd->producer = NULL; /* * When producer of consumer is unregistered, we change back to @@ -13594,12 +13597,18 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, * when the irq is masked/disabled or the consumer side (KVM * int this case doesn't want to receive the interrupts. */ + spin_lock_irq(&kvm->irqfds.lock); + irqfd->producer = NULL; + ret = kvm_x86_call(pi_update_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0); if (ret) printk(KERN_INFO "irq bypass consumer (token %p) unregistration" " fails: %d\n", irqfd->consumer.token, ret); + spin_unlock_irq(&kvm->irqfds.lock); + + kvm_arch_end_assignment(irqfd->kvm); } @@ -13612,7 +13621,8 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old, struct kvm_kernel_irq_routing_entry *new) { - if (new->type != KVM_IRQ_ROUTING_MSI) + if (old->type != KVM_IRQ_ROUTING_MSI || + new->type != KVM_IRQ_ROUTING_MSI) return true; return !!memcmp(&old->msi, &new->msi, sizeof(new->msi)); |