diff options
Diffstat (limited to 'drivers/iommu/intel/irq_remapping.c')
-rw-r--r-- | drivers/iommu/intel/irq_remapping.c | 71 |
1 files changed, 42 insertions, 29 deletions
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index ad795c772f21..3bc2a03cceca 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -25,11 +25,6 @@ #include "../irq_remapping.h" #include "../iommu-pages.h" -enum irq_mode { - IRQ_REMAPPING, - IRQ_POSTING, -}; - struct ioapic_scope { struct intel_iommu *iommu; unsigned int id; @@ -49,8 +44,8 @@ struct irq_2_iommu { u16 irte_index; u16 sub_handle; u8 irte_mask; - enum irq_mode mode; bool posted_msi; + bool posted_vcpu; }; struct intel_ir_data { @@ -138,7 +133,6 @@ static int alloc_irte(struct intel_iommu *iommu, irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; irq_iommu->irte_mask = mask; - irq_iommu->mode = IRQ_REMAPPING; } raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); @@ -193,8 +187,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, rc = qi_flush_iec(iommu, index, 0); - /* Update iommu mode according to the IRTE mode */ - irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING; raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return rc; @@ -1169,7 +1161,26 @@ static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {} #endif -static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) +static void __intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) +{ + struct intel_ir_data *ir_data = irqd->chip_data; + + /* + * Don't modify IRTEs for IRQs that are being posted to vCPUs if the + * host CPU affinity changes. + */ + if (ir_data->irq_2_iommu.posted_vcpu && !force_host) + return; + + ir_data->irq_2_iommu.posted_vcpu = false; + + if (ir_data->irq_2_iommu.posted_msi) + intel_ir_reconfigure_irte_posted(irqd); + else + modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); +} + +static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) { struct intel_ir_data *ir_data = irqd->chip_data; struct irte *irte = &ir_data->irte_entry; @@ -1182,10 +1193,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) irte->vector = cfg->vector; irte->dest_id = IRTE_DEST(cfg->dest_apicid); - if (ir_data->irq_2_iommu.posted_msi) - intel_ir_reconfigure_irte_posted(irqd); - else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) - modify_irte(&ir_data->irq_2_iommu, irte); + __intel_ir_reconfigure_irte(irqd, force_host); } /* @@ -1240,7 +1248,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) /* stop posting interrupts, back to the default mode */ if (!vcpu_pi_info) { - modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); + __intel_ir_reconfigure_irte(data, true); } else { struct irte irte_pi; @@ -1263,6 +1271,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) & ~(-1UL << PDA_HIGH_BIT); + ir_data->irq_2_iommu.posted_vcpu = true; modify_irte(&ir_data->irq_2_iommu, &irte_pi); } @@ -1278,43 +1287,44 @@ static struct irq_chip intel_ir_chip = { }; /* - * With posted MSIs, all vectors are multiplexed into a single notification - * vector. Devices MSIs are then dispatched in a demux loop where - * EOIs can be coalesced as well. + * With posted MSIs, the MSI vectors are multiplexed into a single notification + * vector, and only the notification vector is sent to the APIC IRR. Device + * MSIs are then dispatched in a demux loop that harvests the MSIs from the + * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to + * the APIC IRR, and thus do not need an EOI. The notification handler instead + * performs a single EOI after processing the PIR. * - * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack() - * function. Instead EOI is performed by the posted interrupt notification - * handler. + * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be + * honored, only the APIC EOI is omitted. * * For the example below, 3 MSIs are coalesced into one CPU notification. Only - * one apic_eoi() is needed. + * one apic_eoi() is needed, but each MSI needs to process pending changes to + * its CPU affinity. * * __sysvec_posted_msi_notification() * irq_enter(); * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * apic_eoi() * irq_exit() + * */ - -static void dummy_ack(struct irq_data *d) { } - static struct irq_chip intel_ir_chip_post_msi = { .name = "INTEL-IR-POST", - .irq_ack = dummy_ack, + .irq_ack = irq_move_irq, .irq_set_affinity = intel_ir_set_affinity, .irq_compose_msi_msg = intel_ir_compose_msi_msg, .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, @@ -1489,6 +1499,9 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain, struct intel_ir_data *data = irq_data->chip_data; struct irte entry; + WARN_ON_ONCE(data->irq_2_iommu.posted_vcpu); + data->irq_2_iommu.posted_vcpu = false; + memset(&entry, 0, sizeof(entry)); modify_irte(&data->irq_2_iommu, &entry); } |