-rw-r--r--  arch/x86_64/kernel/entry.S   |  3 +
-rw-r--r--  arch/x86_64/kernel/i8259.c   |  6 +++++-
-rw-r--r--  arch/x86_64/kernel/io_apic.c | 78 +++++++++++++++++++++++++++++++++++++++-----
-rw-r--r--  include/asm-x86_64/hw_irq.h  |  9 +++++++--
4 files changed, 88 insertions(+), 8 deletions(-)
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 9f5dac64aa8f..ed4350ced3d0 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -675,6 +675,9 @@ END(invalidate_interrupt\num)
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
END(call_function_interrupt)
+ENTRY(irq_move_cleanup_interrupt)
+ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
+END(irq_move_cleanup_interrupt)
#endif
ENTRY(apic_timer_interrupt)
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 45d85630196a..21d95b747437 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -450,6 +450,7 @@ void spurious_interrupt(void);
void error_interrupt(void);
void reschedule_interrupt(void);
void call_function_interrupt(void);
+void irq_move_cleanup_interrupt(void);
void invalidate_interrupt0(void);
void invalidate_interrupt1(void);
void invalidate_interrupt2(void);
@@ -537,7 +538,10 @@ void __init init_IRQ(void)
/* IPI for generic function call */
set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-#endif
+
+ /* Low priority IPI to cleanup after moving an irq */
+ set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
+#endif
set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
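These first two hunks wire the new IPI end to end. In entry.S the apicinterrupt macro expands into the irq_move_cleanup_interrupt stub, which saves register state and calls the C handler smp_irq_move_cleanup_interrupt(); in i8259.c, init_IRQ() installs that stub in the IDT at IRQ_MOVE_CLEANUP_VECTOR. Note the #endif moves down so the gate is registered only on CONFIG_SMP builds, matching the stub, which lives inside the SMP-only block of entry.S. The registration itself is plain vector-table indexing; a stand-alone toy of the pattern (hypothetical names: vector_table, set_gate, cleanup_handler; not the kernel API):

#include <stdio.h>

#define NR_VECTORS		256
#define FIRST_EXTERNAL_VECTOR	0x20
#define CLEANUP_VECTOR		FIRST_EXTERNAL_VECTOR	/* lowest usable priority level */

/* Hypothetical stand-ins for the IDT and set_intr_gate(). */
static void (*vector_table[NR_VECTORS])(void);

static void set_gate(unsigned int vector, void (*handler)(void))
{
	vector_table[vector] = handler;
}

static void cleanup_handler(void)
{
	printf("cleanup IPI handled\n");
}

int main(void)
{
	set_gate(CLEANUP_VECTOR, cleanup_handler);	/* as init_IRQ() does */
	vector_table[CLEANUP_VECTOR]();			/* as the CPU does on delivery */
	return 0;
}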
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 8dede0bd2267..48593f6b708f 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -36,6 +36,7 @@
#include <acpi/acpi_bus.h>
#endif
+#include <asm/idle.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/desc.h>
@@ -49,7 +50,10 @@
struct irq_cfg {
cpumask_t domain;
+ cpumask_t old_domain;
+ unsigned move_cleanup_count;
u8 vector;
+ u8 move_in_progress : 1;
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -652,7 +656,6 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
- cpumask_t old_mask = CPU_MASK_NONE;
unsigned int old_vector;
int cpu;
struct irq_cfg *cfg;
@@ -663,18 +666,20 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
/* Only try and allocate irqs on cpus that are present */
cpus_and(mask, mask, cpu_online_map);
+ if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+ return -EBUSY;
+
old_vector = cfg->vector;
if (old_vector) {
cpumask_t tmp;
cpus_and(tmp, cfg->domain, mask);
if (!cpus_empty(tmp))
return 0;
- cpus_and(old_mask, cfg->domain, cpu_online_map);
}
for_each_cpu_mask(cpu, mask) {
cpumask_t domain, new_mask;
- int new_cpu, old_cpu;
+ int new_cpu;
int vector, offset;
domain = vector_allocation_domain(cpu);
@@ -699,8 +704,10 @@ next:
/* Found one! */
current_vector = vector;
current_offset = offset;
- for_each_cpu_mask(old_cpu, old_mask)
- per_cpu(vector_irq, old_cpu)[old_vector] = -1;
+ if (old_vector) {
+ cfg->move_in_progress = 1;
+ cfg->old_domain = cfg->domain;
+ }
for_each_cpu_mask(new_cpu, new_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
cfg->vector = vector;
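This hunk is the allocation-side core of the fix. The deleted lines zapped the old domain's per-cpu vector_irq slots the moment a new vector was chosen, even though a CPU in the old domain could still have an interrupt in flight on the old vector. The new code instead refuses to start a second migration while one is unfinished (the -EBUSY check added above) and otherwise just records old_domain and raises move_in_progress, deferring the teardown to the cleanup IPI. A toy of that guard (hypothetical names: toy_cfg, try_assign_vector):

#include <errno.h>
#include <stdio.h>

/* Hypothetical miniature of struct irq_cfg: just the two move-tracking fields. */
struct toy_cfg {
	unsigned move_cleanup_count;	/* old-domain CPUs yet to acknowledge */
	unsigned move_in_progress : 1;	/* set between assignment and cleanup */
};

static int try_assign_vector(struct toy_cfg *cfg)
{
	/* Mirrors the new check in __assign_irq_vector(). */
	if (cfg->move_in_progress || cfg->move_cleanup_count)
		return -EBUSY;
	cfg->move_in_progress = 1;	/* start a (toy) migration */
	return 0;
}

int main(void)
{
	struct toy_cfg cfg = { 0 };

	printf("first move:  %d\n", try_assign_vector(&cfg));	/* 0 */
	printf("second move: %d\n", try_assign_vector(&cfg));	/* -EBUSY: cleanup unfinished */
	return 0;
}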
@@ -1360,8 +1367,68 @@ static int ioapic_retrigger_irq(unsigned int irq)
* races.
*/
+#ifdef CONFIG_SMP
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
+{
+ unsigned vector, me;
+ ack_APIC_irq();
+ exit_idle();
+ irq_enter();
+
+ me = smp_processor_id();
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+ unsigned int irq;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ irq = __get_cpu_var(vector_irq)[vector];
+ if (irq >= NR_IRQS)
+ continue;
+
+ desc = irq_desc + irq;
+ cfg = irq_cfg + irq;
+ spin_lock(&desc->lock);
+ if (!cfg->move_cleanup_count)
+ goto unlock;
+
+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
+ goto unlock;
+
+ __get_cpu_var(vector_irq)[vector] = -1;
+ cfg->move_cleanup_count--;
+unlock:
+ spin_unlock(&desc->lock);
+ }
+
+ irq_exit();
+}
+
+static void irq_complete_move(unsigned int irq)
+{
+ struct irq_cfg *cfg = irq_cfg + irq;
+ unsigned vector, me;
+
+ if (likely(!cfg->move_in_progress))
+ return;
+
+ vector = ~get_irq_regs()->orig_rax;
+ me = smp_processor_id();
+ if ((vector == cfg->vector) &&
+     cpu_isset(me, cfg->domain)) {
+ cpumask_t cleanup_mask;
+
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ cfg->move_in_progress = 0;
+ }
+}
+#else
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
static void ack_apic_edge(unsigned int irq)
{
+ irq_complete_move(irq);
move_native_irq(irq);
ack_APIC_irq();
}
@@ -1370,6 +1437,7 @@ static void ack_apic_level(unsigned int irq)
{
int do_unmask_irq = 0;
+ irq_complete_move(irq);
#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
/* If we are moving the irq we need to mask it */
if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
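Taken together, the two new functions turn migration into a three-step handshake: (1) __assign_irq_vector() records old_domain and sets move_in_progress; (2) the first interrupt that actually arrives through the new vector runs irq_complete_move(), which proves the new route is live, counts the still-online old-domain CPUs into move_cleanup_count, and sends them IRQ_MOVE_CLEANUP_VECTOR; (3) each old CPU releases its vector_irq slot and decrements the count, and only at zero can the vector be reassigned. Because teardown is gated on a delivery through the new vector, an interrupt in flight to the old vector can no longer be dropped. A compressed, single-threaded sketch of the handshake (hypothetical names throughout; the IPI and the online-CPU filtering are simplified to direct per-CPU calls):

#include <stdio.h>

#define NCPUS 4

/* Hypothetical miniature of the migration state. */
struct toy_cfg {
	unsigned long domain, old_domain;	/* cpu bitmasks */
	unsigned move_cleanup_count;
	unsigned move_in_progress : 1;
};

/* Step 1: like __assign_irq_vector() - remember the old domain, defer teardown. */
static void start_move(struct toy_cfg *cfg, unsigned long new_domain)
{
	cfg->old_domain = cfg->domain;
	cfg->domain = new_domain;
	cfg->move_in_progress = 1;
}

/* Step 3: like smp_irq_move_cleanup_interrupt() running on one old CPU. */
static void cleanup_on_cpu(struct toy_cfg *cfg, int cpu)
{
	printf("cpu%d: releasing old vector slot\n", cpu);
	cfg->move_cleanup_count--;
}

/* Step 2: like irq_complete_move() - runs when the first interrupt arrives on
 * the new destination, proving the new vector works before the old one dies. */
static void complete_move(struct toy_cfg *cfg)
{
	int cpu;

	if (!cfg->move_in_progress)
		return;
	for (cpu = 0; cpu < NCPUS; cpu++)
		if (cfg->old_domain & (1UL << cpu))
			cfg->move_cleanup_count++;
	cfg->move_in_progress = 0;
	/* stand-in for send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR) */
	for (cpu = 0; cpu < NCPUS; cpu++)
		if (cfg->old_domain & (1UL << cpu))
			cleanup_on_cpu(cfg, cpu);
}

int main(void)
{
	struct toy_cfg cfg = { .domain = 1UL << 0 };	/* irq lives on cpu0 */

	start_move(&cfg, 1UL << 2);	/* migrate to cpu2 */
	complete_move(&cfg);		/* first irq arrives on cpu2 */
	printf("cleanup pending: %u\n", cfg.move_cleanup_count);	/* 0 */
	return 0;
}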
diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h
index dc395edc2f2a..2e4b7a5ed1c4 100644
--- a/include/asm-x86_64/hw_irq.h
+++ b/include/asm-x86_64/hw_irq.h
@@ -32,10 +32,15 @@
#define IA32_SYSCALL_VECTOR 0x80
+/* Reserve the lowest usable priority level 0x20 - 0x2f for triggering
+ * cleanup after irq migration.
+ */
+#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
+
/*
* Vectors 0x20-0x2f are used for ISA interrupts.
*/
-#define IRQ0_VECTOR FIRST_EXTERNAL_VECTOR
+#define IRQ0_VECTOR FIRST_EXTERNAL_VECTOR + 0x10
#define IRQ1_VECTOR IRQ0_VECTOR + 1
#define IRQ2_VECTOR IRQ0_VECTOR + 2
#define IRQ3_VECTOR IRQ0_VECTOR + 3
@@ -82,7 +87,7 @@
/*
* First APIC vector available to drivers: (vectors 0x30-0xee)
- * we start at 0x31 to spread out vectors evenly between priority
+ * we start at 0x41 to spread out vectors evenly between priority
* levels. (0x80 is the syscall vector)
*/
#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2)
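With FIRST_EXTERNAL_VECTOR being 0x20, the new layout reserves the entire lowest priority level 0x20-0x2f for the cleanup IPI, shifts the sixteen ISA vectors up to 0x30-0x3f (the untouched "Vectors 0x20-0x2f are used for ISA interrupts" comment above is now stale), and pushes the first device vector from 0x31 to 0x41, as the updated comment notes. The arithmetic, checked as a stand-alone sketch (parentheses added around the macro bodies here for safety; the header spells them without):

#include <stdio.h>

#define FIRST_EXTERNAL_VECTOR	0x20	/* as in asm-x86_64/hw_irq.h */

#define IRQ_MOVE_CLEANUP_VECTOR	FIRST_EXTERNAL_VECTOR
#define IRQ0_VECTOR		(FIRST_EXTERNAL_VECTOR + 0x10)
#define IRQ15_VECTOR		(IRQ0_VECTOR + 15)
#define FIRST_DEVICE_VECTOR	(IRQ15_VECTOR + 2)

int main(void)
{
	printf("cleanup IPI:  0x%02x\n", IRQ_MOVE_CLEANUP_VECTOR);		/* 0x20 */
	printf("ISA irqs:     0x%02x-0x%02x\n", IRQ0_VECTOR, IRQ15_VECTOR);	/* 0x30-0x3f */
	printf("first device: 0x%02x\n", FIRST_DEVICE_VECTOR);			/* 0x41 */
	return 0;
}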