From e0c1e9bf81badc7ba59e120d6218101903d5d103 Mon Sep 17 00:00:00 2001
From: Kimball Murray <kimball.murray@gmail.com>
Date: Mon, 8 May 2006 15:17:16 +0200
Subject: [PATCH] x86_64: avoid IRQ0 ioapic pin collision

The patch addresses a problem with ACPI SCI interrupt entry, which gets
re-used, and the IRQ is assigned to another unrelated device.  The patch
corrects the code such that SCI IRQ is skipped and duplicate entry is
avoided.  Second issue came up with VIA chipset, the problem was caused by
original patch assigning IRQs starting 16 and up.  The VIA chipset uses
4-bit IRQ register for internal interrupt routing, and therefore cannot
handle IRQ numbers assigned to its devices.  The patch corrects this
problem by allowing PCI IRQs below 16.

Cc: len.brown@intel.com

Signed-off by: Natalie Protasevich <Natalie.Protasevich@unisys.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/io_apic.c |  5 +++++
 arch/x86_64/kernel/mpparse.c | 12 +++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 77b4c608cca0..0de3ea938830 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -1777,6 +1777,8 @@ static inline void unlock_ExtINT_logic(void)
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
+int timer_uses_ioapic_pin_0;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
@@ -1814,6 +1816,9 @@ static inline void check_timer(void)
 	pin2  = ioapic_i8259.pin;
 	apic2 = ioapic_i8259.apic;
 
+	if (pin1 == 0)
+		timer_uses_ioapic_pin_0 = 1;
+
 	apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
 		vector, apic1, pin1, apic2, pin2);
 
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index b17cf3eba359..083da7e606b1 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -968,7 +968,17 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 		 */
 		int irq = gsi;
 		if (gsi < MAX_GSI_NUM) {
-			if (gsi > 15)
+			/*
+			 * Retain the VIA chipset work-around (gsi > 15), but
+			 * avoid a problem where the 8254 timer (IRQ0) is setup
+			 * via an override (so it's not on pin 0 of the ioapic),
+			 * and at the same time, the pin 0 interrupt is a PCI
+			 * type.  The gsi > 15 test could cause these two pins
+			 * to be shared as IRQ0, and they are not shareable.
+			 * So test for this condition, and if necessary, avoid
+			 * the pin collision.
+			 */
+			if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
 				gsi = pci_irq++;
 			/*
 			 * Don't assign IRQ used by ACPI SCI
-- 
cgit v1.2.3


From 5192d84e4c32cd335fd572e5ff0712041f45f7e7 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 8 May 2006 15:17:19 +0200
Subject: [PATCH] x86_64: Check for too many northbridges in IOMMU code

The IOMMU code can only deal with 8 northbridges. Error out when
more are found.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/pci-gart.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 9d3d76c85ae7..2480d3f08a47 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -639,6 +639,14 @@ static int __init pci_iommu_init(void)
 		return -1;
 	}
 
+	i = 0;
+	for_all_nb(dev)
+		i++;
+	if (i > MAX_NB) {
+		printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
+		return -1;
+	}
+
 	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
 	aper_size = info.aper_size * 1024 * 1024;	
 	iommu_size = check_iommu_size(info.aper_base, aper_size); 
-- 
cgit v1.2.3


From cdc60a4c8e71c4bcf67e83fac6c0cabd0ff19bfe Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 8 May 2006 15:17:22 +0200
Subject: [PATCH] x86_64: fix die_lock nesting

I noticed this when poking around in this area.

The oops_begin() function in x86_64 would only conditionally claim
the die_lock if the call is nested, but oops_end() would always
release the spinlock. This patch adds a nest count for the die lock
so that the release of the lock is only done on the final oops_end().

Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/traps.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 2700b1375c1f..0ebb281aa178 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -385,6 +385,7 @@ void out_of_line_bug(void)
 
 static DEFINE_SPINLOCK(die_lock);
 static int die_owner = -1;
+static unsigned int die_nest_count;
 
 unsigned __kprobes long oops_begin(void)
 {
@@ -399,6 +400,7 @@ unsigned __kprobes long oops_begin(void)
 		else
 			spin_lock(&die_lock);
 	}
+	die_nest_count++;
 	die_owner = cpu;
 	console_verbose();
 	bust_spinlocks(1);
@@ -409,7 +411,13 @@ void __kprobes oops_end(unsigned long flags)
 { 
 	die_owner = -1;
 	bust_spinlocks(0);
-	spin_unlock_irqrestore(&die_lock, flags);
+	die_nest_count--;
+	if (die_nest_count)
+		/* We still own the lock */
+		local_irq_restore(flags);
+	else
+		/* Nest count reaches zero, release the lock. */
+		spin_unlock_irqrestore(&die_lock, flags);
 	if (panic_on_oops)
 		panic("Oops");
 }
-- 
cgit v1.2.3


From 8b1ffe9550e71224c43d8c754245bd76f4ea9bb8 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 8 May 2006 15:17:25 +0200
Subject: [PATCH] x86_64: add nmi_exit to die_nmi

Playing with NMI watchdog on x86_64, I discovered that it didn't
do what I expected.  It always panic-ed, even when it didn't
happen from interrupt context.  This patch solves that
problem for me.  Also, in this case, do_exit() will be called
with interrupts disabled, I believe.  Would it be wise to also
call local_irq_enable() after nmi_exit()?
[Yes I added it -AK]

Currently, on x86_64, any NMI watchdog timeout will cause a panic
because the irq count will always be set to be in an interrupt
when do_exit() is called from die_nmi().  If we add nmi_exit() to
the die_nmi() call (since the nmi will never exit "normally")
it seems to solve this problem.  The following small program
can be used to trigger the NMI watchdog to reproduce this:
  main ()
  {
        iopl(3);
        for (;;) asm("cli");
  }

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/traps.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 0ebb281aa178..6b87268c5c2e 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -472,6 +472,8 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs)
 		panic("nmi watchdog");
 	printk("console shuts up ...\n");
 	oops_end(flags);
+	nmi_exit();
+	local_irq_enable();
 	do_exit(SIGSEGV);
 }
 
-- 
cgit v1.2.3


From ac71d12c990526b01ef6cfe50907ef8530a30331 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Mon, 8 May 2006 15:17:28 +0200
Subject: [PATCH] x86_64: Avoid EBDA area in early boot allocator

Based on analysis&patch from Robert Hentosch

Observed on a Dell PE6850 with 16GB

The problem occurs very early on, when the kernel allocates space for the
temporary memory map called bootmap. The bootmap overlaps the EBDA region.
EBDA region is not historically reserved in the e820 mapping. When the
bootmap is freed it marks the EBDA region as usable.

If you notice in setup.c there is already code to work around the EBDA
in reserve_ebda_region(), this check however occurs after the bootmap
is allocated and doesn't prevent the bootmap from using this range.

AK: I redid the original patch. Thanks also to Jan Beulich for
spotting some mistakes.

Cc: Robert_Hentosch@dell.com
Cc: jbeulich@novell.com
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/e820.c  |  6 ++++++
 arch/x86_64/kernel/setup.c | 30 ++++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 8 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 62776c07cff1..222b5b46d2b2 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -76,6 +76,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
 		*addrp = __pa_symbol(&_end);
 		return 1;
 	}
+
+	if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
+		*addrp = ebda_addr + ebda_size;
+		return 1;
+	}
+
 	/* XXX ramdisk image here? */ 
 	return 0;
 } 
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index ebc3c33b1c6c..f0870bef24d1 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -571,17 +571,28 @@ static inline void copy_edd(void)
 #endif
 
 #define EBDA_ADDR_POINTER 0x40E
-static void __init reserve_ebda_region(void)
+
+unsigned __initdata ebda_addr;
+unsigned __initdata ebda_size;
+
+static void discover_ebda(void)
 {
-	unsigned int addr;
-	/** 
+	/*
 	 * there is a real-mode segmented pointer pointing to the 
 	 * 4K EBDA area at 0x40E
 	 */
-	addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER);
-	addr <<= 4;
-	if (addr)
-		reserve_bootmem_generic(addr, PAGE_SIZE);
+	ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
+	ebda_addr <<= 4;
+
+	ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
+
+	/* Round EBDA up to pages */
+	if (ebda_size == 0)
+		ebda_size = 1;
+	ebda_size <<= 10;
+	ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
+	if (ebda_size > 64*1024)
+		ebda_size = 64*1024;
 }
 
 void __init setup_arch(char **cmdline_p)
@@ -627,6 +638,8 @@ void __init setup_arch(char **cmdline_p)
 
 	check_efer();
 
+	discover_ebda();
+
 	init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
 
 	dmi_scan_machine();
@@ -669,7 +682,8 @@ void __init setup_arch(char **cmdline_p)
 	reserve_bootmem_generic(0, PAGE_SIZE);
 
 	/* reserve ebda region */
-	reserve_ebda_region();
+	if (ebda_addr)
+		reserve_bootmem_generic(ebda_addr, ebda_size);
 
 #ifdef CONFIG_SMP
 	/*
-- 
cgit v1.2.3