From e0c1e9bf81badc7ba59e120d6218101903d5d103 Mon Sep 17 00:00:00 2001 From: Kimball Murray Date: Mon, 8 May 2006 15:17:16 +0200 Subject: [PATCH] x86_64: avoid IRQ0 ioapic pin collision The patch addresses a problem with ACPI SCI interrupt entry, which gets re-used, and the IRQ is assigned to another unrelated device. The patch corrects the code such that SCI IRQ is skipped and duplicate entry is avoided. A second issue came up with the VIA chipset; the problem was caused by the original patch assigning IRQs starting at 16 and up. The VIA chipset uses a 4-bit IRQ register for internal interrupt routing, and therefore cannot handle IRQ numbers above 15 assigned to its devices. The patch corrects this problem by allowing PCI IRQs below 16. Cc: len.brown@intel.com Signed-off-by: Natalie Protasevich Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/io_apic.c | 5 +++++ arch/x86_64/kernel/mpparse.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 77b4c608cca0..0de3ea938830 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -1777,6 +1777,8 @@ static inline void unlock_ExtINT_logic(void) spin_unlock_irqrestore(&ioapic_lock, flags); } +int timer_uses_ioapic_pin_0; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. 
Fortunately only the timer IRQ @@ -1814,6 +1816,9 @@ static inline void check_timer(void) pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; + if (pin1 == 0) + timer_uses_ioapic_pin_0 = 1; + apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", vector, apic1, pin1, apic2, pin2); diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index b17cf3eba359..083da7e606b1 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -968,7 +968,17 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) */ int irq = gsi; if (gsi < MAX_GSI_NUM) { - if (gsi > 15) + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) gsi = pci_irq++; /* * Don't assign IRQ used by ACPI SCI -- cgit v1.2.3 From 5192d84e4c32cd335fd572e5ff0712041f45f7e7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 8 May 2006 15:17:19 +0200 Subject: [PATCH] x86_64: Check for too many northbridges in IOMMU code The IOMMU code can only deal with 8 northbridges. Error out when more are found. 
Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/pci-gart.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 9d3d76c85ae7..2480d3f08a47 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -639,6 +639,14 @@ static int __init pci_iommu_init(void) return -1; } + i = 0; + for_all_nb(dev) + i++; + if (i > MAX_NB) { + printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i); + return -1; + } + printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); aper_size = info.aper_size * 1024 * 1024; iommu_size = check_iommu_size(info.aper_base, aper_size); -- cgit v1.2.3 From cdc60a4c8e71c4bcf67e83fac6c0cabd0ff19bfe Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 8 May 2006 15:17:22 +0200 Subject: [PATCH] x86_64: fix die_lock nesting I noticed this when poking around in this area. The oops_begin() function in x86_64 would only conditionally claim the die_lock if the call is nested, but oops_end() would always release the spinlock. This patch adds a nest count for the die lock so that the release of the lock is only done on the final oops_end(). 
Signed-off-by: Corey Minyard Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/traps.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 2700b1375c1f..0ebb281aa178 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -385,6 +385,7 @@ void out_of_line_bug(void) static DEFINE_SPINLOCK(die_lock); static int die_owner = -1; +static unsigned int die_nest_count; unsigned __kprobes long oops_begin(void) { @@ -399,6 +400,7 @@ unsigned __kprobes long oops_begin(void) else spin_lock(&die_lock); } + die_nest_count++; die_owner = cpu; console_verbose(); bust_spinlocks(1); @@ -409,7 +411,13 @@ void __kprobes oops_end(unsigned long flags) { die_owner = -1; bust_spinlocks(0); - spin_unlock_irqrestore(&die_lock, flags); + die_nest_count--; + if (die_nest_count) + /* We still own the lock */ + local_irq_restore(flags); + else + /* Nest count reaches zero, release the lock. */ + spin_unlock_irqrestore(&die_lock, flags); if (panic_on_oops) panic("Oops"); } -- cgit v1.2.3 From 8b1ffe9550e71224c43d8c754245bd76f4ea9bb8 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 8 May 2006 15:17:25 +0200 Subject: [PATCH] x86_64: add nmi_exit to die_nmi Playing with NMI watchdog on x86_64, I discovered that it didn't do what I expected. It always panicked, even when it didn't happen from interrupt context. This patch solves that problem for me. Also, in this case, do_exit() will be called with interrupts disabled, I believe. Would it be wise to also call local_irq_enable() after nmi_exit()? [Yes I added it -AK] Currently, on x86_64, any NMI watchdog timeout will cause a panic because the irq count will always be set to be in an interrupt when do_exit() is called from die_nmi(). If we add nmi_exit() to the die_nmi() call (since the nmi will never exit "normally") it seems to solve this problem. 
The following small program can be used to trigger the NMI watchdog to reproduce this: main () { iopl(3); for (;;) asm("cli"); } Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 0ebb281aa178..6b87268c5c2e 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -472,6 +472,8 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs) panic("nmi watchdog"); printk("console shuts up ...\n"); oops_end(flags); + nmi_exit(); + local_irq_enable(); do_exit(SIGSEGV); } -- cgit v1.2.3 From ac71d12c990526b01ef6cfe50907ef8530a30331 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 8 May 2006 15:17:28 +0200 Subject: [PATCH] x86_64: Avoid EBDA area in early boot allocator Based on analysis&patch from Robert Hentosch Observed on a Dell PE6850 with 16GB The problem occurs very early on, when the kernel allocates space for the temporary memory map called bootmap. The bootmap overlaps the EBDA region. EBDA region is not historically reserved in the e820 mapping. When the bootmap is freed it marks the EBDA region as usable. If you notice in setup.c there is already code to work around the EBDA in reserve_ebda_region(), this check however occurs after the bootmap is allocated and doesn't prevent the bootmap from using this range. AK: I redid the original patch. Thanks also to Jan Beulich for spotting some mistakes. 
Cc: Robert_Hentosch@dell.com Cc: jbeulich@novell.com Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/e820.c | 6 ++++++ arch/x86_64/kernel/setup.c | 30 ++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 8 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 62776c07cff1..222b5b46d2b2 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -76,6 +76,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) *addrp = __pa_symbol(&_end); return 1; } + + if (last >= ebda_addr && addr < ebda_addr + ebda_size) { + *addrp = ebda_addr + ebda_size; + return 1; + } + /* XXX ramdisk image here? */ return 0; } diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index ebc3c33b1c6c..f0870bef24d1 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -571,17 +571,28 @@ static inline void copy_edd(void) #endif #define EBDA_ADDR_POINTER 0x40E -static void __init reserve_ebda_region(void) + +unsigned __initdata ebda_addr; +unsigned __initdata ebda_size; + +static void discover_ebda(void) { - unsigned int addr; - /** + /* * there is a real-mode segmented pointer pointing to the * 4K EBDA area at 0x40E */ - addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER); - addr <<= 4; - if (addr) - reserve_bootmem_generic(addr, PAGE_SIZE); + ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; + ebda_addr <<= 4; + + ebda_size = *(unsigned short *)(unsigned long)ebda_addr; + + /* Round EBDA up to pages */ + if (ebda_size == 0) + ebda_size = 1; + ebda_size <<= 10; + ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); + if (ebda_size > 64*1024) + ebda_size = 64*1024; } void __init setup_arch(char **cmdline_p) @@ -627,6 +638,8 @@ void __init setup_arch(char **cmdline_p) check_efer(); + discover_ebda(); + init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); dmi_scan_machine(); @@ -669,7 +682,8 @@ 
void __init setup_arch(char **cmdline_p) reserve_bootmem_generic(0, PAGE_SIZE); /* reserve ebda region */ - reserve_ebda_region(); + if (ebda_addr) + reserve_bootmem_generic(ebda_addr, ebda_size); #ifdef CONFIG_SMP /* -- cgit v1.2.3