Merge branch 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6

* 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6: (231 commits) [PATCH] i386: Don't delete cpu_devs data to identify different x86 types in late_initcall [PATCH] i386: type may be unused [PATCH] i386: Some additional chipset register values validation. [PATCH] i386: Add missing !X86_PAE dependincy to the 2G/2G split. [PATCH] x86-64: Don't exclude asm-offsets.c in Documentation/dontdiff [PATCH] i386: avoid redundant preempt_disable in __unlazy_fpu [PATCH] i386: white space fixes in i387.h [PATCH] i386: Drop noisy e820 debugging printks [PATCH] x86-64: Fix allnoconfig error in genapic_flat.c [PATCH] x86-64: Shut up warnings for vfat compat ioctls on other file systems [PATCH] x86-64: Share identical video.S between i386 and x86-64 [PATCH] x86-64: Remove CONFIG_REORDER [PATCH] x86-64: Print type and size correctly for unknown compat ioctls [PATCH] i386: Remove copy_*_user BUG_ONs for (size < 0) [PATCH] i386: Little cleanups in smpboot.c [PATCH] x86-64: Don't enable NUMA for a single node in K8 NUMA scanning [PATCH] x86: Use RDTSCP for synchronous get_cycles if possible [PATCH] i386: Add X86_FEATURE_RDTSCP [PATCH] i386: Implement X86_FEATURE_SYNC_RDTSC on i386 [PATCH] i386: Implement alternative_io for i386 ... Fix up trivial conflict in include/linux/highmem.h manually. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-05-05 14:55:20 -0700
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-05-05 14:55:20 -0700
commit: ea62ccd00fd0b6720b033adfc9984f31130ce195 (patch)
tree: 9837b797b2466fffcb0af96c388b06eae9c3df18 /arch/i386/kernel/cpu
parent: 886a0768affe9a32f18c45f8e1393bca9ece5392 (diff)
parent: 35060b6a9a4e1c89bc6fbea61090e302dbc61847 (diff)
download: linux-ea62ccd00fd0b6720b033adfc9984f31130ce195.tar.gz
linux-ea62ccd00fd0b6720b033adfc9984f31130ce195.tar.bz2
linux-ea62ccd00fd0b6720b033adfc9984f31130ce195.zip
18 files changed, 1020 insertions, 286 deletions
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile
index 010aecfffbc1..74f27a463db0 100644
--- a/arch/i386/kernel/cpu/Makefile
+++ b/arch/i386/kernel/cpu/Makefile
@@ -2,7 +2,7 @@
 # Makefile for x86-compatible CPU details and quirks
 #
 
-obj-y	:=	common.o proc.o
+obj-y	:=	common.o proc.o bugs.o
 
 obj-y	+=	amd.o
 obj-y	+=	cyrix.o
@@ -17,3 +17,5 @@ obj-$(CONFIG_X86_MCE)	+=	mcheck/
 
 obj-$(CONFIG_MTRR)	+= 	mtrr/
 obj-$(CONFIG_CPU_FREQ)	+=	cpufreq/
+
+obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 2d47db482972..4fec702afd7e 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -53,6 +53,8 @@ static __cpuinit int amd_apic_timer_broken(void)
 	return 0;
 }
 
+int force_mwait __cpuinitdata;
+
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -275,6 +277,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 
 	if (amd_apic_timer_broken())
 		set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
+
+	if (c->x86 == 0x10 && !force_mwait)
+		clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
 }
 
 static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
@@ -314,13 +319,3 @@ int __init amd_init_cpu(void)
 	cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
 	return 0;
 }
-
-//early_arch_initcall(amd_init_cpu);
-
-static int __init amd_exit_cpu(void)
-{
-	cpu_devs[X86_VENDOR_AMD] = NULL;
-	return 0;
-}
-
-late_initcall(amd_exit_cpu);
diff --git a/arch/i386/kernel/cpu/bugs.c b/arch/i386/kernel/cpu/bugs.c
new file mode 100644
index 000000000000..54428a2500f3
--- /dev/null
+++ b/arch/i386/kernel/cpu/bugs.c
@@ -0,0 +1,191 @@
+/*
+ *  arch/i386/cpu/bugs.c
+ *
+ *  Copyright (C) 1994  Linus Torvalds
+ *
+ *  Cyrix stuff, June 1998 by:
+ *	- Rafael R. Reilova (moved everything from head.S),
+ *        <rreilova@ececs.uc.edu>
+ *	- Channing Corn (tests & fixes),
+ *	- Andrew D. Balsa (code cleanup).
+ */
+#include <linux/init.h>
+#include <linux/utsname.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/msr.h>
+#include <asm/paravirt.h>
+#include <asm/alternative.h>
+
+static int __init no_halt(char *s)
+{
+	boot_cpu_data.hlt_works_ok = 0;
+	return 1;
+}
+
+__setup("no-hlt", no_halt);
+
+static int __init mca_pentium(char *s)
+{
+	mca_pentium_flag = 1;
+	return 1;
+}
+
+__setup("mca-pentium", mca_pentium);
+
+static int __init no_387(char *s)
+{
+	boot_cpu_data.hard_math = 0;
+	write_cr0(0xE | read_cr0());
+	return 1;
+}
+
+__setup("no387", no_387);
+
+static double __initdata x = 4195835.0;
+static double __initdata y = 3145727.0;
+
+/*
+ * This used to check for exceptions..
+ * However, it turns out that to support that,
+ * the XMM trap handlers basically had to
+ * be buggy. So let's have a correct XMM trap
+ * handler, and forget about printing out
+ * some status at boot.
+ *
+ * We should really only care about bugs here
+ * anyway. Not features.
+ */
+static void __init check_fpu(void)
+{
+	if (!boot_cpu_data.hard_math) {
+#ifndef CONFIG_MATH_EMULATION
+		printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
+		printk(KERN_EMERG "Giving up.\n");
+		for (;;) ;
+#endif
+		return;
+	}
+
+/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */
+	/* Test for the divl bug.. */
+	__asm__("fninit\n\t"
+		"fldl %1\n\t"
+		"fdivl %2\n\t"
+		"fmull %2\n\t"
+		"fldl %1\n\t"
+		"fsubp %%st,%%st(1)\n\t"
+		"fistpl %0\n\t"
+		"fwait\n\t"
+		"fninit"
+		: "=m" (*&boot_cpu_data.fdiv_bug)
+		: "m" (*&x), "m" (*&y));
+	if (boot_cpu_data.fdiv_bug)
+		printk("Hmm, FPU with FDIV bug.\n");
+}
+
+static void __init check_hlt(void)
+{
+	if (paravirt_enabled())
+		return;
+
+	printk(KERN_INFO "Checking 'hlt' instruction... ");
+	if (!boot_cpu_data.hlt_works_ok) {
+		printk("disabled\n");
+		return;
+	}
+	halt();
+	halt();
+	halt();
+	halt();
+	printk("OK.\n");
+}
+
+/*
+ *	Most 386 processors have a bug where a POPAD can lock the
+ *	machine even from user space.
+ */
+
+static void __init check_popad(void)
+{
+#ifndef CONFIG_X86_POPAD_OK
+	int res, inp = (int) &res;
+
+	printk(KERN_INFO "Checking for popad bug... ");
+	__asm__ __volatile__(
+	  "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
+	  : "=&a" (res)
+	  : "d" (inp)
+	  : "ecx", "edi" );
+	/* If this fails, it means that any user program may lock the CPU hard. Too bad. */
+	if (res != 12345678) printk( "Buggy.\n" );
+		        else printk( "OK.\n" );
+#endif
+}
+
+/*
+ * Check whether we are able to run this kernel safely on SMP.
+ *
+ * - In order to run on a i386, we need to be compiled for i386
+ *   (for due to lack of "invlpg" and working WP on a i386)
+ * - In order to run on anything without a TSC, we need to be
+ *   compiled for a i486.
+ * - In order to support the local APIC on a buggy Pentium machine,
+ *   we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
+ *   which happens implicitly if compiled for a Pentium or lower
+ *   (unless an advanced selection of CPU features is used) as an
+ *   otherwise config implies a properly working local APIC without
+ *   the need to do extra reads from the APIC.
+*/
+
+static void __init check_config(void)
+{
+/*
+ * We'd better not be a i386 if we're configured to use some
+ * i486+ only features! (WP works in supervisor mode and the
+ * new "invlpg" and "bswap" instructions)
+ */
+#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
+	if (boot_cpu_data.x86 == 3)
+		panic("Kernel requires i486+ for 'invlpg' and other features");
+#endif
+
+/*
+ * If we configured ourselves for a TSC, we'd better have one!
+ */
+#ifdef CONFIG_X86_TSC
+	if (!cpu_has_tsc && !tsc_disable)
+		panic("Kernel compiled for Pentium+, requires TSC feature!");
+#endif
+
+/*
+ * If we were told we had a good local APIC, check for buggy Pentia,
+ * i.e. all B steppings and the C2 stepping of P54C when using their
+ * integrated APIC (see 11AP erratum in "Pentium Processor
+ * Specification Update").
+ */
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
+	    && cpu_has_apic
+	    && boot_cpu_data.x86 == 5
+	    && boot_cpu_data.x86_model == 2
+	    && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
+		panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
+#endif
+}
+
+
+void __init check_bugs(void)
+{
+	identify_boot_cpu();
+#ifndef CONFIG_SMP
+	printk("CPU: ");
+	print_cpu_info(&boot_cpu_data);
+#endif
+	check_config();
+	check_fpu();
+	check_hlt();
+	check_popad();
+	init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+	alternative_instructions();
+}
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c
index 8c25047975c0..473eac883c7b 100644
--- a/arch/i386/kernel/cpu/centaur.c
+++ b/arch/i386/kernel/cpu/centaur.c
@@ -469,13 +469,3 @@ int __init centaur_init_cpu(void)
 	cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
 	return 0;
 }
-
-//early_arch_initcall(centaur_init_cpu);
-
-static int __init centaur_exit_cpu(void)
-{
-	cpu_devs[X86_VENDOR_CENTAUR] = NULL;
-	return 0;
-}
-
-late_initcall(centaur_exit_cpu);
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index dcbbd0a8bfc2..794d593c47eb 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -18,15 +18,37 @@
 #include <asm/apic.h>
 #include <mach_apic.h>
 #endif
-#include <asm/pda.h>
 
 #include "cpu.h"
 
-DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
-EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+	[GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
+	[GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
+	[GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
+	[GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
+	/*
+	 * Segments used for calling PnP BIOS have byte granularity.
+	 * They code segments and data segments have fixed 64k limits,
+	 * the transfer segment sizes are set at run time.
+	 */
+	[GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+	[GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
+	[GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
+	[GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
+	[GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
+	/*
+	 * The APM segments have byte granularity and their bases
+	 * are set at run time.  All have 64k limits.
+	 */
+	[GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
+	/* 16-bit code */
+	[GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
+	[GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
 
-struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(_cpu_pda);
+	[GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
+	[GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
+} };
+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
 static int cachesize_override __cpuinitdata = -1;
 static int disable_x86_fxsr __cpuinitdata;
@@ -368,7 +390,7 @@ __setup("serialnumber", x86_serial_nr_setup);
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 {
 	int i;
 
@@ -479,15 +501,22 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 
 	/* Init Machine Check Exception if available. */
 	mcheck_init(c);
+}
 
-	if (c == &boot_cpu_data)
-		sysenter_setup();
+void __init identify_boot_cpu(void)
+{
+	identify_cpu(&boot_cpu_data);
+	sysenter_setup();
 	enable_sep_cpu();
+	mtrr_bp_init();
+}
 
-	if (c == &boot_cpu_data)
-		mtrr_bp_init();
-	else
-		mtrr_ap_init();
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
+{
+	BUG_ON(c == &boot_cpu_data);
+	identify_cpu(c);
+	enable_sep_cpu();
+	mtrr_ap_init();
 }
 
 #ifdef CONFIG_X86_HT
@@ -601,129 +630,36 @@ void __init early_cpu_init(void)
 #endif
 }
 
-/* Make sure %gs is initialized properly in idle threads */
+/* Make sure %fs is initialized properly in idle threads */
 struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
 {
 	memset(regs, 0, sizeof(struct pt_regs));
-	regs->xfs = __KERNEL_PDA;
+	regs->xfs = __KERNEL_PERCPU;
 	return regs;
 }
 
-static __cpuinit int alloc_gdt(int cpu)
+/* Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one. */
+void switch_to_new_gdt(void)
 {
-	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
-	struct desc_struct *gdt;
-	struct i386_pda *pda;
-
-	gdt = (struct desc_struct *)cpu_gdt_descr->address;
-	pda = cpu_pda(cpu);
-
-	/*
-	 * This is a horrible hack to allocate the GDT.  The problem
-	 * is that cpu_init() is called really early for the boot CPU
-	 * (and hence needs bootmem) but much later for the secondary
-	 * CPUs, when bootmem will have gone away
-	 */
-	if (NODE_DATA(0)->bdata->node_bootmem_map) {
-		BUG_ON(gdt != NULL || pda != NULL);
-
-		gdt = alloc_bootmem_pages(PAGE_SIZE);
-		pda = alloc_bootmem(sizeof(*pda));
-		/* alloc_bootmem(_pages) panics on failure, so no check */
-
-		memset(gdt, 0, PAGE_SIZE);
-		memset(pda, 0, sizeof(*pda));
-	} else {
-		/* GDT and PDA might already have been allocated if
-		   this is a CPU hotplug re-insertion. */
-		if (gdt == NULL)
-			gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
-
-		if (pda == NULL)
-			pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
-
-		if (unlikely(!gdt || !pda)) {
-			free_pages((unsigned long)gdt, 0);
-			kfree(pda);
-			return 0;
-		}
-	}
-
- 	cpu_gdt_descr->address = (unsigned long)gdt;
-	cpu_pda(cpu) = pda;
-
-	return 1;
-}
+	struct Xgt_desc_struct gdt_descr;
 
-/* Initial PDA used by boot CPU */
-struct i386_pda boot_pda = {
-	._pda = &boot_pda,
-	.cpu_number = 0,
-	.pcurrent = &init_task,
-};
-
-static inline void set_kernel_fs(void)
-{
-	/* Set %fs for this CPU's PDA.  Memory clobber is to create a
-	   barrier with respect to any PDA operations, so the compiler
-	   doesn't move any before here. */
-	asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
+	gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
+	gdt_descr.size = GDT_SIZE - 1;
+	load_gdt(&gdt_descr);
+	asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
 }
 
-/* Initialize the CPU's GDT and PDA.  The boot CPU does this for
-   itself, but secondaries find this done for them. */
-__cpuinit int init_gdt(int cpu, struct task_struct *idle)
-{
-	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
-	struct desc_struct *gdt;
-	struct i386_pda *pda;
-
-	/* For non-boot CPUs, the GDT and PDA should already have been
-	   allocated. */
-	if (!alloc_gdt(cpu)) {
-		printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
-		return 0;
-	}
-
-	gdt = (struct desc_struct *)cpu_gdt_descr->address;
-	pda = cpu_pda(cpu);
-
-	BUG_ON(gdt == NULL || pda == NULL);
-
-	/*
-	 * Initialize the per-CPU GDT with the boot GDT,
-	 * and set up the GDT descriptor:
-	 */
- 	memcpy(gdt, cpu_gdt_table, GDT_SIZE);
-	cpu_gdt_descr->size = GDT_SIZE - 1;
-
-	pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
-			(u32 *)&gdt[GDT_ENTRY_PDA].b,
-			(unsigned long)pda, sizeof(*pda) - 1,
-			0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
-
-	memset(pda, 0, sizeof(*pda));
-	pda->_pda = pda;
-	pda->cpu_number = cpu;
-	pda->pcurrent = idle;
-
-	return 1;
-}
-
-void __cpuinit cpu_set_gdt(int cpu)
-{
-	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
-
-	/* Reinit these anyway, even if they've already been done (on
-	   the boot CPU, this will transition from the boot gdt+pda to
-	   the real ones). */
-	load_gdt(cpu_gdt_descr);
-	set_kernel_fs();
-}
-
-/* Common CPU init for both boot and secondary CPUs */
-static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
 {
+	int cpu = smp_processor_id();
+	struct task_struct *curr = current;
 	struct tss_struct * t = &per_cpu(init_tss, cpu);
 	struct thread_struct *thread = &curr->thread;
 
@@ -744,6 +680,7 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
 	}
 
 	load_idt(&idt_descr);
+	switch_to_new_gdt();
 
 	/*
 	 * Set up and load the per-CPU TSS and LDT
@@ -783,38 +720,6 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
 	mxcsr_feature_mask_init();
 }
 
-/* Entrypoint to initialize secondary CPU */
-void __cpuinit secondary_cpu_init(void)
-{
-	int cpu = smp_processor_id();
-	struct task_struct *curr = current;
-
-	_cpu_init(cpu, curr);
-}
-
-/*
- * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- */
-void __cpuinit cpu_init(void)
-{
-	int cpu = smp_processor_id();
-	struct task_struct *curr = current;
-
-	/* Set up the real GDT and PDA, so we can transition from the
-	   boot versions. */
-	if (!init_gdt(cpu, curr)) {
-		/* failed to allocate something; not much we can do... */
-		for (;;)
-			local_irq_enable();
-	}
-
-	cpu_set_gdt(cpu);
-	_cpu_init(cpu, curr);
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 void __cpuinit cpu_uninit(void)
 {
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index de27bd07bc9c..0b8411a864fb 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -279,7 +279,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 		 */  
 		if (vendor == PCI_VENDOR_ID_CYRIX &&
 	 (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
-			pit_latch_buggy = 1;
+			mark_tsc_unstable("cyrix 5510/5520 detected");
 	}
 #endif
 		c->x86_cache_size=16;	/* Yep 16K integrated cache thats it */
@@ -448,16 +448,6 @@ int __init cyrix_init_cpu(void)
 	return 0;
 }
 
-//early_arch_initcall(cyrix_init_cpu);
-
-static int __init cyrix_exit_cpu(void)
-{
-	cpu_devs[X86_VENDOR_CYRIX] = NULL;
-	return 0;
-}
-
-late_initcall(cyrix_exit_cpu);
-
 static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
 	.c_vendor	= "NSC",
 	.c_ident 	= { "Geode by NSC" },
@@ -470,12 +460,3 @@ int __init nsc_init_cpu(void)
 	return 0;
 }
 
-//early_arch_initcall(nsc_init_cpu);
-
-static int __init nsc_exit_cpu(void)
-{
-	cpu_devs[X86_VENDOR_NSC] = NULL;
-	return 0;
-}
-
-late_initcall(nsc_exit_cpu);
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 56fe26584957..dc4e08147b1f 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -188,8 +188,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	}
 #endif
 
-	if (c->x86 == 15)
+	if (c->x86 == 15) {
 		set_bit(X86_FEATURE_P4, c->x86_capability);
+		set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability);
+	}
 	if (c->x86 == 6) 
 		set_bit(X86_FEATURE_P3, c->x86_capability);
 	if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c
index b0862af595aa..f9fa4142551e 100644
--- a/arch/i386/kernel/cpu/mcheck/k7.c
+++ b/arch/i386/kernel/cpu/mcheck/k7.c
@@ -75,6 +75,9 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
 	machine_check_vector = k7_machine_check;
 	wmb();
 
+	if (!cpu_has(c, X86_FEATURE_MCE))
+		return;
+
 	printk (KERN_INFO "Intel machine check architecture supported.\n");
 	rdmsr (MSR_IA32_MCG_CAP, l, h);
 	if (l & (1<<8))	/* Control register present ? */
@@ -82,9 +85,13 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
 	nr_mce_banks = l & 0xff;
 
 	/* Clear status for MC index 0 separately, we don't touch CTL,
-	 * as some Athlons cause spurious MCEs when its enabled. */
-	wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
-	for (i=1; i<nr_mce_banks; i++) {
+	 * as some K7 Athlons cause spurious MCEs when its enabled. */
+	if (boot_cpu_data.x86 == 6) {
+		wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+		i = 1;
+	} else
+		i = 0;
+	for (; i<nr_mce_banks; i++) {
 		wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
 		wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
 	}
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c
index 4f10c62d180c..56cd485b127c 100644
--- a/arch/i386/kernel/cpu/mcheck/mce.c
+++ b/arch/i386/kernel/cpu/mcheck/mce.c
@@ -38,8 +38,7 @@ void mcheck_init(struct cpuinfo_x86 *c)
 
 	switch (c->x86_vendor) {
 		case X86_VENDOR_AMD:
-			if (c->x86==6 || c->x86==15)
-				amd_mcheck_init(c);
+			amd_mcheck_init(c);
 			break;
 
 		case X86_VENDOR_INTEL:
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index 504434a46011..1509edfb2313 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -124,13 +124,10 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 
 
 /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
+static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 {
 	u32 h;
 
-	if (mce_num_extended_msrs == 0)
-		goto done;
-
 	rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
 	rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
 	rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
@@ -141,12 +138,6 @@ static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
 	rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
 	rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
 	rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
-
-	/* can we rely on kmalloc to do a dynamic
-	 * allocation for the reserved registers?
-	 */
-done:
-	return mce_num_extended_msrs;
 }
 
 static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
@@ -155,7 +146,6 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
 	int i;
-	struct intel_mce_extended_msrs dbg;
 
 	rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 	if (mcgstl & (1<<0))	/* Recoverable ? */
@@ -164,7 +154,9 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
 	printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
 		smp_processor_id(), mcgsth, mcgstl);
 
-	if (intel_get_extended_msrs(&dbg)) {
+	if (mce_num_extended_msrs > 0) {
+		struct intel_mce_extended_msrs dbg;
+		intel_get_extended_msrs(&dbg);
 		printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
 			smp_processor_id(), dbg.eip, dbg.eflags);
 		printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f77fc53db654..5367e32e0403 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -20,13 +20,25 @@ struct mtrr_state {
 	mtrr_type def_type;
 };
 
+struct fixed_range_block {
+	int base_msr; /* start address of an MTRR block */
+	int ranges;   /* number of MTRRs in this block  */
+};
+
+static struct fixed_range_block fixed_range_blocks[] = {
+	{ MTRRfix64K_00000_MSR, 1 }, /* one  64k MTRR  */
+	{ MTRRfix16K_80000_MSR, 2 }, /* two  16k MTRRs */
+	{ MTRRfix4K_C0000_MSR,  8 }, /* eight 4k MTRRs */
+	{}
+};
+
 static unsigned long smp_changes_mask;
 static struct mtrr_state mtrr_state = {};
 
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
 
-static __initdata int mtrr_show;
+static int mtrr_show;
 module_param_named(show, mtrr_show, bool, 0);
 
 /*  Get the MSR pair relating to a var range  */
@@ -37,7 +49,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
 	rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
-static void __init
+static void
 get_fixed_ranges(mtrr_type * frs)
 {
 	unsigned int *p = (unsigned int *) frs;
@@ -51,12 +63,18 @@ get_fixed_ranges(mtrr_type * frs)
 		rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
 }
 
-static void __init print_fixed(unsigned base, unsigned step, const mtrr_type*types)
+void mtrr_save_fixed_ranges(void *info)
+{
+	get_fixed_ranges(mtrr_state.fixed_ranges);
+}
+
+static void __cpuinit print_fixed(unsigned base, unsigned step, const mtrr_type*types)
 {
 	unsigned i;
 
 	for (i = 0; i < 8; ++i, ++types, base += step)
-		printk(KERN_INFO "MTRR %05X-%05X %s\n", base, base + step - 1, mtrr_attrib_to_str(*types));
+		printk(KERN_INFO "MTRR %05X-%05X %s\n",
+			base, base + step - 1, mtrr_attrib_to_str(*types));
 }
 
 /*  Grab all of the MTRR state for this CPU into *state  */
@@ -147,6 +165,44 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
 			smp_processor_id(), msr, a, b);
 }
 
+/**
+ * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs
+ * see AMD publication no. 24593, chapter 3.2.1 for more information
+ */
+static inline void k8_enable_fixed_iorrs(void)
+{
+	unsigned lo, hi;
+
+	rdmsr(MSR_K8_SYSCFG, lo, hi);
+	mtrr_wrmsr(MSR_K8_SYSCFG, lo
+				| K8_MTRRFIXRANGE_DRAM_ENABLE
+				| K8_MTRRFIXRANGE_DRAM_MODIFY, hi);
+}
+
+/**
+ * Checks and updates an fixed-range MTRR if it differs from the value it
+ * should have. If K8 extenstions are wanted, update the K8 SYSCFG MSR also.
+ * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
+ * \param msr MSR address of the MTTR which should be checked and updated
+ * \param changed pointer which indicates whether the MTRR needed to be changed
+ * \param msrwords pointer to the MSR values which the MSR should have
+ */
+static void set_fixed_range(int msr, int * changed, unsigned int * msrwords)
+{
+	unsigned lo, hi;
+
+	rdmsr(msr, lo, hi);
+
+	if (lo != msrwords[0] || hi != msrwords[1]) {
+		if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+		    boot_cpu_data.x86 == 15 &&
+		    ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
+			k8_enable_fixed_iorrs();
+		mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
+		*changed = TRUE;
+	}
+}
+
 int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
 /*  [SUMMARY] Get a free MTRR.
     <base> The starting (base) address of the region.
@@ -196,36 +252,21 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 	*type = base_lo & 0xff;
 }
 
+/**
+ * Checks and updates the fixed-range MTRRs if they differ from the saved set
+ * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
+ */
 static int set_fixed_ranges(mtrr_type * frs)
 {
-	unsigned int *p = (unsigned int *) frs;
+	unsigned long long *saved = (unsigned long long *) frs;
 	int changed = FALSE;
-	int i;
-	unsigned int lo, hi;
+	int block=-1, range;
 
-	rdmsr(MTRRfix64K_00000_MSR, lo, hi);
-	if (p[0] != lo || p[1] != hi) {
-		mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
-		changed = TRUE;
-	}
+	while (fixed_range_blocks[++block].ranges)
+	    for (range=0; range < fixed_range_blocks[block].ranges; range++)
+		set_fixed_range(fixed_range_blocks[block].base_msr + range,
+		    &changed, (unsigned int *) saved++);
 
-	for (i = 0; i < 2; i++) {
-		rdmsr(MTRRfix16K_80000_MSR + i, lo, hi);
-		if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
-			mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2],
-			      p[3 + i * 2]);
-			changed = TRUE;
-		}
-	}
-
-	for (i = 0; i < 8; i++) {
-		rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi);
-		if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
-			mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2],
-			      p[7 + i * 2]);
-			changed = TRUE;
-		}
-	}
 	return changed;
 }
 
@@ -428,7 +469,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i
 		}
 	}
 
-	if (base + size < 0x100) {
+	if (base < 0x100) {
 		printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n",
 		       base, size);
 		return -EINVAL;
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 0acfb6a5a220..02a2f39e5e0a 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -729,6 +729,17 @@ void mtrr_ap_init(void)
 	local_irq_restore(flags);
 }
 
+/**
+ * Save current fixed-range MTRR state of the BSP
+ */
+void mtrr_save_state(void)
+{
+	if (smp_processor_id() == 0)
+		mtrr_save_fixed_ranges(NULL);
+	else
+		smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
+}
+
 static int __init mtrr_init_finialize(void)
 {
 	if (!mtrr_if)
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c
index 8bf23cc80c63..961fbe1a748f 100644
--- a/arch/i386/kernel/cpu/nexgen.c
+++ b/arch/i386/kernel/cpu/nexgen.c
@@ -58,13 +58,3 @@ int __init nexgen_init_cpu(void)
 	cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
 	return 0;
 }
-
-//early_arch_initcall(nexgen_init_cpu);
-
-static int __init nexgen_exit_cpu(void)
-{
-	cpu_devs[X86_VENDOR_NEXGEN] = NULL;
-	return 0;
-}
-
-late_initcall(nexgen_exit_cpu);
diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c
new file mode 100644
index 000000000000..2b04c8f1db62
--- /dev/null
+++ b/arch/i386/kernel/cpu/perfctr-watchdog.c
@@ -0,0 +1,658 @@
+/* local apic based NMI watchdog for various CPUs.
+   This file also handles reservation of performance counters for coordination
+   with other users (like oprofile).
+
+   Note that these events normally don't tick when the CPU idles. This means
+   the frequency varies with CPU load.
+
+   Original code for K7/P6 written by Keith Owens */
+
+#include <linux/percpu.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+#include <asm/apic.h>
+#include <asm/intel_arch_perfmon.h>
+
+struct nmi_watchdog_ctlblk {
+	unsigned int cccr_msr;
+	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
+	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
+};
+
+/* Interface defining a CPU specific perfctr watchdog */
+struct wd_ops {
+	int (*reserve)(void);
+	void (*unreserve)(void);
+	int (*setup)(unsigned nmi_hz);
+	void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
+	void (*stop)(void *);
+	unsigned perfctr;
+	unsigned evntsel;
+	u64 checkbit;
+};
+
+static struct wd_ops *wd_ops;
+
+/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
+ * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
+ */
+#define NMI_MAX_COUNTER_BITS 66
+
+/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
+ * evtsel_nmi_owner tracks the ownership of the event selection
+ * - different performance counters/ event selection may be reserved for
+ *   different subsystems this reservation system just tries to coordinate
+ *   things a little
+ */
+static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
+static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
+
+static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
+{
+	return wd_ops ? msr - wd_ops->perfctr : 0;
+}
+
+/* converts an msr to an appropriate reservation bit */
+/* returns the bit offset of the event selection register */
+static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
+{
+	return wd_ops ? msr - wd_ops->evntsel : 0;
+}
+
+/* checks for a bit availability (hack for oprofile) */
+int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
+{
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, perfctr_nmi_owner));
+}
+
+/* checks the an msr for availability */
+int avail_to_resrv_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, perfctr_nmi_owner));
+}
+
+int reserve_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, perfctr_nmi_owner))
+		return 1;
+	return 0;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, perfctr_nmi_owner);
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, evntsel_nmi_owner))
+		return 1;
+	return 0;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, evntsel_nmi_owner);
+}
+
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
+EXPORT_SYMBOL(reserve_perfctr_nmi);
+EXPORT_SYMBOL(release_perfctr_nmi);
+EXPORT_SYMBOL(reserve_evntsel_nmi);
+EXPORT_SYMBOL(release_evntsel_nmi);
+
+void disable_lapic_nmi_watchdog(void)
+{
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
+		return;
+
+	on_each_cpu(wd_ops->stop, NULL, 0, 1);
+	wd_ops->unreserve();
+
+	BUG_ON(atomic_read(&nmi_active) != 0);
+}
+
+void enable_lapic_nmi_watchdog(void)
+{
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+	/* are we already enabled */
+	if (atomic_read(&nmi_active) != 0)
+		return;
+
+	/* are we lapic aware */
+	if (!wd_ops)
+		return;
+	if (!wd_ops->reserve()) {
+		printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
+		return;
+	}
+
+	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+	touch_nmi_watchdog();
+}
+
+/*
+ * Activate the NMI watchdog via the local APIC.
+ */
+
+static unsigned int adjust_for_32bit_ctr(unsigned int hz)
+{
+	u64 counter_val;
+	unsigned int retval = hz;
+
+	/*
+	 * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
+	 * are writable, with higher bits sign extending from bit 31.
+	 * So, we can only program the counter with 31 bit values and
+	 * 32nd bit should be 1, for 33.. to be 1.
+	 * Find the appropriate nmi_hz
+	 */
+	counter_val = (u64)cpu_khz * 1000;
+	do_div(counter_val, retval);
+ 	if (counter_val > 0x7fffffffULL) {
+		u64 count = (u64)cpu_khz * 1000;
+		do_div(count, 0x7fffffffUL);
+		retval = count + 1;
+	}
+	return retval;
+}
+
+static void
+write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
+{
+	u64 count = (u64)cpu_khz * 1000;
+
+	do_div(count, nmi_hz);
+	if(descr)
+		Dprintk("setting %s to -0x%08Lx\n", descr, count);
+	wrmsrl(perfctr_msr, 0 - count);
+}
+
+static void write_watchdog_counter32(unsigned int perfctr_msr,
+		const char *descr, unsigned nmi_hz)
+{
+	u64 count = (u64)cpu_khz * 1000;
+
+	do_div(count, nmi_hz);
+	if(descr)
+		Dprintk("setting %s to -0x%08Lx\n", descr, count);
+	wrmsr(perfctr_msr, (u32)(-count), 0);
+}
+
+/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
+   nicely stable so there is not much variety */
+
+#define K7_EVNTSEL_ENABLE	(1 << 22)
+#define K7_EVNTSEL_INT		(1 << 20)
+#define K7_EVNTSEL_OS		(1 << 17)
+#define K7_EVNTSEL_USR		(1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
+#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+static int setup_k7_watchdog(unsigned nmi_hz)
+{
+	unsigned int perfctr_msr, evntsel_msr;
+	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = MSR_K7_PERFCTR0;
+	evntsel_msr = MSR_K7_EVNTSEL0;
+
+	wrmsrl(perfctr_msr, 0UL);
+
+	evntsel = K7_EVNTSEL_INT
+		| K7_EVNTSEL_OS
+		| K7_EVNTSEL_USR
+		| K7_NMI_EVENT;
+
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= K7_EVNTSEL_ENABLE;
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	return 1;
+}
+
+static void single_msr_stop_watchdog(void *arg)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+}
+
+static int single_msr_reserve(void)
+{
+	if (!reserve_perfctr_nmi(wd_ops->perfctr))
+		return 0;
+
+	if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
+		release_perfctr_nmi(wd_ops->perfctr);
+		return 0;
+	}
+	return 1;
+}
+
+static void single_msr_unreserve(void)
+{
+	release_evntsel_nmi(wd_ops->perfctr);
+	release_perfctr_nmi(wd_ops->evntsel);
+}
+
+static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+{
+	/* start the cycle over again */
+	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
+}
+
+static struct wd_ops k7_wd_ops = {
+	.reserve = single_msr_reserve,
+	.unreserve = single_msr_unreserve,
+	.setup = setup_k7_watchdog,
+	.rearm = single_msr_rearm,
+	.stop = single_msr_stop_watchdog,
+	.perfctr = MSR_K7_PERFCTR0,
+	.evntsel = MSR_K7_EVNTSEL0,
+	.checkbit = 1ULL<<63,
+};
+
+/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */
+
+#define P6_EVNTSEL0_ENABLE	(1 << 22)
+#define P6_EVNTSEL_INT		(1 << 20)
+#define P6_EVNTSEL_OS		(1 << 17)
+#define P6_EVNTSEL_USR		(1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
+#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+static int setup_p6_watchdog(unsigned nmi_hz)
+{
+	unsigned int perfctr_msr, evntsel_msr;
+	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = MSR_P6_PERFCTR0;
+	evntsel_msr = MSR_P6_EVNTSEL0;
+
+	wrmsrl(perfctr_msr, 0UL);
+
+	evntsel = P6_EVNTSEL_INT
+		| P6_EVNTSEL_OS
+		| P6_EVNTSEL_USR
+		| P6_NMI_EVENT;
+
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
+	write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= P6_EVNTSEL0_ENABLE;
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	return 1;
+}
+
+static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+{
+	/* P6 based Pentium M need to re-unmask
+	 * the apic vector but it doesn't hurt
+	 * other P6 variant.
+	 * ArchPerfom/Core Duo also needs this */
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	/* P6/ARCH_PERFMON has 32 bit counter write */
+	write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
+}
+
+static struct wd_ops p6_wd_ops = {
+	.reserve = single_msr_reserve,
+	.unreserve = single_msr_unreserve,
+	.setup = setup_p6_watchdog,
+	.rearm = p6_rearm,
+	.stop = single_msr_stop_watchdog,
+	.perfctr = MSR_P6_PERFCTR0,
+	.evntsel = MSR_P6_EVNTSEL0,
+	.checkbit = 1ULL<<39,
+};
+
+/* Intel P4 performance counters. By far the most complicated of all. */
+
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
+#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
+#define P4_ESCR_OS		(1<<3)
+#define P4_ESCR_USR		(1<<2)
+#define P4_CCCR_OVF_PMI0	(1<<26)
+#define P4_CCCR_OVF_PMI1	(1<<27)
+#define P4_CCCR_THRESHOLD(N)	((N)<<20)
+#define P4_CCCR_COMPLEMENT	(1<<19)
+#define P4_CCCR_COMPARE		(1<<18)
+#define P4_CCCR_REQUIRED	(3<<16)
+#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
+#define P4_CCCR_ENABLE		(1<<12)
+#define P4_CCCR_OVF 		(1<<31)
+
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+   CRU_ESCR0 (with any non-null event selector) through a complemented
+   max threshold. [IA32-Vol3, Section 14.9.9] */
+
+static int setup_p4_watchdog(unsigned nmi_hz)
+{
+	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
+	unsigned int evntsel, cccr_val;
+	unsigned int misc_enable, dummy;
+	unsigned int ht_num;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
+	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
+		return 0;
+
+#ifdef CONFIG_SMP
+	/* detect which hyperthread we are on */
+	if (smp_num_siblings == 2) {
+		unsigned int ebx, apicid;
+
+        	ebx = cpuid_ebx(1);
+	        apicid = (ebx >> 24) & 0xff;
+        	ht_num = apicid & 1;
+	} else
+#endif
+		ht_num = 0;
+
+	/* performance counters are shared resources
+	 * assign each hyperthread its own set
+	 * (re-use the ESCR0 register, seems safe
+	 * and keeps the cccr_val the same)
+	 */
+	if (!ht_num) {
+		/* logical cpu 0 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR0;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR0;
+		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
+	} else {
+		/* logical cpu 1 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR1;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR1;
+		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+	}
+
+	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+	 	| P4_ESCR_OS
+		| P4_ESCR_USR;
+
+	cccr_val |= P4_CCCR_THRESHOLD(15)
+		 | P4_CCCR_COMPLEMENT
+		 | P4_CCCR_COMPARE
+		 | P4_CCCR_REQUIRED;
+
+	wrmsr(evntsel_msr, evntsel, 0);
+	wrmsr(cccr_msr, cccr_val, 0);
+	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	cccr_val |= P4_CCCR_ENABLE;
+	wrmsr(cccr_msr, cccr_val, 0);
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = cccr_msr;
+	return 1;
+}
+
+static void stop_p4_watchdog(void *arg)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	wrmsr(wd->cccr_msr, 0, 0);
+	wrmsr(wd->evntsel_msr, 0, 0);
+}
+
+static int p4_reserve(void)
+{
+	if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
+		return 0;
+#ifdef CONFIG_SMP
+	if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
+		goto fail1;
+#endif
+	if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
+		goto fail2;
+	/* RED-PEN why is ESCR1 not reserved here? */
+	return 1;
+ fail2:
+#ifdef CONFIG_SMP
+	if (smp_num_siblings > 1)
+		release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
+ fail1:
+#endif
+	release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
+	return 0;
+}
+
+static void p4_unreserve(void)
+{
+#ifdef CONFIG_SMP
+	if (smp_num_siblings > 1)
+		release_evntsel_nmi(MSR_P4_IQ_PERFCTR1);
+#endif
+	release_evntsel_nmi(MSR_P4_IQ_PERFCTR0);
+	release_perfctr_nmi(MSR_P4_CRU_ESCR0);
+}
+
+static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+{
+	unsigned dummy;
+	/*
+ 	 * P4 quirks:
+	 * - An overflown perfctr will assert its interrupt
+	 *   until the OVF flag in its CCCR is cleared.
+	 * - LVTPC is masked on interrupt and must be
+	 *   unmasked by the LVTPC handler.
+	 */
+	rdmsrl(wd->cccr_msr, dummy);
+	dummy &= ~P4_CCCR_OVF;
+	wrmsrl(wd->cccr_msr, dummy);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	/* start the cycle over again */
+	write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
+}
+
+static struct wd_ops p4_wd_ops = {
+	.reserve = p4_reserve,
+	.unreserve = p4_unreserve,
+	.setup = setup_p4_watchdog,
+	.rearm = p4_rearm,
+	.stop = stop_p4_watchdog,
+	/* RED-PEN this is wrong for the other sibling */
+	.perfctr = MSR_P4_BPU_PERFCTR0,
+	.evntsel = MSR_P4_BSU_ESCR0,
+	.checkbit = 1ULL<<39,
+};
+
+/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
+   all future Intel CPUs. */
+
+#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
+static int setup_intel_arch_watchdog(unsigned nmi_hz)
+{
+	unsigned int ebx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int perfctr_msr, evntsel_msr;
+	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Unhalted Core Cycles Event or not.
+	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
+	 */
+	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		return 0;
+
+	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1;
+	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL1;
+
+	wrmsrl(perfctr_msr, 0UL);
+
+	evntsel = ARCH_PERFMON_EVENTSEL_INT
+		| ARCH_PERFMON_EVENTSEL_OS
+		| ARCH_PERFMON_EVENTSEL_USR
+		| ARCH_PERFMON_NMI_EVENT_SEL
+		| ARCH_PERFMON_NMI_EVENT_UMASK;
+
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	nmi_hz = adjust_for_32bit_ctr(nmi_hz);
+	write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1);
+	return 1;
+}
+
+static struct wd_ops intel_arch_wd_ops = {
+	.reserve = single_msr_reserve,
+	.unreserve = single_msr_unreserve,
+	.setup = setup_intel_arch_watchdog,
+	.rearm = p6_rearm,
+	.stop = single_msr_stop_watchdog,
+	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
+	.evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
+};
+
+static void probe_nmi_watchdog(void)
+{
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
+		    boot_cpu_data.x86 != 16)
+			return;
+		wd_ops = &k7_wd_ops;
+		break;
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+			wd_ops = &intel_arch_wd_ops;
+			break;
+		}
+		switch (boot_cpu_data.x86) {
+		case 6:
+			if (boot_cpu_data.x86_model > 0xd)
+				return;
+
+			wd_ops = &p6_wd_ops;
+			break;
+		case 15:
+			if (boot_cpu_data.x86_model > 0x4)
+				return;
+
+			wd_ops = &p4_wd_ops;
+			break;
+		default:
+			return;
+		}
+		break;
+	}
+}
+
+/* Interface to nmi.c */
+
+int lapic_watchdog_init(unsigned nmi_hz)
+{
+	if (!wd_ops) {
+		probe_nmi_watchdog();
+		if (!wd_ops)
+			return -1;
+	}
+
+	if (!(wd_ops->setup(nmi_hz))) {
+		printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
+		       raw_smp_processor_id());
+		return -1;
+	}
+
+	return 0;
+}
+
+void lapic_watchdog_stop(void)
+{
+	if (wd_ops)
+		wd_ops->stop(NULL);
+}
+
+unsigned lapic_adjust_nmi_hz(unsigned hz)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
+	    wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
+		hz = adjust_for_32bit_ctr(hz);
+	return hz;
+}
+
+int lapic_wd_event(unsigned nmi_hz)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	u64 ctr;
+	rdmsrl(wd->perfctr_msr, ctr);
+	if (ctr & wd_ops->checkbit) { /* perfctr still running? */
+		return 0;
+	}
+	wd_ops->rearm(wd, nmi_hz);
+	return 1;
+}
+
+int lapic_watchdog_ok(void)
+{
+	return wd_ops != NULL;
+}
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index 47e3ebbfb28d..89d91e6cc972 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -72,8 +72,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		"stc",
 		"100mhzsteps",
 		"hwpstate",
-		NULL,
-		NULL,	/* constant_tsc - moved to flags */
+		"",	/* constant_tsc - moved to flags */
 		/* nothing */
 	};
 	struct cpuinfo_x86 *c = v;
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
index 9317f7414989..50076f22e90f 100644
--- a/arch/i386/kernel/cpu/rise.c
+++ b/arch/i386/kernel/cpu/rise.c
@@ -50,12 +50,3 @@ int __init rise_init_cpu(void)
 	return 0;
 }
 
-//early_arch_initcall(rise_init_cpu);
-
-static int __init rise_exit_cpu(void)
-{
-	cpu_devs[X86_VENDOR_RISE] = NULL;
-	return 0;
-}
-
-late_initcall(rise_exit_cpu);
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index 5678d46863c6..6471a5a13202 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -112,13 +112,3 @@ int __init transmeta_init_cpu(void)
 	cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
 	return 0;
 }
-
-//early_arch_initcall(transmeta_init_cpu);
-
-static int __init transmeta_exit_cpu(void)
-{
-	cpu_devs[X86_VENDOR_TRANSMETA] = NULL;
-	return 0;
-}
-
-late_initcall(transmeta_exit_cpu);
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c
index 1bf3f87e9c5b..a7a4e75bdcd7 100644
--- a/arch/i386/kernel/cpu/umc.c
+++ b/arch/i386/kernel/cpu/umc.c
@@ -24,13 +24,3 @@ int __init umc_init_cpu(void)
 	cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
 	return 0;
 }
-
-//early_arch_initcall(umc_init_cpu);
-
-static int __init umc_exit_cpu(void)
-{
-	cpu_devs[X86_VENDOR_UMC] = NULL;
-	return 0;
-}
-
-late_initcall(umc_exit_cpu);
author	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-05-05 14:55:20 -0700
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-05-05 14:55:20 -0700
commit	ea62ccd00fd0b6720b033adfc9984f31130ce195 (patch)
tree	9837b797b2466fffcb0af96c388b06eae9c3df18 /arch/i386/kernel/cpu
parent	886a0768affe9a32f18c45f8e1393bca9ece5392 (diff)
parent	35060b6a9a4e1c89bc6fbea61090e302dbc61847 (diff)
download	linux-ea62ccd00fd0b6720b033adfc9984f31130ce195.tar.gz linux-ea62ccd00fd0b6720b033adfc9984f31130ce195.tar.bz2 linux-ea62ccd00fd0b6720b033adfc9984f31130ce195.zip