diff options
Diffstat (limited to 'arch/i386')
82 files changed, 1428 insertions, 1040 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index d2703cda61ea..6004bb0795e0 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -5,7 +5,7 @@ mainmenu "Linux Kernel Configuration" -config X86 +config X86_32 bool default y help @@ -18,6 +18,10 @@ config SEMAPHORE_SLEEPERS bool default y +config X86 + bool + default y + config MMU bool default y @@ -151,304 +155,7 @@ config ES7000_CLUSTERED_APIC default y depends on SMP && X86_ES7000 && MPENTIUMIII -if !X86_ELAN - -choice - prompt "Processor family" - default M686 - -config M386 - bool "386" - ---help--- - This is the processor type of your CPU. This information is used for - optimizing purposes. In order to compile a kernel that can run on - all x86 CPU types (albeit not optimally fast), you can specify - "386" here. - - The kernel will not necessarily run on earlier architectures than - the one you have chosen, e.g. a Pentium optimized kernel will run on - a PPro, but not necessarily on a i486. - - Here are the settings recommended for greatest speed: - - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI - 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels - will run on a 386 class machine. - - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or - SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. - - "586" for generic Pentium CPUs lacking the TSC - (time stamp counter) register. - - "Pentium-Classic" for the Intel Pentium. - - "Pentium-MMX" for the Intel Pentium MMX. - - "Pentium-Pro" for the Intel Pentium Pro. - - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron. - - "Pentium-III" for the Intel Pentium III or Coppermine Celeron. - - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. - - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). - - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). - - "Crusoe" for the Transmeta Crusoe series. - - "Efficeon" for the Transmeta Efficeon series. - - "Winchip-C6" for original IDT Winchip. - - "Winchip-2" for IDT Winchip 2. - - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. - - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). - - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). - - If you don't know what to do, choose "386". - -config M486 - bool "486" - help - Select this for a 486 series processor, either Intel or one of the - compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, - DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or - U5S. - -config M586 - bool "586/K5/5x86/6x86/6x86MX" - help - Select this for an 586 or 686 series processor such as the AMD K5, - the Cyrix 5x86, 6x86 and 6x86MX. This choice does not - assume the RDTSC (Read Time Stamp Counter) instruction. - -config M586TSC - bool "Pentium-Classic" - help - Select this for a Pentium Classic processor with the RDTSC (Read - Time Stamp Counter) instruction for benchmarking. - -config M586MMX - bool "Pentium-MMX" - help - Select this for a Pentium with the MMX graphics/multimedia - extended instructions. - -config M686 - bool "Pentium-Pro" - help - Select this for Intel Pentium Pro chips. This enables the use of - Pentium Pro extended instructions, and disables the init-time guard - against the f00f bug found in earlier Pentiums. - -config MPENTIUMII - bool "Pentium-II/Celeron(pre-Coppermine)" - help - Select this for Intel chips based on the Pentium-II and - pre-Coppermine Celeron core. This option enables an unaligned - copy optimization, compiles the kernel with optimization flags - tailored for the chip, and applies any applicable Pentium Pro - optimizations. - -config MPENTIUMIII - bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" - help - Select this for Intel chips based on the Pentium-III and - Celeron-Coppermine core. This option enables use of some - extended prefetch instructions in addition to the Pentium II - extensions. - -config MPENTIUMM - bool "Pentium M" - help - Select this for Intel Pentium M (not Pentium-4 M) - notebook chips. - -config MPENTIUM4 - bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" - help - Select this for Intel Pentium 4 chips. This includes the - Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M - (not Pentium M) chips. This option enables compile flags - optimized for the chip, uses the correct cache shift, and - applies any applicable Pentium III optimizations. - -config MK6 - bool "K6/K6-II/K6-III" - help - Select this for an AMD K6-family processor. Enables use of - some extended instructions, and passes appropriate optimization - flags to GCC. - -config MK7 - bool "Athlon/Duron/K7" - help - Select this for an AMD Athlon K7-family processor. Enables use of - some extended instructions, and passes appropriate optimization - flags to GCC. - -config MK8 - bool "Opteron/Athlon64/Hammer/K8" - help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables - use of some extended instructions, and passes appropriate optimization - flags to GCC. - -config MCRUSOE - bool "Crusoe" - help - Select this for a Transmeta Crusoe processor. Treats the processor - like a 586 with TSC, and sets some GCC optimization flags (like a - Pentium Pro with no alignment requirements). - -config MEFFICEON - bool "Efficeon" - help - Select this for a Transmeta Efficeon processor. - -config MWINCHIPC6 - bool "Winchip-C6" - help - Select this for an IDT Winchip C6 chip. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment requirements. - -config MWINCHIP2 - bool "Winchip-2" - help - Select this for an IDT Winchip-2. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment requirements. - -config MWINCHIP3D - bool "Winchip-2A/Winchip-3" - help - Select this for an IDT Winchip-2A or 3. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment reqirements. Also enable out of order memory - stores for this CPU, which can increase performance of some - operations. - -config MGEODEGX1 - bool "GeodeGX1" - help - Select this for a Geode GX1 (Cyrix MediaGX) chip. - -config MCYRIXIII - bool "CyrixIII/VIA-C3" - help - Select this for a Cyrix III or C3 chip. Presently Linux and GCC - treat this chip as a generic 586. Whilst the CPU is 686 class, - it lacks the cmov extension which gcc assumes is present when - generating 686 code. - Note that Nehemiah (Model 9) and above will not boot with this - kernel due to them lacking the 3DNow! instructions used in earlier - incarnations of the CPU. - -config MVIAC3_2 - bool "VIA C3-2 (Nehemiah)" - help - Select this for a VIA C3 "Nehemiah". Selecting this enables usage - of SSE and tells gcc to treat the CPU as a 686. - Note, this kernel will not boot on older (pre model 9) C3s. - -endchoice - -config X86_GENERIC - bool "Generic x86 support" - help - Instead of just including optimizations for the selected - x86 variant (e.g. PII, Crusoe or Athlon), include some more - generic optimizations as well. This will make the kernel - perform better on x86 CPUs other than that selected. - - This is really intended for distributors who need more - generic optimizations. - -endif - -# -# Define implied options from the CPU selection here -# -config X86_CMPXCHG - bool - depends on !M386 - default y - -config X86_XADD - bool - depends on !M386 - default y - -config X86_L1_CACHE_SHIFT - int - default "7" if MPENTIUM4 || X86_GENERIC - default "4" if X86_ELAN || M486 || M386 - default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 - default "6" if MK7 || MK8 || MPENTIUMM - -config RWSEM_GENERIC_SPINLOCK - bool - depends on M386 - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - depends on !M386 - default y - -config GENERIC_CALIBRATE_DELAY - bool - default y - -config X86_PPRO_FENCE - bool - depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 - default y - -config X86_F00F_BUG - bool - depends on M586MMX || M586TSC || M586 || M486 || M386 - default y - -config X86_WP_WORKS_OK - bool - depends on !M386 - default y - -config X86_INVLPG - bool - depends on !M386 - default y - -config X86_BSWAP - bool - depends on !M386 - default y - -config X86_POPAD_OK - bool - depends on !M386 - default y - -config X86_ALIGNMENT_16 - bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 - default y - -config X86_GOOD_APIC - bool - depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON - default y - -config X86_INTEL_USERCOPY - bool - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON - default y - -config X86_USE_PPRO_CHECKSUM - bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON - default y - -config X86_USE_3DNOW - bool - depends on MCYRIXIII || MK7 - default y - -config X86_OOSTORE - bool - depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR - default y +source "arch/i386/Kconfig.cpu" config HPET_TIMER bool "HPET Timer Support" @@ -561,11 +268,6 @@ config X86_VISWS_APIC depends on X86_VISWS default y -config X86_TSC - bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ - default y - config X86_MCE bool "Machine Check Exception" depends on !X86_VOYAGER @@ -997,7 +699,7 @@ depends on PM && !X86_VISWS config APM tristate "APM (Advanced Power Management) BIOS support" - depends on PM + depends on PM && PM_LEGACY ---help--- APM is a BIOS specification for saving power using several different techniques. This is mostly useful for battery powered laptops with @@ -1295,8 +997,21 @@ source "drivers/Kconfig" source "fs/Kconfig" +menu "Instrumentation Support" + depends on EXPERIMENTAL + source "arch/i386/oprofile/Kconfig" +config KPROBES + bool "Kprobes (EXPERIMENTAL)" + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". +endmenu + source "arch/i386/Kconfig.debug" source "security/Kconfig" @@ -1340,8 +1055,3 @@ config X86_TRAMPOLINE bool depends on X86_SMP || (X86_VOYAGER && SMP) default y - -config PC - bool - depends on X86 && !EMBEDDED - default y diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu new file mode 100644 index 000000000000..53bbb3c008ee --- /dev/null +++ b/arch/i386/Kconfig.cpu @@ -0,0 +1,309 @@ +# Put here option for CPU selection and depending optimization +if !X86_ELAN + +choice + prompt "Processor family" + default M686 + +config M386 + bool "386" + ---help--- + This is the processor type of your CPU. This information is used for + optimizing purposes. In order to compile a kernel that can run on + all x86 CPU types (albeit not optimally fast), you can specify + "386" here. + + The kernel will not necessarily run on earlier architectures than + the one you have chosen, e.g. a Pentium optimized kernel will run on + a PPro, but not necessarily on a i486. + + Here are the settings recommended for greatest speed: + - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI + 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels + will run on a 386 class machine. + - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or + SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. + - "586" for generic Pentium CPUs lacking the TSC + (time stamp counter) register. + - "Pentium-Classic" for the Intel Pentium. + - "Pentium-MMX" for the Intel Pentium MMX. + - "Pentium-Pro" for the Intel Pentium Pro. + - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron. + - "Pentium-III" for the Intel Pentium III or Coppermine Celeron. + - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. + - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). + - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). + - "Crusoe" for the Transmeta Crusoe series. + - "Efficeon" for the Transmeta Efficeon series. + - "Winchip-C6" for original IDT Winchip. + - "Winchip-2" for IDT Winchip 2. + - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. + - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). + - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. + - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). + + If you don't know what to do, choose "386". + +config M486 + bool "486" + help + Select this for a 486 series processor, either Intel or one of the + compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, + DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or + U5S. + +config M586 + bool "586/K5/5x86/6x86/6x86MX" + help + Select this for an 586 or 686 series processor such as the AMD K5, + the Cyrix 5x86, 6x86 and 6x86MX. This choice does not + assume the RDTSC (Read Time Stamp Counter) instruction. + +config M586TSC + bool "Pentium-Classic" + help + Select this for a Pentium Classic processor with the RDTSC (Read + Time Stamp Counter) instruction for benchmarking. + +config M586MMX + bool "Pentium-MMX" + help + Select this for a Pentium with the MMX graphics/multimedia + extended instructions. + +config M686 + bool "Pentium-Pro" + help + Select this for Intel Pentium Pro chips. This enables the use of + Pentium Pro extended instructions, and disables the init-time guard + against the f00f bug found in earlier Pentiums. + +config MPENTIUMII + bool "Pentium-II/Celeron(pre-Coppermine)" + help + Select this for Intel chips based on the Pentium-II and + pre-Coppermine Celeron core. This option enables an unaligned + copy optimization, compiles the kernel with optimization flags + tailored for the chip, and applies any applicable Pentium Pro + optimizations. + +config MPENTIUMIII + bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" + help + Select this for Intel chips based on the Pentium-III and + Celeron-Coppermine core. This option enables use of some + extended prefetch instructions in addition to the Pentium II + extensions. + +config MPENTIUMM + bool "Pentium M" + help + Select this for Intel Pentium M (not Pentium-4 M) + notebook chips. + +config MPENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" + help + Select this for Intel Pentium 4 chips. This includes the + Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M + (not Pentium M) chips. This option enables compile flags + optimized for the chip, uses the correct cache shift, and + applies any applicable Pentium III optimizations. + +config MK6 + bool "K6/K6-II/K6-III" + help + Select this for an AMD K6-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK7 + bool "Athlon/Duron/K7" + help + Select this for an AMD Athlon K7-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK8 + bool "Opteron/Athlon64/Hammer/K8" + help + Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables + use of some extended instructions, and passes appropriate optimization + flags to GCC. + +config MCRUSOE + bool "Crusoe" + help + Select this for a Transmeta Crusoe processor. Treats the processor + like a 586 with TSC, and sets some GCC optimization flags (like a + Pentium Pro with no alignment requirements). + +config MEFFICEON + bool "Efficeon" + help + Select this for a Transmeta Efficeon processor. + +config MWINCHIPC6 + bool "Winchip-C6" + help + Select this for an IDT Winchip C6 chip. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. + +config MWINCHIP2 + bool "Winchip-2" + help + Select this for an IDT Winchip-2. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. + +config MWINCHIP3D + bool "Winchip-2A/Winchip-3" + help + Select this for an IDT Winchip-2A or 3. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment reqirements. Also enable out of order memory + stores for this CPU, which can increase performance of some + operations. + +config MGEODEGX1 + bool "GeodeGX1" + help + Select this for a Geode GX1 (Cyrix MediaGX) chip. + +config MCYRIXIII + bool "CyrixIII/VIA-C3" + help + Select this for a Cyrix III or C3 chip. Presently Linux and GCC + treat this chip as a generic 586. Whilst the CPU is 686 class, + it lacks the cmov extension which gcc assumes is present when + generating 686 code. + Note that Nehemiah (Model 9) and above will not boot with this + kernel due to them lacking the 3DNow! instructions used in earlier + incarnations of the CPU. + +config MVIAC3_2 + bool "VIA C3-2 (Nehemiah)" + help + Select this for a VIA C3 "Nehemiah". Selecting this enables usage + of SSE and tells gcc to treat the CPU as a 686. + Note, this kernel will not boot on older (pre model 9) C3s. + +endchoice + +config X86_GENERIC + bool "Generic x86 support" + help + Instead of just including optimizations for the selected + x86 variant (e.g. PII, Crusoe or Athlon), include some more + generic optimizations as well. This will make the kernel + perform better on x86 CPUs other than that selected. + + This is really intended for distributors who need more + generic optimizations. + +endif + +# +# Define implied options from the CPU selection here +# +config X86_CMPXCHG + bool + depends on !M386 + default y + +config X86_XADD + bool + depends on !M386 + default y + +config X86_L1_CACHE_SHIFT + int + default "7" if MPENTIUM4 || X86_GENERIC + default "4" if X86_ELAN || M486 || M386 + default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 + default "6" if MK7 || MK8 || MPENTIUMM + +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + depends on !M386 + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config X86_PPRO_FENCE + bool + depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 + default y + +config X86_F00F_BUG + bool + depends on M586MMX || M586TSC || M586 || M486 || M386 + default y + +config X86_WP_WORKS_OK + bool + depends on !M386 + default y + +config X86_INVLPG + bool + depends on !M386 + default y + +config X86_BSWAP + bool + depends on !M386 + default y + +config X86_POPAD_OK + bool + depends on !M386 + default y + +config X86_CMPXCHG64 + bool + depends on !M386 && !M486 + default y + +config X86_ALIGNMENT_16 + bool + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + default y + +config X86_GOOD_APIC + bool + depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON + default y + +config X86_INTEL_USERCOPY + bool + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON + default y + +config X86_USE_PPRO_CHECKSUM + bool + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON + default y + +config X86_USE_3DNOW + bool + depends on MCYRIXIII || MK7 + default y + +config X86_OOSTORE + bool + depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR + default y + +config X86_TSC + bool + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ + default y diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index 5228c40a6fb2..c48b424dd640 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug @@ -22,16 +22,6 @@ config DEBUG_STACKOVERFLOW This option will cause messages to be printed if free stack space drops below a certain limit. -config KPROBES - bool "Kprobes" - depends on DEBUG_KERNEL - help - Kprobes allows you to trap at almost any kernel address and - execute a callback function. register_kprobe() establishes - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 09951990a622..d121ea18460f 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -34,35 +34,8 @@ CFLAGS += -pipe -msoft-float # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) -align := $(cc-option-align) -cflags-$(CONFIG_M386) += -march=i386 -cflags-$(CONFIG_M486) += -march=i486 -cflags-$(CONFIG_M586) += -march=i586 -cflags-$(CONFIG_M586TSC) += -march=i586 -cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586) -cflags-$(CONFIG_M686) += -march=i686 -cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call cc-option,-mtune=pentium2) -cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call cc-option,-mtune=pentium3) -cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call cc-option,-mtune=pentium3) -cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call cc-option,-mtune=pentium4) -cflags-$(CONFIG_MK6) += -march=k6 -# Please note, that patches that add -march=athlon-xp and friends are pointless. -# They make zero difference whatsosever to performance at this time. -cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4) -cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)) -cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) -cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) -cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) -cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) - -# AMD Elan support -cflags-$(CONFIG_X86_ELAN) += -march=i486 - -# Geode GX1 support -cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486) +# CPU-specific tuning. Anything which can be shared with UML should go here. +include $(srctree)/arch/i386/Makefile.cpu # -mregparm=3 works ok on gcc-3.0 and later # diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu new file mode 100644 index 000000000000..8e51456df23d --- /dev/null +++ b/arch/i386/Makefile.cpu @@ -0,0 +1,41 @@ +# CPU tuning section - shared with UML. +# Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. + +#-mtune exists since gcc 3.4, and some -mcpu flavors didn't exist in gcc 2.95. +HAS_MTUNE := $(call cc-option-yn, -mtune=i386) +ifeq ($(HAS_MTUNE),y) +tune = $(call cc-option,-mtune=$(1),) +else +tune = $(call cc-option,-mcpu=$(1),) +endif + +align := $(cc-option-align) +cflags-$(CONFIG_M386) += -march=i386 +cflags-$(CONFIG_M486) += -march=i486 +cflags-$(CONFIG_M586) += -march=i586 +cflags-$(CONFIG_M586TSC) += -march=i586 +cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586) +cflags-$(CONFIG_M686) += -march=i686 +cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) +cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4) +cflags-$(CONFIG_MK6) += -march=k6 +# Please note, that patches that add -march=athlon-xp and friends are pointless. +# They make zero difference whatsosever to performance at this time. +cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4) +cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)) +cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) +cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) +cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) +cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) + +# AMD Elan support +cflags-$(CONFIG_X86_ELAN) += -march=i486 + +# Geode GX1 support +cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486) + diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index a63351c085c6..447fa9e33ffb 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -27,30 +27,26 @@ #include <linux/config.h> #include <linux/acpi.h> #include <linux/efi.h> -#include <linux/irq.h> #include <linux/module.h> #include <linux/dmi.h> +#include <linux/irq.h> #include <asm/pgtable.h> #include <asm/io_apic.h> #include <asm/apic.h> #include <asm/io.h> -#include <asm/irq.h> #include <asm/mpspec.h> #ifdef CONFIG_X86_64 -static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -} extern void __init clustered_apic_check(void); -static inline int ioapic_setup_disabled(void) -{ - return 0; -} +extern int gsi_irq_sharing(int gsi); #include <asm/proto.h> +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } + + #else /* X86 */ #ifdef CONFIG_X86_LOCAL_APIC @@ -58,6 +54,8 @@ static inline int ioapic_setup_disabled(void) #include <mach_mpparse.h> #endif /* CONFIG_X86_LOCAL_APIC */ +static inline int gsi_irq_sharing(int gsi) { return gsi; } + #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ @@ -250,9 +248,7 @@ acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end) acpi_table_print_madt_entry(header); - /* no utility in registering a disabled processor */ - if (processor->flags.enabled == 0) - return 0; + /* Register even disabled CPUs for cpu hotplug */ x86_acpiid_to_apicid[processor->acpi_id] = processor->id; @@ -460,7 +456,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) *irq = IO_APIC_VECTOR(gsi); else #endif - *irq = gsi; + *irq = gsi_irq_sharing(gsi); return 0; } @@ -544,7 +540,7 @@ acpi_scan_rsdp(unsigned long start, unsigned long length) * RSDP signature. */ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *)(start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } @@ -642,6 +638,13 @@ static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) return 0; pmtmr_ioport = fadt->xpm_tmr_blk.address; + /* + * "X" fields are optional extensions to the original V1.0 + * fields, so we must selectively expand V1.0 fields if the + * corresponding X field is zero. + */ + if (!pmtmr_ioport) + pmtmr_ioport = fadt->V1_pm_tmr_blk; } else { /* FADT rev. 1 */ pmtmr_ioport = fadt->V1_pm_tmr_blk; diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index a22a866de8f9..496a2c9909fe 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -18,7 +18,6 @@ #include <linux/init.h> #include <linux/mm.h> -#include <linux/irq.h> #include <linux/delay.h> #include <linux/bootmem.h> #include <linux/smp_lock.h> @@ -560,14 +559,20 @@ void __devinit setup_local_APIC(void) * If Linux enabled the LAPIC against the BIOS default * disable it down before re-entering the BIOS on shutdown. * Otherwise the BIOS may get confused and not power-off. + * Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. */ void lapic_shutdown(void) { - if (!cpu_has_apic || !enabled_via_apicbase) + if (!cpu_has_apic) return; local_irq_disable(); - disable_local_APIC(); + clear_local_APIC(); + + if (enabled_via_apicbase) + disable_local_APIC(); + local_irq_enable(); } @@ -1047,10 +1052,11 @@ static unsigned int calibration_result; void __init setup_boot_APIC_clock(void) { + unsigned long flags; apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); using_apic_timer = 1; - local_irq_disable(); + local_irq_save(flags); calibration_result = calibrate_APIC_clock(); /* @@ -1058,7 +1064,7 @@ void __init setup_boot_APIC_clock(void) */ setup_APIC_timer(calibration_result); - local_irq_enable(); + local_irq_restore(flags); } void __devinit setup_secondary_APIC_clock(void) diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index d7811c4e8b50..1e60acbed3c1 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -218,6 +218,7 @@ #include <linux/time.h> #include <linux/sched.h> #include <linux/pm.h> +#include <linux/pm_legacy.h> #include <linux/device.h> #include <linux/kernel.h> #include <linux/smp.h> @@ -447,8 +448,7 @@ static char * apm_event_name[] = { "system standby resume", "capabilities change" }; -#define NR_APM_EVENT_NAME \ - (sizeof(apm_event_name) / sizeof(apm_event_name[0])) +#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name) typedef struct lookup_t { int key; @@ -479,7 +479,7 @@ static const lookup_t error_table[] = { { APM_NO_ERROR, "BIOS did not set a return code" }, { APM_NOT_PRESENT, "No APM present" } }; -#define ERROR_COUNT (sizeof(error_table)/sizeof(lookup_t)) +#define ERROR_COUNT ARRAY_SIZE(error_table) /** * apm_error - display an APM error @@ -597,12 +597,14 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, cpumask_t cpus; int cpu; struct desc_struct save_desc_40; + struct desc_struct *gdt; cpus = apm_save_cpus(); cpu = get_cpu(); - save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; + gdt[0x40 / 8] = bad_bios_desc; local_save_flags(flags); APM_DO_CLI; @@ -610,7 +612,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); APM_DO_RESTORE_SEGS; local_irq_restore(flags); - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40; + gdt[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -639,13 +641,14 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) cpumask_t cpus; int cpu; struct desc_struct save_desc_40; - + struct desc_struct *gdt; cpus = apm_save_cpus(); cpu = get_cpu(); - save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; + gdt[0x40 / 8] = bad_bios_desc; local_save_flags(flags); APM_DO_CLI; @@ -653,7 +656,7 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); APM_DO_RESTORE_SEGS; local_irq_restore(flags); - __get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40; + gdt[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); return error; @@ -767,8 +770,26 @@ static int set_system_power_state(u_short state) static int apm_do_idle(void) { u32 eax; + u8 ret = 0; + int idled = 0; + int polling; + + polling = test_thread_flag(TIF_POLLING_NRFLAG); + if (polling) { + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + } + if (!need_resched()) { + idled = 1; + ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); + } + if (polling) + set_thread_flag(TIF_POLLING_NRFLAG); + + if (!idled) + return 0; - if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) { + if (ret) { static unsigned long t; /* This always fails on some SMP boards running UP kernels. @@ -2295,35 +2316,36 @@ static int __init apm_init(void) apm_bios_entry.segment = APM_CS; for (i = 0; i < NR_CPUS; i++) { - set_base(per_cpu(cpu_gdt_table, i)[APM_CS >> 3], + struct desc_struct *gdt = get_cpu_gdt_table(i); + set_base(gdt[APM_CS >> 3], __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], + set_base(gdt[APM_CS_16 >> 3], __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(per_cpu(cpu_gdt_table, i)[APM_DS >> 3], + set_base(gdt[APM_DS >> 3], __va((unsigned long)apm_info.bios.dseg << 4)); #ifndef APM_RELAX_SEGMENTS if (apm_info.bios.version == 0x100) { #endif /* For ASUS motherboard, Award BIOS rev 110 (and others?) */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1); /* For some unknown machine. */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1); /* For the DEC Hinote Ultra CT475 (and others?) */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1); #ifndef APM_RELAX_SEGMENTS } else { - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], + _set_limit((char *)&gdt[APM_CS >> 3], (apm_info.bios.cseg_len - 1) & 0xffff); - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], + _set_limit((char *)&gdt[APM_CS_16 >> 3], (apm_info.bios.cseg_16_len - 1) & 0xffff); - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], + _set_limit((char *)&gdt[APM_DS >> 3], (apm_info.bios.dseg_len - 1) & 0xffff); /* workaround for broken BIOSes */ if (apm_info.bios.cseg_len <= apm_info.bios.offset) - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 -1); + _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 -1); if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */ /* for the BIOS that assumes granularity = 1 */ - per_cpu(cpu_gdt_table, i)[APM_DS >> 3].b |= 0x800000; + gdt[APM_DS >> 3].b |= 0x800000; printk(KERN_NOTICE "apm: we set the granularity of dseg.\n"); } } diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 73aeaf5a9d4e..e344ef88cfcd 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -28,6 +28,22 @@ static void __init init_amd(struct cpuinfo_x86 *c) int mbytes = num_physpages >> (20-PAGE_SHIFT); int r; +#ifdef CONFIG_SMP + unsigned long long value; + + /* Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K7_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K7_HWCR, value); + } +#endif + /* * FIXME: We should handle the K5 here. Set up the write * range and also turn on MSR 83 bits 4 and 31 (write alloc, @@ -190,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); if (cpuid_eax(0x80000000) >= 0x80000008) { - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_num_cores & (c->x86_num_cores - 1)) - c->x86_num_cores = 1; + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + if (c->x86_max_cores & (c->x86_max_cores - 1)) + c->x86_max_cores = 1; } #ifdef CONFIG_X86_HT @@ -201,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c) * distingush the cores. Assumes number of cores is a power * of two. */ - if (c->x86_num_cores > 1) { + if (c->x86_max_cores > 1) { int cpu = smp_processor_id(); unsigned bits = 0; - while ((1 << bits) < c->x86_num_cores) + while ((1 << bits) < c->x86_max_cores) bits++; cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1); phys_proc_id[cpu] >>= bits; printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_num_cores, cpu_core_id[cpu]); + cpu, c->x86_max_cores, cpu_core_id[cpu]); } #endif } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 9ad43be9a01f..31e344b26bae 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -30,8 +30,6 @@ static int disable_x86_serial_nr __devinitdata = 1; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; -extern void mcheck_init(struct cpuinfo_x86 *c); - extern int disable_pse; static void default_init(struct cpuinfo_x86 * c) @@ -233,10 +231,10 @@ static void __init early_cpu_detect(void) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 15; c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - } c->x86_mask = tfms & 15; if (cap0 & (1<<19)) c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; @@ -335,7 +333,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_num_cores = 1; + c->x86_max_cores = 1; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -429,9 +427,8 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) } /* Init Machine Check Exception if available. */ -#ifdef CONFIG_X86_MCE mcheck_init(c); -#endif + if (c == &boot_cpu_data) sysenter_setup(); enable_sep_cpu(); @@ -446,52 +443,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - int index_msb, tmp; + int index_msb, core_bits; int cpu = smp_processor_id(); + cpuid(1, &eax, &ebx, &ecx, &edx); + + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; - cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); } else if (smp_num_siblings > 1 ) { - index_msb = 31; if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) { - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + + index_msb = get_count_order(smp_num_siblings); phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", phys_proc_id[cpu]); - smp_num_siblings = smp_num_siblings / c->x86_num_cores; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - tmp = smp_num_siblings; - index_msb = 31; - while ((tmp & 0x80000000) == 0) { - tmp <<=1 ; - index_msb--; - } + index_msb = get_count_order(smp_num_siblings) ; - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + ((1 << core_bits) - 1); - if (c->x86_num_cores > 1) + if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", cpu_core_id[cpu]); } @@ -573,6 +562,7 @@ void __devinit cpu_init(void) int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); struct thread_struct *thread = ¤t->thread; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { @@ -594,24 +584,16 @@ void __devinit cpu_init(void) * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table, - GDT_SIZE); + memcpy(gdt, cpu_gdt_table, GDT_SIZE); /* Set up GDT entry for 16bit stack */ - *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |= + *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | (CPU_16BIT_STACK_SIZE - 1); cpu_gdt_descr[cpu].size = GDT_SIZE - 1; - cpu_gdt_descr[cpu].address = - (unsigned long)&per_cpu(cpu_gdt_table, cpu); - - /* - * Set up the per-thread TLS descriptor cache: - */ - memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu), - GDT_ENTRY_TLS_ENTRIES * 8); + cpu_gdt_descr[cpu].address = (unsigned long)gdt; load_gdt(&cpu_gdt_descr[cpu]); load_idt(&idt_descr); diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 822c8ce9d1f1..871366b83b3f 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -32,6 +32,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/compiler.h> +#include <linux/sched.h> /* current */ #include <asm/io.h> #include <asm/delay.h> #include <asm/uaccess.h> @@ -376,10 +377,9 @@ acpi_cpufreq_cpu_init ( arg0.buffer.length = 12; arg0.buffer.pointer = (u8 *) arg0_buf; - data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); + data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) return (-ENOMEM); - memset(data, 0, sizeof(struct cpufreq_acpi_io)); acpi_io_data[cpu] = data; diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index aa622d52c6e5..270f2188d68b 100644 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -28,6 +28,7 @@ #include <linux/cpufreq.h> #include <linux/slab.h> #include <linux/cpumask.h> +#include <linux/sched.h> /* current / set_cpus_allowed() */ #include <asm/processor.h> #include <asm/msr.h> diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index 73a5dc5b26b8..edcd626001da 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -171,10 +171,9 @@ static int get_ranges (unsigned char *pst) unsigned int speed; u8 fid, vid; - powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); + powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); if (!powernow_table) return -ENOMEM; - memset(powernow_table, 0, (sizeof(struct cpufreq_frequency_table) * (number_scales + 1))); for (j=0 ; j < number_scales; j++) { fid = *pst++; @@ -305,16 +304,13 @@ static int powernow_acpi_init(void) goto err0; } - acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance), + acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL); - if (!acpi_processor_perf) { retval = -ENOMEM; goto err0; } - memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance)); - if (acpi_processor_register_performance(acpi_processor_perf, 0)) { retval = -EIO; goto err1; @@ -337,14 +333,12 @@ static int powernow_acpi_init(void) goto err2; } - powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); + powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); if (!powernow_table) { retval = -ENOMEM; goto err2; } - memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table))); - pc.val = (unsigned long) acpi_processor_perf->states[0].control; for (i = 0; i < number_scales; i++) { u8 fid, vid; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index ab6e0611303d..68a1fc87f4ca 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -32,6 +32,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/cpumask.h> +#include <linux/sched.h> /* for current / set_cpus_allowed() */ #include <asm/msr.h> #include <asm/io.h> @@ -44,7 +45,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.50.3" +#define VERSION "version 1.50.4" #include "powernow-k8.h" /* serialize freq changes */ @@ -111,8 +112,8 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; do { - if (i++ > 0x1000000) { - printk(KERN_ERR PFX "detected change pending stuck\n"); + if (i++ > 10000) { + dprintk("detected change pending stuck\n"); return 1; } rdmsr(MSR_FIDVID_STATUS, lo, hi); @@ -159,6 +160,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) { u32 lo; u32 savevid = data->currvid; + u32 i = 0; if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { printk(KERN_ERR PFX "internal error - overflow on fid write\n"); @@ -170,10 +172,13 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", fid, lo, data->plllock * PLL_LOCK_CONVERSION); - wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); - - if (query_current_values_with_pending_wait(data)) - return 1; + do { + wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); count_off_irt(data); @@ -197,6 +202,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) { u32 lo; u32 savefid = data->currfid; + int i = 0; if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { printk(KERN_ERR PFX "internal error - overflow on vid write\n"); @@ -208,10 +214,13 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", vid, lo, STOP_GRANT_5NS); - wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); - - if (query_current_values_with_pending_wait(data)) - return 1; + do { + wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); if (savefid != data->currfid) { printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", @@ -453,7 +462,6 @@ static int check_supported_cpu(unsigned int cpu) oldmask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(cpu)); - schedule(); if (smp_processor_id() != cpu) { printk(KERN_ERR "limiting to cpu %u failed\n", cpu); @@ -488,9 +496,7 @@ static int check_supported_cpu(unsigned int cpu) out: set_cpus_allowed(current, oldmask); - schedule(); return rc; - } static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) @@ -904,7 +910,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); - schedule(); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); @@ -959,8 +964,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi err_out: set_cpus_allowed(current, oldmask); - schedule(); - return ret; } @@ -982,12 +985,11 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) if (!check_supported_cpu(pol->cpu)) return -ENODEV; - data = kmalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); + data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); if (!data) { printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); return -ENOMEM; } - memset(data,0,sizeof(struct powernow_k8_data)); data->cpu = pol->cpu; @@ -1017,7 +1019,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); - schedule(); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); @@ -1036,7 +1037,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) /* run on any CPU again */ set_cpus_allowed(current, oldmask); - schedule(); pol->governor = CPUFREQ_DEFAULT_GOVERNOR; pol->cpus = cpu_core_map[pol->cpu]; @@ -1071,7 +1071,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) err_out: set_cpus_allowed(current, oldmask); - schedule(); powernow_k8_cpu_exit_acpi(data); kfree(data); @@ -1107,17 +1106,14 @@ static unsigned int powernowk8_get (unsigned int cpu) set_cpus_allowed(current, oldmask); return 0; } - preempt_disable(); - + if (query_current_values_with_pending_wait(data)) goto out; khz = find_khz_freq_from_fid(data->currfid); - out: - preempt_enable_no_resched(); +out: set_cpus_allowed(current, oldmask); - return khz; } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index c397b6220430..edb9873e27e3 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -22,6 +22,7 @@ #include <linux/init.h> #include <linux/cpufreq.h> #include <linux/config.h> +#include <linux/sched.h> /* current */ #include <linux/delay.h> #include <linux/compiler.h> @@ -66,7 +67,7 @@ static const struct cpu_id cpu_ids[] = { [CPU_MP4HT_D0] = {15, 3, 4 }, [CPU_MP4HT_E0] = {15, 4, 1 }, }; -#define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0])) +#define N_IDS ARRAY_SIZE(cpu_ids) struct cpu_model { @@ -422,12 +423,11 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } } - centrino_model[cpu] = kmalloc(sizeof(struct cpu_model), GFP_KERNEL); + centrino_model[cpu] = kzalloc(sizeof(struct cpu_model), GFP_KERNEL); if (!centrino_model[cpu]) { result = -ENOMEM; goto err_unreg; } - memset(centrino_model[cpu], 0, sizeof(struct cpu_model)); centrino_model[cpu]->model_name=NULL; centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000; diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 43601de0f633..5e2da704f0fa 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -6,6 +6,7 @@ #include <linux/bitops.h> #include <linux/smp.h> #include <linux/thread_info.h> +#include <linux/module.h> #include <asm/processor.h> #include <asm/msr.h> @@ -157,7 +158,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) if ( p ) strcpy(c->x86_model_id, p); - c->x86_num_cores = num_cpu_cores(c); + c->x86_max_cores = num_cpu_cores(c); detect_ht(c); @@ -264,5 +265,52 @@ __init int intel_cpu_init(void) return 0; } +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + // arch_initcall(intel_cpu_init); diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 9e0d5f83cb9f..fbfd374aa336 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -3,6 +3,7 @@ * * Changes: * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. */ #include <linux/init.h> @@ -10,6 +11,7 @@ #include <linux/device.h> #include <linux/compiler.h> #include <linux/cpu.h> +#include <linux/sched.h> #include <asm/processor.h> #include <asm/smp.h> @@ -28,7 +30,7 @@ struct _cache_table }; /* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __devinitdata = +static struct _cache_table cache_table[] __cpuinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ @@ -117,10 +119,9 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; }; -#define MAX_CACHE_LEAVES 4 static unsigned short num_cache_leaves; -static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { unsigned int eax, ebx, ecx, edx; union _cpuid4_leaf_eax cache_eax; @@ -144,23 +145,18 @@ static int __init find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; union _cpuid4_leaf_eax cache_eax; - int i; - int retval; + int i = -1; - retval = MAX_CACHE_LEAVES; - /* Do cpuid(4) loop to find out num_cache_leaves */ - for (i = 0; i < MAX_CACHE_LEAVES; i++) { + do { + ++i; + /* Do cpuid(4) loop to find out num_cache_leaves */ cpuid_count(4, i, &eax, &ebx, &ecx, &edx); cache_eax.full = eax; - if (cache_eax.split.type == CACHE_TYPE_NULL) { - retval = i; - break; - } - } - return retval; + } while (cache_eax.split.type != CACHE_TYPE_NULL); + return i; } -unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) +unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) { unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ @@ -284,13 +280,7 @@ unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if ( l3 ) printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - /* - * This assumes the L3 cache is shared; it typically lives in - * the northbridge. The L1 caches are included by the L2 - * cache, and so should not be included for the purpose of - * SMP switching weights. - */ - c->x86_cache_size = l2 ? l2 : (l1i+l1d); + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); } return l2; @@ -301,31 +291,47 @@ static struct _cpuid4_info *cpuid4_info[NR_CPUS]; #define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) #ifdef CONFIG_SMP -static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { - struct _cpuid4_info *this_leaf; + struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; -#ifdef CONFIG_X86_HT - struct cpuinfo_x86 *c = cpu_data + cpu; -#endif + int index_msb, i; + struct cpuinfo_x86 *c = cpu_data; this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; if (num_threads_sharing == 1) cpu_set(cpu, this_leaf->shared_cpu_map); -#ifdef CONFIG_X86_HT - else if (num_threads_sharing == smp_num_siblings) - this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; - else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) - this_leaf->shared_cpu_map = cpu_core_map[cpu]; - else - printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " - "any known set of CPUs\n"); -#endif + else { + index_msb = get_count_order(num_threads_sharing); + + for_each_online_cpu(i) { + if (c[i].apicid >> index_msb == + c[cpu].apicid >> index_msb) { + cpu_set(i, this_leaf->shared_cpu_map); + if (i != cpu && cpuid4_info[i]) { + sibling_leaf = CPUID4_INFO_IDX(i, index); + cpu_set(cpu, sibling_leaf->shared_cpu_map); + } + } + } + } +} +static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + int sibling; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { + sibling_leaf = CPUID4_INFO_IDX(sibling, index); + cpu_clear(cpu, sibling_leaf->shared_cpu_map); + } } #else static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} #endif static void free_cache_attributes(unsigned int cpu) @@ -334,7 +340,7 @@ static void free_cache_attributes(unsigned int cpu) cpuid4_info[cpu] = NULL; } -static int __devinit detect_cache_attributes(unsigned int cpu) +static int __cpuinit detect_cache_attributes(unsigned int cpu) { struct _cpuid4_info *this_leaf; unsigned long j; @@ -511,7 +517,7 @@ static void cpuid4_cache_sysfs_exit(unsigned int cpu) free_cache_attributes(cpu); } -static int __devinit cpuid4_cache_sysfs_init(unsigned int cpu) +static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) { if (num_cache_leaves == 0) @@ -542,7 +548,7 @@ err_out: } /* Add/Remove cache interface for CPU device */ -static int __devinit cache_add_dev(struct sys_device * sys_dev) +static int __cpuinit cache_add_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i, j; @@ -579,33 +585,60 @@ static int __devinit cache_add_dev(struct sys_device * sys_dev) return retval; } -static int __devexit cache_remove_dev(struct sys_device * sys_dev) +static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i; - for (i = 0; i < num_cache_leaves; i++) + for (i = 0; i < num_cache_leaves; i++) { + cache_remove_shared_cpu_map(cpu, i); kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + } kobject_unregister(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); - return 0; + return; +} + +static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + cache_add_dev(sys_dev); + break; + case CPU_DEAD: + cache_remove_dev(sys_dev); + break; + } + return NOTIFY_OK; } -static struct sysdev_driver cache_sysdev_driver = { - .add = cache_add_dev, - .remove = __devexit_p(cache_remove_dev), +static struct notifier_block cacheinfo_cpu_notifier = +{ + .notifier_call = cacheinfo_cpu_callback, }; -/* Register/Unregister the cpu_cache driver */ -static int __devinit cache_register_driver(void) +static int __cpuinit cache_sysfs_init(void) { + int i; + if (num_cache_leaves == 0) return 0; - return sysdev_driver_register(&cpu_sysdev_class,&cache_sysdev_driver); + register_cpu_notifier(&cacheinfo_cpu_notifier); + + for_each_online_cpu(i) { + cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + return 0; } -device_initcall(cache_register_driver); +device_initcall(cache_sysfs_init); #endif - diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c index c4abe7657397..fc5d5215e23d 100644 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ b/arch/i386/kernel/cpu/mcheck/k7.c @@ -7,7 +7,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/config.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <linux/smp.h> @@ -69,7 +68,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) /* AMD K7 machine check is Intel like */ -void __devinit amd_mcheck_init(struct cpuinfo_x86 *c) +void amd_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 2cf25d2ba0f1..6170af3c271a 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -16,7 +16,7 @@ #include "mce.h" -int mce_disabled __devinitdata = 0; +int mce_disabled = 0; int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ @@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_ void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; /* This has to be run for each processor */ -void __devinit mcheck_init(struct cpuinfo_x86 *c) +void mcheck_init(struct cpuinfo_x86 *c) { if (mce_disabled==1) return; diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c index 7864ddfccf07..82dffe0d4954 100644 --- a/arch/i386/kernel/cpu/mcheck/non-fatal.c +++ b/arch/i386/kernel/cpu/mcheck/non-fatal.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/config.h> -#include <linux/irq.h> #include <linux/workqueue.h> #include <linux/interrupt.h> #include <linux/smp.h> diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 0abccb6fdf9e..fd2c459a31ef 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c @@ -6,7 +6,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/config.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <linux/smp.h> @@ -78,7 +77,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs) } /* P4/Xeon Thermal regulation detect and init */ -static void __devinit intel_init_thermal(struct cpuinfo_x86 *c) +static void intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; unsigned int cpu = smp_processor_id(); @@ -232,7 +231,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } -void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c) +void intel_p4_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c index ec0614cd2925..94bc43d950cf 100644 --- a/arch/i386/kernel/cpu/mcheck/p5.c +++ b/arch/i386/kernel/cpu/mcheck/p5.c @@ -6,7 +6,6 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <linux/smp.h> @@ -29,7 +28,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting for processors with Intel style MCE */ -void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c) +void intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c index f01b73f947e1..deeae42ce199 100644 --- a/arch/i386/kernel/cpu/mcheck/p6.c +++ b/arch/i386/kernel/cpu/mcheck/p6.c @@ -6,7 +6,6 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <linux/smp.h> @@ -80,7 +79,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } /* Set up machine check reporting for processors with Intel style MCE */ -void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c) +void intel_p6_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; @@ -103,11 +102,16 @@ void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; - /* Don't enable bank 0 on intel P6 cores, it goes bang quickly. */ - for (i=1; i<nr_mce_banks; i++) { + /* + * Following the example in IA-32 SDM Vol 3: + * - MC0_CTL should not be written + * - Status registers on all banks should be cleared on reset + */ + for (i=1; i<nr_mce_banks; i++) wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); + + for (i=0; i<nr_mce_banks; i++) wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); - } set_in_cr4 (X86_CR4_MCE); printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c index 7bae68fa168f..9e424b6c293d 100644 --- a/arch/i386/kernel/cpu/mcheck/winchip.c +++ b/arch/i386/kernel/cpu/mcheck/winchip.c @@ -6,7 +6,6 @@ #include <linux/init.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <asm/processor.h> @@ -23,7 +22,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting on the Winchip C6 series */ -void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c) +void winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; machine_check_vector = winchip_machine_check; diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c index 1923e0aed26a..cf39e205d33c 100644 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ b/arch/i386/kernel/cpu/mtrr/if.c @@ -149,60 +149,89 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EINVAL; } -static int -mtrr_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long __arg) +static long +mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) { - int err; + int err = 0; mtrr_type type; struct mtrr_sentry sentry; struct mtrr_gentry gentry; void __user *arg = (void __user *) __arg; switch (cmd) { + case MTRRIOC_ADD_ENTRY: + case MTRRIOC_SET_ENTRY: + case MTRRIOC_DEL_ENTRY: + case MTRRIOC_KILL_ENTRY: + case MTRRIOC_ADD_PAGE_ENTRY: + case MTRRIOC_SET_PAGE_ENTRY: + case MTRRIOC_DEL_PAGE_ENTRY: + case MTRRIOC_KILL_PAGE_ENTRY: + if (copy_from_user(&sentry, arg, sizeof sentry)) + return -EFAULT; + break; + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: + if (copy_from_user(&gentry, arg, sizeof gentry)) + return -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_ENTRY: + case MTRRIOC32_SET_ENTRY: + case MTRRIOC32_DEL_ENTRY: + case MTRRIOC32_KILL_ENTRY: + case MTRRIOC32_ADD_PAGE_ENTRY: + case MTRRIOC32_SET_PAGE_ENTRY: + case MTRRIOC32_DEL_PAGE_ENTRY: + case MTRRIOC32_KILL_PAGE_ENTRY: { + struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; + err = get_user(sentry.base, &s32->base); + err |= get_user(sentry.size, &s32->size); + err |= get_user(sentry.type, &s32->type); + if (err) + return err; + break; + } + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = get_user(gentry.regnum, &g32->regnum); + err |= get_user(gentry.base, &g32->base); + err |= get_user(gentry.size, &g32->size); + err |= get_user(gentry.type, &g32->type); + if (err) + return err; + break; + } +#endif + } + + switch (cmd) { default: return -ENOTTY; case MTRRIOC_ADD_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, file, 0); - if (err < 0) - return err; break; case MTRRIOC_SET_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); - if (err < 0) - return err; break; case MTRRIOC_DEL_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_del(sentry.base, sentry.size, file, 0); - if (err < 0) - return err; break; case MTRRIOC_KILL_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_del(-1, sentry.base, sentry.size); - if (err < 0) - return err; break; case MTRRIOC_GET_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) - return -EFAULT; if (gentry.regnum >= num_var_ranges) return -EINVAL; mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); @@ -217,60 +246,59 @@ mtrr_ioctl(struct inode *inode, struct file *file, gentry.type = type; } - if (copy_to_user(arg, &gentry, sizeof gentry)) - return -EFAULT; break; case MTRRIOC_ADD_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, file, 1); - if (err < 0) - return err; break; case MTRRIOC_SET_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); - if (err < 0) - return err; break; case MTRRIOC_DEL_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_del(sentry.base, sentry.size, file, 1); - if (err < 0) - return err; break; case MTRRIOC_KILL_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_del_page(-1, sentry.base, sentry.size); - if (err < 0) - return err; break; case MTRRIOC_GET_PAGE_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) - return -EFAULT; if (gentry.regnum >= num_var_ranges) return -EINVAL; mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); gentry.type = type; + break; + } + + if (err) + return err; + switch(cmd) { + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: if (copy_to_user(arg, &gentry, sizeof gentry)) - return -EFAULT; + err = -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = put_user(gentry.base, &g32->base); + err |= put_user(gentry.size, &g32->size); + err |= put_user(gentry.regnum, &g32->regnum); + err |= put_user(gentry.type, &g32->type); break; } - return 0; +#endif + } + return err; } static int @@ -310,7 +338,8 @@ static struct file_operations mtrr_fops = { .read = seq_read, .llseek = seq_lseek, .write = mtrr_write, - .ioctl = mtrr_ioctl, + .unlocked_ioctl = mtrr_ioctl, + .compat_ioctl = mtrr_ioctl, .release = mtrr_close, }; diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index dd4ebd6af7e4..1e9db198c440 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -626,6 +626,14 @@ void __init mtrr_bp_init(void) if (cpuid_eax(0x80000000) >= 0x80000008) { u32 phys_addr; phys_addr = cpuid_eax(0x80000008) & 0xff; + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 0xF && + boot_cpu_data.x86_model == 0x3 && + (boot_cpu_data.x86_mask == 0x3 || + boot_cpu_data.x86_mask == 0x4)) + phys_addr = 36; + size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); size_and_mask = ~size_or_mask & 0xfff00000; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 8bd77d948a84..e7921315ae9d 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -44,7 +44,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est", + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (c->x86_cache_size >= 0) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); #ifdef CONFIG_X86_HT - if (c->x86_num_cores * smp_num_siblings > 1) { + if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); - seq_printf(m, "siblings\t: %d\n", - c->x86_num_cores * smp_num_siblings); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } #endif diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 4647db4ad6de..13bae799e626 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -163,7 +163,7 @@ static int cpuid_class_device_create(int i) int err = 0; struct class_device *class_err; - class_err = class_device_create(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); + class_err = class_device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); if (IS_ERR(class_err)) err = PTR_ERR(class_err); return err; diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 913be77bb844..0248e084017c 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -11,10 +11,8 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/smp.h> -#include <linux/irq.h> #include <linux/reboot.h> #include <linux/kexec.h> -#include <linux/irq.h> #include <linux/delay.h> #include <linux/elf.h> #include <linux/elfcore.h> diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 9e24f7b207ee..e50b93155249 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -560,11 +560,10 @@ nmi_stack_fixup: nmi_debug_stack_check: cmpw $__KERNEL_CS,16(%esp) jne nmi_stack_correct - cmpl $debug - 1,(%esp) - jle nmi_stack_correct + cmpl $debug,(%esp) + jb nmi_stack_correct cmpl $debug_esp_fix_insn,(%esp) - jle nmi_debug_stack_fixup -nmi_debug_stack_fixup: + ja nmi_stack_correct FIX_STACK(24,nmi_stack_correct, 1) jmp nmi_stack_correct diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 178f4e9bac9d..323ef8ab3244 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -16,7 +16,6 @@ #include <asm/atomic.h> #include <asm/system.h> #include <asm/io.h> -#include <asm/irq.h> #include <asm/timer.h> #include <asm/pgtable.h> #include <asm/delay.h> @@ -25,8 +24,6 @@ #include <asm/arch_hooks.h> #include <asm/i8259.h> -#include <linux/irq.h> - #include <io_ports.h> /* diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 378313b0cce9..22c8675c79f4 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -21,7 +21,6 @@ */ #include <linux/mm.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/delay.h> @@ -47,6 +46,9 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + static DEFINE_SPINLOCK(ioapic_lock); /* @@ -739,7 +741,7 @@ static int find_irq_entry(int apic, int pin, int type) /* * Find the pin to which IRQ[irq] (ISA) is connected */ -static int find_isa_irq_pin(int irq, int type) +static int __init find_isa_irq_pin(int irq, int type) { int i; @@ -759,6 +761,33 @@ static int find_isa_irq_pin(int irq, int type) return -1; } +static int __init find_isa_irq_apic(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + + if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || + mp_bus_id_to_type[lbus] == MP_BUS_EISA || + mp_bus_id_to_type[lbus] == MP_BUS_MCA || + mp_bus_id_to_type[lbus] == MP_BUS_NEC98 + ) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) + break; + } + if (i < mp_irq_entries) { + int apic; + for(apic = 0; apic < nr_ioapics; apic++) { + if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + return apic; + } + } + + return -1; +} + /* * Find a specific PCI IRQ entry. * Not an __init, possibly needed by modules @@ -1254,7 +1283,7 @@ static void __init setup_IO_APIC_irqs(void) /* * Set up the 8259A-master output pin: */ -static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) +static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; unsigned long flags; @@ -1288,8 +1317,8 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); enable_8259A_irq(0); @@ -1596,7 +1625,8 @@ void /*__init*/ print_PIC(void) static void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; - int i; + int i8259_apic, i8259_pin; + int i, apic; unsigned long flags; for (i = 0; i < PIN_MAP_SIZE; i++) { @@ -1610,11 +1640,52 @@ static void __init enable_IO_APIC(void) /* * The number of IO-APIC IRQ registers (== #pins): */ - for (i = 0; i < nr_ioapics; i++) { + for (apic = 0; apic < nr_ioapics; apic++) { spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(i, 1); + reg_01.raw = io_apic_read(apic, 1); spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[i] = reg_01.bits.entries+1; + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + for(apic = 0; apic < nr_ioapics; apic++) { + int pin; + /* See if any of the pins is in ExtINT mode */ + for(pin = 0; pin < nr_ioapic_registers[i]; pin++) { + struct IO_APIC_route_entry entry; + spin_lock_irqsave(&ioapic_lock, flags); + *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); + *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + + + /* If the interrupt line is enabled and in ExtInt mode + * I have found the pin where the i8259 is connected. + */ + if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { + ioapic_i8259.apic = apic; + ioapic_i8259.pin = pin; + goto found_i8259; + } + } + } + found_i8259: + /* Look to see what if the MP table has reported the ExtINT */ + /* If we could not find the appropriate pin by looking at the ioapic + * the i8259 probably is not connected the ioapic but give the + * mptable a chance anyway. + */ + i8259_pin = find_isa_irq_pin(0, mp_ExtINT); + i8259_apic = find_isa_irq_apic(0, mp_ExtINT); + /* Trust the MP table if nothing is setup in the hardware */ + if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { + printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); + ioapic_i8259.pin = i8259_pin; + ioapic_i8259.apic = i8259_apic; + } + /* Complain if the MP table and the hardware disagree */ + if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && + (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) + { + printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); } /* @@ -1628,7 +1699,6 @@ static void __init enable_IO_APIC(void) */ void disable_IO_APIC(void) { - int pin; /* * Clear the IO-APIC before rebooting: */ @@ -1639,8 +1709,7 @@ void disable_IO_APIC(void) * Put that IOAPIC in virtual wire mode * so legacy interrupts can be delivered. */ - pin = find_isa_irq_pin(0, mp_ExtINT); - if (pin != -1) { + if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; unsigned long flags; @@ -1651,7 +1720,7 @@ void disable_IO_APIC(void) entry.polarity = 0; /* High */ entry.delivery_status = 0; entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = 7; /* ExtInt */ + entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; entry.dest.physical.physical_dest = 0; @@ -1660,11 +1729,13 @@ void disable_IO_APIC(void) * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, + *(((int *)&entry)+1)); + io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, + *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); } - disconnect_bsp_APIC(pin != -1); + disconnect_bsp_APIC(ioapic_i8259.pin != -1); } /* @@ -1938,7 +2009,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); - move_irq(vector); + move_native_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1953,7 +2024,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); - move_irq(vector); + move_native_irq(vector); end_level_ioapic_irq(irq); } @@ -2114,20 +2185,21 @@ static void setup_nmi (void) */ static inline void unlock_ExtINT_logic(void) { - int pin, i; + int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; unsigned long flags; - pin = find_isa_irq_pin(8, mp_INT); + pin = find_isa_irq_pin(8, mp_INT); + apic = find_isa_irq_apic(8, mp_INT); if (pin == -1) return; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); + *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); spin_unlock_irqrestore(&ioapic_lock, flags); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2140,8 +2212,8 @@ static inline void unlock_ExtINT_logic(void) entry1.vector = 0; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); save_control = CMOS_READ(RTC_CONTROL); @@ -2159,11 +2231,11 @@ static inline void unlock_ExtINT_logic(void) CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(apic, pin); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2175,7 +2247,7 @@ static inline void unlock_ExtINT_logic(void) */ static inline void check_timer(void) { - int pin1, pin2; + int apic1, pin1, apic2, pin2; int vector; /* @@ -2197,10 +2269,13 @@ static inline void check_timer(void) timer_ack = 1; enable_8259A_irq(0); - pin1 = find_isa_irq_pin(0, mp_INT); - pin2 = find_isa_irq_pin(0, mp_ExtINT); + pin1 = find_isa_irq_pin(0, mp_INT); + apic1 = find_isa_irq_apic(0, mp_INT); + pin2 = ioapic_i8259.pin; + apic2 = ioapic_i8259.apic; - printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); + printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", + vector, apic1, pin1, apic2, pin2); if (pin1 != -1) { /* @@ -2217,8 +2292,9 @@ static inline void check_timer(void) clear_IO_APIC_pin(0, pin1); return; } - clear_IO_APIC_pin(0, pin1); - printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); + clear_IO_APIC_pin(apic1, pin1); + printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " + "IO-APIC\n"); } printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); @@ -2227,13 +2303,13 @@ static inline void check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - setup_ExtINT_IRQ0_pin(pin2, vector); + setup_ExtINT_IRQ0_pin(apic2, pin2, vector); if (timer_irq_works()) { printk("works.\n"); if (pin1 != -1) - replace_pin_at_irq(0, 0, pin1, 0, pin2); + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); else - add_pin_to_irq(0, 0, pin2); + add_pin_to_irq(0, apic2, pin2); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); } @@ -2242,7 +2318,7 @@ static inline void check_timer(void) /* * Cleanup, just in case ... */ - clear_IO_APIC_pin(0, pin2); + clear_IO_APIC_pin(apic2, pin2); } printk(" failed.\n"); diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index f2b37654777f..b59a34dbe262 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c @@ -108,8 +108,11 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) /* * Sets the lazy trigger so that the next I/O operation will * reload the correct bitmap. + * Reset the owner so that a process switch will not set + * tss->io_bitmap_base to IO_BITMAP_OFFSET. */ tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; + tss->io_bitmap_owner = NULL; put_cpu(); diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index ce66dcc26d90..1a201a932865 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -218,7 +218,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "CPU%d ",j); seq_putc(p, '\n'); } @@ -232,7 +232,7 @@ int show_interrupts(struct seq_file *p, void *v) #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif seq_printf(p, " %14s", irq_desc[i].handler->typename); @@ -246,12 +246,12 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); seq_putc(p, '\n'); diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 6345b430b105..32b0c24ab9a6 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -31,22 +31,16 @@ #include <linux/config.h> #include <linux/kprobes.h> #include <linux/ptrace.h> -#include <linux/spinlock.h> #include <linux/preempt.h> #include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/desc.h> -static struct kprobe *current_kprobe; -static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags; -static struct kprobe *kprobe_prev; -static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev; -static struct pt_regs jprobe_saved_regs; -static long *jprobe_saved_esp; -/* copy of the kernel stack at the probe fire time */ -static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; void jprobe_return_end(void); +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + /* * returns non-zero if opcode modifies the interrupt flag. */ @@ -91,29 +85,30 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) { } -static inline void save_previous_kprobe(void) +static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { - kprobe_prev = current_kprobe; - kprobe_status_prev = kprobe_status; - kprobe_old_eflags_prev = kprobe_old_eflags; - kprobe_saved_eflags_prev = kprobe_saved_eflags; + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags; + kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags; } -static inline void restore_previous_kprobe(void) +static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - current_kprobe = kprobe_prev; - kprobe_status = kprobe_status_prev; - kprobe_old_eflags = kprobe_old_eflags_prev; - kprobe_saved_eflags = kprobe_saved_eflags_prev; + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags; + kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags; } -static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { - current_kprobe = p; - kprobe_saved_eflags = kprobe_old_eflags + __get_cpu_var(current_kprobe) = p; + kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags = (regs->eflags & (TF_MASK | IF_MASK)); if (is_IF_modifier(p->opcode)) - kprobe_saved_eflags &= ~IF_MASK; + kcb->kprobe_saved_eflags &= ~IF_MASK; } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -127,6 +122,7 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } +/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -157,9 +153,15 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) int ret = 0; kprobe_opcode_t *addr = NULL; unsigned long *lp; + struct kprobe_ctlblk *kcb; - /* We're in an interrupt, but this is clear and BUG()-safe. */ + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ preempt_disable(); + kcb = get_kprobe_ctlblk(); + /* Check if the application is using LDT entry for its code segment and * calculate the address by reading the base address from the LDT entry. */ @@ -173,15 +175,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } /* Check we're not actually recursing */ if (kprobe_running()) { - /* We *are* holding lock here, so this is safe. - Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS && + if (kcb->kprobe_status == KPROBE_HIT_SS && *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; - regs->eflags |= kprobe_saved_eflags; - unlock_kprobes(); + regs->eflags |= kcb->kprobe_saved_eflags; goto no_kprobe; } /* We have reentered the kprobe_handler(), since @@ -190,26 +189,23 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) * just single step on the instruction of the new probe * without calling any user handlers. */ - save_previous_kprobe(); - set_current_kprobe(p, regs); + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); p->nmissed++; prepare_singlestep(p, regs); - kprobe_status = KPROBE_REENTER; + kcb->kprobe_status = KPROBE_REENTER; return 1; } else { - p = current_kprobe; + p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } } - /* If it's not ours, can't be delete race, (we hold lock). */ goto no_kprobe; } - lock_kprobes(); p = get_kprobe(addr); if (!p) { - unlock_kprobes(); if (regs->eflags & VM_MASK) { /* We are in virtual-8086 mode. Return 0 */ goto no_kprobe; @@ -232,8 +228,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) goto no_kprobe; } - kprobe_status = KPROBE_HIT_ACTIVE; - set_current_kprobe(p, regs); + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -241,7 +237,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ss_probe: prepare_singlestep(p, regs); - kprobe_status = KPROBE_HIT_SS; + kcb->kprobe_status = KPROBE_HIT_SS; return 1; no_kprobe: @@ -269,9 +265,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) struct kretprobe_instance *ri = NULL; struct hlist_head *head; struct hlist_node *node, *tmp; - unsigned long orig_ret_address = 0; + unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); /* @@ -310,14 +307,15 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); regs->eip = orig_ret_address; - unlock_kprobes(); + reset_current_kprobe(); + spin_unlock_irqrestore(&kretprobe_lock, flags); preempt_enable_no_resched(); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption. - */ + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ return 1; } @@ -343,7 +341,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. */ -static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, + struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = (unsigned long *)®s->esp; unsigned long next_eip = 0; @@ -353,7 +352,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) switch (p->ainsn.insn[0]) { case 0x9c: /* pushfl */ *tos &= ~(TF_MASK | IF_MASK); - *tos |= kprobe_old_eflags; + *tos |= kcb->kprobe_old_eflags; break; case 0xc3: /* ret/lret */ case 0xcb: @@ -394,27 +393,30 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) /* * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled thoroughout this function. And we hold kprobe lock. + * remain disabled thoroughout this function. */ static inline int post_kprobe_handler(struct pt_regs *regs) { - if (!kprobe_running()) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) return 0; - if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { - kprobe_status = KPROBE_HIT_SSDONE; - current_kprobe->post_handler(current_kprobe, regs, 0); + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); } - resume_execution(current_kprobe, regs); - regs->eflags |= kprobe_saved_eflags; + resume_execution(cur, regs, kcb); + regs->eflags |= kcb->kprobe_saved_eflags; /*Restore back the original saved kprobes variables and continue. */ - if (kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); goto out; } - unlock_kprobes(); + reset_current_kprobe(); out: preempt_enable_no_resched(); @@ -429,18 +431,19 @@ out: return 1; } -/* Interrupts disabled, kprobe_lock held. */ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { - if (current_kprobe->fault_handler - && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; - if (kprobe_status & KPROBE_HIT_SS) { - resume_execution(current_kprobe, regs); - regs->eflags |= kprobe_old_eflags; + if (kcb->kprobe_status & KPROBE_HIT_SS) { + resume_execution(cur, regs, kcb); + regs->eflags |= kcb->kprobe_old_eflags; - unlock_kprobes(); + reset_current_kprobe(); preempt_enable_no_resched(); } return 0; @@ -453,39 +456,41 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) - return NOTIFY_STOP; + ret = NOTIFY_STOP; break; case DIE_DEBUG: if (post_kprobe_handler(args->regs)) - return NOTIFY_STOP; + ret = NOTIFY_STOP; break; case DIE_GPF: - if (kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) - return NOTIFY_STOP; - break; case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) - return NOTIFY_STOP; + ret = NOTIFY_STOP; + preempt_enable(); break; default: break; } - return NOTIFY_DONE; + return ret; } int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - jprobe_saved_regs = *regs; - jprobe_saved_esp = ®s->esp; - addr = (unsigned long)jprobe_saved_esp; + kcb->jprobe_saved_regs = *regs; + kcb->jprobe_saved_esp = ®s->esp; + addr = (unsigned long)(kcb->jprobe_saved_esp); /* * TBD: As Linus pointed out, gcc assumes that the callee @@ -494,7 +499,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * we also save and restore enough stack bytes to cover * the argument area. */ - memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr)); + memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, + MIN_STACK_SIZE(addr)); regs->eflags &= ~IF_MASK; regs->eip = (unsigned long)(jp->entry); return 1; @@ -502,36 +508,40 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) void __kprobes jprobe_return(void) { - preempt_enable_no_resched(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + asm volatile (" xchgl %%ebx,%%esp \n" " int3 \n" " .globl jprobe_return_end \n" " jprobe_return_end: \n" " nop \n"::"b" - (jprobe_saved_esp):"memory"); + (kcb->jprobe_saved_esp):"memory"); } int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->eip - 1); - unsigned long stack_addr = (unsigned long)jprobe_saved_esp; + unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp); struct jprobe *jp = container_of(p, struct jprobe, kp); if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (®s->esp != jprobe_saved_esp) { + if (®s->esp != kcb->jprobe_saved_esp) { struct pt_regs *saved_regs = - container_of(jprobe_saved_esp, struct pt_regs, esp); + container_of(kcb->jprobe_saved_esp, + struct pt_regs, esp); printk("current esp %p does not match saved esp %p\n", - ®s->esp, jprobe_saved_esp); + ®s->esp, kcb->jprobe_saved_esp); printk("Saved registers for jprobe %p\n", jp); show_registers(saved_regs); printk("Current registers\n"); show_registers(regs); BUG(); } - *regs = jprobe_saved_regs; - memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack, + *regs = kcb->jprobe_saved_regs; + memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, MIN_STACK_SIZE(stack_addr)); + preempt_enable_no_resched(); return 1; } return 0; diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index fe1ffa55587d..983f95707e11 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -18,6 +18,7 @@ #include <asm/system.h> #include <asm/ldt.h> #include <asm/desc.h> +#include <asm/mmu_context.h> #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index 8600faeea29d..558bb207720f 100644 --- a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -132,7 +132,7 @@ static struct resource mca_standard_resources[] = { { .start = 0x100, .end = 0x107, .name = "POS (MCA)" } }; -#define MCA_STANDARD_RESOURCES (sizeof(mca_standard_resources)/sizeof(struct resource)) +#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources) /** * mca_read_and_store_pos - read the POS registers into a memory buffer diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 15949fd08109..1ca5269b1e86 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -14,7 +14,6 @@ */ #include <linux/mm.h> -#include <linux/irq.h> #include <linux/init.h> #include <linux/acpi.h> #include <linux/delay.h> @@ -70,7 +69,7 @@ unsigned int def_to_bigsmp = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; /* Internal processor count */ -static unsigned int __initdata num_processors; +static unsigned int __devinitdata num_processors; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; @@ -120,7 +119,7 @@ static int MP_valid_apicid(int apicid, int version) } #endif -static void __init MP_processor_info (struct mpc_config_processor *m) +static void __devinit MP_processor_info (struct mpc_config_processor *m) { int ver, apicid; physid_mask_t phys_cpu; @@ -183,17 +182,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m) boot_cpu_physical_apicid = m->mpc_apicid; } - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } ver = m->mpc_apicver; if (!MP_valid_apicid(apicid, ver)) { @@ -202,11 +190,6 @@ static void __init MP_processor_info (struct mpc_config_processor *m) return; } - cpu_set(num_processors, cpu_possible_map); - num_processors++; - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - /* * Validate version */ @@ -217,9 +200,29 @@ static void __init MP_processor_info (struct mpc_config_processor *m) ver = 0x10; } apic_version[m->mpc_apicid] = ver; + + phys_cpu = apicid_to_cpu_present(apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); + + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + + if (num_processors >= maxcpus) { + printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." + " Processor ignored.\n", maxcpus); + return; + } + + cpu_set(num_processors, cpu_possible_map); + num_processors++; + if ((num_processors > 8) && - APIC_XAPIC(ver) && - (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) + ((APIC_XAPIC(ver) && + (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) || + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))) def_to_bigsmp = 1; else def_to_bigsmp = 0; @@ -835,7 +838,7 @@ void __init mp_register_lapic_address ( } -void __init mp_register_lapic ( +void __devinit mp_register_lapic ( u8 id, u8 enabled) { diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 03100d6fc5d6..44470fea4309 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -246,7 +246,7 @@ static int msr_class_device_create(int i) int err = 0; struct class_device *class_err; - class_err = class_device_create(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); + class_err = class_device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); if (IS_ERR(class_err)) err = PTR_ERR(class_err); return err; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 0178457db721..d661703ac1cb 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -15,7 +15,6 @@ #include <linux/config.h> #include <linux/mm.h> -#include <linux/irq.h> #include <linux/delay.h> #include <linux/bootmem.h> #include <linux/smp_lock.h> @@ -101,16 +100,44 @@ int nmi_active; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) +#ifdef CONFIG_SMP +/* The performance counters used by NMI_LOCAL_APIC don't trigger when + * the CPU is idle. To make sure the NMI watchdog really ticks on all + * CPUs during the test make them busy. + */ +static __init void nmi_cpu_busy(void *data) +{ + volatile int *endflag = data; + local_irq_enable(); + /* Intentionally don't use cpu_relax here. This is + to make sure that the performance counter really ticks, + even if there is a simulator or similar that catches the + pause instruction. On a real HT machine this is fine because + all other CPUs are busy with "useless" delay loops and don't + care if they get somewhat less cycles. */ + while (*endflag == 0) + barrier(); +} +#endif + static int __init check_nmi_watchdog(void) { - unsigned int prev_nmi_count[NR_CPUS]; + volatile int endflag = 0; + unsigned int *prev_nmi_count; int cpu; if (nmi_watchdog == NMI_NONE) return 0; + prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) + return -1; + printk(KERN_INFO "Testing NMI watchdog ... "); + if (nmi_watchdog == NMI_LOCAL_APIC) + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); + for (cpu = 0; cpu < NR_CPUS; cpu++) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); @@ -124,12 +151,18 @@ static int __init check_nmi_watchdog(void) continue; #endif if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk("CPU#%d: NMI appears to be stuck!\n", cpu); + endflag = 1; + printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, + prev_nmi_count[cpu], + nmi_count(cpu)); nmi_active = 0; lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; + kfree(prev_nmi_count); return -1; } } + endflag = 1; printk("OK.\n"); /* now that we know it works we can reduce NMI frequency to @@ -137,6 +170,7 @@ static int __init check_nmi_watchdog(void) if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = 1; + kfree(prev_nmi_count); return 0; } /* This needs to happen later in boot so counters are working */ diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 1e51427cc9eb..25fe66853934 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -23,7 +23,7 @@ struct dma_coherent_mem { }; void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast gfp) + dma_addr_t *dma_handle, gfp_t gfp) { void *ret; struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index b45cbf93d439..df6c2bcde067 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -47,13 +47,11 @@ #include <asm/ldt.h> #include <asm/processor.h> #include <asm/i387.h> -#include <asm/irq.h> #include <asm/desc.h> #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> #endif -#include <linux/irq.h> #include <linux/err.h> #include <asm/tlbflush.h> @@ -101,14 +99,22 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { + local_irq_enable(); + if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + while (!need_resched()) { + local_irq_disable(); + if (!need_resched()) + safe_halt(); + else + local_irq_enable(); + } + set_thread_flag(TIF_POLLING_NRFLAG); } else { - cpu_relax(); + while (!need_resched()) + cpu_relax(); } } #ifdef CONFIG_APM_MODULE @@ -122,29 +128,14 @@ EXPORT_SYMBOL(default_idle); */ static void poll_idle (void) { - int oldval; - local_irq_enable(); - /* - * Deal with another CPU just having chosen a thread to - * run here: - */ - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - asm volatile( - "2:" - "testl %0, %1;" - "rep; nop;" - "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); - - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } + asm volatile( + "2:" + "testl %0, %1;" + "rep; nop;" + "je 2b;" + : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); } #ifdef CONFIG_HOTPLUG_CPU @@ -181,7 +172,9 @@ static inline void play_dead(void) */ void cpu_idle(void) { - int cpu = raw_smp_processor_id(); + int cpu = smp_processor_id(); + + set_thread_flag(TIF_POLLING_NRFLAG); /* endless idle loop with no priority at all */ while (1) { @@ -203,7 +196,9 @@ void cpu_idle(void) __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } @@ -246,15 +241,12 @@ static void mwait_idle(void) { local_irq_enable(); - if (!need_resched()) { - set_thread_flag(TIF_POLLING_NRFLAG); - do { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (need_resched()) - break; - __mwait(0, 0); - } while (!need_resched()); - clear_thread_flag(TIF_POLLING_NRFLAG); + while (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (need_resched()) + break; + __mwait(0, 0); } } @@ -401,13 +393,6 @@ void flush_thread(void) { struct task_struct *tsk = current; - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(tsk); - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 7b6368bf8974..5ffbb4b7ad05 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -354,49 +354,12 @@ ptrace_set_thread_area(struct task_struct *child, return 0; } -asmlinkage int sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; struct user * dummy = NULL; int i, ret; unsigned long __user *datap = (unsigned long __user *)data; - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -663,10 +626,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + out_tsk: return ret; } diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 350ea6680f63..2afe0f8d555a 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -111,6 +111,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), }, }, + { /* Handle problems with rebooting on HP nc6120 */ + .callback = set_bios_reboot, + .ident = "HP Compaq nc6120", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6120"), + }, + }, { } }; diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index 1b183b378c2c..10e21a4773dd 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c @@ -10,6 +10,7 @@ #include <asm/delay.h> #include <linux/pci.h> +#include <linux/reboot_fixups.h> static void cs5530a_warm_reset(struct pci_dev *dev) { @@ -42,9 +43,9 @@ void mach_reboot_fixups(void) struct pci_dev *dev; int i; - for (i=0; i < (sizeof(fixups_table)/sizeof(fixups_table[0])); i++) { + for (i=0; i < ARRAY_SIZE(fixups_table); i++) { cur = &(fixups_table[i]); - dev = pci_get_device(cur->vendor, cur->device, 0); + dev = pci_get_device(cur->vendor, cur->device, NULL); if (!dev) continue; diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 69e203a0d330..9c968ae67c43 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -12,6 +12,7 @@ #include <linux/pci.h> #include <linux/scx200.h> +#include <linux/scx200_gpio.h> /* Verify that the configuration block really is there */ #define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base)) diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 9b8c8a19824d..fdfcb0cba9b4 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -129,9 +129,7 @@ struct drive_info_struct { char dummy[32]; } drive_info; EXPORT_SYMBOL(drive_info); #endif struct screen_info screen_info; -#ifdef CONFIG_VT EXPORT_SYMBOL(screen_info); -#endif struct apm_info apm_info; EXPORT_SYMBOL(apm_info); struct sys_desc_table_struct { @@ -389,14 +387,24 @@ static void __init limit_regions(unsigned long long size) } } for (i = 0; i < e820.nr_map; i++) { - if (e820.map[i].type == E820_RAM) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr >= size) { - e820.map[i].size -= current_addr-size; - e820.nr_map = i + 1; - return; - } + current_addr = e820.map[i].addr + e820.map[i].size; + if (current_addr < size) + continue; + + if (e820.map[i].type != E820_RAM) + continue; + + if (e820.map[i].addr >= size) { + /* + * This region starts past the end of the + * requested size, skip it completely. + */ + e820.nr_map = i; + } else { + e820.nr_map = i + 1; + e820.map[i].size -= current_addr - size; } + return; } } diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 61eb0c8a6e47..adcd069db91e 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -338,7 +338,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) esp = (unsigned long) ka->sa.sa_restorer; } - return (void __user *)((esp - frame_size) & -8ul); + esp -= frame_size; + /* Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. */ + esp = ((esp + 4) & -16ul) - 4; + return (void __user *) esp; } /* These symbols are defined with the addresses in the vsyscall page. diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 48b55db3680f..218d725a5a1e 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/mm.h> -#include <linux/irq.h> #include <linux/delay.h> #include <linux/spinlock.h> #include <linux/smp_lock.h> diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 5f0a95d76a4f..d16520da4550 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -42,7 +42,6 @@ #include <linux/sched.h> #include <linux/kernel_stat.h> #include <linux/smp_lock.h> -#include <linux/irq.h> #include <linux/bootmem.h> #include <linux/notifier.h> #include <linux/cpu.h> @@ -69,15 +68,15 @@ EXPORT_SYMBOL(smp_num_siblings); /* Package ID of each logical CPU */ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; -EXPORT_SYMBOL(phys_proc_id); /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; -EXPORT_SYMBOL(cpu_core_id); +/* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); +/* representing HT and core siblings of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); @@ -88,7 +87,11 @@ EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); +#ifdef CONFIG_HOTPLUG_CPU +cpumask_t cpu_possible_map = CPU_MASK_ALL; +#else cpumask_t cpu_possible_map; +#endif EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; @@ -441,35 +444,60 @@ static void __devinit smp_callin(void) static int cpucount; +/* representing cpus for which sibling maps can be computed */ +static cpumask_t cpu_sibling_setup_map; + static inline void set_cpu_sibling_map(int cpu) { int i; + struct cpuinfo_x86 *c = cpu_data; + + cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (cpu_core_id[cpu] == cpu_core_id[i]) { + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i] && + cpu_core_id[cpu] == cpu_core_id[i]) { cpu_set(i, cpu_sibling_map[cpu]); cpu_set(cpu, cpu_sibling_map[i]); + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } - if (current_cpu_data.x86_num_cores > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); - } - } - } else { + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; + c[cpu].booted_cores = 1; + return; + } + + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + /* + * Does this new cpu bringup a new core? + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + /* + * for each core in package, increment + * the booted_cores for this new cpu + */ + if (first_cpu(cpu_sibling_map[i]) == i) + c[cpu].booted_cores++; + /* + * increment the core count for all + * the other cpus in this package + */ + if (i != cpu) + c[i].booted_cores++; + } else if (i != cpu && !c[cpu].booted_cores) + c[cpu].booted_cores = c[i].booted_cores; + } } } @@ -484,6 +512,7 @@ static void __devinit start_secondary(void *unused) * things done here to the most necessary things. */ cpu_init(); + preempt_disable(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) rep_nop(); @@ -609,7 +638,7 @@ static inline void __inquire_remote_apic(int apicid) printk("Inquiring remote APIC #%d...\n", apicid); - for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { + for (i = 0; i < ARRAY_SIZE(regs); i++) { printk("... APIC #%d %s: ", apicid, names[i]); /* @@ -1093,11 +1122,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; smp_tune_scheduling(); - cpus_clear(cpu_sibling_map[0]); - cpu_set(0, cpu_sibling_map[0]); - cpus_clear(cpu_core_map[0]); - cpu_set(0, cpu_core_map[0]); + set_cpu_sibling_map(0); /* * If we couldn't find an SMP configuration at boot time, @@ -1276,15 +1302,24 @@ static void remove_siblinginfo(int cpu) { int sibling; + struct cpuinfo_x86 *c = cpu_data; + for_each_cpu_mask(sibling, cpu_core_map[cpu]) { + cpu_clear(cpu, cpu_core_map[sibling]); + /* + * last thread sibling in this cpu core going down + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) + c[sibling].booted_cores--; + } + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) cpu_clear(cpu, cpu_sibling_map[sibling]); - for_each_cpu_mask(sibling, cpu_core_map[cpu]) - cpu_clear(cpu, cpu_core_map[sibling]); cpus_clear(cpu_sibling_map[cpu]); cpus_clear(cpu_core_map[cpu]); phys_proc_id[cpu] = BAD_APICID; cpu_core_id[cpu] = BAD_APICID; + cpu_clear(cpu, cpu_sibling_setup_map); } int __cpu_disable(void) diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 516bf5653b02..52b3ed5d2cb5 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -137,8 +137,8 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } -#if MAX_NR_ZONES != 3 -#error "MAX_NR_ZONES != 3, chunk_to_zone requires review" +#if MAX_NR_ZONES != 4 +#error "MAX_NR_ZONES != 4, chunk_to_zone requires review" #endif /* Take a chunk of pages from page frame cstart to cend and count the number * of pages in each zone, returned via zones[]. @@ -327,7 +327,12 @@ int __init get_memcfg_from_srat(void) int tables = 0; int i = 0; - acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, rsdp_address); + if (ACPI_FAILURE(acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, + rsdp_address))) { + printk("%s: System description tables not found\n", + __FUNCTION__); + goto out_err; + } if (rsdp_address->pointer_type == ACPI_PHYSICAL_POINTER) { printk("%s: assigning address to rsdp\n", __FUNCTION__); diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 2883a4d4f01f..41c5b2dc6200 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -74,10 +74,6 @@ int pit_latch_buggy; /* extern */ #include "do_timer.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - unsigned int cpu_khz; /* Detected as we calibrate the TSC */ EXPORT_SYMBOL(cpu_khz); diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 658c0629ba6a..9caeaa315cd7 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -275,6 +275,7 @@ static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; static unsigned long PIE_count; static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ +static unsigned int hpet_t1_cmp; /* cached comparator register */ /* * Timer 1 for RTC, we do not use periodic interrupt feature, @@ -306,10 +307,12 @@ int hpet_rtc_timer_init(void) cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); + hpet_t1_cmp = cnt; local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); - cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; + cfg &= ~HPET_TN_PERIODIC; + cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); return 1; @@ -319,8 +322,12 @@ static void hpet_rtc_timer_reinit(void) { unsigned int cfg, cnt; - if (!(PIE_on | AIE_on | UIE_on)) + if (unlikely(!(PIE_on | AIE_on | UIE_on))) { + cfg = hpet_readl(HPET_T1_CFG); + cfg &= ~HPET_TN_ENABLE; + hpet_writel(cfg, HPET_T1_CFG); return; + } if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) hpet_rtc_int_freq = PIE_freq; @@ -328,15 +335,10 @@ static void hpet_rtc_timer_reinit(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_readl(HPET_T1_CMP); + cnt = hpet_t1_cmp; cnt += hpet_tick*HZ/hpet_rtc_int_freq; hpet_writel(cnt, HPET_T1_CMP); - - cfg = hpet_readl(HPET_T1_CFG); - cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T1_CFG); - - return; + hpet_t1_cmp = cnt; } /* diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index d973a8b681fd..be242723c339 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -30,23 +30,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) * * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * (10^6 * SC / cpu_khz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div * into a shift. + * + * We can use khz divisor instead of mhz to keep a better percision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ static unsigned long cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -163,7 +168,7 @@ static int __init init_hpet(char* override) printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); } /* set this only when cpu_has_tsc */ timer_hpet.read_timer = read_timer_tsc; diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c index eddb64038234..b9b6bd56b9ba 100644 --- a/arch/i386/kernel/timers/timer_pit.c +++ b/arch/i386/kernel/timers/timer_pit.c @@ -6,7 +6,6 @@ #include <linux/spinlock.h> #include <linux/module.h> #include <linux/device.h> -#include <linux/irq.h> #include <linux/sysdev.h> #include <linux/timex.h> #include <asm/delay.h> @@ -26,8 +25,9 @@ static int __init init_pit(char* override) { /* check clock override */ if (override[0] && strncmp(override,"pit",3)) - printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n"); - + printk(KERN_ERR "Warning: clock= override failed. Defaulting " + "to PIT\n"); + init_cpu_khz(); count_p = LATCH; return 0; } diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 6dd470cc9f72..d395e3b42485 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -49,23 +49,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) * * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * (10^6 * SC / cpu_khz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div - * into a shift. + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better percision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ static unsigned long cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -286,7 +291,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (use_tsc) { if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); } } #endif @@ -536,7 +541,7 @@ static int __init init_tsc(char* override) printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); return 0; } } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 431a551e46ea..c34d1bfc5161 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -52,7 +52,6 @@ #include <asm/arch_hooks.h> #include <asm/kdebug.h> -#include <linux/irq.h> #include <linux/module.h> #include "mach_traps.h" @@ -489,6 +488,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, tss->io_bitmap_max - thread->io_bitmap_max); tss->io_bitmap_max = thread->io_bitmap_max; tss->io_bitmap_base = IO_BITMAP_OFFSET; + tss->io_bitmap_owner = thread; put_cpu(); return; } diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index 16b485009622..fc1993564f98 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -134,17 +134,16 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) return ret; } -static void mark_screen_rdonly(struct task_struct * tsk) +static void mark_screen_rdonly(struct mm_struct *mm) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, *mapped; + pte_t *pte; + spinlock_t *ptl; int i; - preempt_disable(); - spin_lock(&tsk->mm->page_table_lock); - pgd = pgd_offset(tsk->mm, 0xA0000); + pgd = pgd_offset(mm, 0xA0000); if (pgd_none_or_clear_bad(pgd)) goto out; pud = pud_offset(pgd, 0xA0000); @@ -153,16 +152,14 @@ static void mark_screen_rdonly(struct task_struct * tsk) pmd = pmd_offset(pud, 0xA0000); if (pmd_none_or_clear_bad(pmd)) goto out; - pte = mapped = pte_offset_map(pmd, 0xA0000); + pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); for (i = 0; i < 32; i++) { if (pte_present(*pte)) set_pte(pte, pte_wrprotect(*pte)); pte++; } - pte_unmap(mapped); + pte_unmap_unlock(pte, ptl); out: - spin_unlock(&tsk->mm->page_table_lock); - preempt_enable(); flush_tlb(); } @@ -306,7 +303,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) - mark_screen_rdonly(tsk); + mark_screen_rdonly(tsk->mm); __asm__ __volatile__( "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" "movl %0,%%esp\n\t" diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c index e5a1a83d09ef..b4a7455c6993 100644 --- a/arch/i386/mach-default/setup.c +++ b/arch/i386/mach-default/setup.c @@ -5,7 +5,6 @@ #include <linux/config.h> #include <linux/smp.h> #include <linux/init.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <asm/acpi.h> #include <asm/arch_hooks.h> diff --git a/arch/i386/mach-es7000/es7000.h b/arch/i386/mach-es7000/es7000.h index 898ed905e119..f1e3204f5dec 100644 --- a/arch/i386/mach-es7000/es7000.h +++ b/arch/i386/mach-es7000/es7000.h @@ -24,6 +24,15 @@ * http://www.unisys.com */ +/* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + #define MIP_REG 1 #define MIP_PSAI_REG 4 @@ -106,6 +115,6 @@ struct mip_reg { extern int parse_unisys_oem (char *oemptr); extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void setup_unisys (); +extern void setup_unisys(void); extern int es7000_start_cpu(int cpu, unsigned long eip); extern void es7000_sw_apic(void); diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c index dc6660511b07..a9ab0644f403 100644 --- a/arch/i386/mach-es7000/es7000plat.c +++ b/arch/i386/mach-es7000/es7000plat.c @@ -62,6 +62,9 @@ static unsigned int base; static int es7000_rename_gsi(int ioapic, int gsi) { + if (es7000_plat == ES7000_ZORRO) + return gsi; + if (!base) { int i; for (i = 0; i < nr_ioapics; i++) @@ -76,7 +79,7 @@ es7000_rename_gsi(int ioapic, int gsi) #endif /* (CONFIG_X86_IO_APIC) && (CONFIG_ACPI) */ void __init -setup_unisys () +setup_unisys(void) { /* * Determine the generation of the ES7000 currently running. @@ -86,9 +89,9 @@ setup_unisys () * */ if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) - es7000_plat = 2; + es7000_plat = ES7000_ZORRO; else - es7000_plat = 1; + es7000_plat = ES7000_CLASSIC; ioapic_renumber_irq = es7000_rename_gsi; } @@ -151,7 +154,7 @@ parse_unisys_oem (char *oemptr) } if (success < 2) { - es7000_plat = 0; + es7000_plat = NON_UNISYS; } else setup_unisys(); return es7000_plat; diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c index 26ada6fc0d77..07fac7e749c7 100644 --- a/arch/i386/mach-visws/setup.c +++ b/arch/i386/mach-visws/setup.c @@ -5,7 +5,6 @@ #include <linux/smp.h> #include <linux/init.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <asm/fixmap.h> diff --git a/arch/i386/mach-visws/visws_apic.c b/arch/i386/mach-visws/visws_apic.c index 04e6585849a2..3e64fb721291 100644 --- a/arch/i386/mach-visws/visws_apic.c +++ b/arch/i386/mach-visws/visws_apic.c @@ -19,7 +19,6 @@ #include <linux/config.h> #include <linux/kernel_stat.h> #include <linux/interrupt.h> -#include <linux/irq.h> #include <linux/smp_lock.h> #include <linux/init.h> diff --git a/arch/i386/mach-voyager/setup.c b/arch/i386/mach-voyager/setup.c index df123fc487bb..7d8a3acb9441 100644 --- a/arch/i386/mach-voyager/setup.c +++ b/arch/i386/mach-voyager/setup.c @@ -4,7 +4,6 @@ #include <linux/config.h> #include <linux/init.h> -#include <linux/irq.h> #include <linux/interrupt.h> #include <asm/acpi.h> #include <asm/arch_hooks.h> diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c index cc69875d979b..aa49a33a572c 100644 --- a/arch/i386/mach-voyager/voyager_basic.c +++ b/arch/i386/mach-voyager/voyager_basic.c @@ -27,7 +27,6 @@ #include <asm/voyager.h> #include <asm/vic.h> #include <linux/pm.h> -#include <linux/irq.h> #include <asm/tlbflush.h> #include <asm/arch_hooks.h> #include <asm/i8253.h> diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 46b0cf4a31e0..72a1b9cae2e4 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -30,8 +30,6 @@ #include <asm/tlbflush.h> #include <asm/arch_hooks.h> -#include <linux/irq.h> - /* TLB state -- visible externally, indexed physically */ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 }; diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c index a9341b0eebff..2b03884fdb2a 100644 --- a/arch/i386/mach-voyager/voyager_thread.c +++ b/arch/i386/mach-voyager/voyager_thread.c @@ -31,8 +31,6 @@ #include <asm/mtrr.h> #include <asm/msr.h> -#include <linux/irq.h> - #define THREAD_NAME "kvoyagerd" /* external variables */ diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 244d8ec66be2..c4af9638dbfa 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -98,7 +98,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, extern unsigned long find_max_low_pfn(void); extern void find_max_pfn(void); -extern void one_highpage_init(struct page *, int, int); +extern void add_one_highpage_init(struct page *, int, int); extern struct e820map e820; extern unsigned long init_pg_tables_end; @@ -427,7 +427,7 @@ void __init set_highmem_pages_init(int bad_ppro) if (!pfn_valid(node_pfn)) continue; page = pfn_to_page(node_pfn); - one_highpage_init(page, node_pfn, bad_ppro); + add_one_highpage_init(page, node_pfn, bad_ppro); } } totalram_pages += totalhigh_pages; diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 9edd4485b91e..cf572d9a3b6e 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -108,7 +108,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, desc = (void *)desc + (seg & ~7); } else { /* Must disable preemption while reading the GDT. */ - desc = (u32 *)&per_cpu(cpu_gdt_table, get_cpu()); + desc = (u32 *)get_cpu_gdt_table(get_cpu()); desc = (void *)desc + (seg & ~7); } diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 2ebaf75f732e..06e26f006238 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -27,6 +27,8 @@ #include <linux/slab.h> #include <linux/proc_fs.h> #include <linux/efi.h> +#include <linux/memory_hotplug.h> +#include <linux/initrd.h> #include <asm/processor.h> #include <asm/system.h> @@ -266,17 +268,46 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +static void __devinit free_new_highpage(struct page *page) +{ + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; +} + +void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) { if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); - totalhigh_pages++; + free_new_highpage(page); } else SetPageReserved(page); } +static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) +{ + free_new_highpage(page); + totalram_pages++; +#ifdef CONFIG_FLATMEM + max_mapnr = max(pfn, max_mapnr); +#endif + num_physpages++; + return 0; +} + +/* + * Not currently handling the NUMA case. + * Assuming single node and all memory that + * has been added dynamically that would be + * onlined here is in HIGHMEM + */ +void online_page(struct page *page) +{ + ClearPageReserved(page); + add_one_highpage_hotplug(page, page_to_pfn(page)); +} + + #ifdef CONFIG_NUMA extern void set_highmem_pages_init(int); #else @@ -284,7 +315,7 @@ static void __init set_highmem_pages_init(int bad_ppro) { int pfn; for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) - one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); totalram_pages += totalhigh_pages; } #endif /* CONFIG_FLATMEM */ @@ -615,6 +646,28 @@ void __init mem_init(void) #endif } +/* + * this is for the non-NUMA, single node SMP system case. + * Specifically, in the case of x86, we will always add + * memory to the highmem for now. + */ +#ifndef CONFIG_NEED_MULTIPLE_NODES +int add_memory(u64 start, u64 size) +{ + struct pglist_data *pgdata = &contig_page_data; + struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + return __add_pages(zone, start_pfn, nr_pages); +} + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} +#endif + kmem_cache_t *pgd_cache; kmem_cache_t *pmd_cache; diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index f379b8d67558..5d09de8d1c6b 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -28,7 +28,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long pfn; pfn = phys_addr >> PAGE_SHIFT; - pte = pte_alloc_kernel(&init_mm, pmd, addr); + pte = pte_alloc_kernel(pmd, addr); if (!pte) return -ENOMEM; do { @@ -87,14 +87,12 @@ static int ioremap_page_range(unsigned long addr, flush_cache_all(); phys_addr -= addr; pgd = pgd_offset_k(addr); - spin_lock(&init_mm.page_table_lock); do { next = pgd_addr_end(addr, end); err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags); if (err) break; } while (pgd++, addr = next, addr != end); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return err; } diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index dcdce2c6c532..9db3242103be 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -31,11 +31,13 @@ void show_mem(void) pg_data_t *pgdat; unsigned long i; struct page_state ps; + unsigned long flags; printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); total++; @@ -48,6 +50,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page) - 1; } + pgdat_resize_unlock(pgdat, &flags); } printk(KERN_INFO "%d pages of RAM\n", total); printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); @@ -188,19 +191,19 @@ static inline void pgd_list_add(pgd_t *pgd) struct page *page = virt_to_page(pgd); page->index = (unsigned long)pgd_list; if (pgd_list) - pgd_list->private = (unsigned long)&page->index; + set_page_private(pgd_list, (unsigned long)&page->index); pgd_list = page; - page->private = (unsigned long)&pgd_list; + set_page_private(page, (unsigned long)&pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct page *next, **pprev, *page = virt_to_page(pgd); next = (struct page *)page->index; - pprev = (struct page **)page->private; + pprev = (struct page **)page_private(page); *pprev = next; if (next) - next->private = (unsigned long)pprev; + set_page_private(next, (unsigned long)pprev); } void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) diff --git a/arch/i386/oprofile/Kconfig b/arch/i386/oprofile/Kconfig index 5ade19801b97..d8a84088471a 100644 --- a/arch/i386/oprofile/Kconfig +++ b/arch/i386/oprofile/Kconfig @@ -1,7 +1,3 @@ - -menu "Profiling support" - depends on EXPERIMENTAL - config PROFILING bool "Profiling support (EXPERIMENTAL)" help @@ -19,5 +15,3 @@ config OPROFILE If unsure, say N. -endmenu - diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c index 65dfd2edb671..21654be3f73f 100644 --- a/arch/i386/oprofile/backtrace.c +++ b/arch/i386/oprofile/backtrace.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <asm/ptrace.h> +#include <asm/uaccess.h> struct frame_head { struct frame_head * ebp; @@ -21,26 +22,22 @@ struct frame_head { static struct frame_head * dump_backtrace(struct frame_head * head) { - oprofile_add_trace(head->ret); + struct frame_head bufhead[2]; - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= head->ebp) + /* Also check accessibility of one struct frame_head beyond */ + if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) + return NULL; + if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) return NULL; - return head->ebp; -} - -/* check that the page(s) containing the frame head are present */ -static int pages_present(struct frame_head * head) -{ - struct mm_struct * mm = current->mm; + oprofile_add_trace(bufhead[0].ret); - /* FIXME: only necessary once per page */ - if (!check_user_page_readable(mm, (unsigned long)head)) - return 0; + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (head >= bufhead[0].ebp) + return NULL; - return check_user_page_readable(mm, (unsigned long)(head + 1)); + return bufhead[0].ebp; } /* @@ -97,15 +94,6 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) return; } -#ifdef CONFIG_SMP - if (!spin_trylock(¤t->mm->page_table_lock)) - return; -#endif - - while (depth-- && head && pages_present(head)) + while (depth-- && head) head = dump_backtrace(head); - -#ifdef CONFIG_SMP - spin_unlock(¤t->mm->page_table_lock); -#endif } diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c index ad93cdd55d63..930a1127bb30 100644 --- a/arch/i386/oprofile/nmi_timer_int.c +++ b/arch/i386/oprofile/nmi_timer_int.c @@ -9,7 +9,7 @@ #include <linux/init.h> #include <linux/smp.h> -#include <linux/irq.h> +#include <linux/errno.h> #include <linux/oprofile.h> #include <linux/rcupdate.h> diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c index 2941674f35eb..4c4522b43be5 100644 --- a/arch/i386/pci/acpi.c +++ b/arch/i386/pci/acpi.c @@ -2,7 +2,6 @@ #include <linux/acpi.h> #include <linux/init.h> #include <linux/irq.h> -#include <asm/hw_irq.h> #include <asm/numa.h> #include "pci.h" diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index c96bea14b98f..f6bc48da4d2a 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -132,7 +132,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) } } - printk("PCI: Probing PCI hardware (bus %02x)\n", busnum); + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL); } @@ -144,7 +144,7 @@ static int __init pcibios_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (!raw_pci_ops) { - printk("PCI: System does not support PCI\n"); + printk(KERN_WARNING "PCI: System does not support PCI\n"); return 0; } diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c index 30b7e9b4f6a2..94331d6be7a3 100644 --- a/arch/i386/pci/direct.c +++ b/arch/i386/pci/direct.c @@ -201,7 +201,7 @@ static int __init pci_sanity_check(struct pci_raw_ops *o) return 1; } - DBG("PCI: Sanity check failed\n"); + DBG(KERN_WARNING "PCI: Sanity check failed\n"); return 0; } diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index 8e8e895e1b5a..eeb1b1f2d548 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -2,6 +2,8 @@ * Exceptions for specific devices. Usually work-arounds for fatal design flaws. */ +#include <linux/delay.h> +#include <linux/dmi.h> #include <linux/pci.h> #include <linux/init.h> #include "pci.h" @@ -384,3 +386,59 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev) } } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); + +/* + * Some Toshiba laptops need extra code to enable their TI TSB43AB22/A. + * + * We pretend to bring them out of full D3 state, and restore the proper + * IRQ, PCI cache line size, and BARs, otherwise the device won't function + * properly. In some cases, the device will generate an interrupt on + * the wrong IRQ line, causing any devices sharing the the line it's + * *supposed* to use to be disabled by the kernel's IRQ debug code. + */ +static u16 toshiba_line_size; + +static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { + { + .ident = "Toshiba PS5 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PS5"), + }, + }, + { + .ident = "Toshiba PSM4 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PSM4"), + }, + }, + { } +}; + +static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev) +{ + if (!dmi_check_system(toshiba_ohci1394_dmi_table)) + return; /* only applies to certain Toshibas (so far) */ + + dev->current_state = PCI_D3cold; + pci_read_config_word(dev, PCI_CACHE_LINE_SIZE, &toshiba_line_size); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032, + pci_pre_fixup_toshiba_ohci1394); + +static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev) +{ + if (!dmi_check_system(toshiba_ohci1394_dmi_table)) + return; /* only applies to certain Toshibas (so far) */ + + /* Restore config space on Toshiba laptops */ + pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, toshiba_line_size); + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, (u8 *)&dev->irq); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, + pci_resource_start(dev, 0)); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, + pci_resource_start(dev, 1)); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_TI, 0x8032, + pci_post_fixup_toshiba_ohci1394); diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c index 6d6338500c3c..ed2c8c899bd3 100644 --- a/arch/i386/pci/i386.c +++ b/arch/i386/pci/i386.c @@ -221,6 +221,11 @@ int pcibios_enable_resources(struct pci_dev *dev, int mask) continue; r = &dev->resource[idx]; + if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if ((idx == PCI_ROM_RESOURCE) && + (!(r->flags & IORESOURCE_ROM_ENABLE))) + continue; if (!r->start && r->end) { printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev)); return -EINVAL; @@ -230,8 +235,6 @@ int pcibios_enable_resources(struct pci_dev *dev, int mask) if (r->flags & IORESOURCE_MEM) cmd |= PCI_COMMAND_MEMORY; } - if (dev->resource[PCI_ROM_RESOURCE].start) - cmd |= PCI_COMMAND_MEMORY; if (cmd != old_cmd) { printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd); pci_write_config_word(dev, PCI_COMMAND, cmd); diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 326a2edc3834..19e6f4871d1e 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -11,12 +11,11 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/interrupt.h> -#include <linux/irq.h> #include <linux/dmi.h> #include <asm/io.h> #include <asm/smp.h> #include <asm/io_apic.h> -#include <asm/hw_irq.h> +#include <linux/irq.h> #include <linux/acpi.h> #include "pci.h" @@ -548,31 +547,48 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route return 0; } -static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int via_router_probe(struct irq_router *r, + struct pci_dev *router, u16 device) { /* FIXME: We should move some of the quirk fixup stuff here */ - if (router->device == PCI_DEVICE_ID_VIA_82C686 && - device == PCI_DEVICE_ID_VIA_82C586_0) { - /* Asus k7m bios wrongly reports 82C686A as 586-compatible */ - device = PCI_DEVICE_ID_VIA_82C686; + /* + * work arounds for some buggy BIOSes + */ + if (device == PCI_DEVICE_ID_VIA_82C586_0) { + switch(router->device) { + case PCI_DEVICE_ID_VIA_82C686: + /* + * Asus k7m bios wrongly reports 82C686A + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_82C686; + break; + case PCI_DEVICE_ID_VIA_8235: + /** + * Asus a7v-x bios wrongly reports 8235 + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_8235; + break; + } } - switch(device) - { - case PCI_DEVICE_ID_VIA_82C586_0: - r->name = "VIA"; - r->get = pirq_via586_get; - r->set = pirq_via586_set; - return 1; - case PCI_DEVICE_ID_VIA_82C596: - case PCI_DEVICE_ID_VIA_82C686: - case PCI_DEVICE_ID_VIA_8231: + switch(device) { + case PCI_DEVICE_ID_VIA_82C586_0: + r->name = "VIA"; + r->get = pirq_via586_get; + r->set = pirq_via586_set; + return 1; + case PCI_DEVICE_ID_VIA_82C596: + case PCI_DEVICE_ID_VIA_82C686: + case PCI_DEVICE_ID_VIA_8231: + case PCI_DEVICE_ID_VIA_8235: /* FIXME: add new ones for 8233/5 */ - r->name = "VIA"; - r->get = pirq_via_get; - r->set = pirq_via_set; - return 1; + r->name = "VIA"; + r->get = pirq_via_get; + r->set = pirq_via_set; + return 1; } return 0; } diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 7b0b9ad848e5..50a0bef8c85f 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -8,25 +8,8 @@ */ #include <linux/config.h> -#include <linux/kernel.h> #include <linux/module.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/poll.h> -#include <linux/delay.h> -#include <linux/sysrq.h> -#include <linux/proc_fs.h> -#include <linux/irq.h> -#include <linux/pm.h> -#include <linux/device.h> #include <linux/suspend.h> -#include <linux/acpi.h> - -#include <asm/uaccess.h> -#include <asm/acpi.h> -#include <asm/tlbflush.h> -#include <asm/processor.h> static struct saved_context saved_context; @@ -68,16 +51,14 @@ void save_processor_state(void) __save_processor_state(&saved_context); } -static void -do_fpu_end(void) +static void do_fpu_end(void) { - /* restore FPU regs if necessary */ - /* Do it out of line so that gcc does not move cr0 load to some stupid place */ - kernel_fpu_end(); - mxcsr_feature_mask_init(); + /* + * Restore FPU regs if necessary. + */ + kernel_fpu_end(); } - static void fix_processor_context(void) { int cpu = smp_processor_id(); @@ -137,6 +118,7 @@ void __restore_processor_state(struct saved_context *ctxt) fix_processor_context(); do_fpu_end(); mtrr_ap_init(); + mcheck_init(&boot_cpu_data); } void restore_processor_state(void) |