diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/acpi/cstate.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/amd_iommu.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/amd_iommu_init.c | 67 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_uv_x.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpu.h | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c | 13 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 71 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_p4.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/scattered.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/early-quirks.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/hpet.c | 33 | ||||
-rw-r--r-- | arch/x86/kernel/hw_breakpoint.c | 40 | ||||
-rw-r--r-- | arch/x86/kernel/module.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/trampoline.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/tsc.c | 38 |
22 files changed, 241 insertions, 130 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0925676266bd..fedf32a8c3ec 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -11,6 +11,8 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg +CFLAGS_REMOVE_pvclock.o = -pg +CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index fb7a5f052e2b..fb16f17e59be 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -61,7 +61,7 @@ struct cstate_entry { unsigned int ecx; } states[ACPI_PROCESSOR_MAX_POWER]; }; -static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ +static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index fa044e1e30a2..679b6450382b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1953,6 +1953,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, size_t size, int dir) { + dma_addr_t flush_addr; dma_addr_t i, start; unsigned int pages; @@ -1960,6 +1961,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, (dma_addr + size > dma_dom->aperture_size)) return; + flush_addr = dma_addr; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr &= PAGE_MASK; start = dma_addr; @@ -1974,7 +1976,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(&dma_dom->domain, dma_addr, size); + iommu_flush_pages(&dma_dom->domain, flush_addr, size); dma_dom->need_flush = false; } } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3cc63e2b8dd4..5a170cbbbed8 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->last_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_LD(range)); iommu->evt_msi_num = MMIO_MSI_NUM(misc); + + if (is_rd890_iommu(iommu->dev)) { + pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); + pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); + pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); + pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); + } } /* @@ -649,29 +656,9 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_entry *e; /* - * First set the recommended feature enable bits from ACPI - * into the IOMMU control registers + * First save the recommended feature enable bits from ACPI */ - h->flags & IVHD_FLAG_HT_TUN_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : - iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); - - h->flags & IVHD_FLAG_PASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_PASSPW_EN); - - h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); - - h->flags & IVHD_FLAG_ISOC_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_ISOC_EN) : - iommu_feature_disable(iommu, CONTROL_ISOC_EN); - - /* - * make IOMMU memory accesses cache coherent - */ - iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + iommu->acpi_flags = h->flags; /* * Done. Now parse the device entries @@ -1116,6 +1103,40 @@ static void init_device_table(void) } } +static void iommu_init_flags(struct amd_iommu *iommu) +{ + iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); +} + +static void iommu_apply_quirks(struct amd_iommu *iommu) +{ + if (is_rd890_iommu(iommu->dev)) { + pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); + pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); + pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); + pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); + } +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -1126,6 +1147,8 @@ static void enable_iommus(void) for_each_iommu(iommu) { iommu_disable(iommu); + iommu_apply_quirks(iommu); + iommu_init_flags(iommu); iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f1efebaf5510..5c5b8f3dddb5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -306,14 +306,19 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, old_cfg = old_desc->chip_data; - memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + cfg->vector = old_cfg->vector; + cfg->move_in_progress = old_cfg->move_in_progress; + cpumask_copy(cfg->domain, old_cfg->domain); + cpumask_copy(cfg->old_domain, old_cfg->old_domain); init_copy_irq_2_pin(old_cfg, cfg, node); } -static void free_irq_cfg(struct irq_cfg *old_cfg) +static void free_irq_cfg(struct irq_cfg *cfg) { - kfree(old_cfg); + free_cpumask_var(cfg->domain); + free_cpumask_var(cfg->old_domain); + kfree(cfg); } void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 7b598b84c902..f744f54cb248 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -698,9 +698,11 @@ void __init uv_system_init(void) for (j = 0; j < 64; j++) { if (!test_bit(j, &present)) continue; - uv_blade_info[blade].pnode = (i * 64 + j); + pnode = (i * 64 + j); + uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + max_pnode = max(pnode, max_pnode); blade++; } } @@ -738,7 +740,6 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; - max_pnode = max(pnode, max_pnode); } /* Add blade/pnode info for nodes without cpus */ @@ -750,7 +751,6 @@ void __init uv_system_init(void) pnode = (paddr >> m_val) & pnode_mask; blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; - max_pnode = max(pnode, max_pnode); } map_gru_high(max_pnode); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 490dac63c2d2..f2f9ac7da25c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -545,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) } } -static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) +void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; u32 ebx; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3624e8a0f71b..f668bb1f7d43 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -33,5 +33,6 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +extern void get_cpu_cap(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 994230d4dc4e..4f6f679f2799 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -368,16 +368,22 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) return -ENODEV; out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) - return -ENODEV; + if (out_obj->type != ACPI_TYPE_BUFFER) { + ret = -ENODEV; + goto out_free; + } errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) - return -ENODEV; + if (errors) { + ret = -ENODEV; + goto out_free; + } supported = *((u32 *)(out_obj->buffer.pointer + 4)); - if (!(supported & 0x1)) - return -ENODEV; + if (!(supported & 0x1)) { + ret = -ENODEV; + goto out_free; + } out_free: kfree(output.pointer); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 85f69cdeae10..b4389441efbb 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -39,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); c->cpuid_level = cpuid_eax(0); + get_cpu_cap(c); } } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 224392d8fe8c..39aaee5c1ab2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -141,6 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) address = (low & MASK_BLKPTR_LO) >> 21; if (!address) break; + address += MCG_XBLK_ADDR; } else ++address; @@ -148,12 +149,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) if (rdmsr_safe(address, &low, &high)) break; - if (!(high & MASK_VALID_HI)) { - if (block) - continue; - else - break; - } + if (!(high & MASK_VALID_HI)) + continue; if (!(high & MASK_CNTP_HI) || (high & MASK_LOCKED_HI)) @@ -530,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) err = -ENOMEM; goto out; } - if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { kfree(b); err = -ENOMEM; goto out; @@ -543,7 +540,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifndef CONFIG_SMP cpumask_setall(b->cpus); #else - cpumask_copy(b->cpus, c->llc_shared_map); + cpumask_set_cpu(cpu, b->cpus); #endif per_cpu(threshold_banks, cpu)[bank] = b; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index c2a8b26d4fea..169d8804a9f8 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -202,10 +202,11 @@ static int therm_throt_process(bool new_event, int event, int level) #ifdef CONFIG_SYSFS /* Add/Remove thermal_throttle interface for CPU device: */ -static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) +static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, + unsigned int cpu) { int err; - struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + struct cpuinfo_x86 *c = &cpu_data(cpu); err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); if (err) @@ -215,7 +216,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) err = sysfs_add_file_to_group(&sys_dev->kobj, &attr_core_power_limit_count.attr, thermal_attr_group.name); - if (cpu_has(c, X86_FEATURE_PTS)) + if (cpu_has(c, X86_FEATURE_PTS)) { err = sysfs_add_file_to_group(&sys_dev->kobj, &attr_package_throttle_count.attr, thermal_attr_group.name); @@ -223,6 +224,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) err = sysfs_add_file_to_group(&sys_dev->kobj, &attr_package_power_limit_count.attr, thermal_attr_group.name); + } return err; } @@ -251,7 +253,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: mutex_lock(&therm_cpu_lock); - err = thermal_throttle_add_dev(sys_dev); + err = thermal_throttle_add_dev(sys_dev, cpu); mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; @@ -287,7 +289,7 @@ static __init int thermal_throttle_init_device(void) #endif /* connect live CPUs to sysfs */ for_each_online_cpu(cpu) { - err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); + err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); WARN_ON(err); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f2da20fda02d..03a5b0385ad6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -102,6 +102,7 @@ struct cpu_hw_events { */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; int n_events; @@ -1010,6 +1011,7 @@ static int x86_pmu_start(struct perf_event *event) x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); + __set_bit(idx, cpuc->running); x86_pmu.enable(event); perf_event_update_userpage(event); @@ -1141,8 +1143,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) + if (!test_bit(idx, cpuc->active_mask)) { + /* + * Though we deactivated the counter some cpus + * might still deliver spurious interrupts still + * in flight. Catch them: + */ + if (__test_and_clear_bit(idx, cpuc->running)) + handled++; continue; + } event = cpuc->events[idx]; hwc = &event->hw; @@ -1154,7 +1164,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) /* * event overflow */ - handled = 1; + handled++; data.period = event->hw.last_period; if (!x86_perf_event_set_period(event)) @@ -1200,12 +1210,20 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } +struct pmu_nmi_state { + unsigned int marked; + int handled; +}; + +static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); + static int __kprobes perf_event_nmi_handler(struct notifier_block *self, unsigned long cmd, void *__args) { struct die_args *args = __args; - struct pt_regs *regs; + unsigned int this_nmi; + int handled; if (!atomic_read(&active_events)) return NOTIFY_DONE; @@ -1214,22 +1232,47 @@ perf_event_nmi_handler(struct notifier_block *self, case DIE_NMI: case DIE_NMI_IPI: break; - + case DIE_NMIUNKNOWN: + this_nmi = percpu_read(irq_stat.__nmi_count); + if (this_nmi != __get_cpu_var(pmu_nmi).marked) + /* let the kernel handle the unknown nmi */ + return NOTIFY_DONE; + /* + * This one is a PMU back-to-back nmi. Two events + * trigger 'simultaneously' raising two back-to-back + * NMIs. If the first NMI handles both, the latter + * will be empty and daze the CPU. So, we drop it to + * avoid false-positive 'unknown nmi' messages. + */ + return NOTIFY_STOP; default: return NOTIFY_DONE; } - regs = args->regs; - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* - * Can't rely on the handled return value to say it was our NMI, two - * events could trigger 'simultaneously' raising two back-to-back NMIs. - * - * If the first NMI handles both, the latter will be empty and daze - * the CPU. - */ - x86_pmu.handle_irq(regs); + + handled = x86_pmu.handle_irq(args->regs); + if (!handled) + return NOTIFY_DONE; + + this_nmi = percpu_read(irq_stat.__nmi_count); + if ((handled > 1) || + /* the next nmi could be a back-to-back nmi */ + ((__get_cpu_var(pmu_nmi).marked == this_nmi) && + (__get_cpu_var(pmu_nmi).handled > 1))) { + /* + * We could have two subsequent back-to-back nmis: The + * first handles more than one counter, the 2nd + * handles only one counter and the 3rd handles no + * counter. + * + * This is the 2nd nmi because the previous was + * handling more than one counter. We will mark the + * next (3rd) and then drop it if unhandled. + */ + __get_cpu_var(pmu_nmi).marked = this_nmi + 1; + __get_cpu_var(pmu_nmi).handled = handled; + } return NOTIFY_STOP; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index d8d86d014008..ee05c90012d2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -712,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) struct perf_sample_data data; struct cpu_hw_events *cpuc; int bit, loops; - u64 ack, status; + u64 status; + int handled = 0; perf_sample_data_init(&data, 0); @@ -728,6 +729,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) loops = 0; again: + intel_pmu_ack_status(status); if (++loops > 100) { WARN_ONCE(1, "perfevents: irq loop stuck!\n"); perf_event_print_debug(); @@ -736,19 +738,22 @@ again: } inc_irq_stat(apic_perf_irqs); - ack = status; intel_pmu_lbr_read(); /* * PEBS overflow sets bit 62 in the global status register */ - if (__test_and_clear_bit(62, (unsigned long *)&status)) + if (__test_and_clear_bit(62, (unsigned long *)&status)) { + handled++; x86_pmu.drain_pebs(regs); + } for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; + handled++; + if (!test_bit(bit, cpuc->active_mask)) continue; @@ -761,8 +766,6 @@ again: x86_pmu_stop(event); } - intel_pmu_ack_status(ack); - /* * Repeat if there is more work to be done: */ @@ -772,7 +775,7 @@ again: done: intel_pmu_enable_all(0); - return 1; + return handled; } static struct event_constraint * diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index febb12cea795..249015173992 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -497,6 +497,8 @@ static int p4_hw_config(struct perf_event *event) event->hw.config |= event->attr.config & (p4_config_pack_escr(P4_ESCR_MASK_HT) | p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); + + event->hw.config &= ~P4_CCCR_FORCE_OVF; } rc = x86_setup_perfctr(event); @@ -658,8 +660,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) for (idx = 0; idx < x86_pmu.num_counters; idx++) { int overflow; - if (!test_bit(idx, cpuc->active_mask)) + if (!test_bit(idx, cpuc->active_mask)) { + /* catch in-flight IRQs */ + if (__test_and_clear_bit(idx, cpuc->running)) + handled++; continue; + } event = cpuc->events[idx]; hwc = &event->hw; @@ -690,7 +696,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) inc_irq_stat(apic_perf_irqs); } - return handled > 0; + return handled; } /* diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 34b4dad6f0b8..d49079515122 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { + { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index e5cc7e82e60d..ebdb85cf2686 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -18,7 +18,6 @@ #include <asm/apic.h> #include <asm/iommu.h> #include <asm/gart.h> -#include <asm/hpet.h> static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -192,21 +191,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif -/* - * Force the read back of the CMP register in hpet_next_event() - * to work around the problem that the CMP register write seems to be - * delayed. See hpet_next_event() for details. - * - * We do this on all SMBUS incarnations for now until we have more - * information about the affected chipsets. - */ -static void __init ati_hpet_bugs(int num, int slot, int func) -{ -#ifdef CONFIG_HPET_TIMER - hpet_readback_cmp = 1; -#endif -} - #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -236,8 +220,6 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, - { PCI_VENDOR_ID_ATI, PCI_ANY_ID, - PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs }, {} }; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 351f9c0fea1f..7494999141b3 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -35,7 +35,6 @@ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ u8 hpet_msi_disable; -u8 hpet_readback_cmp; #ifdef CONFIG_PCI_MSI static unsigned long hpet_num_timers; @@ -395,23 +394,27 @@ static int hpet_next_event(unsigned long delta, * at that point and we would wait for the next hpet interrupt * forever. We found out that reading the CMP register back * forces the transfer so we can rely on the comparison with - * the counter register below. + * the counter register below. If the read back from the + * compare register does not match the value we programmed + * then we might have a real hardware problem. We can not do + * much about it here, but at least alert the user/admin with + * a prominent warning. * - * That works fine on those ATI chipsets, but on newer Intel - * chipsets (ICH9...) this triggers due to an erratum: Reading - * the comparator immediately following a write is returning - * the old value. + * An erratum on some chipsets (ICH9,..), results in + * comparator read immediately following a write returning old + * value. Workaround for this is to read this value second + * time, when first read returns old value. * - * We restrict the read back to the affected ATI chipsets (set - * by quirks) and also run it with hpet=verbose for debugging - * purposes. + * In fact the write to the comparator register is delayed up + * to two HPET cycles so the workaround we tried to restrict + * the readback to those known to be borked ATI chipsets + * failed miserably. So we give up on optimizations forever + * and penalize all HPET incarnations unconditionally. */ - if (hpet_readback_cmp || hpet_verbose) { - u32 cmp = hpet_readl(HPET_Tn_CMP(timer)); - - if (cmp != cnt) + if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { + if (hpet_readl(HPET_Tn_CMP(timer)) != cnt) printk_once(KERN_WARNING - "hpet: compare register read back failed.\n"); + "hpet: compare register read back failed.\n"); } return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; @@ -503,7 +506,7 @@ static int hpet_assign_irq(struct hpet_dev *dev) { unsigned int irq; - irq = create_irq(); + irq = create_irq_nr(0, -1); if (!irq) return -EINVAL; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a474ec37c32f..ff15c9dcc25d 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) int arch_bp_generic_fields(int x86_len, int x86_type, int *gen_len, int *gen_type) { - /* Len */ - switch (x86_len) { - case X86_BREAKPOINT_LEN_X: + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + if (x86_len != X86_BREAKPOINT_LEN_X) + return -EINVAL; + + *gen_type = HW_BREAKPOINT_X; *gen_len = sizeof(long); + return 0; + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; break; + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (x86_len) { case X86_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type, return -EINVAL; } - /* Type */ - switch (x86_type) { - case X86_BREAKPOINT_EXECUTE: - *gen_type = HW_BREAKPOINT_X; - break; - case X86_BREAKPOINT_WRITE: - *gen_type = HW_BREAKPOINT_W; - break; - case X86_BREAKPOINT_RW: - *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - break; - default: - return -EINVAL; - } - return 0; } @@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ret = -EINVAL; switch (info->len) { - case X86_BREAKPOINT_LEN_X: - align = sizeof(long) -1; - break; case X86_BREAKPOINT_LEN_1: align = 0; break; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e0bc186d7501..1c355c550960 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -239,11 +239,10 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); - module_bug_cleanup(mod); } diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a874495b3673..e2a595257390 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -45,8 +45,7 @@ void __init setup_trampoline_page_table(void) /* Copy kernel address range */ clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, - KERNEL_PGD_BOUNDARY)); + KERNEL_PGD_PTRS); /* Initialize low mappings */ clone_pgd_range(trampoline_pg_dir, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ce8e50239332..26a863a9c2a8 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) local_irq_restore(flags); } +static unsigned long long cyc2ns_suspend; + +void save_sched_clock_state(void) +{ + if (!sched_clock_stable) + return; + + cyc2ns_suspend = sched_clock(); +} + +/* + * Even on processors with invariant TSC, TSC gets reset in some the + * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to + * arbitrary value (still sync'd across cpu's) during resume from such sleep + * states. To cope up with this, recompute the cyc2ns_offset for each cpu so + * that sched_clock() continues from the point where it was left off during + * suspend. + */ +void restore_sched_clock_state(void) +{ + unsigned long long offset; + unsigned long flags; + int cpu; + + if (!sched_clock_stable) + return; + + local_irq_save(flags); + + __get_cpu_var(cyc2ns_offset) = 0; + offset = cyc2ns_suspend - sched_clock(); + + for_each_possible_cpu(cpu) + per_cpu(cyc2ns_offset, cpu) = offset; + + local_irq_restore(flags); +} + #ifdef CONFIG_CPU_FREQ /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency |