diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 16:15:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 16:15:23 -0700 |
commit | f0bb4c0ab064a8aeeffbda1cee380151a594eaab (patch) | |
tree | 14d55a89c5db455aa10ff9a96ca14c474a9c4d55 /drivers | |
parent | a4883ef6af5e513a1e8c2ab9aab721604aa3a4f5 (diff) | |
parent | 983433b5812c5cf33a9008fa38c6f9b407fedb76 (diff) | |
download | linux-stable-f0bb4c0ab064a8aeeffbda1cee380151a594eaab.tar.gz linux-stable-f0bb4c0ab064a8aeeffbda1cee380151a594eaab.tar.bz2 linux-stable-f0bb4c0ab064a8aeeffbda1cee380151a594eaab.zip |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Kernel improvements:
- watchdog driver improvements by Li Zefan
- Power7 CPI stack events related improvements by Sukadev Bhattiprolu
- event multiplexing via hrtimers and other improvements by Stephane
Eranian
- kernel stack use optimization by Andrew Hunter
- AMD IOMMU uncore PMU support by Suravee Suthikulpanit
- NMI handling rate-limits by Dave Hansen
- various hw_breakpoint fixes by Oleg Nesterov
- hw_breakpoint overflow period sampling and related signal handling
fixes by Jiri Olsa
- Intel Haswell PMU support by Andi Kleen
Tooling improvements:
- Reset SIGTERM handler in workload child process, fix from David
Ahern.
- Makefile reorganization, prep work for Kconfig patches, from Jiri
Olsa.
- Add automated make test suite, from Jiri Olsa.
- Add --percent-limit option to 'top' and 'report', from Namhyung
Kim.
- Sorting improvements, from Namhyung Kim.
- Expand definition of sysfs format attribute, from Michael Ellerman.
Tooling fixes:
- 'perf tests' fixes from Jiri Olsa.
- Make Power7 CPI stack events available in sysfs, from Sukadev
Bhattiprolu.
- Handle death by SIGTERM in 'perf record', fix from David Ahern.
- Fix printing of perf_event_paranoid message, from David Ahern.
- Handle realloc failures in 'perf kvm', from David Ahern.
- Fix divide by 0 in variance, from David Ahern.
- Save parent pid in thread struct, from David Ahern.
- Handle JITed code in shared memory, from Andi Kleen.
- Fixes for 'perf diff', from Jiri Olsa.
- Remove some unused struct members, from Jiri Olsa.
- Add missing liblk.a dependency for python/perf.so, fix from Jiri
Olsa.
- Respect CROSS_COMPILE in liblk.a, from Rabin Vincent.
- No need to do locking when adding hists in perf report, only 'top'
needs that, from Namhyung Kim.
- Fix alignment of symbol column in in the hists browser (top,
report) when -v is given, from NAmhyung Kim.
- Fix 'perf top' -E option behavior, from Namhyung Kim.
- Fix bug in isupper() and islower(), from Sukadev Bhattiprolu.
- Fix compile errors in bp_signal 'perf test', from Sukadev
Bhattiprolu.
... and more things"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (102 commits)
perf/x86: Disable PEBS-LL in intel_pmu_pebs_disable()
perf/x86: Fix shared register mutual exclusion enforcement
perf/x86/intel: Support full width counting
x86: Add NMI duration tracepoints
perf: Drop sample rate when sampling is too slow
x86: Warn when NMI handlers take large amounts of time
hw_breakpoint: Introduce "struct bp_cpuinfo"
hw_breakpoint: Simplify *register_wide_hw_breakpoint()
hw_breakpoint: Introduce cpumask_of_bp()
hw_breakpoint: Simplify the "weight" usage in toggle_bp_slot() paths
hw_breakpoint: Simplify list/idx mess in toggle_bp_slot() paths
perf/x86/intel: Add mem-loads/stores support for Haswell
perf/x86/intel: Support Haswell/v4 LBR format
perf/x86/intel: Move NMI clearing to end of PMI handler
perf/x86/intel: Add Haswell PEBS support
perf/x86/intel: Add simple Haswell PMU support
perf/x86/intel: Add Haswell PEBS record support
perf/x86/intel: Fix sparse warning
perf/x86/amd: AMD IOMMU Performance Counter PERF uncore PMU implementation
perf/x86/amd: Add IOMMU Performance Counter resource management
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/iommu/amd_iommu_init.c | 140 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_proto.h | 7 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 15 |
3 files changed, 150 insertions, 12 deletions
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index bf51abb78dee..7acbf351e9af 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -99,7 +99,7 @@ struct ivhd_header { u64 mmio_phys; u16 pci_seg; u16 info; - u32 reserved; + u32 efr; } __attribute__((packed)); /* @@ -154,6 +154,7 @@ bool amd_iommu_iotlb_sup __read_mostly = true; u32 amd_iommu_max_pasids __read_mostly = ~0; bool amd_iommu_v2_present __read_mostly; +bool amd_iommu_pc_present __read_mostly; bool amd_iommu_force_isolation __read_mostly; @@ -369,23 +370,23 @@ static void iommu_disable(struct amd_iommu *iommu) * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in * the system has one. */ -static u8 __iomem * __init iommu_map_mmio_space(u64 address) +static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end) { - if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu")) { - pr_err("AMD-Vi: Can not reserve memory region %llx for mmio\n", - address); + if (!request_mem_region(address, end, "amd_iommu")) { + pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n", + address, end); pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n"); return NULL; } - return (u8 __iomem *)ioremap_nocache(address, MMIO_REGION_LENGTH); + return (u8 __iomem *)ioremap_nocache(address, end); } static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) { if (iommu->mmio_base) iounmap(iommu->mmio_base); - release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); + release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end); } /**************************************************************************** @@ -1085,7 +1086,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->cap_ptr = h->cap_ptr; iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; - iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys); + + /* Check if IVHD EFR contains proper max banks/counters */ + if ((h->efr != 0) && + ((h->efr & (0xF << 13)) != 0) && + ((h->efr & (0x3F << 17)) != 0)) { + iommu->mmio_phys_end = MMIO_REG_END_OFFSET; + } else { + iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; + } + + iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys, + iommu->mmio_phys_end); if (!iommu->mmio_base) return -ENOMEM; @@ -1160,6 +1172,33 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } + +static void init_iommu_perf_ctr(struct amd_iommu *iommu) +{ + u64 val = 0xabcd, val2 = 0; + + if (!iommu_feature(iommu, FEATURE_PC)) + return; + + amd_iommu_pc_present = true; + + /* Check if the performance counters can be written to */ + if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) || + (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) || + (val != val2)) { + pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); + amd_iommu_pc_present = false; + return; + } + + pr_info("AMD-Vi: IOMMU performance counters supported\n"); + + val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); + iommu->max_banks = (u8) ((val >> 12) & 0x3f); + iommu->max_counters = (u8) ((val >> 7) & 0xf); +} + + static int iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; @@ -1226,6 +1265,8 @@ static int iommu_init_pci(struct amd_iommu *iommu) if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) amd_iommu_np_cache = true; + init_iommu_perf_ctr(iommu); + if (is_rd890_iommu(iommu->dev)) { int i, j; @@ -1278,7 +1319,7 @@ static void print_iommu_info(void) if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); } - pr_cont("\n"); + pr_cont("\n"); } } if (irq_remapping_enabled) @@ -2232,3 +2273,84 @@ bool amd_iommu_v2_supported(void) return amd_iommu_v2_present; } EXPORT_SYMBOL(amd_iommu_v2_supported); + +/**************************************************************************** + * + * IOMMU EFR Performance Counter support functionality. This code allows + * access to the IOMMU PC functionality. + * + ****************************************************************************/ + +u8 amd_iommu_pc_get_max_banks(u16 devid) +{ + struct amd_iommu *iommu; + u8 ret = 0; + + /* locate the iommu governing the devid */ + iommu = amd_iommu_rlookup_table[devid]; + if (iommu) + ret = iommu->max_banks; + + return ret; +} +EXPORT_SYMBOL(amd_iommu_pc_get_max_banks); + +bool amd_iommu_pc_supported(void) +{ + return amd_iommu_pc_present; +} +EXPORT_SYMBOL(amd_iommu_pc_supported); + +u8 amd_iommu_pc_get_max_counters(u16 devid) +{ + struct amd_iommu *iommu; + u8 ret = 0; + + /* locate the iommu governing the devid */ + iommu = amd_iommu_rlookup_table[devid]; + if (iommu) + ret = iommu->max_counters; + + return ret; +} +EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); + +int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write) +{ + struct amd_iommu *iommu; + u32 offset; + u32 max_offset_lim; + + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_present) + return -ENODEV; + + /* Locate the iommu associated with the device ID */ + iommu = amd_iommu_rlookup_table[devid]; + + /* Check for valid iommu and pc register indexing */ + if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7))) + return -ENODEV; + + offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); + + /* Limit the offset to the hw defined mmio region aperture */ + max_offset_lim = (u32)(((0x40|iommu->max_banks) << 12) | + (iommu->max_counters << 8) | 0x28); + if ((offset < MMIO_CNTR_REG_OFFSET) || + (offset > max_offset_lim)) + return -EINVAL; + + if (is_write) { + writel((u32)*value, iommu->mmio_base + offset); + writel((*value >> 32), iommu->mmio_base + offset + 4); + } else { + *value = readl(iommu->mmio_base + offset + 4); + *value <<= 32; + *value = readl(iommu->mmio_base + offset); + } + + return 0; +} +EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index c294961bdd36..95ed6deae47f 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -56,6 +56,13 @@ extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, int pasid, extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, int pasid); extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); +/* IOMMU Performance Counter functions */ +extern bool amd_iommu_pc_supported(void); +extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(u16 devid); +extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write); + #define PPR_SUCCESS 0x0 #define PPR_INVALID 0x1 #define PPR_FAILURE 0xf diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 0285a215df16..e400fbe411de 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -38,9 +38,6 @@ #define ALIAS_TABLE_ENTRY_SIZE 2 #define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) -/* Length of the MMIO region for the AMD IOMMU */ -#define MMIO_REGION_LENGTH 0x4000 - /* Capability offsets used by the driver */ #define MMIO_CAP_HDR_OFFSET 0x00 #define MMIO_RANGE_OFFSET 0x0c @@ -78,6 +75,10 @@ #define MMIO_STATUS_OFFSET 0x2020 #define MMIO_PPR_HEAD_OFFSET 0x2030 #define MMIO_PPR_TAIL_OFFSET 0x2038 +#define MMIO_CNTR_CONF_OFFSET 0x4000 +#define MMIO_CNTR_REG_OFFSET 0x40000 +#define MMIO_REG_END_OFFSET 0x80000 + /* Extended Feature Bits */ @@ -507,6 +508,10 @@ struct amd_iommu { /* physical address of MMIO space */ u64 mmio_phys; + + /* physical end address of MMIO space */ + u64 mmio_phys_end; + /* virtual address of MMIO space */ u8 __iomem *mmio_base; @@ -584,6 +589,10 @@ struct amd_iommu { /* The l2 indirect registers */ u32 stored_l2[0x83]; + + /* The maximum PC banks and counters/bank (PCSup=1) */ + u8 max_banks; + u8 max_counters; }; struct devid_map { |