From d2caa3cebda8b626336e100b80a0ed6f909dccab Mon Sep 17 00:00:00 2001 From: Tom Huynh Date: Tue, 20 Jan 2015 16:19:50 -0600 Subject: powerpc/perf: fix fsl_emb_pmu_start to write correct pmc value PMCs on PowerPC increases towards 0x80000000 and triggers an overflow interrupt when the msb is set to collect a sample. Therefore, to setup for the next sample collection, pmu_start should set the pmc value to 0x80000000 - left instead of left which incorrectly delays the next overflow interrupt. Same as commit 9a45a9407c69 ("powerpc/perf: power_pmu_start restores incorrect values, breaking frequency events") for book3s. Signed-off-by: Tom Huynh Signed-off-by: Scott Wood --- arch/powerpc/perf/core-fsl-emb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index 4acaea01fe03..e9fe904b6538 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -389,6 +389,7 @@ static void fsl_emb_pmu_del(struct perf_event *event, int flags) static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) { unsigned long flags; + unsigned long val; s64 left; if (event->hw.idx < 0 || !event->hw.sample_period) @@ -405,7 +406,10 @@ static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) event->hw.state = 0; left = local64_read(&event->hw.period_left); - write_pmc(event->hw.idx, left); + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); perf_event_update_userpage(event); perf_pmu_enable(event->pmu); -- cgit v1.2.3 From 0d7d9b3a456aa8670cd9552f69529d57716f3122 Mon Sep 17 00:00:00 2001 From: Alexandru-Cezar Sardan Date: Thu, 26 Jun 2014 11:58:58 +0300 Subject: perf/powerpc: reset event hw state when adding it to the PMU When adding an event to the PMU with PERF_EF_START the STOPPED and UPTODATE flags need to be cleared in the hw.event status variable because they are preventing 
the update of the event count on overflow interrupt. Signed-off-by: Alexandru-Cezar Sardan Signed-off-by: Scott Wood --- arch/powerpc/perf/core-fsl-emb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index e9fe904b6538..5d747b4cb8ee 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -330,9 +330,11 @@ static int fsl_emb_pmu_add(struct perf_event *event, int flags) } local64_set(&event->hw.prev_count, val); - if (!(flags & PERF_EF_START)) { + if (unlikely(!(flags & PERF_EF_START))) { event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; val = 0; + } else { + event->hw.state &= ~(PERF_HES_STOPPED | PERF_HES_UPTODATE); } write_pmc(i, val); -- cgit v1.2.3 From e08e52824e41fb42e46593450d378ad1b33caedb Mon Sep 17 00:00:00 2001 From: "sukadev@linux.vnet.ibm.com" Date: Fri, 30 Jan 2015 13:45:59 -0800 Subject: perf: define EVENT_DEFINE_RANGE_FORMAT_LITE helper Define a lite version of the EVENT_DEFINE_RANGE_FORMAT() that avoids defining helper functions for the bit-field ranges. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-common.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/hv-common.h b/arch/powerpc/perf/hv-common.h index 5d79cecbd73d..349aaba4d2d1 100644 --- a/arch/powerpc/perf/hv-common.h +++ b/arch/powerpc/perf/hv-common.h @@ -20,6 +20,16 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps); PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end); \ EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end) +/* + * The EVENT_DEFINE_RANGE_FORMAT() macro above includes helper functions + * for the fields (eg: event_get_starting_index()). For some fields we + * need the bit-range definition, but not the helper functions. 
Define a + * lite version of the above macro without the helpers and silence + * compiler warnings about unused static functions. + */ +#define EVENT_DEFINE_RANGE_FORMAT_LITE(name, attr_var, bit_start, bit_end) \ +PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end); + #define EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end) \ static u64 event_get_##name##_max(void) \ { \ -- cgit v1.2.3 From 5c5cd7b502595f6b90509b8aa4bba6f81b69315c Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:46:00 -0800 Subject: powerpc/perf/hv-24x7: parse catalog and populate sysfs with events Retrieves and parses the 24x7 catalog on POWER systems that supply it (right now, only POWER 8). Events are exposed via sysfs in the standard fashion, and are all parameterized. $ cd /sys/bus/event_source/devices/hv_24x7/events $ cat HPM_CS_FROM_L4_LDATA__PHYS_CORE domain=0x2,offset=0xd58,core=?,lpar=0x0 $ cat HPM_TLBIE__VCPU_HOME_CHIP domain=0x4,offset=0x358,vcpu=?,lpar=? where user is required to specify values for the fields with '?' (like core, vcpu, lpar above), when specifying the event with the perf tool. Catalog is (at the moment) only parsed on boot. It needs re-parsing when some hypervisor events occur. At that point we'll also need to prevent old events from continuing to function (counter that is passed in via spare space in the config values?). 
Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7-catalog.h | 25 ++ arch/powerpc/perf/hv-24x7-domains.h | 28 ++ arch/powerpc/perf/hv-24x7.c | 793 +++++++++++++++++++++++++++++++++++- arch/powerpc/perf/hv-24x7.h | 12 +- 4 files changed, 841 insertions(+), 17 deletions(-) create mode 100644 arch/powerpc/perf/hv-24x7-domains.h (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/hv-24x7-catalog.h b/arch/powerpc/perf/hv-24x7-catalog.h index 21b19dd86d9c..69e2e1faf902 100644 --- a/arch/powerpc/perf/hv-24x7-catalog.h +++ b/arch/powerpc/perf/hv-24x7-catalog.h @@ -30,4 +30,29 @@ struct hv_24x7_catalog_page_0 { __u8 reserved6[2]; } __packed; +struct hv_24x7_event_data { + __be16 length; /* in bytes, must be a multiple of 16 */ + __u8 reserved1[2]; + __u8 domain; /* Chip = 1, Core = 2 */ + __u8 reserved2[1]; + __be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */ + __be16 event_group_record_len; /* in bytes */ + + /* in bytes, offset from event_group_record */ + __be16 event_counter_offs; + + /* verified_state, unverified_state, caveat_state, broken_state, ... */ + __be32 flags; + + __be16 primary_group_ix; + __be16 group_count; + __be16 event_name_len; + __u8 remainder[]; + /* __u8 event_name[event_name_len - 2]; */ + /* __be16 event_description_len; */ + /* __u8 event_desc[event_description_len - 2]; */ + /* __be16 detailed_desc_len; */ + /* __u8 detailed_desc[detailed_desc_len - 2]; */ +} __packed; + #endif diff --git a/arch/powerpc/perf/hv-24x7-domains.h b/arch/powerpc/perf/hv-24x7-domains.h new file mode 100644 index 000000000000..49c1efd50045 --- /dev/null +++ b/arch/powerpc/perf/hv-24x7-domains.h @@ -0,0 +1,28 @@ + +/* + * DOMAIN(name, num, index_kind, is_physical) + * + * @name: An all caps token, suitable for use in generating an enum + * member and appending to an event name in sysfs. 
+ * + * @num: The number corresponding to the domain as given in + * documentation. We assume the catalog domain and the hcall + * domain have the same numbering (so far they do), but this + * may need to be changed in the future. + * + * @index_kind: A stringifiable token describing the meaning of the index + * within the given domain. Must fit the parsing rules of the + * perf sysfs api. + * + * @is_physical: True if the domain is physical, false otherwise (if virtual). + * + * Note: The terms PHYS_CHIP, PHYS_CORE, VCPU correspond to physical chip, + * physical core and virtual processor in 24x7 Counters specifications. + */ + +DOMAIN(PHYS_CHIP, 0x01, chip, true) +DOMAIN(PHYS_CORE, 0x02, core, true) +DOMAIN(VCPU_HOME_CORE, 0x03, vcpu, false) +DOMAIN(VCPU_HOME_CHIP, 0x04, vcpu, false) +DOMAIN(VCPU_HOME_NODE, 0x05, vcpu, false) +DOMAIN(VCPU_REMOTE_NODE, 0x06, vcpu, false) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index f162d0b8eea3..9445a824819e 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -13,16 +13,66 @@ #define pr_fmt(fmt) "hv-24x7: " fmt #include +#include #include #include +#include + #include #include #include +#include #include "hv-24x7.h" #include "hv-24x7-catalog.h" #include "hv-common.h" +static const char *event_domain_suffix(unsigned domain) +{ + switch (domain) { +#define DOMAIN(n, v, x, c) \ + case HV_PERF_DOMAIN_##n: \ + return "__" #n; +#include "hv-24x7-domains.h" +#undef DOMAIN + default: + WARN(1, "unknown domain %d\n", domain); + return "__UNKNOWN_DOMAIN_SUFFIX"; + } +} + +static bool domain_is_valid(unsigned domain) +{ + switch (domain) { +#define DOMAIN(n, v, x, c) \ + case HV_PERF_DOMAIN_##n: \ + /* fall through */ +#include "hv-24x7-domains.h" +#undef DOMAIN + return true; + default: + return false; + } +} + +static bool is_physical_domain(unsigned domain) +{ + switch (domain) { +#define DOMAIN(n, v, x, c) \ + case HV_PERF_DOMAIN_##n: \ + return c; +#include "hv-24x7-domains.h" 
+#undef DOMAIN + default: + return false; + } +} + +static bool catalog_entry_domain_is_valid(unsigned domain) +{ + return is_physical_domain(domain); +} + /* * TODO: Merging events: * - Think of the hcall as an interface to a 4d array of counters: @@ -44,13 +94,14 @@ /* * Example usage: - * perf stat -e 'hv_24x7/domain=2,offset=8,starting_index=0,lpar=0xffffffff/' + * perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/' */ /* u3 0-6, one of HV_24X7_PERF_DOMAIN */ EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3); /* u16 */ -EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 16, 31); +EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31); +EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31); /* u32, see "data_offset" */ EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63); /* u16 */ @@ -63,7 +114,8 @@ EVENT_DEFINE_RANGE(reserved3, config2, 0, 63); static struct attribute *format_attrs[] = { &format_attr_domain.attr, &format_attr_offset.attr, - &format_attr_starting_index.attr, + &format_attr_core.attr, + &format_attr_vcpu.attr, &format_attr_lpar.attr, NULL, }; @@ -73,8 +125,115 @@ static struct attribute_group format_group = { .attrs = format_attrs, }; +static struct attribute_group event_group = { + .name = "events", + /* .attrs is set in init */ +}; + +static struct attribute_group event_desc_group = { + .name = "event_descs", + /* .attrs is set in init */ +}; + +static struct attribute_group event_long_desc_group = { + .name = "event_long_descs", + /* .attrs is set in init */ +}; + static struct kmem_cache *hv_page_cache; +static char *event_name(struct hv_24x7_event_data *ev, int *len) +{ + *len = be16_to_cpu(ev->event_name_len) - 2; + return (char *)ev->remainder; +} + +static char *event_desc(struct hv_24x7_event_data *ev, int *len) +{ + unsigned nl = be16_to_cpu(ev->event_name_len); + __be16 *desc_len = (__be16 *)(ev->remainder + nl - 2); + *len = be16_to_cpu(*desc_len) - 2; + return (char *)ev->remainder + nl; +} + +static char *event_long_desc(struct 
hv_24x7_event_data *ev, int *len) +{ + unsigned nl = be16_to_cpu(ev->event_name_len); + __be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2); + unsigned desc_len = be16_to_cpu(*desc_len_); + __be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2); + *len = be16_to_cpu(*long_desc_len) - 2; + return (char *)ev->remainder + nl + desc_len; +} + +static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev, + void *end) +{ + void *start = ev; + + return (start + offsetof(struct hv_24x7_event_data, remainder)) < end; +} + +/* + * Things we don't check: + * - padding for desc, name, and long/detailed desc is required to be '\0' + * bytes. + * + * Return NULL if we pass end, + * Otherwise return the address of the byte just following the event. + */ +static void *event_end(struct hv_24x7_event_data *ev, void *end) +{ + void *start = ev; + __be16 *dl_, *ldl_; + unsigned dl, ldl; + unsigned nl = be16_to_cpu(ev->event_name_len); + + if (nl < 2) { + pr_debug("%s: name length too short: %d", __func__, nl); + return NULL; + } + + if (start + nl > end) { + pr_debug("%s: start=%p + nl=%u > end=%p", + __func__, start, nl, end); + return NULL; + } + + dl_ = (__be16 *)(ev->remainder + nl - 2); + if (!IS_ALIGNED((uintptr_t)dl_, 2)) + pr_warn("desc len not aligned %p", dl_); + dl = be16_to_cpu(*dl_); + if (dl < 2) { + pr_debug("%s: desc len too short: %d", __func__, dl); + return NULL; + } + + if (start + nl + dl > end) { + pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p", + __func__, start, nl, dl, start + nl + dl, end); + return NULL; + } + + ldl_ = (__be16 *)(ev->remainder + nl + dl - 2); + if (!IS_ALIGNED((uintptr_t)ldl_, 2)) + pr_warn("long desc len not aligned %p", ldl_); + ldl = be16_to_cpu(*ldl_); + if (ldl < 2) { + pr_debug("%s: long desc len too short (ldl=%u)", + __func__, ldl); + return NULL; + } + + if (start + nl + dl + ldl > end) { + pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p", + __func__, start, nl, dl, ldl, end); + return 
NULL; + } + + return start + nl + dl + ldl; +} + static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, unsigned long version, unsigned long index) @@ -97,6 +256,609 @@ static unsigned long h_get_24x7_catalog_page(char page[], version, index); } +static unsigned core_domains[] = { + HV_PERF_DOMAIN_PHYS_CORE, + HV_PERF_DOMAIN_VCPU_HOME_CORE, + HV_PERF_DOMAIN_VCPU_HOME_CHIP, + HV_PERF_DOMAIN_VCPU_HOME_NODE, + HV_PERF_DOMAIN_VCPU_REMOTE_NODE, +}; +/* chip event data always yeilds a single event, core yeilds multiple */ +#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains) + +static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain) +{ + const char *sindex; + const char *lpar; + + if (is_physical_domain(domain)) { + lpar = "0x0"; + sindex = "core"; + } else { + lpar = "?"; + sindex = "vcpu"; + } + + return kasprintf(GFP_KERNEL, + "domain=0x%x,offset=0x%x,%s=?,lpar=%s", + domain, + be16_to_cpu(event->event_counter_offs) + + be16_to_cpu(event->event_group_record_offs), + sindex, + lpar); +} + +/* Avoid trusting fw to NUL terminate strings */ +static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp) +{ + return kasprintf(gfp, "%.*s", max_len, maybe_str); +} + +static ssize_t device_show_string(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *d; + + d = container_of(attr, struct dev_ext_attribute, attr); + return sprintf(buf, "%s\n", (char *)d->var); +} + +static struct attribute *device_str_attr_create_(char *name, char *str) +{ + struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL); + + if (!attr) + return NULL; + + attr->var = str; + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = device_show_string; + return &attr->attr.attr; +} + +static struct attribute *device_str_attr_create(char *name, int name_max, + int name_nonce, + char *str, size_t str_max) +{ + char *n; + char *s = memdup_to_str(str, str_max, GFP_KERNEL); + struct 
attribute *a; + + if (!s) + return NULL; + + if (!name_nonce) + n = kasprintf(GFP_KERNEL, "%.*s", name_max, name); + else + n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name, + name_nonce); + if (!n) + goto out_s; + + a = device_str_attr_create_(n, s); + if (!a) + goto out_n; + + return a; +out_n: + kfree(n); +out_s: + kfree(s); + return NULL; +} + +static void device_str_attr_destroy(struct attribute *attr) +{ + struct dev_ext_attribute *d; + + d = container_of(attr, struct dev_ext_attribute, attr.attr); + kfree(d->var); + kfree(d->attr.attr.name); + kfree(d); +} + +static struct attribute *event_to_attr(unsigned ix, + struct hv_24x7_event_data *event, + unsigned domain, + int nonce) +{ + int event_name_len; + char *ev_name, *a_ev_name, *val; + const char *ev_suffix; + struct attribute *attr; + + if (!domain_is_valid(domain)) { + pr_warn("catalog event %u has invalid domain %u\n", + ix, domain); + return NULL; + } + + val = event_fmt(event, domain); + if (!val) + return NULL; + + ev_suffix = event_domain_suffix(domain); + ev_name = event_name(event, &event_name_len); + if (!nonce) + a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s", + (int)event_name_len, ev_name, ev_suffix); + else + a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d", + (int)event_name_len, ev_name, ev_suffix, nonce); + + + if (!a_ev_name) + goto out_val; + + attr = device_str_attr_create_(a_ev_name, val); + if (!attr) + goto out_name; + + return attr; +out_name: + kfree(a_ev_name); +out_val: + kfree(val); + return NULL; +} + +static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event, + int nonce) +{ + int nl, dl; + char *name = event_name(event, &nl); + char *desc = event_desc(event, &dl); + + /* If there isn't a description, don't create the sysfs file */ + if (!dl) + return NULL; + + return device_str_attr_create(name, nl, nonce, desc, dl); +} + +static struct attribute * +event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce) +{ + int nl, dl; + char *name = 
event_name(event, &nl); + char *desc = event_long_desc(event, &dl); + + /* If there isn't a description, don't create the sysfs file */ + if (!dl) + return NULL; + + return device_str_attr_create(name, nl, nonce, desc, dl); +} + +static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs, + struct hv_24x7_event_data *event, int nonce) +{ + unsigned i; + + switch (event->domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: + *attrs = event_to_attr(ix, event, event->domain, nonce); + return 1; + case HV_PERF_DOMAIN_PHYS_CORE: + for (i = 0; i < ARRAY_SIZE(core_domains); i++) { + attrs[i] = event_to_attr(ix, event, core_domains[i], + nonce); + if (!attrs[i]) { + pr_warn("catalog event %u: individual attr %u " + "creation failure\n", ix, i); + for (; i; i--) + device_str_attr_destroy(attrs[i - 1]); + return -1; + } + } + return i; + default: + pr_warn("catalog event %u: domain %u is not allowed in the " + "catalog\n", ix, event->domain); + return -1; + } +} + +static size_t event_to_attr_ct(struct hv_24x7_event_data *event) +{ + switch (event->domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: + return 1; + case HV_PERF_DOMAIN_PHYS_CORE: + return ARRAY_SIZE(core_domains); + default: + return 0; + } +} + +static unsigned long vmalloc_to_phys(void *v) +{ + struct page *p = vmalloc_to_page(v); + + BUG_ON(!p); + return page_to_phys(p) + offset_in_page(v); +} + +/* */ +struct event_uniq { + struct rb_node node; + const char *name; + int nl; + unsigned ct; + unsigned domain; +}; + +static int memord(const void *d1, size_t s1, const void *d2, size_t s2) +{ + if (s1 < s2) + return 1; + if (s2 > s1) + return -1; + + return memcmp(d1, d2, s1); +} + +static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2, + size_t s2, unsigned d2) +{ + int r = memord(v1, s1, v2, s2); + + if (r) + return r; + if (d1 > d2) + return 1; + if (d2 > d1) + return -1; + return 0; +} + +static int event_uniq_add(struct rb_root *root, const char *name, int nl, + unsigned domain) +{ + 
struct rb_node **new = &(root->rb_node), *parent = NULL; + struct event_uniq *data; + + /* Figure out where to put new node */ + while (*new) { + struct event_uniq *it; + int result; + + it = container_of(*new, struct event_uniq, node); + result = ev_uniq_ord(name, nl, domain, it->name, it->nl, + it->domain); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else { + it->ct++; + pr_info("found a duplicate event %.*s, ct=%u\n", nl, + name, it->ct); + return it->ct; + } + } + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + *data = (struct event_uniq) { + .name = name, + .nl = nl, + .ct = 0, + .domain = domain, + }; + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); + + /* data->ct */ + return 0; +} + +static void event_uniq_destroy(struct rb_root *root) +{ + /* + * the strings we point to are in the giant block of memory filled by + * the catalog, and are freed separately. + */ + struct event_uniq *pos, *n; + + rbtree_postorder_for_each_entry_safe(pos, n, root, node) + kfree(pos); +} + + +/* + * ensure the event structure's sizes are self consistent and don't cause us to + * read outside of the event + * + * On success, return the event length in bytes. + * Otherwise, return -1 (and print as appropriate). 
+ */ +static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, + size_t event_idx, + size_t event_data_bytes, + size_t event_entry_count, + size_t offset, void *end) +{ + ssize_t ev_len; + void *ev_end, *calc_ev_end; + + if (offset >= event_data_bytes) + return -1; + + if (event_idx >= event_entry_count) { + pr_devel("catalog event data has %zu bytes of padding after last event\n", + event_data_bytes - offset); + return -1; + } + + if (!event_fixed_portion_is_within(event, end)) { + pr_warn("event %zu fixed portion is not within range\n", + event_idx); + return -1; + } + + ev_len = be16_to_cpu(event->length); + + if (ev_len % 16) + pr_info("event %zu has length %zu not divisible by 16: event=%pK\n", + event_idx, ev_len, event); + + ev_end = (__u8 *)event + ev_len; + if (ev_end > end) { + pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n", + event_idx, ev_len, ev_end, end, + offset); + return -1; + } + + calc_ev_end = event_end(event, end); + if (!calc_ev_end) { + pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n", + event_idx, event_data_bytes, event, end, + offset); + return -1; + } + + if (calc_ev_end > ev_end) { + pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n", + event_idx, event, ev_end, offset, calc_ev_end); + return -1; + } + + return ev_len; +} + +#define MAX_4K (SIZE_MAX / 4096) + +static void create_events_from_catalog(struct attribute ***events_, + struct attribute ***event_descs_, + struct attribute ***event_long_descs_) +{ + unsigned long hret; + size_t catalog_len, catalog_page_len, event_entry_count, + event_data_len, event_data_offs, + event_data_bytes, junk_events, event_idx, event_attr_ct, i, + attr_max, event_idx_last, desc_ct, long_desc_ct; + ssize_t ct, ev_len; + uint32_t catalog_version_num; + struct attribute **events, **event_descs, **event_long_descs; + struct 
hv_24x7_catalog_page_0 *page_0 = + kmem_cache_alloc(hv_page_cache, GFP_KERNEL); + void *page = page_0; + void *event_data, *end; + struct hv_24x7_event_data *event; + struct rb_root ev_uniq = RB_ROOT; + + if (!page) + goto e_out; + + hret = h_get_24x7_catalog_page(page, 0, 0); + if (hret) + goto e_free; + + catalog_version_num = be64_to_cpu(page_0->version); + catalog_page_len = be32_to_cpu(page_0->length); + + if (MAX_4K < catalog_page_len) { + pr_err("invalid page count: %zu\n", catalog_page_len); + goto e_free; + } + + catalog_len = catalog_page_len * 4096; + + event_entry_count = be16_to_cpu(page_0->event_entry_count); + event_data_offs = be16_to_cpu(page_0->event_data_offs); + event_data_len = be16_to_cpu(page_0->event_data_len); + + pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n", + (size_t)catalog_version_num, catalog_len, + event_entry_count, event_data_offs, event_data_len); + + if ((MAX_4K < event_data_len) + || (MAX_4K < event_data_offs) + || (MAX_4K - event_data_offs < event_data_len)) { + pr_err("invalid event data offs %zu and/or len %zu\n", + event_data_offs, event_data_len); + goto e_free; + } + + if ((event_data_offs + event_data_len) > catalog_page_len) { + pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n", + event_data_offs, + event_data_offs + event_data_len, + catalog_page_len); + goto e_free; + } + + if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) { + pr_err("event_entry_count %zu is invalid\n", + event_entry_count); + goto e_free; + } + + event_data_bytes = event_data_len * 4096; + + /* + * event data can span several pages, events can cross between these + * pages. Use vmalloc to make this easier. 
+ */ + event_data = vmalloc(event_data_bytes); + if (!event_data) { + pr_err("could not allocate event data\n"); + goto e_free; + } + + end = event_data + event_data_bytes; + + /* + * using vmalloc_to_phys() like this only works if PAGE_SIZE is + * divisible by 4096 + */ + BUILD_BUG_ON(PAGE_SIZE % 4096); + + for (i = 0; i < event_data_len; i++) { + hret = h_get_24x7_catalog_page_( + vmalloc_to_phys(event_data + i * 4096), + catalog_version_num, + i + event_data_offs); + if (hret) { + pr_err("failed to get event data in page %zu\n", + i + event_data_offs); + goto e_event_data; + } + } + + /* + * scan the catalog to determine the number of attributes we need, and + * verify it at the same time. + */ + for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0; + ; + event_idx++, event = (void *)event + ev_len) { + size_t offset = (void *)event - (void *)event_data; + char *name; + int nl; + + ev_len = catalog_event_len_validate(event, event_idx, + event_data_bytes, + event_entry_count, + offset, end); + if (ev_len < 0) + break; + + name = event_name(event, &nl); + + if (event->event_group_record_len == 0) { + pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n", + event_idx, nl, name); + junk_events++; + continue; + } + + if (!catalog_entry_domain_is_valid(event->domain)) { + pr_info("event %zu (%.*s) has invalid domain %d\n", + event_idx, nl, name, event->domain); + junk_events++; + continue; + } + + attr_max += event_to_attr_ct(event); + } + + event_idx_last = event_idx; + if (event_idx_last != event_entry_count) + pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n", + event_idx_last, event_entry_count, junk_events); + + events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL); + if (!events) + goto e_event_data; + + event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs), + GFP_KERNEL); + if (!event_descs) + goto e_event_attrs; + + event_long_descs = 
kmalloc_array(event_idx + 1, + sizeof(*event_long_descs), GFP_KERNEL); + if (!event_long_descs) + goto e_event_descs; + + /* Iterate over the catalog filling in the attribute vector */ + for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0, + event = event_data, event_idx = 0; + event_idx < event_idx_last; + event_idx++, ev_len = be16_to_cpu(event->length), + event = (void *)event + ev_len) { + char *name; + int nl; + int nonce; + /* + * these are the only "bad" events that are intermixed and that + * we can ignore without issue. make sure to skip them here + */ + if (event->event_group_record_len == 0) + continue; + if (!catalog_entry_domain_is_valid(event->domain)) + continue; + + name = event_name(event, &nl); + nonce = event_uniq_add(&ev_uniq, name, nl, event->domain); + ct = event_data_to_attrs(event_idx, events + event_attr_ct, + event, nonce); + if (ct <= 0) { + pr_warn("event %zu (%.*s) creation failure, skipping\n", + event_idx, nl, name); + junk_events++; + } else { + event_attr_ct += ct; + event_descs[desc_ct] = event_to_desc_attr(event, nonce); + if (event_descs[desc_ct]) + desc_ct++; + event_long_descs[long_desc_ct] = + event_to_long_desc_attr(event, nonce); + if (event_long_descs[long_desc_ct]) + long_desc_ct++; + } + } + + pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n", + event_idx, event_attr_ct, junk_events, desc_ct); + + events[event_attr_ct] = NULL; + event_descs[desc_ct] = NULL; + event_long_descs[long_desc_ct] = NULL; + + event_uniq_destroy(&ev_uniq); + vfree(event_data); + kmem_cache_free(hv_page_cache, page); + + *events_ = events; + *event_descs_ = event_descs; + *event_long_descs_ = event_long_descs; + return; + +e_event_descs: + kfree(event_descs); +e_event_attrs: + kfree(events); +e_event_data: + vfree(event_data); +e_free: + kmem_cache_free(hv_page_cache, page); +e_out: + *events_ = NULL; + *event_descs_ = NULL; + *event_long_descs_ = NULL; +} + static ssize_t catalog_read(struct 
file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) @@ -207,16 +969,13 @@ static struct attribute_group if_group = { static const struct attribute_group *attr_groups[] = { &format_group, + &event_group, + &event_desc_group, + &event_long_desc_group, &if_group, NULL, }; -static bool is_physical_domain(int domain) -{ - return domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP || - domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE; -} - DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096); DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096); @@ -291,9 +1050,17 @@ out: static unsigned long event_24x7_request(struct perf_event *event, u64 *res, bool success_expected) { + u16 idx; + unsigned domain = event_get_domain(event); + + if (is_physical_domain(domain)) + idx = event_get_core(event); + else + idx = event_get_vcpu(event); + return single_24x7_request(event_get_domain(event), event_get_offset(event), - event_get_starting_index(event), + idx, event_get_lpar(event), res, success_expected); @@ -356,7 +1123,7 @@ static int h_24x7_event_init(struct perf_event *event) return -EIO; } - /* PHYSICAL domains & other lpars require extra capabilities */ + /* Physical domains & other lpars require extra capabilities */ if (!caps.collect_privileged && (is_physical_domain(domain) || (event_get_lpar(event) != event_get_lpar_max()))) { pr_devel("hv permisions disallow: is_physical_domain:%d, lpar=0x%llx\n", @@ -452,6 +1219,10 @@ static int hv_24x7_init(void) /* sampling not supported */ h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + create_events_from_catalog(&event_group.attrs, + &event_desc_group.attrs, + &event_long_desc_group.attrs); + r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h index 720ebce4b435..69cd4e690f58 100644 --- a/arch/powerpc/perf/hv-24x7.h +++ b/arch/powerpc/perf/hv-24x7.h @@ -3,14 +3,14 @@ #include +enum 
hv_perf_domains { +#define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v, +#include "hv-24x7-domains.h" +#undef DOMAIN +}; + struct hv_24x7_request { /* PHYSICAL domains require enabling via phyp/hmc. */ -#define HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP 0x01 -#define HV_24X7_PERF_DOMAIN_PHYSICAL_CORE 0x02 -#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CORE 0x03 -#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CHIP 0x04 -#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_NODE 0x05 -#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_REMOTE_NODE 0x06 __u8 performance_domain; __u8 reserved[0x1]; -- cgit v1.2.3 From 9e9f60108423f18a99c9cc93ef7f23490ecc709b Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:46:01 -0800 Subject: powerpc/perf/{hv-gpci, hv-common}: generate requests with counters annotated This adds (in req-gen/) a framework for defining gpci counter requests. It uses macro magic similar to ftrace. Also convert the existing hv-gpci request structures and enum values to use the new framework (and adjust old users of the structs and enum values to cope with changes in naming). In exchange for this macro disaster, we get autogenerated event listing for GPCI in sysfs, build time field offset checking, and zero duplication of information about GPCI requests. 
Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-common.c | 10 +- arch/powerpc/perf/hv-gpci-requests.h | 76 ++++++++++++++ arch/powerpc/perf/hv-gpci.c | 23 +++++ arch/powerpc/perf/hv-gpci.h | 37 +++---- arch/powerpc/perf/req-gen/_begin.h | 13 +++ arch/powerpc/perf/req-gen/_clear.h | 5 + arch/powerpc/perf/req-gen/_end.h | 4 + arch/powerpc/perf/req-gen/_request-begin.h | 15 +++ arch/powerpc/perf/req-gen/_request-end.h | 8 ++ arch/powerpc/perf/req-gen/perf.h | 155 +++++++++++++++++++++++++++++ 10 files changed, 316 insertions(+), 30 deletions(-) create mode 100644 arch/powerpc/perf/hv-gpci-requests.h create mode 100644 arch/powerpc/perf/req-gen/_begin.h create mode 100644 arch/powerpc/perf/req-gen/_clear.h create mode 100644 arch/powerpc/perf/req-gen/_end.h create mode 100644 arch/powerpc/perf/req-gen/_request-begin.h create mode 100644 arch/powerpc/perf/req-gen/_request-end.h create mode 100644 arch/powerpc/perf/req-gen/perf.h (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/hv-common.c b/arch/powerpc/perf/hv-common.c index 47e02b366f58..7dce8f109967 100644 --- a/arch/powerpc/perf/hv-common.c +++ b/arch/powerpc/perf/hv-common.c @@ -9,13 +9,13 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps) unsigned long r; struct p { struct hv_get_perf_counter_info_params params; - struct cv_system_performance_capabilities caps; + struct hv_gpci_system_performance_capabilities caps; } __packed __aligned(sizeof(uint64_t)); struct p arg = { .params = { .counter_request = cpu_to_be32( - CIR_SYSTEM_PERFORMANCE_CAPABILITIES), + HV_GPCI_system_performance_capabilities), .starting_index = cpu_to_be32(-1), .counter_info_version_in = 0, } @@ -31,9 +31,9 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps) caps->version = arg.params.counter_info_version_out; caps->collect_privileged = !!arg.caps.perf_collect_privileged; - caps->ga = !!(arg.caps.capability_mask & CV_CM_GA); - 
caps->expanded = !!(arg.caps.capability_mask & CV_CM_EXPANDED); - caps->lab = !!(arg.caps.capability_mask & CV_CM_LAB); + caps->ga = !!(arg.caps.capability_mask & HV_GPCI_CM_GA); + caps->expanded = !!(arg.caps.capability_mask & HV_GPCI_CM_EXPANDED); + caps->lab = !!(arg.caps.capability_mask & HV_GPCI_CM_LAB); return r; } diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h new file mode 100644 index 000000000000..80085444b1a0 --- /dev/null +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -0,0 +1,76 @@ + +#include "req-gen/_begin.h" + +/* + * Based on the document "getPerfCountInfo v1.07" + */ + +/* + * #define REQUEST_NAME counter_request_name + * #define REQUEST_NUM r_num + * #define REQUEST_IDX_KIND starting_index_kind + * #include I(REQUEST_BEGIN) + * REQUEST( + * __field(...) + * __field(...) + * __array(...) + * __count(...) + * ) + * #include I(REQUEST_END) + * + * - starting_index_kind is one of the following, depending on the event: + * + * chip_id: hardware chip id or -1 for current hw chip + * phys_processor_idx: + * 0xffffffffffffffff: or -1, which means it is irrelavant for the event + * + * __count(offset, bytes, name): + * a counter that should be exposed via perf + * __field(offset, bytes, name) + * a normal field + * __array(offset, bytes, name) + * an array of bytes + * + * + * @bytes for __count, and __field _must_ be a numeral token + * in decimal, not an expression and not in hex. + * + * + * TODO: + * - expose secondary index (if any counter ever uses it, only 0xA0 + * appears to use it right now, and it doesn't have any counters) + * - embed versioning info + * - include counter descriptions + */ +#define REQUEST_NAME dispatch_timebase_by_processor +#define REQUEST_NUM 0x10 +#define REQUEST_IDX_KIND "phys_processor_idx=?" 
+#include I(REQUEST_BEGIN) +REQUEST(__count(0, 8, processor_time_in_timebase_cycles) + __field(0x8, 4, hw_processor_id) + __field(0xC, 2, owning_part_id) + __field(0xE, 1, processor_state) + __field(0xF, 1, version) + __field(0x10, 4, hw_chip_id) + __field(0x14, 4, phys_module_id) + __field(0x18, 4, primary_affinity_domain_idx) + __field(0x1C, 4, secondary_affinity_domain_idx) + __field(0x20, 4, processor_version) + __field(0x24, 2, logical_processor_idx) + __field(0x26, 2, reserved) + __field(0x28, 4, processor_id_register) + __field(0x2C, 4, phys_processor_idx) +) +#include I(REQUEST_END) + +#define REQUEST_NAME system_performance_capabilities +#define REQUEST_NUM 0x40 +#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 1, perf_collect_privileged) + __field(0x1, 1, capability_mask) + __array(0x2, 0xE, reserved) +) +#include I(REQUEST_END) + +#include "req-gen/_end.h" diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index a051fe946c63..856fe6e03c2a 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -31,7 +31,18 @@ /* u32 */ EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); /* u32 */ +/* + * Note that starting_index, phys_processor_idx, sibling_part_id, + * hw_chip_id, partition_id all refer to the same bit range. They + * are basically aliases for the starting_index. The specific alias + * used depends on the event. 
See REQUEST_IDX_KIND in hv-gpci-requests.h + */ EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); +EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63); +EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63); +EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63); +EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63); + /* u16 */ EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); /* u8 */ @@ -44,6 +55,10 @@ EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); static struct attribute *format_attrs[] = { &format_attr_request.attr, &format_attr_starting_index.attr, + &format_attr_phys_processor_idx.attr, + &format_attr_sibling_part_id.attr, + &format_attr_hw_chip_id.attr, + &format_attr_partition_id.attr, &format_attr_secondary_index.attr, &format_attr_counter_info_version.attr, @@ -57,6 +72,11 @@ static struct attribute_group format_group = { .attrs = format_attrs, }; +static struct attribute_group event_group = { + .name = "events", + .attrs = hv_gpci_event_attrs, +}; + #define HV_CAPS_ATTR(_name, _format) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *attr, \ @@ -102,6 +122,7 @@ static struct attribute_group interface_group = { static const struct attribute_group *attr_groups[] = { &format_group, + &event_group, &interface_group, NULL, }; @@ -265,6 +286,8 @@ static int hv_gpci_init(void) unsigned long hret; struct hv_perf_caps caps; + hv_gpci_assert_offsets_correct(); + if (!firmware_has_feature(FW_FEATURE_LPAR)) { pr_debug("not a virtualized system, not enabling\n"); return -ENODEV; diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h index b25f460c9cce..86ede8275961 100644 --- a/arch/powerpc/perf/hv-gpci.h +++ b/arch/powerpc/perf/hv-gpci.h @@ -42,32 +42,19 @@ struct hv_get_perf_counter_info_params { */ #define COUNTER_INFO_VERSION_CURRENT 0x8 -/* - * These determine the counter_value[] layout and the meaning of starting_index - * and secondary_index. 
- * - * Unless otherwise noted, @secondary_index is unused and ignored. - */ -enum counter_info_requests { - - /* GENERAL */ - - /* @starting_index: must be -1 (to refer to the current partition) - */ - CIR_SYSTEM_PERFORMANCE_CAPABILITIES = 0X40, +/* capability mask masks. */ +enum { + HV_GPCI_CM_GA = (1 << 7), + HV_GPCI_CM_EXPANDED = (1 << 6), + HV_GPCI_CM_LAB = (1 << 5) }; -struct cv_system_performance_capabilities { - /* If != 0, allowed to collect data from other partitions */ - __u8 perf_collect_privileged; - - /* These following are only valid if counter_info_version >= 0x3 */ -#define CV_CM_GA (1 << 7) -#define CV_CM_EXPANDED (1 << 6) -#define CV_CM_LAB (1 << 5) - /* remaining bits are reserved */ - __u8 capability_mask; - __u8 reserved[0xE]; -} __packed; +#define REQUEST_FILE "../hv-gpci-requests.h" +#define NAME_LOWER hv_gpci +#define NAME_UPPER HV_GPCI +#include "req-gen/perf.h" +#undef REQUEST_FILE +#undef NAME_LOWER +#undef NAME_UPPER #endif diff --git a/arch/powerpc/perf/req-gen/_begin.h b/arch/powerpc/perf/req-gen/_begin.h new file mode 100644 index 000000000000..acfb17a55c16 --- /dev/null +++ b/arch/powerpc/perf/req-gen/_begin.h @@ -0,0 +1,13 @@ +/* Include paths to be used in interface defining headers */ +#ifndef POWERPC_PERF_REQ_GEN_H_ +#define POWERPC_PERF_REQ_GEN_H_ + +#define CAT2_STR_(t, s) __stringify(t/s) +#define CAT2_STR(t, s) CAT2_STR_(t, s) +#define I(...) 
__VA_ARGS__ + +#endif + +#define REQ_GEN_PREFIX req-gen +#define REQUEST_BEGIN CAT2_STR(REQ_GEN_PREFIX, _request-begin.h) +#define REQUEST_END CAT2_STR(REQ_GEN_PREFIX, _request-end.h) diff --git a/arch/powerpc/perf/req-gen/_clear.h b/arch/powerpc/perf/req-gen/_clear.h new file mode 100644 index 000000000000..422974f89573 --- /dev/null +++ b/arch/powerpc/perf/req-gen/_clear.h @@ -0,0 +1,5 @@ + +#undef __field_ +#undef __count_ +#undef __array_ +#undef REQUEST_ diff --git a/arch/powerpc/perf/req-gen/_end.h b/arch/powerpc/perf/req-gen/_end.h new file mode 100644 index 000000000000..8a406980b6bf --- /dev/null +++ b/arch/powerpc/perf/req-gen/_end.h @@ -0,0 +1,4 @@ + +#undef REQ_GEN_PREFIX +#undef REQUEST_BEGIN +#undef REQUEST_END diff --git a/arch/powerpc/perf/req-gen/_request-begin.h b/arch/powerpc/perf/req-gen/_request-begin.h new file mode 100644 index 000000000000..f6d98642cf1d --- /dev/null +++ b/arch/powerpc/perf/req-gen/_request-begin.h @@ -0,0 +1,15 @@ + +#define REQUEST(r_contents) \ + REQUEST_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, I(r_contents)) + +#define __field(f_offset, f_bytes, f_name) \ + __field_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \ + f_offset, f_bytes, f_name) + +#define __array(f_offset, f_bytes, f_name) \ + __array_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \ + f_offset, f_bytes, f_name) + +#define __count(f_offset, f_bytes, f_name) \ + __count_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \ + f_offset, f_bytes, f_name) diff --git a/arch/powerpc/perf/req-gen/_request-end.h b/arch/powerpc/perf/req-gen/_request-end.h new file mode 100644 index 000000000000..5573be6c3588 --- /dev/null +++ b/arch/powerpc/perf/req-gen/_request-end.h @@ -0,0 +1,8 @@ +#undef REQUEST +#undef __field +#undef __array +#undef __count + +#undef REQUEST_NAME +#undef REQUEST_NUM +#undef REQUEST_IDX_KIND diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h new file mode 100644 index 000000000000..1b122469323d --- /dev/null +++ 
b/arch/powerpc/perf/req-gen/perf.h @@ -0,0 +1,155 @@ +#ifndef LINUX_POWERPC_PERF_REQ_GEN_PERF_H_ +#define LINUX_POWERPC_PERF_REQ_GEN_PERF_H_ + +#include <linux/perf_event.h> + +#ifndef REQUEST_FILE +#error "REQUEST_FILE must be defined before including" +#endif + +#ifndef NAME_LOWER +#error "NAME_LOWER must be defined before including" +#endif + +#ifndef NAME_UPPER +#error "NAME_UPPER must be defined before including" +#endif + +#define BE_TYPE_b1 __u8 +#define BE_TYPE_b2 __be16 +#define BE_TYPE_b4 __be32 +#define BE_TYPE_b8 __be64 + +#define BYTES_TO_BE_TYPE(bytes) \ + BE_TYPE_b##bytes + +#define CAT2_(a, b) a ## b +#define CAT2(a, b) CAT2_(a, b) +#define CAT3_(a, b, c) a ## b ## c +#define CAT3(a, b, c) CAT3_(a, b, c) + +/* + * enumerate the request values as + * <NAME_UPPER>_<request name> = <request value> + */ +#define REQUEST_VALUE__(name_upper, r_name) name_upper ## _ ## r_name +#define REQUEST_VALUE_(name_upper, r_name) REQUEST_VALUE__(name_upper, r_name) +#define REQUEST_VALUE(r_name) REQUEST_VALUE_(NAME_UPPER, r_name) + +#include "_clear.h" +#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ + REQUEST_VALUE(r_name) = r_value, +enum CAT2(NAME_LOWER, _requests) { +#include REQUEST_FILE +}; + +/* + * For each request: + * struct <NAME_LOWER>_<request name> { + * r_fields + * }; + */ +#include "_clear.h" +#define STRUCT_NAME__(name_lower, r_name) name_lower ## _ ## r_name +#define STRUCT_NAME_(name_lower, r_name) STRUCT_NAME__(name_lower, r_name) +#define STRUCT_NAME(r_name) STRUCT_NAME_(NAME_LOWER, r_name) +#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ +struct STRUCT_NAME(r_name) { \ + r_fields \ +}; +#define __field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \ + BYTES_TO_BE_TYPE(f_bytes) f_name; +#define __count_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \ + __field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) +#define __array_(r_name, r_value, r_idx_1, a_offset, a_bytes, a_name) \ + __u8 a_name[a_bytes]; + +#include REQUEST_FILE + +/* + * Generate a check of the field offsets + * 
<NAME_LOWER>_assert_offsets_correct() + */ +#include "_clear.h" +#define REQUEST_(r_name, r_value, index, r_fields) \ +r_fields +#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name) \ + BUILD_BUG_ON(offsetof(struct STRUCT_NAME(r_name), f_name) != f_offset); +#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \ + __field_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) +#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) \ + __field_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) + +static inline void CAT2(NAME_LOWER, _assert_offsets_correct)(void) +{ +#include REQUEST_FILE +} + +/* + * Generate event attributes: + * PMU_EVENT_ATTR_STRING(<request name>_<field name>, + * <NAME_LOWER>_event_attr_<request name>_<field name>, + * "request=<request value>" + * "starting_index=<starting index type>" + * "counter_info_version=CURRENT_COUNTER_INFO_VERSION" + * "length=<field length>" + * "offset=<field offset>") + * + * TODO: counter_info_version may need to vary, we should interpret the + * value to some extent + */ +#define EVENT_ATTR_NAME__(name, r_name, c_name) \ + name ## _event_attr_ ## r_name ## _ ## c_name +#define EVENT_ATTR_NAME_(name, r_name, c_name) \ + EVENT_ATTR_NAME__(name, r_name, c_name) +#define EVENT_ATTR_NAME(r_name, c_name) \ + EVENT_ATTR_NAME_(NAME_LOWER, r_name, c_name) + +#include "_clear.h" +#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name) +#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) +#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \ +PMU_EVENT_ATTR_STRING( \ + CAT3(r_name, _, c_name), \ + EVENT_ATTR_NAME(r_name, c_name), \ + "request=" __stringify(r_value) "," \ + r_idx_1 "," \ + "counter_info_version=" \ + __stringify(COUNTER_INFO_VERSION_CURRENT) "," \ + "length=" #c_size "," \ + "offset=" #c_offset) +#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ + r_fields + +#include REQUEST_FILE + +/* + * Define event attribute array + * static struct attribute *hv_gpci_event_attrs[] = { + * &<NAME_LOWER>_event_attr_<request name>_<field name>.attr.attr, + * }; + */ +#include "_clear.h" +#define 
__field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name) +#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \ + &EVENT_ATTR_NAME(r_name, c_name).attr.attr, +#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) +#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ + r_fields + +static __maybe_unused struct attribute *hv_gpci_event_attrs[] = { +#include REQUEST_FILE + NULL +}; + +/* cleanup */ +#include "_clear.h" +#undef EVENT_ATTR_NAME +#undef EVENT_ATTR_NAME_ +#undef BIT_NAME +#undef BIT_NAME_ +#undef STRUCT_NAME +#undef REQUEST_VALUE +#undef REQUEST_VALUE_ + +#endif -- cgit v1.2.3 From 97bf2640184f4fb2b2bf2c58ae3112768a6174fa Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:46:02 -0800 Subject: powerpc/perf/hv-gpci: add the remaining gpci requests Add the remaining gpci requests that contain counters suitable for use by perf. Omit those that don't contain any counters (but note their omission). Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-gpci-requests.h | 187 ++++++++++++++++++++++++++++++++++- 1 file changed, 186 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/perf') diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index 80085444b1a0..acd17648cd18 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -20,7 +20,9 @@ * * - starting_index_kind is one of the following, depending on the event: * - * chip_id: hardware chip id or -1 for current hw chip + * hw_chip_id: hardware chip id or -1 for current hw chip + * partition_id + * sibling_part_id, * phys_processor_idx: * 0xffffffffffffffff: or -1, which means it is irrelavant for the event * @@ -63,6 +65,33 @@ REQUEST(__count(0, 8, processor_time_in_timebase_cycles) ) #include I(REQUEST_END) +#define REQUEST_NAME entitled_capped_uncapped_donated_idle_timebase_by_partition +#define REQUEST_NUM 
0x20 +#define REQUEST_IDX_KIND "sibling_part_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 8, partition_id) + __count(0x8, 8, entitled_cycles) + __count(0x10, 8, consumed_capped_cycles) + __count(0x18, 8, consumed_uncapped_cycles) + __count(0x20, 8, cycles_donated) + __count(0x28, 8, purr_idle_cycles) +) +#include I(REQUEST_END) + +/* + * Not available for counter_info_version >= 0x8, use + * run_instruction_cycles_by_partition(0x100) instead. + */ +#define REQUEST_NAME run_instructions_run_cycles_by_partition +#define REQUEST_NUM 0x30 +#define REQUEST_IDX_KIND "sibling_part_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 8, partition_id) + __count(0x8, 8, instructions_completed) + __count(0x10, 8, cycles) +) +#include I(REQUEST_END) + #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 #define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" @@ -73,4 +102,160 @@ REQUEST(__field(0, 1, perf_collect_privileged) ) #include I(REQUEST_END) +#define REQUEST_NAME processor_bus_utilization_abc_links +#define REQUEST_NUM 0x50 +#define REQUEST_IDX_KIND "hw_chip_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4, 0xC, reserved1) + __count(0x10, 8, total_link_cycles) + __count(0x18, 8, idle_cycles_for_a_link) + __count(0x20, 8, idle_cycles_for_b_link) + __count(0x28, 8, idle_cycles_for_c_link) + __array(0x30, 0x20, reserved2) +) +#include I(REQUEST_END) + +#define REQUEST_NAME processor_bus_utilization_wxyz_links +#define REQUEST_NUM 0x60 +#define REQUEST_IDX_KIND "hw_chip_id=?" 
+#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4, 0xC, reserved1) + __count(0x10, 8, total_link_cycles) + __count(0x18, 8, idle_cycles_for_w_link) + __count(0x20, 8, idle_cycles_for_x_link) + __count(0x28, 8, idle_cycles_for_y_link) + __count(0x30, 8, idle_cycles_for_z_link) + __array(0x38, 0x28, reserved2) +) +#include I(REQUEST_END) + +#define REQUEST_NAME processor_bus_utilization_gx_links +#define REQUEST_NUM 0x70 +#define REQUEST_IDX_KIND "hw_chip_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4, 0xC, reserved1) + __count(0x10, 8, gx0_in_address_cycles) + __count(0x18, 8, gx0_in_data_cycles) + __count(0x20, 8, gx0_in_retries) + __count(0x28, 8, gx0_in_bus_cycles) + __count(0x30, 8, gx0_in_cycles_total) + __count(0x38, 8, gx0_out_address_cycles) + __count(0x40, 8, gx0_out_data_cycles) + __count(0x48, 8, gx0_out_retries) + __count(0x50, 8, gx0_out_bus_cycles) + __count(0x58, 8, gx0_out_cycles_total) + __count(0x60, 8, gx1_in_address_cycles) + __count(0x68, 8, gx1_in_data_cycles) + __count(0x70, 8, gx1_in_retries) + __count(0x78, 8, gx1_in_bus_cycles) + __count(0x80, 8, gx1_in_cycles_total) + __count(0x88, 8, gx1_out_address_cycles) + __count(0x90, 8, gx1_out_data_cycles) + __count(0x98, 8, gx1_out_retries) + __count(0xA0, 8, gx1_out_bus_cycles) + __count(0xA8, 8, gx1_out_cycles_total) +) +#include I(REQUEST_END) + +#define REQUEST_NAME processor_bus_utilization_mc_links +#define REQUEST_NUM 0x80 +#define REQUEST_IDX_KIND "hw_chip_id=?" 
+#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4, 0xC, reserved1) + __count(0x10, 8, mc0_frames) + __count(0x18, 8, mc0_reads) + __count(0x20, 8, mc0_write) + __count(0x28, 8, mc0_total_cycles) + __count(0x30, 8, mc1_frames) + __count(0x38, 8, mc1_reads) + __count(0x40, 8, mc1_writes) + __count(0x48, 8, mc1_total_cycles) +) +#include I(REQUEST_END) + +/* Processor_config (0x90) skipped, no counters */ +/* Current_processor_frequency (0x91) skipped, no counters */ + +#define REQUEST_NAME processor_core_utilization +#define REQUEST_NUM 0x94 +#define REQUEST_IDX_KIND "phys_processor_idx=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, phys_processor_idx) + __field(0x4, 4, hw_processor_id) + __count(0x8, 8, cycles_across_any_thread) + __count(0x10, 8, timebase_at_collection) + __count(0x18, 8, purr_cycles) + __count(0x20, 8, sum_of_cycles_across_all_threads) + __count(0x28, 8, instructions_completed) +) +#include I(REQUEST_END) + +/* Processor_core_power_mode (0x95) skipped, no counters */ +/* Affinity_domain_information_by_virtual_processor (0xA0) skipped, + * no counters */ +/* Affinity_domain_information_by_domain (0xB0) skipped, no counters */ +/* Affinity_domain_information_by_partition (0xB1) skipped, no counters */ +/* Physical_memory_info (0xC0) skipped, no counters */ +/* Processor_bus_topology (0xD0) skipped, no counters */ + +#define REQUEST_NAME partition_hypervisor_queuing_times +#define REQUEST_NUM 0xE0 +#define REQUEST_IDX_KIND "partition_id=?" 
+#include I(REQUEST_BEGIN) +REQUEST(__field(0, 2, partition_id) + __array(0x2, 6, reserved1) + __count(0x8, 8, time_waiting_for_entitlement) + __count(0x10, 8, times_waited_for_entitlement) + __count(0x18, 8, time_waiting_for_phys_processor) + __count(0x20, 8, times_waited_for_phys_processor) + __count(0x28, 8, dispatches_on_home_core) + __count(0x30, 8, dispatches_on_home_primary_affinity_domain) + __count(0x38, 8, dispatches_on_home_secondary_affinity_domain) + __count(0x40, 8, dispatches_off_home_secondary_affinity_domain) + __count(0x48, 8, dispatches_on_dedicated_processor_donating_cycles) +) +#include I(REQUEST_END) + +#define REQUEST_NAME system_hypervisor_times +#define REQUEST_NUM 0xF0 +#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#include I(REQUEST_BEGIN) +REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) + __count(0x8, 8, time_spent_processing_virtual_processor_timers) + __count(0x10, 8, time_spent_managing_partitions_over_entitlement) + __count(0x18, 8, time_spent_on_system_management) +) +#include I(REQUEST_END) + +#define REQUEST_NAME system_tlbie_count_and_time +#define REQUEST_NUM 0xF4 +#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#include I(REQUEST_BEGIN) +REQUEST(__count(0, 8, tlbie_instructions_issued) + /* + * FIXME: The spec says the offset here is 0x10, which I suspect + * is wrong. + */ + __count(0x8, 8, time_spent_issuing_tlbies) +) +#include I(REQUEST_END) + +#define REQUEST_NAME partition_instruction_count_and_time +#define REQUEST_NUM 0x100 +#define REQUEST_IDX_KIND "partition_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 2, partition_id) + __array(0x2, 0x6, reserved1) + __count(0x8, 8, instructions_performed) + __count(0x10, 8, time_collected) +) +#include I(REQUEST_END) + +/* set_mmcrh (0x80001000) skipped, no counters */ +/* retrieve_hpmcx (0x80002000) skipped, no counters */ + #include "req-gen/_end.h" -- cgit v1.2.3