diff options
Diffstat (limited to 'arch/powerpc/kernel/smp.c')
-rw-r--r-- | arch/powerpc/kernel/smp.c | 241 |
1 files changed, 172 insertions, 69 deletions
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8c2857cbd960..2b9b1bb4c5f2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -76,6 +76,7 @@ static DEFINE_PER_CPU(int, cpu_state) = { 0 }; struct task_struct *secondary_current; bool has_big_cores; bool coregroup_enabled; +bool thread_group_shares_l2; DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map); @@ -99,6 +100,7 @@ enum { #define MAX_THREAD_LIST_SIZE 8 #define THREAD_GROUP_SHARE_L1 1 +#define THREAD_GROUP_SHARE_L2 2 struct thread_groups { unsigned int property; unsigned int nr_groups; @@ -106,11 +108,27 @@ struct thread_groups { unsigned int thread_list[MAX_THREAD_LIST_SIZE]; }; +/* Maximum number of properties that groups of threads within a core can share */ +#define MAX_THREAD_GROUP_PROPERTIES 2 + +struct thread_groups_list { + unsigned int nr_properties; + struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES]; +}; + +static struct thread_groups_list tgl[NR_CPUS] __initdata; /* - * On big-cores system, cpu_l1_cache_map for each CPU corresponds to + * On big-cores system, thread_group_l1_cache_map for each CPU corresponds to * the set its siblings that share the L1-cache. */ -DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map); +DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map); + +/* + * On some big-cores system, thread_group_l2_cache_map for each CPU + * corresponds to the set its siblings within the core that share the + * L2-cache. + */ +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map); /* SMP operations for this machine */ struct smp_ops_t *smp_ops; @@ -695,81 +713,100 @@ static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int), /* * parse_thread_groups: Parses the "ibm,thread-groups" device tree * property for the CPU device node @dn and stores - * the parsed output in the thread_groups - * structure @tg if the ibm,thread-groups[0] - * matches @property. + * the parsed output in the thread_groups_list + * structure @tglp. * * @dn: The device node of the CPU device. - * @tg: Pointer to a thread group structure into which the parsed + * @tglp: Pointer to a thread group list structure into which the parsed * output of "ibm,thread-groups" is stored. - * @property: The property of the thread-group that the caller is - * interested in. * * ibm,thread-groups[0..N-1] array defines which group of threads in * the CPU-device node can be grouped together based on the property. * - * ibm,thread-groups[0] tells us the property based on which the + * This array can represent thread groupings for multiple properties. + * + * ibm,thread-groups[i + 0] tells us the property based on which the * threads are being grouped together. If this value is 1, it implies - * that the threads in the same group share L1, translation cache. + * that the threads in the same group share L1, translation cache. If + * the value is 2, it implies that the threads in the same group share + * the same L2 cache. * - * ibm,thread-groups[1] tells us how many such thread groups exist. + * ibm,thread-groups[i+1] tells us how many such thread groups exist for the + * property ibm,thread-groups[i] * - * ibm,thread-groups[2] tells us the number of threads in each such + * ibm,thread-groups[i+2] tells us the number of threads in each such * group. + * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then, * - * ibm,thread-groups[3..N-1] is the list of threads identified by + * ibm,thread-groups[i+3..i+k+2] (is the list of threads identified by * "ibm,ppc-interrupt-server#s" arranged as per their membership in * the grouping. * - * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it - * implies that there are 2 groups of 4 threads each, where each group - * of threads share L1, translation cache. + * Example: + * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15] + * This can be decomposed up into two consecutive arrays: + * a) [1,2,4,8,10,12,14,9,11,13,15] + * b) [2,2,4,8,10,12,14,9,11,13,15] + * + * where in, + * + * a) provides information of Property "1" being shared by "2" groups, + * each with "4" threads each. The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "1" is indicative of the thread in the + * group sharing L1 cache, translation cache and Instruction Data + * flow. * - * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8} - * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10, - * 11, 12} structure + * b) provides information of Property "2" being shared by "2" groups, + * each group with "4" threads. The "ibm,ppc-interrupt-server#s" of + * the first group is {8,10,12,14} and the + * "ibm,ppc-interrupt-server#s" of the second group is + * {9,11,13,15}. Property "2" indicates that the threads in each + * group share the L2-cache. * * Returns 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. */ static int parse_thread_groups(struct device_node *dn, - struct thread_groups *tg, - unsigned int property) + struct thread_groups_list *tglp) { - int i; - u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE]; - u32 *thread_list; + unsigned int property_idx = 0; + u32 *thread_group_array; size_t total_threads; - int ret; + int ret = 0, count; + u32 *thread_list; + int i = 0; + count = of_property_count_u32_elems(dn, "ibm,thread-groups"); + thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL); ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, 3); + thread_group_array, count); if (ret) - return ret; - - tg->property = thread_group_array[0]; - tg->nr_groups = thread_group_array[1]; - tg->threads_per_group = thread_group_array[2]; - if (tg->property != property || - tg->nr_groups < 1 || - tg->threads_per_group < 1) - return -ENODATA; + goto out_free; - total_threads = tg->nr_groups * tg->threads_per_group; + while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) { + int j; + struct thread_groups *tg = &tglp->property_tgs[property_idx++]; - ret = of_property_read_u32_array(dn, "ibm,thread-groups", - thread_group_array, - 3 + total_threads); - if (ret) - return ret; + tg->property = thread_group_array[i]; + tg->nr_groups = thread_group_array[i + 1]; + tg->threads_per_group = thread_group_array[i + 2]; + total_threads = tg->nr_groups * tg->threads_per_group; - thread_list = &thread_group_array[3]; + thread_list = &thread_group_array[i + 3]; - for (i = 0 ; i < total_threads; i++) - tg->thread_list[i] = thread_list[i]; + for (j = 0; j < total_threads; j++) + tg->thread_list[j] = thread_list[j]; + i = i + 3 + total_threads; + } - return 0; + tglp->nr_properties = property_idx; + +out_free: + kfree(thread_group_array); + return ret; } /* @@ -805,50 +842,84 @@ static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg) return -1; } -static int init_cpu_l1_cache_map(int cpu) - +static struct thread_groups *__init get_thread_groups(int cpu, + int group_property, + int *err) { struct device_node *dn = of_get_cpu_node(cpu, NULL); - struct thread_groups tg = {.property = 0, - .nr_groups = 0, - .threads_per_group = 0}; + struct thread_groups_list *cpu_tgl = &tgl[cpu]; + struct thread_groups *tg = NULL; + int i; + *err = 0; + + if (!dn) { + *err = -ENODATA; + return NULL; + } + + if (!cpu_tgl->nr_properties) { + *err = parse_thread_groups(dn, cpu_tgl); + if (*err) + goto out; + } + + for (i = 0; i < cpu_tgl->nr_properties; i++) { + if (cpu_tgl->property_tgs[i].property == group_property) { + tg = &cpu_tgl->property_tgs[i]; + break; + } + } + + if (!tg) + *err = -EINVAL; +out: + of_node_put(dn); + return tg; +} + +static int __init init_thread_group_cache_map(int cpu, int cache_property) + +{ int first_thread = cpu_first_thread_sibling(cpu); int i, cpu_group_start = -1, err = 0; + struct thread_groups *tg = NULL; + cpumask_var_t *mask = NULL; - if (!dn) - return -ENODATA; + if (cache_property != THREAD_GROUP_SHARE_L1 && + cache_property != THREAD_GROUP_SHARE_L2) + return -EINVAL; - err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1); - if (err) - goto out; + tg = get_thread_groups(cpu, cache_property, &err); + if (!tg) + return err; - cpu_group_start = get_cpu_thread_group_start(cpu, &tg); + cpu_group_start = get_cpu_thread_group_start(cpu, tg); if (unlikely(cpu_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu), - GFP_KERNEL, cpu_to_node(cpu)); + if (cache_property == THREAD_GROUP_SHARE_L1) + mask = &per_cpu(thread_group_l1_cache_map, cpu); + else if (cache_property == THREAD_GROUP_SHARE_L2) + mask = &per_cpu(thread_group_l2_cache_map, cpu); + + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu)); for (i = first_thread; i < first_thread + threads_per_core; i++) { - int i_group_start = get_cpu_thread_group_start(i, &tg); + int i_group_start = get_cpu_thread_group_start(i, tg); if (unlikely(i_group_start == -1)) { WARN_ON_ONCE(1); - err = -ENODATA; - goto out; + return -ENODATA; } if (i_group_start == cpu_group_start) - cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu)); + cpumask_set_cpu(i, *mask); } -out: - of_node_put(dn); - return err; + return 0; } static bool shared_caches; @@ -924,7 +995,7 @@ static int init_big_cores(void) int cpu; for_each_possible_cpu(cpu) { - int err = init_cpu_l1_cache_map(cpu); + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1); if (err) return err; @@ -935,6 +1006,16 @@ static int init_big_cores(void) } has_big_cores = true; + + for_each_possible_cpu(cpu) { + int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2); + + if (err) + return err; + } + + thread_group_shares_l2 = true; + pr_debug("L2 cache only shared by the threads in the small core\n"); return 0; } @@ -1249,6 +1330,28 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask) if (has_big_cores) submask_fn = cpu_smallcore_mask; + /* + * If the threads in a thread-group share L2 cache, then the + * L2-mask can be obtained from thread_group_l2_cache_map. + */ + if (thread_group_shares_l2) { + cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu)); + + for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) { + if (cpu_online(i)) + set_cpus_related(i, cpu, cpu_l2_cache_mask); + } + + /* Verify that L1-cache siblings are a subset of L2 cache-siblings */ + if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) && + !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) { + pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n", + cpu); + } + + return true; + } + l2_cache = cpu_to_l2cache(cpu); if (!l2_cache || !*mask) { /* Assume only core siblings share cache with this CPU */ @@ -1320,7 +1423,7 @@ static inline void add_cpu_to_smallcore_masks(int cpu) cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu)); - for_each_cpu(i, per_cpu(cpu_l1_cache_map, cpu)) { + for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) { if (cpu_online(i)) set_cpus_related(i, cpu, cpu_smallcore_mask); } |