x86/sched: Add basic support for CPU capacity scaling

In order be able to compute the sizes of tasks consistently across all CPUs in a hybrid system, it is necessary to provide CPU capacity scaling information to the scheduler via arch_scale_cpu_capacity(). Moreover, the value returned by arch_scale_freq_capacity() for the given CPU must correspond to the arch_scale_cpu_capacity() return value for it, or utilization computations will be inaccurate. Add support for it through per-CPU variables holding the capacity and maximum-to-base frequency ratio (times SCHED_CAPACITY_SCALE) that will be returned by arch_scale_cpu_capacity() and used by scale_freq_tick() to compute arch_freq_scale for the current CPU, respectively. In order to avoid adding measurable overhead for non-hybrid x86 systems, which are the vast majority in the field, whether or not the new hybrid CPU capacity scaling will be in effect is controlled by a static key. This static key is set by calling arch_enable_hybrid_capacity_scale() which also allocates memory for the per-CPU data and initializes it. Next, arch_set_cpu_capacity() is used to set the per-CPU variables mentioned above for each CPU and arch_rebuild_sched_domains() needs to be called for the scheduler to realize that capacity-aware scheduling can be used going forward. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> Tested-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com> # scale invariance Link: https://patch.msgid.link/10523497.nUPlyArG6x@rjwysocki.net [ rjw: Added parens to function kerneldoc comments ] Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2024-08-28 13:47:25 +0200
committer: Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2024-09-04 13:36:40 +0200
commit: 5a9d10145a54f7a3fb6297c0082bf030e04db3bc (patch)
tree: bb0fd666a1af2310d8f9e08f08b812244a3c88d8
parent: c4a6c82c9e834e40cba258e1f5e46cebb5cd1c24 (diff)
download: linux-5a9d10145a54f7a3fb6297c0082bf030e04db3bc.tar.gz
linux-5a9d10145a54f7a3fb6297c0082bf030e04db3bc.tar.bz2
linux-5a9d10145a54f7a3fb6297c0082bf030e04db3bc.zip
2 files changed, 100 insertions, 2 deletions
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index abe3a8f22cbd..aef70336d624 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -282,9 +282,22 @@ static inline long arch_scale_freq_capacity(int cpu)
 }
 #define arch_scale_freq_capacity arch_scale_freq_capacity
 
+bool arch_enable_hybrid_capacity_scale(void);
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
+			   unsigned long cap_freq, unsigned long base_freq);
+
+unsigned long arch_scale_cpu_capacity(int cpu);
+#define arch_scale_cpu_capacity arch_scale_cpu_capacity
+
 extern void arch_set_max_freq_ratio(bool turbo_disabled);
 extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
 #else
+static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
+static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
+					 unsigned long max_cap,
+					 unsigned long cap_freq,
+					 unsigned long base_freq) { }
+
 static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
 static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
 #endif
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 0b69bfbf345d..f642de2ebdac 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -349,9 +349,89 @@ static DECLARE_WORK(disable_freq_invariance_work,
 DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
 EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);
 
+static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key);
+
+struct arch_hybrid_cpu_scale {
+	unsigned long capacity;
+	unsigned long freq_ratio;
+};
+
+static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale;
+
+/**
+ * arch_enable_hybrid_capacity_scale() - Enable hybrid CPU capacity scaling
+ *
+ * Allocate memory for per-CPU data used by hybrid CPU capacity scaling,
+ * initialize it and set the static key controlling its code paths.
+ *
+ * Must be called before arch_set_cpu_capacity().
+ */
+bool arch_enable_hybrid_capacity_scale(void)
+{
+	int cpu;
+
+	if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) {
+		WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled");
+		return true;
+	}
+
+	arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale);
+	if (!arch_cpu_scale)
+		return false;
+
+	for_each_possible_cpu(cpu) {
+		per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE;
+		per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio;
+	}
+
+	static_branch_enable(&arch_hybrid_cap_scale_key);
+
+	pr_info("Hybrid CPU capacity scaling enabled\n");
+
+	return true;
+}
+
+/**
+ * arch_set_cpu_capacity() - Set scale-invariance parameters for a CPU
+ * @cpu: Target CPU.
+ * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap.
+ * @max_cap: System-wide maximum CPU capacity.
+ * @cap_freq: Frequency of @cpu corresponding to @cap.
+ * @base_freq: Frequency of @cpu at which MPERF counts.
+ *
+ * The units in which @cap and @max_cap are expressed do not matter, so long
+ * as they are consistent, because the former is effectively divided by the
+ * latter.  Analogously for @cap_freq and @base_freq.
+ *
+ * After calling this function for all CPUs, call arch_rebuild_sched_domains()
+ * to let the scheduler know that capacity-aware scheduling can be used going
+ * forward.
+ */
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
+			   unsigned long cap_freq, unsigned long base_freq)
+{
+	if (static_branch_likely(&arch_hybrid_cap_scale_key)) {
+		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity,
+			   div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap));
+		WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio,
+			   div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq));
+	} else {
+		WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled");
+	}
+}
+
+unsigned long arch_scale_cpu_capacity(int cpu)
+{
+	if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
+		return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity);
+
+	return SCHED_CAPACITY_SCALE;
+}
+EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity);
+
 static void scale_freq_tick(u64 acnt, u64 mcnt)
 {
-	u64 freq_scale;
+	u64 freq_scale, freq_ratio;
 
 	if (!arch_scale_freq_invariant())
 		return;
@@ -359,7 +439,12 @@ static void scale_freq_tick(u64 acnt, u64 mcnt)
 	if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
 		goto error;
 
-	if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
+	if (static_branch_unlikely(&arch_hybrid_cap_scale_key))
+		freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio);
+	else
+		freq_ratio = arch_max_freq_ratio;
+
+	if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt)
 		goto error;
 
 	freq_scale = div64_u64(acnt, mcnt);
author	Rafael J. Wysocki <rafael.j.wysocki@intel.com>	2024-08-28 13:47:25 +0200
committer	Rafael J. Wysocki <rafael.j.wysocki@intel.com>	2024-09-04 13:36:40 +0200
commit	5a9d10145a54f7a3fb6297c0082bf030e04db3bc (patch)
tree	bb0fd666a1af2310d8f9e08f08b812244a3c88d8
parent	c4a6c82c9e834e40cba258e1f5e46cebb5cd1c24 (diff)
download	linux-5a9d10145a54f7a3fb6297c0082bf030e04db3bc.tar.gz linux-5a9d10145a54f7a3fb6297c0082bf030e04db3bc.tar.bz2 linux-5a9d10145a54f7a3fb6297c0082bf030e04db3bc.zip