author    Linus Torvalds <torvalds@linux-foundation.org>  2020-03-30 17:01:51 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2020-03-30 17:01:51 -0700
commit    642e53ead6aea8740a219ede509a5d138fd4f780 (patch)
tree      5c4680d0c07315dab24fe7333c62f56bc19ec4e4 /kernel/sched/sched.h
parent    9b82f05f869a823d43ea4186f5f732f2924d3693 (diff)
parent    313f16e2e35abb833eab5bdebc6ae30699adca18 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle are:

   - Various NUMA scheduling updates: harmonize the load-balancer and
     NUMA placement logic to not work against each other. The intended
     result is better locality, better utilization and fewer migrations.

   - Introduce Thermal Pressure tracking and optimizations, to improve
     task placement on thermally overloaded systems.

   - Implement frequency invariant scheduler accounting on (some) x86
     CPUs. This is done by observing and sampling the 'recent' CPU
     frequency average at ~tick boundaries. The CPU provides this data
     via the APERF/MPERF MSRs. This hopefully makes our capacity
     estimates more precise and keeps tasks on the same CPU better even
     if it might seem overloaded at a lower momentary frequency. (As
     usual, turbo mode is a complication that we resolve by observing
     the maximum frequency and renormalizing to it.)

   - Add asymmetric CPU capacity wakeup scan to improve capacity
     utilization on asymmetric topologies. (big.LITTLE systems)

   - PSI fixes and optimizations.

   - RT scheduling capacity awareness fixes & improvements.

   - Optimize the CONFIG_RT_GROUP_SCHED constraints code.

   - Misc fixes, cleanups and optimizations - see the changelog for
     details"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (62 commits)
  threads: Update PID limit comment according to futex UAPI change
  sched/fair: Fix condition of avg_load calculation
  sched/rt: cpupri_find: Trigger a full search as fallback
  kthread: Do not preempt current task if it is going to call schedule()
  sched/fair: Improve spreading of utilization
  sched: Avoid scale real weight down to zero
  psi: Move PF_MEMSTALL out of task->flags
  MAINTAINERS: Add maintenance information for psi
  psi: Optimize switching tasks inside shared cgroups
  psi: Fix cpu.pressure for cpu.max and competing cgroups
  sched/core: Distribute tasks within affinity masks
  sched/fair: Fix enqueue_task_fair warning
  thermal/cpu-cooling, sched/core: Move the arch_set_thermal_pressure() API to generic scheduler code
  sched/rt: Remove unnecessary push for unfit tasks
  sched/rt: Allow pulling unfitting task
  sched/rt: Optimize cpupri_find() on non-heterogenous systems
  sched/rt: Re-instate old behavior in select_task_rq_rt()
  sched/rt: cpupri_find: Implement fallback mechanism for !fit case
  sched/fair: Fix reordering of enqueue/dequeue_task_fair()
  sched/fair: Fix runnable_avg for throttled cfs
  ...
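As a rough illustration of the frequency-invariance idea described above (sample the APERF/MPERF deltas at tick boundaries, then renormalize against the turbo ratio), here is a minimal user-space sketch. It is not the kernel's x86 implementation; every name in it is made up for illustration, and max_freq_ratio stands in for a precomputed turbo/base frequency ratio in 10-bit fixed point.

#include <stdint.h>
#include <stdio.h>

#define CAPACITY_SCALE 1024ULL	/* capacity is expressed on a 0..1024 scale */

/* hypothetical per-tick snapshot of the two MSR counters */
struct freq_sample {
	uint64_t aperf;		/* counts at the actual, current frequency */
	uint64_t mperf;		/* counts at a fixed reference frequency */
};

/* max_freq_ratio: (turbo_freq / base_freq) in 10-bit fixed point */
static uint64_t freq_scale(const struct freq_sample *prev,
			   const struct freq_sample *cur,
			   uint64_t max_freq_ratio)
{
	uint64_t acnt = cur->aperf - prev->aperf;
	uint64_t mcnt = cur->mperf - prev->mperf;
	uint64_t scale;

	if (!mcnt || !max_freq_ratio)
		return CAPACITY_SCALE;	/* no data: behave as if invariance were off */

	/* (acnt / mcnt) / (turbo / base), expressed against the 1024 scale */
	scale = (acnt * CAPACITY_SCALE * CAPACITY_SCALE) / (mcnt * max_freq_ratio);
	return scale > CAPACITY_SCALE ? CAPACITY_SCALE : scale;
}

int main(void)
{
	struct freq_sample prev = { 0, 0 };
	struct freq_sample cur  = { 2000000, 2000000 };

	/* base 2.0 GHz, turbo 3.0 GHz -> ratio of roughly 1536 */
	printf("scale = %llu\n", (unsigned long long)freq_scale(&prev, &cur, 1536));
	/* prints 682, i.e. ~2/3 of peak capacity while running at base clock */
	return 0;
}

Renormalizing against the turbo ratio is what the message above alludes to: only sustained turbo operation maps to full (1024) capacity, so utilization accrued at lower clocks is scaled down accordingly.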
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--  kernel/sched/sched.h  69
1 file changed, 58 insertions(+), 11 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fdc77e796324..464742874be3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -118,7 +118,13 @@ extern long calc_load_fold_active(struct rq *this_rq, long adjust);
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
-# define scale_load_down(w) ((w) >> SCHED_FIXEDPOINT_SHIFT)
+# define scale_load_down(w) \
+({ \
+	unsigned long __w = (w); \
+	if (__w) \
+		__w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
+	__w; \
+})
#else
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) (w)
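The reworked scale_load_down() above pairs with the "sched: Avoid scale real weight down to zero" entry in the commit list: a non-zero weight can no longer collapse to 0 after the fixed-point shift, it now bottoms out at 2. A small stand-alone sketch of the before/after behaviour, assuming SCHED_FIXEDPOINT_SHIFT is 10 as on 64-bit kernels:

#include <stdio.h>

#define SCHED_FIXEDPOINT_SHIFT 10	/* the 64-bit kernel's fixed-point shift */

/* previous behaviour: small weights shift down to zero */
static unsigned long old_scale_load_down(unsigned long w)
{
	return w >> SCHED_FIXEDPOINT_SHIFT;
}

/* new behaviour: any non-zero weight keeps a floor of 2 */
static unsigned long new_scale_load_down(unsigned long w)
{
	if (w) {
		unsigned long s = w >> SCHED_FIXEDPOINT_SHIFT;
		w = s > 2UL ? s : 2UL;
	}
	return w;
}

int main(void)
{
	unsigned long w = 437;	/* e.g. a low-share group entity's weight */

	printf("old: %lu  new: %lu\n",
	       old_scale_load_down(w), new_scale_load_down(w));
	/* prints "old: 0  new: 2": the entity is no longer weightless */
	return 0;
}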
@@ -305,7 +311,6 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}
-extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
extern void init_dl_bw(struct dl_bw *dl_b);
extern int sched_dl_global_validate(void);
extern void sched_dl_do_global(void);
@@ -489,7 +494,6 @@ struct cfs_bandwidth { };
/* CFS-related fields in a runqueue */
struct cfs_rq {
struct load_weight load;
- unsigned long runnable_weight;
unsigned int nr_running;
unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
unsigned int idle_h_nr_running; /* SCHED_IDLE */
@@ -528,7 +532,7 @@ struct cfs_rq {
int nr;
unsigned long load_avg;
unsigned long util_avg;
- unsigned long runnable_sum;
+ unsigned long runnable_avg;
} removed;
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -688,8 +692,30 @@ struct dl_rq {
#ifdef CONFIG_FAIR_GROUP_SCHED
/* An entity is a task if it doesn't "own" a runqueue */
#define entity_is_task(se) (!se->my_q)
+
+static inline void se_update_runnable(struct sched_entity *se)
+{
+	if (!entity_is_task(se))
+		se->runnable_weight = se->my_q->h_nr_running;
+}
+
+static inline long se_runnable(struct sched_entity *se)
+{
+	if (entity_is_task(se))
+		return !!se->on_rq;
+	else
+		return se->runnable_weight;
+}
+
#else
#define entity_is_task(se) 1
+
+static inline void se_update_runnable(struct sched_entity *se) {}
+
+static inline long se_runnable(struct sched_entity *se)
+{
+	return !!se->on_rq;
+}
#endif
#ifdef CONFIG_SMP
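Restating the contract of the helpers added above, outside the kernel tree: a task entity reports 0 or 1 depending on whether it is queued, while a group entity reports the number of runnable tasks below it, cached in runnable_weight by se_update_runnable(). A stripped-down stand-in (these structs are illustrative, not the real struct sched_entity):

#include <stdbool.h>
#include <stdio.h>

/* stripped-down stand-ins for the kernel structures */
struct cfs_rq_stub {
	unsigned int h_nr_running;	/* runnable tasks in this hierarchy */
};

struct sched_entity_stub {
	bool on_rq;			/* task entity: queued or not */
	struct cfs_rq_stub *my_q;	/* group entity: the runqueue it owns */
	unsigned long runnable_weight;
};

static void se_update_runnable(struct sched_entity_stub *se)
{
	if (se->my_q)	/* group entity: mirror the child runqueue's task count */
		se->runnable_weight = se->my_q->h_nr_running;
}

static long se_runnable(struct sched_entity_stub *se)
{
	if (!se->my_q)	/* task entity: 0 or 1 */
		return se->on_rq ? 1 : 0;
	return se->runnable_weight;
}

int main(void)
{
	struct cfs_rq_stub grq = { .h_nr_running = 3 };
	struct sched_entity_stub task = { .on_rq = true, .my_q = NULL };
	struct sched_entity_stub group = { .my_q = &grq };

	se_update_runnable(&group);
	printf("task: %ld  group: %ld\n", se_runnable(&task), se_runnable(&group));
	/* prints "task: 1  group: 3" */
	return 0;
}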
@@ -701,10 +727,6 @@ static inline long se_weight(struct sched_entity *se)
return scale_load_down(se->load.weight);
}
-static inline long se_runnable(struct sched_entity *se)
-{
- return scale_load_down(se->runnable_weight);
-}
static inline bool sched_asym_prefer(int a, int b)
{
@@ -944,6 +966,9 @@ struct rq {
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
struct sched_avg avg_irq;
#endif
+#ifdef CONFIG_SCHED_THERMAL_PRESSURE
+ struct sched_avg avg_thermal;
+#endif
u64 idle_stamp;
u64 avg_idle;
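The new rq->avg_thermal above carries a PELT-averaged "thermal pressure" signal: roughly, how much of a CPU's maximum capacity the thermal framework is currently taking away by capping its frequency. A hedged sketch of how a frequency cap maps to a pressure value; the names and the simple linear ratio are illustrative assumptions, not the kernel's exact code:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024UL

/* capacity lost to a thermal frequency cap, on the 0..1024 capacity scale */
static unsigned long thermal_pressure(unsigned long max_cap,
				      unsigned long capped_freq_khz,
				      unsigned long max_freq_khz)
{
	/* capacity still deliverable at the capped frequency (linear model) */
	unsigned long capped_cap = max_cap * capped_freq_khz / max_freq_khz;

	return max_cap - capped_cap;	/* this delta is what gets PELT-averaged */
}

int main(void)
{
	/* e.g. a big core thermally capped from 2.8 GHz down to 2.1 GHz */
	unsigned long p = thermal_pressure(SCHED_CAPACITY_SCALE, 2100000, 2800000);

	printf("thermal pressure: %lu of %lu\n", p, SCHED_CAPACITY_SCALE);
	/* prints "thermal pressure: 256 of 1024" */
	return 0;
}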
@@ -1107,6 +1132,24 @@ static inline u64 rq_clock_task(struct rq *rq)
return rq->clock_task;
}
+/**
+ * By default the decay is the default pelt decay period.
+ * The decay shift can change the decay period in
+ * multiples of 32.
+ * Decay shift	Decay period(ms)
+ *	0	32
+ *	1	64
+ *	2	128
+ *	3	256
+ *	4	512
+ */
+extern int sched_thermal_decay_shift;
+
+static inline u64 rq_clock_thermal(struct rq *rq)
+{
+	return rq_clock_task(rq) >> sched_thermal_decay_shift;
+}
+
static inline void rq_clock_skip_update(struct rq *rq)
{
lockdep_assert_held(&rq->lock);
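For reference, the shift applied by rq_clock_thermal() above simply slows the clock feeding the thermal-pressure PELT signal by a power of two, which stretches the effective decay period listed in the comment table (32 ms at shift 0, doubling per step). A tiny illustrative sketch, not kernel code:

#include <stdio.h>

/* effective thermal PELT decay period for a given sched_thermal_decay_shift */
static unsigned int thermal_decay_period_ms(unsigned int shift)
{
	return 32u << shift;	/* default PELT period is 32 ms at shift 0 */
}

/* mirrors rq_clock_thermal(): one "thermal" ns per 2^shift task-clock ns */
static unsigned long long thermal_clock(unsigned long long clock_task_ns,
					unsigned int shift)
{
	return clock_task_ns >> shift;
}

int main(void)
{
	unsigned long long now = 1000000000ULL;	/* 1 s of task clock, in ns */

	for (unsigned int shift = 0; shift <= 4; shift++)
		printf("shift %u: thermal clock %llu ns, decay period %u ms\n",
		       shift, thermal_clock(now, shift),
		       thermal_decay_period_ms(shift));
	return 0;
}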
@@ -1337,8 +1380,6 @@ extern void sched_ttwu_pending(void);
for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
__sd; __sd = __sd->parent)
-#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
-
/**
* highest_flag_domain - Return highest sched_domain containing flag.
* @cpu: The CPU whose highest level of sched domain is to
@@ -1869,7 +1910,6 @@ extern struct dl_bandwidth def_dl_bandwidth;
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
-extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
#define BW_SHIFT 20
#define BW_UNIT (1 << BW_SHIFT)
@@ -1968,6 +2008,13 @@ static inline int hrtick_enabled(struct rq *rq)
#endif /* CONFIG_SCHED_HRTICK */
+#ifndef arch_scale_freq_tick
+static __always_inline
+void arch_scale_freq_tick(void)
+{
+}
+#endif
+
#ifndef arch_scale_freq_capacity
static __always_inline
unsigned long arch_scale_freq_capacity(int cpu)