author    Linus Torvalds <torvalds@linux-foundation.org>  2020-03-30 17:01:51 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2020-03-30 17:01:51 -0700
commit    642e53ead6aea8740a219ede509a5d138fd4f780 (patch)
tree      5c4680d0c07315dab24fe7333c62f56bc19ec4e4 /kernel/sched/sched.h
parent    9b82f05f869a823d43ea4186f5f732f2924d3693 (diff)
parent    313f16e2e35abb833eab5bdebc6ae30699adca18 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle are:

   - Various NUMA scheduling updates: harmonize the load-balancer and
     NUMA placement logic to not work against each other. The intended
     result is better locality, better utilization and fewer migrations.

   - Introduce Thermal Pressure tracking and optimizations, to improve
     task placement on thermally overloaded systems.

   - Implement frequency invariant scheduler accounting on (some) x86
     CPUs. This is done by observing and sampling the 'recent' CPU
     frequency average at ~tick boundaries. The CPU provides this data
     via the APERF/MPERF MSRs. This hopefully makes our capacity
     estimates more precise and keeps tasks on the same CPU better even
     if it might seem overloaded at a lower momentary frequency. (As
     usual, turbo mode is a complication that we resolve by observing
     the maximum frequency and renormalizing to it.)

   - Add asymmetric CPU capacity wakeup scan to improve capacity
     utilization on asymmetric topologies. (big.LITTLE systems)

   - PSI fixes and optimizations.

   - RT scheduling capacity awareness fixes & improvements.

   - Optimize the CONFIG_RT_GROUP_SCHED constraints code.

   - Misc fixes, cleanups and optimizations - see the changelog for
     details"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (62 commits)
  threads: Update PID limit comment according to futex UAPI change
  sched/fair: Fix condition of avg_load calculation
  sched/rt: cpupri_find: Trigger a full search as fallback
  kthread: Do not preempt current task if it is going to call schedule()
  sched/fair: Improve spreading of utilization
  sched: Avoid scale real weight down to zero
  psi: Move PF_MEMSTALL out of task->flags
  MAINTAINERS: Add maintenance information for psi
  psi: Optimize switching tasks inside shared cgroups
  psi: Fix cpu.pressure for cpu.max and competing cgroups
  sched/core: Distribute tasks within affinity masks
  sched/fair: Fix enqueue_task_fair warning
  thermal/cpu-cooling, sched/core: Move the arch_set_thermal_pressure() API to generic scheduler code
  sched/rt: Remove unnecessary push for unfit tasks
  sched/rt: Allow pulling unfitting task
  sched/rt: Optimize cpupri_find() on non-heterogenous systems
  sched/rt: Re-instate old behavior in select_task_rq_rt()
  sched/rt: cpupri_find: Implement fallback mechanism for !fit case
  sched/fair: Fix reordering of enqueue/dequeue_task_fair()
  sched/fair: Fix runnable_avg for throttled cfs
  ...
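As a rough illustration of the frequency-invariance idea described above (sample the APERF/MPERF deltas at tick boundaries, then renormalize against the turbo ratio), here is a minimal user-space sketch. It is not the kernel's x86 implementation; every name in it is made up for illustration, and max_freq_ratio stands in for a precomputed turbo/base frequency ratio in 10-bit fixed point.

#include <stdint.h>
#include <stdio.h>

#define CAPACITY_SCALE 1024ULL	/* capacity is expressed on a 0..1024 scale */

/* hypothetical per-tick snapshot of the two MSR counters */
struct freq_sample {
	uint64_t aperf;		/* counts at the actual, current frequency */
	uint64_t mperf;		/* counts at a fixed reference frequency */
};

/* max_freq_ratio: (turbo_freq / base_freq) in 10-bit fixed point */
static uint64_t freq_scale(const struct freq_sample *prev,
			   const struct freq_sample *cur,
			   uint64_t max_freq_ratio)
{
	uint64_t acnt = cur->aperf - prev->aperf;
	uint64_t mcnt = cur->mperf - prev->mperf;
	uint64_t scale;

	if (!mcnt || !max_freq_ratio)
		return CAPACITY_SCALE;	/* no data: behave as if invariance were off */

	/* (acnt / mcnt) / (turbo / base), expressed against the 1024 scale */
	scale = (acnt * CAPACITY_SCALE * CAPACITY_SCALE) / (mcnt * max_freq_ratio);
	return scale > CAPACITY_SCALE ? CAPACITY_SCALE : scale;
}

int main(void)
{
	struct freq_sample prev = { 0, 0 };
	struct freq_sample cur  = { 2000000, 2000000 };

	/* base 2.0 GHz, turbo 3.0 GHz -> ratio of roughly 1536 */
	printf("scale = %llu\n", (unsigned long long)freq_scale(&prev, &cur, 1536));
	/* prints 682, i.e. ~2/3 of peak capacity while running at base clock */
	return 0;
}

Renormalizing against the turbo ratio is what the message above alludes to: only sustained turbo operation maps to full (1024) capacity, so utilization accrued at lower clocks is scaled down accordingly.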
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--  kernel/sched/sched.h  69
1 file changed, 58 insertions(+), 11 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fdc77e796324..464742874be3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -118,7 +118,13 @@ extern long calc_load_fold_active(struct rq *this_rq, long adjust);
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) ((w) << SCHED_FIXEDPOINT_SHIFT)
-# define scale_load_down(w) ((w) >> SCHED_FIXEDPOINT_SHIFT)
+# define scale_load_down(w) \
+({ \
+	unsigned long __w = (w); \
+	if (__w) \
+		__w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
+	__w; \
+})
#else
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w) (w)
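The reworked scale_load_down() above pairs with the "sched: Avoid scale real weight down to zero" entry in the commit list: a non-zero weight can no longer collapse to 0 after the fixed-point shift, it now bottoms out at 2. A small stand-alone sketch of the before/after behaviour, assuming SCHED_FIXEDPOINT_SHIFT is 10 as on 64-bit kernels:

#include <stdio.h>

#define SCHED_FIXEDPOINT_SHIFT 10	/* the 64-bit kernel's fixed-point shift */

/* previous behaviour: small weights shift down to zero */
static unsigned long old_scale_load_down(unsigned long w)
{
	return w >> SCHED_FIXEDPOINT_SHIFT;
}

/* new behaviour: any non-zero weight keeps a floor of 2 */
static unsigned long new_scale_load_down(unsigned long w)
{
	if (w) {
		unsigned long s = w >> SCHED_FIXEDPOINT_SHIFT;
		w = s > 2UL ? s : 2UL;
	}
	return w;
}

int main(void)
{
	unsigned long w = 437;	/* e.g. a low-share group entity's weight */

	printf("old: %lu  new: %lu\n",
	       old_scale_load_down(w), new_scale_load_down(w));
	/* prints "old: 0  new: 2": the entity is no longer weightless */
	return 0;
}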
@@ -305,7 +311,6 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}
-extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
extern void init_dl_bw(struct dl_bw *dl_b);
extern int sched_dl_global_validate(void);
extern void sched_dl_do_global(void);
@@ -489,7 +494,6 @@ struct cfs_bandwidth { };
/* CFS-related fields in a runqueue */
struct cfs_rq {
struct load_weight load;
- unsigned long runnable_weight;
unsigned int nr_running;
unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
unsigned int idle_h_nr_running; /* SCHED_IDLE */
@@ -528,7 +532,7 @@ struct cfs_rq {
int nr;
unsigned long load_avg;
unsigned long util_avg;
- unsigned long runnable_sum;
+ unsigned long runnable_avg;
} removed;
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -688,8 +692,30 @@ struct dl_rq {
#ifdef CONFIG_FAIR_GROUP_SCHED
/* An entity is a task if it doesn't "own" a runqueue */
#define entity_is_task(se) (!se->my_q)
+
+static inline void se_update_runnable(struct sched_entity *se)
+{
+	if (!entity_is_task(se))
+		se->runnable_weight = se->my_q->h_nr_running;
+}
+
+static inline long se_runnable(struct sched_entity *se)
+{
+	if (entity_is_task(se))
+		return !!se->on_rq;
+	else
+		return se->runnable_weight;
+}
+
#else
#define entity_is_task(se) 1
+
+static inline void se_update_runnable(struct sched_entity *se) {}
+
+static inline long se_runnable(struct sched_entity *se)
+{
+	return !!se->on_rq;
+}
#endif
#ifdef CONFIG_SMP
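Restating the contract of the helpers added above, outside the kernel tree: a task entity reports 0 or 1 depending on whether it is queued, while a group entity reports the number of runnable tasks below it, cached in runnable_weight by se_update_runnable(). A stripped-down stand-in (these structs are illustrative, not the real struct sched_entity):

#include <stdbool.h>
#include <stdio.h>

/* stripped-down stand-ins for the kernel structures */
struct cfs_rq_stub {
	unsigned int h_nr_running;	/* runnable tasks in this hierarchy */
};

struct sched_entity_stub {
	bool on_rq;			/* task entity: queued or not */
	struct cfs_rq_stub *my_q;	/* group entity: the runqueue it owns */
	unsigned long runnable_weight;
};

static void se_update_runnable(struct sched_entity_stub *se)
{
	if (se->my_q)	/* group entity: mirror the child runqueue's task count */
		se->runnable_weight = se->my_q->h_nr_running;
}

static long se_runnable(struct sched_entity_stub *se)
{
	if (!se->my_q)	/* task entity: 0 or 1 */
		return se->on_rq ? 1 : 0;
	return se->runnable_weight;
}

int main(void)
{
	struct cfs_rq_stub grq = { .h_nr_running = 3 };
	struct sched_entity_stub task = { .on_rq = true, .my_q = NULL };
	struct sched_entity_stub group = { .my_q = &grq };

	se_update_runnable(&group);
	printf("task: %ld  group: %ld\n", se_runnable(&task), se_runnable(&group));
	/* prints "task: 1  group: 3" */
	return 0;
}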
@@ -701,10 +727,6 @@ static inline long se_weight(struct sched_entity *se)
return scale_load_down(se->load.weight);
}
-static inline long se_runnable(struct sched_entity *se)
-{
- return scale_load_down(se->runnable_weight);
-}
static inline bool sched_asym_prefer(int a, int b)
{
@@ -944,6 +966,9 @@ struct rq {
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
struct sched_avg avg_irq;
#endif
+#ifdef CONFIG_SCHED_THERMAL_PRESSURE
+ struct sched_avg avg_thermal;
+#endif
u64 idle_stamp;
u64 avg_idle;
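The new rq->avg_thermal above carries a PELT-averaged "thermal pressure" signal: roughly, how much of a CPU's maximum capacity the thermal framework is currently taking away by capping its frequency. A hedged sketch of how a frequency cap maps to a pressure value; the names and the simple linear ratio are illustrative assumptions, not the kernel's exact code:

#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024UL

/* capacity lost to a thermal frequency cap, on the 0..1024 capacity scale */
static unsigned long thermal_pressure(unsigned long max_cap,
				      unsigned long capped_freq_khz,
				      unsigned long max_freq_khz)
{
	/* capacity still deliverable at the capped frequency (linear model) */
	unsigned long capped_cap = max_cap * capped_freq_khz / max_freq_khz;

	return max_cap - capped_cap;	/* this delta is what gets PELT-averaged */
}

int main(void)
{
	/* e.g. a big core thermally capped from 2.8 GHz down to 2.1 GHz */
	unsigned long p = thermal_pressure(SCHED_CAPACITY_SCALE, 2100000, 2800000);

	printf("thermal pressure: %lu of %lu\n", p, SCHED_CAPACITY_SCALE);
	/* prints "thermal pressure: 256 of 1024" */
	return 0;
}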
@@ -1107,6 +1132,24 @@ static inline u64 rq_clock_task(struct rq *rq)
return rq->clock_task;
}
+/**
+ * By default the decay is the default pelt decay period.
+ * The decay shift can change the decay period in
+ * multiples of 32.
+ * Decay shift	Decay period(ms)
+ *	0	32
+ *	1	64
+ *	2	128
+ *	3	256
+ *	4	512
+ */
+extern int sched_thermal_decay_shift;
+
+static inline u64 rq_clock_thermal(struct rq *rq)
+{
+	return rq_clock_task(rq) >> sched_thermal_decay_shift;
+}
+
static inline void rq_clock_skip_update(struct rq *rq)
{
lockdep_assert_held(&rq->lock);
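For reference, the shift applied by rq_clock_thermal() above simply slows the clock feeding the thermal-pressure PELT signal by a power of two, which stretches the effective decay period listed in the comment table (32 ms at shift 0, doubling per step). A tiny illustrative sketch, not kernel code:

#include <stdio.h>

/* effective thermal PELT decay period for a given sched_thermal_decay_shift */
static unsigned int thermal_decay_period_ms(unsigned int shift)
{
	return 32u << shift;	/* default PELT period is 32 ms at shift 0 */
}

/* mirrors rq_clock_thermal(): one "thermal" ns per 2^shift task-clock ns */
static unsigned long long thermal_clock(unsigned long long clock_task_ns,
					unsigned int shift)
{
	return clock_task_ns >> shift;
}

int main(void)
{
	unsigned long long now = 1000000000ULL;	/* 1 s of task clock, in ns */

	for (unsigned int shift = 0; shift <= 4; shift++)
		printf("shift %u: thermal clock %llu ns, decay period %u ms\n",
		       shift, thermal_clock(now, shift),
		       thermal_decay_period_ms(shift));
	return 0;
}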
@@ -1337,8 +1380,6 @@ extern void sched_ttwu_pending(void);
for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
__sd; __sd = __sd->parent)
-#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
-
/**
* highest_flag_domain - Return highest sched_domain containing flag.
* @cpu: The CPU whose highest level of sched domain is to
@@ -1869,7 +1910,6 @@ extern struct dl_bandwidth def_dl_bandwidth;
extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
-extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
#define BW_SHIFT 20
#define BW_UNIT (1 << BW_SHIFT)
@@ -1968,6 +2008,13 @@ static inline int hrtick_enabled(struct rq *rq)
#endif /* CONFIG_SCHED_HRTICK */
+#ifndef arch_scale_freq_tick
+static __always_inline
+void arch_scale_freq_tick(void)
+{
+}
+#endif
+
#ifndef arch_scale_freq_capacity
static __always_inline
unsigned long arch_scale_freq_capacity(int cpu)