From 4550487a993d579c7329bb5b19e516d36800c8bf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 21 Dec 2017 10:47:48 +0100
Subject: sched/fair: Restructure nohz_balance_kick()

The current:

  if (nohz_kick_needed())
      nohz_balancer_kick()

is pointless complexity, fold them into a single call and avoid the
various conditions at the call site.

When we introduce multiple different needs to kick the ilb, the above
construct also becomes a problem.

Signed-off-by: Peter Zijlstra (Intel)
Cc: Linus Torvalds
Cc: Mike Galbraith
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar
---
 kernel/sched/fair.c | 218 ++++++++++++++++++++++++++--------------------------
 1 file changed, 111 insertions(+), 107 deletions(-)

(limited to 'kernel/sched/fair.c')

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fc058967c999..fa483d889f07 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9065,12 +9065,29 @@ static inline int find_new_ilb(void)
 	return nr_cpu_ids;
 }
 
+static inline void set_cpu_sd_state_busy(void)
+{
+	struct sched_domain *sd;
+	int cpu = smp_processor_id();
+
+	rcu_read_lock();
+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
+
+	if (!sd || !sd->nohz_idle)
+		goto unlock;
+	sd->nohz_idle = 0;
+
+	atomic_inc(&sd->shared->nr_busy_cpus);
+unlock:
+	rcu_read_unlock();
+}
+
 /*
  * Kick a CPU to do the nohz balancing, if it is time for it. We pick the
  * nohz_load_balancer CPU (if there is one) otherwise fallback to any idle
  * CPU (if there is one).
  */
-static void nohz_balancer_kick(void)
+static void kick_ilb(void)
 {
 	unsigned int flags;
 	int ilb_cpu;
@@ -9085,6 +9102,7 @@ static void nohz_balancer_kick(void)
 	flags = atomic_fetch_or(NOHZ_KICK_MASK, nohz_flags(ilb_cpu));
 	if (flags & NOHZ_KICK_MASK)
 		return;
+
 	/*
 	 * Use smp_send_reschedule() instead of resched_cpu().
 	 * This way we generate a sched IPI on the target CPU which
@@ -9092,7 +9110,94 @@ static void nohz_balancer_kick(void)
 	 * will be run before returning from the IPI.
 	 */
 	smp_send_reschedule(ilb_cpu);
-	return;
+}
+
+/*
+ * Current heuristic for kicking the idle load balancer in the presence
+ * of an idle cpu in the system.
+ *   - This rq has more than one task.
+ *   - This rq has at least one CFS task and the capacity of the CPU is
+ *     significantly reduced because of RT tasks or IRQs.
+ *   - At parent of LLC scheduler domain level, this cpu's scheduler group has
+ *     multiple busy cpu.
+ *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
+ *     domain span are idle.
+ */
+static void nohz_balancer_kick(struct rq *rq)
+{
+	unsigned long now = jiffies;
+	struct sched_domain_shared *sds;
+	struct sched_domain *sd;
+	int nr_busy, i, cpu = rq->cpu;
+	bool kick = false;
+
+	if (unlikely(rq->idle_balance))
+		return;
+
+	/*
+	 * We may be recently in ticked or tickless idle mode. At the first
+	 * busy tick after returning from idle, we will update the busy stats.
+	 */
+	set_cpu_sd_state_busy();
+	nohz_balance_exit_idle(cpu);
+
+	/*
+	 * None are in tickless mode and hence no need for NOHZ idle load
+	 * balancing.
+	 */
+	if (likely(!atomic_read(&nohz.nr_cpus)))
+		return;
+
+	if (time_before(now, nohz.next_balance))
+		return;
+
+	if (rq->nr_running >= 2) {
+		kick = true;
+		goto out;
+	}
+
+	rcu_read_lock();
+	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (sds) {
+		/*
+		 * XXX: write a coherent comment on why we do this.
+		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
+		 */
+		nr_busy = atomic_read(&sds->nr_busy_cpus);
+		if (nr_busy > 1) {
+			kick = true;
+			goto unlock;
+		}
+
+	}
+
+	sd = rcu_dereference(rq->sd);
+	if (sd) {
+		if ((rq->cfs.h_nr_running >= 1) &&
+		    check_cpu_capacity(rq, sd)) {
+			kick = true;
+			goto unlock;
+		}
+	}
+
+	sd = rcu_dereference(per_cpu(sd_asym, cpu));
+	if (sd) {
+		for_each_cpu(i, sched_domain_span(sd)) {
+			if (i == cpu ||
+			    !cpumask_test_cpu(i, nohz.idle_cpus_mask))
+				continue;
+
+			if (sched_asym_prefer(i, cpu)) {
+				kick = true;
+				goto unlock;
+			}
+		}
+	}
+unlock:
+	rcu_read_unlock();
+out:
+	if (kick)
+		kick_ilb();
 }
 
 void nohz_balance_exit_idle(unsigned int cpu)
@@ -9112,23 +9217,6 @@ void nohz_balance_exit_idle(unsigned int cpu)
 	}
 }
 
-static inline void set_cpu_sd_state_busy(void)
-{
-	struct sched_domain *sd;
-	int cpu = smp_processor_id();
-
-	rcu_read_lock();
-	sd = rcu_dereference(per_cpu(sd_llc, cpu));
-
-	if (!sd || !sd->nohz_idle)
-		goto unlock;
-	sd->nohz_idle = 0;
-
-	atomic_inc(&sd->shared->nr_busy_cpus);
-unlock:
-	rcu_read_unlock();
-}
-
 void set_cpu_sd_state_idle(void)
 {
 	struct sched_domain *sd;
@@ -9171,6 +9259,8 @@ void nohz_balance_enter_idle(int cpu)
 	atomic_inc(&nohz.nr_cpus);
 	atomic_or(NOHZ_TICK_STOPPED, nohz_flags(cpu));
 }
+#else
+static inline void nohz_balancer_kick(struct rq *rq) { }
 #endif
 
 static DEFINE_SPINLOCK(balancing);
@@ -9369,90 +9459,6 @@ static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 	return true;
 }
 
-
-/*
- * Current heuristic for kicking the idle load balancer in the presence
- * of an idle CPU in the system.
- *   - This rq has more than one task.
- *   - This rq has at least one CFS task and the capacity of the CPU is
- *     significantly reduced because of RT tasks or IRQs.
- *   - At parent of LLC scheduler domain level, this CPU's scheduler group has
- *     multiple busy CPUs.
- *   - For SD_ASYM_PACKING, if the lower numbered CPU's in the scheduler
- *     domain span are idle.
- */
-static inline bool nohz_kick_needed(struct rq *rq)
-{
-	unsigned long now = jiffies;
-	struct sched_domain_shared *sds;
-	struct sched_domain *sd;
-	int nr_busy, i, cpu = rq->cpu;
-	bool kick = false;
-
-	if (unlikely(rq->idle_balance))
-		return false;
-
-	/*
-	 * We may be recently in ticked or tickless idle mode. At the first
-	 * busy tick after returning from idle, we will update the busy stats.
-	 */
-	set_cpu_sd_state_busy();
-	nohz_balance_exit_idle(cpu);
-
-	/*
-	 * None are in tickless mode and hence no need for NOHZ idle load
-	 * balancing.
-	 */
-	if (likely(!atomic_read(&nohz.nr_cpus)))
-		return false;
-
-	if (time_before(now, nohz.next_balance))
-		return false;
-
-	if (rq->nr_running >= 2)
-		return true;
-
-	rcu_read_lock();
-	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds) {
-		/*
-		 * XXX: write a coherent comment on why we do this.
-		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
-		 */
-		nr_busy = atomic_read(&sds->nr_busy_cpus);
-		if (nr_busy > 1) {
-			kick = true;
-			goto unlock;
-		}
-
-	}
-
-	sd = rcu_dereference(rq->sd);
-	if (sd) {
-		if ((rq->cfs.h_nr_running >= 1) &&
-		    check_cpu_capacity(rq, sd)) {
-			kick = true;
-			goto unlock;
-		}
-	}
-
-	sd = rcu_dereference(per_cpu(sd_asym, cpu));
-	if (sd) {
-		for_each_cpu(i, sched_domain_span(sd)) {
-			if (i == cpu ||
-			    !cpumask_test_cpu(i, nohz.idle_cpus_mask))
-				continue;
-
-			if (sched_asym_prefer(i, cpu)) {
-				kick = true;
-				goto unlock;
-			}
-		}
-	}
-unlock:
-	rcu_read_unlock();
-	return kick;
-}
 #else
 static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
 {
@@ -9497,10 +9503,8 @@ void trigger_load_balance(struct rq *rq)
 
 	if (time_after_eq(jiffies, rq->next_balance))
 		raise_softirq(SCHED_SOFTIRQ);
-#ifdef CONFIG_NO_HZ_COMMON
-	if (nohz_kick_needed(rq))
-		nohz_balancer_kick();
-#endif
+
+	nohz_balancer_kick(rq);
 }
 
 static void rq_online_fair(struct rq *rq)
-- 
cgit v1.2.3
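
For readers who want the shape of the change without wading through the hunks, the sketch below is a minimal, self-contained illustration of the fold described in the changelog. The names nohz_kick_needed(), nohz_balancer_kick() and kick_ilb() are taken from the patch; struct rq, its fields and the kick conditions here are simplified stand-ins, not the kernel's actual definitions.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for struct rq; only the fields this sketch needs. */
struct rq {
	int nr_running;
	bool idle_balance;
};

static void kick_ilb(void)
{
	printf("kick the idle load balancer\n");
}

/* Old shape: a separate predicate the caller had to remember to test. */
static bool nohz_kick_needed(struct rq *rq)
{
	return !rq->idle_balance && rq->nr_running >= 2;
}

/* New shape: the decision and the kick folded into one call. */
static void nohz_balancer_kick(struct rq *rq)
{
	if (rq->idle_balance)
		return;

	if (rq->nr_running >= 2)
		kick_ilb();
}

int main(void)
{
	struct rq rq = { .nr_running = 2, .idle_balance = false };

	/* Call site before the patch: test the predicate, then kick. */
	if (nohz_kick_needed(&rq))
		kick_ilb();

	/* Call site after the patch: one unconditional call. */
	nohz_balancer_kick(&rq);

	return 0;
}

With the predicate folded in, the call site also no longer needs the CONFIG_NO_HZ_COMMON #ifdef: the !CONFIG_NO_HZ_COMMON build gets the empty nohz_balancer_kick() stub the patch adds under #else.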