diff options
-rw-r--r-- | Documentation/scheduler/sched-stats.txt | 3 | ||||
-rw-r--r-- | block/blk-softirq.c | 16 | ||||
-rw-r--r-- | block/blk.h | 16 | ||||
-rw-r--r-- | include/linux/init_task.h | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 30 | ||||
-rw-r--r-- | kernel/sched/core.c | 6 | ||||
-rw-r--r-- | kernel/sched/debug.c | 1 | ||||
-rw-r--r-- | kernel/sched/fair.c | 28 | ||||
-rw-r--r-- | kernel/sched/rt.c | 4 | ||||
-rw-r--r-- | kernel/sched/sched.h | 5 | ||||
-rw-r--r-- | kernel/sched/stats.c | 4 |
11 files changed, 58 insertions, 57 deletions
diff --git a/Documentation/scheduler/sched-stats.txt b/Documentation/scheduler/sched-stats.txt index 1cd5d51bc761..8259b34a66ae 100644 --- a/Documentation/scheduler/sched-stats.txt +++ b/Documentation/scheduler/sched-stats.txt @@ -38,7 +38,8 @@ First field is a sched_yield() statistic: 1) # of times sched_yield() was called Next three are schedule() statistics: - 2) # of times we switched to the expired queue and reused it + 2) This field is a legacy array expiration count field used in the O(1) + scheduler. We kept it for ABI compatibility, but it is always set to zero. 3) # of times schedule() was called 4) # of times schedule() left the processor idle diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 1366a89d8e66..467c8de88642 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -8,6 +8,7 @@ #include <linux/blkdev.h> #include <linux/interrupt.h> #include <linux/cpu.h> +#include <linux/sched.h> #include "blk.h" @@ -103,9 +104,10 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = { void __blk_complete_request(struct request *req) { - int ccpu, cpu, group_cpu = NR_CPUS; + int ccpu, cpu; struct request_queue *q = req->q; unsigned long flags; + bool shared = false; BUG_ON(!q->softirq_done_fn); @@ -117,22 +119,20 @@ void __blk_complete_request(struct request *req) */ if (req->cpu != -1) { ccpu = req->cpu; - if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) { - ccpu = blk_cpu_to_group(ccpu); - group_cpu = blk_cpu_to_group(cpu); - } + if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) + shared = cpus_share_cache(cpu, ccpu); } else ccpu = cpu; /* - * If current CPU and requested CPU are in the same group, running - * softirq in current CPU. One might concern this is just like + * If current CPU and requested CPU share a cache, run the softirq on + * the current CPU. One might concern this is just like * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is * running in interrupt handler, and currently I/O controller doesn't * support multiple interrupts, so current CPU is unique actually. This * avoids IPI sending from current CPU to the first CPU of a group. */ - if (ccpu == cpu || ccpu == group_cpu) { + if (ccpu == cpu || shared) { struct list_head *list; do_local: list = &__get_cpu_var(blk_cpu_done); diff --git a/block/blk.h b/block/blk.h index 9c12f80882b0..d45be871329e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -166,22 +166,6 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) return q->nr_congestion_off; } -static inline int blk_cpu_to_group(int cpu) -{ - int group = NR_CPUS; -#ifdef CONFIG_SCHED_MC - const struct cpumask *mask = cpu_coregroup_mask(cpu); - group = cpumask_first(mask); -#elif defined(CONFIG_SCHED_SMT) - group = cpumask_first(topology_thread_cpumask(cpu)); -#else - return cpu; -#endif - if (likely(group < NR_CPUS)) - return group; - return cpu; -} - /* * Contribute to IO statistics IFF: * diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9c66b1ada9d7..f994d51f70f2 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,7 +149,7 @@ extern struct cred init_cred; }, \ .rt = { \ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ - .time_slice = HZ, \ + .time_slice = RR_TIMESLICE, \ .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 7d379a6bfd88..1a6398424fab 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -905,6 +905,7 @@ struct sched_group_power { * single CPU. */ unsigned int power, power_orig; + unsigned long next_update; /* * Number of busy cpus in this group. */ @@ -1052,6 +1053,8 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag) unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); +bool cpus_share_cache(int this_cpu, int that_cpu); + #else /* CONFIG_SMP */ struct sched_domain_attr; @@ -1061,6 +1064,12 @@ partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { } + +static inline bool cpus_share_cache(int this_cpu, int that_cpu) +{ + return true; +} + #endif /* !CONFIG_SMP */ @@ -1225,6 +1234,12 @@ struct sched_rt_entity { #endif }; +/* + * default timeslice is 100 msecs (used only for SCHED_RR tasks). + * Timeslices get refilled after they expire. + */ +#define RR_TIMESLICE (100 * HZ / 1000) + struct rcu_node; enum perf_event_task_context { @@ -2390,12 +2405,15 @@ static inline void task_unlock(struct task_struct *p) extern struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, unsigned long *flags); -#define lock_task_sighand(tsk, flags) \ -({ struct sighand_struct *__ss; \ - __cond_lock(&(tsk)->sighand->siglock, \ - (__ss = __lock_task_sighand(tsk, flags))); \ - __ss; \ -}) \ +static inline struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) +{ + struct sighand_struct *ret; + + ret = __lock_task_sighand(tsk, flags); + (void)__cond_lock(&tsk->sighand->siglock, ret); + return ret; +} static inline void unlock_task_sighand(struct task_struct *tsk, unsigned long *flags) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b342f57879e6..91fe6a0e9098 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1507,7 +1507,7 @@ static int ttwu_activate_remote(struct task_struct *p, int wake_flags) } #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ -static inline int ttwu_share_cache(int this_cpu, int that_cpu) +bool cpus_share_cache(int this_cpu, int that_cpu) { return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); } @@ -1518,7 +1518,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) struct rq *rq = cpu_rq(cpu); #if defined(CONFIG_SMP) - if (sched_feat(TTWU_QUEUE) && !ttwu_share_cache(smp_processor_id(), cpu)) { + if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { sched_clock_cpu(cpu); /* sync clocks x-cpu */ ttwu_queue_remote(p, cpu); return; @@ -5753,7 +5753,7 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu) * * Also keep a unique ID per domain (we use the first cpu number in * the cpumask of the domain), this allows us to quickly tell if - * two cpus are in the same cache domain, see ttwu_share_cache(). + * two cpus are in the same cache domain, see cpus_share_cache(). */ DEFINE_PER_CPU(struct sched_domain *, sd_llc); DEFINE_PER_CPU(int, sd_llc_id); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 2a075e10004b..09acaa15161d 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -288,7 +288,6 @@ static void print_cpu(struct seq_file *m, int cpu) P(yld_count); - P(sched_switch); P(sched_count); P(sched_goidle); #ifdef CONFIG_SMP diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index aca16b843b7e..79e9e13c31ab 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2672,8 +2672,6 @@ static int select_idle_sibling(struct task_struct *p, int target) /* * Otherwise, iterate the domains and find an elegible idle cpu. */ - rcu_read_lock(); - sd = rcu_dereference(per_cpu(sd_llc, target)); for_each_lower_domain(sd) { sg = sd->groups; @@ -2695,8 +2693,6 @@ next: } while (sg != sd->groups); } done: - rcu_read_unlock(); - return target; } @@ -3086,6 +3082,8 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp * Fair scheduling class load-balancing methods: */ +static unsigned long __read_mostly max_load_balance_interval = HZ/10; + /* * pull_task - move a task from a remote runqueue to the local runqueue. * Both runqueues must be locked. @@ -3778,6 +3776,11 @@ void update_group_power(struct sched_domain *sd, int cpu) struct sched_domain *child = sd->child; struct sched_group *group, *sdg = sd->groups; unsigned long power; + unsigned long interval; + + interval = msecs_to_jiffies(sd->balance_interval); + interval = clamp(interval, 1UL, max_load_balance_interval); + sdg->sgp->next_update = jiffies + interval; if (!child) { update_cpu_power(sd, cpu); @@ -3885,12 +3888,15 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, * domains. In the newly idle case, we will allow all the cpu's * to do the newly idle load balance. */ - if (idle != CPU_NEWLY_IDLE && local_group) { - if (balance_cpu != this_cpu) { - *balance = 0; - return; - } - update_group_power(sd, this_cpu); + if (local_group) { + if (idle != CPU_NEWLY_IDLE) { + if (balance_cpu != this_cpu) { + *balance = 0; + return; + } + update_group_power(sd, this_cpu); + } else if (time_after_eq(jiffies, group->sgp->next_update)) + update_group_power(sd, this_cpu); } /* Adjust by relative CPU power of the group */ @@ -4947,8 +4953,6 @@ static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb, static DEFINE_SPINLOCK(balancing); -static unsigned long __read_mostly max_load_balance_interval = HZ/10; - /* * Scale the max load_balance interval with the number of CPUs in the system. * This trades load-balance latency on larger machines for less cross talk. diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f42ae7fb5ec5..f70206c2c802 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1972,7 +1972,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) if (--p->rt.time_slice) return; - p->rt.time_slice = DEF_TIMESLICE; + p->rt.time_slice = RR_TIMESLICE; /* * Requeue to the end of queue if we are not the only element @@ -2000,7 +2000,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) * Time slice is 0 for SCHED_FIFO tasks */ if (task->policy == SCHED_RR) - return DEF_TIMESLICE; + return RR_TIMESLICE; else return 0; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 98c0c2623db8..c0660a1a0cd1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -36,11 +36,7 @@ extern __read_mostly int scheduler_running; /* * These are the 'tuning knobs' of the scheduler: - * - * default timeslice is 100 msecs (used only for SCHED_RR tasks). - * Timeslices get refilled after they expire. */ -#define DEF_TIMESLICE (100 * HZ / 1000) /* * single value that denotes runtime == period, ie unlimited time. @@ -462,7 +458,6 @@ struct rq { unsigned int yld_count; /* schedule() stats */ - unsigned int sched_switch; unsigned int sched_count; unsigned int sched_goidle; diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 2a581ba8e190..903ffa9e8872 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -32,9 +32,9 @@ static int show_schedstat(struct seq_file *seq, void *v) /* runqueue-specific stats */ seq_printf(seq, - "cpu%d %u %u %u %u %u %u %llu %llu %lu", + "cpu%d %u 0 %u %u %u %u %llu %llu %lu", cpu, rq->yld_count, - rq->sched_switch, rq->sched_count, rq->sched_goidle, + rq->sched_count, rq->sched_goidle, rq->ttwu_count, rq->ttwu_local, rq->rq_cpu_time, rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); |