Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c | 86
1 files changed, 72 insertions, 14 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 61a0264e28f9..2232ae3e3ad6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -301,11 +301,26 @@ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
static bool workqueue_freezing; /* PL: have wqs started freezing? */
-static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
+/* PL: allowable cpus for unbound wqs and work items */
+static cpumask_var_t wq_unbound_cpumask;
+
+/* CPU where unbound work was last round robin scheduled from this CPU */
+static DEFINE_PER_CPU(int, wq_rr_cpu_last);
+
+/*
+ * Local execution of unbound work items is no longer guaranteed. The
+ * following always forces round-robin CPU selection on unbound work items
+ * to uncover usages which depend on it.
+ */
+#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
+static bool wq_debug_force_rr_cpu = true;
+#else
+static bool wq_debug_force_rr_cpu = false;
+#endif
+module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
/* the per-cpu worker pools */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
- cpu_worker_pools);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */
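
The new debug_force_rr_cpu knob is a plain bool module_param, so besides the workqueue.debug_force_rr_cpu boot parameter it can be flipped at run time through sysfs (mode 0644 makes it root-writable). A small hedged userspace helper, assuming the usual /sys/module/workqueue/parameters/ path that module_param_named() yields for built-in workqueue code:

	#include <stdio.h>

	int main(void)
	{
		/* Path assumed from the module_param_named() name above. */
		FILE *f = fopen("/sys/module/workqueue/parameters/debug_force_rr_cpu", "w");

		if (!f) {
			perror("debug_force_rr_cpu");
			return 1;
		}
		fputs("Y\n", f);	/* bool params accept Y/N or 1/0 */
		fclose(f);
		return 0;
	}
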
@@ -570,6 +585,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
int node)
{
assert_rcu_or_wq_mutex_or_pool_mutex(wq);
+
+ /*
+ * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
+ * delayed item is pending. The plan is to keep CPU -> NODE
+ * mapping valid and stable across CPU on/offlines. Once that
+ * happens, this workaround can be removed.
+ */
+ if (unlikely(node == NUMA_NO_NODE))
+ return wq->dfl_pwq;
+
return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}
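
The guard above matters because numa_pwq_tbl[] is indexed by node, so NUMA_NO_NODE (-1) would walk off the table; the default pwq is the always-valid catch-all. A minimal userspace model of that lookup-with-fallback shape (all names and types here are illustrative, not the kernel's structures):

	#define NR_NODES	4
	#define NO_NODE		(-1)	/* stand-in for NUMA_NO_NODE */

	struct pwq { int id; };

	struct wq {
		struct pwq *dfl_pwq;		/* fallback, always valid */
		struct pwq *numa_pwq_tbl[NR_NODES];
	};

	static struct pwq *pwq_by_node(struct wq *wq, int node)
	{
		if (node == NO_NODE)		/* e.g. CPU went offline */
			return wq->dfl_pwq;
		return wq->numa_pwq_tbl[node];
	}
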
@@ -832,7 +857,6 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
/**
* wq_worker_sleeping - a worker is going to sleep
* @task: task going to sleep
- * @cpu: CPU in question, must be the current CPU number
*
* This function is called during schedule() when a busy worker is
* going to sleep. Worker on the same cpu can be woken up by
@@ -844,7 +868,7 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
* Return:
* Worker task on @cpu to wake up, %NULL if none.
*/
-struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
+struct task_struct *wq_worker_sleeping(struct task_struct *task)
{
struct worker *worker = kthread_data(task), *to_wakeup = NULL;
struct worker_pool *pool;
@@ -860,7 +884,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
pool = worker->pool;
/* this can only happen on the local cpu */
- if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
+ if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id()))
return NULL;
/*
@@ -1298,6 +1322,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
return worker && worker->current_pwq->wq == wq;
}
+/*
+ * When queueing an unbound work item to a wq, prefer local CPU if allowed
+ * by wq_unbound_cpumask. Otherwise, round robin among the allowed ones to
+ * avoid perturbing sensitive tasks.
+ */
+static int wq_select_unbound_cpu(int cpu)
+{
+ static bool printed_dbg_warning;
+ int new_cpu;
+
+ if (likely(!wq_debug_force_rr_cpu)) {
+ if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
+ return cpu;
+ } else if (!printed_dbg_warning) {
+ pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
+ printed_dbg_warning = true;
+ }
+
+ if (cpumask_empty(wq_unbound_cpumask))
+ return cpu;
+
+ new_cpu = __this_cpu_read(wq_rr_cpu_last);
+ new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
+ if (unlikely(new_cpu >= nr_cpu_ids)) {
+ new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
+ if (unlikely(new_cpu >= nr_cpu_ids))
+ return cpu;
+ }
+ __this_cpu_write(wq_rr_cpu_last, new_cpu);
+
+ return new_cpu;
+}
+
static void __queue_work(int cpu, struct workqueue_struct *wq,
struct work_struct *work)
{
@@ -1323,7 +1380,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
return;
retry:
if (req_cpu == WORK_CPU_UNBOUND)
- cpu = raw_smp_processor_id();
+ cpu = wq_select_unbound_cpu(raw_smp_processor_id());
/* pwq which will be used unless @work is executing elsewhere */
if (!(wq->flags & WQ_UNBOUND))
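
With this hunk, queue_work() callers that pass WORK_CPU_UNBOUND no longer land unconditionally on the submitting CPU: the local CPU is still preferred when it sits in wq_unbound_cpumask, otherwise (or always, with workqueue.debug_force_rr_cpu) the work is spread round robin over the allowed online CPUs. A rough userspace model of just that round-robin scan and its wrap-around, with toy helpers standing in for cpumask_next_and()/cpumask_first_and() and a single cursor instead of the per-CPU wq_rr_cpu_last:

	#include <stdio.h>
	#include <stdint.h>

	#define NR_CPUS 8	/* toy value, plays the role of nr_cpu_ids */

	static int first_and(uint32_t a, uint32_t b)
	{
		uint32_t m = a & b;

		for (int cpu = 0; cpu < NR_CPUS; cpu++)
			if (m & (1u << cpu))
				return cpu;
		return NR_CPUS;			/* "past nr_cpu_ids" */
	}

	static int next_and(int prev, uint32_t a, uint32_t b)
	{
		uint32_t m = a & b;

		for (int cpu = prev + 1; cpu < NR_CPUS; cpu++)
			if (m & (1u << cpu))
				return cpu;
		return NR_CPUS;
	}

	static int rr_last = -1;		/* per-CPU in the kernel version */

	static int select_unbound_cpu(int local_cpu, uint32_t unbound_mask,
				      uint32_t online_mask)
	{
		int cpu = next_and(rr_last, unbound_mask, online_mask);

		if (cpu >= NR_CPUS)		/* wrap around to the first allowed CPU */
			cpu = first_and(unbound_mask, online_mask);
		if (cpu >= NR_CPUS)		/* nothing allowed and online: fall back */
			return local_cpu;
		rr_last = cpu;
		return cpu;
	}

	int main(void)
	{
		/* Allowed mask 0x26 = CPUs 1, 2, 5: prints 1 2 5 1 2 5 */
		for (int i = 0; i < 6; i++)
			printf("%d\n", select_unbound_cpu(0, 0x26, 0xff));
		return 0;
	}
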
@@ -1464,13 +1521,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
timer_stats_timer_set_start_info(&dwork->timer);
dwork->wq = wq;
- /* timer isn't guaranteed to run in this cpu, record earlier */
- if (cpu == WORK_CPU_UNBOUND)
- cpu = raw_smp_processor_id();
dwork->cpu = cpu;
timer->expires = jiffies + delay;
- add_timer_on(timer, cpu);
+ if (unlikely(cpu != WORK_CPU_UNBOUND))
+ add_timer_on(timer, cpu);
+ else
+ add_timer(timer);
}
/**
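
On the delayed-work side the same idea applies: when no CPU was requested there is no reason to pin the timer to the submitting CPU any more, since the queueing CPU is only picked by wq_select_unbound_cpu() once the timer fires. From a caller's point of view nothing changes; a minimal, hypothetical module sketch of the two flavours (the demo_* names are invented):

	#include <linux/module.h>
	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	static struct workqueue_struct *demo_wq;	/* hypothetical test wq */
	static struct delayed_work demo_dwork;

	static void demo_fn(struct work_struct *work)
	{
		pr_info("demo: delayed work ran on CPU %d\n", raw_smp_processor_id());
	}

	static int __init demo_init(void)
	{
		demo_wq = alloc_workqueue("demo_unbound", WQ_UNBOUND, 0);
		if (!demo_wq)
			return -ENOMEM;

		INIT_DELAYED_WORK(&demo_dwork, demo_fn);

		/*
		 * No CPU requested (WORK_CPU_UNBOUND internally): after this
		 * patch the timer is a plain add_timer() and the queueing CPU
		 * is chosen from wq_unbound_cpumask when it expires.
		 */
		queue_delayed_work(demo_wq, &demo_dwork, HZ);

		/*
		 * An explicit CPU still takes the add_timer_on() path:
		 *	queue_delayed_work_on(1, demo_wq, &demo_dwork, HZ);
		 */
		return 0;
	}

	static void __exit demo_exit(void)
	{
		cancel_delayed_work_sync(&demo_dwork);
		destroy_workqueue(demo_wq);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
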
@@ -2355,7 +2412,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
WARN_ONCE(current->flags & PF_MEMALLOC,
"workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
current->pid, current->comm, target_wq->name, target_func);
- WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+ WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
+ (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
"workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
worker->current_pwq->wq->name, worker->current_func,
target_wq->name, target_func);
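
The check_flush_dependency() tweak narrows the second warning: it now fires only when the flushing worker's workqueue asked for WQ_MEM_RECLAIM explicitly, while old-style create_workqueue() users, which get WQ_MEM_RECLAIM together with the internal __WQ_LEGACY flag, are exempt. A self-contained illustration of that flag test (the bit values below are made-up placeholders, not the ones in include/linux/workqueue.h):

	#include <stdbool.h>
	#include <stdio.h>

	/* Placeholder bits; the real values live in include/linux/workqueue.h. */
	#define WQ_MEM_RECLAIM	(1u << 0)
	#define __WQ_LEGACY	(1u << 1)

	/* Warn only when reclaim behaviour was requested explicitly. */
	static bool flusher_should_warn(unsigned int wq_flags)
	{
		return (wq_flags & (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM;
	}

	int main(void)
	{
		printf("%d\n", flusher_should_warn(WQ_MEM_RECLAIM));			/* 1 */
		printf("%d\n", flusher_should_warn(WQ_MEM_RECLAIM | __WQ_LEGACY));	/* 0 */
		printf("%d\n", flusher_should_warn(0));					/* 0 */
		return 0;
	}
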
@@ -4636,7 +4694,7 @@ static void work_for_cpu_fn(struct work_struct *work)
}
/**
- * work_on_cpu - run a function in user context on a particular cpu
+ * work_on_cpu - run a function in thread context on a particular cpu
* @cpu: the cpu to run on
* @fn: the function to run
* @arg: the function arg
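
The work_on_cpu() comment fix is purely wording ("thread context", since @fn runs from a kworker rather than in user context), but the call shape it documents is worth spelling out: the call is synchronous and hands back @fn's return value. A small hedged sketch, with read_local_state() and query_cpu() being invented names:

	#include <linux/cpu.h>
	#include <linux/kernel.h>
	#include <linux/smp.h>
	#include <linux/workqueue.h>

	/* Invented example: sample something CPU-local on a chosen CPU. */
	static long read_local_state(void *arg)
	{
		unsigned int *out = arg;

		*out = raw_smp_processor_id();	/* placeholder for real per-CPU work */
		return 0;
	}

	static long query_cpu(int cpu)
	{
		unsigned int state = 0;
		long ret;

		/*
		 * Blocks until read_local_state() has run on @cpu in thread
		 * (kworker) context; callers typically bracket this with
		 * get_online_cpus()/put_online_cpus() so @cpu stays online.
		 */
		ret = work_on_cpu(cpu, read_local_state, &state);
		if (!ret)
			pr_info("CPU %d reported %u\n", cpu, state);
		return ret;
	}
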
@@ -5162,8 +5220,8 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
wq_dev->wq = wq;
wq_dev->dev.bus = &wq_subsys;
- wq_dev->dev.init_name = wq->name;
wq_dev->dev.release = wq_device_release;
+ dev_set_name(&wq_dev->dev, "%s", wq->name);
/*
* unbound_attrs are created separately. Suppress uevent until