summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.hz2
-rw-r--r--kernel/audit.c13
-rw-r--r--kernel/auditfilter.c10
-rw-r--r--kernel/auditsc.c11
-rw-r--r--kernel/cgroup.c39
-rw-r--r--kernel/cpu.c7
-rw-r--r--kernel/cpuset.c71
-rw-r--r--kernel/dma-coherent.c15
-rw-r--r--kernel/exit.c6
-rw-r--r--kernel/irq/manage.c3
-rw-r--r--kernel/irq/proc.c96
-rw-r--r--kernel/kgdb.c94
-rw-r--r--kernel/lockdep.c295
-rw-r--r--kernel/lockdep_internals.h6
-rw-r--r--kernel/lockdep_proc.c37
-rw-r--r--kernel/marker.c12
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/mutex.c1
-rw-r--r--kernel/pm_qos_params.c16
-rw-r--r--kernel/posix-timers.c19
-rw-r--r--kernel/printk.c8
-rw-r--r--kernel/relay.c12
-rw-r--r--kernel/resource.c2
-rw-r--r--kernel/sched.c67
-rw-r--r--kernel/sched_clock.c178
-rw-r--r--kernel/sched_fair.c21
-rw-r--r--kernel/sched_rt.c8
-rw-r--r--kernel/semaphore.c4
-rw-r--r--kernel/signal.c1
-rw-r--r--kernel/smp.c58
-rw-r--r--kernel/spinlock.c11
-rw-r--r--kernel/stop_machine.c1
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/workqueue.c37
34 files changed, 724 insertions, 441 deletions
diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz
index 382dd5a8b2d7..94fabd534b03 100644
--- a/kernel/Kconfig.hz
+++ b/kernel/Kconfig.hz
@@ -55,4 +55,4 @@ config HZ
default 1000 if HZ_1000
config SCHED_HRTICK
- def_bool HIGH_RES_TIMERS && USE_GENERIC_SMP_HELPERS
+ def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS)
diff --git a/kernel/audit.c b/kernel/audit.c
index e092f1c0ce30..4414e93d8750 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -707,12 +707,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (status_get->mask & AUDIT_STATUS_ENABLED) {
err = audit_set_enabled(status_get->enabled,
loginuid, sessionid, sid);
- if (err < 0) return err;
+ if (err < 0)
+ return err;
}
if (status_get->mask & AUDIT_STATUS_FAILURE) {
err = audit_set_failure(status_get->failure,
loginuid, sessionid, sid);
- if (err < 0) return err;
+ if (err < 0)
+ return err;
}
if (status_get->mask & AUDIT_STATUS_PID) {
int new_pid = status_get->pid;
@@ -725,9 +727,12 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_pid = new_pid;
audit_nlk_pid = NETLINK_CB(skb).pid;
}
- if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
+ if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) {
err = audit_set_rate_limit(status_get->rate_limit,
loginuid, sessionid, sid);
+ if (err < 0)
+ return err;
+ }
if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
err = audit_set_backlog_limit(status_get->backlog_limit,
loginuid, sessionid, sid);
@@ -1366,7 +1371,7 @@ int audit_string_contains_control(const char *string, size_t len)
{
const unsigned char *p;
for (p = string; p < (const unsigned char *)string + len && *p; p++) {
- if (*p == '"' || *p < 0x21 || *p > 0x7f)
+ if (*p == '"' || *p < 0x21 || *p > 0x7e)
return 1;
}
return 0;
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 98c50cc671bb..b7d354e2b0ef 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1022,8 +1022,11 @@ static void audit_update_watch(struct audit_parent *parent,
struct audit_buffer *ab;
ab = audit_log_start(NULL, GFP_KERNEL,
AUDIT_CONFIG_CHANGE);
+ audit_log_format(ab, "auid=%u ses=%u",
+ audit_get_loginuid(current),
+ audit_get_sessionid(current));
audit_log_format(ab,
- "op=updated rules specifying path=");
+ " op=updated rules specifying path=");
audit_log_untrustedstring(ab, owatch->path);
audit_log_format(ab, " with dev=%u ino=%lu\n",
dev, ino);
@@ -1058,7 +1061,10 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
struct audit_buffer *ab;
ab = audit_log_start(NULL, GFP_KERNEL,
AUDIT_CONFIG_CHANGE);
- audit_log_format(ab, "op=remove rule path=");
+ audit_log_format(ab, "auid=%u ses=%u",
+ audit_get_loginuid(current),
+ audit_get_sessionid(current));
+ audit_log_format(ab, " op=remove rule path=");
audit_log_untrustedstring(ab, w->path);
if (r->filterkey) {
audit_log_format(ab, " key=");
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4699950e65bd..972f8e61d36a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -243,6 +243,9 @@ static inline int open_arg(int flags, int mask)
static int audit_match_perm(struct audit_context *ctx, int mask)
{
+ if (unlikely(!ctx))
+ return 0;
+
unsigned n = ctx->major;
switch (audit_classify_syscall(ctx->arch, n)) {
case 0: /* native */
@@ -284,6 +287,10 @@ static int audit_match_filetype(struct audit_context *ctx, int which)
{
unsigned index = which & ~S_IFMT;
mode_t mode = which & S_IFMT;
+
+ if (unlikely(!ctx))
+ return 0;
+
if (index >= ctx->name_count)
return 0;
if (ctx->names[index].ino == -1)
@@ -610,7 +617,7 @@ static int audit_filter_rules(struct task_struct *tsk,
if (!result)
return 0;
}
- if (rule->filterkey)
+ if (rule->filterkey && ctx)
ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
switch (rule->action) {
case AUDIT_NEVER: *state = AUDIT_DISABLED; break;
@@ -2375,7 +2382,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
struct audit_context *ctx = tsk->audit_context;
if (audit_pid && t->tgid == audit_pid) {
- if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
+ if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
audit_sig_pid = tsk->pid;
if (tsk->loginuid != -1)
audit_sig_uid = tsk->loginuid;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 657f8f8d93a5..13932abde159 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -355,6 +355,17 @@ static struct css_set *find_existing_css_set(
return NULL;
}
+static void free_cg_links(struct list_head *tmp)
+{
+ struct cg_cgroup_link *link;
+ struct cg_cgroup_link *saved_link;
+
+ list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
+ list_del(&link->cgrp_link_list);
+ kfree(link);
+ }
+}
+
/*
* allocate_cg_links() allocates "count" cg_cgroup_link structures
* and chains them on tmp through their cgrp_link_list fields. Returns 0 on
@@ -363,17 +374,12 @@ static struct css_set *find_existing_css_set(
static int allocate_cg_links(int count, struct list_head *tmp)
{
struct cg_cgroup_link *link;
- struct cg_cgroup_link *saved_link;
int i;
INIT_LIST_HEAD(tmp);
for (i = 0; i < count; i++) {
link = kmalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
- list_for_each_entry_safe(link, saved_link, tmp,
- cgrp_link_list) {
- list_del(&link->cgrp_link_list);
- kfree(link);
- }
+ free_cg_links(tmp);
return -ENOMEM;
}
list_add(&link->cgrp_link_list, tmp);
@@ -381,17 +387,6 @@ static int allocate_cg_links(int count, struct list_head *tmp)
return 0;
}
-static void free_cg_links(struct list_head *tmp)
-{
- struct cg_cgroup_link *link;
- struct cg_cgroup_link *saved_link;
-
- list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
- list_del(&link->cgrp_link_list);
- kfree(link);
- }
-}
-
/*
* find_css_set() takes an existing cgroup group and a
* cgroup object, and returns a css_set object that's
@@ -956,7 +951,6 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
struct super_block *sb;
struct cgroupfs_root *root;
struct list_head tmp_cg_links;
- INIT_LIST_HEAD(&tmp_cg_links);
/* First find the desired set of subsystems */
ret = parse_cgroupfs_options(data, &opts);
@@ -1424,14 +1418,17 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
if (buffer == NULL)
return -ENOMEM;
}
- if (nbytes && copy_from_user(buffer, userbuf, nbytes))
- return -EFAULT;
+ if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
+ retval = -EFAULT;
+ goto out;
+ }
buffer[nbytes] = 0; /* nul-terminate */
strstrip(buffer);
retval = cft->write_string(cgrp, cft, buffer);
if (!retval)
retval = nbytes;
+out:
if (buffer != local_buffer)
kfree(buffer);
return retval;
@@ -2371,7 +2368,7 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return cgroup_create(c_parent, dentry, mode | S_IFDIR);
}
-static inline int cgroup_has_css_refs(struct cgroup *cgrp)
+static int cgroup_has_css_refs(struct cgroup *cgrp)
{
/* Check the reference count on each subsystem. Since we
* already established that there are no tasks in the
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e202a68d1cc1..f17e9854c246 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -349,6 +349,8 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
goto out_notify;
BUG_ON(!cpu_online(cpu));
+ cpu_set(cpu, cpu_active_map);
+
/* Now call notifier in preparation. */
raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
@@ -367,7 +369,7 @@ int __cpuinit cpu_up(unsigned int cpu)
if (!cpu_isset(cpu, cpu_possible_map)) {
printk(KERN_ERR "can't online cpu %d because it is not "
"configured as may-hotadd at boot time\n", cpu);
-#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390)
+#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
printk(KERN_ERR "please check additional_cpus= boot "
"parameter\n");
#endif
@@ -383,9 +385,6 @@ int __cpuinit cpu_up(unsigned int cpu)
err = _cpu_up(cpu, 0);
- if (cpu_online(cpu))
- cpu_set(cpu, cpu_active_map);
-
out:
cpu_maps_update_done();
return err;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 91cf85b36dd5..d5ab79cf516d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -54,7 +54,6 @@
#include <asm/uaccess.h>
#include <asm/atomic.h>
#include <linux/mutex.h>
-#include <linux/kfifo.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
@@ -486,13 +485,38 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
static void
update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
{
- if (!dattr)
- return;
if (dattr->relax_domain_level < c->relax_domain_level)
dattr->relax_domain_level = c->relax_domain_level;
return;
}
+static void
+update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
+{
+ LIST_HEAD(q);
+
+ list_add(&c->stack_list, &q);
+ while (!list_empty(&q)) {
+ struct cpuset *cp;
+ struct cgroup *cont;
+ struct cpuset *child;
+
+ cp = list_first_entry(&q, struct cpuset, stack_list);
+ list_del(q.next);
+
+ if (cpus_empty(cp->cpus_allowed))
+ continue;
+
+ if (is_sched_load_balance(cp))
+ update_domain_attr(dattr, cp);
+
+ list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
+ child = cgroup_cs(cont);
+ list_add_tail(&child->stack_list, &q);
+ }
+ }
+}
+
/*
* rebuild_sched_domains()
*
@@ -532,7 +556,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
* So the reverse nesting would risk an ABBA deadlock.
*
* The three key local variables below are:
- * q - a kfifo queue of cpuset pointers, used to implement a
+ * q - a linked-list queue of cpuset pointers, used to implement a
* top-down scan of all cpusets. This scan loads a pointer
* to each cpuset marked is_sched_load_balance into the
* array 'csa'. For our purposes, rebuilding the schedulers
@@ -567,7 +591,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
void rebuild_sched_domains(void)
{
- struct kfifo *q; /* queue of cpusets to be scanned */
+ LIST_HEAD(q); /* queue of cpusets to be scanned*/
struct cpuset *cp; /* scans q */
struct cpuset **csa; /* array of all cpuset ptrs */
int csn; /* how many cpuset ptrs in csa so far */
@@ -577,7 +601,6 @@ void rebuild_sched_domains(void)
int ndoms; /* number of sched domains in result */
int nslot; /* next empty doms[] cpumask_t slot */
- q = NULL;
csa = NULL;
doms = NULL;
dattr = NULL;
@@ -591,35 +614,42 @@ void rebuild_sched_domains(void)
dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
if (dattr) {
*dattr = SD_ATTR_INIT;
- update_domain_attr(dattr, &top_cpuset);
+ update_domain_attr_tree(dattr, &top_cpuset);
}
*doms = top_cpuset.cpus_allowed;
goto rebuild;
}
- q = kfifo_alloc(number_of_cpusets * sizeof(cp), GFP_KERNEL, NULL);
- if (IS_ERR(q))
- goto done;
csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
if (!csa)
goto done;
csn = 0;
- cp = &top_cpuset;
- __kfifo_put(q, (void *)&cp, sizeof(cp));
- while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
+ list_add(&top_cpuset.stack_list, &q);
+ while (!list_empty(&q)) {
struct cgroup *cont;
struct cpuset *child; /* scans child cpusets of cp */
+ cp = list_first_entry(&q, struct cpuset, stack_list);
+ list_del(q.next);
+
if (cpus_empty(cp->cpus_allowed))
continue;
- if (is_sched_load_balance(cp))
+ /*
+ * All child cpusets contain a subset of the parent's cpus, so
+ * just skip them, and then we call update_domain_attr_tree()
+ * to calc relax_domain_level of the corresponding sched
+ * domain.
+ */
+ if (is_sched_load_balance(cp)) {
csa[csn++] = cp;
+ continue;
+ }
list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
child = cgroup_cs(cont);
- __kfifo_put(q, (void *)&child, sizeof(cp));
+ list_add_tail(&child->stack_list, &q);
}
}
@@ -686,7 +716,7 @@ restart:
cpus_or(*dp, *dp, b->cpus_allowed);
b->pn = -1;
if (dattr)
- update_domain_attr(dattr
+ update_domain_attr_tree(dattr
+ nslot, b);
}
}
@@ -702,8 +732,6 @@ rebuild:
put_online_cpus();
done:
- if (q && !IS_ERR(q))
- kfifo_free(q);
kfree(csa);
/* Don't kfree(doms) -- partition_sched_domains() does that. */
/* Don't kfree(dattr) -- partition_sched_domains() does that. */
@@ -1833,24 +1861,21 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
*/
static void scan_for_empty_cpusets(const struct cpuset *root)
{
+ LIST_HEAD(queue);
struct cpuset *cp; /* scans cpusets being updated */
struct cpuset *child; /* scans child cpusets of cp */
- struct list_head queue;
struct cgroup *cont;
nodemask_t oldmems;
- INIT_LIST_HEAD(&queue);
-
list_add_tail((struct list_head *)&root->stack_list, &queue);
while (!list_empty(&queue)) {
- cp = container_of(queue.next, struct cpuset, stack_list);
+ cp = list_first_entry(&queue, struct cpuset, stack_list);
list_del(queue.next);
list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
child = cgroup_cs(cont);
list_add_tail(&child->stack_list, &queue);
}
- cont = cp->css.cgroup;
/* Continue past cpusets with all cpus, mems online */
if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index 7517115a8cce..c1d4d5b4c61c 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -77,15 +77,14 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
{
struct dma_coherent_mem *mem = dev->dma_mem;
int pos, err;
- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
- pages >>= PAGE_SHIFT;
+ size += device_addr & ~PAGE_MASK;
if (!mem)
return ERR_PTR(-EINVAL);
pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
+ err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
if (err != 0)
return ERR_PTR(err);
return mem->virt_base + (pos << PAGE_SHIFT);
@@ -93,7 +92,7 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
/**
- * Try to allocate memory from the per-device coherent area.
+ * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area
*
* @dev: device from which we allocate memory
* @size: size of requested memory area
@@ -101,11 +100,11 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
* @ret: This pointer will be filled with the virtual address
* to allocated area.
*
- * This function should be only called from per-arch %dma_alloc_coherent()
+ * This function should be only called from per-arch dma_alloc_coherent()
* to support allocation from per-device coherent memory pools.
*
* Returns 0 if dma_alloc_coherent should continue with allocating from
- * generic memory areas, or !0 if dma_alloc_coherent should return %ret.
+ * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
*/
int dma_alloc_from_coherent(struct device *dev, ssize_t size,
dma_addr_t *dma_handle, void **ret)
@@ -127,7 +126,7 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
}
/**
- * Try to free the memory allocated from per-device coherent memory pool.
+ * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool
* @dev: device from which the memory was allocated
* @order: the order of pages allocated
* @vaddr: virtual address of allocated pages
@@ -136,7 +135,7 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
* coherent memory pool and if so, releases that memory.
*
* Returns 1 if we correctly released the memory, or 0 if
- * %dma_release_coherent() should proceed with releasing memory from
+ * dma_release_coherent() should proceed with releasing memory from
* generic pools.
*/
int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
diff --git a/kernel/exit.c b/kernel/exit.c
index eb4d6470d1d0..38ec40630149 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -911,10 +911,10 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
tsk->exit_signal = SIGCHLD;
signal = tracehook_notify_death(tsk, &cookie, group_dead);
- if (signal > 0)
+ if (signal >= 0)
signal = do_notify_parent(tsk, signal);
- tsk->exit_state = signal < 0 ? EXIT_DEAD : EXIT_ZOMBIE;
+ tsk->exit_state = signal == DEATH_REAP ? EXIT_DEAD : EXIT_ZOMBIE;
/* mt-exec, de_thread() is waiting for us */
if (thread_group_leader(tsk) &&
@@ -927,7 +927,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
tracehook_report_death(tsk, signal, cookie, group_dead);
/* If the process is dead, release it - nobody will wait for it */
- if (signal < 0)
+ if (signal == DEATH_REAP)
release_task(tsk);
}
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 22d10d3189f8..60c49e324390 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -330,7 +330,8 @@ static int __irq_set_trigger(struct irq_chip *chip, unsigned int irq,
ret = chip->set_type(irq, flags & IRQF_TRIGGER_MASK);
if (ret)
- pr_err("setting flow type for irq %u failed (%pF)\n",
+ pr_err("setting trigger mode %d for irq %u failed (%pF)\n",
+ (int)(flags & IRQF_TRIGGER_MASK),
irq, chip->set_type);
return ret;
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6c6d35d68ee9..a09dd29c2fd7 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -8,6 +8,7 @@
#include <linux/irq.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/interrupt.h>
#include "internals.h"
@@ -16,23 +17,18 @@ static struct proc_dir_entry *root_irq_dir;
#ifdef CONFIG_SMP
-static int irq_affinity_read_proc(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static int irq_affinity_proc_show(struct seq_file *m, void *v)
{
- struct irq_desc *desc = irq_desc + (long)data;
+ struct irq_desc *desc = irq_desc + (long)m->private;
cpumask_t *mask = &desc->affinity;
- int len;
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PENDING)
mask = &desc->pending_mask;
#endif
- len = cpumask_scnprintf(page, count, *mask);
-
- if (count - len < 2)
- return -EINVAL;
- len += sprintf(page + len, "\n");
- return len;
+ seq_cpumask(m, mask);
+ seq_putc(m, '\n');
+ return 0;
}
#ifndef is_affinity_mask_valid
@@ -40,11 +36,12 @@ static int irq_affinity_read_proc(char *page, char **start, off_t off,
#endif
int no_irq_affinity;
-static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+static ssize_t irq_affinity_proc_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *pos)
{
- unsigned int irq = (int)(long)data, full_count = count, err;
+ unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
cpumask_t new_value;
+ int err;
if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
irq_balancing_disabled(irq))
@@ -65,28 +62,38 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
if (!cpus_intersects(new_value, cpu_online_map))
/* Special case for empty set - allow the architecture
code to set default SMP affinity. */
- return irq_select_affinity(irq) ? -EINVAL : full_count;
+ return irq_select_affinity(irq) ? -EINVAL : count;
irq_set_affinity(irq, new_value);
- return full_count;
+ return count;
}
-static int default_affinity_read(char *page, char **start, off_t off,
- int count, int *eof, void *data)
+static int irq_affinity_proc_open(struct inode *inode, struct file *file)
{
- int len = cpumask_scnprintf(page, count, irq_default_affinity);
- if (count - len < 2)
- return -EINVAL;
- len += sprintf(page + len, "\n");
- return len;
+ return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
}
-static int default_affinity_write(struct file *file, const char __user *buffer,
- unsigned long count, void *data)
+static const struct file_operations irq_affinity_proc_fops = {
+ .open = irq_affinity_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = irq_affinity_proc_write,
+};
+
+static int default_affinity_show(struct seq_file *m, void *v)
+{
+ seq_cpumask(m, &irq_default_affinity);
+ seq_putc(m, '\n');
+ return 0;
+}
+
+static ssize_t default_affinity_write(struct file *file,
+ const char __user *buffer, size_t count, loff_t *ppos)
{
- unsigned int full_count = count, err;
cpumask_t new_value;
+ int err;
err = cpumask_parse_user(buffer, count, new_value);
if (err)
@@ -105,8 +112,21 @@ static int default_affinity_write(struct file *file, const char __user *buffer,
irq_default_affinity = new_value;
- return full_count;
+ return count;
}
+
+static int default_affinity_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, default_affinity_show, NULL);
+}
+
+static const struct file_operations default_affinity_proc_fops = {
+ .open = default_affinity_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ .write = default_affinity_write,
+};
#endif
static int irq_spurious_read(char *page, char **start, off_t off,
@@ -178,16 +198,9 @@ void register_irq_proc(unsigned int irq)
irq_desc[irq].dir = proc_mkdir(name, root_irq_dir);
#ifdef CONFIG_SMP
- {
- /* create /proc/irq/<irq>/smp_affinity */
- entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir);
-
- if (entry) {
- entry->data = (void *)(long)irq;
- entry->read_proc = irq_affinity_read_proc;
- entry->write_proc = irq_affinity_write_proc;
- }
- }
+ /* create /proc/irq/<irq>/smp_affinity */
+ proc_create_data("smp_affinity", 0600, irq_desc[irq].dir,
+ &irq_affinity_proc_fops, (void *)(long)irq);
#endif
entry = create_proc_entry("spurious", 0444, irq_desc[irq].dir);
@@ -208,15 +221,8 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
void register_default_affinity_proc(void)
{
#ifdef CONFIG_SMP
- struct proc_dir_entry *entry;
-
- /* create /proc/irq/default_smp_affinity */
- entry = create_proc_entry("default_smp_affinity", 0600, root_irq_dir);
- if (entry) {
- entry->data = NULL;
- entry->read_proc = default_affinity_read;
- entry->write_proc = default_affinity_write;
- }
+ proc_create("irq/default_smp_affinity", 0600, NULL,
+ &default_affinity_proc_fops);
#endif
}
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 3ec23c3ec97f..eaa21fc9ad1d 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -56,12 +56,14 @@
static int kgdb_break_asap;
+#define KGDB_MAX_THREAD_QUERY 17
struct kgdb_state {
int ex_vector;
int signo;
int err_code;
int cpu;
int pass_exception;
+ unsigned long thr_query;
unsigned long threadid;
long kgdb_usethreadid;
struct pt_regs *linux_regs;
@@ -166,13 +168,6 @@ early_param("nokgdbroundup", opt_nokgdbroundup);
* Weak aliases for breakpoint management,
* can be overriden by architectures when needed:
*/
-int __weak kgdb_validate_break_address(unsigned long addr)
-{
- char tmp_variable[BREAK_INSTR_SIZE];
-
- return probe_kernel_read(tmp_variable, (char *)addr, BREAK_INSTR_SIZE);
-}
-
int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
{
int err;
@@ -191,6 +186,25 @@ int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
(char *)bundle, BREAK_INSTR_SIZE);
}
+int __weak kgdb_validate_break_address(unsigned long addr)
+{
+ char tmp_variable[BREAK_INSTR_SIZE];
+ int err;
+ /* Validate setting the breakpoint and then removing it. In the
+ * remove fails, the kernel needs to emit a bad message because we
+ * are deep trouble not being able to put things back the way we
+ * found them.
+ */
+ err = kgdb_arch_set_breakpoint(addr, tmp_variable);
+ if (err)
+ return err;
+ err = kgdb_arch_remove_breakpoint(addr, tmp_variable);
+ if (err)
+ printk(KERN_ERR "KGDB: Critical breakpoint error, kernel "
+ "memory destroyed at: %lx", addr);
+ return err;
+}
+
unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs)
{
return instruction_pointer(regs);
@@ -433,9 +447,14 @@ int kgdb_hex2long(char **ptr, unsigned long *long_val)
{
int hex_val;
int num = 0;
+ int negate = 0;
*long_val = 0;
+ if (**ptr == '-') {
+ negate = 1;
+ (*ptr)++;
+ }
while (**ptr) {
hex_val = hex(**ptr);
if (hex_val < 0)
@@ -446,6 +465,9 @@ int kgdb_hex2long(char **ptr, unsigned long *long_val)
(*ptr)++;
}
+ if (negate)
+ *long_val = -*long_val;
+
return num;
}
@@ -515,10 +537,16 @@ static void int_to_threadref(unsigned char *id, int value)
static struct task_struct *getthread(struct pt_regs *regs, int tid)
{
/*
- * Non-positive TIDs are remapped idle tasks:
+ * Non-positive TIDs are remapped to the cpu shadow information
*/
- if (tid <= 0)
- return idle_task(-tid);
+ if (tid == 0 || tid == -1)
+ tid = -atomic_read(&kgdb_active) - 2;
+ if (tid < 0) {
+ if (kgdb_info[-tid - 2].task)
+ return kgdb_info[-tid - 2].task;
+ else
+ return idle_task(-tid - 2);
+ }
/*
* find_task_by_pid_ns() does not take the tasklist lock anymore
@@ -725,14 +753,15 @@ setundefined:
}
/*
- * Remap normal tasks to their real PID, idle tasks to -1 ... -NR_CPUs:
+ * Remap normal tasks to their real PID,
+ * CPU shadow threads are mapped to -CPU - 2
*/
static inline int shadow_pid(int realpid)
{
if (realpid)
return realpid;
- return -1-raw_smp_processor_id();
+ return -raw_smp_processor_id() - 2;
}
static char gdbmsgbuf[BUFMAX + 1];
@@ -826,7 +855,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks)
local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo;
} else {
local_debuggerinfo = NULL;
- for (i = 0; i < NR_CPUS; i++) {
+ for_each_online_cpu(i) {
/*
* Try to find the task on some other
* or possibly this node if we do not
@@ -960,10 +989,13 @@ static int gdb_cmd_reboot(struct kgdb_state *ks)
/* Handle the 'q' query packets */
static void gdb_cmd_query(struct kgdb_state *ks)
{
- struct task_struct *thread;
+ struct task_struct *g;
+ struct task_struct *p;
unsigned char thref[8];
char *ptr;
int i;
+ int cpu;
+ int finished = 0;
switch (remcom_in_buffer[1]) {
case 's':
@@ -973,22 +1005,34 @@ static void gdb_cmd_query(struct kgdb_state *ks)
break;
}
- if (remcom_in_buffer[1] == 'f')
- ks->threadid = 1;
-
+ i = 0;
remcom_out_buffer[0] = 'm';
ptr = remcom_out_buffer + 1;
-
- for (i = 0; i < 17; ks->threadid++) {
- thread = getthread(ks->linux_regs, ks->threadid);
- if (thread) {
- int_to_threadref(thref, ks->threadid);
+ if (remcom_in_buffer[1] == 'f') {
+ /* Each cpu is a shadow thread */
+ for_each_online_cpu(cpu) {
+ ks->thr_query = 0;
+ int_to_threadref(thref, -cpu - 2);
pack_threadid(ptr, thref);
ptr += BUF_THREAD_ID_SIZE;
*(ptr++) = ',';
i++;
}
}
+
+ do_each_thread(g, p) {
+ if (i >= ks->thr_query && !finished) {
+ int_to_threadref(thref, p->pid);
+ pack_threadid(ptr, thref);
+ ptr += BUF_THREAD_ID_SIZE;
+ *(ptr++) = ',';
+ ks->thr_query++;
+ if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0)
+ finished = 1;
+ }
+ i++;
+ } while_each_thread(g, p);
+
*(--ptr) = '\0';
break;
@@ -1011,15 +1055,15 @@ static void gdb_cmd_query(struct kgdb_state *ks)
error_packet(remcom_out_buffer, -EINVAL);
break;
}
- if (ks->threadid > 0) {
+ if ((int)ks->threadid > 0) {
kgdb_mem2hex(getthread(ks->linux_regs,
ks->threadid)->comm,
remcom_out_buffer, 16);
} else {
static char tmpstr[23 + BUF_THREAD_ID_SIZE];
- sprintf(tmpstr, "Shadow task %d for pid 0",
- (int)(-ks->threadid-1));
+ sprintf(tmpstr, "shadowCPU%d",
+ (int)(-ks->threadid - 2));
kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr));
}
break;
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index d38a64362973..1aa91fd6b06e 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -124,6 +124,15 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
unsigned long nr_lock_classes;
static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+static inline struct lock_class *hlock_class(struct held_lock *hlock)
+{
+ if (!hlock->class_idx) {
+ DEBUG_LOCKS_WARN_ON(1);
+ return NULL;
+ }
+ return lock_classes + hlock->class_idx - 1;
+}
+
#ifdef CONFIG_LOCK_STAT
static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
@@ -222,7 +231,7 @@ static void lock_release_holdtime(struct held_lock *hlock)
holdtime = sched_clock() - hlock->holdtime_stamp;
- stats = get_lock_stats(hlock->class);
+ stats = get_lock_stats(hlock_class(hlock));
if (hlock->read)
lock_time_inc(&stats->read_holdtime, holdtime);
else
@@ -372,6 +381,19 @@ unsigned int nr_process_chains;
unsigned int max_lockdep_depth;
unsigned int max_recursion_depth;
+static unsigned int lockdep_dependency_gen_id;
+
+static bool lockdep_dependency_visit(struct lock_class *source,
+ unsigned int depth)
+{
+ if (!depth)
+ lockdep_dependency_gen_id++;
+ if (source->dep_gen_id == lockdep_dependency_gen_id)
+ return true;
+ source->dep_gen_id = lockdep_dependency_gen_id;
+ return false;
+}
+
#ifdef CONFIG_DEBUG_LOCKDEP
/*
* We cannot printk in early bootup code. Not even early_printk()
@@ -505,7 +527,7 @@ static void print_lockdep_cache(struct lockdep_map *lock)
static void print_lock(struct held_lock *hlock)
{
- print_lock_name(hlock->class);
+ print_lock_name(hlock_class(hlock));
printk(", at: ");
print_ip_sym(hlock->acquire_ip);
}
@@ -558,6 +580,9 @@ static void print_lock_dependencies(struct lock_class *class, int depth)
{
struct lock_list *entry;
+ if (lockdep_dependency_visit(class, depth))
+ return;
+
if (DEBUG_LOCKS_WARN_ON(depth >= 20))
return;
@@ -932,7 +957,7 @@ static noinline int print_circular_bug_tail(void)
if (debug_locks_silent)
return 0;
- this.class = check_source->class;
+ this.class = hlock_class(check_source);
if (!save_trace(&this.trace))
return 0;
@@ -959,6 +984,67 @@ static int noinline print_infinite_recursion_bug(void)
return 0;
}
+unsigned long __lockdep_count_forward_deps(struct lock_class *class,
+ unsigned int depth)
+{
+ struct lock_list *entry;
+ unsigned long ret = 1;
+
+ if (lockdep_dependency_visit(class, depth))
+ return 0;
+
+ /*
+ * Recurse this class's dependency list:
+ */
+ list_for_each_entry(entry, &class->locks_after, entry)
+ ret += __lockdep_count_forward_deps(entry->class, depth + 1);
+
+ return ret;
+}
+
+unsigned long lockdep_count_forward_deps(struct lock_class *class)
+{
+ unsigned long ret, flags;
+
+ local_irq_save(flags);
+ __raw_spin_lock(&lockdep_lock);
+ ret = __lockdep_count_forward_deps(class, 0);
+ __raw_spin_unlock(&lockdep_lock);
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+unsigned long __lockdep_count_backward_deps(struct lock_class *class,
+ unsigned int depth)
+{
+ struct lock_list *entry;
+ unsigned long ret = 1;
+
+ if (lockdep_dependency_visit(class, depth))
+ return 0;
+ /*
+ * Recurse this class's dependency list:
+ */
+ list_for_each_entry(entry, &class->locks_before, entry)
+ ret += __lockdep_count_backward_deps(entry->class, depth + 1);
+
+ return ret;
+}
+
+unsigned long lockdep_count_backward_deps(struct lock_class *class)
+{
+ unsigned long ret, flags;
+
+ local_irq_save(flags);
+ __raw_spin_lock(&lockdep_lock);
+ ret = __lockdep_count_backward_deps(class, 0);
+ __raw_spin_unlock(&lockdep_lock);
+ local_irq_restore(flags);
+
+ return ret;
+}
+
/*
* Prove that the dependency graph starting at <entry> can not
* lead to <target>. Print an error and return 0 if it does.
@@ -968,6 +1054,9 @@ check_noncircular(struct lock_class *source, unsigned int depth)
{
struct lock_list *entry;
+ if (lockdep_dependency_visit(source, depth))
+ return 1;
+
debug_atomic_inc(&nr_cyclic_check_recursions);
if (depth > max_recursion_depth)
max_recursion_depth = depth;
@@ -977,7 +1066,7 @@ check_noncircular(struct lock_class *source, unsigned int depth)
* Check this lock's dependency list:
*/
list_for_each_entry(entry, &source->locks_after, entry) {
- if (entry->class == check_target->class)
+ if (entry->class == hlock_class(check_target))
return print_circular_bug_header(entry, depth+1);
debug_atomic_inc(&nr_cyclic_checks);
if (!check_noncircular(entry->class, depth+1))
@@ -1011,6 +1100,9 @@ find_usage_forwards(struct lock_class *source, unsigned int depth)
struct lock_list *entry;
int ret;
+ if (lockdep_dependency_visit(source, depth))
+ return 1;
+
if (depth > max_recursion_depth)
max_recursion_depth = depth;
if (depth >= RECURSION_LIMIT)
@@ -1050,6 +1142,9 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
struct lock_list *entry;
int ret;
+ if (lockdep_dependency_visit(source, depth))
+ return 1;
+
if (!__raw_spin_is_locked(&lockdep_lock))
return DEBUG_LOCKS_WARN_ON(1);
@@ -1064,6 +1159,11 @@ find_usage_backwards(struct lock_class *source, unsigned int depth)
return 2;
}
+ if (!source && debug_locks_off_graph_unlock()) {
+ WARN_ON(1);
+ return 0;
+ }
+
/*
* Check this lock's dependency list:
*/
@@ -1103,9 +1203,9 @@ print_bad_irq_dependency(struct task_struct *curr,
printk("\nand this task is already holding:\n");
print_lock(prev);
printk("which would create a new lock dependency:\n");
- print_lock_name(prev->class);
+ print_lock_name(hlock_class(prev));
printk(" ->");
- print_lock_name(next->class);
+ print_lock_name(hlock_class(next));
printk("\n");
printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
@@ -1146,12 +1246,12 @@ check_usage(struct task_struct *curr, struct held_lock *prev,
find_usage_bit = bit_backwards;
/* fills in <backwards_match> */
- ret = find_usage_backwards(prev->class, 0);
+ ret = find_usage_backwards(hlock_class(prev), 0);
if (!ret || ret == 1)
return ret;
find_usage_bit = bit_forwards;
- ret = find_usage_forwards(next->class, 0);
+ ret = find_usage_forwards(hlock_class(next), 0);
if (!ret || ret == 1)
return ret;
/* ret == 2 */
@@ -1272,18 +1372,32 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
struct lockdep_map *next_instance, int read)
{
struct held_lock *prev;
+ struct held_lock *nest = NULL;
int i;
for (i = 0; i < curr->lockdep_depth; i++) {
prev = curr->held_locks + i;
- if (prev->class != next->class)
+
+ if (prev->instance == next->nest_lock)
+ nest = prev;
+
+ if (hlock_class(prev) != hlock_class(next))
continue;
+
/*
* Allow read-after-read recursion of the same
* lock class (i.e. read_lock(lock)+read_lock(lock)):
*/
if ((read == 2) && prev->read)
return 2;
+
+ /*
+ * We're holding the nest_lock, which serializes this lock's
+ * nesting behaviour.
+ */
+ if (nest)
+ return 2;
+
return print_deadlock_bug(curr, prev, next);
}
return 1;
@@ -1329,7 +1443,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
*/
check_source = next;
check_target = prev;
- if (!(check_noncircular(next->class, 0)))
+ if (!(check_noncircular(hlock_class(next), 0)))
return print_circular_bug_tail();
if (!check_prev_add_irq(curr, prev, next))
@@ -1353,8 +1467,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
* chains - the second one will be new, but L1 already has
* L2 added to its dependency list, due to the first chain.)
*/
- list_for_each_entry(entry, &prev->class->locks_after, entry) {
- if (entry->class == next->class) {
+ list_for_each_entry(entry, &hlock_class(prev)->locks_after, entry) {
+ if (entry->class == hlock_class(next)) {
if (distance == 1)
entry->distance = 1;
return 2;
@@ -1365,26 +1479,28 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
* Ok, all validations passed, add the new lock
* to the previous lock's dependency list:
*/
- ret = add_lock_to_list(prev->class, next->class,
- &prev->class->locks_after, next->acquire_ip, distance);
+ ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
+ &hlock_class(prev)->locks_after,
+ next->acquire_ip, distance);
if (!ret)
return 0;
- ret = add_lock_to_list(next->class, prev->class,
- &next->class->locks_before, next->acquire_ip, distance);
+ ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
+ &hlock_class(next)->locks_before,
+ next->acquire_ip, distance);
if (!ret)
return 0;
/*
* Debugging printouts:
*/
- if (verbose(prev->class) || verbose(next->class)) {
+ if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
graph_unlock();
printk("\n new dependency: ");
- print_lock_name(prev->class);
+ print_lock_name(hlock_class(prev));
printk(" => ");
- print_lock_name(next->class);
+ print_lock_name(hlock_class(next));
printk("\n");
dump_stack();
return graph_lock();
@@ -1481,7 +1597,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
struct held_lock *hlock,
u64 chain_key)
{
- struct lock_class *class = hlock->class;
+ struct lock_class *class = hlock_class(hlock);
struct list_head *hash_head = chainhashentry(chain_key);
struct lock_chain *chain;
struct held_lock *hlock_curr, *hlock_next;
@@ -1554,7 +1670,7 @@ cache_hit:
if (likely(cn + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
chain->base = cn;
for (j = 0; j < chain->depth - 1; j++, i++) {
- int lock_id = curr->held_locks[i].class - lock_classes;
+ int lock_id = curr->held_locks[i].class_idx - 1;
chain_hlocks[chain->base + j] = lock_id;
}
chain_hlocks[chain->base + j] = class - lock_classes;
@@ -1650,7 +1766,7 @@ static void check_chain_key(struct task_struct *curr)
WARN_ON(1);
return;
}
- id = hlock->class - lock_classes;
+ id = hlock->class_idx - 1;
if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
return;
@@ -1695,7 +1811,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
print_lock(this);
printk("{%s} state was registered at:\n", usage_str[prev_bit]);
- print_stack_trace(this->class->usage_traces + prev_bit, 1);
+ print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1);
print_irqtrace_events(curr);
printk("\nother info that might help us debug this:\n");
@@ -1714,7 +1830,7 @@ static inline int
valid_state(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
{
- if (unlikely(this->class->usage_mask & (1 << bad_bit)))
+ if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit)))
return print_usage_bug(curr, this, bad_bit, new_bit);
return 1;
}
@@ -1753,7 +1869,7 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
lockdep_print_held_locks(curr);
printk("\nthe first lock's dependencies:\n");
- print_lock_dependencies(this->class, 0);
+ print_lock_dependencies(hlock_class(this), 0);
printk("\nthe second lock's dependencies:\n");
print_lock_dependencies(other, 0);
@@ -1776,7 +1892,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
find_usage_bit = bit;
/* fills in <forwards_match> */
- ret = find_usage_forwards(this->class, 0);
+ ret = find_usage_forwards(hlock_class(this), 0);
if (!ret || ret == 1)
return ret;
@@ -1795,7 +1911,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
find_usage_bit = bit;
/* fills in <backwards_match> */
- ret = find_usage_backwards(this->class, 0);
+ ret = find_usage_backwards(hlock_class(this), 0);
if (!ret || ret == 1)
return ret;
@@ -1861,7 +1977,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
return 0;
#endif
- if (hardirq_verbose(this->class))
+ if (hardirq_verbose(hlock_class(this)))
ret = 2;
break;
case LOCK_USED_IN_SOFTIRQ:
@@ -1886,7 +2002,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
return 0;
#endif
- if (softirq_verbose(this->class))
+ if (softirq_verbose(hlock_class(this)))
ret = 2;
break;
case LOCK_USED_IN_HARDIRQ_READ:
@@ -1899,7 +2015,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
if (!check_usage_forwards(curr, this,
LOCK_ENABLED_HARDIRQS, "hard"))
return 0;
- if (hardirq_verbose(this->class))
+ if (hardirq_verbose(hlock_class(this)))
ret = 2;
break;
case LOCK_USED_IN_SOFTIRQ_READ:
@@ -1912,7 +2028,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
if (!check_usage_forwards(curr, this,
LOCK_ENABLED_SOFTIRQS, "soft"))
return 0;
- if (softirq_verbose(this->class))
+ if (softirq_verbose(hlock_class(this)))
ret = 2;
break;
case LOCK_ENABLED_HARDIRQS:
@@ -1938,7 +2054,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
return 0;
#endif
- if (hardirq_verbose(this->class))
+ if (hardirq_verbose(hlock_class(this)))
ret = 2;
break;
case LOCK_ENABLED_SOFTIRQS:
@@ -1964,7 +2080,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
return 0;
#endif
- if (softirq_verbose(this->class))
+ if (softirq_verbose(hlock_class(this)))
ret = 2;
break;
case LOCK_ENABLED_HARDIRQS_READ:
@@ -1979,7 +2095,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
LOCK_USED_IN_HARDIRQ, "hard"))
return 0;
#endif
- if (hardirq_verbose(this->class))
+ if (hardirq_verbose(hlock_class(this)))
ret = 2;
break;
case LOCK_ENABLED_SOFTIRQS_READ:
@@ -1994,7 +2110,7 @@ static int mark_lock_irq(struct task_struct *curr, struct held_lock *this,
LOCK_USED_IN_SOFTIRQ, "soft"))
return 0;
#endif
- if (softirq_verbose(this->class))
+ if (softirq_verbose(hlock_class(this)))
ret = 2;
break;
default:
@@ -2310,7 +2426,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
* If already set then do not dirty the cacheline,
* nor do any checks:
*/
- if (likely(this->class->usage_mask & new_mask))
+ if (likely(hlock_class(this)->usage_mask & new_mask))
return 1;
if (!graph_lock())
@@ -2318,14 +2434,14 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
/*
* Make sure we didnt race:
*/
- if (unlikely(this->class->usage_mask & new_mask)) {
+ if (unlikely(hlock_class(this)->usage_mask & new_mask)) {
graph_unlock();
return 1;
}
- this->class->usage_mask |= new_mask;
+ hlock_class(this)->usage_mask |= new_mask;
- if (!save_trace(this->class->usage_traces + new_bit))
+ if (!save_trace(hlock_class(this)->usage_traces + new_bit))
return 0;
switch (new_bit) {
@@ -2405,7 +2521,7 @@ EXPORT_SYMBOL_GPL(lockdep_init_map);
*/
static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
int trylock, int read, int check, int hardirqs_off,
- unsigned long ip)
+ struct lockdep_map *nest_lock, unsigned long ip)
{
struct task_struct *curr = current;
struct lock_class *class = NULL;
@@ -2459,10 +2575,12 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
return 0;
hlock = curr->held_locks + depth;
-
- hlock->class = class;
+ if (DEBUG_LOCKS_WARN_ON(!class))
+ return 0;
+ hlock->class_idx = class - lock_classes + 1;
hlock->acquire_ip = ip;
hlock->instance = lock;
+ hlock->nest_lock = nest_lock;
hlock->trylock = trylock;
hlock->read = read;
hlock->check = check;
@@ -2574,6 +2692,55 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
return 1;
}
+static int
+__lock_set_subclass(struct lockdep_map *lock,
+ unsigned int subclass, unsigned long ip)
+{
+ struct task_struct *curr = current;
+ struct held_lock *hlock, *prev_hlock;
+ struct lock_class *class;
+ unsigned int depth;
+ int i;
+
+ depth = curr->lockdep_depth;
+ if (DEBUG_LOCKS_WARN_ON(!depth))
+ return 0;
+
+ prev_hlock = NULL;
+ for (i = depth-1; i >= 0; i--) {
+ hlock = curr->held_locks + i;
+ /*
+ * We must not cross into another context:
+ */
+ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
+ break;
+ if (hlock->instance == lock)
+ goto found_it;
+ prev_hlock = hlock;
+ }
+ return print_unlock_inbalance_bug(curr, lock, ip);
+
+found_it:
+ class = register_lock_class(lock, subclass, 0);
+ hlock->class_idx = class - lock_classes + 1;
+
+ curr->lockdep_depth = i;
+ curr->curr_chain_key = hlock->prev_chain_key;
+
+ for (; i < depth; i++) {
+ hlock = curr->held_locks + i;
+ if (!__lock_acquire(hlock->instance,
+ hlock_class(hlock)->subclass, hlock->trylock,
+ hlock->read, hlock->check, hlock->hardirqs_off,
+ hlock->nest_lock, hlock->acquire_ip))
+ return 0;
+ }
+
+ if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
+ return 0;
+ return 1;
+}
+
/*
* Remove the lock to the list of currently held locks in a
* potentially non-nested (out of order) manner. This is a
@@ -2624,9 +2791,9 @@ found_it:
for (i++; i < depth; i++) {
hlock = curr->held_locks + i;
if (!__lock_acquire(hlock->instance,
- hlock->class->subclass, hlock->trylock,
+ hlock_class(hlock)->subclass, hlock->trylock,
hlock->read, hlock->check, hlock->hardirqs_off,
- hlock->acquire_ip))
+ hlock->nest_lock, hlock->acquire_ip))
return 0;
}
@@ -2669,7 +2836,7 @@ static int lock_release_nested(struct task_struct *curr,
#ifdef CONFIG_DEBUG_LOCKDEP
hlock->prev_chain_key = 0;
- hlock->class = NULL;
+ hlock->class_idx = 0;
hlock->acquire_ip = 0;
hlock->irq_context = 0;
#endif
@@ -2738,18 +2905,36 @@ static void check_flags(unsigned long flags)
#endif
}
+void
+lock_set_subclass(struct lockdep_map *lock,
+ unsigned int subclass, unsigned long ip)
+{
+ unsigned long flags;
+
+ if (unlikely(current->lockdep_recursion))
+ return;
+
+ raw_local_irq_save(flags);
+ current->lockdep_recursion = 1;
+ check_flags(flags);
+ if (__lock_set_subclass(lock, subclass, ip))
+ check_chain_key(current);
+ current->lockdep_recursion = 0;
+ raw_local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL_GPL(lock_set_subclass);
+
/*
* We are not always called with irqs disabled - do that here,
* and also avoid lockdep recursion:
*/
void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
- int trylock, int read, int check, unsigned long ip)
+ int trylock, int read, int check,
+ struct lockdep_map *nest_lock, unsigned long ip)
{
unsigned long flags;
- if (unlikely(!lock_stat && !prove_locking))
- return;
-
if (unlikely(current->lockdep_recursion))
return;
@@ -2758,7 +2943,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
current->lockdep_recursion = 1;
__lock_acquire(lock, subclass, trylock, read, check,
- irqs_disabled_flags(flags), ip);
+ irqs_disabled_flags(flags), nest_lock, ip);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
@@ -2770,9 +2955,6 @@ void lock_release(struct lockdep_map *lock, int nested,
{
unsigned long flags;
- if (unlikely(!lock_stat && !prove_locking))
- return;
-
if (unlikely(current->lockdep_recursion))
return;
@@ -2845,9 +3027,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
found_it:
hlock->waittime_stamp = sched_clock();
- point = lock_contention_point(hlock->class, ip);
+ point = lock_contention_point(hlock_class(hlock), ip);
- stats = get_lock_stats(hlock->class);
+ stats = get_lock_stats(hlock_class(hlock));
if (point < ARRAY_SIZE(stats->contention_point))
stats->contention_point[i]++;
if (lock->cpu != smp_processor_id())
@@ -2893,7 +3075,7 @@ found_it:
hlock->holdtime_stamp = now;
}
- stats = get_lock_stats(hlock->class);
+ stats = get_lock_stats(hlock_class(hlock));
if (waittime) {
if (hlock->read)
lock_time_inc(&stats->read_waittime, waittime);
@@ -2988,6 +3170,7 @@ static void zap_class(struct lock_class *class)
list_del_rcu(&class->hash_entry);
list_del_rcu(&class->lock_entry);
+ class->key = NULL;
}
static inline int within(const void *addr, void *start, unsigned long size)
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index c3600a091a28..55db193d366d 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -17,9 +17,6 @@
*/
#define MAX_LOCKDEP_ENTRIES 8192UL
-#define MAX_LOCKDEP_KEYS_BITS 11
-#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
-
#define MAX_LOCKDEP_CHAINS_BITS 14
#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
@@ -53,6 +50,9 @@ extern unsigned int nr_process_chains;
extern unsigned int max_lockdep_depth;
extern unsigned int max_recursion_depth;
+extern unsigned long lockdep_count_forward_deps(struct lock_class *);
+extern unsigned long lockdep_count_backward_deps(struct lock_class *);
+
#ifdef CONFIG_DEBUG_LOCKDEP
/*
* Various lockdep statistics:
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 9b0e940e2545..fa19aee604c2 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -63,34 +63,6 @@ static void l_stop(struct seq_file *m, void *v)
{
}
-static unsigned long count_forward_deps(struct lock_class *class)
-{
- struct lock_list *entry;
- unsigned long ret = 1;
-
- /*
- * Recurse this class's dependency list:
- */
- list_for_each_entry(entry, &class->locks_after, entry)
- ret += count_forward_deps(entry->class);
-
- return ret;
-}
-
-static unsigned long count_backward_deps(struct lock_class *class)
-{
- struct lock_list *entry;
- unsigned long ret = 1;
-
- /*
- * Recurse this class's dependency list:
- */
- list_for_each_entry(entry, &class->locks_before, entry)
- ret += count_backward_deps(entry->class);
-
- return ret;
-}
-
static void print_name(struct seq_file *m, struct lock_class *class)
{
char str[128];
@@ -124,10 +96,10 @@ static int l_show(struct seq_file *m, void *v)
#ifdef CONFIG_DEBUG_LOCKDEP
seq_printf(m, " OPS:%8ld", class->ops);
#endif
- nr_forward_deps = count_forward_deps(class);
+ nr_forward_deps = lockdep_count_forward_deps(class);
seq_printf(m, " FD:%5ld", nr_forward_deps);
- nr_backward_deps = count_backward_deps(class);
+ nr_backward_deps = lockdep_count_backward_deps(class);
seq_printf(m, " BD:%5ld", nr_backward_deps);
get_usage_chars(class, &c1, &c2, &c3, &c4);
@@ -229,6 +201,9 @@ static int lc_show(struct seq_file *m, void *v)
for (i = 0; i < chain->depth; i++) {
class = lock_chain_get_class(chain, i);
+ if (!class->key)
+ continue;
+
seq_printf(m, "[%p] ", class->key);
print_name(m, class);
seq_puts(m, "\n");
@@ -350,7 +325,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
nr_hardirq_read_unsafe++;
- sum_forward_deps += count_forward_deps(class);
+ sum_forward_deps += lockdep_count_forward_deps(class);
}
#ifdef CONFIG_DEBUG_LOCKDEP
DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
diff --git a/kernel/marker.c b/kernel/marker.c
index 971da5317903..7d1faecd7a51 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -126,6 +126,11 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
struct marker_probe_closure *multi;
int i;
/*
+ * Read mdata->ptype before mdata->multi.
+ */
+ smp_rmb();
+ multi = mdata->multi;
+ /*
* multi points to an array, therefore accessing the array
* depends on reading multi. However, even in this case,
* we must insure that the pointer is read _before_ the array
@@ -133,7 +138,6 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, ...)
* in the fast path, so put the explicit barrier here.
*/
smp_read_barrier_depends();
- multi = mdata->multi;
for (i = 0; multi[i].func; i++) {
va_start(args, call_private);
multi[i].func(multi[i].probe_private, call_private,
@@ -175,6 +179,11 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
struct marker_probe_closure *multi;
int i;
/*
+ * Read mdata->ptype before mdata->multi.
+ */
+ smp_rmb();
+ multi = mdata->multi;
+ /*
* multi points to an array, therefore accessing the array
* depends on reading multi. However, even in this case,
* we must insure that the pointer is read _before_ the array
@@ -182,7 +191,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...)
* in the fast path, so put the explicit barrier here.
*/
smp_read_barrier_depends();
- multi = mdata->multi;
for (i = 0; multi[i].func; i++)
multi[i].func(multi[i].probe_private, call_private,
mdata->format, &args);
diff --git a/kernel/module.c b/kernel/module.c
index 61d212120df4..08864d257eb0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2288,7 +2288,7 @@ sys_init_module(void __user *umod,
/* Start the module */
if (mod->init != NULL)
- ret = mod->init();
+ ret = do_one_initcall(mod->init);
if (ret < 0) {
/* Init routine failed: abort. Try to protect us from
buggy refcounters. */
diff --git a/kernel/mutex.c b/kernel/mutex.c
index bcdc9ac8ef60..12c779dc65d4 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -34,6 +34,7 @@
/***
* mutex_init - initialize the mutex
* @lock: the mutex to be initialized
+ * @key: the lock_class_key for the class; used by mutex lock debugging
*
* Initialize the mutex to unlocked state.
*
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 8cb757026386..da9c2dda6a4e 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -24,7 +24,7 @@
* requirement that the application has is cleaned up when closes the file
* pointer or exits the pm_qos_object will get an opportunity to clean up.
*
- * mark gross mgross@linux.intel.com
+ * Mark Gross <mgross@linux.intel.com>
*/
#include <linux/pm_qos_params.h>
@@ -211,8 +211,8 @@ EXPORT_SYMBOL_GPL(pm_qos_requirement);
* @value: defines the qos request
*
* This function inserts a new entry in the pm_qos_class list of requested qos
- * performance charactoistics. It recomputes the agregate QoS expectations for
- * the pm_qos_class of parrameters.
+ * performance characteristics. It recomputes the aggregate QoS expectations
+ * for the pm_qos_class of parameters.
*/
int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value)
{
@@ -250,10 +250,10 @@ EXPORT_SYMBOL_GPL(pm_qos_add_requirement);
* @name: identifies the request
* @value: defines the qos request
*
- * Updates an existing qos requierement for the pm_qos_class of parameters along
+ * Updates an existing qos requirement for the pm_qos_class of parameters along
* with updating the target pm_qos_class value.
*
- * If the named request isn't in the lest then no change is made.
+ * If the named request isn't in the list then no change is made.
*/
int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value)
{
@@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
* @pm_qos_class: identifies which list of qos request to us
* @name: identifies the request
*
- * Will remove named qos request from pm_qos_class list of parrameters and
+ * Will remove named qos request from pm_qos_class list of parameters and
* recompute the current target value for the pm_qos_class.
*/
void pm_qos_remove_requirement(int pm_qos_class, char *name)
@@ -319,7 +319,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
* @notifier: notifier block managed by caller.
*
* will register the notifier into a notification chain that gets called
- * uppon changes to the pm_qos_class target value.
+ * upon changes to the pm_qos_class target value.
*/
int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
{
@@ -338,7 +338,7 @@ EXPORT_SYMBOL_GPL(pm_qos_add_notifier);
* @notifier: notifier block to be removed.
*
* will remove the notifier from the notification chain that gets called
- * uppon changes to the pm_qos_class target value.
+ * upon changes to the pm_qos_class target value.
*/
int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
{
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 9a21681aa80f..e36d5798cbff 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -289,21 +289,29 @@ void do_schedule_next_timer(struct siginfo *info)
else
schedule_next_timer(timr);
- info->si_overrun = timr->it_overrun_last;
+ info->si_overrun += timr->it_overrun_last;
}
if (timr)
unlock_timer(timr, flags);
}
-int posix_timer_event(struct k_itimer *timr,int si_private)
+int posix_timer_event(struct k_itimer *timr, int si_private)
{
- memset(&timr->sigq->info, 0, sizeof(siginfo_t));
+ /*
+ * FIXME: if ->sigq is queued we can race with
+ * dequeue_signal()->do_schedule_next_timer().
+ *
+ * If dequeue_signal() sees the "right" value of
+ * si_sys_private it calls do_schedule_next_timer().
+ * We re-queue ->sigq and drop ->it_lock().
+ * do_schedule_next_timer() locks the timer
+ * and re-schedules it while ->sigq is pending.
+ * Not really bad, but not that we want.
+ */
timr->sigq->info.si_sys_private = si_private;
- /* Send signal to the process that owns this timer.*/
timr->sigq->info.si_signo = timr->it_sigev_signo;
- timr->sigq->info.si_errno = 0;
timr->sigq->info.si_code = SI_TIMER;
timr->sigq->info.si_tid = timr->it_id;
timr->sigq->info.si_value = timr->it_sigev_value;
@@ -435,6 +443,7 @@ static struct k_itimer * alloc_posix_timer(void)
kmem_cache_free(posix_timers_cache, tmr);
tmr = NULL;
}
+ memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
return tmr;
}
diff --git a/kernel/printk.c b/kernel/printk.c
index a7f7559c5f6c..b51b1567bb55 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1309,14 +1309,14 @@ void tty_write_message(struct tty_struct *tty, char *msg)
#if defined CONFIG_PRINTK
-DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
/*
* printk rate limiting, lifted from the networking subsystem.
*
- * This enforces a rate limit: not more than one kernel message
- * every printk_ratelimit_jiffies to make a denial-of-service
- * attack impossible.
+ * This enforces a rate limit: not more than 10 kernel messages
+ * every 5s to make a denial-of-service attack impossible.
*/
+DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10);
+
int printk_ratelimit(void)
{
return __ratelimit(&printk_ratelimit_state);
diff --git a/kernel/relay.c b/kernel/relay.c
index 04006ef970b8..8d13a7855c08 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -944,6 +944,10 @@ static void relay_file_read_consume(struct rchan_buf *buf,
size_t n_subbufs = buf->chan->n_subbufs;
size_t read_subbuf;
+ if (buf->subbufs_produced == buf->subbufs_consumed &&
+ buf->offset == buf->bytes_consumed)
+ return;
+
if (buf->bytes_consumed + bytes_consumed > subbuf_size) {
relay_subbufs_consumed(buf->chan, buf->cpu, 1);
buf->bytes_consumed = 0;
@@ -975,6 +979,8 @@ static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
relay_file_read_consume(buf, read_pos, 0);
+ consumed = buf->subbufs_consumed;
+
if (unlikely(buf->offset > subbuf_size)) {
if (produced == consumed)
return 0;
@@ -993,8 +999,12 @@ static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
if (consumed > produced)
produced += n_subbufs * subbuf_size;
- if (consumed == produced)
+ if (consumed == produced) {
+ if (buf->offset == subbuf_size &&
+ buf->subbufs_produced > buf->subbufs_consumed)
+ return 1;
return 0;
+ }
return 1;
}
diff --git a/kernel/resource.c b/kernel/resource.c
index 74af2d7cb5a1..f5b518eabefe 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -490,7 +490,7 @@ resource_size_t resource_alignment(struct resource *res)
{
switch (res->flags & (IORESOURCE_SIZEALIGN | IORESOURCE_STARTALIGN)) {
case IORESOURCE_SIZEALIGN:
- return res->end - res->start + 1;
+ return resource_size(res);
case IORESOURCE_STARTALIGN:
return res->start;
default:
diff --git a/kernel/sched.c b/kernel/sched.c
index 0236958addcb..d601fb0406ca 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -600,7 +600,6 @@ struct rq {
/* BKL stats */
unsigned int bkl_count;
#endif
- struct lock_class_key rq_lock_key;
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -834,7 +833,7 @@ static inline u64 global_rt_period(void)
static inline u64 global_rt_runtime(void)
{
- if (sysctl_sched_rt_period < 0)
+ if (sysctl_sched_rt_runtime < 0)
return RUNTIME_INF;
return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
@@ -2759,10 +2758,10 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
} else {
if (rq1 < rq2) {
spin_lock(&rq1->lock);
- spin_lock(&rq2->lock);
+ spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
} else {
spin_lock(&rq2->lock);
- spin_lock(&rq1->lock);
+ spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
}
}
update_rq_clock(rq1);
@@ -2805,14 +2804,21 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
if (busiest < this_rq) {
spin_unlock(&this_rq->lock);
spin_lock(&busiest->lock);
- spin_lock(&this_rq->lock);
+ spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
ret = 1;
} else
- spin_lock(&busiest->lock);
+ spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
}
return ret;
}
+static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+ __releases(busiest->lock)
+{
+ spin_unlock(&busiest->lock);
+ lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
+
/*
* If dest_cpu is allowed for this process, migrate the task to it.
* This is accomplished by forcing the cpu_allowed mask to only
@@ -3637,7 +3643,7 @@ redo:
ld_moved = move_tasks(this_rq, this_cpu, busiest,
imbalance, sd, CPU_NEWLY_IDLE,
&all_pinned);
- spin_unlock(&busiest->lock);
+ double_unlock_balance(this_rq, busiest);
if (unlikely(all_pinned)) {
cpu_clear(cpu_of(busiest), *cpus);
@@ -3752,7 +3758,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
else
schedstat_inc(sd, alb_failed);
}
- spin_unlock(&target_rq->lock);
+ double_unlock_balance(busiest_rq, target_rq);
}
#ifdef CONFIG_NO_HZ
@@ -5004,19 +5010,21 @@ recheck:
return -EPERM;
}
+ if (user) {
#ifdef CONFIG_RT_GROUP_SCHED
- /*
- * Do not allow realtime tasks into groups that have no runtime
- * assigned.
- */
- if (user
- && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
- return -EPERM;
+ /*
+ * Do not allow realtime tasks into groups that have no runtime
+ * assigned.
+ */
+ if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+ return -EPERM;
#endif
- retval = security_task_setscheduler(p, policy, param);
- if (retval)
- return retval;
+ retval = security_task_setscheduler(p, policy, param);
+ if (retval)
+ return retval;
+ }
+
/*
* make sure no PI-waiters arrive (or leave) while we are
* changing the priority of the task:
@@ -7671,34 +7679,34 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
}
#ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct sys_device *dev,
- struct sysdev_attribute *attr, char *page)
+static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
+ char *page)
{
return sprintf(page, "%u\n", sched_mc_power_savings);
}
-static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t sched_mc_power_savings_store(struct sysdev_class *class,
const char *buf, size_t count)
{
return sched_power_savings_store(buf, count, 0);
}
-static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
- sched_mc_power_savings_store);
+static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644,
+ sched_mc_power_savings_show,
+ sched_mc_power_savings_store);
#endif
#ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct sys_device *dev,
- struct sysdev_attribute *attr, char *page)
+static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev,
+ char *page)
{
return sprintf(page, "%u\n", sched_smt_power_savings);
}
-static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
- struct sysdev_attribute *attr,
+static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev,
const char *buf, size_t count)
{
return sched_power_savings_store(buf, count, 1);
}
-static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
+static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644,
+ sched_smt_power_savings_show,
sched_smt_power_savings_store);
#endif
@@ -7998,7 +8006,6 @@ void __init sched_init(void)
rq = cpu_rq(i);
spin_lock_init(&rq->lock);
- lockdep_set_class(&rq->lock, &rq->rq_lock_key);
rq->nr_running = 0;
init_cfs_rq(&rq->cfs, rq);
init_rt_rq(&rq->rt, rq);
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 22ed55d1167f..204991a0bfa7 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -32,13 +32,19 @@
#include <linux/ktime.h>
#include <linux/module.h>
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * This is default implementation.
+ * Architectures and sub-architectures can override this.
+ */
+unsigned long long __attribute__((weak)) sched_clock(void)
+{
+ return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+}
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+static __read_mostly int sched_clock_running;
-#define MULTI_SHIFT 15
-/* Max is double, Min is 1/2 */
-#define MAX_MULTI (2LL << MULTI_SHIFT)
-#define MIN_MULTI (1LL << (MULTI_SHIFT-1))
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
struct sched_clock_data {
/*
@@ -49,14 +55,9 @@ struct sched_clock_data {
raw_spinlock_t lock;
unsigned long tick_jiffies;
- u64 prev_raw;
u64 tick_raw;
u64 tick_gtod;
u64 clock;
- s64 multi;
-#ifdef CONFIG_NO_HZ
- int check_max;
-#endif
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
@@ -71,8 +72,6 @@ static inline struct sched_clock_data *cpu_sdc(int cpu)
return &per_cpu(sched_clock_data, cpu);
}
-static __read_mostly int sched_clock_running;
-
void sched_clock_init(void)
{
u64 ktime_now = ktime_to_ns(ktime_get());
@@ -84,90 +83,39 @@ void sched_clock_init(void)
scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
scd->tick_jiffies = now_jiffies;
- scd->prev_raw = 0;
scd->tick_raw = 0;
scd->tick_gtod = ktime_now;
scd->clock = ktime_now;
- scd->multi = 1 << MULTI_SHIFT;
-#ifdef CONFIG_NO_HZ
- scd->check_max = 1;
-#endif
}
sched_clock_running = 1;
}
-#ifdef CONFIG_NO_HZ
-/*
- * The dynamic ticks makes the delta jiffies inaccurate. This
- * prevents us from checking the maximum time update.
- * Disable the maximum check during stopped ticks.
- */
-void sched_clock_tick_stop(int cpu)
-{
- struct sched_clock_data *scd = cpu_sdc(cpu);
-
- scd->check_max = 0;
-}
-
-void sched_clock_tick_start(int cpu)
-{
- struct sched_clock_data *scd = cpu_sdc(cpu);
-
- scd->check_max = 1;
-}
-
-static int check_max(struct sched_clock_data *scd)
-{
- return scd->check_max;
-}
-#else
-static int check_max(struct sched_clock_data *scd)
-{
- return 1;
-}
-#endif /* CONFIG_NO_HZ */
-
/*
* update the percpu scd from the raw @now value
*
* - filter out backward motion
* - use jiffies to generate a min,max window to clip the raw values
*/
-static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *time)
+static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
{
unsigned long now_jiffies = jiffies;
long delta_jiffies = now_jiffies - scd->tick_jiffies;
u64 clock = scd->clock;
u64 min_clock, max_clock;
- s64 delta = now - scd->prev_raw;
+ s64 delta = now - scd->tick_raw;
WARN_ON_ONCE(!irqs_disabled());
-
- /*
- * At schedule tick the clock can be just under the gtod. We don't
- * want to push it too prematurely.
- */
- min_clock = scd->tick_gtod + (delta_jiffies * TICK_NSEC);
- if (min_clock > TICK_NSEC)
- min_clock -= TICK_NSEC / 2;
+ min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
if (unlikely(delta < 0)) {
clock++;
goto out;
}
- /*
- * The clock must stay within a jiffie of the gtod.
- * But since we may be at the start of a jiffy or the end of one
- * we add another jiffy buffer.
- */
- max_clock = scd->tick_gtod + (2 + delta_jiffies) * TICK_NSEC;
-
- delta *= scd->multi;
- delta >>= MULTI_SHIFT;
+ max_clock = min_clock + TICK_NSEC;
- if (unlikely(clock + delta > max_clock) && check_max(scd)) {
+ if (unlikely(clock + delta > max_clock)) {
if (clock < max_clock)
clock = max_clock;
else
@@ -180,12 +128,10 @@ static void __update_sched_clock(struct sched_clock_data *scd, u64 now, u64 *tim
if (unlikely(clock < min_clock))
clock = min_clock;
- if (time)
- *time = clock;
- else {
- scd->prev_raw = now;
- scd->clock = clock;
- }
+ scd->tick_jiffies = now_jiffies;
+ scd->clock = clock;
+
+ return clock;
}
static void lock_double_clock(struct sched_clock_data *data1,
@@ -203,7 +149,7 @@ static void lock_double_clock(struct sched_clock_data *data1,
u64 sched_clock_cpu(int cpu)
{
struct sched_clock_data *scd = cpu_sdc(cpu);
- u64 now, clock;
+ u64 now, clock, this_clock, remote_clock;
if (unlikely(!sched_clock_running))
return 0ull;
@@ -212,43 +158,44 @@ u64 sched_clock_cpu(int cpu)
now = sched_clock();
if (cpu != raw_smp_processor_id()) {
- /*
- * in order to update a remote cpu's clock based on our
- * unstable raw time rebase it against:
- * tick_raw (offset between raw counters)
- * tick_gotd (tick offset between cpus)
- */
struct sched_clock_data *my_scd = this_scd();
lock_double_clock(scd, my_scd);
- now -= my_scd->tick_raw;
- now += scd->tick_raw;
+ this_clock = __update_sched_clock(my_scd, now);
+ remote_clock = scd->clock;
- now += my_scd->tick_gtod;
- now -= scd->tick_gtod;
+ /*
+ * Use the opportunity that we have both locks
+ * taken to couple the two clocks: we take the
+ * larger time as the latest time for both
+ * runqueues. (this creates monotonic movement)
+ */
+ if (likely(remote_clock < this_clock)) {
+ clock = this_clock;
+ scd->clock = clock;
+ } else {
+ /*
+ * Should be rare, but possible:
+ */
+ clock = remote_clock;
+ my_scd->clock = remote_clock;
+ }
__raw_spin_unlock(&my_scd->lock);
-
- __update_sched_clock(scd, now, &clock);
-
- __raw_spin_unlock(&scd->lock);
-
} else {
__raw_spin_lock(&scd->lock);
- __update_sched_clock(scd, now, NULL);
- clock = scd->clock;
- __raw_spin_unlock(&scd->lock);
+ clock = __update_sched_clock(scd, now);
}
+ __raw_spin_unlock(&scd->lock);
+
return clock;
}
void sched_clock_tick(void)
{
struct sched_clock_data *scd = this_scd();
- unsigned long now_jiffies = jiffies;
- s64 mult, delta_gtod, delta_raw;
u64 now, now_gtod;
if (unlikely(!sched_clock_running))
@@ -260,29 +207,14 @@ void sched_clock_tick(void)
now = sched_clock();
__raw_spin_lock(&scd->lock);
- __update_sched_clock(scd, now, NULL);
+ __update_sched_clock(scd, now);
/*
* update tick_gtod after __update_sched_clock() because that will
* already observe 1 new jiffy; adding a new tick_gtod to that would
* increase the clock 2 jiffies.
*/
- delta_gtod = now_gtod - scd->tick_gtod;
- delta_raw = now - scd->tick_raw;
-
- if ((long)delta_raw > 0) {
- mult = delta_gtod << MULTI_SHIFT;
- do_div(mult, delta_raw);
- scd->multi = mult;
- if (scd->multi > MAX_MULTI)
- scd->multi = MAX_MULTI;
- else if (scd->multi < MIN_MULTI)
- scd->multi = MIN_MULTI;
- } else
- scd->multi = 1 << MULTI_SHIFT;
-
scd->tick_raw = now;
scd->tick_gtod = now_gtod;
- scd->tick_jiffies = now_jiffies;
__raw_spin_unlock(&scd->lock);
}
@@ -301,7 +233,6 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
void sched_clock_idle_wakeup_event(u64 delta_ns)
{
struct sched_clock_data *scd = this_scd();
- u64 now = sched_clock();
/*
* Override the previous timestamp and ignore all
@@ -310,27 +241,30 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
* rq clock:
*/
__raw_spin_lock(&scd->lock);
- scd->prev_raw = now;
scd->clock += delta_ns;
- scd->multi = 1 << MULTI_SHIFT;
__raw_spin_unlock(&scd->lock);
touch_softlockup_watchdog();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-#endif
+#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
-/*
- * Scheduler clock - returns current time in nanosec units.
- * This is default implementation.
- * Architectures and sub-architectures can override this.
- */
-unsigned long long __attribute__((weak)) sched_clock(void)
+void sched_clock_init(void)
{
- return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+ sched_clock_running = 1;
}
+u64 sched_clock_cpu(int cpu)
+{
+ if (unlikely(!sched_clock_running))
+ return 0;
+
+ return sched_clock();
+}
+
+#endif
+
unsigned long long cpu_clock(int cpu)
{
unsigned long long clock;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cf2cd6ce4cb2..fb8994c6d4bb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -899,7 +899,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
* doesn't make sense. Rely on vruntime for fairness.
*/
if (rq->curr != p)
- delta = max(10000LL, delta);
+ delta = max_t(s64, 10000LL, delta);
hrtick_start(rq, delta);
}
@@ -1442,18 +1442,23 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
struct task_struct *p = NULL;
struct sched_entity *se;
- while (next != &cfs_rq->tasks) {
+ if (next == &cfs_rq->tasks)
+ return NULL;
+
+ /* Skip over entities that are not tasks */
+ do {
se = list_entry(next, struct sched_entity, group_node);
next = next->next;
+ } while (next != &cfs_rq->tasks && !entity_is_task(se));
- /* Skip over entities that are not tasks */
- if (entity_is_task(se)) {
- p = task_of(se);
- break;
- }
- }
+ if (next == &cfs_rq->tasks)
+ return NULL;
cfs_rq->balance_iterator = next;
+
+ if (entity_is_task(se))
+ p = task_of(se);
+
return p;
}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 908c04f9dad0..6163e4cf885b 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -861,6 +861,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
#define RT_MAX_TRIES 3
static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
+static void double_unlock_balance(struct rq *this_rq, struct rq *busiest);
+
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
@@ -1022,7 +1024,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
break;
/* try again */
- spin_unlock(&lowest_rq->lock);
+ double_unlock_balance(rq, lowest_rq);
lowest_rq = NULL;
}
@@ -1091,7 +1093,7 @@ static int push_rt_task(struct rq *rq)
resched_task(lowest_rq->curr);
- spin_unlock(&lowest_rq->lock);
+ double_unlock_balance(rq, lowest_rq);
ret = 1;
out:
@@ -1197,7 +1199,7 @@ static int pull_rt_task(struct rq *this_rq)
}
skip:
- spin_unlock(&src_rq->lock);
+ double_unlock_balance(this_rq, src_rq);
}
return ret;
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index aaaeae8244e7..94a62c0d4ade 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -212,9 +212,7 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
waiter.up = 0;
for (;;) {
- if (state == TASK_INTERRUPTIBLE && signal_pending(task))
- goto interrupted;
- if (state == TASK_KILLABLE && fatal_signal_pending(task))
+ if (signal_pending_state(state, task))
goto interrupted;
if (timeout <= 0)
goto timed_out;
diff --git a/kernel/signal.c b/kernel/signal.c
index 954f77d7e3bc..c539f60c6f41 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1304,6 +1304,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
q->info.si_overrun++;
goto out;
}
+ q->info.si_overrun = 0;
signalfd_notify(t, sig);
pending = group ? &t->signal->shared_pending : &t->pending;
diff --git a/kernel/smp.c b/kernel/smp.c
index 96fc7c0edc59..782e2b93e465 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -135,7 +135,8 @@ void generic_smp_call_function_interrupt(void)
*/
smp_wmb();
data->csd.flags &= ~CSD_FLAG_WAIT;
- } else
+ }
+ if (data->csd.flags & CSD_FLAG_ALLOC)
call_rcu(&data->rcu_head, rcu_free_call_data);
}
rcu_read_unlock();
@@ -260,6 +261,42 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
generic_exec_single(cpu, data);
}
+/* Dummy function */
+static void quiesce_dummy(void *unused)
+{
+}
+
+/*
+ * Ensure stack based data used in call function mask is safe to free.
+ *
+ * This is needed by smp_call_function_mask when using on-stack data, because
+ * a single call function queue is shared by all CPUs, and any CPU may pick up
+ * the data item on the queue at any time before it is deleted. So we need to
+ * ensure that all CPUs have transitioned through a quiescent state after
+ * this call.
+ *
+ * This is a very slow function, implemented by sending synchronous IPIs to
+ * all possible CPUs. For this reason, we have to alloc data rather than use
+ * stack based data even in the case of synchronous calls. The stack based
+ * data is then just used for deadlock/oom fallback which will be very rare.
+ *
+ * If a faster scheme can be made, we could go back to preferring stack based
+ * data -- the data allocation/free is non-zero cost.
+ */
+static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
+{
+ struct call_single_data data;
+ int cpu;
+
+ data.func = quiesce_dummy;
+ data.info = NULL;
+
+ for_each_cpu_mask(cpu, mask) {
+ data.flags = CSD_FLAG_WAIT;
+ generic_exec_single(cpu, &data);
+ }
+}
+
/**
* smp_call_function_mask(): Run a function on a set of other CPUs.
* @mask: The set of cpus to run on.
@@ -285,6 +322,7 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
cpumask_t allbutself;
unsigned long flags;
int cpu, num_cpus;
+ int slowpath = 0;
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
@@ -306,15 +344,16 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
return smp_call_function_single(cpu, func, info, wait);
}
- if (!wait) {
- data = kmalloc(sizeof(*data), GFP_ATOMIC);
- if (data)
- data->csd.flags = CSD_FLAG_ALLOC;
- }
- if (!data) {
+ data = kmalloc(sizeof(*data), GFP_ATOMIC);
+ if (data) {
+ data->csd.flags = CSD_FLAG_ALLOC;
+ if (wait)
+ data->csd.flags |= CSD_FLAG_WAIT;
+ } else {
data = &d;
data->csd.flags = CSD_FLAG_WAIT;
wait = 1;
+ slowpath = 1;
}
spin_lock_init(&data->lock);
@@ -331,8 +370,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
arch_send_call_function_ipi(mask);
/* optionally wait for the CPUs to complete */
- if (wait)
+ if (wait) {
csd_flag_wait(&data->csd);
+ if (unlikely(slowpath))
+ smp_call_function_mask_quiesce_stack(mask);
+ }
return 0;
}
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index a1fb54c93cdd..44baeea94ab9 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -292,6 +292,7 @@ void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
}
EXPORT_SYMBOL(_spin_lock_nested);
+
unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass)
{
unsigned long flags;
@@ -314,6 +315,16 @@ unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclas
EXPORT_SYMBOL(_spin_lock_irqsave_nested);
+void __lockfunc _spin_lock_nest_lock(spinlock_t *lock,
+ struct lockdep_map *nest_lock)
+{
+ preempt_disable();
+ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_);
+ LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock);
+}
+
+EXPORT_SYMBOL(_spin_lock_nest_lock);
+
#endif
void __lockfunc _spin_unlock(spinlock_t *lock)
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index e446c7c7d6a9..af3c7cea258b 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -65,7 +65,6 @@ static void ack_state(void)
static int stop_cpu(struct stop_machine_data *smdata)
{
enum stopmachine_state curstate = STOPMACHINE_NONE;
- int uninitialized_var(ret);
/* Simple state machine */
do {
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 825b4c00fe44..f5da526424a9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -289,7 +289,6 @@ void tick_nohz_stop_sched_tick(int inidle)
ts->tick_stopped = 1;
ts->idle_jiffies = last_jiffies;
rcu_enter_nohz();
- sched_clock_tick_stop(cpu);
}
/*
@@ -392,7 +391,6 @@ void tick_nohz_restart_sched_tick(void)
select_nohz_load_balancer(0);
now = ktime_get();
tick_do_update_jiffies64(now);
- sched_clock_tick_start(cpu);
cpu_clear(cpu, nohz_cpu_mask);
/*
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ec7e4f62aaff..4048e92aa04f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -290,11 +290,11 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq)
BUG_ON(get_wq_data(work) != cwq);
work_clear_pending(work);
- lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
- lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+ lock_map_acquire(&cwq->wq->lockdep_map);
+ lock_map_acquire(&lockdep_map);
f(work);
- lock_release(&lockdep_map, 1, _THIS_IP_);
- lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+ lock_map_release(&lockdep_map);
+ lock_map_release(&cwq->wq->lockdep_map);
if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
@@ -413,8 +413,8 @@ void flush_workqueue(struct workqueue_struct *wq)
int cpu;
might_sleep();
- lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
- lock_release(&wq->lockdep_map, 1, _THIS_IP_);
+ lock_map_acquire(&wq->lockdep_map);
+ lock_map_release(&wq->lockdep_map);
for_each_cpu_mask_nr(cpu, *cpu_map)
flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
}
@@ -441,8 +441,8 @@ int flush_work(struct work_struct *work)
if (!cwq)
return 0;
- lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
- lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+ lock_map_acquire(&cwq->wq->lockdep_map);
+ lock_map_release(&cwq->wq->lockdep_map);
prev = NULL;
spin_lock_irq(&cwq->lock);
@@ -536,8 +536,8 @@ static void wait_on_work(struct work_struct *work)
might_sleep();
- lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
- lock_release(&work->lockdep_map, 1, _THIS_IP_);
+ lock_map_acquire(&work->lockdep_map);
+ lock_map_release(&work->lockdep_map);
cwq = get_wq_data(work);
if (!cwq)
@@ -830,10 +830,21 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
start_workqueue_thread(cwq, -1);
} else {
cpu_maps_update_begin();
+ /*
+ * We must place this wq on list even if the code below fails.
+ * cpu_down(cpu) can remove cpu from cpu_populated_map before
+ * destroy_workqueue() takes the lock, in that case we leak
+ * cwq[cpu]->thread.
+ */
spin_lock(&workqueue_lock);
list_add(&wq->list, &workqueues);
spin_unlock(&workqueue_lock);
-
+ /*
+ * We must initialize cwqs for each possible cpu even if we
+ * are going to call destroy_workqueue() finally. Otherwise
+ * cpu_up() can hit the uninitialized cwq once we drop the
+ * lock.
+ */
for_each_possible_cpu(cpu) {
cwq = init_cpu_workqueue(wq, cpu);
if (err || !cpu_online(cpu))
@@ -861,8 +872,8 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
if (cwq->thread == NULL)
return;
- lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_);
- lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_);
+ lock_map_acquire(&cwq->wq->lockdep_map);
+ lock_map_release(&cwq->wq->lockdep_map);
flush_cpu_workqueue(cwq);
/*