Diffstat (limited to 'kernel')
32 files changed, 480 insertions, 331 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b96ad75b7e64..14f7070b4ba2 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -270,11 +270,11 @@ static struct file_system_type cpuset_fs_type = { * are online. If none are online, walk up the cpuset hierarchy * until we find one that does have some online cpus. If we get * all the way to the top and still haven't found any online cpus, - * return cpu_online_map. Or if passed a NULL cs from an exit'ing - * task, return cpu_online_map. + * return cpu_online_mask. Or if passed a NULL cs from an exit'ing + * task, return cpu_online_mask. * * One way or another, we guarantee to return some non-empty subset - * of cpu_online_map. + * of cpu_online_mask. * * Call with callback_mutex held. */ @@ -867,7 +867,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, int retval; int is_load_balanced; - /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ + /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ if (cs == &top_cpuset) return -EACCES; @@ -2149,7 +2149,7 @@ void __init cpuset_init_smp(void) * * Description: Returns the cpumask_var_t cpus_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty - * subset of cpu_online_map, even if this means going outside the + * subset of cpu_online_mask, even if this means going outside the * tasks cpuset. **/ diff --git a/kernel/cred.c b/kernel/cred.c index 97b36eeca4c9..e70683d9ec32 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -386,6 +386,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; + p->replacement_session_keyring = NULL; + if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 1dc53bae56e1..0557f24c6bca 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -160,37 +160,39 @@ early_param("nokgdbroundup", opt_nokgdbroundup); * Weak aliases for breakpoint management, * can be overriden by architectures when needed: */ -int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) +int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; - err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE); + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); if (err) return err; - - return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr, - BREAK_INSTR_SIZE); + err = probe_kernel_write((char *)bpt->bpt_addr, + arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); + return err; } -int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) +int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - return probe_kernel_write((char *)addr, - (char *)bundle, BREAK_INSTR_SIZE); + return probe_kernel_write((char *)bpt->bpt_addr, + (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } int __weak kgdb_validate_break_address(unsigned long addr) { - char tmp_variable[BREAK_INSTR_SIZE]; + struct kgdb_bkpt tmp; int err; - /* Validate setting the breakpoint and then removing it. In the + /* Validate setting the breakpoint and then removing it. If the * remove fails, the kernel needs to emit a bad message because we * are deep trouble not being able to put things back the way we * found them. 
*/ - err = kgdb_arch_set_breakpoint(addr, tmp_variable); + tmp.bpt_addr = addr; + err = kgdb_arch_set_breakpoint(&tmp); if (err) return err; - err = kgdb_arch_remove_breakpoint(addr, tmp_variable); + err = kgdb_arch_remove_breakpoint(&tmp); if (err) printk(KERN_ERR "KGDB: Critical breakpoint error, kernel " "memory destroyed at: %lx", addr); @@ -234,7 +236,6 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) */ int dbg_activate_sw_breakpoints(void) { - unsigned long addr; int error; int ret = 0; int i; @@ -243,16 +244,15 @@ int dbg_activate_sw_breakpoints(void) if (kgdb_break[i].state != BP_SET) continue; - addr = kgdb_break[i].bpt_addr; - error = kgdb_arch_set_breakpoint(addr, - kgdb_break[i].saved_instr); + error = kgdb_arch_set_breakpoint(&kgdb_break[i]); if (error) { ret = error; - printk(KERN_INFO "KGDB: BP install failed: %lx", addr); + printk(KERN_INFO "KGDB: BP install failed: %lx", + kgdb_break[i].bpt_addr); continue; } - kgdb_flush_swbreak_addr(addr); + kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr); kgdb_break[i].state = BP_ACTIVE; } return ret; @@ -301,7 +301,6 @@ int dbg_set_sw_break(unsigned long addr) int dbg_deactivate_sw_breakpoints(void) { - unsigned long addr; int error; int ret = 0; int i; @@ -309,15 +308,14 @@ int dbg_deactivate_sw_breakpoints(void) for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { if (kgdb_break[i].state != BP_ACTIVE) continue; - addr = kgdb_break[i].bpt_addr; - error = kgdb_arch_remove_breakpoint(addr, - kgdb_break[i].saved_instr); + error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) { - printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr); + printk(KERN_INFO "KGDB: BP remove failed: %lx\n", + kgdb_break[i].bpt_addr); ret = error; } - kgdb_flush_swbreak_addr(addr); + kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr); kgdb_break[i].state = BP_SET; } return ret; @@ -351,7 +349,6 @@ int kgdb_isremovedbreak(unsigned long addr) int dbg_remove_all_break(void) { - unsigned long addr; int error; int i; @@ -359,12 +356,10 @@ int dbg_remove_all_break(void) for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { if (kgdb_break[i].state != BP_ACTIVE) goto setundefined; - addr = kgdb_break[i].bpt_addr; - error = kgdb_arch_remove_breakpoint(addr, - kgdb_break[i].saved_instr); + error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n", - addr); + kgdb_break[i].bpt_addr); setundefined: kgdb_break[i].state = BP_UNDEFINED; } diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 9b5f17da1c56..bb9520f0f6ff 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -743,7 +743,7 @@ kdb_printit: kdb_input_flush(); c = console_drivers; - if (!dbg_io_ops->is_console) { + if (dbg_io_ops && !dbg_io_ops->is_console) { len = strlen(moreprompt); cp = moreprompt; while (len--) { diff --git a/kernel/events/core.c b/kernel/events/core.c index 4b50357914fb..a6a9ec4cd8f5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3348,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event, *running = ctx_time - event->tstamp_running; } -void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) +void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) { } @@ -3398,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event) userpg->time_running = running + atomic64_read(&event->child_total_time_running); - perf_update_user_clock(userpg, now); + arch_perf_update_userpage(userpg, now); barrier(); 
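/*
 * The ++userpg->lock just below pairs with an increment taken before the
 * update, seqcount-style: the lock word is odd while the page is being
 * changed. A minimal sketch of the matching user-space read side, along the
 * lines of the loop documented in the perf mmap-page header (names and the
 * fields read here are illustrative, not part of this patch):
 *
 *	u32 seq;
 *	u64 enabled, running;
 *
 *	do {
 *		seq = pc->lock;
 *		barrier();			(compiler barrier in user space)
 *		enabled = pc->time_enabled;
 *		running = pc->time_running;
 *		barrier();
 *	} while (pc->lock != seq);		(a writer raced with us; retry)
 */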
++userpg->lock; @@ -7116,6 +7116,13 @@ void __init perf_event_init(void) /* do not patch jump label more than once per second */ jump_label_rate_limit(&perf_sched_events, HZ); + + /* + * Build time assertion that we keep the data_head at the intended + * location. IOW, validation we got the __reserved[] size right. + */ + BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head)) + != 1024); } static int __init perf_event_sysfs_init(void) diff --git a/kernel/futex.c b/kernel/futex.c index 72efa1e4359a..e2b0fb9a0b3b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -59,6 +59,7 @@ #include <linux/magic.h> #include <linux/pid.h> #include <linux/nsproxy.h> +#include <linux/ptrace.h> #include <asm/futex.h> @@ -2443,40 +2444,31 @@ SYSCALL_DEFINE3(get_robust_list, int, pid, { struct robust_list_head __user *head; unsigned long ret; - const struct cred *cred = current_cred(), *pcred; + struct task_struct *p; if (!futex_cmpxchg_enabled) return -ENOSYS; + WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); + + rcu_read_lock(); + + ret = -ESRCH; if (!pid) - head = current->robust_list; + p = current; else { - struct task_struct *p; - - ret = -ESRCH; - rcu_read_lock(); p = find_task_by_vpid(pid); if (!p) goto err_unlock; - ret = -EPERM; - pcred = __task_cred(p); - /* If victim is in different user_ns, then uids are not - comparable, so we must have CAP_SYS_PTRACE */ - if (cred->user->user_ns != pcred->user->user_ns) { - if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; - goto ok; - } - /* If victim is in same user_ns, then uids are comparable */ - if (cred->euid != pcred->euid && - cred->euid != pcred->uid && - !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; -ok: - head = p->robust_list; - rcu_read_unlock(); } + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ)) + goto err_unlock; + + head = p->robust_list; + rcu_read_unlock(); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 5f9e689dc8f0..83e368b005fc 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -10,6 +10,7 @@ #include <linux/compat.h> #include <linux/nsproxy.h> #include <linux/futex.h> +#include <linux/ptrace.h> #include <asm/uaccess.h> @@ -136,40 +137,31 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, { struct compat_robust_list_head __user *head; unsigned long ret; - const struct cred *cred = current_cred(), *pcred; + struct task_struct *p; if (!futex_cmpxchg_enabled) return -ENOSYS; + WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n"); + + rcu_read_lock(); + + ret = -ESRCH; if (!pid) - head = current->compat_robust_list; + p = current; else { - struct task_struct *p; - - ret = -ESRCH; - rcu_read_lock(); p = find_task_by_vpid(pid); if (!p) goto err_unlock; - ret = -EPERM; - pcred = __task_cred(p); - /* If victim is in different user_ns, then uids are not - comparable, so we must have CAP_SYS_PTRACE */ - if (cred->user->user_ns != pcred->user->user_ns) { - if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; - goto ok; - } - /* If victim is in same user_ns, then uids are comparable */ - if (cred->euid != pcred->euid && - cred->euid != pcred->uid && - !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE)) - goto err_unlock; -ok: - head = p->compat_robust_list; - rcu_read_unlock(); } + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ)) + goto err_unlock; + + head = 
p->compat_robust_list; + rcu_read_unlock(); + if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(ptr_to_compat(head), head_ptr); diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index cf1a4a68ce44..d1a758bc972a 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -62,7 +62,7 @@ config IRQ_DOMAIN_DEBUG help This option will show the mapping relationship between hardware irq numbers and Linux irq numbers. The mapping is exposed via debugfs - in the file "virq_mapping". + in the file "irq_domain_mapping". If you don't know what this means you don't need it. diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 6ff84e6a954c..bdb180325551 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -54,14 +54,18 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action) static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action) { /* - * Wake up the handler thread for this action. In case the - * thread crashed and was killed we just pretend that we - * handled the interrupt. The hardirq handler has disabled the - * device interrupt, so no irq storm is lurking. If the + * In case the thread crashed and was killed we just pretend that + * we handled the interrupt. The hardirq handler has disabled the + * device interrupt, so no irq storm is lurking. + */ + if (action->thread->flags & PF_EXITING) + return; + + /* + * Wake up the handler thread for this action. If the * RUNTHREAD bit is already set, nothing to do. */ - if ((action->thread->flags & PF_EXITING) || - test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags)) + if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags)) return; /* diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 3601f3fbf67c..d34413e78628 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -23,7 +23,6 @@ static LIST_HEAD(irq_domain_list); static DEFINE_MUTEX(irq_domain_mutex); static DEFINE_MUTEX(revmap_trees_mutex); -static unsigned int irq_virq_count = NR_IRQS; static struct irq_domain *irq_default_domain; /** @@ -184,13 +183,16 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node, } struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, + unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_NOMAP, ops, host_data); - if (domain) + if (domain) { + domain->revmap_data.nomap.max_irq = max_irq ? 
max_irq : ~0; irq_domain_add(domain); + } return domain; } @@ -262,22 +264,6 @@ void irq_set_default_host(struct irq_domain *domain) irq_default_domain = domain; } -/** - * irq_set_virq_count() - Set the maximum number of linux irqs - * @count: number of linux irqs, capped with NR_IRQS - * - * This is mainly for use by platforms like iSeries who want to program - * the virtual irq number in the controller to avoid the reverse mapping - */ -void irq_set_virq_count(unsigned int count) -{ - pr_debug("irq: Trying to set virq count to %d\n", count); - - BUG_ON(count < NUM_ISA_INTERRUPTS); - if (count < NR_IRQS) - irq_virq_count = count; -} - static int irq_setup_virq(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { @@ -320,13 +306,12 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) pr_debug("irq: create_direct virq allocation failed\n"); return 0; } - if (virq >= irq_virq_count) { + if (virq >= domain->revmap_data.nomap.max_irq) { pr_err("ERROR: no free irqs available below %i maximum\n", - irq_virq_count); + domain->revmap_data.nomap.max_irq); irq_free_desc(virq); return 0; } - pr_debug("irq: create_direct obtained virq %d\n", virq); if (irq_setup_virq(domain, virq, virq)) { @@ -350,7 +335,8 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { - unsigned int virq, hint; + unsigned int hint; + int virq; pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); @@ -377,13 +363,13 @@ unsigned int irq_create_mapping(struct irq_domain *domain, return irq_domain_legacy_revmap(domain, hwirq); /* Allocate a virtual interrupt number */ - hint = hwirq % irq_virq_count; + hint = hwirq % nr_irqs; if (hint == 0) hint++; virq = irq_alloc_desc_from(hint, 0); - if (!virq) + if (virq <= 0) virq = irq_alloc_desc_from(1, 0); - if (!virq) { + if (virq <= 0) { pr_debug("irq: -> virq allocation failed\n"); return 0; } @@ -515,7 +501,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { unsigned int i; - unsigned int hint = hwirq % irq_virq_count; + unsigned int hint = hwirq % nr_irqs; /* Look for default domain if nececssary */ if (domain == NULL) @@ -536,7 +522,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, if (data && (data->domain == domain) && (data->hwirq == hwirq)) return i; i++; - if (i >= irq_virq_count) + if (i >= nr_irqs) i = 1; } while(i != hint); return 0; @@ -642,8 +628,8 @@ static int virq_debug_show(struct seq_file *m, void *private) void *data; int i; - seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", - "chip name", "chip data", "domain name"); + seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq", + "chip name", 2 * sizeof(void *) + 2, "chip data", "domain name"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); @@ -666,7 +652,7 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%-15s ", p); data = irq_desc_get_chip_data(desc); - seq_printf(m, "0x%16p ", data); + seq_printf(m, data ? 
"0x%p " : " %p ", data); if (desc->irq_data.domain && desc->irq_data.domain->of_node) p = desc->irq_data.domain->of_node->full_name; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index b0ccd1ac2d6a..89a3ea82569b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -282,7 +282,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) { struct irq_chip *chip = irq_desc_get_chip(desc); struct cpumask *set = irq_default_affinity; - int ret; + int ret, node = desc->irq_data.node; /* Excludes PER_CPU and NO_BALANCE interrupts */ if (!irq_can_set_affinity(irq)) @@ -301,6 +301,13 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) } cpumask_and(mask, cpu_online_mask, set); + if (node != NUMA_NO_NODE) { + const struct cpumask *nodemask = cpumask_of_node(node); + + /* make sure at least one of the cpus in nodemask is online */ + if (cpumask_intersects(mask, nodemask)) + cpumask_and(mask, mask, nodemask); + } ret = chip->irq_set_affinity(&desc->irq_data, mask, false); switch (ret) { case IRQ_SET_MASK_OK: @@ -645,7 +652,7 @@ static int irq_wait_for_interrupt(struct irqaction *action) * is marked MASKED. */ static void irq_finalize_oneshot(struct irq_desc *desc, - struct irqaction *action, bool force) + struct irqaction *action) { if (!(desc->istate & IRQS_ONESHOT)) return; @@ -679,7 +686,7 @@ again: * we would clear the threads_oneshot bit of this thread which * was just set. */ - if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) + if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags)) goto out_unlock; desc->threads_oneshot &= ~action->thread_mask; @@ -739,7 +746,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) local_bh_disable(); ret = action->thread_fn(action->irq, action->dev_id); - irq_finalize_oneshot(desc, action, false); + irq_finalize_oneshot(desc, action); local_bh_enable(); return ret; } @@ -755,7 +762,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, irqreturn_t ret; ret = action->thread_fn(action->irq, action->dev_id); - irq_finalize_oneshot(desc, action, false); + irq_finalize_oneshot(desc, action); return ret; } @@ -844,7 +851,7 @@ void exit_irq_thread(void) wake_threads_waitq(desc); /* Prevent a stale desc->threads_oneshot */ - irq_finalize_oneshot(desc, action, true); + irq_finalize_oneshot(desc, action); } static void irq_setup_forced_threading(struct irqaction *new) diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 47420908fba0..c3c89751b327 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -43,12 +43,16 @@ void irq_move_masked_irq(struct irq_data *idata) * masking the irqs. */ if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) - < nr_cpu_ids)) - if (!chip->irq_set_affinity(&desc->irq_data, - desc->pending_mask, false)) { + < nr_cpu_ids)) { + int ret = chip->irq_set_affinity(&desc->irq_data, + desc->pending_mask, false); + switch (ret) { + case IRQ_SET_MASK_OK: cpumask_copy(desc->irq_data.affinity, desc->pending_mask); + case IRQ_SET_MASK_OK_NOCOPY: irq_set_thread_affinity(desc); } + } cpumask_clear(desc->pending_mask); } diff --git a/kernel/irq_work.c b/kernel/irq_work.c index c3c46c72046e..0c56d44b9fd5 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -5,6 +5,7 @@ * context. The enqueueing is NMI-safe. 
*/ +#include <linux/bug.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/irq_work.h> diff --git a/kernel/kmod.c b/kernel/kmod.c index 957a7aab8ebc..05698a7415fe 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -322,7 +322,7 @@ static void __call_usermodehelper(struct work_struct *work) * land has been frozen during a system-wide hibernation or suspend operation). * Should always be manipulated under umhelper_sem acquired for write. */ -static int usermodehelper_disabled = 1; +static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED; /* Number of helpers running */ static atomic_t running_helpers = ATOMIC_INIT(0); @@ -334,32 +334,110 @@ static atomic_t running_helpers = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); /* + * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled + * to become 'false'. + */ +static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq); + +/* * Time to wait for running_helpers to become zero before the setting of * usermodehelper_disabled in usermodehelper_disable() fails */ #define RUNNING_HELPERS_TIMEOUT (5 * HZ) -void read_lock_usermodehelper(void) +int usermodehelper_read_trylock(void) { + DEFINE_WAIT(wait); + int ret = 0; + down_read(&umhelper_sem); + for (;;) { + prepare_to_wait(&usermodehelper_disabled_waitq, &wait, + TASK_INTERRUPTIBLE); + if (!usermodehelper_disabled) + break; + + if (usermodehelper_disabled == UMH_DISABLED) + ret = -EAGAIN; + + up_read(&umhelper_sem); + + if (ret) + break; + + schedule(); + try_to_freeze(); + + down_read(&umhelper_sem); + } + finish_wait(&usermodehelper_disabled_waitq, &wait); + return ret; +} +EXPORT_SYMBOL_GPL(usermodehelper_read_trylock); + +long usermodehelper_read_lock_wait(long timeout) +{ + DEFINE_WAIT(wait); + + if (timeout < 0) + return -EINVAL; + + down_read(&umhelper_sem); + for (;;) { + prepare_to_wait(&usermodehelper_disabled_waitq, &wait, + TASK_UNINTERRUPTIBLE); + if (!usermodehelper_disabled) + break; + + up_read(&umhelper_sem); + + timeout = schedule_timeout(timeout); + if (!timeout) + break; + + down_read(&umhelper_sem); + } + finish_wait(&usermodehelper_disabled_waitq, &wait); + return timeout; } -EXPORT_SYMBOL_GPL(read_lock_usermodehelper); +EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait); -void read_unlock_usermodehelper(void) +void usermodehelper_read_unlock(void) { up_read(&umhelper_sem); } -EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); +EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); /** - * usermodehelper_disable - prevent new helpers from being started + * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled. + * depth: New value to assign to usermodehelper_disabled. + * + * Change the value of usermodehelper_disabled (under umhelper_sem locked for + * writing) and wakeup tasks waiting for it to change. */ -int usermodehelper_disable(void) +void __usermodehelper_set_disable_depth(enum umh_disable_depth depth) +{ + down_write(&umhelper_sem); + usermodehelper_disabled = depth; + wake_up(&usermodehelper_disabled_waitq); + up_write(&umhelper_sem); +} + +/** + * __usermodehelper_disable - Prevent new helpers from being started. + * @depth: New value to assign to usermodehelper_disabled. + * + * Set usermodehelper_disabled to @depth and wait for running helpers to exit. 
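+ *
+ * The freezer is the expected caller; its use later in this same series
+ * (kernel/power/process.c) is:
+ *
+ *	error = __usermodehelper_disable(UMH_FREEZING);
+ *	if (error)
+ *		return error;
+ *	...
+ *	__usermodehelper_set_disable_depth(UMH_DISABLED);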
+ */ +int __usermodehelper_disable(enum umh_disable_depth depth) { long retval; + if (!depth) + return -EINVAL; + down_write(&umhelper_sem); - usermodehelper_disabled = 1; + usermodehelper_disabled = depth; up_write(&umhelper_sem); /* @@ -374,31 +452,10 @@ int usermodehelper_disable(void) if (retval) return 0; - down_write(&umhelper_sem); - usermodehelper_disabled = 0; - up_write(&umhelper_sem); + __usermodehelper_set_disable_depth(UMH_ENABLED); return -EAGAIN; } -/** - * usermodehelper_enable - allow new helpers to be started again - */ -void usermodehelper_enable(void) -{ - down_write(&umhelper_sem); - usermodehelper_disabled = 0; - up_write(&umhelper_sem); -} - -/** - * usermodehelper_is_disabled - check if new helpers are allowed to be started - */ -bool usermodehelper_is_disabled(void) -{ - return usermodehelper_disabled; -} -EXPORT_SYMBOL_GPL(usermodehelper_is_disabled); - static void helper_lock(void) { atomic_inc(&running_helpers); diff --git a/kernel/padata.c b/kernel/padata.c index 6f10eb285ece..89fe3d1b9efb 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -1,6 +1,8 @@ /* * padata.c - generic interface to process data streams in parallel * + * See Documentation/padata.txt for an api documentation. + * * Copyright (C) 2008, 2009 secunet Security Networks AG * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com> * @@ -354,13 +356,13 @@ static int padata_setup_cpumasks(struct parallel_data *pd, if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) return -ENOMEM; - cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask); + cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { free_cpumask_var(pd->cpumask.pcpu); return -ENOMEM; } - cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask); + cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask); return 0; } @@ -564,7 +566,7 @@ EXPORT_SYMBOL(padata_unregister_cpumask_notifier); static bool padata_validate_cpumask(struct padata_instance *pinst, const struct cpumask *cpumask) { - if (!cpumask_intersects(cpumask, cpu_active_mask)) { + if (!cpumask_intersects(cpumask, cpu_online_mask)) { pinst->flags |= PADATA_INVALID; return false; } @@ -678,7 +680,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu) { struct parallel_data *pd; - if (cpumask_test_cpu(cpu, cpu_active_mask)) { + if (cpumask_test_cpu(cpu, cpu_online_mask)) { pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, pinst->cpumask.cbcpu); if (!pd) @@ -746,6 +748,9 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) return -ENOMEM; padata_replace(pinst, pd); + + cpumask_clear_cpu(cpu, pd->cpumask.cbcpu); + cpumask_clear_cpu(cpu, pd->cpumask.pcpu); } return 0; diff --git a/kernel/panic.c b/kernel/panic.c index 80aed44e345a..8ed89a175d79 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -97,7 +97,7 @@ void panic(const char *fmt, ...)
/* * Avoid nested stack-dumping if a panic occurs during oops processing */ - if (!oops_in_progress) + if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) dump_stack(); #endif diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 0a186cfde788..e09dfbfeecee 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -16,7 +16,6 @@ #include <linux/string.h> #include <linux/device.h> #include <linux/async.h> -#include <linux/kmod.h> #include <linux/delay.h> #include <linux/fs.h> #include <linux/mount.h> @@ -611,14 +610,10 @@ int hibernate(void) if (error) goto Exit; - error = usermodehelper_disable(); - if (error) - goto Exit; - /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); if (error) - goto Enable_umh; + goto Exit; printk(KERN_INFO "PM: Syncing filesystems ... "); sys_sync(); @@ -661,8 +656,6 @@ int hibernate(void) Free_bitmaps: free_basic_memory_bitmaps(); - Enable_umh: - usermodehelper_enable(); Exit: pm_notifier_call_chain(PM_POST_HIBERNATION); pm_restore_console(); @@ -777,15 +770,9 @@ static int software_resume(void) if (error) goto close_finish; - error = usermodehelper_disable(); - if (error) - goto close_finish; - error = create_basic_memory_bitmaps(); - if (error) { - usermodehelper_enable(); + if (error) goto close_finish; - } pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); @@ -806,7 +793,6 @@ static int software_resume(void) thaw_processes(); Done: free_basic_memory_bitmaps(); - usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); pm_restore_console(); diff --git a/kernel/power/process.c b/kernel/power/process.c index 0d2aeb226108..19db29f67558 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -16,6 +16,7 @@ #include <linux/freezer.h> #include <linux/delay.h> #include <linux/workqueue.h> +#include <linux/kmod.h> /* * Timeout for stopping processes @@ -122,6 +123,10 @@ int freeze_processes(void) { int error; + error = __usermodehelper_disable(UMH_FREEZING); + if (error) + return error; + if (!pm_freezing) atomic_inc(&system_freezing_cnt); @@ -130,6 +135,7 @@ int freeze_processes(void) error = try_to_freeze_tasks(true); if (!error) { printk("done."); + __usermodehelper_set_disable_depth(UMH_DISABLED); oom_killer_disable(); } printk("\n"); @@ -187,6 +193,8 @@ void thaw_processes(void) } while_each_thread(g, p); read_unlock(&tasklist_lock); + usermodehelper_enable(); + schedule(); printk("done.\n"); } diff --git a/kernel/power/qos.c b/kernel/power/qos.c index d6d6dbd1ecc0..6a031e684026 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -230,6 +230,21 @@ int pm_qos_request_active(struct pm_qos_request *req) EXPORT_SYMBOL_GPL(pm_qos_request_active); /** + * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout + * @work: work struct for the delayed work (timeout) + * + * This cancels the timeout request by falling back to the default at timeout. 
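+ *
+ * Caller side, the request is armed with pm_qos_update_request_timeout();
+ * a sketch (request name, class value and duration are illustrative only):
+ *
+ *	pm_qos_update_request_timeout(&my_qos_req, 20, 500 * USEC_PER_MSEC);
+ *
+ * holds the stronger constraint for roughly 500ms; when the delayed work
+ * fires, this handler drops the request back to PM_QOS_DEFAULT_VALUE.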
+ */ +static void pm_qos_work_fn(struct work_struct *work) +{ + struct pm_qos_request *req = container_of(to_delayed_work(work), + struct pm_qos_request, + work); + + pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE); +} + +/** * pm_qos_add_request - inserts new qos request into the list * @req: pointer to a preallocated handle * @pm_qos_class: identifies which list of qos request to use @@ -253,6 +268,7 @@ void pm_qos_add_request(struct pm_qos_request *req, return; } req->pm_qos_class = pm_qos_class; + INIT_DELAYED_WORK(&req->work, pm_qos_work_fn); pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, &req->node, PM_QOS_ADD_REQ, value); } @@ -279,6 +295,9 @@ void pm_qos_update_request(struct pm_qos_request *req, return; } + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + if (new_value != req->node.prio) pm_qos_update_target( pm_qos_array[req->pm_qos_class]->constraints, @@ -287,6 +306,34 @@ void pm_qos_update_request(struct pm_qos_request *req, EXPORT_SYMBOL_GPL(pm_qos_update_request); /** + * pm_qos_update_request_timeout - modifies an existing qos request temporarily. + * @req : handle to list element holding a pm_qos request to use + * @new_value: defines the temporal qos request + * @timeout_us: the effective duration of this qos request in usecs. + * + * After timeout_us, this qos request is cancelled automatically. + */ +void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value, + unsigned long timeout_us) +{ + if (!req) + return; + if (WARN(!pm_qos_request_active(req), + "%s called for unknown object.", __func__)) + return; + + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + + if (new_value != req->node.prio) + pm_qos_update_target( + pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_UPDATE_REQ, new_value); + + schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us)); +} + +/** * pm_qos_remove_request - modifies an existing qos request * @req: handle to request list element * @@ -305,6 +352,9 @@ void pm_qos_remove_request(struct pm_qos_request *req) return; } + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, &req->node, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 88e5c967370d..396d262b8fd0 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -12,7 +12,6 @@ #include <linux/delay.h> #include <linux/errno.h> #include <linux/init.h> -#include <linux/kmod.h> #include <linux/console.h> #include <linux/cpu.h> #include <linux/syscalls.h> @@ -102,17 +101,12 @@ static int suspend_prepare(void) if (error) goto Finish; - error = usermodehelper_disable(); - if (error) - goto Finish; - error = suspend_freeze_processes(); if (!error) return 0; suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); - usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); @@ -259,7 +253,6 @@ int suspend_devices_and_enter(suspend_state_t state) static void suspend_finish(void) { suspend_thaw_processes(); - usermodehelper_enable(); pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); } diff --git a/kernel/power/user.c b/kernel/power/user.c index 33c4329205af..91b0fd021a95 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -12,7 +12,6 @@ #include <linux/suspend.h> #include <linux/syscalls.h> #include <linux/reboot.h> -#include <linux/kmod.h> #include 
<linux/string.h> #include <linux/device.h> #include <linux/miscdevice.h> @@ -222,14 +221,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, sys_sync(); printk("done.\n"); - error = usermodehelper_disable(); - if (error) - break; - error = freeze_processes(); - if (error) - usermodehelper_enable(); - else + if (!error) data->frozen = 1; break; @@ -238,7 +231,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; pm_restore_gfp_mask(); thaw_processes(); - usermodehelper_enable(); data->frozen = 0; break; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e3ed0ecee7c7..4603b9d8f30a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1270,7 +1270,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) int dest_cpu; /* Look for allowed, online CPU in same node. */ - for_each_cpu_mask(dest_cpu, *nodemask) { + for_each_cpu(dest_cpu, nodemask) { if (!cpu_online(dest_cpu)) continue; if (!cpu_active(dest_cpu)) @@ -1281,7 +1281,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p) for (;;) { /* Any allowed, online CPU? */ - for_each_cpu_mask(dest_cpu, *tsk_cpus_allowed(p)) { + for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { if (!cpu_online(dest_cpu)) continue; if (!cpu_active(dest_cpu)) @@ -1964,6 +1964,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) local_irq_enable(); #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ finish_lock_switch(rq, prev); + finish_arch_post_lock_switch(); fire_sched_in_preempt_notifiers(current); if (mm) @@ -3101,8 +3102,6 @@ EXPORT_SYMBOL(sub_preempt_count); */ static noinline void __schedule_bug(struct task_struct *prev) { - struct pt_regs *regs = get_irq_regs(); - if (oops_in_progress) return; @@ -3113,11 +3112,7 @@ static noinline void __schedule_bug(struct task_struct *prev) print_modules(); if (irqs_disabled()) print_irqtrace_events(prev); - - if (regs) - show_regs(regs); - else - dump_stack(); + dump_stack(); } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 42b1f304b044..fb3acba4d52e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -681,6 +681,9 @@ static inline int task_running(struct rq *rq, struct task_struct *p) #ifndef finish_arch_switch # define finish_arch_switch(prev) do { } while (0) #endif +#ifndef finish_arch_post_lock_switch +# define finish_arch_post_lock_switch() do { } while (0) +#endif #ifndef __ARCH_WANT_UNLOCKED_CTXSW static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 52b3a06a02f8..4ab11879aeb4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -170,7 +170,7 @@ static int proc_taint(struct ctl_table *table, int write, #endif #ifdef CONFIG_PRINTK -static int proc_dmesg_restrict(struct ctl_table *table, int write, +static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -703,7 +703,7 @@ static struct ctl_table kern_table[] = { .data = &dmesg_restrict, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_sysadmin, .extra1 = &zero, .extra2 = &one, }, @@ -712,7 +712,7 @@ static struct ctl_table kern_table[] = { .data = &kptr_restrict, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dmesg_restrict, + .proc_handler = proc_dointvec_minmax_sysadmin, .extra1 = &zero, .extra2 = &two, }, @@ -1943,7 +1943,7 @@ static int proc_taint(struct ctl_table *table, int write, } #ifdef 
CONFIG_PRINTK -static int proc_dmesg_restrict(struct ctl_table *table, int write, +static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (write && !capable(CAP_SYS_ADMIN)) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index cd3134510f3d..a1d2849f2473 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -141,7 +141,7 @@ if FTRACE config FUNCTION_TRACER bool "Kernel Function Tracer" depends on HAVE_FUNCTION_TRACER - select FRAME_POINTER if !ARM_UNWIND && !S390 && !MICROBLAZE + select FRAME_POINTER if !ARM_UNWIND && !PPC && !S390 && !MICROBLAZE select KALLSYMS select GENERIC_TRACER select CONTEXT_SWITCH_TRACER diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index cdea7b56b0c9..c0bd0308741c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -311,13 +311,6 @@ int blk_trace_remove(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_trace_remove); -static int blk_dropped_open(struct inode *inode, struct file *filp) -{ - filp->private_data = inode->i_private; - - return 0; -} - static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { @@ -331,18 +324,11 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, static const struct file_operations blk_dropped_fops = { .owner = THIS_MODULE, - .open = blk_dropped_open, + .open = simple_open, .read = blk_dropped_read, .llseek = default_llseek, }; -static int blk_msg_open(struct inode *inode, struct file *filp) -{ - filp->private_data = inode->i_private; - - return 0; -} - static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, size_t count, loff_t *ppos) { @@ -371,7 +357,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, static const struct file_operations blk_msg_fops = { .owner = THIS_MODULE, - .open = blk_msg_open, + .open = simple_open, .write = blk_msg_write, .llseek = noop_llseek, }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 867bd1dd2dd0..0fa92f677c92 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -249,7 +249,8 @@ static void update_ftrace_function(void) #else __ftrace_trace_function = func; #endif - ftrace_trace_function = ftrace_test_stop_func; + ftrace_trace_function = + (func == ftrace_stub) ? func : ftrace_test_stop_func; #endif } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f5b7b5c1195b..cf8d11e91efd 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -154,33 +154,10 @@ enum { static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; -#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) - -/** - * tracing_on - enable all tracing buffers - * - * This function enables all tracing buffers that may have been - * disabled with tracing_off. - */ -void tracing_on(void) -{ - set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); -} -EXPORT_SYMBOL_GPL(tracing_on); +/* Used for individual buffers (after the counter) */ +#define RB_BUFFER_OFF (1 << 20) -/** - * tracing_off - turn off all tracing buffers - * - * This function stops all tracing buffers from recording data. - * It does not disable any overhead the tracers themselves may - * be causing. This function simply causes all recording to - * the ring buffers to fail. 
- */ -void tracing_off(void) -{ - clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); -} -EXPORT_SYMBOL_GPL(tracing_off); +#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) /** * tracing_off_permanent - permanently disable ring buffers @@ -193,15 +170,6 @@ void tracing_off_permanent(void) set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); } -/** - * tracing_is_on - show state of ring buffers enabled - */ -int tracing_is_on(void) -{ - return ring_buffer_flags == RB_BUFFERS_ON; -} -EXPORT_SYMBOL_GPL(tracing_is_on); - #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) #define RB_ALIGNMENT 4U #define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) @@ -2619,6 +2587,63 @@ void ring_buffer_record_enable(struct ring_buffer *buffer) EXPORT_SYMBOL_GPL(ring_buffer_record_enable); /** + * ring_buffer_record_off - stop all writes into the buffer + * @buffer: The ring buffer to stop writes to. + * + * This prevents all writes to the buffer. Any attempt to write + * to the buffer after this will fail and return NULL. + * + * This is different than ring_buffer_record_disable() as + * it works like an on/off switch, where as the disable() verison + * must be paired with a enable(). + */ +void ring_buffer_record_off(struct ring_buffer *buffer) +{ + unsigned int rd; + unsigned int new_rd; + + do { + rd = atomic_read(&buffer->record_disabled); + new_rd = rd | RB_BUFFER_OFF; + } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); +} +EXPORT_SYMBOL_GPL(ring_buffer_record_off); + +/** + * ring_buffer_record_on - restart writes into the buffer + * @buffer: The ring buffer to start writes to. + * + * This enables all writes to the buffer that was disabled by + * ring_buffer_record_off(). + * + * This is different than ring_buffer_record_enable() as + * it works like an on/off switch, where as the enable() verison + * must be paired with a disable(). + */ +void ring_buffer_record_on(struct ring_buffer *buffer) +{ + unsigned int rd; + unsigned int new_rd; + + do { + rd = atomic_read(&buffer->record_disabled); + new_rd = rd & ~RB_BUFFER_OFF; + } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd); +} +EXPORT_SYMBOL_GPL(ring_buffer_record_on); + +/** + * ring_buffer_record_is_on - return true if the ring buffer can write + * @buffer: The ring buffer to see if write is enabled + * + * Returns true if the ring buffer is in a state that it accepts writes. + */ +int ring_buffer_record_is_on(struct ring_buffer *buffer) +{ + return !atomic_read(&buffer->record_disabled); +} + +/** * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer * @buffer: The ring buffer to stop writes to. 
* @cpu: The CPU buffer to stop @@ -4039,68 +4064,6 @@ int ring_buffer_read_page(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_read_page); -#ifdef CONFIG_TRACING -static ssize_t -rb_simple_read(struct file *filp, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - unsigned long *p = filp->private_data; - char buf[64]; - int r; - - if (test_bit(RB_BUFFERS_DISABLED_BIT, p)) - r = sprintf(buf, "permanently disabled\n"); - else - r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p)); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - -static ssize_t -rb_simple_write(struct file *filp, const char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - unsigned long *p = filp->private_data; - unsigned long val; - int ret; - - ret = kstrtoul_from_user(ubuf, cnt, 10, &val); - if (ret) - return ret; - - if (val) - set_bit(RB_BUFFERS_ON_BIT, p); - else - clear_bit(RB_BUFFERS_ON_BIT, p); - - (*ppos)++; - - return cnt; -} - -static const struct file_operations rb_simple_fops = { - .open = tracing_open_generic, - .read = rb_simple_read, - .write = rb_simple_write, - .llseek = default_llseek, -}; - - -static __init int rb_init_debugfs(void) -{ - struct dentry *d_tracer; - - d_tracer = tracing_init_dentry(); - - trace_create_file("tracing_on", 0644, d_tracer, - &ring_buffer_flags, &rb_simple_fops); - - return 0; -} - -fs_initcall(rb_init_debugfs); -#endif - #ifdef CONFIG_HOTPLUG_CPU static int rb_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10d5503f0d04..ed7b5d1e12f4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -36,6 +36,7 @@ #include <linux/ctype.h> #include <linux/init.h> #include <linux/poll.h> +#include <linux/nmi.h> #include <linux/fs.h> #include "trace.h" @@ -352,6 +353,59 @@ static void wakeup_work_handler(struct work_struct *work) static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler); /** + * tracing_on - enable tracing buffers + * + * This function enables tracing buffers that may have been + * disabled with tracing_off. + */ +void tracing_on(void) +{ + if (global_trace.buffer) + ring_buffer_record_on(global_trace.buffer); + /* + * This flag is only looked at when buffers haven't been + * allocated yet. We don't really care about the race + * between setting this flag and actually turning + * on the buffer. + */ + global_trace.buffer_disabled = 0; +} +EXPORT_SYMBOL_GPL(tracing_on); + +/** + * tracing_off - turn off tracing buffers + * + * This function stops the tracing buffers from recording data. + * It does not disable any overhead the tracers themselves may + * be causing. This function simply causes all recording to + * the ring buffers to fail. + */ +void tracing_off(void) +{ + if (global_trace.buffer) + ring_buffer_record_off(global_trace.buffer); + /* + * This flag is only looked at when buffers haven't been + * allocated yet. We don't really care about the race + * between setting this flag and actually turning + * on the buffer. 
+ */ + global_trace.buffer_disabled = 1; +} +EXPORT_SYMBOL_GPL(tracing_off); + +/** + * tracing_is_on - show state of ring buffers enabled + */ +int tracing_is_on(void) +{ + if (global_trace.buffer) + return ring_buffer_record_is_on(global_trace.buffer); + return !global_trace.buffer_disabled; +} +EXPORT_SYMBOL_GPL(tracing_is_on); + +/** * trace_wake_up - wake up tasks waiting for trace input * * Schedules a delayed work to wake up any task that is blocked on the @@ -1644,6 +1698,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, int cpu_file = iter->cpu_file; u64 next_ts = 0, ts; int next_cpu = -1; + int next_size = 0; int cpu; /* @@ -1675,9 +1730,12 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, next_cpu = cpu; next_ts = ts; next_lost = lost_events; + next_size = iter->ent_size; } } + iter->ent_size = next_size; + if (ent_cpu) *ent_cpu = next_cpu; @@ -4567,6 +4625,55 @@ static __init void create_trace_options_dir(void) create_trace_option_core_file(trace_options[i], i); } +static ssize_t +rb_simple_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct ring_buffer *buffer = filp->private_data; + char buf[64]; + int r; + + if (buffer) + r = ring_buffer_record_is_on(buffer); + else + r = 0; + + r = sprintf(buf, "%d\n", r); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +rb_simple_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct ring_buffer *buffer = filp->private_data; + unsigned long val; + int ret; + + ret = kstrtoul_from_user(ubuf, cnt, 10, &val); + if (ret) + return ret; + + if (buffer) { + if (val) + ring_buffer_record_on(buffer); + else + ring_buffer_record_off(buffer); + } + + (*ppos)++; + + return cnt; +} + +static const struct file_operations rb_simple_fops = { + .open = tracing_open_generic, + .read = rb_simple_read, + .write = rb_simple_write, + .llseek = default_llseek, +}; + static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; @@ -4626,6 +4733,9 @@ static __init int tracer_init_debugfs(void) trace_create_file("trace_clock", 0644, d_tracer, NULL, &trace_clock_fops); + trace_create_file("tracing_on", 0644, d_tracer, + global_trace.buffer, &rb_simple_fops); + #ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, &ftrace_update_tot_cnt, &tracing_dyn_info_fops); @@ -4798,6 +4908,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) if (ret != TRACE_TYPE_NO_CONSUME) trace_consume(&iter); } + touch_nmi_watchdog(); trace_printk_seq(&iter.seq); } @@ -4863,6 +4974,8 @@ __init static int tracer_alloc_buffers(void) goto out_free_cpumask; } global_trace.entries = ring_buffer_size(global_trace.buffer); + if (global_trace.buffer_disabled) + tracing_off(); #ifdef CONFIG_TRACER_MAX_TRACE diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 54faec790bc1..95059f091a24 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -154,6 +154,7 @@ struct trace_array { struct ring_buffer *buffer; unsigned long entries; int cpu; + int buffer_disabled; cycle_t time_start; struct task_struct *waiter; struct trace_array_cpu *data[NR_CPUS]; @@ -835,13 +836,11 @@ extern const char *__stop___trace_bprintk_fmt[]; filter) #include "trace_entries.h" -#ifdef CONFIG_PERF_EVENTS #ifdef CONFIG_FUNCTION_TRACER int perf_ftrace_event_register(struct ftrace_event_call *call, enum trace_reg type, void *data); #else #define perf_ftrace_event_register NULL #endif /* CONFIG_FUNCTION_TRACER */ 
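/*
 * Aside: the ring_buffer_record_off()/on() pair added above folds a sticky
 * on/off switch into the same atomic_t that record_disable()/enable() use
 * as a nesting counter; RB_BUFFER_OFF (bit 20) sits above any plausible
 * nesting depth, so both uses share one word and stay lock-free. A
 * stand-alone sketch of that pattern (flag value and names hypothetical):
 */

#include <linux/atomic.h>
#include <linux/types.h>

#define STICKY_OFF	(1 << 20)	/* well above the nesting count */

static void sticky_off(atomic_t *ctl)
{
	unsigned int rd, new_rd;

	do {	/* lock-free read-modify-write, retried on races */
		rd = atomic_read(ctl);
		new_rd = rd | STICKY_OFF;
	} while (atomic_cmpxchg(ctl, rd, new_rd) != rd);
}

static bool sticky_can_write(atomic_t *ctl)
{
	/* zero means: not switched off and no nested disables pending */
	return !atomic_read(ctl);
}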
-#endif /* CONFIG_PERF_EVENTS */ #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index d91eb0541b3a..4108e1250ca2 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -166,6 +166,12 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry, #define FTRACE_STACK_ENTRIES 8 +#ifndef CONFIG_64BIT +# define IP_FMT "%08lx" +#else +# define IP_FMT "%016lx" +#endif + FTRACE_ENTRY(kernel_stack, stack_entry, TRACE_STACK, @@ -175,8 +181,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry, __dynamic_array(unsigned long, caller ) ), - F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" - "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", + F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" + "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" + "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n", __entry->caller[0], __entry->caller[1], __entry->caller[2], __entry->caller[3], __entry->caller[4], __entry->caller[5], __entry->caller[6], __entry->caller[7]), @@ -193,8 +200,9 @@ FTRACE_ENTRY(user_stack, userstack_entry, __array( unsigned long, caller, FTRACE_STACK_ENTRIES ) ), - F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" - "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n", + F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" + "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n" + "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n", __entry->caller[0], __entry->caller[1], __entry->caller[2], __entry->caller[3], __entry->caller[4], __entry->caller[5], __entry->caller[6], __entry->caller[7]), diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 7b46c9bd22ae..3dd15e8bc856 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -162,7 +162,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \ #define __dynamic_array(type, item) #undef F_printk -#define F_printk(fmt, args...) #fmt ", " __stringify(args) +#define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args) #undef FTRACE_ENTRY_REG #define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\ |
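The F_printk() change above matters because the stack-trace format strings are now assembled from the IP_FMT macro: plain #fmt stringizes the tokens exactly as written, leaving "IP_FMT" unexpanded in the exported format, while the kernel's two-level __stringify() expands macros first. A small stand-alone illustration of the difference (user-space C; the macro names mirror include/linux/stringify.h):

#include <stdio.h>

#define __stringify_1(x...)	#x			/* stringize as written */
#define __stringify(x...)	__stringify_1(x)	/* expand, then stringize */

#define IP_FMT "%08lx"

int main(void)
{
	puts(__stringify_1(IP_FMT));	/* prints: IP_FMT (unexpanded) */
	puts(__stringify(IP_FMT));	/* prints: "%08lx" (with quotes) */
	return 0;
}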