summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Kconfig.locks4
-rw-r--r--kernel/Kconfig.preempt1
-rw-r--r--kernel/cgroup.c2
-rw-r--r--kernel/compat.c68
-rw-r--r--kernel/cpuset.c31
-rw-r--r--kernel/debug/debug_core.c1
-rw-r--r--kernel/debug/kdb/kdb_bt.c1
-rw-r--r--kernel/dma.c1
-rw-r--r--kernel/events/core.c11
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/futex.c38
-rw-r--r--kernel/futex_compat.c38
-rw-r--r--kernel/irq/Kconfig15
-rw-r--r--kernel/irq/handle.c16
-rw-r--r--kernel/irq/irqdomain.c8
-rw-r--r--kernel/irq/manage.c19
-rw-r--r--kernel/irq/migration.c10
-rw-r--r--kernel/irq_work.c1
-rw-r--r--kernel/kexec.c7
-rw-r--r--kernel/module.c37
-rw-r--r--kernel/padata.c13
-rw-r--r--kernel/params.c39
-rw-r--r--kernel/pid_namespace.c33
-rw-r--r--kernel/rwsem.c1
-rw-r--r--kernel/sched/core.c72
-rw-r--r--kernel/sched/fair.c16
-rw-r--r--kernel/sched/rt.c2
-rw-r--r--kernel/sched/sched.h3
-rw-r--r--kernel/signal.c1
-rw-r--r--kernel/smp.c90
-rw-r--r--kernel/spinlock.c2
-rw-r--r--kernel/sys.c9
-rw-r--r--kernel/sysctl.c12
-rw-r--r--kernel/time.c6
-rw-r--r--kernel/time/alarmtimer.c8
-rw-r--r--kernel/time/clocksource.c2
-rw-r--r--kernel/time/ntp.c134
-rw-r--r--kernel/time/timekeeping.c51
-rw-r--r--kernel/trace/Kconfig2
-rw-r--r--kernel/trace/ftrace.c3
-rw-r--r--kernel/trace/ring_buffer.c157
-rw-r--r--kernel/trace/trace.c113
-rw-r--r--kernel/trace/trace.h3
-rw-r--r--kernel/trace/trace_entries.h16
-rw-r--r--kernel/trace/trace_export.c2
45 files changed, 691 insertions, 410 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 5068e2a4e75f..2251882daf53 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -124,8 +124,8 @@ config INLINE_SPIN_LOCK_IRQSAVE
def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \
ARCH_INLINE_SPIN_LOCK_IRQSAVE
-config INLINE_SPIN_UNLOCK
- def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK)
+config UNINLINE_SPIN_UNLOCK
+ bool
config INLINE_SPIN_UNLOCK_BH
def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 24e7cb0ba26a..3f9c97419f02 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -36,6 +36,7 @@ config PREEMPT_VOLUNTARY
config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
select PREEMPT_COUNT
+ select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
This option reduces the latency of the kernel by making
all kernel code (that is not executing in a critical section)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f4ea4b6f3cf1..ed64ccac67c9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1883,7 +1883,7 @@ static void cgroup_task_migrate(struct cgroup *cgrp, struct cgroup *oldcgrp,
*/
int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
- int retval;
+ int retval = 0;
struct cgroup_subsys *ss, *failed_ss = NULL;
struct cgroup *oldcgrp;
struct cgroupfs_root *root = cgrp->root;
diff --git a/kernel/compat.c b/kernel/compat.c
index f346cedfe24d..74ff8498809a 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -31,11 +31,10 @@
#include <asm/uaccess.h>
/*
- * Note that the native side is already converted to a timespec, because
- * that's what we want anyway.
+ * Get/set struct timeval with struct timespec on the native side
*/
-static int compat_get_timeval(struct timespec *o,
- struct compat_timeval __user *i)
+static int compat_get_timeval_convert(struct timespec *o,
+ struct compat_timeval __user *i)
{
long usec;
@@ -46,8 +45,8 @@ static int compat_get_timeval(struct timespec *o,
return 0;
}
-static int compat_put_timeval(struct compat_timeval __user *o,
- struct timeval *i)
+static int compat_put_timeval_convert(struct compat_timeval __user *o,
+ struct timeval *i)
{
return (put_user(i->tv_sec, &o->tv_sec) ||
put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
@@ -117,7 +116,7 @@ asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
if (tv) {
struct timeval ktv;
do_gettimeofday(&ktv);
- if (compat_put_timeval(tv, &ktv))
+ if (compat_put_timeval_convert(tv, &ktv))
return -EFAULT;
}
if (tz) {
@@ -135,7 +134,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
struct timezone ktz;
if (tv) {
- if (compat_get_timeval(&kts, tv))
+ if (compat_get_timeval_convert(&kts, tv))
return -EFAULT;
}
if (tz) {
@@ -146,12 +145,29 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
}
+int get_compat_timeval(struct timeval *tv, const struct compat_timeval __user *ctv)
+{
+ return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) ||
+ __get_user(tv->tv_sec, &ctv->tv_sec) ||
+ __get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(get_compat_timeval);
+
+int put_compat_timeval(const struct timeval *tv, struct compat_timeval __user *ctv)
+{
+ return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) ||
+ __put_user(tv->tv_sec, &ctv->tv_sec) ||
+ __put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(put_compat_timeval);
+
int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
{
return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
__get_user(ts->tv_sec, &cts->tv_sec) ||
__get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
}
+EXPORT_SYMBOL_GPL(get_compat_timespec);
int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user *cts)
{
@@ -161,6 +177,42 @@ int put_compat_timespec(const struct timespec *ts, struct compat_timespec __user
}
EXPORT_SYMBOL_GPL(put_compat_timespec);
+int compat_get_timeval(struct timeval *tv, const void __user *utv)
+{
+ if (COMPAT_USE_64BIT_TIME)
+ return copy_from_user(tv, utv, sizeof *tv) ? -EFAULT : 0;
+ else
+ return get_compat_timeval(tv, utv);
+}
+EXPORT_SYMBOL_GPL(compat_get_timeval);
+
+int compat_put_timeval(const struct timeval *tv, void __user *utv)
+{
+ if (COMPAT_USE_64BIT_TIME)
+ return copy_to_user(utv, tv, sizeof *tv) ? -EFAULT : 0;
+ else
+ return put_compat_timeval(tv, utv);
+}
+EXPORT_SYMBOL_GPL(compat_put_timeval);
+
+int compat_get_timespec(struct timespec *ts, const void __user *uts)
+{
+ if (COMPAT_USE_64BIT_TIME)
+ return copy_from_user(ts, uts, sizeof *ts) ? -EFAULT : 0;
+ else
+ return get_compat_timespec(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_get_timespec);
+
+int compat_put_timespec(const struct timespec *ts, void __user *uts)
+{
+ if (COMPAT_USE_64BIT_TIME)
+ return copy_to_user(uts, ts, sizeof *ts) ? -EFAULT : 0;
+ else
+ return put_compat_timespec(ts, uts);
+}
+EXPORT_SYMBOL_GPL(compat_put_timespec);
+
static long compat_nanosleep_restart(struct restart_block *restart)
{
struct compat_timespec __user *rmtp;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1010cc61931f..14f7070b4ba2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -270,11 +270,11 @@ static struct file_system_type cpuset_fs_type = {
* are online. If none are online, walk up the cpuset hierarchy
* until we find one that does have some online cpus. If we get
* all the way to the top and still haven't found any online cpus,
- * return cpu_online_map. Or if passed a NULL cs from an exit'ing
- * task, return cpu_online_map.
+ * return cpu_online_mask. Or if passed a NULL cs from an exit'ing
+ * task, return cpu_online_mask.
*
* One way or another, we guarantee to return some non-empty subset
- * of cpu_online_map.
+ * of cpu_online_mask.
*
* Call with callback_mutex held.
*/
@@ -867,7 +867,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
int retval;
int is_load_balanced;
- /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+ /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
if (cs == &top_cpuset)
return -EACCES;
@@ -2149,7 +2149,7 @@ void __init cpuset_init_smp(void)
*
* Description: Returns the cpumask_var_t cpus_allowed of the cpuset
* attached to the specified @tsk. Guaranteed to return some non-empty
- * subset of cpu_online_map, even if this means going outside the
+ * subset of cpu_online_mask, even if this means going outside the
* tasks cpuset.
**/
@@ -2162,10 +2162,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
mutex_unlock(&callback_mutex);
}
-int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
const struct cpuset *cs;
- int cpu;
rcu_read_lock();
cs = task_cs(tsk);
@@ -2186,22 +2185,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
* changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
* set any mask even if it is not right from task_cs() pov,
* the pending set_cpus_allowed_ptr() will fix things.
+ *
+ * select_fallback_rq() will fix things ups and set cpu_possible_mask
+ * if required.
*/
-
- cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
- if (cpu >= nr_cpu_ids) {
- /*
- * Either tsk->cpus_allowed is wrong (see above) or it
- * is actually empty. The latter case is only possible
- * if we are racing with remove_tasks_in_empty_cpuset().
- * Like above we can temporary set any mask and rely on
- * set_cpus_allowed_ptr() as synchronization point.
- */
- do_set_cpus_allowed(tsk, cpu_possible_mask);
- cpu = cpumask_any(cpu_active_mask);
- }
-
- return cpu;
}
void cpuset_init_current_mems_allowed(void)
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 3f88a45e6f0a..1dc53bae56e1 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -53,7 +53,6 @@
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
#include <linux/atomic.h>
-#include <asm/system.h>
#include "debug_core.h"
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 7179eac7b41c..07c9bbb94a0b 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -15,7 +15,6 @@
#include <linux/sched.h>
#include <linux/kdb.h>
#include <linux/nmi.h>
-#include <asm/system.h>
#include "kdb_private.h"
diff --git a/kernel/dma.c b/kernel/dma.c
index 68a2306522c8..6c6262f86c17 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -18,7 +18,6 @@
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <asm/dma.h>
-#include <asm/system.h>
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4b50357914fb..a6a9ec4cd8f5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3348,7 +3348,7 @@ static void calc_timer_values(struct perf_event *event,
*running = ctx_time - event->tstamp_running;
}
-void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
{
}
@@ -3398,7 +3398,7 @@ void perf_event_update_userpage(struct perf_event *event)
userpg->time_running = running +
atomic64_read(&event->child_total_time_running);
- perf_update_user_clock(userpg, now);
+ arch_perf_update_userpage(userpg, now);
barrier();
++userpg->lock;
@@ -7116,6 +7116,13 @@ void __init perf_event_init(void)
/* do not patch jump label more than once per second */
jump_label_rate_limit(&perf_sched_events, HZ);
+
+ /*
+ * Build time assertion that we keep the data_head at the intended
+ * location. IOW, validation we got the __reserved[] size right.
+ */
+ BUILD_BUG_ON((offsetof(struct perf_event_mmap_page, data_head))
+ != 1024);
}
static int __init perf_event_sysfs_init(void)
diff --git a/kernel/exit.c b/kernel/exit.c
index 3db1909faed9..d8bd3b425fa7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -474,7 +474,7 @@ static void close_files(struct files_struct * files)
i = j * __NFDBITS;
if (i >= fdt->max_fds)
break;
- set = fdt->open_fds->fds_bits[j++];
+ set = fdt->open_fds[j++];
while (set) {
if (set & 1) {
struct file * file = xchg(&fdt->fd[i], NULL);
diff --git a/kernel/futex.c b/kernel/futex.c
index 72efa1e4359a..e2b0fb9a0b3b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -59,6 +59,7 @@
#include <linux/magic.h>
#include <linux/pid.h>
#include <linux/nsproxy.h>
+#include <linux/ptrace.h>
#include <asm/futex.h>
@@ -2443,40 +2444,31 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
{
struct robust_list_head __user *head;
unsigned long ret;
- const struct cred *cred = current_cred(), *pcred;
+ struct task_struct *p;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
+ WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
+
+ rcu_read_lock();
+
+ ret = -ESRCH;
if (!pid)
- head = current->robust_list;
+ p = current;
else {
- struct task_struct *p;
-
- ret = -ESRCH;
- rcu_read_lock();
p = find_task_by_vpid(pid);
if (!p)
goto err_unlock;
- ret = -EPERM;
- pcred = __task_cred(p);
- /* If victim is in different user_ns, then uids are not
- comparable, so we must have CAP_SYS_PTRACE */
- if (cred->user->user_ns != pcred->user->user_ns) {
- if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
- goto ok;
- }
- /* If victim is in same user_ns, then uids are comparable */
- if (cred->euid != pcred->euid &&
- cred->euid != pcred->uid &&
- !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
-ok:
- head = p->robust_list;
- rcu_read_unlock();
}
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ))
+ goto err_unlock;
+
+ head = p->robust_list;
+ rcu_read_unlock();
+
if (put_user(sizeof(*head), len_ptr))
return -EFAULT;
return put_user(head, head_ptr);
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 5f9e689dc8f0..83e368b005fc 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -10,6 +10,7 @@
#include <linux/compat.h>
#include <linux/nsproxy.h>
#include <linux/futex.h>
+#include <linux/ptrace.h>
#include <asm/uaccess.h>
@@ -136,40 +137,31 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
{
struct compat_robust_list_head __user *head;
unsigned long ret;
- const struct cred *cred = current_cred(), *pcred;
+ struct task_struct *p;
if (!futex_cmpxchg_enabled)
return -ENOSYS;
+ WARN_ONCE(1, "deprecated: get_robust_list will be deleted in 2013.\n");
+
+ rcu_read_lock();
+
+ ret = -ESRCH;
if (!pid)
- head = current->compat_robust_list;
+ p = current;
else {
- struct task_struct *p;
-
- ret = -ESRCH;
- rcu_read_lock();
p = find_task_by_vpid(pid);
if (!p)
goto err_unlock;
- ret = -EPERM;
- pcred = __task_cred(p);
- /* If victim is in different user_ns, then uids are not
- comparable, so we must have CAP_SYS_PTRACE */
- if (cred->user->user_ns != pcred->user->user_ns) {
- if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
- goto ok;
- }
- /* If victim is in same user_ns, then uids are comparable */
- if (cred->euid != pcred->euid &&
- cred->euid != pcred->uid &&
- !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
- goto err_unlock;
-ok:
- head = p->compat_robust_list;
- rcu_read_unlock();
}
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ))
+ goto err_unlock;
+
+ head = p->compat_robust_list;
+ rcu_read_unlock();
+
if (put_user(sizeof(*head), len_ptr))
return -EFAULT;
return put_user(ptr_to_compat(head), head_ptr);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 5a38bf4de641..cf1a4a68ce44 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -13,7 +13,7 @@ config GENERIC_HARDIRQS
# Options selectable by the architecture code
# Make sparse irq Kconfig switch below available
-config HAVE_SPARSE_IRQ
+config MAY_HAVE_SPARSE_IRQ
bool
# Enable the generic irq autoprobe mechanism
@@ -56,13 +56,22 @@ config GENERIC_IRQ_CHIP
config IRQ_DOMAIN
bool
+config IRQ_DOMAIN_DEBUG
+ bool "Expose hardware/virtual IRQ mapping via debugfs"
+ depends on IRQ_DOMAIN && DEBUG_FS
+ help
+ This option will show the mapping relationship between hardware irq
+ numbers and Linux irq numbers. The mapping is exposed via debugfs
+ in the file "virq_mapping".
+
+ If you don't know what this means you don't need it.
+
# Support forced irq threading
config IRQ_FORCED_THREADING
bool
config SPARSE_IRQ
- bool "Support sparse irq numbering"
- depends on HAVE_SPARSE_IRQ
+ bool "Support sparse irq numbering" if MAY_HAVE_SPARSE_IRQ
---help---
Sparse irq numbering is useful for distro kernels that want
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 6ff84e6a954c..bdb180325551 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -54,14 +54,18 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action)
{
/*
- * Wake up the handler thread for this action. In case the
- * thread crashed and was killed we just pretend that we
- * handled the interrupt. The hardirq handler has disabled the
- * device interrupt, so no irq storm is lurking. If the
+ * In case the thread crashed and was killed we just pretend that
+ * we handled the interrupt. The hardirq handler has disabled the
+ * device interrupt, so no irq storm is lurking.
+ */
+ if (action->thread->flags & PF_EXITING)
+ return;
+
+ /*
+ * Wake up the handler thread for this action. If the
* RUNTHREAD bit is already set, nothing to do.
*/
- if ((action->thread->flags & PF_EXITING) ||
- test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ if (test_and_set_bit(IRQTF_RUNTHREAD, &action->thread_flags))
return;
/*
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index af48e59bc2ff..3601f3fbf67c 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -632,7 +632,7 @@ unsigned int irq_linear_revmap(struct irq_domain *domain,
return revmap[hwirq];
}
-#ifdef CONFIG_VIRQ_DEBUG
+#ifdef CONFIG_IRQ_DOMAIN_DEBUG
static int virq_debug_show(struct seq_file *m, void *private)
{
unsigned long flags;
@@ -668,7 +668,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
data = irq_desc_get_chip_data(desc);
seq_printf(m, "0x%16p ", data);
- if (desc->irq_data.domain->of_node)
+ if (desc->irq_data.domain && desc->irq_data.domain->of_node)
p = desc->irq_data.domain->of_node->full_name;
else
p = none;
@@ -695,14 +695,14 @@ static const struct file_operations virq_debug_fops = {
static int __init irq_debugfs_init(void)
{
- if (debugfs_create_file("virq_mapping", S_IRUGO, powerpc_debugfs_root,
+ if (debugfs_create_file("irq_domain_mapping", S_IRUGO, NULL,
NULL, &virq_debug_fops) == NULL)
return -ENOMEM;
return 0;
}
__initcall(irq_debugfs_init);
-#endif /* CONFIG_VIRQ_DEBUG */
+#endif /* CONFIG_IRQ_DOMAIN_DEBUG */
int irq_domain_simple_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hwirq)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index b0ccd1ac2d6a..89a3ea82569b 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -282,7 +282,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct cpumask *set = irq_default_affinity;
- int ret;
+ int ret, node = desc->irq_data.node;
/* Excludes PER_CPU and NO_BALANCE interrupts */
if (!irq_can_set_affinity(irq))
@@ -301,6 +301,13 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
}
cpumask_and(mask, cpu_online_mask, set);
+ if (node != NUMA_NO_NODE) {
+ const struct cpumask *nodemask = cpumask_of_node(node);
+
+ /* make sure at least one of the cpus in nodemask is online */
+ if (cpumask_intersects(mask, nodemask))
+ cpumask_and(mask, mask, nodemask);
+ }
ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
switch (ret) {
case IRQ_SET_MASK_OK:
@@ -645,7 +652,7 @@ static int irq_wait_for_interrupt(struct irqaction *action)
* is marked MASKED.
*/
static void irq_finalize_oneshot(struct irq_desc *desc,
- struct irqaction *action, bool force)
+ struct irqaction *action)
{
if (!(desc->istate & IRQS_ONESHOT))
return;
@@ -679,7 +686,7 @@ again:
* we would clear the threads_oneshot bit of this thread which
* was just set.
*/
- if (!force && test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+ if (test_bit(IRQTF_RUNTHREAD, &action->thread_flags))
goto out_unlock;
desc->threads_oneshot &= ~action->thread_mask;
@@ -739,7 +746,7 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
local_bh_disable();
ret = action->thread_fn(action->irq, action->dev_id);
- irq_finalize_oneshot(desc, action, false);
+ irq_finalize_oneshot(desc, action);
local_bh_enable();
return ret;
}
@@ -755,7 +762,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,
irqreturn_t ret;
ret = action->thread_fn(action->irq, action->dev_id);
- irq_finalize_oneshot(desc, action, false);
+ irq_finalize_oneshot(desc, action);
return ret;
}
@@ -844,7 +851,7 @@ void exit_irq_thread(void)
wake_threads_waitq(desc);
/* Prevent a stale desc->threads_oneshot */
- irq_finalize_oneshot(desc, action, true);
+ irq_finalize_oneshot(desc, action);
}
static void irq_setup_forced_threading(struct irqaction *new)
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 47420908fba0..c3c89751b327 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -43,12 +43,16 @@ void irq_move_masked_irq(struct irq_data *idata)
* masking the irqs.
*/
if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
- < nr_cpu_ids))
- if (!chip->irq_set_affinity(&desc->irq_data,
- desc->pending_mask, false)) {
+ < nr_cpu_ids)) {
+ int ret = chip->irq_set_affinity(&desc->irq_data,
+ desc->pending_mask, false);
+ switch (ret) {
+ case IRQ_SET_MASK_OK:
cpumask_copy(desc->irq_data.affinity, desc->pending_mask);
+ case IRQ_SET_MASK_OK_NOCOPY:
irq_set_thread_affinity(desc);
}
+ }
cpumask_clear(desc->pending_mask);
}
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index c3c46c72046e..0c56d44b9fd5 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -5,6 +5,7 @@
* context. The enqueueing is NMI-safe.
*/
+#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
diff --git a/kernel/kexec.c b/kernel/kexec.c
index a6a675cb9818..4e2e472f6aeb 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -37,7 +37,6 @@
#include <asm/page.h>
#include <asm/uaccess.h>
#include <asm/io.h>
-#include <asm/system.h>
#include <asm/sections.h>
/* Per cpu memory for storing cpu states in case of system crash. */
@@ -1359,6 +1358,10 @@ static int __init parse_crashkernel_simple(char *cmdline,
if (*cur == '@')
*crash_base = memparse(cur+1, &cur);
+ else if (*cur != ' ' && *cur != '\0') {
+ pr_warning("crashkernel: unrecognized char\n");
+ return -EINVAL;
+ }
return 0;
}
@@ -1462,7 +1465,9 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_SYMBOL(init_uts_ns);
VMCOREINFO_SYMBOL(node_online_map);
+#ifdef CONFIG_MMU
VMCOREINFO_SYMBOL(swapper_pg_dir);
+#endif
VMCOREINFO_SYMBOL(_stext);
VMCOREINFO_SYMBOL(vmlist);
diff --git a/kernel/module.c b/kernel/module.c
index 2c932760fd33..78ac6ec1e425 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -105,6 +105,7 @@ struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
/* Block module loading/unloading? */
int modules_disabled = 0;
+core_param(nomodule, modules_disabled, bint, 0);
/* Waiting for a module to finish initializing? */
static DECLARE_WAIT_QUEUE_HEAD(module_wq);
@@ -903,6 +904,36 @@ static ssize_t show_refcnt(struct module_attribute *mattr,
static struct module_attribute modinfo_refcnt =
__ATTR(refcnt, 0444, show_refcnt, NULL);
+void __module_get(struct module *module)
+{
+ if (module) {
+ preempt_disable();
+ __this_cpu_inc(module->refptr->incs);
+ trace_module_get(module, _RET_IP_);
+ preempt_enable();
+ }
+}
+EXPORT_SYMBOL(__module_get);
+
+bool try_module_get(struct module *module)
+{
+ bool ret = true;
+
+ if (module) {
+ preempt_disable();
+
+ if (likely(module_is_live(module))) {
+ __this_cpu_inc(module->refptr->incs);
+ trace_module_get(module, _RET_IP_);
+ } else
+ ret = false;
+
+ preempt_enable();
+ }
+ return ret;
+}
+EXPORT_SYMBOL(try_module_get);
+
void module_put(struct module *module)
{
if (module) {
@@ -2380,8 +2411,7 @@ static int copy_and_check(struct load_info *info,
return -ENOEXEC;
/* Suck in entire file: we'll want most of it. */
- /* vmalloc barfs on "unusual" numbers. Check here */
- if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
+ if ((hdr = vmalloc(len)) == NULL)
return -ENOMEM;
if (copy_from_user(hdr, umod, len) != 0) {
@@ -2922,7 +2952,8 @@ static struct module *load_module(void __user *umod,
mutex_unlock(&module_mutex);
/* Module is ready to execute: parsing args may do that. */
- err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
+ err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
+ -32768, 32767, NULL);
if (err < 0)
goto unlink;
diff --git a/kernel/padata.c b/kernel/padata.c
index 6f10eb285ece..89fe3d1b9efb 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -1,6 +1,8 @@
/*
* padata.c - generic interface to process data streams in parallel
*
+ * See Documentation/padata.txt for an api documentation.
+ *
* Copyright (C) 2008, 2009 secunet Security Networks AG
* Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
*
@@ -354,13 +356,13 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
return -ENOMEM;
- cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask);
+ cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
free_cpumask_var(pd->cpumask.cbcpu);
return -ENOMEM;
}
- cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask);
+ cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
return 0;
}
@@ -564,7 +566,7 @@ EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
static bool padata_validate_cpumask(struct padata_instance *pinst,
const struct cpumask *cpumask)
{
- if (!cpumask_intersects(cpumask, cpu_active_mask)) {
+ if (!cpumask_intersects(cpumask, cpu_online_mask)) {
pinst->flags |= PADATA_INVALID;
return false;
}
@@ -678,7 +680,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
{
struct parallel_data *pd;
- if (cpumask_test_cpu(cpu, cpu_active_mask)) {
+ if (cpumask_test_cpu(cpu, cpu_online_mask)) {
pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
pinst->cpumask.cbcpu);
if (!pd)
@@ -746,6 +748,9 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
return -ENOMEM;
padata_replace(pinst, pd);
+
+ cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
+ cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
}
return 0;
diff --git a/kernel/params.c b/kernel/params.c
index 47f5bf12434a..f37d82631347 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -87,6 +87,8 @@ static int parse_one(char *param,
char *val,
const struct kernel_param *params,
unsigned num_params,
+ s16 min_level,
+ s16 max_level,
int (*handle_unknown)(char *param, char *val))
{
unsigned int i;
@@ -95,6 +97,9 @@ static int parse_one(char *param,
/* Find parameter */
for (i = 0; i < num_params; i++) {
if (parameq(param, params[i].name)) {
+ if (params[i].level < min_level
+ || params[i].level > max_level)
+ return 0;
/* No one handled NULL, so do it here. */
if (!val && params[i].ops->set != param_set_bool
&& params[i].ops->set != param_set_bint)
@@ -174,6 +179,8 @@ int parse_args(const char *name,
char *args,
const struct kernel_param *params,
unsigned num,
+ s16 min_level,
+ s16 max_level,
int (*unknown)(char *param, char *val))
{
char *param, *val;
@@ -189,7 +196,8 @@ int parse_args(const char *name,
args = next_arg(args, &param, &val);
irq_was_disabled = irqs_disabled();
- ret = parse_one(param, val, params, num, unknown);
+ ret = parse_one(param, val, params, num,
+ min_level, max_level, unknown);
if (irq_was_disabled && !irqs_disabled()) {
printk(KERN_WARNING "parse_args(): option '%s' enabled "
"irq's!\n", param);
@@ -297,35 +305,18 @@ EXPORT_SYMBOL(param_ops_charp);
/* Actually could be a bool or an int, for historical reasons. */
int param_set_bool(const char *val, const struct kernel_param *kp)
{
- bool v;
- int ret;
-
/* No equals means "set"... */
if (!val) val = "1";
/* One of =[yYnN01] */
- ret = strtobool(val, &v);
- if (ret)
- return ret;
-
- if (kp->flags & KPARAM_ISBOOL)
- *(bool *)kp->arg = v;
- else
- *(int *)kp->arg = v;
- return 0;
+ return strtobool(val, kp->arg);
}
EXPORT_SYMBOL(param_set_bool);
int param_get_bool(char *buffer, const struct kernel_param *kp)
{
- bool val;
- if (kp->flags & KPARAM_ISBOOL)
- val = *(bool *)kp->arg;
- else
- val = *(int *)kp->arg;
-
/* Y and N chosen as being relatively non-coder friendly */
- return sprintf(buffer, "%c", val ? 'Y' : 'N');
+ return sprintf(buffer, "%c", *(bool *)kp->arg ? 'Y' : 'N');
}
EXPORT_SYMBOL(param_get_bool);
@@ -343,7 +334,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp)
struct kernel_param dummy;
dummy.arg = &boolval;
- dummy.flags = KPARAM_ISBOOL;
ret = param_set_bool(val, &dummy);
if (ret == 0)
*(bool *)kp->arg = !boolval;
@@ -372,7 +362,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp)
/* Match bool exactly, by re-using it. */
boolkp = *kp;
boolkp.arg = &v;
- boolkp.flags |= KPARAM_ISBOOL;
ret = param_set_bool(val, &boolkp);
if (ret == 0)
@@ -393,7 +382,7 @@ static int param_array(const char *name,
unsigned int min, unsigned int max,
void *elem, int elemsize,
int (*set)(const char *, const struct kernel_param *kp),
- u16 flags,
+ s16 level,
unsigned int *num)
{
int ret;
@@ -403,7 +392,7 @@ static int param_array(const char *name,
/* Get the name right for errors. */
kp.name = name;
kp.arg = elem;
- kp.flags = flags;
+ kp.level = level;
*num = 0;
/* We expect a comma-separated list of values. */
@@ -444,7 +433,7 @@ static int param_array_set(const char *val, const struct kernel_param *kp)
unsigned int temp_num;
return param_array(kp->name, val, 1, arr->max, arr->elem,
- arr->elemsize, arr->ops->set, kp->flags,
+ arr->elemsize, arr->ops->set, kp->level,
arr->num ?: &temp_num);
}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 17b232869a04..57bc1fd35b3c 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -15,6 +15,7 @@
#include <linux/acct.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
+#include <linux/reboot.h>
#define BITS_PER_PAGE (PAGE_SIZE*8)
@@ -183,6 +184,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
+ if (pid_ns->reboot)
+ current->signal->group_exit_code = pid_ns->reboot;
+
acct_exit_ns(pid_ns);
return;
}
@@ -217,6 +221,35 @@ static struct ctl_table pid_ns_ctl_table[] = {
static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
+int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
+{
+ if (pid_ns == &init_pid_ns)
+ return 0;
+
+ switch (cmd) {
+ case LINUX_REBOOT_CMD_RESTART2:
+ case LINUX_REBOOT_CMD_RESTART:
+ pid_ns->reboot = SIGHUP;
+ break;
+
+ case LINUX_REBOOT_CMD_POWER_OFF:
+ case LINUX_REBOOT_CMD_HALT:
+ pid_ns->reboot = SIGINT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ read_lock(&tasklist_lock);
+ force_sig(SIGKILL, pid_ns->child_reaper);
+ read_unlock(&tasklist_lock);
+
+ do_exit(0);
+
+ /* Not reached */
+ return 0;
+}
+
static __init int pid_namespaces_init(void)
{
pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index b152f74f02de..6850f53e02d8 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -10,7 +10,6 @@
#include <linux/export.h>
#include <linux/rwsem.h>
-#include <asm/system.h>
#include <linux/atomic.h>
/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 503d6426126d..4603b9d8f30a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -73,6 +73,7 @@
#include <linux/init_task.h>
#include <linux/binfmts.h>
+#include <asm/switch_to.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
#include <asm/mutex.h>
@@ -1264,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process);
*/
static int select_fallback_rq(int cpu, struct task_struct *p)
{
- int dest_cpu;
const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+ enum { cpuset, possible, fail } state = cpuset;
+ int dest_cpu;
/* Look for allowed, online CPU in same node. */
- for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+ for_each_cpu(dest_cpu, nodemask) {
+ if (!cpu_online(dest_cpu))
+ continue;
+ if (!cpu_active(dest_cpu))
+ continue;
if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
return dest_cpu;
+ }
+
+ for (;;) {
+ /* Any allowed, online CPU? */
+ for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
+ if (!cpu_online(dest_cpu))
+ continue;
+ if (!cpu_active(dest_cpu))
+ continue;
+ goto out;
+ }
- /* Any allowed, online CPU? */
- dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask);
- if (dest_cpu < nr_cpu_ids)
- return dest_cpu;
+ switch (state) {
+ case cpuset:
+ /* No more Mr. Nice Guy. */
+ cpuset_cpus_allowed_fallback(p);
+ state = possible;
+ break;
- /* No more Mr. Nice Guy. */
- dest_cpu = cpuset_cpus_allowed_fallback(p);
- /*
- * Don't tell them about moving exiting tasks or
- * kernel threads (both mm NULL), since they never
- * leave kernel.
- */
- if (p->mm && printk_ratelimit()) {
- printk_sched("process %d (%s) no longer affine to cpu%d\n",
- task_pid_nr(p), p->comm, cpu);
+ case possible:
+ do_set_cpus_allowed(p, cpu_possible_mask);
+ state = fail;
+ break;
+
+ case fail:
+ BUG();
+ break;
+ }
+ }
+
+out:
+ if (state != cpuset) {
+ /*
+ * Don't tell them about moving exiting tasks or
+ * kernel threads (both mm NULL), since they never
+ * leave kernel.
+ */
+ if (p->mm && printk_ratelimit()) {
+ printk_sched("process %d (%s) no longer affine to cpu%d\n",
+ task_pid_nr(p), p->comm, cpu);
+ }
}
return dest_cpu;
@@ -1933,6 +1964,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
local_irq_enable();
#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
+ finish_arch_post_lock_switch();
fire_sched_in_preempt_notifiers(current);
if (mm)
@@ -3070,8 +3102,6 @@ EXPORT_SYMBOL(sub_preempt_count);
*/
static noinline void __schedule_bug(struct task_struct *prev)
{
- struct pt_regs *regs = get_irq_regs();
-
if (oops_in_progress)
return;
@@ -3082,11 +3112,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
print_modules();
if (irqs_disabled())
print_irqtrace_events(prev);
-
- if (regs)
- show_regs(regs);
- else
- dump_stack();
+ dump_stack();
}
/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 94340c7544a9..0d97ebdc58f0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -416,8 +416,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
#endif /* CONFIG_FAIR_GROUP_SCHED */
-static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
- unsigned long delta_exec);
+static __always_inline
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec);
/**************************************************************
* Scheduling class tree data structure manipulation methods:
@@ -1162,7 +1162,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
__clear_buddies_skip(se);
}
-static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
+static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
@@ -1546,8 +1546,8 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
resched_task(rq_of(cfs_rq)->curr);
}
-static __always_inline void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
- unsigned long delta_exec)
+static __always_inline
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec)
{
if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
return;
@@ -2073,11 +2073,11 @@ void unthrottle_offline_cfs_rqs(struct rq *rq)
}
#else /* CONFIG_CFS_BANDWIDTH */
-static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
- unsigned long delta_exec) {}
+static __always_inline
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec) {}
static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
-static void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b60dad720173..44af55e6d5d0 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1428,7 +1428,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
next_idx:
if (idx >= MAX_RT_PRIO)
continue;
- if (next && next->prio < idx)
+ if (next && next->prio <= idx)
continue;
list_for_each_entry(rt_se, array->queue + idx, run_list) {
struct task_struct *p;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 42b1f304b044..fb3acba4d52e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -681,6 +681,9 @@ static inline int task_running(struct rq *rq, struct task_struct *p)
#ifndef finish_arch_switch
# define finish_arch_switch(prev) do { } while (0)
#endif
+#ifndef finish_arch_post_lock_switch
+# define finish_arch_post_lock_switch() do { } while (0)
+#endif
#ifndef __ARCH_WANT_UNLOCKED_CTXSW
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
diff --git a/kernel/signal.c b/kernel/signal.c
index d523da02dd14..17afcaf582d0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -36,6 +36,7 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/siginfo.h>
+#include <asm/cacheflush.h>
#include "audit.h" /* audit_signal_info() */
/*
diff --git a/kernel/smp.c b/kernel/smp.c
index db197d60489b..2f8b10ecf759 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -701,3 +701,93 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
return ret;
}
EXPORT_SYMBOL(on_each_cpu);
+
+/**
+ * on_each_cpu_mask(): Run a function on processors specified by
+ * cpumask, which may include the local processor.
+ * @mask: The set of cpus to run on (only runs on online subset).
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed
+ * on other CPUs.
+ *
+ * If @wait is true, then returns once @func has returned.
+ *
+ * You must not call this function with disabled interrupts or
+ * from a hardware interrupt handler or from a bottom half handler.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
+ void *info, bool wait)
+{
+ int cpu = get_cpu();
+
+ smp_call_function_many(mask, func, info, wait);
+ if (cpumask_test_cpu(cpu, mask)) {
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ }
+ put_cpu();
+}
+EXPORT_SYMBOL(on_each_cpu_mask);
+
+/*
+ * on_each_cpu_cond(): Call a function on each processor for which
+ * the supplied function cond_func returns true, optionally waiting
+ * for all the required CPUs to finish. This may include the local
+ * processor.
+ * @cond_func: A callback function that is passed a cpu id and
+ * the the info parameter. The function is called
+ * with preemption disabled. The function should
+ * return a blooean value indicating whether to IPI
+ * the specified CPU.
+ * @func: The function to run on all applicable CPUs.
+ * This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to both functions.
+ * @wait: If true, wait (atomically) until function has
+ * completed on other CPUs.
+ * @gfp_flags: GFP flags to use when allocating the cpumask
+ * used internally by the function.
+ *
+ * The function might sleep if the GFP flags indicates a non
+ * atomic allocation is allowed.
+ *
+ * Preemption is disabled to protect against CPUs going offline but not online.
+ * CPUs going online during the call will not be seen or sent an IPI.
+ *
+ * You must not call this function with disabled interrupts or
+ * from a hardware interrupt handler or from a bottom half handler.
+ */
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+ smp_call_func_t func, void *info, bool wait,
+ gfp_t gfp_flags)
+{
+ cpumask_var_t cpus;
+ int cpu, ret;
+
+ might_sleep_if(gfp_flags & __GFP_WAIT);
+
+ if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
+ preempt_disable();
+ for_each_online_cpu(cpu)
+ if (cond_func(cpu, info))
+ cpumask_set_cpu(cpu, cpus);
+ on_each_cpu_mask(cpus, func, info, wait);
+ preempt_enable();
+ free_cpumask_var(cpus);
+ } else {
+ /*
+ * No free cpumask, bother. No matter, we'll
+ * just have to IPI them one by one.
+ */
+ preempt_disable();
+ for_each_online_cpu(cpu)
+ if (cond_func(cpu, info)) {
+ ret = smp_call_function_single(cpu, func,
+ info, wait);
+ WARN_ON_ONCE(!ret);
+ }
+ preempt_enable();
+ }
+}
+EXPORT_SYMBOL(on_each_cpu_cond);
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 84c7d96918bf..5cdd8065a3ce 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -163,7 +163,7 @@ void __lockfunc _raw_spin_lock_bh(raw_spinlock_t *lock)
EXPORT_SYMBOL(_raw_spin_lock_bh);
#endif
-#ifndef CONFIG_INLINE_SPIN_UNLOCK
+#ifdef CONFIG_UNINLINE_SPIN_UNLOCK
void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock)
{
__raw_spin_unlock(lock);
diff --git a/kernel/sys.c b/kernel/sys.c
index 9eb7fcab8df6..e7006eb6c1e4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -444,6 +444,15 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
magic2 != LINUX_REBOOT_MAGIC2C))
return -EINVAL;
+ /*
+ * If pid namespaces are enabled and the current task is in a child
+ * pid_namespace, the command is handled by reboot_pid_ns() which will
+ * call do_exit().
+ */
+ ret = reboot_pid_ns(task_active_pid_ns(current), cmd);
+ if (ret)
+ return ret;
+
/* Instead of trying to make the power_off code look like
* halt when pm_power_off is not set do it the easy way.
*/
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d48ff4fd44c3..52b3a06a02f8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -23,6 +23,7 @@
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
+#include <linux/bitmap.h>
#include <linux/signal.h>
#include <linux/printk.h>
#include <linux/proc_fs.h>
@@ -68,6 +69,9 @@
#include <asm/stacktrace.h>
#include <asm/io.h>
#endif
+#ifdef CONFIG_SPARC
+#include <asm/setup.h>
+#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif
@@ -142,7 +146,6 @@ static const int cap_last_cap = CAP_LAST_CAP;
#include <linux/inotify.h>
#endif
#ifdef CONFIG_SPARC
-#include <asm/system.h>
#endif
#ifdef CONFIG_SPARC64
@@ -2393,9 +2396,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
}
}
- while (val_a <= val_b)
- set_bit(val_a++, tmp_bitmap);
-
+ bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
first = 0;
proc_skip_char(&kbuf, &left, '\n');
}
@@ -2438,8 +2439,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
if (*ppos)
bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
else
- memcpy(bitmap, tmp_bitmap,
- BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
+ bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
}
kfree(tmp_bitmap);
*lenp -= left;
diff --git a/kernel/time.c b/kernel/time.c
index 73e416db0a1e..ba744cf80696 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -163,7 +163,6 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
return error;
if (tz) {
- /* SMP safe, global irq locking makes it work. */
sys_tz = *tz;
update_vsyscall_tz();
if (firsttime) {
@@ -173,12 +172,7 @@ int do_sys_settimeofday(const struct timespec *tv, const struct timezone *tz)
}
}
if (tv)
- {
- /* SMP safe, again the code in arch/foo/time.c should
- * globally block out interrupts when it runs.
- */
return do_settimeofday(tv);
- }
return 0;
}
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 8a46f5d64504..8a538c55fc7b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -96,6 +96,11 @@ static int alarmtimer_rtc_add_device(struct device *dev,
return 0;
}
+static inline void alarmtimer_rtc_timer_init(void)
+{
+ rtc_timer_init(&rtctimer, NULL, NULL);
+}
+
static struct class_interface alarmtimer_rtc_interface = {
.add_dev = &alarmtimer_rtc_add_device,
};
@@ -117,6 +122,7 @@ static inline struct rtc_device *alarmtimer_get_rtcdev(void)
#define rtcdev (NULL)
static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
static inline void alarmtimer_rtc_interface_remove(void) { }
+static inline void alarmtimer_rtc_timer_init(void) { }
#endif
/**
@@ -783,6 +789,8 @@ static int __init alarmtimer_init(void)
.nsleep = alarm_timer_nsleep,
};
+ alarmtimer_rtc_timer_init();
+
posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock);
posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index a45ca167ab24..c9583382141a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -500,7 +500,7 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
{
u64 ret;
/*
- * We won't try to correct for more then 11% adjustments (110,000 ppm),
+ * We won't try to correct for more than 11% adjustments (110,000 ppm),
*/
ret = (u64)cs->mult * 11;
do_div(ret,100);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 6e039b144daf..f03fd83b170b 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -34,8 +34,6 @@ unsigned long tick_nsec;
static u64 tick_length;
static u64 tick_length_base;
-static struct hrtimer leap_timer;
-
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
@@ -381,70 +379,63 @@ u64 ntp_tick_length(void)
/*
- * Leap second processing. If in leap-insert state at the end of the
- * day, the system clock is set back one second; if in leap-delete
- * state, the system clock is set ahead one second.
+ * this routine handles the overflow of the microsecond field
+ *
+ * The tricky bits of code to handle the accurate clock support
+ * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
+ * They were originally developed for SUN and DEC kernels.
+ * All the kudos should go to Dave for this stuff.
+ *
+ * Also handles leap second processing, and returns leap offset
*/
-static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
+int second_overflow(unsigned long secs)
{
- enum hrtimer_restart res = HRTIMER_NORESTART;
- unsigned long flags;
+ s64 delta;
int leap = 0;
+ unsigned long flags;
spin_lock_irqsave(&ntp_lock, flags);
+
+ /*
+ * Leap second processing. If in leap-insert state at the end of the
+ * day, the system clock is set back one second; if in leap-delete
+ * state, the system clock is set ahead one second.
+ */
switch (time_state) {
case TIME_OK:
+ if (time_status & STA_INS)
+ time_state = TIME_INS;
+ else if (time_status & STA_DEL)
+ time_state = TIME_DEL;
break;
case TIME_INS:
- leap = -1;
- time_state = TIME_OOP;
- printk(KERN_NOTICE
- "Clock: inserting leap second 23:59:60 UTC\n");
- hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
- res = HRTIMER_RESTART;
+ if (secs % 86400 == 0) {
+ leap = -1;
+ time_state = TIME_OOP;
+ printk(KERN_NOTICE
+ "Clock: inserting leap second 23:59:60 UTC\n");
+ }
break;
case TIME_DEL:
- leap = 1;
- time_tai--;
- time_state = TIME_WAIT;
- printk(KERN_NOTICE
- "Clock: deleting leap second 23:59:59 UTC\n");
+ if ((secs + 1) % 86400 == 0) {
+ leap = 1;
+ time_tai--;
+ time_state = TIME_WAIT;
+ printk(KERN_NOTICE
+ "Clock: deleting leap second 23:59:59 UTC\n");
+ }
break;
case TIME_OOP:
time_tai++;
time_state = TIME_WAIT;
- /* fall through */
+ break;
+
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
}
- spin_unlock_irqrestore(&ntp_lock, flags);
- /*
- * We have to call this outside of the ntp_lock to keep
- * the proper locking hierarchy
- */
- if (leap)
- timekeeping_leap_insert(leap);
-
- return res;
-}
-
-/*
- * this routine handles the overflow of the microsecond field
- *
- * The tricky bits of code to handle the accurate clock support
- * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
- * They were originally developed for SUN and DEC kernels.
- * All the kudos should go to Dave for this stuff.
- */
-void second_overflow(void)
-{
- s64 delta;
- unsigned long flags;
-
- spin_lock_irqsave(&ntp_lock, flags);
/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -481,15 +472,17 @@ void second_overflow(void)
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
<< NTP_SCALE_SHIFT;
time_adjust = 0;
+
+
+
out:
spin_unlock_irqrestore(&ntp_lock, flags);
+
+ return leap;
}
#ifdef CONFIG_GENERIC_CMOS_UPDATE
-/* Disable the cmos update - used by virtualization and embedded */
-int no_sync_cmos_clock __read_mostly;
-
static void sync_cmos_clock(struct work_struct *work);
static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock);
@@ -536,35 +529,13 @@ static void sync_cmos_clock(struct work_struct *work)
static void notify_cmos_timer(void)
{
- if (!no_sync_cmos_clock)
- schedule_delayed_work(&sync_cmos_work, 0);
+ schedule_delayed_work(&sync_cmos_work, 0);
}
#else
static inline void notify_cmos_timer(void) { }
#endif
-/*
- * Start the leap seconds timer:
- */
-static inline void ntp_start_leap_timer(struct timespec *ts)
-{
- long now = ts->tv_sec;
-
- if (time_status & STA_INS) {
- time_state = TIME_INS;
- now += 86400 - now % 86400;
- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
-
- return;
- }
-
- if (time_status & STA_DEL) {
- time_state = TIME_DEL;
- now += 86400 - (now + 1) % 86400;
- hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
- }
-}
/*
* Propagate a new txc->status value into the NTP state:
@@ -589,22 +560,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
time_status &= STA_RONLY;
time_status |= txc->status & ~STA_RONLY;
- switch (time_state) {
- case TIME_OK:
- ntp_start_leap_timer(ts);
- break;
- case TIME_INS:
- case TIME_DEL:
- time_state = TIME_OK;
- ntp_start_leap_timer(ts);
- case TIME_WAIT:
- if (!(time_status & (STA_INS | STA_DEL)))
- time_state = TIME_OK;
- break;
- case TIME_OOP:
- hrtimer_restart(&leap_timer);
- break;
- }
}
/*
* Called with the xtime lock held, so we can access and modify
@@ -686,9 +641,6 @@ int do_adjtimex(struct timex *txc)
(txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ))
return -EINVAL;
-
- if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
- hrtimer_cancel(&leap_timer);
}
if (txc->modes & ADJ_SETOFFSET) {
@@ -1010,6 +962,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
void __init ntp_init(void)
{
ntp_clear();
- hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- leap_timer.function = ntp_leap_second;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 15be32e19c6e..d66b21308f7c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp)
}
-void timekeeping_leap_insert(int leapsecond)
-{
- unsigned long flags;
-
- write_seqlock_irqsave(&timekeeper.lock, flags);
- timekeeper.xtime.tv_sec += leapsecond;
- timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
- timekeeping_update(false);
- write_sequnlock_irqrestore(&timekeeper.lock, flags);
-
-}
-
/**
* timekeeping_forward_now - update clock to the current time
*
@@ -448,9 +436,12 @@ EXPORT_SYMBOL(timekeeping_inject_offset);
static int change_clocksource(void *data)
{
struct clocksource *new, *old;
+ unsigned long flags;
new = (struct clocksource *) data;
+ write_seqlock_irqsave(&timekeeper.lock, flags);
+
timekeeping_forward_now();
if (!new->enable || new->enable(new) == 0) {
old = timekeeper.clock;
@@ -458,6 +449,10 @@ static int change_clocksource(void *data)
if (old->disable)
old->disable(old);
}
+ timekeeping_update(true);
+
+ write_sequnlock_irqrestore(&timekeeper.lock, flags);
+
return 0;
}
@@ -827,7 +822,7 @@ static void timekeeping_adjust(s64 offset)
int adj;
/*
- * The point of this is to check if the error is greater then half
+ * The point of this is to check if the error is greater than half
* an interval.
*
* First we shift it down from NTP_SHIFT to clocksource->shifted nsecs.
@@ -835,7 +830,7 @@ static void timekeeping_adjust(s64 offset)
* Note we subtract one in the shift, so that error is really error*2.
* This "saves" dividing(shifting) interval twice, but keeps the
* (error > interval) comparison as still measuring if error is
- * larger then half an interval.
+ * larger than half an interval.
*
* Note: It does not "save" on aggravation when reading the code.
*/
@@ -843,7 +838,7 @@ static void timekeeping_adjust(s64 offset)
if (error > interval) {
/*
* We now divide error by 4(via shift), which checks if
- * the error is greater then twice the interval.
+ * the error is greater than twice the interval.
* If it is greater, we need a bigadjust, if its smaller,
* we can adjust by 1.
*/
@@ -874,13 +869,15 @@ static void timekeeping_adjust(s64 offset)
} else /* No adjustment needed */
return;
- WARN_ONCE(timekeeper.clock->maxadj &&
- (timekeeper.mult + adj > timekeeper.clock->mult +
- timekeeper.clock->maxadj),
- "Adjusting %s more then 11%% (%ld vs %ld)\n",
+ if (unlikely(timekeeper.clock->maxadj &&
+ (timekeeper.mult + adj >
+ timekeeper.clock->mult + timekeeper.clock->maxadj))) {
+ printk_once(KERN_WARNING
+ "Adjusting %s more than 11%% (%ld vs %ld)\n",
timekeeper.clock->name, (long)timekeeper.mult + adj,
(long)timekeeper.clock->mult +
timekeeper.clock->maxadj);
+ }
/*
* So the following can be confusing.
*
@@ -952,7 +949,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
u64 nsecps = (u64)NSEC_PER_SEC << timekeeper.shift;
u64 raw_nsecs;
- /* If the offset is smaller then a shifted interval, do nothing */
+ /* If the offset is smaller than a shifted interval, do nothing */
if (offset < timekeeper.cycle_interval<<shift)
return offset;
@@ -962,9 +959,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
while (timekeeper.xtime_nsec >= nsecps) {
+ int leap;
timekeeper.xtime_nsec -= nsecps;
timekeeper.xtime.tv_sec++;
- second_overflow();
+ leap = second_overflow(timekeeper.xtime.tv_sec);
+ timekeeper.xtime.tv_sec += leap;
}
/* Accumulate raw time */
@@ -1018,13 +1017,13 @@ static void update_wall_time(void)
* With NO_HZ we may have to accumulate many cycle_intervals
* (think "ticks") worth of time at once. To do this efficiently,
* we calculate the largest doubling multiple of cycle_intervals
- * that is smaller then the offset. We then accumulate that
+ * that is smaller than the offset. We then accumulate that
* chunk in one go, and then try to consume the next smaller
* doubled multiple.
*/
shift = ilog2(offset) - ilog2(timekeeper.cycle_interval);
shift = max(0, shift);
- /* Bound shift to one less then what overflows tick_length */
+ /* Bound shift to one less than what overflows tick_length */
maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
shift = min(shift, maxshift);
while (offset >= timekeeper.cycle_interval) {
@@ -1072,12 +1071,14 @@ static void update_wall_time(void)
/*
* Finally, make sure that after the rounding
- * xtime.tv_nsec isn't larger then NSEC_PER_SEC
+ * xtime.tv_nsec isn't larger than NSEC_PER_SEC
*/
if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
+ int leap;
timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
timekeeper.xtime.tv_sec++;
- second_overflow();
+ leap = second_overflow(timekeeper.xtime.tv_sec);
+ timekeeper.xtime.tv_sec += leap;
}
timekeeping_update(false);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index cd3134510f3d..a1d2849f2473 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -141,7 +141,7 @@ if FTRACE
config FUNCTION_TRACER
bool "Kernel Function Tracer"
depends on HAVE_FUNCTION_TRACER
- select FRAME_POINTER if !ARM_UNWIND && !S390 && !MICROBLAZE
+ select FRAME_POINTER if !ARM_UNWIND && !PPC && !S390 && !MICROBLAZE
select KALLSYMS
select GENERIC_TRACER
select CONTEXT_SWITCH_TRACER
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 867bd1dd2dd0..0fa92f677c92 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -249,7 +249,8 @@ static void update_ftrace_function(void)
#else
__ftrace_trace_function = func;
#endif
- ftrace_trace_function = ftrace_test_stop_func;
+ ftrace_trace_function =
+ (func == ftrace_stub) ? func : ftrace_test_stop_func;
#endif
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f5b7b5c1195b..cf8d11e91efd 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -154,33 +154,10 @@ enum {
static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON;
-#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
-
-/**
- * tracing_on - enable all tracing buffers
- *
- * This function enables all tracing buffers that may have been
- * disabled with tracing_off.
- */
-void tracing_on(void)
-{
- set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
-}
-EXPORT_SYMBOL_GPL(tracing_on);
+/* Used for individual buffers (after the counter) */
+#define RB_BUFFER_OFF (1 << 20)
-/**
- * tracing_off - turn off all tracing buffers
- *
- * This function stops all tracing buffers from recording data.
- * It does not disable any overhead the tracers themselves may
- * be causing. This function simply causes all recording to
- * the ring buffers to fail.
- */
-void tracing_off(void)
-{
- clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags);
-}
-EXPORT_SYMBOL_GPL(tracing_off);
+#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
/**
* tracing_off_permanent - permanently disable ring buffers
@@ -193,15 +170,6 @@ void tracing_off_permanent(void)
set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags);
}
-/**
- * tracing_is_on - show state of ring buffers enabled
- */
-int tracing_is_on(void)
-{
- return ring_buffer_flags == RB_BUFFERS_ON;
-}
-EXPORT_SYMBOL_GPL(tracing_is_on);
-
#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
#define RB_ALIGNMENT 4U
#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
@@ -2619,6 +2587,63 @@ void ring_buffer_record_enable(struct ring_buffer *buffer)
EXPORT_SYMBOL_GPL(ring_buffer_record_enable);
/**
+ * ring_buffer_record_off - stop all writes into the buffer
+ * @buffer: The ring buffer to stop writes to.
+ *
+ * This prevents all writes to the buffer. Any attempt to write
+ * to the buffer after this will fail and return NULL.
+ *
+ * This is different than ring_buffer_record_disable() as
+ * it works like an on/off switch, where as the disable() verison
+ * must be paired with a enable().
+ */
+void ring_buffer_record_off(struct ring_buffer *buffer)
+{
+ unsigned int rd;
+ unsigned int new_rd;
+
+ do {
+ rd = atomic_read(&buffer->record_disabled);
+ new_rd = rd | RB_BUFFER_OFF;
+ } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_record_off);
+
+/**
+ * ring_buffer_record_on - restart writes into the buffer
+ * @buffer: The ring buffer to start writes to.
+ *
+ * This enables all writes to the buffer that was disabled by
+ * ring_buffer_record_off().
+ *
+ * This is different than ring_buffer_record_enable() as
+ * it works like an on/off switch, where as the enable() verison
+ * must be paired with a disable().
+ */
+void ring_buffer_record_on(struct ring_buffer *buffer)
+{
+ unsigned int rd;
+ unsigned int new_rd;
+
+ do {
+ rd = atomic_read(&buffer->record_disabled);
+ new_rd = rd & ~RB_BUFFER_OFF;
+ } while (atomic_cmpxchg(&buffer->record_disabled, rd, new_rd) != rd);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_record_on);
+
+/**
+ * ring_buffer_record_is_on - return true if the ring buffer can write
+ * @buffer: The ring buffer to see if write is enabled
+ *
+ * Returns true if the ring buffer is in a state that it accepts writes.
+ */
+int ring_buffer_record_is_on(struct ring_buffer *buffer)
+{
+ return !atomic_read(&buffer->record_disabled);
+}
+
+/**
* ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
* @buffer: The ring buffer to stop writes to.
* @cpu: The CPU buffer to stop
@@ -4039,68 +4064,6 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
}
EXPORT_SYMBOL_GPL(ring_buffer_read_page);
-#ifdef CONFIG_TRACING
-static ssize_t
-rb_simple_read(struct file *filp, char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- unsigned long *p = filp->private_data;
- char buf[64];
- int r;
-
- if (test_bit(RB_BUFFERS_DISABLED_BIT, p))
- r = sprintf(buf, "permanently disabled\n");
- else
- r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p));
-
- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-}
-
-static ssize_t
-rb_simple_write(struct file *filp, const char __user *ubuf,
- size_t cnt, loff_t *ppos)
-{
- unsigned long *p = filp->private_data;
- unsigned long val;
- int ret;
-
- ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
- if (ret)
- return ret;
-
- if (val)
- set_bit(RB_BUFFERS_ON_BIT, p);
- else
- clear_bit(RB_BUFFERS_ON_BIT, p);
-
- (*ppos)++;
-
- return cnt;
-}
-
-static const struct file_operations rb_simple_fops = {
- .open = tracing_open_generic,
- .read = rb_simple_read,
- .write = rb_simple_write,
- .llseek = default_llseek,
-};
-
-
-static __init int rb_init_debugfs(void)
-{
- struct dentry *d_tracer;
-
- d_tracer = tracing_init_dentry();
-
- trace_create_file("tracing_on", 0644, d_tracer,
- &ring_buffer_flags, &rb_simple_fops);
-
- return 0;
-}
-
-fs_initcall(rb_init_debugfs);
-#endif
-
#ifdef CONFIG_HOTPLUG_CPU
static int rb_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 10d5503f0d04..ed7b5d1e12f4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -36,6 +36,7 @@
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
+#include <linux/nmi.h>
#include <linux/fs.h>
#include "trace.h"
@@ -352,6 +353,59 @@ static void wakeup_work_handler(struct work_struct *work)
static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
/**
+ * tracing_on - enable tracing buffers
+ *
+ * This function enables tracing buffers that may have been
+ * disabled with tracing_off.
+ */
+void tracing_on(void)
+{
+ if (global_trace.buffer)
+ ring_buffer_record_on(global_trace.buffer);
+ /*
+ * This flag is only looked at when buffers haven't been
+ * allocated yet. We don't really care about the race
+ * between setting this flag and actually turning
+ * on the buffer.
+ */
+ global_trace.buffer_disabled = 0;
+}
+EXPORT_SYMBOL_GPL(tracing_on);
+
+/**
+ * tracing_off - turn off tracing buffers
+ *
+ * This function stops the tracing buffers from recording data.
+ * It does not disable any overhead the tracers themselves may
+ * be causing. This function simply causes all recording to
+ * the ring buffers to fail.
+ */
+void tracing_off(void)
+{
+ if (global_trace.buffer)
+ ring_buffer_record_on(global_trace.buffer);
+ /*
+ * This flag is only looked at when buffers haven't been
+ * allocated yet. We don't really care about the race
+ * between setting this flag and actually turning
+ * on the buffer.
+ */
+ global_trace.buffer_disabled = 1;
+}
+EXPORT_SYMBOL_GPL(tracing_off);
+
+/**
+ * tracing_is_on - show state of ring buffers enabled
+ */
+int tracing_is_on(void)
+{
+ if (global_trace.buffer)
+ return ring_buffer_record_is_on(global_trace.buffer);
+ return !global_trace.buffer_disabled;
+}
+EXPORT_SYMBOL_GPL(tracing_is_on);
+
+/**
* trace_wake_up - wake up tasks waiting for trace input
*
* Schedules a delayed work to wake up any task that is blocked on the
@@ -1644,6 +1698,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
int cpu_file = iter->cpu_file;
u64 next_ts = 0, ts;
int next_cpu = -1;
+ int next_size = 0;
int cpu;
/*
@@ -1675,9 +1730,12 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
next_cpu = cpu;
next_ts = ts;
next_lost = lost_events;
+ next_size = iter->ent_size;
}
}
+ iter->ent_size = next_size;
+
if (ent_cpu)
*ent_cpu = next_cpu;
@@ -4567,6 +4625,55 @@ static __init void create_trace_options_dir(void)
create_trace_option_core_file(trace_options[i], i);
}
+static ssize_t
+rb_simple_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct ring_buffer *buffer = filp->private_data;
+ char buf[64];
+ int r;
+
+ if (buffer)
+ r = ring_buffer_record_is_on(buffer);
+ else
+ r = 0;
+
+ r = sprintf(buf, "%d\n", r);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+rb_simple_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct ring_buffer *buffer = filp->private_data;
+ unsigned long val;
+ int ret;
+
+ ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
+ if (ret)
+ return ret;
+
+ if (buffer) {
+ if (val)
+ ring_buffer_record_on(buffer);
+ else
+ ring_buffer_record_off(buffer);
+ }
+
+ (*ppos)++;
+
+ return cnt;
+}
+
+static const struct file_operations rb_simple_fops = {
+ .open = tracing_open_generic,
+ .read = rb_simple_read,
+ .write = rb_simple_write,
+ .llseek = default_llseek,
+};
+
static __init int tracer_init_debugfs(void)
{
struct dentry *d_tracer;
@@ -4626,6 +4733,9 @@ static __init int tracer_init_debugfs(void)
trace_create_file("trace_clock", 0644, d_tracer, NULL,
&trace_clock_fops);
+ trace_create_file("tracing_on", 0644, d_tracer,
+ global_trace.buffer, &rb_simple_fops);
+
#ifdef CONFIG_DYNAMIC_FTRACE
trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
@@ -4798,6 +4908,7 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
if (ret != TRACE_TYPE_NO_CONSUME)
trace_consume(&iter);
}
+ touch_nmi_watchdog();
trace_printk_seq(&iter.seq);
}
@@ -4863,6 +4974,8 @@ __init static int tracer_alloc_buffers(void)
goto out_free_cpumask;
}
global_trace.entries = ring_buffer_size(global_trace.buffer);
+ if (global_trace.buffer_disabled)
+ tracing_off();
#ifdef CONFIG_TRACER_MAX_TRACE
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 54faec790bc1..95059f091a24 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -154,6 +154,7 @@ struct trace_array {
struct ring_buffer *buffer;
unsigned long entries;
int cpu;
+ int buffer_disabled;
cycle_t time_start;
struct task_struct *waiter;
struct trace_array_cpu *data[NR_CPUS];
@@ -835,13 +836,11 @@ extern const char *__stop___trace_bprintk_fmt[];
filter)
#include "trace_entries.h"
-#ifdef CONFIG_PERF_EVENTS
#ifdef CONFIG_FUNCTION_TRACER
int perf_ftrace_event_register(struct ftrace_event_call *call,
enum trace_reg type, void *data);
#else
#define perf_ftrace_event_register NULL
#endif /* CONFIG_FUNCTION_TRACER */
-#endif /* CONFIG_PERF_EVENTS */
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index d91eb0541b3a..4108e1250ca2 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -166,6 +166,12 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
#define FTRACE_STACK_ENTRIES 8
+#ifndef CONFIG_64BIT
+# define IP_FMT "%08lx"
+#else
+# define IP_FMT "%016lx"
+#endif
+
FTRACE_ENTRY(kernel_stack, stack_entry,
TRACE_STACK,
@@ -175,8 +181,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
__dynamic_array(unsigned long, caller )
),
- F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
- "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
+ F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
+ "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
+ "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
__entry->caller[0], __entry->caller[1], __entry->caller[2],
__entry->caller[3], __entry->caller[4], __entry->caller[5],
__entry->caller[6], __entry->caller[7]),
@@ -193,8 +200,9 @@ FTRACE_ENTRY(user_stack, userstack_entry,
__array( unsigned long, caller, FTRACE_STACK_ENTRIES )
),
- F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
- "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
+ F_printk("\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
+ "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n"
+ "\t=> (" IP_FMT ")\n\t=> (" IP_FMT ")\n",
__entry->caller[0], __entry->caller[1], __entry->caller[2],
__entry->caller[3], __entry->caller[4], __entry->caller[5],
__entry->caller[6], __entry->caller[7]),
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 7b46c9bd22ae..3dd15e8bc856 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -162,7 +162,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#define __dynamic_array(type, item)
#undef F_printk
-#define F_printk(fmt, args...) #fmt ", " __stringify(args)
+#define F_printk(fmt, args...) __stringify(fmt) ", " __stringify(args)
#undef FTRACE_ENTRY_REG
#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\