From fe2e39d8782d885755139304d8dba0b3e5bfa878 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Mar 2012 23:29:45 +0200 Subject: firmware_class: Rework usermodehelper check Instead of two functions, read_lock_usermodehelper() and usermodehelper_is_disabled(), used in combination, introduce usermodehelper_read_trylock() that will only return with umhelper_sem held if usermodehelper_disabled is unset (and will return -EAGAIN otherwise) and make _request_firmware() use it. Rename read_unlock_usermodehelper() to usermodehelper_read_unlock() to follow the naming convention of the new function. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org --- kernel/kmod.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index 957a7aab8ebc..4079ac1d5e79 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -339,17 +339,24 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); */ #define RUNNING_HELPERS_TIMEOUT (5 * HZ) -void read_lock_usermodehelper(void) +int usermodehelper_read_trylock(void) { + int ret = 0; + down_read(&umhelper_sem); + if (usermodehelper_disabled) { + up_read(&umhelper_sem); + ret = -EAGAIN; + } + return ret; } -EXPORT_SYMBOL_GPL(read_lock_usermodehelper); +EXPORT_SYMBOL_GPL(usermodehelper_read_trylock); -void read_unlock_usermodehelper(void) +void usermodehelper_read_unlock(void) { up_read(&umhelper_sem); } -EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); +EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); /** * usermodehelper_disable - prevent new helpers from being started @@ -390,15 +397,6 @@ void usermodehelper_enable(void) up_write(&umhelper_sem); } -/** - * usermodehelper_is_disabled - check if new helpers are allowed to be started - */ -bool usermodehelper_is_disabled(void) -{ - return usermodehelper_disabled; -} -EXPORT_SYMBOL_GPL(usermodehelper_is_disabled); - static void helper_lock(void) { atomic_inc(&running_helpers); -- cgit v1.2.3 From 9b78c1da60b3c62ccdd1509f0902ad19ceaf776b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Mar 2012 23:30:02 +0200 Subject: firmware_class: Do not warn that system is not ready from async loads If firmware is requested asynchronously, by calling request_firmware_nowait(), there is no reason to fail the request (and warn the user) when the system is (presumably temporarily) unready to handle it (because user space is not available yet or frozen). For this reason, introduce an alternative routine for read-locking umhelper_sem, usermodehelper_read_lock_wait(), that will wait for usermodehelper_disabled to be unset (possibly with a timeout) and make request_firmware_work_func() use it instead of usermodehelper_read_trylock(). Accordingly, modify request_firmware() so that it uses usermodehelper_read_trylock() to acquire umhelper_sem and remove the code related to that lock from _request_firmware(). Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org --- kernel/kmod.c | 58 +++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index 4079ac1d5e79..da7fcca279f9 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -333,6 +333,12 @@ static atomic_t running_helpers = ATOMIC_INIT(0); */ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); +/* + * Used by usermodehelper_read_lock_wait() to wait for usermodehelper_disabled + * to become 'false'. 
+ */ +static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq); + /* * Time to wait for running_helpers to become zero before the setting of * usermodehelper_disabled in usermodehelper_disable() fails @@ -352,12 +358,50 @@ int usermodehelper_read_trylock(void) } EXPORT_SYMBOL_GPL(usermodehelper_read_trylock); +long usermodehelper_read_lock_wait(long timeout) +{ + DEFINE_WAIT(wait); + + if (timeout < 0) + return -EINVAL; + + down_read(&umhelper_sem); + for (;;) { + prepare_to_wait(&usermodehelper_disabled_waitq, &wait, + TASK_UNINTERRUPTIBLE); + if (!usermodehelper_disabled) + break; + + up_read(&umhelper_sem); + + timeout = schedule_timeout(timeout); + if (!timeout) + break; + + down_read(&umhelper_sem); + } + finish_wait(&usermodehelper_disabled_waitq, &wait); + return timeout; +} +EXPORT_SYMBOL_GPL(usermodehelper_read_lock_wait); + void usermodehelper_read_unlock(void) { up_read(&umhelper_sem); } EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); +/** + * usermodehelper_enable - allow new helpers to be started again + */ +void usermodehelper_enable(void) +{ + down_write(&umhelper_sem); + usermodehelper_disabled = 0; + wake_up(&usermodehelper_disabled_waitq); + up_write(&umhelper_sem); +} + /** * usermodehelper_disable - prevent new helpers from being started */ @@ -381,22 +425,10 @@ int usermodehelper_disable(void) if (retval) return 0; - down_write(&umhelper_sem); - usermodehelper_disabled = 0; - up_write(&umhelper_sem); + usermodehelper_enable(); return -EAGAIN; } -/** - * usermodehelper_enable - allow new helpers to be started again - */ -void usermodehelper_enable(void) -{ - down_write(&umhelper_sem); - usermodehelper_disabled = 0; - up_write(&umhelper_sem); -} - static void helper_lock(void) { atomic_inc(&running_helpers); -- cgit v1.2.3 From 7b5179ac14dbad945647ac9e76bbbf14ed9e0dbe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Mar 2012 23:30:14 +0200 Subject: PM / Hibernate: Disable usermode helpers right before freezing tasks There is no reason to call usermodehelper_disable() before creating memory bitmaps in hibernate() and software_resume(), so call it right before freeze_processes(), in accordance with the other suspend and hibernation code. Consequently, call usermodehelper_enable() right after the thawing of tasks rather than after freeing the memory bitmaps. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org --- kernel/power/hibernate.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 0a186cfde788..639ff6e4ae9e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -611,19 +611,19 @@ int hibernate(void) if (error) goto Exit; - error = usermodehelper_disable(); - if (error) - goto Exit; - /* Allocate memory management structures */ error = create_basic_memory_bitmaps(); if (error) - goto Enable_umh; + goto Exit; printk(KERN_INFO "PM: Syncing filesystems ... 
"); sys_sync(); printk("done.\n"); + error = usermodehelper_disable(); + if (error) + goto Exit; + error = freeze_processes(); if (error) goto Free_bitmaps; @@ -660,9 +660,8 @@ int hibernate(void) freezer_test_done = false; Free_bitmaps: - free_basic_memory_bitmaps(); - Enable_umh: usermodehelper_enable(); + free_basic_memory_bitmaps(); Exit: pm_notifier_call_chain(PM_POST_HIBERNATION); pm_restore_console(); @@ -777,15 +776,13 @@ static int software_resume(void) if (error) goto close_finish; - error = usermodehelper_disable(); + error = create_basic_memory_bitmaps(); if (error) goto close_finish; - error = create_basic_memory_bitmaps(); - if (error) { - usermodehelper_enable(); + error = usermodehelper_disable(); + if (error) goto close_finish; - } pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); @@ -805,8 +802,8 @@ static int software_resume(void) swsusp_free(); thaw_processes(); Done: - free_basic_memory_bitmaps(); usermodehelper_enable(); + free_basic_memory_bitmaps(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); pm_restore_console(); -- cgit v1.2.3 From 1e73203cd1157a03facc41ffb54050f5b28e55bd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Mar 2012 23:30:21 +0200 Subject: PM / Sleep: Move disabling of usermode helpers to the freezer The core suspend/hibernation code calls usermodehelper_disable() to avoid race conditions between the freezer and the starting of usermode helpers and each code path has to do that on its own. However, it is always called right before freeze_processes() and usermodehelper_enable() is always called right after thaw_processes(). For this reason, to avoid code duplication and to make the connection between usermodehelper_disable() and the freezer more visible, make freeze_processes() call it and remove the direct usermodehelper_disable() and usermodehelper_enable() calls from all suspend/hibernation code paths. Signed-off-by: Rafael J. 
Wysocki Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org --- kernel/power/hibernate.c | 11 ----------- kernel/power/process.c | 7 +++++++ kernel/power/suspend.c | 7 ------- kernel/power/user.c | 10 +--------- 4 files changed, 8 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 639ff6e4ae9e..e09dfbfeecee 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -620,10 +619,6 @@ int hibernate(void) sys_sync(); printk("done.\n"); - error = usermodehelper_disable(); - if (error) - goto Exit; - error = freeze_processes(); if (error) goto Free_bitmaps; @@ -660,7 +655,6 @@ int hibernate(void) freezer_test_done = false; Free_bitmaps: - usermodehelper_enable(); free_basic_memory_bitmaps(); Exit: pm_notifier_call_chain(PM_POST_HIBERNATION); @@ -780,10 +774,6 @@ static int software_resume(void) if (error) goto close_finish; - error = usermodehelper_disable(); - if (error) - goto close_finish; - pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); if (error) { @@ -802,7 +792,6 @@ static int software_resume(void) swsusp_free(); thaw_processes(); Done: - usermodehelper_enable(); free_basic_memory_bitmaps(); Finish: pm_notifier_call_chain(PM_POST_RESTORE); diff --git a/kernel/power/process.c b/kernel/power/process.c index 0d2aeb226108..56eaac7e88ab 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * Timeout for stopping processes @@ -122,6 +123,10 @@ int freeze_processes(void) { int error; + error = usermodehelper_disable(); + if (error) + return error; + if (!pm_freezing) atomic_inc(&system_freezing_cnt); @@ -187,6 +192,8 @@ void thaw_processes(void) } while_each_thread(g, p); read_unlock(&tasklist_lock); + usermodehelper_enable(); + schedule(); printk("done.\n"); } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 88e5c967370d..396d262b8fd0 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -102,17 +101,12 @@ static int suspend_prepare(void) if (error) goto Finish; - error = usermodehelper_disable(); - if (error) - goto Finish; - error = suspend_freeze_processes(); if (!error) return 0; suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); - usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); @@ -259,7 +253,6 @@ int suspend_devices_and_enter(suspend_state_t state) static void suspend_finish(void) { suspend_thaw_processes(); - usermodehelper_enable(); pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); } diff --git a/kernel/power/user.c b/kernel/power/user.c index 33c4329205af..91b0fd021a95 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -222,14 +221,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, sys_sync(); printk("done.\n"); - error = usermodehelper_disable(); - if (error) - break; - error = freeze_processes(); - if (error) - usermodehelper_enable(); - else + if (!error) data->frozen = 1; break; @@ -238,7 +231,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; pm_restore_gfp_mask(); thaw_processes(); - usermodehelper_enable(); data->frozen = 0; break; -- cgit v1.2.3 From 
247bc03742545fec2f79939a3b9f738392a0f7b4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 28 Mar 2012 23:30:28 +0200 Subject: PM / Sleep: Mitigate race between the freezer and request_firmware() There is a race condition between the freezer and request_firmware() such that if request_firmware() is run on one CPU and freeze_processes() is run on another CPU and usermodehelper_disable() called by it succeeds to grab umhelper_sem for writing before usermodehelper_read_trylock() called from request_firmware() acquires it for reading, the request_firmware() will fail and trigger a WARN_ON() complaining that it was called at a wrong time. However, in fact, it wasn't called at a wrong time and freeze_processes() simply happened to be executed simultaneously. To avoid this race, at least in some cases, modify usermodehelper_read_trylock() so that it doesn't fail if the freezing of tasks has just started and hasn't been completed yet. Instead, during the freezing of tasks, it will try to freeze the task that has called it so that it can wait until user space is thawed without triggering the scary warning. For this purpose, change usermodehelper_disabled so that it can take three different values, UMH_ENABLED (0), UMH_FREEZING and UMH_DISABLED. The first one means that usermode helpers are enabled, the last one means "hard disable" (i.e. the system is not ready for usermode helpers to be used) and the second one is reserved for the freezer. Namely, when freeze_processes() is started, it sets usermodehelper_disabled to UMH_FREEZING which tells usermodehelper_read_trylock() that it shouldn't fail just yet and should call try_to_freeze() if woken up and cannot return immediately. This way all freezable tasks that happen to call request_firmware() right before freeze_processes() is started and lose the race for umhelper_sem with it will be frozen and will sleep until thaw_processes() unsets usermodehelper_disabled. [For the non-freezable callers of request_firmware() the race for umhelper_sem against freeze_processes() is unfortunately unavoidable.] Reported-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Cc: stable@vger.kernel.org --- kernel/kmod.c | 47 +++++++++++++++++++++++++++++++++++++---------- kernel/power/process.c | 3 ++- 2 files changed, 39 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/kernel/kmod.c b/kernel/kmod.c index da7fcca279f9..05698a7415fe 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -322,7 +322,7 @@ static void __call_usermodehelper(struct work_struct *work) * land has been frozen during a system-wide hibernation or suspend operation). * Should always be manipulated under umhelper_sem acquired for write. 
*/ -static int usermodehelper_disabled = 1; +static enum umh_disable_depth usermodehelper_disabled = UMH_DISABLED; /* Number of helpers running */ static atomic_t running_helpers = ATOMIC_INIT(0); @@ -347,13 +347,30 @@ static DECLARE_WAIT_QUEUE_HEAD(usermodehelper_disabled_waitq); int usermodehelper_read_trylock(void) { + DEFINE_WAIT(wait); int ret = 0; down_read(&umhelper_sem); - if (usermodehelper_disabled) { + for (;;) { + prepare_to_wait(&usermodehelper_disabled_waitq, &wait, + TASK_INTERRUPTIBLE); + if (!usermodehelper_disabled) + break; + + if (usermodehelper_disabled == UMH_DISABLED) + ret = -EAGAIN; + up_read(&umhelper_sem); - ret = -EAGAIN; + + if (ret) + break; + + schedule(); + try_to_freeze(); + + down_read(&umhelper_sem); } + finish_wait(&usermodehelper_disabled_waitq, &wait); return ret; } EXPORT_SYMBOL_GPL(usermodehelper_read_trylock); @@ -392,25 +409,35 @@ void usermodehelper_read_unlock(void) EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); /** - * usermodehelper_enable - allow new helpers to be started again + * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled. + * depth: New value to assign to usermodehelper_disabled. + * + * Change the value of usermodehelper_disabled (under umhelper_sem locked for + * writing) and wakeup tasks waiting for it to change. */ -void usermodehelper_enable(void) +void __usermodehelper_set_disable_depth(enum umh_disable_depth depth) { down_write(&umhelper_sem); - usermodehelper_disabled = 0; + usermodehelper_disabled = depth; wake_up(&usermodehelper_disabled_waitq); up_write(&umhelper_sem); } /** - * usermodehelper_disable - prevent new helpers from being started + * __usermodehelper_disable - Prevent new helpers from being started. + * @depth: New value to assign to usermodehelper_disabled. + * + * Set usermodehelper_disabled to @depth and wait for running helpers to exit. */ -int usermodehelper_disable(void) +int __usermodehelper_disable(enum umh_disable_depth depth) { long retval; + if (!depth) + return -EINVAL; + down_write(&umhelper_sem); - usermodehelper_disabled = 1; + usermodehelper_disabled = depth; up_write(&umhelper_sem); /* @@ -425,7 +452,7 @@ int usermodehelper_disable(void) if (retval) return 0; - usermodehelper_enable(); + __usermodehelper_set_disable_depth(UMH_ENABLED); return -EAGAIN; } diff --git a/kernel/power/process.c b/kernel/power/process.c index 56eaac7e88ab..19db29f67558 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -123,7 +123,7 @@ int freeze_processes(void) { int error; - error = usermodehelper_disable(); + error = __usermodehelper_disable(UMH_FREEZING); if (error) return error; @@ -135,6 +135,7 @@ int freeze_processes(void) error = try_to_freeze_tasks(true); if (!error) { printk("done."); + __usermodehelper_set_disable_depth(UMH_DISABLED); oom_killer_disable(); } printk("\n"); -- cgit v1.2.3 From c4772d192c70b61d52262b0db76f7abd8aeb51c6 Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Wed, 28 Mar 2012 23:31:24 +0200 Subject: PM / QoS: add pm_qos_update_request_timeout() API The new API, pm_qos_update_request_timeout() is to provide a timeout with pm_qos_update_request. For example, pm_qos_update_request_timeout(req, 100, 1000), means that QoS request on req with value 100 will be active for 1000 microseconds. After 1000 microseconds, the QoS request thru req is reset. If there were another pm_qos_update_request(req, x) during the 1000 us, this new request with value x will override as this is another request on the same req handle. 
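As an illustration only (not part of this patch; the request handle and helper names below are invented, and PM_QOS_CPU_DMA_LATENCY is just one possible class), a minimal caller of the new API could look like this:

    #include <linux/pm_qos.h>

    static struct pm_qos_request bump_req;

    static int __init bump_init(void)
    {
            /* register the handle once, with no constraint applied yet */
            pm_qos_add_request(&bump_req, PM_QOS_CPU_DMA_LATENCY,
                               PM_QOS_DEFAULT_VALUE);
            return 0;
    }

    static void bump_latency_briefly(void)
    {
            /* ask for 100 usecs latency, auto-expiring after 1000 usecs */
            pm_qos_update_request_timeout(&bump_req, 100, 1000);
    }
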
A new request on the same req handle will always override the previous request whether it is the conventional request or it is the new timeout request. Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park Acked-by: Mark Gross Signed-off-by: Rafael J. Wysocki --- kernel/power/qos.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'kernel') diff --git a/kernel/power/qos.c b/kernel/power/qos.c index d6d6dbd1ecc0..6a031e684026 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -229,6 +229,21 @@ int pm_qos_request_active(struct pm_qos_request *req) } EXPORT_SYMBOL_GPL(pm_qos_request_active); +/** + * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout + * @work: work struct for the delayed work (timeout) + * + * This cancels the timeout request by falling back to the default at timeout. + */ +static void pm_qos_work_fn(struct work_struct *work) +{ + struct pm_qos_request *req = container_of(to_delayed_work(work), + struct pm_qos_request, + work); + + pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE); +} + /** * pm_qos_add_request - inserts new qos request into the list * @req: pointer to a preallocated handle @@ -253,6 +268,7 @@ void pm_qos_add_request(struct pm_qos_request *req, return; } req->pm_qos_class = pm_qos_class; + INIT_DELAYED_WORK(&req->work, pm_qos_work_fn); pm_qos_update_target(pm_qos_array[pm_qos_class]->constraints, &req->node, PM_QOS_ADD_REQ, value); } @@ -279,6 +295,9 @@ void pm_qos_update_request(struct pm_qos_request *req, return; } + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + if (new_value != req->node.prio) pm_qos_update_target( pm_qos_array[req->pm_qos_class]->constraints, @@ -286,6 +305,34 @@ void pm_qos_update_request(struct pm_qos_request *req, } EXPORT_SYMBOL_GPL(pm_qos_update_request); +/** + * pm_qos_update_request_timeout - modifies an existing qos request temporarily. + * @req : handle to list element holding a pm_qos request to use + * @new_value: defines the temporal qos request + * @timeout_us: the effective duration of this qos request in usecs. + * + * After timeout_us, this qos request is cancelled automatically. + */ +void pm_qos_update_request_timeout(struct pm_qos_request *req, s32 new_value, + unsigned long timeout_us) +{ + if (!req) + return; + if (WARN(!pm_qos_request_active(req), + "%s called for unknown object.", __func__)) + return; + + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + + if (new_value != req->node.prio) + pm_qos_update_target( + pm_qos_array[req->pm_qos_class]->constraints, + &req->node, PM_QOS_UPDATE_REQ, new_value); + + schedule_delayed_work(&req->work, usecs_to_jiffies(timeout_us)); +} + /** * pm_qos_remove_request - modifies an existing qos request * @req: handle to request list element @@ -305,6 +352,9 @@ void pm_qos_remove_request(struct pm_qos_request *req) return; } + if (delayed_work_pending(&req->work)) + cancel_delayed_work_sync(&req->work); + pm_qos_update_target(pm_qos_array[req->pm_qos_class]->constraints, &req->node, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); -- cgit v1.2.3 From 5f054e31c63be774bf1ce252f20d56012a00f8a5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Mar 2012 15:38:31 +1030 Subject: documentation: remove references to cpu_*_map. This has been obsolescent for a while, fix documentation and misc comments. 
Signed-off-by: Rusty Russell --- kernel/cpuset.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1010cc61931f..eedeebe64b1a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -270,11 +270,11 @@ static struct file_system_type cpuset_fs_type = { * are online. If none are online, walk up the cpuset hierarchy * until we find one that does have some online cpus. If we get * all the way to the top and still haven't found any online cpus, - * return cpu_online_map. Or if passed a NULL cs from an exit'ing - * task, return cpu_online_map. + * return cpu_online_mask. Or if passed a NULL cs from an exit'ing + * task, return cpu_online_mask. * * One way or another, we guarantee to return some non-empty subset - * of cpu_online_map. + * of cpu_online_mask. * * Call with callback_mutex held. */ @@ -867,7 +867,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, int retval; int is_load_balanced; - /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ + /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ if (cs == &top_cpuset) return -EACCES; @@ -2149,7 +2149,7 @@ void __init cpuset_init_smp(void) * * Description: Returns the cpumask_var_t cpus_allowed of the cpuset * attached to the specified @tsk. Guaranteed to return some non-empty - * subset of cpu_online_map, even if this means going outside the + * subset of cpu_online_mask, even if this means going outside the * tasks cpuset. **/ -- cgit v1.2.3 From 107f8bdac992356b3a80d41c9f6ff4399159aa81 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 28 Mar 2012 08:42:34 +0200 Subject: padata: Add a reference to the api documentation Add a reference to the padata api documentation at Documentation/padata.txt Suggested-by: Peter Zijlstra Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index 6f10eb285ece..78750882b2ab 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -1,6 +1,8 @@ /* * padata.c - generic interface to process data streams in parallel * + * See Documentation/padata.txt for an api documentation. + * * Copyright (C) 2008, 2009 secunet Security Networks AG * Copyright (C) 2008, 2009 Steffen Klassert * -- cgit v1.2.3 From 13614e0fb1a8840c134be35c179ff23e23676304 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 28 Mar 2012 08:43:21 +0200 Subject: padata: Use the online cpumask as the default We use the active cpumask to determine the superset of cpus to use for parallelization. However, the active cpumask is for internal usage of the scheduler and therefore not the appropriate cpumask for these purposes. So use the online cpumask instead. 
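The same rule applies to any code that derives a worker mask from a caller-supplied cpumask; as a hedged sketch (not part of this patch, function name invented), the pattern the change establishes is:

    #include <linux/cpumask.h>
    #include <linux/gfp.h>

    /* Clamp a caller-supplied mask to CPUs that are actually online;
     * cpu_active_mask is scheduler-internal and must not be used here. */
    static int clamp_to_online(cpumask_var_t *dst, const struct cpumask *requested)
    {
            if (!alloc_cpumask_var(dst, GFP_KERNEL))
                    return -ENOMEM;
            cpumask_and(*dst, requested, cpu_online_mask);
            return 0;
    }
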
Reported-by: Peter Zijlstra Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index 78750882b2ab..de3d0d97800a 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -356,13 +356,13 @@ static int padata_setup_cpumasks(struct parallel_data *pd, if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL)) return -ENOMEM; - cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_active_mask); + cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask); if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) { free_cpumask_var(pd->cpumask.cbcpu); return -ENOMEM; } - cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_active_mask); + cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask); return 0; } @@ -566,7 +566,7 @@ EXPORT_SYMBOL(padata_unregister_cpumask_notifier); static bool padata_validate_cpumask(struct padata_instance *pinst, const struct cpumask *cpumask) { - if (!cpumask_intersects(cpumask, cpu_active_mask)) { + if (!cpumask_intersects(cpumask, cpu_online_mask)) { pinst->flags |= PADATA_INVALID; return false; } @@ -680,7 +680,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu) { struct parallel_data *pd; - if (cpumask_test_cpu(cpu, cpu_active_mask)) { + if (cpumask_test_cpu(cpu, cpu_online_mask)) { pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu, pinst->cpumask.cbcpu); if (!pd) -- cgit v1.2.3 From 9612090527526a15832480c48b1f4b39e93e8a35 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 28 Mar 2012 08:44:07 +0200 Subject: padata: Fix cpu hotplug We don't remove the cpu that went offline from our cpumasks on cpu hotplug. This got lost somewhere along the line, so restore it. This fixes a hang of the padata instance on cpu hotplug. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- kernel/padata.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/padata.c b/kernel/padata.c index de3d0d97800a..89fe3d1b9efb 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -748,6 +748,9 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu) return -ENOMEM; padata_replace(pinst, pd); + + cpumask_clear_cpu(cpu, pd->cpumask.cbcpu); + cpumask_clear_cpu(cpu, pd->cpumask.pcpu); } return 0; -- cgit v1.2.3 From 78724b8ef83fc2bcfbc0a72a7ad8a3ce5ad25e6a Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 29 Mar 2012 06:17:17 -0500 Subject: kdb: Fix smatch warning on dbg_io_ops->is_console The Smatch tool warned that the change from commit b8adde8dd (kdb: Avoid using dbg_io_ops until it is initialized) should add another null check later in the kdb_printf(). It is worth noting that the second use of dbg_io_ops->is_console is protected by the KDB_PAGER state variable which would only get set when kdb is fully active and initialized. If we ever encounter changes or defects in the KDB_PAGER state we do not want to crash the kernel in a kdb_printf/printk. 
CC: Tim Bird Reported-by: Dan Carpenter Signed-off-by: Jason Wessel --- kernel/debug/kdb/kdb_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index 9b5f17da1c56..bb9520f0f6ff 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -743,7 +743,7 @@ kdb_printit: kdb_input_flush(); c = console_drivers; - if (!dbg_io_ops->is_console) { + if (dbg_io_ops && !dbg_io_ops->is_console) { len = strlen(moreprompt); cp = moreprompt; while (len--) { -- cgit v1.2.3 From 98b54aa1a2241b59372468bd1e9c2d207bdba54b Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 21 Mar 2012 10:17:03 -0500 Subject: kgdb,debug_core: pass the breakpoint struct instead of address and memory There is extra state information that needs to be exposed in the kgdb_bpt structure for tracking how a breakpoint was installed. The debug_core only uses the the probe_kernel_write() to install breakpoints, but this is not enough for all the archs. Some arch such as x86 need to use text_poke() in order to install a breakpoint into a read only page. Passing the kgdb_bpt structure to kgdb_arch_set_breakpoint() and kgdb_arch_remove_breakpoint() allows other archs to set the type variable which indicates how the breakpoint was installed. Cc: stable@vger.kernel.org # >= 2.6.36 Signed-off-by: Jason Wessel --- kernel/debug/debug_core.c | 53 +++++++++++++++++++++-------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 3f88a45e6f0a..a7e52ca94563 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -161,37 +161,39 @@ early_param("nokgdbroundup", opt_nokgdbroundup); * Weak aliases for breakpoint management, * can be overriden by architectures when needed: */ -int __weak kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) +int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; - err = probe_kernel_read(saved_instr, (char *)addr, BREAK_INSTR_SIZE); + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); if (err) return err; - - return probe_kernel_write((char *)addr, arch_kgdb_ops.gdb_bpt_instr, - BREAK_INSTR_SIZE); + err = probe_kernel_write((char *)bpt->bpt_addr, + arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); + return err; } -int __weak kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) +int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { - return probe_kernel_write((char *)addr, - (char *)bundle, BREAK_INSTR_SIZE); + return probe_kernel_write((char *)bpt->bpt_addr, + (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } int __weak kgdb_validate_break_address(unsigned long addr) { - char tmp_variable[BREAK_INSTR_SIZE]; + struct kgdb_bkpt tmp; int err; - /* Validate setting the breakpoint and then removing it. In the + /* Validate setting the breakpoint and then removing it. If the * remove fails, the kernel needs to emit a bad message because we * are deep trouble not being able to put things back the way we * found them. 
*/ - err = kgdb_arch_set_breakpoint(addr, tmp_variable); + tmp.bpt_addr = addr; + err = kgdb_arch_set_breakpoint(&tmp); if (err) return err; - err = kgdb_arch_remove_breakpoint(addr, tmp_variable); + err = kgdb_arch_remove_breakpoint(&tmp); if (err) printk(KERN_ERR "KGDB: Critical breakpoint error, kernel " "memory destroyed at: %lx", addr); @@ -235,7 +237,6 @@ static void kgdb_flush_swbreak_addr(unsigned long addr) */ int dbg_activate_sw_breakpoints(void) { - unsigned long addr; int error; int ret = 0; int i; @@ -244,16 +245,15 @@ int dbg_activate_sw_breakpoints(void) if (kgdb_break[i].state != BP_SET) continue; - addr = kgdb_break[i].bpt_addr; - error = kgdb_arch_set_breakpoint(addr, - kgdb_break[i].saved_instr); + error = kgdb_arch_set_breakpoint(&kgdb_break[i]); if (error) { ret = error; - printk(KERN_INFO "KGDB: BP install failed: %lx", addr); + printk(KERN_INFO "KGDB: BP install failed: %lx", + kgdb_break[i].bpt_addr); continue; } - kgdb_flush_swbreak_addr(addr); + kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr); kgdb_break[i].state = BP_ACTIVE; } return ret; @@ -302,7 +302,6 @@ int dbg_set_sw_break(unsigned long addr) int dbg_deactivate_sw_breakpoints(void) { - unsigned long addr; int error; int ret = 0; int i; @@ -310,15 +309,14 @@ int dbg_deactivate_sw_breakpoints(void) for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { if (kgdb_break[i].state != BP_ACTIVE) continue; - addr = kgdb_break[i].bpt_addr; - error = kgdb_arch_remove_breakpoint(addr, - kgdb_break[i].saved_instr); + error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) { - printk(KERN_INFO "KGDB: BP remove failed: %lx\n", addr); + printk(KERN_INFO "KGDB: BP remove failed: %lx\n", + kgdb_break[i].bpt_addr); ret = error; } - kgdb_flush_swbreak_addr(addr); + kgdb_flush_swbreak_addr(kgdb_break[i].bpt_addr); kgdb_break[i].state = BP_SET; } return ret; @@ -352,7 +350,6 @@ int kgdb_isremovedbreak(unsigned long addr) int dbg_remove_all_break(void) { - unsigned long addr; int error; int i; @@ -360,12 +357,10 @@ int dbg_remove_all_break(void) for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { if (kgdb_break[i].state != BP_ACTIVE) goto setundefined; - addr = kgdb_break[i].bpt_addr; - error = kgdb_arch_remove_breakpoint(addr, - kgdb_break[i].saved_instr); + error = kgdb_arch_remove_breakpoint(&kgdb_break[i]); if (error) printk(KERN_ERR "KGDB: breakpoint remove failed: %lx\n", - addr); + kgdb_break[i].bpt_addr); setundefined: kgdb_break[i].state = BP_UNDEFINED; } -- cgit v1.2.3 From aa2bf9bc6414b6972b9e51903c1ce7b1f057aee2 Mon Sep 17 00:00:00 2001 From: Sasikantha babu Date: Wed, 21 Mar 2012 20:10:54 +0530 Subject: itimer: Schedule silent NULL pointer fixup in setitimer() for removal setitimer() should return -EFAULT if called with an invalid pointer for value. The current code excludes a NULL pointer from this rule and silently uses it to stop the timer. This violates the spec. Warn about user space apps which rely on that feature and schedule it for removal. 
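An application that relied on the NULL shortcut can disarm the timer in the spec-conformant way instead; a user-space sketch (illustrative only, not from this patch):

    #include <string.h>
    #include <sys/time.h>

    /* Stop ITIMER_REAL without passing a NULL new_value: an all-zero
     * itimerval (it_value == 0) disarms the timer per the specification. */
    static int stop_real_itimer(void)
    {
            struct itimerval off;

            memset(&off, 0, sizeof(off));
            return setitimer(ITIMER_REAL, &off, NULL);
    }
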
[ tglx: Massaged changelog, warn message and Doc entry ] Signed-off-by: Sasikantha babu Link: http://lkml.kernel.org/r/1332340854-26053-1-git-send-email-sasikanth.v19@gmail.com Signed-off-by: Thomas Gleixner --- kernel/itimer.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/itimer.c b/kernel/itimer.c index 22000c3db0dd..c70369a74b5a 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -284,8 +284,11 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, if (value) { if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) return -EFAULT; - } else + } else { memset((char *) &set_buffer, 0, sizeof(set_buffer)); + WARN_ONCE(1, "setitimer: new_value pointer is NULL." + " Misfeature support will be removed\n"); + } error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL); if (error || !ovalue) -- cgit v1.2.3 From 3872c48b14259d8c0a00c9fff06a4a4123f7f4eb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 31 Mar 2012 12:45:43 +0200 Subject: tick: Document TICK_ONESHOT config option This option has been selected from arch code as it was assumed that it's necessary to support oneshot mode clockevent devices. But it's just a core internal helper to compile tick-oneshot.c if NOHZ or HIG_RES_TIMERS are selected. Reported-by: Russell King Signed-off-by: Thomas Gleixner --- kernel/time/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel') diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 2cf9cc7aa103..a20dc8a3c949 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -1,6 +1,10 @@ # # Timer subsystem related configuration options # + +# Core internal switch. Selected by NO_HZ / HIGH_RES_TIMERS. This is +# only related to the tick functionality. Oneshot clockevent devices +# are supported independ of this. config TICK_ONESHOT bool -- cgit v1.2.3 From 83e3fa6f0193299f8b7180db588edd5ca61a3b82 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 1 Apr 2012 16:38:37 -0400 Subject: irq_work: fix compile failure on MIPS from system.h split Builds of the MIPS platform ip32_defconfig fails as of commit 0195c00244dc ("Merge tag 'split-asm_system_h ...") because MIPS xchg() macro uses BUILD_BUG_ON and it was moved in commit b81947c646bf ("Disintegrate asm/system.h for MIPS"). The root cause is that the system.h split wasn't tested on a baseline with commit 6c03438edeb5 ("kernel.h: doesn't explicitly use bug.h, so don't include it.") Since this file uses BUG code in several other places besides the xchg call, simply make the inclusion explicit. Signed-off-by: Paul Gortmaker Acked-by: David Howells Signed-off-by: Linus Torvalds --- kernel/irq_work.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/irq_work.c b/kernel/irq_work.c index c3c46c72046e..0c56d44b9fd5 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -5,6 +5,7 @@ * context. The enqueueing is NMI-safe. */ +#include #include #include #include -- cgit v1.2.3 From 620f6e8e855d6d447688a5f67a4e176944a084e8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Apr 2012 11:40:19 -0700 Subject: sysctl: fix write access to dmesg_restrict/kptr_restrict Commit bfdc0b4 adds code to restrict access to dmesg_restrict, however, it incorrectly alters kptr_restrict rather than dmesg_restrict. The original patch from Richard Weinberger (https://lkml.org/lkml/2011/3/14/362) alters dmesg_restrict as expected, and so the patch seems to have been misapplied. 
This adds the CAP_SYS_ADMIN check to both dmesg_restrict and kptr_restrict, since both are sensitive. Reported-by: Phillip Lougher Signed-off-by: Kees Cook Acked-by: Serge Hallyn Acked-by: Richard Weinberger Cc: stable@vger.kernel.org Signed-off-by: James Morris --- kernel/sysctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 52b3a06a02f8..4ab11879aeb4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -170,7 +170,7 @@ static int proc_taint(struct ctl_table *table, int write, #endif #ifdef CONFIG_PRINTK -static int proc_dmesg_restrict(struct ctl_table *table, int write, +static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif @@ -703,7 +703,7 @@ static struct ctl_table kern_table[] = { .data = &dmesg_restrict, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_sysadmin, .extra1 = &zero, .extra2 = &one, }, @@ -712,7 +712,7 @@ static struct ctl_table kern_table[] = { .data = &kptr_restrict, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dmesg_restrict, + .proc_handler = proc_dointvec_minmax_sysadmin, .extra1 = &zero, .extra2 = &two, }, @@ -1943,7 +1943,7 @@ static int proc_taint(struct ctl_table *table, int write, } #ifdef CONFIG_PRINTK -static int proc_dmesg_restrict(struct ctl_table *table, int write, +static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { if (write && !capable(CAP_SYS_ADMIN)) -- cgit v1.2.3 From 234e340582901211f40d8c732afc49f0630ecf05 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 5 Apr 2012 14:25:11 -0700 Subject: simple_open: automatically convert to simple_open() Many users of debugfs copy the implementation of default_open() when they want to support a custom read/write function op. This leads to a proliferation of the default_open() implementation across the entire tree. Now that the common implementation has been consolidated into libfs we can replace all the users of this function with simple_open(). This replacement was done with the following semantic patch: @ open @ identifier open_f != simple_open; identifier i, f; @@ -int open_f(struct inode *i, struct file *f) -{ ( -if (i->i_private) -f->private_data = i->i_private; | -f->private_data = i->i_private; ) -return 0; -} @ has_open depends on open @ identifier fops; identifier open.open_f; @@ struct file_operations fops = { ... -.open = open_f, +.open = simple_open, ... 
}; [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stephen Boyd Cc: Greg Kroah-Hartman Cc: Al Viro Cc: Julia Lawall Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/trace/blktrace.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index cdea7b56b0c9..c0bd0308741c 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -311,13 +311,6 @@ int blk_trace_remove(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_trace_remove); -static int blk_dropped_open(struct inode *inode, struct file *filp) -{ - filp->private_data = inode->i_private; - - return 0; -} - static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { @@ -331,18 +324,11 @@ static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, static const struct file_operations blk_dropped_fops = { .owner = THIS_MODULE, - .open = blk_dropped_open, + .open = simple_open, .read = blk_dropped_read, .llseek = default_llseek, }; -static int blk_msg_open(struct inode *inode, struct file *filp) -{ - filp->private_data = inode->i_private; - - return 0; -} - static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, size_t count, loff_t *ppos) { @@ -371,7 +357,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, static const struct file_operations blk_msg_fops = { .owner = THIS_MODULE, - .open = blk_msg_open, + .open = simple_open, .write = blk_msg_write, .llseek = noop_llseek, }; -- cgit v1.2.3 From 6f103929f8979d2638e58d7f7fda0beefcb8ee7e Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 27 Mar 2012 15:09:37 -0400 Subject: nohz: Fix stale jiffies update in tick_nohz_restart() Fix tick_nohz_restart() to not use a stale ktime_t "now" value when calling tick_do_update_jiffies64(now). If we reach this point in the loop it means that we crossed a tick boundary since we grabbed the "now" timestamp, so at this point "now" refers to a time in the old jiffy, so using the old value for "now" is incorrect, and is likely to give us a stale jiffies value. In particular, the first time through the loop the tick_do_update_jiffies64(now) call is always a no-op, since the caller, tick_nohz_restart_sched_tick(), will have already called tick_do_update_jiffies64(now) with that "now" value. Note that tick_nohz_stop_sched_tick() already uses the correct approach: when we notice we cross a jiffy boundary, grab a new timestamp with ktime_get(), and *then* update jiffies. 
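Condensed to a sketch (illustrative only; the helper name is invented, the called functions are those used in tick-sched.c, and this is not the actual patch hunk), the invariant after a reprogramming attempt fails is:

    /* a tick boundary was crossed since "now" was last captured */
    static void account_crossed_tick(void)
    {
            ktime_t now = ktime_get();      /* refresh the timestamp first  */
            tick_do_update_jiffies64(now);  /* then advance jiffies with it */
    }
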
Signed-off-by: Neal Cardwell Cc: Ben Segall Cc: Ingo Molnar Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1332875377-23014-1-git-send-email-ncardwell@google.com Signed-off-by: Thomas Gleixner --- kernel/time/tick-sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3526038f2836..6a3a5b9ff561 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -534,9 +534,9 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) hrtimer_get_expires(&ts->sched_timer), 0)) break; } - /* Update jiffies and reread time */ - tick_do_update_jiffies64(now); + /* Reread time and update jiffies */ now = ktime_get(); + tick_do_update_jiffies64(now); } } -- cgit v1.2.3 From 9886f444129171569461d8c39983e16f4871e3b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Apr 2012 10:50:55 +0200 Subject: itimer: Use printk_once instead of WARN_ONCE David pointed out, that WARN_ONCE() to report usage of an deprecated misfeature make folks unhappy. Use printk_once() instead. Andrew told me to stop grumbling and to remove the silly typecast while touching the file. Reported-by: David Rientjes Cc: Andrew Morton Signed-off-by: Thomas Gleixner --- kernel/itimer.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/itimer.c b/kernel/itimer.c index c70369a74b5a..8d262b467573 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -285,9 +285,10 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, if(copy_from_user(&set_buffer, value, sizeof(set_buffer))) return -EFAULT; } else { - memset((char *) &set_buffer, 0, sizeof(set_buffer)); - WARN_ONCE(1, "setitimer: new_value pointer is NULL." - " Misfeature support will be removed\n"); + memset(&set_buffer, 0, sizeof(set_buffer)); + printk_once(KERN_WARNING "%s calls setitimer() with new_value NULL pointer." + " Misfeature support will be removed\n", + current->comm); } error = do_setitimer(which, &set_buffer, ovalue ? &get_buffer : NULL); -- cgit v1.2.3 From fa4da365bc7772c2cd6d5405bdf151612455f957 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 9 Apr 2012 15:41:44 -0700 Subject: clockevents: tTack broadcast device mode change in tick_broadcast_switch_to_oneshot() In the commit 77b0d60c5adf39c74039e2142a1d3cd1e4d53799, "clockevents: Leave the broadcast device in shutdown mode when not needed", we were bailing out too quickly in tick_broadcast_switch_to_oneshot(), with out tracking the broadcast device mode change to 'TICKDEV_MODE_ONESHOT'. This breaks the platforms which need broadcast device oneshot services during deep idle states. tick_broadcast_oneshot_control() thinks that it is in periodic mode and fails to take proper decisions based on the CLOCK_EVT_NOTIFY_BROADCAST_[ENTER, EXIT] notifications during deep idle entry/exit. Fix this by tracking the broadcast device mode as 'TICKDEV_MODE_ONESHOT', before leaving the broadcast HW device in shutdown mode if there are no active requests for the moment. 
Reported-and-tested-by: Santosh Shilimkar Signed-off-by: Suresh Siddha Cc: johnstul@us.ibm.com Link: http://lkml.kernel.org/r/1334011304.12400.81.camel@sbsiddha-desk.sc.intel.com Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index e883f57a3cd3..bf57abdc7bd0 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -575,10 +575,12 @@ void tick_broadcast_switch_to_oneshot(void) unsigned long flags; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); + + tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; + if (cpumask_empty(tick_get_broadcast_mask())) goto end; - tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; bc = tick_broadcast_device.evtdev; if (bc) tick_broadcast_setup_oneshot(bc); -- cgit v1.2.3 From 5b7526e3a640e491075557acaa842c59c652c0c3 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 5 Apr 2012 16:52:13 -0700 Subject: irq/irq_domain: Quit ignoring error returns from irq_alloc_desc_from(). In commit 4bbdd45a (irq_domain/powerpc: eliminate irq_map; use irq_alloc_desc() instead) code was added that ignores error returns from irq_alloc_desc_from() by (silently) casting the return value to unsigned. The negitive value error return now suddenly looks like a valid irq number. Commits cc79ca69 (irq_domain: Move irq_domain code from powerpc to kernel/irq) and 1bc04f2c (irq_domain: Add support for base irq and hwirq in legacy mappings) move this code to its current location in irqdomain.c The result of all of this is a null pointer dereference OOPS if one of the error cases is hit. The fix: Don't cast away the negativeness of the return value and then check for errors. Signed-off-by: David Daney Acked-by: Rob Herring [grant.likely: dropped addition of new 'irq' variable] Signed-off-by: Grant Likely --- kernel/irq/irqdomain.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 3601f3fbf67c..9310a8d365b0 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -350,7 +350,8 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { - unsigned int virq, hint; + unsigned int hint; + int virq; pr_debug("irq: irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); @@ -381,9 +382,9 @@ unsigned int irq_create_mapping(struct irq_domain *domain, if (hint == 0) hint++; virq = irq_alloc_desc_from(hint, 0); - if (!virq) + if (virq <= 0) virq = irq_alloc_desc_from(1, 0); - if (!virq) { + if (virq <= 0) { pr_debug("irq: -> virq allocation failed\n"); return 0; } -- cgit v1.2.3 From ac5830a33f5b25eae1dc0708b3e7a3d270a6c07f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 10 Apr 2012 15:25:42 +0300 Subject: irq_domain: correct the debugfs file name The actual name of the irq_domain mapping debugfs file is "irq_domain_mapping" not "virq_mapping". Signed-off-by: Mika Westerberg Signed-off-by: Grant Likely --- kernel/irq/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index cf1a4a68ce44..d1a758bc972a 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -62,7 +62,7 @@ config IRQ_DOMAIN_DEBUG help This option will show the mapping relationship between hardware irq numbers and Linux irq numbers. 
The mapping is exposed via debugfs - in the file "virq_mapping". + in the file "irq_domain_mapping". If you don't know what this means you don't need it. -- cgit v1.2.3 From 15e06bf64f686befd2030da867a3dad965b96cc0 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 11 Apr 2012 00:26:25 -0600 Subject: irqdomain: Fix debugfs formatting This patch fixes the irq_domain_mapping debugfs output to pad pointer values with leading zeros so that pointer values are displayed correctly. Otherwise you get output similar to "0x 5e0000000000000". Also, when the irq_domain is set to 'null' Signed-off-by: Grant Likely Cc: David Daney Cc: Mika Westerberg --- kernel/irq/irqdomain.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 9310a8d365b0..eb05e40f4553 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -643,8 +643,8 @@ static int virq_debug_show(struct seq_file *m, void *private) void *data; int i; - seq_printf(m, "%-5s %-7s %-15s %-18s %s\n", "virq", "hwirq", - "chip name", "chip data", "domain name"); + seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq", + "chip name", 2 * sizeof(void *) + 2, "chip data", "domain name"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); @@ -667,7 +667,7 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%-15s ", p); data = irq_desc_get_chip_data(desc); - seq_printf(m, "0x%16p ", data); + seq_printf(m, data ? "0x%p " : " %p ", data); if (desc->irq_data.domain && desc->irq_data.domain->of_node) p = desc->irq_data.domain->of_node->full_name; -- cgit v1.2.3 From 79549c6dfda0603dba9a70a53467ce62d9335c33 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 9 Apr 2012 21:03:50 +0200 Subject: cred: copy_process() should clear child->replacement_session_keyring keyctl_session_to_parent(task) sets ->replacement_session_keyring, it should be processed and cleared by key_replace_session_keyring(). However, this task can fork before it notices TIF_NOTIFY_RESUME and the new child gets the bogus ->replacement_session_keyring copied by dup_task_struct(). This is obviously wrong and, if nothing else, this leads to put_cred(already_freed_cred). change copy_creds() to clear this member. If copy_process() fails before this point the wrong ->replacement_session_keyring doesn't matter, exit_creds() won't be called. Cc: Signed-off-by: Oleg Nesterov Acked-by: David Howells Signed-off-by: Linus Torvalds --- kernel/cred.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/cred.c b/kernel/cred.c index 97b36eeca4c9..e70683d9ec32 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -386,6 +386,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; + p->replacement_session_keyring = NULL; + if ( #ifdef CONFIG_KEYS !p->cred->thread_keyring && -- cgit v1.2.3 From 6fa6c8e25e95bdc73e92e4c96b8e3299169b616e Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 15 Feb 2012 15:06:08 -0700 Subject: irq_domain: Move irq_virq_count into NOMAP revmap This patch replaces the old global setting of irq_virq_count that is only used by the NOMAP mapping and instead uses a revmap_data property so that the maximum NOMAP allocation can be set per NOMAP irq_domain. There is exactly one user of irq_virq_count in-tree right now: PS3. Also, irq_virq_count is only useful for the NOMAP mapping. 
So, instead of having a single global irq_virq_count values, this change drops it entirely and added a max_irq argument to irq_domain_add_nomap(). That makes it a property of an individual nomap irq domain instead of a global system settting. Signed-off-by: Grant Likely Tested-by: Benjamin Herrenschmidt Cc: Thomas Gleixner Cc: Milton Miller --- kernel/irq/irqdomain.c | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index eb05e40f4553..d34413e78628 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -23,7 +23,6 @@ static LIST_HEAD(irq_domain_list); static DEFINE_MUTEX(irq_domain_mutex); static DEFINE_MUTEX(revmap_trees_mutex); -static unsigned int irq_virq_count = NR_IRQS; static struct irq_domain *irq_default_domain; /** @@ -184,13 +183,16 @@ struct irq_domain *irq_domain_add_linear(struct device_node *of_node, } struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, + unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain *domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_NOMAP, ops, host_data); - if (domain) + if (domain) { + domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0; irq_domain_add(domain); + } return domain; } @@ -262,22 +264,6 @@ void irq_set_default_host(struct irq_domain *domain) irq_default_domain = domain; } -/** - * irq_set_virq_count() - Set the maximum number of linux irqs - * @count: number of linux irqs, capped with NR_IRQS - * - * This is mainly for use by platforms like iSeries who want to program - * the virtual irq number in the controller to avoid the reverse mapping - */ -void irq_set_virq_count(unsigned int count) -{ - pr_debug("irq: Trying to set virq count to %d\n", count); - - BUG_ON(count < NUM_ISA_INTERRUPTS); - if (count < NR_IRQS) - irq_virq_count = count; -} - static int irq_setup_virq(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { @@ -320,13 +306,12 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) pr_debug("irq: create_direct virq allocation failed\n"); return 0; } - if (virq >= irq_virq_count) { + if (virq >= domain->revmap_data.nomap.max_irq) { pr_err("ERROR: no free irqs available below %i maximum\n", - irq_virq_count); + domain->revmap_data.nomap.max_irq); irq_free_desc(virq); return 0; } - pr_debug("irq: create_direct obtained virq %d\n", virq); if (irq_setup_virq(domain, virq, virq)) { @@ -378,7 +363,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain, return irq_domain_legacy_revmap(domain, hwirq); /* Allocate a virtual interrupt number */ - hint = hwirq % irq_virq_count; + hint = hwirq % nr_irqs; if (hint == 0) hint++; virq = irq_alloc_desc_from(hint, 0); @@ -516,7 +501,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { unsigned int i; - unsigned int hint = hwirq % irq_virq_count; + unsigned int hint = hwirq % nr_irqs; /* Look for default domain if nececssary */ if (domain == NULL) @@ -537,7 +522,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, if (data && (data->domain == domain) && (data->hwirq == hwirq)) return i; i++; - if (i >= irq_virq_count) + if (i >= nr_irqs) i = 1; } while(i != hint); return 0; -- cgit v1.2.3 From 026ee1f66aaa7f01b617a0ba89ac4b531f9603f1 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 12 Apr 2012 12:49:17 -0700 Subject: panic: fix stack dump print on direct call to panic() Commit 6e6f0a1f0fa6 ("panic: don't print 
redundant backtraces on oops") causes a regression where no stack trace will be printed at all for the case where kernel code calls panic() directly while not processing an oops, and of course there are 100's of instances of this type of call. The original commit executed the check (!oops_in_progress), but this will always be false because just before the dump_stack() there is a call to bust_spinlocks(1), which does the following: void __attribute__((weak)) bust_spinlocks(int yes) { if (yes) { ++oops_in_progress; The proper way to resolve the problem that original commit tried to solve is to avoid printing a stack dump from panic() when the either of the following conditions is true: 1) TAINT_DIE has been set (this is done by oops_end()) This indicates and oops has already been printed. 2) oops_in_progress > 1 This guards against the rare case where panic() is invoked a second time, or in between oops_begin() and oops_end() Signed-off-by: Jason Wessel Cc: Andi Kleen Cc: [3.3+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/panic.c b/kernel/panic.c index 80aed44e345a..8ed89a175d79 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -97,7 +97,7 @@ void panic(const char *fmt, ...) /* * Avoid nested stack-dumping if a panic occurs during oops processing */ - if (!oops_in_progress) + if (!test_taint(TAINT_DIE) && oops_in_progress <= 1) dump_stack(); #endif -- cgit v1.2.3 From 5269a9ab7def9a3116663347d59c4d70afa2d180 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 12 Apr 2012 14:42:15 -0600 Subject: irq_domain: fix type mismatch in debugfs output format sizeof(void*) returns an unsigned long, but it was being used as a width parameter to a "%-*s" format string which requires an int. On 64 bit platforms this causes a type mismatch: linux/kernel/irq/irqdomain.c:575: warning: field width should have type 'int', but argument 6 has type 'long unsigned int' This change casts the size to an int so printf gets the right data type. Reported-by: Andreas Schwab Signed-off-by: Grant Likely Cc: David Daney --- kernel/irq/irqdomain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index d34413e78628..0e0ba5f840b2 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -629,7 +629,8 @@ static int virq_debug_show(struct seq_file *m, void *private) int i; seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq", - "chip name", 2 * sizeof(void *) + 2, "chip data", "domain name"); + "chip name", (int)(2 * sizeof(void *) + 2), "chip data", + "domain name"); for (i = 1; i < nr_irqs; i++) { desc = irq_to_desc(i); -- cgit v1.2.3 From ef1f0982540e5f79c8bbf3675bbc0a9734dba3fc Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 11 Apr 2012 12:21:39 -0400 Subject: irq_work: fix compile failure on tile from missing include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building with IRQ_WORK configured results in kernel/irq_work.c: In function ‘irq_work_run’: kernel/irq_work.c:110: error: implicit declaration of function ‘irqs_disabled’ The appropriate header just needs to be included. 
Signed-off-by: Chris Metcalf Signed-off-by: Paul Gortmaker --- kernel/irq_work.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 0c56d44b9fd5..1588e3b2871b 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -11,6 +11,7 @@ #include #include #include +#include #include /* -- cgit v1.2.3 From 6e48b550d1f5f1919e6500547ae14a73fbf66c7b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 13 Apr 2012 09:52:59 +0100 Subject: tracing: Fix build breakage without CONFIG_PERF_EVENTS (again) Today's -next fails to link for me: kernel/built-in.o:(.data+0x178e50): undefined reference to `perf_ftrace_event_register' It looks like multiple fixes have been merged for the issue fixed by commit fa73dc9 (tracing: Fix build breakage without CONFIG_PERF_EVENTS) though I can't identify the other changes that have gone in at the minute, it's possible that the changes which caused the breakage fixed by the previous commit got dropped but the fix made it in. Link: http://lkml.kernel.org/r/1334307179-21255-1-git-send-email-broonie@opensource.wolfsonmicro.com Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Mark Brown Signed-off-by: Steven Rostedt --- kernel/trace/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 95059f091a24..f95d65da6db8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -836,11 +836,11 @@ extern const char *__stop___trace_bprintk_fmt[]; filter) #include "trace_entries.h" -#ifdef CONFIG_FUNCTION_TRACER +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER) int perf_ftrace_event_register(struct ftrace_event_call *call, enum trace_reg type, void *data); #else #define perf_ftrace_event_register NULL -#endif /* CONFIG_FUNCTION_TRACER */ +#endif #endif /* _LINUX_KERNEL_TRACE_H */ -- cgit v1.2.3 From 348f0fc238efb441a28e7644c51f9fd3001b228a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 16 Apr 2012 15:41:28 -0400 Subject: tracing: Fix regression with tracing_on The change to make tracing_on affect only the ftrace ring buffer, caused a bug where it wont affect any ring buffer. The problem was that the buffer of the trace_array was passed to the write function and not the trace array itself. The trace_array can change the buffer when running a latency tracer. If this happens, then the buffer being disabled may not be the buffer currently used by ftrace. This will cause the tracing_on file to become useless. The simple fix is to pass the trace_array to the write function instead of the buffer. Then the actual buffer may be changed. 
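To make the failure mode concrete, here is a standalone sketch of the pointer-capture problem (plain C with simplified, illustrative types and names, not the ftrace code itself):

    #include <stdio.h>

    struct ring_buffer { int enabled; };
    struct trace_array { struct ring_buffer *buffer; };

    /* Wrong: acts on whatever buffer happened to be current at setup time. */
    static void rb_write_stale(struct ring_buffer *captured, int val)
    {
            captured->enabled = val;
    }

    /* Right: resolve the current buffer at write time. */
    static void rb_write_live(struct trace_array *tr, int val)
    {
            tr->buffer->enabled = val;
    }

    int main(void)
    {
            struct ring_buffer a = { 1 }, b = { 1 };
            struct trace_array tr = { &a };
            struct ring_buffer *captured = tr.buffer;  /* like storing global_trace.buffer */

            tr.buffer = &b;                    /* a latency tracer swaps buffers */

            rb_write_stale(captured, 0);       /* only the old, unused buffer is disabled */
            printf("buffer in use enabled: %d\n", tr.buffer->enabled);

            rb_write_live(&tr, 0);             /* the buffer actually in use is disabled */
            printf("buffer in use enabled: %d\n", tr.buffer->enabled);
            return 0;
    }

Passing the trace_array and dereferencing ->buffer on every write is the indirection the patch below adds.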
Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ed7b5d1e12f4..2a22255c1010 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4629,7 +4629,8 @@ static ssize_t rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ring_buffer *buffer = filp->private_data; + struct trace_array *tr = filp->private_data; + struct ring_buffer *buffer = tr->buffer; char buf[64]; int r; @@ -4647,7 +4648,8 @@ static ssize_t rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct ring_buffer *buffer = filp->private_data; + struct trace_array *tr = filp->private_data; + struct ring_buffer *buffer = tr->buffer; unsigned long val; int ret; @@ -4734,7 +4736,7 @@ static __init int tracer_init_debugfs(void) &trace_clock_fops); trace_create_file("tracing_on", 0644, d_tracer, - global_trace.buffer, &rb_simple_fops); + &global_trace, &rb_simple_fops); #ifdef CONFIG_DYNAMIC_FTRACE trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, -- cgit v1.2.3 From 92c38702e98e58438c3760ebb279c40bbca8bd5f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Apr 2012 12:12:09 -0700 Subject: rcu: Permit call_rcu() from CPU_DYING notifiers As of: 29494be71afe ("rcu,cleanup: simplify the code when cpu is dying") RCU adopts callbacks from the dying CPU in its CPU_DYING notifier, which means that any callbacks posted by later CPU_DYING notifiers are ignored until the CPU comes back online. A WARN_ON_ONCE() was added to __call_rcu() by: e56014000816 ("rcu: Simplify offline processing") to check for this condition. Although this condition did not trigger (at least as far as I know) during -next testing, it did recently trigger in mainline: https://lkml.org/lkml/2012/4/2/34 What is needed longer term is for RCU's CPU_DEAD notifier to adopt any callbacks that were posted by CPU_DYING notifiers, however, the Linux kernel has been running with this sort of thing happening for quite some time. So the only thing that qualifies as a regression is the WARN_ON_ONCE(), which this commit removes. Making RCU's CPU_DEAD notifier adopt callbacks posted by CPU_DYING notifiers is a topic for the 3.5 release of the Linux kernel. Reported-by: Sergey Senozhatsky Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- kernel/rcutree.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 1050d6d3922c..d0c5baf1ab18 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1820,7 +1820,6 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), * a quiescent state betweentimes. */ local_irq_save(flags); - WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); rdp = this_cpu_ptr(rsp->rda); /* Add the callback to our list. */ -- cgit v1.2.3 From b435092f70ec5ebbfb6d075d5bf3c631b49a51de Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Apr 2012 12:08:23 +0200 Subject: tick: Fix oneshot broadcast setup really Sven Joachim reported, that suspend/resume on rc3 trips over a NULL pointer dereference. Linus spotted the clockevent handler being NULL. commit fa4da365b(clockevents: tTack broadcast device mode change in tick_broadcast_switch_to_oneshot()) tried to fix a problem with the broadcast device setup, which was introduced in commit 77b0d60c5( clockevents: Leave the broadcast device in shutdown mode when not needed). 
The initial commit avoided setting up the broadcast device when no broadcast request bits were set, but that left the broadcast device dysfunctional. In consequence, deep idle states which need the broadcast device were not woken up. Commit fa4da365b tried to fix that by initializing the state of the broadcast facility, but that missed the fact that nothing initializes the event handler and some other state of the underlying clock event device. The fix is to revert both commits and make only the mode setting of the clock event device conditional on the state of active broadcast users. That initializes everything except the low level device mode, but this happens when the broadcast functionality is invoked by deep idle. Reported-and-tested-by: Sven Joachim Signed-off-by: Thomas Gleixner Cc: Rafael J. Wysocki Cc: Linus Torvalds Cc: Suresh Siddha Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1204181205540.2542@ionos --- kernel/time/tick-broadcast.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index bf57abdc7bd0..119aca5c6845 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -531,7 +531,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC; bc->event_handler = tick_handle_oneshot_broadcast; - clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); /* Take the do_timer update */ tick_do_timer_cpu = cpu; @@ -549,6 +548,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc) to_cpumask(tmpmask)); if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) { + clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); tick_broadcast_init_next_event(to_cpumask(tmpmask), tick_next_period); tick_broadcast_set_event(tick_next_period, 1); @@ -577,15 +577,10 @@ void tick_broadcast_switch_to_oneshot(void) raw_spin_lock_irqsave(&tick_broadcast_lock, flags); tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; - - if (cpumask_empty(tick_get_broadcast_mask())) - goto end; - bc = tick_broadcast_device.evtdev; if (bc) tick_broadcast_setup_oneshot(bc); -end: raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v1.2.3 From b9a6a23566960d0dd3f51e2e68b472cd61911078 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Apr 2012 17:31:58 +0200 Subject: tick: Ensure that the broadcast device is initialized Santosh found another trap when we avoid initializing the broadcast device in the switch_to_oneshot code. The broadcast device might still be in SHUTDOWN state when we actually need to use it. That obviously breaks, as set_next_event() is called on a shutdown device. This did not break on x86, but Suresh analyzed it: From the review, most likely on Sven's system we are force enabling the hpet using the pci quirk's method very late. And in this case, hpet_clockevent (which will be global_clock_event) handler can be null, specifically as this platform might not be using deeper c-states and using the reliable APIC timer. Prior to commit 'fa4da365bc7772c', that handler will be set to 'tick_handle_oneshot_broadcast' when we switch the broadcast timer to oneshot mode, even though we don't use it. Post commit 'fa4da365bc7772c', we stopped switching the broadcast mode to oneshot as this is not really needed and his platform's global_clock_event's handler will remain null. While on my SNB laptop, same is set to 'clockevents_handle_noop' because hpet gets enabled very early. 
(noop handler on my platform set when the early enabled hpet timer gets replaced by the lapic timer). But the commit 'fa4da365bc7772c' tracked the broadcast timer mode in the SW as oneshot, even though it didn't touch the HW timer. During resume however, tick_resume_broadcast() saw the SW broadcast mode as oneshot and actually programmed the broadcast device also into oneshot mode. So this triggered the null pointer de-reference after the hpet wraps around and depending on what the hpet counter is set to. On the normal platforms where hpet gets enabled early we should be seeing a spurious interrupt (in my SNB laptop I see one spurious interrupt after around 5 minutes ;) which is 32-bit hpet counter wraparound time), but that's a separate issue. Enforce the mode setting when trying to set an event. Reported-and-tested-by: Santosh Shilimkar Signed-off-by: Thomas Gleixner Acked-by: Suresh Siddha Cc: torvalds@linux-foundation.org Cc: svenjoac@gmx.de Cc: rjw@sisk.pl Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1204181723350.2542@ionos --- kernel/time/tick-broadcast.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 119aca5c6845..029531f3818c 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -373,6 +373,9 @@ static int tick_broadcast_set_event(ktime_t expires, int force) { struct clock_event_device *bc = tick_broadcast_device.evtdev; + if (bc->mode != CLOCK_EVT_MODE_ONESHOT) + clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); + return clockevents_program_event(bc, expires, force); } -- cgit v1.2.3 From a6371f80230eaaafd7eef7efeedaa9509bdc982d Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 18 Apr 2012 19:27:39 -0700 Subject: tick: Fix the spurious broadcast timer ticks after resume During resume, tick_resume_broadcast() programs the broadcast timer in oneshot mode unconditionally. On the platforms where broadcast timer is not really required, this will generate spurious broadcast timer ticks upon resume. For example, on the always running apic timer platforms with HPET, I see spurious hpet tick once every ~5minutes (which is the 32-bit hpet counter wraparound time). Similar to boot time, during resume make the oneshot mode setting of the broadcast clock event device conditional on the state of active broadcast users. 
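A schematic of that guard (standalone C with simplified, illustrative types and names, not the kernel code):

    #include <stdbool.h>
    #include <stdio.h>

    struct clock_event_device { const char *mode; };

    /* Stand-in for cpumask_empty(tick_get_broadcast_mask()). */
    static bool broadcast_mask_empty = true;

    static void resume_broadcast_oneshot(struct clock_event_device *bc)
    {
            /* Reprogramming this unconditionally is what produced the spurious
             * wraparound ticks on systems that never use broadcast wakeups. */
            bc->mode = "oneshot";
    }

    static void tick_resume_broadcast_sketch(struct clock_event_device *bc)
    {
            if (!broadcast_mask_empty)      /* the added condition */
                    resume_broadcast_oneshot(bc);
    }

    int main(void)
    {
            struct clock_event_device bc = { "shutdown" };

            tick_resume_broadcast_sketch(&bc);
            printf("broadcast device after resume: %s\n", bc.mode);
            return 0;
    }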
Signed-off-by: Suresh Siddha Tested-by: Santosh Shilimkar Tested-by: svenjoac@gmx.de Cc: torvalds@linux-foundation.org Cc: rjw@sisk.pl Link: http://lkml.kernel.org/r/1334802459.28674.209.camel@sbsiddha-desk.sc.intel.com Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 029531f3818c..f113755695e2 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -346,7 +346,8 @@ int tick_resume_broadcast(void) tick_get_broadcast_mask()); break; case TICKDEV_MODE_ONESHOT: - broadcast = tick_resume_broadcast_oneshot(bc); + if (!cpumask_empty(tick_get_broadcast_mask())) + broadcast = tick_resume_broadcast_oneshot(bc); break; } } -- cgit v1.2.3 From db4c75cbebd7e5910cd3bcb6790272fcc3042857 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 19 Apr 2012 10:31:47 -0400 Subject: tracing: Fix stacktrace of latency tracers (irqsoff and friends) While debugging a latency with someone on IRC (mirage335) on #linux-rt (OFTC), we discovered that the stacktrace output of the latency tracers (preemptirqsoff) was empty. This bug was caused by the creation of the dynamic length stack trace again (like commit 12b5da3 "tracing: Fix ent_size in trace output" was). This bug is caused by the latency tracers requiring the next event to determine the time between the current event and the next. But by grabbing the next event, the iter->ent_size is set to the next event instead of the current one. As the stacktrace event is the last event, this makes the ent_size zero and causes nothing to be printed for the stack trace. The dynamic stacktrace uses the ent_size to determine how much of the stack can be printed. The ent_size of zero means no stack. The simple fix is to save the iter->ent_size before finding the next event. Note, mirage335 asked to remain anonymous from LKML and git, so I will not add the Reported-by and Tested-by tags, even though he did report the issue and tested the fix. Cc: stable@vger.kernel.org # 3.1+ Signed-off-by: Steven Rostedt --- kernel/trace/trace_output.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 859fae6b1825..df611a0e76c5 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -652,6 +652,8 @@ int trace_print_lat_context(struct trace_iterator *iter) { u64 next_ts; int ret; + /* trace_find_next_entry will reset ent_size */ + int ent_size = iter->ent_size; struct trace_seq *s = &iter->seq; struct trace_entry *entry = iter->ent, *next_entry = trace_find_next_entry(iter, NULL, @@ -660,6 +662,9 @@ int trace_print_lat_context(struct trace_iterator *iter) unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); unsigned long rel_usecs; + /* Restore the original ent_size */ + iter->ent_size = ent_size; + if (!next_entry) next_ts = iter->ts; rel_usecs = ns2usecs(next_ts - iter->ts); -- cgit v1.2.3 From 9f3045eca89a2e6fdd1901aafb9e28231d3f31fb Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 18 Apr 2012 16:29:57 -0400 Subject: irq: hide debug macros so they don't collide with others. The file kernel/irq/debug.h temporarily defines P, PS, PD and then undefines them. 
However these names aren't really "internal" enough, and collide with other more legit users such as the ones in the xtensa arch, causing: In file included from kernel/irq/internals.h:58:0, from kernel/irq/irqdesc.c:18: kernel/irq/debug.h:8:0: warning: "PS" redefined [enabled by default] arch/xtensa/include/asm/regs.h:59:0: note: this is the location of the previous definition Add a handful of underscores to do a better job of hiding these temporary macros. Cc: Thomas Gleixner Signed-off-by: Paul Gortmaker --- kernel/irq/debug.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'kernel') diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h index 97a8bfadc88a..e75e29e4434a 100644 --- a/kernel/irq/debug.h +++ b/kernel/irq/debug.h @@ -4,10 +4,10 @@ #include -#define P(f) if (desc->status_use_accessors & f) printk("%14s set\n", #f) -#define PS(f) if (desc->istate & f) printk("%14s set\n", #f) +#define ___P(f) if (desc->status_use_accessors & f) printk("%14s set\n", #f) +#define ___PS(f) if (desc->istate & f) printk("%14s set\n", #f) /* FIXME */ -#define PD(f) do { } while (0) +#define ___PD(f) do { } while (0) static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) { @@ -23,23 +23,23 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) print_symbol("%s\n", (unsigned long)desc->action->handler); } - P(IRQ_LEVEL); - P(IRQ_PER_CPU); - P(IRQ_NOPROBE); - P(IRQ_NOREQUEST); - P(IRQ_NOTHREAD); - P(IRQ_NOAUTOEN); + ___P(IRQ_LEVEL); + ___P(IRQ_PER_CPU); + ___P(IRQ_NOPROBE); + ___P(IRQ_NOREQUEST); + ___P(IRQ_NOTHREAD); + ___P(IRQ_NOAUTOEN); - PS(IRQS_AUTODETECT); - PS(IRQS_REPLAY); - PS(IRQS_WAITING); - PS(IRQS_PENDING); + ___PS(IRQS_AUTODETECT); + ___PS(IRQS_REPLAY); + ___PS(IRQS_WAITING); + ___PS(IRQS_PENDING); - PD(IRQS_INPROGRESS); - PD(IRQS_DISABLED); - PD(IRQS_MASKED); + ___PD(IRQS_INPROGRESS); + ___PD(IRQS_DISABLED); + ___PD(IRQS_MASKED); } -#undef P -#undef PS -#undef PD +#undef ___P +#undef ___PS +#undef ___PD -- cgit v1.2.3 From f8262d476823a7ea1eb497ff9676d1eab2393c75 Mon Sep 17 00:00:00 2001 From: Bojan Smojver Date: Tue, 24 Apr 2012 23:53:28 +0200 Subject: PM / Hibernate: fix the number of pages used for hibernate/thaw buffering Hibernation regression fix, since 3.2. Calculate the number of required free pages based on non-high memory pages only, because that is where the buffers will come from. Commit 081a9d043c983f161b78fdc4671324d1342b86bc introduced a new buffer page allocation logic during hibernation, in order to improve the performance. The amount of pages allocated was calculated based on total amount of pages available, although only non-high memory pages are usable for this purpose. This caused hibernation code to attempt to over allocate pages on platforms that have high memory, which led to hangs. Signed-off-by: Bojan Smojver Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 8742fd013a94..eef311a58a64 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -51,6 +51,23 @@ #define MAP_PAGE_ENTRIES (PAGE_SIZE / sizeof(sector_t) - 1) +/* + * Number of free pages that are not high. + */ +static inline unsigned long low_free_pages(void) +{ + return nr_free_pages() - nr_free_highpages(); +} + +/* + * Number of pages required to be kept free while writing the image. 
Always + * half of all available low pages before the writing starts. + */ +static inline unsigned long reqd_free_pages(void) +{ + return low_free_pages() / 2; +} + struct swap_map_page { sector_t entries[MAP_PAGE_ENTRIES]; sector_t next_swap; @@ -72,7 +89,7 @@ struct swap_map_handle { sector_t cur_swap; sector_t first_sector; unsigned int k; - unsigned long nr_free_pages, written; + unsigned long reqd_free_pages; u32 crc32; }; @@ -316,8 +333,7 @@ static int get_swap_writer(struct swap_map_handle *handle) goto err_rel; } handle->k = 0; - handle->nr_free_pages = nr_free_pages() >> 1; - handle->written = 0; + handle->reqd_free_pages = reqd_free_pages(); handle->first_sector = handle->cur_swap; return 0; err_rel: @@ -352,11 +368,11 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, handle->cur_swap = offset; handle->k = 0; } - if (bio_chain && ++handle->written > handle->nr_free_pages) { + if (bio_chain && low_free_pages() <= handle->reqd_free_pages) { error = hib_wait_on_bio_chain(bio_chain); if (error) goto out; - handle->written = 0; + handle->reqd_free_pages = reqd_free_pages(); } out: return error; @@ -618,7 +634,7 @@ static int save_image_lzo(struct swap_map_handle *handle, * Adjust number of free pages after all allocations have been done. * We don't want to run out of pages when writing. */ - handle->nr_free_pages = nr_free_pages() >> 1; + handle->reqd_free_pages = reqd_free_pages(); /* * Start the CRC32 thread. -- cgit v1.2.3 From eb95308ee2a69403909e111837b9068c64cfc349 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Apr 2012 13:38:40 +0200 Subject: sched: Fix more load-balancing fallout Commits 367456c756a6 ("sched: Ditch per cgroup task lists for load-balancing") and 5d6523ebd ("sched: Fix load-balance wreckage") left some more wreckage. By setting loop_max unconditionally to ->nr_running load-balancing could take a lot of time on very long runqueues (hackbench!). So keep the sysctl as max limit of the amount of tasks we'll iterate. Furthermore, the min load filter for migration completely fails with cgroups since inequality in per-cpu state can easily lead to such small loads :/ Furthermore the change to add new tasks to the tail of the queue instead of the head seems to have some effect.. not quite sure I understand why. Combined these fixes solve the huge hackbench regression reported by Tim when hackbench is ran in a cgroup. 
Reported-by: Tim Chen Acked-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1335365763.28150.267.camel@twins [ got rid of the CONFIG_PREEMPT tuning and made small readability edits ] Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 18 ++++++++++-------- kernel/sched/features.h | 1 + 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0d97ebdc58f0..e9553640c1c3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -784,7 +784,7 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) update_load_add(&rq_of(cfs_rq)->load, se->load.weight); #ifdef CONFIG_SMP if (entity_is_task(se)) - list_add_tail(&se->group_node, &rq_of(cfs_rq)->cfs_tasks); + list_add(&se->group_node, &rq_of(cfs_rq)->cfs_tasks); #endif cfs_rq->nr_running++; } @@ -3215,6 +3215,8 @@ static int move_one_task(struct lb_env *env) static unsigned long task_h_load(struct task_struct *p); +static const unsigned int sched_nr_migrate_break = 32; + /* * move_tasks tries to move up to load_move weighted load from busiest to * this_rq, as part of a balancing operation within domain "sd". @@ -3242,7 +3244,7 @@ static int move_tasks(struct lb_env *env) /* take a breather every nr_migrate tasks */ if (env->loop > env->loop_break) { - env->loop_break += sysctl_sched_nr_migrate; + env->loop_break += sched_nr_migrate_break; env->flags |= LBF_NEED_BREAK; break; } @@ -3252,7 +3254,7 @@ static int move_tasks(struct lb_env *env) load = task_h_load(p); - if (load < 16 && !env->sd->nr_balance_failed) + if (sched_feat(LB_MIN) && load < 16 && !env->sd->nr_balance_failed) goto next; if ((load / 2) > env->load_move) @@ -4407,7 +4409,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, .dst_cpu = this_cpu, .dst_rq = this_rq, .idle = idle, - .loop_break = sysctl_sched_nr_migrate, + .loop_break = sched_nr_migrate_break, }; cpumask_copy(cpus, cpu_active_mask); @@ -4445,10 +4447,10 @@ redo: * correctly treated as an imbalance. */ env.flags |= LBF_ALL_PINNED; - env.load_move = imbalance; - env.src_cpu = busiest->cpu; - env.src_rq = busiest; - env.loop_max = busiest->nr_running; + env.load_move = imbalance; + env.src_cpu = busiest->cpu; + env.src_rq = busiest; + env.loop_max = min_t(unsigned long, sysctl_sched_nr_migrate, busiest->nr_running); more_balance: local_irq_save(flags); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index e61fd73913d0..de00a486c5c6 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -68,3 +68,4 @@ SCHED_FEAT(TTWU_QUEUE, true) SCHED_FEAT(FORCE_SD_OVERLAP, false) SCHED_FEAT(RT_RUNTIME_SHARE, true) +SCHED_FEAT(LB_MIN, false) -- cgit v1.2.3 From fb2cf2c660971bea0ad86a9a5c19ad39eab61344 Mon Sep 17 00:00:00 2001 From: "he, bo" Date: Wed, 25 Apr 2012 19:59:21 +0800 Subject: sched: Fix OOPS when build_sched_domains() percpu allocation fails Under extreme memory used up situations, percpu allocation might fail. We hit it when system goes to suspend-to-ram, causing a kworker panic: EIP: [] build_sched_domains+0x23a/0xad0 Kernel panic - not syncing: Fatal exception Pid: 3026, comm: kworker/u:3 3.0.8-137473-gf42fbef #1 Call Trace: [] panic+0x66/0x16c [...] [] partition_sched_domains+0x287/0x4b0 [] cpuset_update_active_cpus+0x1fe/0x210 [] cpuset_cpu_inactive+0x1d/0x30 [...] With this fix applied build_sched_domains() will return -ENOMEM and the suspend attempt fails. 
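A schematic of the hardened teardown (standalone C using malloc/free in place of the per-cpu allocators; simplified, illustrative types, not the scheduler code):

    #include <stdio.h>
    #include <stdlib.h>

    struct sd_data { void **sd; void **sg; void **sgp; };

    static void sdt_free_sketch(struct sd_data *sdd, int nr_cpus)
    {
            int j;

            for (j = 0; j < nr_cpus; j++) {
                    if (sdd->sd)            /* the array itself may never have been allocated */
                            free(sdd->sd[j]);
                    if (sdd->sg)
                            free(sdd->sg[j]);
                    if (sdd->sgp)
                            free(sdd->sgp[j]);
            }
            free(sdd->sd);  sdd->sd  = NULL;   /* NULL the arrays so a second */
            free(sdd->sg);  sdd->sg  = NULL;   /* teardown pass stays harmless */
            free(sdd->sgp); sdd->sgp = NULL;
    }

    int main(void)
    {
            struct sd_data sdd = { NULL, NULL, NULL };   /* allocation failed early */

            sdt_free_sketch(&sdd, 4);                    /* must not dereference NULL */
            printf("teardown of a partially built domain survived\n");
            return 0;
    }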
Signed-off-by: he, bo Reviewed-by: Zhang, Yanmin Reviewed-by: Srivatsa S. Bhat Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Link: http://lkml.kernel.org/r/1335355161.5892.17.camel@hebo [ So, we fail to deallocate a CPU because we cannot allocate RAM :-/ I don't like that kind of sad behavior but nevertheless it should not crash under high memory load. ] Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4603b9d8f30a..0533a688ce22 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6405,16 +6405,26 @@ static void __sdt_free(const struct cpumask *cpu_map) struct sd_data *sdd = &tl->data; for_each_cpu(j, cpu_map) { - struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); - if (sd && (sd->flags & SD_OVERLAP)) - free_sched_groups(sd->groups, 0); - kfree(*per_cpu_ptr(sdd->sd, j)); - kfree(*per_cpu_ptr(sdd->sg, j)); - kfree(*per_cpu_ptr(sdd->sgp, j)); + struct sched_domain *sd; + + if (sdd->sd) { + sd = *per_cpu_ptr(sdd->sd, j); + if (sd && (sd->flags & SD_OVERLAP)) + free_sched_groups(sd->groups, 0); + kfree(*per_cpu_ptr(sdd->sd, j)); + } + + if (sdd->sg) + kfree(*per_cpu_ptr(sdd->sg, j)); + if (sdd->sgp) + kfree(*per_cpu_ptr(sdd->sgp, j)); } free_percpu(sdd->sd); + sdd->sd = NULL; free_percpu(sdd->sg); + sdd->sg = NULL; free_percpu(sdd->sgp); + sdd->sgp = NULL; } } -- cgit v1.2.3 From 724b6daa13e100067c30cfc4d1ad06629609dc4e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 11 Apr 2012 11:54:13 +1000 Subject: perf: Fix perf_event_for_each() to use sibling In perf_event_for_each() we call a function on an event, and then iterate over the siblings of the event. However we don't call the function on the siblings, we call it repeatedly on the original event - it seems "obvious" that we should be calling it with sibling as the argument. It looks like this broke in commit 75f937f24bd9 ("Fix ctx->mutex vs counter->mutex inversion"). The only effect of the bug is that the PERF_IOC_FLAG_GROUP parameter to the ioctls doesn't work. Signed-off-by: Michael Ellerman Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334109253-31329-1-git-send-email-michael@ellerman.id.au Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index a6a9ec4cd8f5..fd126f82b57c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3183,7 +3183,7 @@ static void perf_event_for_each(struct perf_event *event, perf_event_for_each_child(event, func); func(event); list_for_each_entry(sibling, &event->sibling_list, group_entry) - perf_event_for_each_child(event, func); + perf_event_for_each_child(sibling, func); mutex_unlock(&ctx->mutex); } -- cgit v1.2.3
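A schematic of the corrected group iteration (standalone C with a plain pointer chain standing in for the kernel's list_head; illustrative names only, not the perf code):

    #include <stdio.h>

    struct event { const char *name; struct event *next_sibling; };

    static void set_flag(struct event *e)
    {
            printf("ioctl applied to %s\n", e->name);
    }

    /* Apply func once to the group leader and once per sibling.  The bug was
     * calling func on the leader inside the sibling loop as well, so the
     * siblings were never touched and PERF_IOC_FLAG_GROUP had no effect. */
    static void event_for_each(struct event *leader, void (*func)(struct event *))
    {
            struct event *sibling;

            func(leader);
            for (sibling = leader->next_sibling; sibling; sibling = sibling->next_sibling)
                    func(sibling);          /* sibling, not leader, as in the fix */
    }

    int main(void)
    {
            struct event s2 = { "sibling2", NULL };
            struct event s1 = { "sibling1", &s2 };
            struct event leader = { "leader", &s1 };

            event_for_each(&leader, set_flag);
            return 0;
    }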